Tableau — Power of Server Repository

It's a casual click from my phone and indirectly related to the agenda of our article here. :)

To get a list of failed extracts for all sites last night?

SELECT "sites"."name"                      AS "SiteName", 
"projects"."name" AS "ProjectName",
"BJ"."item name" AS "ItemName",
Cast("BJ"."item type" AS TEXT) AS "ItemType",
"schedules"."name" AS "ScheduleName",
"tasks"."consecutive_failure_count" AS "ConsecutiveFailures"
FROM (SELECT DISTINCT bj.id AS "Job Id" ,
bj.job_name AS "Job Name",
bj.progress AS "Progress",
bj.notes AS "Notes",
bj.site_id AS "Site Id",
COALESCE(t.id) AS "Task Id",
COALESCE(d.project_id, w.project_id, m_w.project_id, m_d.project_id) AS "Item Project Id",
COALESCE(d.id, w.id, m_w.id, m_d.id) AS "Item Id", COALESCE(d.NAME, w.NAME, bj.title) AS "Item Name",
COALESCE(bj.subtitle) AS "Item Type"
FROM background_jobs bj
INNER JOIN tasks t
ON bj.correlation_id = t.id
AND bj.job_name IN ( 'Refresh Extracts',
'Increment Extracts'
)
LEFT JOIN datasources d
ON ( t.obj_id = d.id
AND t.obj_type = 'Datasource' )
LEFT JOIN workbooks w
ON ( t.obj_id = w.id
AND t.obj_type = 'Workbook' )
LEFT JOIN workbooks m_w
ON t.id IS NULL
AND bj.subtitle = 'Workbook'
AND bj.title = m_w.NAME
AND bj.site_id = m_w.site_id
LEFT JOIN datasources m_d
ON t.id IS NULL
AND bj.subtitle = 'Datasource'
AND bj.title = m_d.NAME
AND bj.site_id = m_d.site_id
AND m_d.connectable = true
WHERE bj.started_at >= @started_at
AND bj.finish_code = 1) "BJ"
LEFT JOIN "public"."sites" "sites"
ON ( "BJ"."site id" = "sites"."id" )
LEFT JOIN "public"."projects" "projects"
ON ( "BJ"."item project id" = "projects"."id" )
LEFT JOIN "public"."tasks" "tasks"
ON ( "BJ"."task id" = "tasks"."id" )
LEFT JOIN "public"."schedules" "schedules"
ON ( "tasks"."schedule_id" = "schedules"."id" )

To get a list of extracts took more than 30min to refresh the extract?

SELECT  
"sites"."name" AS "SiteName",
"projects"."name" AS "ProjectName",
"BJ"."Item Name" AS "ItemName",
CAST("BJ"."Item Type" AS TEXT) AS "ItemType",
"schedules"."name" AS "ScheduleName",
"BJ"."Duration" AS "Duration"
FROM (
SELECT DISTINCT
bj.id AS "Job Id" ,
bj.job_name AS "Job Name" ,
bj.site_id AS "Site Id" ,
COALESCE(t.id) AS "Task Id" ,
COALESCE(d.project_id, w.project_id,
m_w.project_id, m_d.project_id) AS "Item Project Id" ,
COALESCE(d.id, w.id,m_w.id, m_d.id) AS "Item Id" ,
COALESCE(d.name, w.name, bj.title) AS "Item Name" ,
COALESCE(bj.subtitle) AS "Item Type",
age(bj.Completed_at, bj.started_at) AS "Duration"
FROM background_jobs bj
INNER JOIN tasks t
ON bj.correlation_id = t.id
AND bj.job_name IN ('Refresh Extracts','Increment Extracts')
LEFT JOIN datasources d
ON (t.obj_id = d.id AND t.obj_type = 'Datasource')
LEFT JOIN workbooks w
ON (t.obj_id = w.id AND t.obj_type = 'Workbook')
LEFT JOIN workbooks m_w --missing workbooks
ON t.id IS NULL
AND bj.subtitle = 'Workbook'
AND bj.title = m_w.name
AND bj.site_id = m_w.site_id
LEFT JOIN datasources m_d --missing datasources
ON t.id IS NULL
AND bj.subtitle = 'Datasource'
AND bj.title = m_d.name
AND bj.site_id = m_d.site_id
AND m_d.connectable = true
Where bj.started_at >= @started_at
and age(bj.Completed_at, bj.started_at) >= (@duration) --30min
and bj.finish_code = 0
) "BJ"
LEFT JOIN "public"."sites" "sites" ON ("BJ"."Site Id" = "sites"."id")
LEFT JOIN "public"."projects" "projects" ON ("BJ"."Item Project Id" = "projects"."id")
LEFT JOIN "public"."tasks" "tasks" ON ("BJ"."Task Id" = "tasks"."id")
LEFT JOIN "public"."schedules" "schedules" ON ("tasks"."schedule_id" = "schedules"."id")

To get the list of users with no email address?

SELECT 
"sites"."name" AS "SiteName",
"system_users"."name" AS "UserName",
"system_users"."friendly_name" AS "UserFriendlyName"
FROM "public"."users" "users"
INNER JOIN "public"."system_users" "system_users" ON ("users"."system_user_id" = "system_users"."id")
INNER JOIN "public"."sites" "sites" ON ("users"."site_id" = "sites"."id")
WHERE "system_users"."email" IS NULL
AND "sites"."name" != 'Default'
AND "system_users"."name" NOT IN ('guest','_system')

To get the list of Datasources and connected workbooks

SELECT
"Datasource"."Datasource Name" AS "Datasource",
"Sites"."name" AS "Site",
"Projects"."name" AS "Projects",
"Connected Workbooks"."name" AS "Connected Workbooks",
"Datasource Owner"."name" AS "Datasource Owner Name"
FROM (
SELECT

-- All Datasources
d.id AS "Datasource ID" ,
d.luid AS "Datasource LUID" ,
d.name AS "Datasource Name" ,
d.repository_url AS "Datasource Repository URL" ,
d.owner_id AS "Datasource Owner ID" ,
d.project_id AS "Datasource Project ID" ,
d.site_id AS "Datasource Site ID" ,
d.is_hierarchical AS "Datasource Is Hierarchical" ,
d.is_certified AS "Datasource Is Certified" ,
COALESCE(e_dc.has_extract,
d.data_engine_extracts) AS "Datasource Has Extract" ,
d.incrementable_extracts AS "Datasource Incrementable Extracts" ,
d.refreshable_extracts AS "Datasource Refreshable Extracts" ,
d.data_engine_extracts AS "Datasource Data Engine Extracts" ,
d.extracts_refreshed_at AS "Datasource Extracts Refreshed At" ,
d.db_class AS "Datasource DB Class" ,
d.db_name AS "Datasource DB Name" ,
d.table_name AS "Datasource Table Name" ,
CASE d.connectable
WHEN true THEN false
ELSE true
END AS "Is Embedded in Workbook" ,
CASE
WHEN p_dc.dbclass = 'sqlproxy' THEN true
ELSE false
END AS "References Published Data Source" ,
d.parent_workbook_id AS "Parent Workbook ID" ,


-- "Underlying" Data Sources (e.g., if the data source in a Workbook points to a Tableau Server published datasource)

CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.id
ELSE d.id
END AS "Datasource ID (underlying)" , -- represents the underlying datasource id. If a workbook connects to a published datasource, use that ID rather than the datasource id referenced by the workbook.

CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.data_engine_extracts
ELSE d.data_engine_extracts
END AS "Datasource Data Engine Extracts (underlying)" ,
CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.refreshable_extracts
ELSE d.refreshable_extracts
END AS "Datasource Refreshable Extracts (underlying)" ,
CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.incrementable_extracts
ELSE d.incrementable_extracts
END AS "Datasource Incrementable Extracts (underlying)" ,
CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.extracts_refreshed_at
ELSE d.extracts_refreshed_at
END AS "Datasource Extracts Refreshed At (underlying)" ,
CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.extracts_incremented_at
ELSE d.extracts_incremented_at
END AS "Datasource Extracts Incremented At (underlying)" ,
CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.name
ELSE d.name
END AS "Datasource Name (underlying)" ,
CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.owner_id
ELSE d.owner_id
END AS "Datasource Owner ID (underlying)" ,
CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.project_id
ELSE d.project_id
END AS "Datasource Project ID (underlying)" ,
CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.db_class
ELSE d.db_class
END AS "Datasource DB Class (underlying)" ,
CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.db_name
ELSE d.db_name
END AS "Datasource DB Name (underlying)" ,
CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.table_name
ELSE d.table_name
END AS "Datasource Table Name (underlying)" ,
CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.is_hierarchical
ELSE d.is_hierarchical
END AS "Datasource Is Hierarchical (underlying)" ,
CASE p_dc.dbclass
WHEN 'sqlproxy' THEN p_ds.is_certified
ELSE d.is_certified
END AS "Datasource Is Certified (underlying)" ,
p_ds.repository_url AS "Datasource Repository URL (underlying)"

FROM datasources d -- all datasources, published and embedded
LEFT JOIN
(
SELECT
datasource_id ,
has_extract
FROM data_connections -- used to obtain extract information on first-level datasources (e_dc = embedded data connections)
GROUP BY
datasource_id ,
has_extract
) as e_dc
ON d.id = e_dc.datasource_id
LEFT JOIN data_connections p_dc -- used to obtain information on what datasources (in a workbook) are connecting to what published datasources
ON d.id = p_dc.datasource_id
AND p_dc.dbclass = 'sqlproxy'
LEFT JOIN datasources p_ds -- just the published "conectable" datasources for supplemental information
ON p_dc.dbname = p_ds.repository_url
AND p_ds.connectable = true
) "Datasource"
INNER JOIN "public"."sites" "Sites" ON ("Datasource"."Datasource Site ID" = "Sites"."id")
INNER JOIN "public"."projects" "Projects" ON ("Datasource"."Datasource Project ID" = "Projects"."id")
LEFT JOIN "public"."workbooks" "Connected Workbooks" ON ("Datasource"."Parent Workbook ID" = "Connected Workbooks"."id")
LEFT JOIN "public"."_users" "Datasource Owner" ON ("Datasource"."Datasource Owner ID" = "Datasource Owner"."id")
WHERE "Connected Workbooks".name IS NOT NULL

--

--

--

Passionate about the Data Analytics and Business Intelligence stack. Technical Architect at CES IT

Love podcasts or audiobooks? Learn on the go with our new app.

Recommended from Medium

Magento Maintenance Services and Support Plans

Magento support and maintenance service

How to test software development and how to prevent bugs

How I automated my stock and cryptocurrency content creation with Python

4. Master 🥇 (Path to follow in Computer Science Journey).

Why we decided to partner with Google

Opologic Google Cloud Platform

Yeah! I did, I never thought I would be at this point with the exam having only failed one exam…

Why Asynchronous API Endpoints?

Technology is in no doubt the future and every young person wants to have their hands in it.

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Gunarathinam M

Gunarathinam M

Passionate about the Data Analytics and Business Intelligence stack. Technical Architect at CES IT

More from Medium

Cheat Sheet — Calendar that starts with any date of the month — Just two variables apart

Case Study — SAP BW With Microsoft Power BI : Creating “Near real time” visualization on…

Creating Paginated Reports in Power BI Report Server

Biodiversity Observation Monitor Data Das