-
Notifications
You must be signed in to change notification settings - Fork 328
Expand file tree
/
Copy pathhourly_slot_consumption_by_labels.sql
More file actions
143 lines (140 loc) · 5.99 KB
/
hourly_slot_consumption_by_labels.sql
File metadata and controls
143 lines (140 loc) · 5.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/*
* Copyright 2026 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
DECLARE num_days_to_scan INT64 DEFAULT 30;
DECLARE my_reservation_id STRING DEFAULT "your_reservation_id";
CREATE OR REPLACE TABLE optimization_workshop.hourly_slot_consumption_by_labels AS
SELECT * EXCEPT(jobs_per_hour),
-- Get the top 10 query patterns by slot hours
ARRAY(
SELECT AS STRUCT
-- since period is hourly, period_slot_hours/(1hr duration) = num_period_slots
ROUND(period_slot_hours, 2) AS num_period_slots,
ROUND(period_slot_hours/period_total_slot_hours * 100, 2) || "%" AS pct_of_total_period,
COALESCE(slot_hunger,0) AS slot_hunger,
ROUND(avg_period_estimated_runnable_units,2) AS avg_period_estimated_runnable_units,
ROUND(max_duration_minutes, 2) AS max_duration_minutes,
ROUND(p90_duration_minutes, 2) AS p90_duration_minutes,
ROUND(median_duration_minutes, 2) AS median_duration_minutes,
ROUND(max_p90_period_slots, 2) AS max_p90_period_slots,
ROUND(max_period_slots, 2) AS max_period_slots,
ROUND(p90_avg_job_period_slots, 2) AS p90_avg_job_period_slots,
labels,
num_jobs_per_period,
job_url,
parent_job_url,
FROM(
SELECT
FORMAT("%p", JSON_ARRAY(labels)) AS labels,
MAX(duration_ms)/1000/60 AS max_duration_minutes,
SUM(slot_hunger) AS slot_hunger,
AVG(avg_period_estimated_runnable_units) AS avg_period_estimated_runnable_units,
MAX(p90_job_period_slots) AS max_p90_period_slots,
MAX(max_job_period_slots) AS max_period_slots,
ANY_VALUE(p90_avg_job_period_slots) AS p90_avg_job_period_slots,
ANY_VALUE(median_duration_minutes) AS median_duration_minutes,
ANY_VALUE(p90_duration_minutes) AS p90_duration_minutes,
SUM(period_slot_hours) AS period_slot_hours,
COUNT(1) AS num_jobs_per_period,
ANY_VALUE(job_url HAVING MAX duration_ms) AS job_url,
ANY_VALUE(parent_job_url HAVING MAX duration_ms) AS parent_job_url,
ANY_VALUE(user_email HAVING MAX duration_ms) AS user_email,
FROM (
SELECT *,
PERCENTILE_CONT(duration_ms, 0.5) OVER(
PARTITION BY FORMAT("%p", JSON_ARRAY(labels))
)/1000/60 AS median_duration_minutes,
PERCENTILE_CONT(duration_ms, 0.9) OVER(
PARTITION BY FORMAT("%p", JSON_ARRAY(labels))
)/1000/60 AS p90_duration_minutes,
PERCENTILE_CONT(avg_job_period_slots, 0.9) OVER(
PARTITION BY FORMAT("%p", JSON_ARRAY(labels))
) AS p90_avg_job_period_slots,
FROM UNNEST(jobs_per_hour)
)
GROUP BY 1
-- HAVING user_email LIKE '%g serviceaccount.com'
ORDER BY period_slot_hours DESC
LIMIT 10
)
) AS top_job_patterns_per_hour
FROM(
SELECT
FORMAT_TIMESTAMP("%F %H", period_start_hour, "America/New_York") AS period_start_hour,
COUNT(DISTINCT job_id) AS num_jobs_in_period,
SUM(period_slot_ms)/1000/60/60 AS period_total_slot_hours,
MAX(p90_job_period_slots) AS max_p90_job_period_slots,
ARRAY_AGG(STRUCT(
period_slot_ms/1000/60/60 AS period_slot_hours,
p90_job_period_slots,
avg_job_period_slots,
max_job_period_slots,
duration_ms,
avg_period_estimated_runnable_units,
slot_hunger,
labels,
bqutil.fn.job_url(project_id || ':us.' || job_id) AS job_url,
COALESCE(bqutil.fn.job_url(project_id || ':us.' || parent_job_id), '') AS parent_job_url,
user_email
)) AS jobs_per_hour,
FROM(
SELECT
TIMESTAMP_TRUNC(period_start, HOUR, "America/New_York") AS period_start_hour,
project_id,
parent_job_id,
job_id,
user_email,
AVG(COALESCE(period_estimated_runnable_units,0)) AS avg_period_estimated_runnable_units,
SUM(IF(COALESCE(period_estimated_runnable_units,0) >= 1000,1,0)) AS slot_hunger,
SUM(period_slot_ms) AS period_slot_ms,
ANY_VALUE(p90_job_period_slots) AS p90_job_period_slots,
MAX(period_slot_ms/1000) AS max_job_period_slots,
ANY_VALUE(avg_job_period_slots) AS avg_job_period_slots,
FROM(
SELECT *,
PERCENTILE_CONT(period_slot_ms/1000, 0.9) OVER(
PARTITION BY TIMESTAMP_TRUNC(period_start, HOUR, "America/New_York"), job_id
) AS p90_job_period_slots,
AVG(period_slot_ms/1000) OVER(
PARTITION BY TIMESTAMP_TRUNC(period_start, HOUR, "America/New_York"), job_id
) AS avg_job_period_slots
FROM `region-us`.INFORMATION_SCHEMA.JOBS_TIMELINE_BY_ORGANIZATION
WHERE
DATE(job_creation_time) >= CURRENT_DATE - num_days_to_scan
AND reservation_id = my_reservation_id
AND state IN ('RUNNING','DONE')
AND statement_type != 'SCRIPT'
AND period_slot_ms > 0
-- Uncomment below to ignore slots consumed by failed queries
-- AND error_result IS NULL
)
GROUP BY 1,2,3,4,5
)
JOIN (
SELECT
job_id,
labels,
TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) AS duration_ms,
FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_ORGANIZATION
WHERE
DATE(creation_time) >= CURRENT_DATE - num_days_to_scan
AND reservation_id = my_reservation_id
AND statement_type != 'SCRIPT'
AND total_slot_ms > 0
-- Uncomment below to ignore slots consumed by failed queries
-- AND error_result IS NULL
) USING(job_id)
GROUP BY period_start_hour
);