Skip to content

Commit 6f4be9c

Browse files
Falafelqueen, mgifford, burakguneli, lebreRafael, tunetheweb
authored
Sustainability 2024: Queries (#3736)
* Update unminified_css bytes query * Update unused_css_bytes query * Update unused_js_bytes query * Update unminified_js__bytes query * Update cache_header_usage query * Update cdn_adoption query * Update cms_bytes_per_type query * Update ssg_bytes_per_type query * Update ecommerce_bytes_per_type query * Add use of prefers_dark_mode query * Update script usage query * Update stylesheet_count query * Updated for new CO2 calculation * Update ecommerce with new co2 calc * Update Cms with new co2 calc * Add green hosting query * Create favicons.sql Adding based on Laurent Devernay comment in Slack. * Add green third party query * Is root page updates * Update stylesheet count query with root_page filter * Filter root page for comparability * Create query_run_size.sql Adding in a query to track the size of the query. * Update query_run_size.sql Updating the docs * Create global_emissions_per_page.sql, page_byte_pre_type.sql, responsive_images.sql, text_compression.sql * Fix linter issues for recently added SQL queries * run sqlfluff fix * add video_autoplay_values.sql, video_preload_values.sql * fix linter errors * Remove Tablesample mistake * add 2022 queries * Apply suggestions from code review --------- Co-authored-by: Mike Gifford <mike.gifford@civicactions.com> Co-authored-by: Burak Güneli <burak.guneli@gmail.com> Co-authored-by: Rafael Bonalume Lebre <rafabonalebre@gmail.com> Co-authored-by: Barry Pollard <barrypollard@google.com>
1 parent 324d22b commit 6f4be9c

26 files changed

Lines changed: 1942 additions & 0 deletions
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
#standardSQL
# The distribution of cache header adoption on websites by client.
#
# For every request in the 2024-06-01 crawl, flags whether the response carried
# a non-blank Expires header, a non-blank Cache-Control header, and a
# Cache-Control max-age directive, then reports per-client counts (total_*)
# and the same counts as a share of all requests (pct_*).

WITH response_headers AS (
  -- Pull each header out of the summary JSON once so the flag logic below
  -- does not have to re-parse it.
  SELECT
    client,
    JSON_EXTRACT_SCALAR(summary, '$.resp_expires') AS expires_header,
    JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control') AS cache_control_header
  FROM
    `httparchive.all.requests`
  WHERE
    date = '2024-06-01'
),

cache_flags AS (
  SELECT
    client,
    -- A header counts as used only when it is present and not blank.
    expires_header IS NOT NULL AND TRIM(expires_header) != '' AS uses_expires,
    cache_control_header IS NOT NULL AND TRIM(cache_control_header) != '' AS uses_cache_control,
    -- NULL (not FALSE) when there is no Cache-Control header; COUNTIF skips NULLs,
    -- so such requests are simply not counted as using max-age.
    REGEXP_CONTAINS(cache_control_header, r'(?i)max-age\s*=\s*[0-9]+') AS uses_max_age
  FROM
    response_headers
)

SELECT
  client,
  COUNT(0) AS total_requests,

  -- Absolute number of requests per header combination
  COUNTIF(uses_cache_control) AS total_using_cache_control,
  COUNTIF(uses_max_age) AS total_using_max_age,
  COUNTIF(uses_expires) AS total_using_expires,
  COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires,
  COUNTIF(uses_cache_control AND uses_expires) AS total_using_both_cc_and_expires,
  -- Alias spelling ("neither ... and") differs from the pct_ column below; kept
  -- as-is so existing consumers of this result keep working.
  COUNTIF(NOT uses_cache_control AND NOT uses_expires) AS total_using_neither_cc_and_expires,
  COUNTIF(uses_cache_control AND NOT uses_expires) AS total_using_only_cache_control,
  COUNTIF(NOT uses_cache_control AND uses_expires) AS total_using_only_expires,

  -- Same combinations as a fraction of all requests for the client
  COUNTIF(uses_cache_control) / COUNT(0) AS pct_cache_control,
  COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age,
  COUNTIF(uses_expires) / COUNT(0) AS pct_using_expires,
  COUNTIF(uses_max_age AND uses_expires) / COUNT(0) AS pct_using_max_age_and_expires,
  COUNTIF(uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_both_cc_and_expires,
  COUNTIF(NOT uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_neither_cc_nor_expires,
  COUNTIF(uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_only_cache_control,
  COUNTIF(NOT uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_only_expires
FROM
  cache_flags
GROUP BY
  client
ORDER BY
  client;
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
#standardSQL
# The distribution of CDN adoption on websites by client.
#
# Output: one row per (client, cdn) with
#   freq  - number of root pages on which the CDN was detected
#   total - number of root pages crawled for the client
#   pct   - freq / total
# The '$.cdn' summary field is split on ', ', so a page listing several CDNs
# contributes to several rows and pct values can add up to more than 1.

WITH root_pages AS (
  SELECT
    client,
    JSON_EXTRACT_SCALAR(summary, '$.cdn') AS cdn_csv
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01' AND
    is_root_page = TRUE
),

client_totals AS (
  -- Denominator: every root page, including pages without a '$.cdn' value.
  SELECT
    client,
    COUNT(0) AS total
  FROM
    root_pages
  GROUP BY
    client
),

page_cdns AS (
  -- One row per (page, CDN). Unnesting per page avoids first concatenating
  -- every page's list into a single huge array per client.
  -- A NULL cdn_csv yields a NULL array, which UNNEST expands to zero rows,
  -- so such pages only count towards the total; an empty string yields [''],
  -- which is reported as 'No CDN'.
  SELECT
    root_pages.client,
    IF(cdn_name = '', 'No CDN', cdn_name) AS cdn
  FROM
    root_pages
  CROSS JOIN
    UNNEST(SPLIT(root_pages.cdn_csv, ', ')) AS cdn_name
)

SELECT
  client,
  cdn,
  COUNT(0) AS freq,
  total,
  COUNT(0) / total AS pct
FROM
  page_cdns
INNER JOIN
  client_totals
USING (client)
GROUP BY
  client,
  cdn,
  total
ORDER BY
  pct DESC,
  client,
  cdn;
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
#standardSQL
# Median resource weights by CMS
#
# For every root page on which a technology in the 'CMS' category was detected,
# estimates the page's emissions from its transferred bytes and reports, per
# client and CMS, the median page weight, median emissions, and the median
# weight/emissions attributed to HTML, JS, CSS, images and fonts.
# A page with several CMS technologies is counted once per technology.

# Declare variables to calculate the carbon emissions of one byte
# Source: https://sustainablewebdesign.org/calculating-digital-emissions/
# The implementation below does not make the assumptions about returning visitors or caching that are present in the Sustainable Web Design model.
# NOTE(review): units are not stated here; presumably grid_intensity is gCO2e/kWh
# and the six per-segment factors are kWh/GB -- confirm against the source above.

DECLARE grid_intensity NUMERIC DEFAULT 494;
DECLARE embodied_emissions_data_centers NUMERIC DEFAULT 0.012;
DECLARE embodied_emissions_network NUMERIC DEFAULT 0.013;
DECLARE embodied_emissions_user_devices NUMERIC DEFAULT 0.081;
DECLARE operational_emissions_data_centers NUMERIC DEFAULT 0.055;
DECLARE operational_emissions_network NUMERIC DEFAULT 0.059;
DECLARE operational_emissions_user_devices NUMERIC DEFAULT 0.080;

WITH cms_pages AS (
  -- Extract each byte counter from the summary JSON exactly once.
  SELECT
    client,
    tech.technology AS cms,
    CAST(JSON_VALUE(summary, '$.bytesTotal') AS INT64) AS bytes_total,
    CAST(JSON_VALUE(summary, '$.bytesHtml') AS INT64) AS bytes_html,
    CAST(JSON_VALUE(summary, '$.bytesJS') AS INT64) AS bytes_js,
    CAST(JSON_VALUE(summary, '$.bytesCss') AS INT64) AS bytes_css,
    CAST(JSON_VALUE(summary, '$.bytesImg') AS INT64) AS bytes_img,
    CAST(JSON_VALUE(summary, '$.bytesFont') AS INT64) AS bytes_font
  FROM
    `httparchive.all.pages`,
    UNNEST(technologies) AS tech
  WHERE
    date = '2024-06-01' AND
    is_root_page = TRUE AND
    'CMS' IN UNNEST(tech.categories)
),

page_footprint AS (
  SELECT
    client,
    cms,
    bytes_total,
    bytes_html,
    bytes_js,
    bytes_css,
    bytes_img,
    bytes_font,

    -- Page weight in GB (bytes / 1024^3), the unit the emission factors are applied to
    bytes_total / 1024 / 1024 / 1024 AS total_gb,

    -- Emissions of the whole page across all six segments; this is the amount
    -- that is split between resource types below.
    (bytes_total / 1024 / 1024 / 1024) * (
      operational_emissions_data_centers * grid_intensity +
      operational_emissions_network * grid_intensity +
      operational_emissions_user_devices * grid_intensity +
      embodied_emissions_data_centers * grid_intensity +
      embodied_emissions_network * grid_intensity +
      embodied_emissions_user_devices * grid_intensity
    ) AS all_segment_emissions
  FROM
    cms_pages
),

cms_data AS (
  SELECT
    client,
    cms,
    bytes_total / 1024 AS total_kb,

    -- Total emissions (operational + embodied)
    (
      total_gb * operational_emissions_data_centers * grid_intensity +
      total_gb * operational_emissions_network * grid_intensity +
      total_gb * operational_emissions_user_devices * grid_intensity
    ) AS total_operational_emissions,

    (
      total_gb * embodied_emissions_data_centers * grid_intensity +
      total_gb * embodied_emissions_network * grid_intensity +
      total_gb * embodied_emissions_user_devices * grid_intensity
    ) AS total_embodied_emissions,

    (
      total_gb * operational_emissions_data_centers * grid_intensity +
      total_gb * operational_emissions_network * grid_intensity +
      total_gb * operational_emissions_user_devices * grid_intensity +
      total_gb * embodied_emissions_data_centers * grid_intensity +
      total_gb * embodied_emissions_network * grid_intensity +
      total_gb * embodied_emissions_user_devices * grid_intensity
    ) AS total_emissions,

    -- Resource-specific emissions: the page's emissions weighted by the
    -- resource type's share of total bytes. SAFE_DIVIDE returns NULL instead of
    -- raising when bytes_total is 0; APPROX_QUANTILES ignores NULL inputs.
    SAFE_DIVIDE(bytes_html, bytes_total) * all_segment_emissions AS total_html_emissions,
    SAFE_DIVIDE(bytes_js, bytes_total) * all_segment_emissions AS total_js_emissions,
    SAFE_DIVIDE(bytes_css, bytes_total) * all_segment_emissions AS total_css_emissions,
    SAFE_DIVIDE(bytes_img, bytes_total) * all_segment_emissions AS total_img_emissions,
    SAFE_DIVIDE(bytes_font, bytes_total) * all_segment_emissions AS total_font_emissions,

    -- Resource-specific size in KB
    bytes_html / 1024 AS html_kb,
    bytes_js / 1024 AS js_kb,
    bytes_css / 1024 AS css_kb,
    bytes_img / 1024 AS img_kb,
    bytes_font / 1024 AS font_kb
  FROM
    page_footprint
)

SELECT
  client,
  cms,
  COUNT(0) AS pages,

  -- Median resource weights and emissions
  APPROX_QUANTILES(total_kb, 1000)[OFFSET(500)] AS median_total_kb,
  APPROX_QUANTILES(total_operational_emissions, 1000)[OFFSET(500)] AS median_operational_emissions,
  APPROX_QUANTILES(total_embodied_emissions, 1000)[OFFSET(500)] AS median_embodied_emissions,
  APPROX_QUANTILES(total_emissions, 1000)[OFFSET(500)] AS median_total_emissions,

  -- Resource-specific medians
  APPROX_QUANTILES(html_kb, 1000)[OFFSET(500)] AS median_html_kb,
  APPROX_QUANTILES(total_html_emissions, 1000)[OFFSET(500)] AS median_total_html_emissions,
  APPROX_QUANTILES(js_kb, 1000)[OFFSET(500)] AS median_js_kb,
  APPROX_QUANTILES(total_js_emissions, 1000)[OFFSET(500)] AS median_total_js_emissions,
  APPROX_QUANTILES(css_kb, 1000)[OFFSET(500)] AS median_css_kb,
  APPROX_QUANTILES(total_css_emissions, 1000)[OFFSET(500)] AS median_total_css_emissions,
  APPROX_QUANTILES(img_kb, 1000)[OFFSET(500)] AS median_img_kb,
  APPROX_QUANTILES(total_img_emissions, 1000)[OFFSET(500)] AS median_total_img_emissions,
  APPROX_QUANTILES(font_kb, 1000)[OFFSET(500)] AS median_font_kb,
  APPROX_QUANTILES(total_font_emissions, 1000)[OFFSET(500)] AS median_total_font_emissions
FROM
  cms_data
GROUP BY
  client,
  cms
ORDER BY
  pages DESC,
  cms,
  client;

0 commit comments

Comments
 (0)