Skip to content

Commit 52e2859

Browse files
onurglr and tunetheweb authored
PWA 2025 queries (#4336)
* Update PWA SQL queries for Web Almanac 2025 Updated SQL queries for the 2025 Web Almanac PWA chapter. * Update manifests_lang.sql * Linting * Update to July and reformat * More fixes --------- Co-authored-by: Barry Pollard <barrypollard@google.com>
1 parent 27b360f commit 52e2859

39 files changed

+1839
-0
lines changed

sql/2025/pwa/assetlink_usage.sql

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#standardSQL
2+
# assetlink usage
3+
4+
# Share of root pages whose custom_metrics.well_known entry for
# '/.well-known/assetlinks.json' has found = 'true', reported per client for
# two populations: 'PWA sites' and 'All sites'.
WITH pwa_totals AS (
  # Denominator for the 'PWA sites' rows: root pages with a non-empty
  # manifests value and serviceWorkerHeuristic = 'true'.
  SELECT
    client,
    COUNT(0) AS total
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    is_root_page AND
    TO_JSON_STRING(custom_metrics.other.pwa.manifests) NOT IN ('[]', '{}', 'null') AND
    JSON_VALUE(custom_metrics.other.pwa.serviceWorkerHeuristic) = 'true'
  GROUP BY
    client
),

all_totals AS (
  # Denominator for the 'All sites' rows: every root page in the crawl.
  SELECT
    client,
    COUNT(0) AS total
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND is_root_page
  GROUP BY
    client
)

SELECT
  'PWA sites' AS type,
  client,
  COUNT(0) AS freq,
  total,
  COUNT(0) / total AS pct
FROM
  `httparchive.crawl.pages`
INNER JOIN
  pwa_totals
USING (client)
WHERE
  date = '2025-07-01' AND
  is_root_page AND
  JSON_VALUE(custom_metrics.other.pwa.serviceWorkerHeuristic) = 'true' AND
  TO_JSON_STRING(custom_metrics.other.pwa.manifests) NOT IN ('[]', '{}', 'null') AND
  JSON_EXTRACT_SCALAR(JSON_QUERY(custom_metrics.well_known, '$'), "$['/.well-known/assetlinks.json'].found") = 'true'
GROUP BY
  client,
  total
UNION ALL
SELECT
  'All sites' AS type,
  client,
  COUNT(0) AS freq,
  total,
  COUNT(0) / total AS pct
FROM
  `httparchive.crawl.pages`
INNER JOIN
  all_totals
USING (client)
WHERE
  date = '2025-07-01' AND
  is_root_page AND
  JSON_EXTRACT_SCALAR(JSON_QUERY(custom_metrics.well_known, '$'), "$['/.well-known/assetlinks.json'].found") = 'true'
GROUP BY
  client,
  total
ORDER BY
  type DESC,
  freq / total DESC,
  client

sql/2025/pwa/fugu.sql

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#standardSQL
2+
# Returns the property names found under the 'fugu-apis' key of a serialized
# JSON object. Yields an empty array when the input is not valid JSON, is not
# an object, or has no object stored under 'fugu-apis'.
CREATE TEMP FUNCTION getFuguAPIsFromOther(other_json STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
  let parsed;
  try {
    parsed = JSON.parse(other_json);
  } catch (err) {
    // Unparseable input is treated the same as "no APIs detected".
    return [];
  }

  // typeof null is 'object', so null needs its own check.
  if (parsed === null || typeof parsed !== 'object') {
    return [];
  }

  const detected = parsed['fugu-apis'];
  if (detected === null || typeof detected !== 'object') {
    return [];
  }

  return Object.keys(detected);
''';
14+
15+
# Per client and API name returned by getFuguAPIsFromOther: number of distinct
# root pages, their share of all root pages, and up to 50 sample page URLs.
# API/client combinations seen on fewer than 10 pages are dropped.
WITH totals AS (
  # All root pages per client; used as the percentage denominator.
  SELECT
    client,
    COUNT(0) AS total
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND is_root_page
  GROUP BY
    client
)

SELECT
  client,
  fuguAPI,
  COUNT(DISTINCT page) AS pages,
  total,
  COUNT(DISTINCT page) / total AS pct,
  ARRAY_TO_STRING(ARRAY_AGG(DISTINCT page LIMIT 50), ' ') AS sample_urls
FROM
  `httparchive.crawl.pages`
INNER JOIN
  totals
USING (client)
CROSS JOIN
  # One output row per (page, API name); pages with no APIs produce no rows.
  UNNEST(getFuguAPIsFromOther(TO_JSON_STRING(custom_metrics.other))) AS fuguAPI
WHERE
  date = '2025-07-01' AND is_root_page
GROUP BY
  client,
  fuguAPI,
  total
HAVING
  COUNT(DISTINCT page) >= 10
ORDER BY
  pct DESC,
  client;
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#standardSQL
2+
# Top manifest categories
3+
# Reads the `categories` member of the first value in the manifests object.
# A single string is wrapped in a one-element array; any other value is
# returned unchanged. Returns NULL if reading the value throws (for example
# when the manifests object has no entries).
CREATE TEMPORARY FUNCTION getCategories(manifest JSON)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
  try {
    // Only the first entry of the manifests object is inspected.
    const firstManifest = Object.values(manifest)[0];
    const declared = firstManifest.categories;
    return typeof declared === 'string' ? [declared] : declared;
  } catch (err) {
    return null;
  }
''';
16+
17+
# Most common manifest `categories` values, reported twice per client:
# once against all root pages that have a manifest ('All Sites') and once
# against the subset whose serviceWorkerHeuristic is 'true' ('PWA Sites').
WITH totals AS (
  # Denominators per client. `total` counts root pages with a non-empty
  # manifests value; `pwa_total` counts those that also have
  # serviceWorkerHeuristic = 'true'.
  SELECT
    client,
    COUNT(0) AS total,
    COUNTIF(JSON_VALUE(custom_metrics.other.pwa.serviceWorkerHeuristic) = 'true') AS pwa_total
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    is_root_page AND
    TO_JSON_STRING(custom_metrics.other.pwa.manifests) NOT IN ('[]', '{}', 'null')
  GROUP BY
    client
),

manifests_categories AS (
  # One row per (client, category) with page counts for both populations.
  SELECT
    client,
    category,
    COUNT(DISTINCT page) AS freq,
    total,
    COUNT(DISTINCT page) / total AS pct,
    # Count distinct pages, exactly like `freq`. Counting unnested rows would
    # count a page once per repeated category and inflate the PWA numbers.
    COUNT(DISTINCT IF(JSON_VALUE(custom_metrics.other.pwa.serviceWorkerHeuristic) = 'true', page, NULL)) AS pwa_freq,
    pwa_total,
    # SAFE_DIVIDE yields NULL instead of failing the query when a client has
    # no pages with serviceWorkerHeuristic = 'true'.
    SAFE_DIVIDE(
      COUNT(DISTINCT IF(JSON_VALUE(custom_metrics.other.pwa.serviceWorkerHeuristic) = 'true', page, NULL)),
      pwa_total
    ) AS pwa_pct
  FROM
    `httparchive.crawl.pages`
  CROSS JOIN
    UNNEST(getCategories(custom_metrics.other.pwa.manifests)) AS category
  INNER JOIN
    totals
  USING (client)
  WHERE
    date = '2025-07-01' AND
    is_root_page AND
    TO_JSON_STRING(custom_metrics.other.pwa.manifests) NOT IN ('[]', '{}', 'null') AND
    # Row-level filter, so it belongs in WHERE rather than HAVING.
    category IS NOT NULL
  GROUP BY
    client,
    category,
    total,
    pwa_total
  # No ORDER BY here: ordering inside a CTE does not carry to the final
  # result, which is sorted by the outer ORDER BY below.
)

SELECT
  'PWA Sites' AS type,
  client,
  category,
  pwa_freq AS freq,
  pwa_total AS total,
  pwa_pct AS pct
FROM
  manifests_categories
UNION ALL
SELECT
  'All Sites' AS type,
  client,
  category,
  freq,
  total,
  pct
FROM
  manifests_categories
ORDER BY
  type DESC,
  pct DESC,
  category,
  client
LIMIT 1000
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#standardSQL
2+
# Top most used display values in manifest files
3+
4+
# Returns the `display` member of the first value in the manifests object.
# Returns '(not set)' when that member is absent or when reading it throws
# (for example when the manifests object has no entries).
CREATE TEMP FUNCTION getDisplay(manifest JSON) RETURNS STRING LANGUAGE js AS '''
  const NOT_SET = '(not set)';
  try {
    // Only the first entry of the manifests object is inspected.
    const firstManifest = Object.values(manifest)[0];
    return 'display' in firstManifest ? firstManifest.display : NOT_SET;
  } catch (err) {
    return NOT_SET;
  }
''';
15+
16+
# Distribution of manifest `display` values per client, reported for
# 'PWA Sites' (serviceWorkerHeuristic = 'true') and for 'All Sites' (every
# root page with a non-empty manifests value).
WITH manifest_pages AS (
  # One row per root page that has a non-empty manifests value.
  SELECT
    client,
    getDisplay(custom_metrics.other.pwa.manifests) AS display,
    JSON_VALUE(custom_metrics.other.pwa.serviceWorkerHeuristic) = 'true' AS is_pwa
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    is_root_page AND
    TO_JSON_STRING(custom_metrics.other.pwa.manifests) NOT IN ('[]', '{}', 'null')
)

SELECT
  'PWA Sites' AS type,
  client,
  display,
  COUNT(0) AS freq,
  # The window runs over the grouped rows before QUALIFY is applied, so
  # `total` still includes groups that QUALIFY removes.
  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM
  manifest_pages
WHERE
  is_pwa
GROUP BY
  client,
  display
QUALIFY
  display IS NOT NULL AND
  freq > 100
UNION ALL
SELECT
  'All Sites' AS type,
  client,
  display,
  COUNT(0) AS freq,
  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM
  manifest_pages
GROUP BY
  client,
  display
QUALIFY
  display IS NOT NULL AND
  freq > 100
ORDER BY
  type DESC,
  freq / total DESC,
  display,
  client
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#standardSQL
2+
# Top manifest icon sizes
3+
# Returns the `sizes` member of every entry in the `icons` list of the first
# value in the manifests object, one array element per icon. Returns NULL if
# reading the list throws (for example when `icons` is missing).
CREATE TEMPORARY FUNCTION getIconSizes(manifest JSON)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
  try {
    // Only the first entry of the manifests object is inspected.
    const firstManifest = Object.values(manifest)[0];
    const iconList = firstManifest.icons;
    return iconList.map(function (icon) {
      return icon.sizes;
    });
  } catch (err) {
    return null;
  }
''';
12+
13+
# Most common manifest icon `sizes` values, reported twice per client:
# once against all root pages that have a manifest ('All Sites') and once
# against the subset whose serviceWorkerHeuristic is 'true' ('PWA Sites').
WITH totals AS (
  # Denominators per client. `total` counts root pages with a non-empty
  # manifests value; `pwa_total` counts those that also have
  # serviceWorkerHeuristic = 'true'.
  SELECT
    client,
    COUNT(0) AS total,
    COUNTIF(JSON_VALUE(custom_metrics.other.pwa.serviceWorkerHeuristic) = 'true') AS pwa_total
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    is_root_page AND
    TO_JSON_STRING(custom_metrics.other.pwa.manifests) NOT IN ('[]', '{}', 'null')
  GROUP BY
    client
),

manifests_icon_sizes AS (
  # One row per (client, size) with page counts for both populations.
  SELECT
    client,
    size,
    COUNT(DISTINCT page) AS freq,
    total,
    COUNT(DISTINCT page) / total AS pct,
    # Count distinct pages, exactly like `freq`. getIconSizes returns one
    # element per icon, so a page with several icons of the same size would
    # otherwise be counted once per icon and pwa_pct could exceed 1.
    COUNT(DISTINCT IF(JSON_VALUE(custom_metrics.other.pwa.serviceWorkerHeuristic) = 'true', page, NULL)) AS pwa_freq,
    pwa_total,
    # SAFE_DIVIDE yields NULL instead of failing the query when a client has
    # no pages with serviceWorkerHeuristic = 'true'.
    SAFE_DIVIDE(
      COUNT(DISTINCT IF(JSON_VALUE(custom_metrics.other.pwa.serviceWorkerHeuristic) = 'true', page, NULL)),
      pwa_total
    ) AS pwa_pct
  FROM
    `httparchive.crawl.pages`
  CROSS JOIN
    UNNEST(getIconSizes(custom_metrics.other.pwa.manifests)) AS size
  INNER JOIN
    totals
  USING (client)
  WHERE
    date = '2025-07-01' AND
    is_root_page AND
    TO_JSON_STRING(custom_metrics.other.pwa.manifests) NOT IN ('[]', '{}', 'null') AND
    # Row-level filter, so it belongs in WHERE rather than HAVING. It also
    # makes a second NULL check in the final SELECTs unnecessary.
    size IS NOT NULL
  GROUP BY
    client,
    size,
    total,
    pwa_total
  # No ORDER BY here: ordering inside a CTE does not carry to the final
  # result, which is sorted by the outer ORDER BY below.
)

SELECT
  'PWA Sites' AS type,
  client,
  size,
  pwa_freq AS freq,
  pwa_total AS total,
  pwa_pct AS pct
FROM
  manifests_icon_sizes
WHERE
  # NOTE(review): `freq` here resolves to the CTE's all-sites column, not the
  # `pwa_freq AS freq` alias, so both branches keep the same set of sizes.
  # Confirm this is intended rather than `pwa_freq > 100`.
  freq > 100
UNION ALL
SELECT
  'All Sites' AS type,
  client,
  size,
  freq,
  total,
  pct
FROM
  manifests_icon_sizes
WHERE
  freq > 100
ORDER BY
  type DESC,
  pct DESC,
  size,
  client

0 commit comments

Comments
 (0)