Skip to content

Commit 5cea947

Browse files
authored
HTTP queries for 2024 (#3763)
* 2024 HTTP queries * Revert README * Linting * Convert more queries * Linting * Bug fixes * More conversions * More conversions * Final conversions * Linting * More queries * Linting * More linting * Resource Hint and Fetch Priority queries * More queries * Linting * Linting
1 parent 6f4be9c commit 5cea947

34 files changed

Lines changed: 1825 additions & 0 deletions
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#standardSQL
2+
3+
# Measure the distribution of TCP Connections per site.
4+
# For each client and HTTP version category, report the number of root pages
# and the 10th/25th/50th/75th/90th percentile of the `_connections` value
# recorded in the page summary.
WITH main_documents AS (
  # One row per root-page main document, labelled by its response protocol.
  SELECT
    client,
    page,
    CASE
      WHEN JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion') IS NULL THEN 'Unknown'
      WHEN
        LOWER(JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion')) IN ('quic', 'http/2', 'http/3') OR
        LOWER(JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion')) LIKE 'h3%'
        THEN 'HTTP/2+'
      ELSE 'Non-HTTP/2'
    END AS http_version_category
  FROM
    `httparchive.all.requests`
  WHERE
    date = '2024-06-01' AND
    is_root_page AND
    is_main_document
),

page_connections AS (
  # Connection count per root page, read from the page summary JSON.
  SELECT
    client,
    page,
    CAST(JSON_EXTRACT_SCALAR(summary, '$._connections') AS INT64) AS _connections
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01' AND
    is_root_page
)

SELECT
  percentile,
  client,
  http_version_category,
  COUNT(0) AS num_pages,
  # 1000 quantile buckets, so percentile N sits at offset N * 10.
  APPROX_QUANTILES(_connections, 1000)[OFFSET(percentile * 10)] AS connections
FROM
  main_documents
INNER JOIN
  page_connections
USING (client, page)
CROSS JOIN
  UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
  percentile,
  client,
  http_version_category
ORDER BY
  percentile,
  client,
  num_pages DESC,
  http_version_category
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
# Per client: how many root pages have an `_origin_dns.https` and/or
# `_origin_dns.svcb` payload value other than `[]`, and how many of those
# values contain an `alpn=` entry that includes `h3`.
WITH origin_dns AS (
  # Evaluate each condition once per page so the aggregates below stay short.
  SELECT
    client,
    JSON_EXTRACT(payload, '$._origin_dns.https') != '[]' AS has_https,
    JSON_EXTRACT(payload, '$._origin_dns.svcb') != '[]' AS has_svcb,
    REGEXP_EXTRACT(JSON_EXTRACT(payload, '$._origin_dns.https'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL AS https_has_h3_alpn,
    REGEXP_EXTRACT(JSON_EXTRACT(payload, '$._origin_dns.svcb'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL AS svcb_has_h3_alpn
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01' AND
    is_root_page
)

SELECT
  client,
  COUNT(0) AS total_pages,
  COUNTIF(has_https) AS dns_https,
  COUNTIF(has_https) / COUNT(0) AS pct_dns_https,
  COUNTIF(https_has_h3_alpn) AS dns_https_alpn,
  COUNTIF(https_has_h3_alpn) / COUNT(0) AS pct_dns_https_alpn,
  COUNTIF(has_svcb) AS dns_svcb,
  COUNTIF(has_svcb) / COUNT(0) AS pct_dns_svcb,
  COUNTIF(svcb_has_h3_alpn) AS dns_svcb_alpn,
  COUNTIF(svcb_has_h3_alpn) / COUNT(0) AS pct_dns_svcb_alpn,
  COUNTIF(has_https OR has_svcb) AS dns_https_or_svcb,
  COUNTIF(has_https OR has_svcb) / COUNT(0) AS pct_dns_https_or_svcb,
  COUNTIF(https_has_h3_alpn OR svcb_has_h3_alpn) AS dns_https_or_svcb_alpn,
  COUNTIF(https_has_h3_alpn OR svcb_has_h3_alpn) / COUNT(0) AS pct_dns_https_or_svcb_alpn
FROM
  origin_dns
GROUP BY
  client
ORDER BY
  client
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
# Per client and CDN provider of the main document: how many root pages have an
# `_origin_dns.https` and/or `_origin_dns.svcb` payload value other than `[]`,
# and how many of those values contain an `alpn=` entry that includes `h3`.
WITH page_dns AS (
  # One row per root page, joined to its main-document request for the CDN name.
  SELECT
    client,
    JSON_EXTRACT_SCALAR(r.summary, '$._cdn_provider') AS cdn,
    JSON_EXTRACT(p.payload, '$._origin_dns.https') != '[]' AS has_https,
    JSON_EXTRACT(p.payload, '$._origin_dns.svcb') != '[]' AS has_svcb,
    REGEXP_EXTRACT(JSON_EXTRACT(p.payload, '$._origin_dns.https'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL AS https_has_h3_alpn,
    REGEXP_EXTRACT(JSON_EXTRACT(p.payload, '$._origin_dns.svcb'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL AS svcb_has_h3_alpn
  FROM
    `httparchive.all.pages` AS p
  INNER JOIN
    `httparchive.all.requests` AS r
  USING (client, date, page, is_root_page)
  WHERE
    date = '2024-06-01' AND
    is_root_page AND
    is_main_document
)

SELECT
  client,
  COUNT(0) AS total_pages,
  cdn,
  COUNTIF(has_https) AS dns_https,
  COUNTIF(has_https) / COUNT(0) AS pct_dns_https,
  COUNTIF(https_has_h3_alpn) AS dns_https_alpn,
  COUNTIF(https_has_h3_alpn) / COUNT(0) AS pct_dns_https_alpn,
  COUNTIF(has_svcb) AS dns_svcb,
  COUNTIF(has_svcb) / COUNT(0) AS pct_dns_svcb,
  COUNTIF(svcb_has_h3_alpn) AS dns_svcb_alpn,
  COUNTIF(svcb_has_h3_alpn) / COUNT(0) AS pct_dns_svcb_alpn,
  COUNTIF(has_https OR has_svcb) AS dns_https_or_svcb,
  COUNTIF(has_https OR has_svcb) / COUNT(0) AS pct_dns_https_or_svcb,
  COUNTIF(https_has_h3_alpn OR svcb_has_h3_alpn) AS dns_https_or_svcb_alpn,
  COUNTIF(https_has_h3_alpn OR svcb_has_h3_alpn) / COUNT(0) AS pct_dns_https_or_svcb_alpn
FROM
  page_dns
GROUP BY
  client,
  cdn
ORDER BY
  client,
  cdn
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#standardSQL
2+
3+
# Distribution of number of early hints resources
4+
# Counts the Early Hints `link:` headers in a JSON-encoded header list.
#
# early_hints_header: JSON text whose values are header strings.
# Returns:
#   num_hints            - number of values that start with 'link:'
#   num_resources_hinted - total comma-separated entries across those values
# Any failure (NULL input, invalid JSON, non-string value) yields 0 for both.
CREATE TEMPORARY FUNCTION getNumEarlyHints(early_hints_header STRING)
RETURNS STRUCT<num_hints INT64, num_resources_hinted INT64> LANGUAGE js AS '''
try {
  var num_hints = 0;
  var num_resources_hinted = 0;

  // Keep the parsed value in a local variable instead of an implicit global.
  var headers = JSON.parse(early_hints_header);

  for (var key of Object.keys(headers)) {
    var header = headers[key];
    if (!header.startsWith('link:')) {
      continue;
    }
    num_hints++;
    // One link header can list several resources separated by commas.
    num_resources_hinted += header.split(',').length;
  }

  return {
    num_hints: num_hints,
    num_resources_hinted: num_resources_hinted
  };
} catch (e) {
  // Best effort: treat anything unparseable as having no early hints.
  return {
    num_hints: 0,
    num_resources_hinted: 0
  };
}
''';
34+
# Per client, percentiles 1 through 100 of the number of early hint link
# headers and of hinted resources on root-page main documents.
WITH main_documents AS (
  SELECT
    client,
    page,
    getNumEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')) AS early_hints
  FROM
    `httparchive.all.requests`
  WHERE
    date = '2024-06-01' AND
    is_root_page AND
    is_main_document
)

SELECT
  client,
  percentile,
  COUNT(DISTINCT page) AS num_pages,
  # 1000 quantile buckets, so percentile N sits at offset N * 10.
  APPROX_QUANTILES(early_hints.num_hints, 1000)[OFFSET(percentile * 10)] AS num_hints,
  APPROX_QUANTILES(early_hints.num_resources_hinted, 1000)[OFFSET(percentile * 10)] AS num_resources_hinted
FROM
  main_documents
CROSS JOIN
  UNNEST(GENERATE_ARRAY(1, 100)) AS percentile
GROUP BY
  client,
  percentile
ORDER BY
  client,
  percentile
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#standardSQL
2+
3+
# Share of root pages whose main document has Early Hints headers, and of those that mention Shopify
4+
# Per client: number of root pages, how many main documents carry an
# `_early_hint_headers` payload value, and how many of those values contain
# the text 'shopify', each also as a fraction of distinct pages.
WITH main_documents AS (
  # Extract the header value once instead of once per aggregate.
  SELECT
    client,
    page,
    JSON_EXTRACT(payload, '$._early_hint_headers') AS early_hint_headers
  FROM
    `httparchive.all.requests`
  WHERE
    date = '2024-06-01' AND
    is_main_document AND
    is_root_page
)

SELECT
  client,
  COUNT(DISTINCT page) AS num_pages,
  COUNTIF(early_hint_headers IS NOT NULL) AS early_hints,
  COUNTIF(early_hint_headers IS NOT NULL) / COUNT(DISTINCT page) AS early_hints_pct,
  COUNTIF(early_hint_headers LIKE '%shopify%') AS early_hints_shopify,
  COUNTIF(early_hint_headers LIKE '%shopify%') / COUNT(DISTINCT page) AS early_hints_shopify_pct
FROM
  main_documents
GROUP BY
  client
# Deterministic row order, matching the other queries in this chapter.
ORDER BY
  client
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
# Summarises the resource hints found in a JSON-encoded list of Early Hints
# headers.
#
# early_hints_header: JSON text whose values are header strings.
# Returns:
#   preconnects - number of hints whose rel is exactly 'preconnect'
#   preloads    - number of hints whose rel is exactly 'preload'
#   asTypes     - for preloads, a count per `as` value ('' when `as` is absent)
# Any failure (NULL input, invalid JSON, non-string value) yields a struct
# whose fields are all NULL.
CREATE TEMPORARY FUNCTION getEarlyHints(early_hints_header STRING)
RETURNS STRUCT<preconnects INT64, preloads INT64, asTypes ARRAY<STRUCT<key STRING, value INT64>>> LANGUAGE js AS '''
try {
  var preconnects = 0;
  var preloads = 0;
  var asCounts = {};

  // Keep the parsed value in a local variable instead of an implicit global.
  var headers = JSON.parse(early_hints_header);

  for (var key of Object.keys(headers)) {
    var header = headers[key];
    if (!header.startsWith('link:')) {
      continue;
    }

    // A single link header may carry several comma-separated hints.
    header.split(',').forEach(hint => {
      var fetchType = '';
      var hintType = '';

      // Each hint is a list of attributes separated by semicolons.
      hint.split(';').forEach(attribute => {
        var trimmed = attribute.trim();
        if (trimmed.startsWith('rel')) {
          // Drop the 4-character prefix (rel plus separator) and any quotes.
          hintType = trimmed.slice(4).replaceAll('"', '').replaceAll("'", '');
        }
        if (trimmed.startsWith('as')) {
          // Drop the 3-character prefix (as plus separator) and any quotes.
          fetchType = trimmed.slice(3).replaceAll('"', '').replaceAll("'", '');
        }
      });

      if (hintType === 'preconnect') {
        preconnects++;
      }
      if (hintType === 'preload') {
        preloads++;
        asCounts[fetchType] = asCounts[fetchType] ? asCounts[fetchType] + 1 : 1;
      }
    });
  }

  // Convert the counter object into the declared ARRAY<STRUCT<key, value>>.
  var asArray = [];
  for (var asType in asCounts) {
    asArray.push({key: asType, value: asCounts[asType]});
  }

  return {
    preconnects: preconnects,
    preloads: preloads,
    asTypes: asArray
  };
} catch (e) {
  // Best effort: unparseable input produces an all-NULL struct.
  return {};
}
''';
49+
# Per client, is_root_page and preload `as` type: the 10/25/50/75/90/100th
# percentile of how many Early Hints preloads of that type a main document
# has, plus up to five sample page URLs.
SELECT
  client,
  is_root_page,
  percentile,
  asTypes.key AS asType,
  # asTypes.value is already INT64 (see getEarlyHints), so no cast is needed.
  # 1000 quantile buckets, so percentile N sits at offset N * 10.
  APPROX_QUANTILES(asTypes.value, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS number,
  ARRAY_TO_STRING(ARRAY_AGG(DISTINCT page LIMIT 5), ' ') AS sample_urls
FROM
  `httparchive.all.requests`
CROSS JOIN
  UNNEST(getEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')).asTypes) AS asTypes
CROSS JOIN
  UNNEST([10, 25, 50, 75, 90, 100]) AS percentile
WHERE
  date = '2024-06-01' AND
  is_main_document AND
  # JSON_QUERY returns JSON-formatted text, which is never an empty string,
  # so the only rows this can exclude are those where the key is missing.
  JSON_QUERY(payload, '$._early_hint_headers') IS NOT NULL AND
  asTypes.key IS NOT NULL
GROUP BY
  client,
  is_root_page,
  percentile,
  asTypes.key
ORDER BY
  client,
  is_root_page,
  percentile,
  asTypes.key
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
# Summarises the resource hints found in a JSON-encoded list of Early Hints
# headers.
#
# early_hints_header: JSON text whose values are header strings.
# Returns:
#   preconnects - number of hints whose rel is exactly 'preconnect'
#   preloads    - number of hints whose rel is exactly 'preload'
#   asTypes     - for preloads, a count per `as` value ('' when `as` is absent)
# Any failure (NULL input, invalid JSON, non-string value) yields a struct
# whose fields are all NULL.
CREATE TEMPORARY FUNCTION getEarlyHints(early_hints_header STRING)
RETURNS STRUCT<preconnects INT64, preloads INT64, asTypes ARRAY<STRUCT<key STRING, value INT64>>> LANGUAGE js AS '''
try {
  var preconnects = 0;
  var preloads = 0;
  var asCounts = {};

  // Keep the parsed value in a local variable instead of an implicit global.
  var headers = JSON.parse(early_hints_header);

  for (var key of Object.keys(headers)) {
    var header = headers[key];
    if (!header.startsWith('link:')) {
      continue;
    }

    // A single link header may carry several comma-separated hints.
    header.split(',').forEach(hint => {
      var fetchType = '';
      var hintType = '';

      // Each hint is a list of attributes separated by semicolons.
      hint.split(';').forEach(attribute => {
        var trimmed = attribute.trim();
        if (trimmed.startsWith('rel')) {
          // Drop the 4-character prefix (rel plus separator) and any quotes.
          hintType = trimmed.slice(4).replaceAll('"', '').replaceAll("'", '');
        }
        if (trimmed.startsWith('as')) {
          // Drop the 3-character prefix (as plus separator) and any quotes.
          fetchType = trimmed.slice(3).replaceAll('"', '').replaceAll("'", '');
        }
      });

      if (hintType === 'preconnect') {
        preconnects++;
      }
      if (hintType === 'preload') {
        preloads++;
        asCounts[fetchType] = asCounts[fetchType] ? asCounts[fetchType] + 1 : 1;
      }
    });
  }

  // Convert the counter object into the declared ARRAY<STRUCT<key, value>>.
  var asArray = [];
  for (var asType in asCounts) {
    asArray.push({key: asType, value: asCounts[asType]});
  }

  return {
    preconnects: preconnects,
    preloads: preloads,
    asTypes: asArray
  };
} catch (e) {
  // Best effort: unparseable input produces an all-NULL struct.
  return {};
}
''';
49+
# Per client, is_root_page and preload `as` type: how many distinct pages have
# a main document with at least one Early Hints preload of that type, that
# count as a fraction of all pages in the same client/is_root_page group, and
# up to five sample page URLs.
WITH totals AS (
  # Denominator: number of pages rows per client and is_root_page.
  SELECT
    date,
    is_root_page,
    client,
    COUNT(0) AS total
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01'
  GROUP BY
    date,
    client,
    is_root_page
)

SELECT
  client,
  is_root_page,
  asTypes.key AS asType,
  COUNT(DISTINCT page) AS num_pages,
  COUNT(DISTINCT page) / total AS pct_pages,
  ARRAY_TO_STRING(ARRAY_AGG(DISTINCT page LIMIT 5), ' ') AS sample_urls
FROM
  `httparchive.all.requests`
CROSS JOIN
  UNNEST(getEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')).asTypes) AS asTypes
INNER JOIN
  totals
USING (date, client, is_root_page)
WHERE
  date = '2024-06-01' AND
  is_main_document AND
  # JSON_QUERY returns JSON-formatted text, which is never an empty string,
  # so the only rows this can exclude are those where the key is missing.
  JSON_QUERY(payload, '$._early_hint_headers') IS NOT NULL AND
  asTypes.key IS NOT NULL
GROUP BY
  client,
  is_root_page,
  total,
  asTypes.key
ORDER BY
  client,
  is_root_page,
  pct_pages DESC

0 commit comments

Comments
 (0)