Skip to content

Commit 7e3f106

Browse files
Third-party 2024 queries (#3722)
* A11Y technology usage queries
* Lighthouse and distribution related queries
* Percentage-based analysis queries
* Third-party blocking queries
* CSP frequency
* Added mainframe vs iframe analysis
* Minor comments
* Update distribution_of_third_parties_by_frame.sql: removing trailing space
* Update top20_third_parties_by_client_and_frame_location.sql: trailing slashes
* Update csp_allowed_host_frequency.sql: fixing linting error
* Update top20_third_parties_by_client_and_frame_location.sql: fixing issues with linter
* Update top20_third_parties_by_client_and_frame_location.sql: trailing white spaces
* Update distribution_of_third_parties_by_frame.sql: linting
* Update top20_third_parties_by_client_and_frame_location.sql: linting
* lint
* lint
* lint
* Ported lighthouse_average_unminified_css_by_3p.sql
* Bug fixes
* lint
* Added third-party requests by rank sql
* lint
* Added third-party requests per page by rank for this year's chapter

---------

Co-authored-by: Mike Gifford <mike.gifford@civicactions.com>
1 parent ad562c6 commit 7e3f106

58 files changed

Lines changed: 3936 additions & 0 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
#standardSQL
# Overall A11Y technology usage by domain rank
#
# Output, one row per (client, rank):
#   freq  - distinct URLs in the rank bucket that have at least one technology
#           in the 'Accessibility' category
#   total - number of summary_pages rows in the rank bucket
#   pct   - freq as a percentage of total
#
# Rank buckets are cumulative: a page is counted in every bucket whose
# threshold is >= its rank.

WITH a11y_technologies AS (
  SELECT
    _TABLE_SUFFIX AS client,
    url
  FROM
    `httparchive.technologies.2024_06_01_*`
  WHERE
    category = 'Accessibility'
),

# One row per (page, rank bucket the page falls into).
pages AS (
  SELECT
    _TABLE_SUFFIX AS client,
    url,
    rank_grouping
  FROM
    `httparchive.summary_pages.2024_06_01_*`,
    UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
  WHERE
    rank <= rank_grouping
),

# Bucket sizes, derived from `pages` so the bucket list and the rank filter
# are defined in exactly one place.
rank_totals AS (
  SELECT
    client,
    rank_grouping,
    COUNT(0) AS total
  FROM
    pages
  GROUP BY
    client,
    rank_grouping
)

SELECT
  client,
  rank_grouping AS rank,
  COUNT(DISTINCT url) AS freq,
  total,
  (COUNT(DISTINCT url) / total) * 100 AS pct
FROM
  a11y_technologies
# Inner join on purpose: a technology row without a matching page has no
# rank_grouping and could never match rank_totals below anyway.
INNER JOIN
  pages
USING (client, url)
INNER JOIN
  rank_totals
USING (client, rank_grouping)
GROUP BY
  rank_grouping,
  total,
  client
ORDER BY
  client,
  rank
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
#standardSQL
# A11Y technology usage
#
# Output, one row per client:
#   freq  - distinct URLs with at least one 'Accessibility' technology
#   total - number of summary_pages rows for the client
#   pct   - freq as a percentage of total

WITH page_totals AS (
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total
  FROM
    `httparchive.summary_pages.2024_06_01_*`
  GROUP BY
    client
),

a11y_usage AS (
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(DISTINCT url) AS freq
  FROM
    `httparchive.technologies.2024_06_01_*`
  WHERE
    category = 'Accessibility'
  GROUP BY
    client
)

SELECT
  a11y_usage.client,
  a11y_usage.freq,
  page_totals.total,
  (a11y_usage.freq / page_totals.total) * 100 AS pct
FROM
  a11y_usage
INNER JOIN
  page_totals
ON
  a11y_usage.client = page_totals.client
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
#standardSQL
# A11Y technology usage by domain rank
#
# Output, one row per (client, rank, app):
#   freq  - number of 'Accessibility' technology rows for the app whose URL
#           falls in the rank bucket
#   total - number of summary_pages rows in the rank bucket
#   pct   - freq as a percentage of total
#
# Rank buckets are cumulative: a page is counted in every bucket whose
# threshold is >= its rank.

WITH a11y_technologies AS (
  SELECT
    _TABLE_SUFFIX AS client,
    app,
    url
  FROM
    `httparchive.technologies.2024_06_01_*`
  WHERE
    category = 'Accessibility'
),

# One row per (page, rank bucket the page falls into).
pages AS (
  SELECT
    _TABLE_SUFFIX AS client,
    url,
    rank_grouping
  FROM
    `httparchive.summary_pages.2024_06_01_*`,
    UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
  WHERE
    rank <= rank_grouping
),

# Bucket sizes, derived from `pages` so the bucket list and the rank filter
# are defined in exactly one place.
rank_totals AS (
  SELECT
    client,
    rank_grouping,
    COUNT(0) AS total
  FROM
    pages
  GROUP BY
    client,
    rank_grouping
)

SELECT
  client,
  rank_grouping AS rank,
  app,
  COUNT(0) AS freq,
  total,
  (COUNT(0) / total) * 100 AS pct
FROM
  a11y_technologies
# Inner join on purpose: a technology row without a matching page has no
# rank_grouping and could never match rank_totals below anyway.
INNER JOIN
  pages
USING (client, url)
INNER JOIN
  rank_totals
USING (client, rank_grouping)
GROUP BY
  rank_grouping,
  total,
  client,
  app
ORDER BY
  client,
  rank,
  pct DESC
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
#standardSQL
# Compressed images (excluding SVG) by third parties
#
# Lists, per client and content encoding, the third-party domains serving the
# most gzip/br-encoded non-SVG images, with request counts and byte sizes.
#
# Notes on the totals:
#   - total_requests / pct_requests are computed before the domain_rank
#     filter, so they cover every matched third-party domain of the client.
#   - total_size / pct_size are computed after the filter, so they cover only
#     the rows that are returned.
#   - RANK() keeps ties, so a partition can return more than 100 domains.

WITH requests AS (
  SELECT
    _TABLE_SUFFIX AS client,
    pageid AS page,
    url,
    resp_content_encoding AS content_encoding,
    type,
    respBodySize AS size
  FROM
    `httparchive.summary_requests.2024_06_01_*`
  WHERE
    type = 'image' AND
    resp_content_encoding IN ('gzip', 'br') AND
    NOT (
      resp_content_type LIKE 'image/svg%' OR
      ENDS_WITH(url, '.svg')
    )
),

# Non-hosting third-party hosts that appear on at least 50 distinct pages
# among the requests selected above.
third_party AS (
  SELECT
    NET.HOST(domain) AS domain,
    COUNT(DISTINCT page) AS page_usage
  FROM
    `httparchive.almanac.third_parties` tp
  INNER JOIN
    requests r
  ON NET.HOST(r.url) = NET.HOST(tp.domain)
  WHERE
    date = '2024-06-01' AND
    category != 'hosting'
  GROUP BY
    domain
  HAVING
    page_usage >= 50
),

# Per (client, type, content_encoding, domain) aggregates plus the rank of
# each domain by request count within its (client, type, content_encoding).
ranked_domains AS (
  SELECT
    client,
    content_encoding,
    domain,
    COUNT(0) AS num_requests,
    SUM(size) AS size,
    SUM(COUNT(0)) OVER (PARTITION BY client) AS total_requests,
    COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_requests,
    RANK() OVER (PARTITION BY client, type, content_encoding ORDER BY COUNT(0) DESC) AS domain_rank
  FROM
    requests
  # Only requests served by a known third party are of interest, so this is
  # an inner join rather than LEFT JOIN + "domain IS NOT NULL".
  INNER JOIN
    third_party
  ON
    NET.HOST(requests.url) = NET.HOST(third_party.domain)
  GROUP BY
    client,
    type,
    content_encoding,
    domain
)

SELECT
  client,
  content_encoding,
  domain,
  size,
  SUM(size) OVER (PARTITION BY client) AS total_size,
  size / SUM(size) OVER (PARTITION BY client) AS pct_size,
  num_requests,
  total_requests,
  pct_requests
FROM
  ranked_domains
WHERE
  domain_rank <= 100
ORDER BY
  client,
  content_encoding,
  size DESC
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
#standardSQL
# Content-encoding by third parties
#
# Output, one row per (client, content_encoding):
#   num_requests - third-party requests with that content encoding
#   total        - all third-party requests of the client
#   pct          - num_requests as a fraction of total (0-1, not multiplied by 100)

WITH requests AS (
  SELECT
    _TABLE_SUFFIX AS client,
    pageid AS page,
    url,
    resp_content_encoding AS content_encoding
  FROM
    `httparchive.summary_requests.2024_06_01_*`
),

# Non-hosting third-party hosts that appear on at least 50 distinct pages.
third_party AS (
  SELECT
    NET.HOST(domain) AS domain,
    COUNT(DISTINCT page) AS page_usage
  FROM
    `httparchive.almanac.third_parties` tp
  INNER JOIN
    requests r
  ON NET.HOST(r.url) = NET.HOST(tp.domain)
  WHERE
    date = '2024-06-01' AND
    category != 'hosting'
  GROUP BY
    domain
  HAVING
    page_usage >= 50
)

SELECT
  client,
  content_encoding,
  COUNT(0) AS num_requests,
  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM
  requests
# Only requests served by a known third party are of interest, so this is an
# inner join rather than LEFT JOIN + "domain IS NOT NULL".
INNER JOIN
  third_party
ON
  NET.HOST(requests.url) = NET.HOST(third_party.domain)
GROUP BY
  client,
  content_encoding
ORDER BY
  client,
  num_requests DESC
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
#standardSQL
# Content-encoding by third parties by content-type
#
# Output, one row per (client, type, content_encoding):
#   num_requests - third-party requests of that type with that content encoding
#   total        - all third-party requests of that client and type
#   pct          - num_requests as a fraction of total (0-1, not multiplied by 100)

WITH requests AS (
  SELECT
    _TABLE_SUFFIX AS client,
    pageid AS page,
    url,
    resp_content_encoding AS content_encoding,
    type
  FROM
    `httparchive.summary_requests.2024_06_01_*`
),

# Non-hosting third-party hosts that appear on at least 50 distinct pages.
third_party AS (
  SELECT
    NET.HOST(domain) AS domain,
    COUNT(DISTINCT page) AS page_usage
  FROM
    `httparchive.almanac.third_parties` tp
  INNER JOIN
    requests r
  ON NET.HOST(r.url) = NET.HOST(tp.domain)
  WHERE
    date = '2024-06-01' AND
    category != 'hosting'
  GROUP BY
    domain
  HAVING
    page_usage >= 50
)

SELECT
  client,
  type,
  content_encoding,
  COUNT(0) AS num_requests,
  SUM(COUNT(0)) OVER (PARTITION BY client, type) AS total,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client, type) AS pct
FROM
  requests
# Only requests served by a known third party are of interest, so this is an
# inner join rather than LEFT JOIN + "domain IS NOT NULL".
INNER JOIN
  third_party
ON
  NET.HOST(requests.url) = NET.HOST(third_party.domain)
GROUP BY
  client,
  type,
  content_encoding
ORDER BY
  client,
  type,
  num_requests DESC

0 commit comments

Comments
 (0)