Skip to content

Commit b1cc209

Browse files
committed
Merge branch 'main' of github.com:HTTPArchive/almanac.httparchive.org into production
2 parents ce871e3 + 2cb39ef commit b1cc209

1,015 files changed

Lines changed: 46152 additions & 1590 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/add-to-release-notes.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,6 @@ jobs:
1616
if: github.repository == 'HTTPArchive/almanac.httparchive.org'
1717
steps:
1818
- name: Update release notes
19-
uses: release-drafter/release-drafter@v5.20.0
19+
uses: release-drafter/release-drafter@v5.21.0
2020
env:
2121
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.github/workflows/check-translations-lengths.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ on:
2121
workflow_dispatch:
2222
pull_request:
2323
paths:
24-
- '**.html'
25-
- '**.md'
24+
- 'src/content/*/*/*.md'
25+
- 'src/templates/*/*/*.html'
26+
- 'src/templates/*/*.html'
2627

2728
jobs:
2829
check_lengths:

sql/.sqlfluff

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,13 @@ force_enable = False
215215
blocked_words = None
216216
# Regex of blocked SQL that should not be used.
217217
# Can be overridden with `-- noqa: L062` for those chapters using secondary pages
218+
# TABLESAMPLE - sometimes used for testing. Shouldn't be used in production as it is not random.
219+
# sample_data - sometimes used for testing. Shouldn't be used in production.
218220
# Block 2022_05_12 (contains secondary pages)
219221
# Block 2022_06_09 (contains secondary pages)
220222
# Block 2022_07_01 (probably forgot to update month to June for 2022)
221223
# Block 2021_06_01 (probably forgot to update month to July for 2021)
222-
blocked_regex = (2022_?05_?12|2022_?06_?09|2022_?07_?01|2021_?06_?01)
224+
blocked_regex = (TABLESAMPLE|sample_data|2022_?05_?12|2022_?06_?09|2022_?07_?01|2021_?06_?01)
223225

224226
[sqlfluff:rules:L063]
225227
# Data Types

sql/2020/css/css_in_js_frameworks_only.sql

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,25 @@ RETURNS ARRAY<STRING> LANGUAGE js AS '''
1313
''';
1414

1515
SELECT
16+
client,
1617
cssInJs,
1718
COUNT(0) AS freq,
1819
SUM(COUNT(0)) OVER () AS total,
1920
COUNT(0) / SUM(COUNT(0)) OVER () AS pct
2021
FROM (
2122
SELECT
23+
_TABLE_SUFFIX AS client,
2224
url,
2325
cssInJs
24-
FROM `httparchive.sample_data.pages_mobile_10k`
25-
CROSS JOIN UNNEST(getCssInJS(payload)) AS cssInJs
26+
FROM
27+
`httparchive.pages.2020_08_01_*`
28+
CROSS JOIN
29+
UNNEST(getCssInJS(payload)) AS cssInJs
2630
)
27-
WHERE cssInJs != 'NONE'
28-
GROUP BY cssInJs
29-
ORDER BY freq
31+
WHERE
32+
cssInJs != 'NONE'
33+
GROUP BY
34+
client,
35+
cssInJs
36+
ORDER BY
37+
freq

sql/2021/cdn/distribution_of_http_versions.sql

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ FROM
3939
CAST(JSON_EXTRACT(payload, '$._socket') AS INT64) AS socket
4040
FROM
4141
`httparchive.almanac.requests`
42-
--`httparchive.sample_data.requests`
4342
WHERE
4443
# WPT changes the response fields based on a redirect (url becomes the Location path instead of the original) causing inconsistencies in the counts, so we ignore them
4544
resp_location = '' OR resp_location IS NULL AND

sql/2021/privacy/top100_cookies_set_from_header.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ request_headers AS (
3838
COUNT(DISTINCT page) OVER (PARTITION BY client) AS websites_per_client
3939
FROM
4040
`httparchive.almanac.requests`
41+
WHERE
42+
date = '2021-07-01'
4143
GROUP BY
4244
client,
4345
page,

sql/2022/accessibility/README.md

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
# 2022 Accessibility queries
22

3-
<!--
4-
This directory contains all of the 2022 Accessibility chapter queries.
3+
The majority of the 2022 accessibility queries here are copied from the [2021 accessibility queries](https://github.com/HTTPArchive/almanac.httparchive.org/tree/main/sql/2021/accessibility), with a few exceptions copied from other chapters:
54

6-
Each query should have a corresponding `metric_name.sql` file.
7-
Note that readers are linked to this directory, so try to make the SQL file names descriptive for easy browsing.
5+
- From CSS 2022: `units_properties.sql`
6+
- From CSS 2022: `media_query_features.sql`
7+
- From CSS 2021: `focus_visible.sql` (with minor perf-related change)
8+
- From CSS 2021: `focus_outline_0.sql` (note this doesn’t include the 2021 variant `focus_outline_0_or_none.sql`, which didn’t make it to the published report)
9+
- From Mobile Web 2022: `viewport_zoom_scale.sql` (already copied in 2021 queries)
10+
- From Mobile Web 2022: `viewport_zoom_scale_by_domain_rank.sql`
811

9-
Analysts: if helpful, you can use this README to give additional info about the queries.
10-
-->
12+
Note that, out of the 44 accessibility queries from 2021, we only copied the two thirds that were used in the published report.
1113

1214
## Resources
1315

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#standardSQL
2+
# Overall A11Y technology usage by domain rank
3+
SELECT
4+
client,
5+
rank_grouping,
6+
total_in_rank,
7+
8+
COUNT(DISTINCT url) AS sites_with_a11y_tech,
9+
COUNT(DISTINCT url) / total_in_rank AS pct_sites_with_a11y_tech
10+
FROM (
11+
SELECT
12+
_TABLE_SUFFIX AS client,
13+
url
14+
FROM
15+
`httparchive.technologies.2022_06_01_*`
16+
WHERE
17+
category = 'Accessibility'
18+
)
19+
LEFT OUTER JOIN (
20+
SELECT
21+
_TABLE_SUFFIX AS client,
22+
url,
23+
rank_grouping
24+
FROM
25+
`httparchive.summary_pages.2022_06_01_*`,
26+
UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
27+
WHERE
28+
rank <= rank_grouping
29+
) USING (client, url)
30+
JOIN (
31+
SELECT
32+
_TABLE_SUFFIX AS client,
33+
rank_grouping,
34+
COUNT(0) AS total_in_rank
35+
FROM
36+
`httparchive.summary_pages.2022_06_01_*`,
37+
UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
38+
WHERE
39+
rank <= rank_grouping
40+
GROUP BY
41+
client,
42+
rank_grouping
43+
) USING (client, rank_grouping)
44+
GROUP BY
45+
rank_grouping,
46+
total_in_rank,
47+
client
48+
ORDER BY
49+
client,
50+
rank_grouping
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#standardSQL
2+
# A11Y technology usage
3+
SELECT
4+
client,
5+
total_sites,
6+
sites_with_a11y_tech,
7+
sites_with_a11y_tech / total_sites AS perc_sites_with_a11y_tech
8+
FROM (
9+
SELECT
10+
_TABLE_SUFFIX AS client,
11+
COUNT(DISTINCT url) AS sites_with_a11y_tech
12+
FROM
13+
`httparchive.technologies.2022_06_01_*`
14+
WHERE
15+
category = 'Accessibility'
16+
GROUP BY
17+
client
18+
)
19+
JOIN (
20+
SELECT
21+
_TABLE_SUFFIX AS client,
22+
COUNT(0) AS total_sites
23+
FROM
24+
`httparchive.summary_pages.2022_06_01_*`
25+
GROUP BY
26+
client
27+
)
28+
USING (client)
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#standardSQL
2+
# A11Y technology usage per app by domain rank
3+
SELECT
4+
client,
5+
rank_grouping,
6+
total_in_rank,
7+
8+
app,
9+
COUNT(0) AS sites_with_app,
10+
COUNT(0) / total_in_rank AS pct_sites_with_app
11+
FROM (
12+
SELECT
13+
_TABLE_SUFFIX AS client,
14+
app,
15+
url
16+
FROM
17+
`httparchive.technologies.2022_06_01_*`
18+
WHERE
19+
category = 'Accessibility'
20+
)
21+
LEFT OUTER JOIN (
22+
SELECT
23+
_TABLE_SUFFIX AS client,
24+
url,
25+
rank_grouping
26+
FROM
27+
`httparchive.summary_pages.2022_06_01_*`,
28+
UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
29+
WHERE
30+
rank <= rank_grouping
31+
) USING (client, url)
32+
JOIN (
33+
SELECT
34+
_TABLE_SUFFIX AS client,
35+
rank_grouping,
36+
COUNT(0) AS total_in_rank
37+
FROM
38+
`httparchive.summary_pages.2022_06_01_*`,
39+
UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
40+
WHERE
41+
rank <= rank_grouping
42+
GROUP BY
43+
client,
44+
rank_grouping
45+
) USING (client, rank_grouping)
46+
GROUP BY
47+
rank_grouping,
48+
total_in_rank,
49+
client,
50+
app
51+
ORDER BY
52+
app,
53+
rank_grouping,
54+
client

0 commit comments

Comments
 (0)