Skip to content

Commit efc9228

Browse files
committed
Merge branch 'main' of github.com:HTTPArchive/almanac.httparchive.org into production
2 parents 2f81bcb + 50cbaf0 commit efc9228

45 files changed

Lines changed: 787 additions & 181 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/linters/.ecrc

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
"AllowedContentTypes": [],
99
"PassedFiles": [],
1010
"Disable": {
11-
// set these options to true to disable specific checks
1211
"EndOfLine": false,
1312
"Indentation": false,
1413
"IndentSize": false,

.github/workflows/code-static-analysis.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ jobs:
3535
uses: actions/checkout@v3
3636
- name: Set up Python 3.8
3737
if: ${{ matrix.language == 'python' }}
38-
uses: actions/setup-python@v4.0.0
38+
uses: actions/setup-python@v4.1.0
3939
with:
4040
python-version: '3.8'
4141
- name: Install dependencies

.github/workflows/lintsql.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ jobs:
1919
# Full git history is needed to get a proper list of changed files within `super-linter`
2020
fetch-depth: 0
2121
- name: Set up Python 3.8
22-
uses: actions/setup-python@v4.0.0
22+
uses: actions/setup-python@v4.1.0
2323
with:
2424
python-version: '3.8'
2525
- name: Lint SQL code

.github/workflows/predeploy.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ jobs:
3636
with:
3737
node-version: '16'
3838
- name: Set up Python 3.8
39-
uses: actions/setup-python@v4.0.0
39+
uses: actions/setup-python@v4.1.0
4040
with:
4141
python-version: '3.8'
4242
- name: Install Asian Fonts

.github/workflows/test_website.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ jobs:
3030
with:
3131
node-version: '16'
3232
- name: Set up Python 3.8
33-
uses: actions/setup-python@v4.0.0
33+
uses: actions/setup-python@v4.1.0
3434
with:
3535
python-version: '3.8'
3636
- name: Run the website

sql/2021/privacy/most_common_purposes_for_iab_tcf_v2.sql

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,10 @@ CREATE TEMPORARY FUNCTION ExtractKeyValuePairs(input STRING) RETURNS ARRAY < STR
77
value STRING > > AS (
88
(
99
SELECT
10-
array(
10+
ARRAY(
1111
SELECT AS STRUCT
12-
trim(split(kv, ':') [SAFE_OFFSET(0)]) AS key,
13-
trim(split(kv, ':') [SAFE_OFFSET(1)]) AS value
12+
TRIM(SPLIT(kv, ':') [SAFE_OFFSET(0)]) AS key,
13+
TRIM(SPLIT(kv, ':') [SAFE_OFFSET(1)]) AS value
1414
FROM
1515
t.kv
1616
)

sql/util/functions.sql

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
# Rework CSS parsing
2+
CREATE OR REPLACE FUNCTION `httparchive.almanac.PARSE_CSS`(stylesheet STRING) RETURNS STRING LANGUAGE js
3+
OPTIONS (library = ["gs://httparchive/lib/parse-css.js"]) AS R"""
4+
try {
5+
var css = parse(stylesheet)
6+
return JSON.stringify(css);
7+
} catch (e) {
8+
return null;
9+
}
10+
""";

sql/util/green_web_foundation.sql

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
SELECT -- noqa: L044
2+
DATE('2022-06-01') AS date,
3+
NET.HOST(LOWER(url)) AS host,
4+
NET.REG_DOMAIN(LOWER(url)) AS domain,
5+
*
6+
FROM
7+
# This is the raw database dump from GWF.
8+
`httparchive.almanac.green_web_foundation_raw`

sql/util/parsed_css.sql

Lines changed: 5 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,10 @@
1-
CREATE TEMP FUNCTION parseCSS(stylesheet STRING)
2-
RETURNS STRING
3-
LANGUAGE js
4-
OPTIONS (library = "gs://httparchive/lib/parse-css.js")
5-
AS '''
6-
try {
7-
var css = parse(stylesheet)
8-
return JSON.stringify(css);
9-
} catch (e) {
10-
'';
11-
}
12-
''';
13-
141
SELECT
15-
date,
16-
client,
2+
DATE('2022-07-01') AS date,
3+
_TABLE_SUFFIX AS client,
174
page,
185
url,
19-
parseCSS(body) AS css
6+
css
207
FROM
21-
`httparchive.almanac.summary_response_bodies`
8+
`httparchive.experimental_parsed_css.2022_07_01_*` -- noqa: L062
229
WHERE
23-
date = '2020-08-01' AND
24-
type = 'css' AND
25-
LENGTH(body) < 3 * 1024 * 1024 # 3 MB
10+
is_root_page

sql/util/parsed_css_inline.sql

Lines changed: 0 additions & 26 deletions
This file was deleted.

0 commit comments

Comments
 (0)