Skip to content

Commit 077eba8

Browse files
vsdaan and tunetheweb authored
Security sql 2025 update (#4338)
* Changes to 2025 security queries to fix small issues encountered during writing
* Fix merge markers
* Apply suggestions from code review
* Update sql/2025/security/https_server_redirects.sql

---------

Co-authored-by: Barry Pollard <barrypollard@google.com>
1 parent 7b9b3b3 commit 077eba8

8 files changed

+40
-37
lines changed

sql/2025/security/cookie_age_percentiles.sql

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ LANGUAGE js AS '''
3737
WITH age_values AS (
3838
SELECT
3939
client,
40-
getCookieAgeValues(response_headers.value, INT64(summary.startedDateTime)) AS values
40+
getCookieAgeValues(response_headers.value, UNIX_SECONDS(TIMESTAMP(STRING(payload.startedDateTime)))) AS values
4141
FROM
4242
`httparchive.crawl.requests`,
4343
UNNEST(response_headers) AS response_headers

sql/2025/security/cookie_max_age_expires_top_values.sql

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ WITH max_age_values AS (
3434
FROM
3535
`httparchive.crawl.requests`,
3636
UNNEST(response_headers) AS rh,
37-
UNNEST(JSON_QUERY_ARRAY(getCookieAgeValues(rh.value, INT64(summary.startedDateTime))), '$.maxAge') AS max_age_value
37+
UNNEST(JSON_QUERY_ARRAY(getCookieAgeValues(rh.value, INT64(summary.startedDateTime)), '$.maxAge')) AS max_age_value
3838
WHERE
3939
date = '2025-07-01' AND
4040
is_root_page AND
@@ -46,7 +46,7 @@ expires_values AS (
4646
client,
4747
expires_value
4848
FROM
49-
`httparchive.all.requests`,
49+
`httparchive.crawl.requests`,
5050
UNNEST(response_headers) AS rh,
5151
UNNEST(JSON_QUERY_ARRAY(getCookieAgeValues(rh.value, INT64(summary.startedDateTime)), '$.expires')) AS expires_value
5252
WHERE

sql/2025/security/csp_script_source_list_keywords_per_request.sql

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -24,18 +24,19 @@ FROM (
2424
SELECT
2525
client,
2626
COUNT(0) AS total_pages_with_csp,
27-
COUNTIF(csp_header IS NOT NULL) AS freq_csp,
28-
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src')) AS freq_default_script_src,
29-
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src[^;]+strict-dynamic')) AS freq_strict_dynamic,
30-
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src[^;]+nonce-')) AS freq_nonce,
31-
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src[^;]+unsafe-inline')) AS freq_script_unsafe_inline,
32-
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src[^;]+unsafe-eval')) AS freq_script_unsafe_eval,
33-
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)unsafe-inline')) AS freq_unsafe_inline,
34-
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)unsafe-eval')) AS freq_unsafe_eval
27+
COUNTIF(csp_combined IS NOT NULL) AS freq_csp,
28+
COUNTIF(REGEXP_CONTAINS(csp_combined, '(?i)(default|script)-src')) AS freq_default_script_src,
29+
COUNTIF(REGEXP_CONTAINS(csp_combined, '(?i)(default|script)-src[^;]+strict-dynamic')) AS freq_strict_dynamic,
30+
COUNTIF(REGEXP_CONTAINS(csp_combined, '(?i)(default|script)-src[^;]+nonce-')) AS freq_nonce,
31+
COUNTIF(REGEXP_CONTAINS(csp_combined, '(?i)(default|script)-src[^;]+unsafe-inline')) AS freq_script_unsafe_inline,
32+
COUNTIF(REGEXP_CONTAINS(csp_combined, '(?i)(default|script)-src[^;]+unsafe-eval')) AS freq_script_unsafe_eval,
33+
COUNTIF(REGEXP_CONTAINS(csp_combined, '(?i)unsafe-inline')) AS freq_unsafe_inline,
34+
COUNTIF(REGEXP_CONTAINS(csp_combined, '(?i)unsafe-eval')) AS freq_unsafe_eval
3535
FROM (
3636
SELECT
3737
client,
38-
response_headers.value AS csp_header
38+
url,
39+
STRING_AGG(response_headers.value, '; ') AS csp_combined
3940
FROM
4041
`httparchive.crawl.requests`,
4142
UNNEST(response_headers) AS response_headers
@@ -44,6 +45,8 @@ FROM (
4445
is_root_page AND
4546
is_main_document AND
4647
LOWER(response_headers.name) = 'content-security-policy'
48+
GROUP BY
49+
client, url
4750
)
4851
GROUP BY
4952
client

sql/2025/security/hsts_attributes.sql

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -3,23 +3,25 @@
33
# Question: How many websites use HSTS includeSubDomains and preload?
44
SELECT
55
client,
6-
COUNT(0) AS total_requests,
7-
COUNTIF(hsts_header_val IS NOT NULL) AS total_hsts_headers,
8-
COUNTIF(hsts_header_val IS NOT NULL) / COUNT(0) AS pct_hsts_requests,
9-
COUNTIF(REGEXP_CONTAINS(hsts_header_val, r'(?i)max-age\s*=\s*\d+') AND NOT REGEXP_CONTAINS(CONCAT(hsts_header_val, ' '), r'(?i)max-age\s*=\s*0\W')) / COUNTIF(hsts_header_val IS NOT NULL) AS pct_valid_max_age,
10-
COUNTIF(REGEXP_CONTAINS(CONCAT(hsts_header_val, ' '), r'(?i)max-age\s*=\s*0\W')) / COUNTIF(hsts_header_val IS NOT NULL) AS pct_zero_max_age,
11-
COUNTIF(REGEXP_CONTAINS(hsts_header_val, r'(?i)includeSubDomains')) / COUNTIF(hsts_header_val IS NOT NULL) AS pct_include_subdomains,
12-
COUNTIF(REGEXP_CONTAINS(hsts_header_val, r'(?i)preload')) / COUNTIF(hsts_header_val IS NOT NULL) AS pct_preload
6+
COUNT(0) AS total_requests_with_hsts_header,
7+
COUNTIF(hsts_header_val IS NOT NULL) AS total_non_null_hsts_headers,
8+
SAFE_DIVIDE(COUNTIF(hsts_header_val IS NOT NULL), COUNT(0)) AS pct_hsts_requests,
9+
SAFE_DIVIDE(COUNTIF(REGEXP_CONTAINS(hsts_header_val, r'(?i)max-age\s*=\s*\d+') AND NOT REGEXP_CONTAINS(CONCAT(hsts_header_val, ' '), r'(?i)max-age\s*=\s*0\W')), COUNTIF(hsts_header_val IS NOT NULL)) AS pct_valid_max_age,
10+
SAFE_DIVIDE(COUNTIF(REGEXP_CONTAINS(CONCAT(hsts_header_val, ' '), r'(?i)max-age\s*=\s*0\W')), COUNTIF(hsts_header_val IS NOT NULL)) AS pct_zero_max_age,
11+
SAFE_DIVIDE(COUNTIF(REGEXP_CONTAINS(hsts_header_val, r'(?i)includeSubDomains')), COUNTIF(hsts_header_val IS NOT NULL)) AS pct_include_subdomains,
12+
SAFE_DIVIDE(COUNTIF(REGEXP_CONTAINS(hsts_header_val, r'(?i)preload')), COUNTIF(hsts_header_val IS NOT NULL)) AS pct_preload
1313
FROM (
1414
SELECT
1515
client,
16-
REGEXP_EXTRACT(summary.respOtherHeaders, r'(?i)strict-transport-security =([^,]+)') AS hsts_header_val
16+
response_headers.value AS hsts_header_val
1717
FROM
18-
`httparchive.crawl.requests`
18+
`httparchive.crawl.requests`,
19+
UNNEST(response_headers) AS response_headers
1920
WHERE
2021
date = '2025-07-01' AND
2122
is_root_page AND
22-
is_main_document
23+
is_main_document AND
24+
LOWER(response_headers.name) = 'strict-transport-security'
2325
)
2426
GROUP BY
2527
client

sql/2025/security/https_server_redirects.sql

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,11 @@ SELECT
88
COUNT(DISTINCT url) AS total_urls_on_page,
99
COUNT(DISTINCT(CASE WHEN url LIKE 'http://%' THEN url END)) AS count_http_urls_on_page,
1010
COUNT(DISTINCT(CASE WHEN url LIKE 'http://%' THEN url END)) / COUNT(DISTINCT url) AS pct_http_urls_on_page,
11-
COUNT(DISTINCT(CASE WHEN url LIKE 'http://%' AND STRING(summary.resp_location) LIKE 'https://%' AND INT64(summary.status) BETWEEN 300 AND 399 THEN url END)) AS count_http_urls_with_https_redirect_on_page,
12-
COUNT(DISTINCT(CASE WHEN url LIKE 'http://%' AND STRING(summary.resp_location) LIKE 'https://%' AND INT64(summary.status) BETWEEN 300 AND 399 THEN url END)) / COUNT(DISTINCT(CASE WHEN url LIKE 'http://%' THEN url END)) AS pct_http_urls_with_https_redirect_on_page
11+
COUNT(DISTINCT(CASE WHEN url LIKE 'http://%' AND (SELECT value FROM UNNEST(response_headers) WHERE LOWER(name) = 'location' LIMIT 1) LIKE 'https://%' AND INT64(summary.status) BETWEEN 300 AND 399 THEN url END)) AS count_http_urls_with_https_redirect_on_page, -- noqa: AM09
12+
COUNT(DISTINCT(CASE WHEN url LIKE 'http://%' AND (SELECT value FROM UNNEST(response_headers) WHERE LOWER(name) = 'location' LIMIT 1) LIKE 'https://%' AND INT64(summary.status) BETWEEN 300 AND 399 THEN url END)) / COUNT(DISTINCT(CASE WHEN url LIKE 'http://%' THEN url END)) AS pct_http_urls_with_https_redirect_on_page -- noqa: AM09
1313
FROM
14-
`httparchive.crawl.requests`
14+
`httparchive.crawl.requests`,
15+
UNNEST(response_headers) AS response_headers
1516
WHERE
1617
date = '2025-07-01' AND
1718
is_root_page

sql/2025/security/robot_txt_sensitive_disallow.sql

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4,16 +4,10 @@
44
CREATE TEMPORARY FUNCTION getAllDisallowedEndpoints(data JSON)
55
RETURNS ARRAY<STRING> DETERMINISTIC
66
LANGUAGE js AS '''
7-
let parsed_data;
8-
try {
9-
parsed_data = JSON.parse(data);
10-
} catch (e) {
7+
if (data == null || data["/robots.txt"] == undefined || !data["/robots.txt"]["found"]) {
118
return [];
129
}
13-
if (parsed_data == null || parsed_data["/robots.txt"] == undefined || !parsed_data["/robots.txt"]["found"]) {
14-
return [];
15-
}
16-
const parsed_endpoints = parsed_data["/robots.txt"]["data"]["matched_disallows"];
10+
const parsed_endpoints = data["/robots.txt"]["data"]["matched_disallows"];
1711
const endpoints_list = Object.keys(parsed_endpoints).map(key => parsed_endpoints[key]).flat();
1812
return Array.from(new Set(endpoints_list));
1913
''';

sql/2025/security/sri_coverage_per_page.sql

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
#standardSQL
22
# Section: Content Inclusion - Subresource Integrity
33
# Question: How many scripts on a page have the integrity attribute? (percentage)
4-
CREATE TEMP FUNCTION getNumScriptElements(sris ARRAY<STRING>) AS (
4+
CREATE TEMP FUNCTION getNumScriptElements(sris ARRAY<JSON>) AS (
55
(SELECT COUNT(0) FROM UNNEST(sris) AS sri WHERE JSON_EXTRACT_SCALAR(sri, '$.tagname') = 'script')
66
);
77

sql/2025/security/tls_ca_issuers_pages_over_time.sql

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Question: What is the distribution of CA issuers for all pages over time?
44
# Note: currently includes HTTP (i.e., pages with no issuer)
55
SELECT
6+
date,
67
client,
78
issuer,
89
SUM(COUNT(0)) OVER (PARTITION BY client, date) AS total_https_pages,
@@ -23,10 +24,12 @@ FROM (
2324
GROUP BY
2425
client,
2526
request_host,
26-
issuer
27+
issuer,
28+
date
2729
)
2830
GROUP BY
2931
client,
30-
issuer
32+
issuer,
33+
date
3134
ORDER BY
32-
pct DESC
35+
date DESC

0 commit comments

Comments
 (0)