diff --git a/sql/2025/css/README.md b/sql/2025/css/README.md
index 72688cd864a..dfac99d1833 100644
--- a/sql/2025/css/README.md
+++ b/sql/2025/css/README.md
@@ -18,3 +18,23 @@
 [~google-doc]: https://docs.google.com/document/d/1FtntjUvqNT_66XtKQamZDPy0gI_kLZhkBaK7JJwE2ww
 [~google-sheets]: https://docs.google.com/spreadsheets/d/1jGINqaVnYrlu7ob4jvxtAafyBH8PCV0BX-UbTCgDsiM/edit
 [~chapter-markdown]: https://github.com/HTTPArchive/almanac.httparchive.org/tree/main/src/content/en/2025/css.md
+
+## Queries
+
+Notes:
+* Metric descriptions that mention "per year" are reported for every year from 2019 to 2025, unless explicitly stated otherwise.
+* Percentile metrics are reported for the 10th, 25th, 50th, 75th, and 90th percentiles, unless explicitly stated otherwise.
+
+### CSS composition
+
+- Stylesheets
+  - [stylesheet-metrics.sql](./stylesheet-metrics.sql)
+  - [stylesheet-percentile-metrics.sql](./stylesheet-percentile-metrics.sql)
+- Lines of code
+  - [ ] percentiles of lines of code per stylesheet, per year
+  - [ ] percentiles of lines of code per page, per year
+  - [ ] percentiles for at-rules, rules, selectors and declarations per page, per year
+- Embedded content
+  - [ ] percentiles of embedded content size, per year
+- Comments
+  - [ ] percentiles of comments per page, per year
diff --git a/sql/2025/css/stylesheet-comments-metrics.sql b/sql/2025/css/stylesheet-comments-metrics.sql
new file mode 100644
index 00000000000..3a8f9208f7b
--- /dev/null
+++ b/sql/2025/css/stylesheet-comments-metrics.sql
@@ -0,0 +1,64 @@
+#standardSQL
+# - average of stylesheet comments per page, per year
+# - maximum of stylesheet comments per page, per year
+#
+# getComments returns the number of CSS comments found in a stylesheet body:
+# 0 for a NULL or comment-free body, NULL when the body cannot be processed.
+# The pattern matches "any whitespace or non-whitespace character" instead of
+# "." so that comments spanning several lines are counted as well.
+CREATE TEMPORARY FUNCTION getComments(css STRING)
+RETURNS INT64
+LANGUAGE js
+AS '''
+try {
+  if (css === null)
+    return 0;
+
+  const comments = css.match(/\\/\\*[\\s\\S]*?\\*\\//g);
+  return comments === null ? 0 : comments.length;
+} catch (e) {
+  return null;
+}
+''';
+
+WITH
+-- One row per CSS response: crawl year, client, page and comment count.
+basedata AS (
+  SELECT
+    EXTRACT(YEAR FROM `date`) AS report_year,
+    client,
+    page,
+    getComments(response_body) AS comments
+  FROM
+    `httparchive.crawl.requests` --TABLESAMPLE SYSTEM (0.01 PERCENT)
+  WHERE
+    `date` IN ('2025-07-01', '2024-07-01', '2023-07-01', '2022-07-01', '2021-07-01', '2020-07-01', '2019-07-01') AND
+    type = 'css'
+),
+
+-- Comments summed over all stylesheets of a page. Grouping by client keeps the
+-- desktop and mobile crawls of the same URL apart instead of adding them up.
+per_page AS (
+  SELECT
+    report_year,
+    client,
+    page,
+    SUM(comments) AS comments
+  FROM
+    basedata
+  GROUP BY
+    report_year,
+    client,
+    page
+)
+
+SELECT
+  report_year,
+  AVG(comments) AS avg_comments,
+  MAX(comments) AS max_comments
+FROM
+  per_page
+GROUP BY
+  report_year
+ORDER BY
+  report_year;
diff --git a/sql/2025/css/stylesheet-lines-of-code-metrics.sql b/sql/2025/css/stylesheet-lines-of-code-metrics.sql
new file mode 100644
index 00000000000..408e49593d6
--- /dev/null
+++ b/sql/2025/css/stylesheet-lines-of-code-metrics.sql
@@ -0,0 +1,122 @@
+#standardSQL
+# - average of at-rules per page, per year
+# - average of selectors per page, per year
+# - average of declarations per page, per year
+# - average of lines of code per page, per year
+# - maximum of at-rules per page, per year
+# - maximum of selectors per page, per year
+# - maximum of declarations per page, per year
+# - maximum of lines of code per page, per year
+#
+# getSourceLocStats walks a parsed stylesheet AST and returns how many at-rules,
+# selectors and declarations it contains. It returns NULL when the AST cannot
+# be processed, so a single malformed stylesheet does not abort the whole query.
+CREATE TEMPORARY FUNCTION getSourceLocStats(parsedCss JSON)
+RETURNS STRUCT<atRules INT64, selectors INT64, declarations INT64>
+LANGUAGE js
+AS '''
+const AT_RULE_TYPES = [
+  'charset',
+  'custom-media',
+  'document',
+  'font-face',
+  'host',
+  'import',
+  'keyframes',
+  'media',
+  'namespace',
+  'page',
+  'supports',
+];
+
+// Counts the node itself and, recursively, every node nested inside it.
+function processNode(node) {
+  let atRules = AT_RULE_TYPES.includes(node.type) ? 1 : 0;
+  let selectors = 0;
+  let declarations = 0;
+
+  if (node.type === 'rule' && Array.isArray(node.selectors)) {
+    selectors += node.selectors.length;
+  }
+  if ((node.type === 'rule' || node.type === 'keyframe') && Array.isArray(node.declarations)) {
+    declarations += node.declarations.length;
+  }
+
+  // Nested nodes live under a different key depending on the node type. Any
+  // node carrying a "rules" array is descended into, not only media blocks, so
+  // that rules nested in other conditional at-rules are counted as well.
+  let children;
+  if (node.type === 'stylesheet') {
+    children = node.stylesheet.rules;
+  } else if (node.type === 'keyframes') {
+    children = node.keyframes;
+  } else {
+    children = node.rules;
+  }
+
+  if (Array.isArray(children)) {
+    for (const child of children) {
+      const counts = processNode(child);
+      atRules += counts.atRules;
+      selectors += counts.selectors;
+      declarations += counts.declarations;
+    }
+  }
+
+  return {atRules, selectors, declarations};
+}
+
+try {
+  return processNode(parsedCss);
+} catch (e) {
+  return null;
+}
+''';
+
+WITH
+-- One row per stylesheet: crawl year, client, page and its node counts.
+basedata AS (
+  SELECT
+    EXTRACT(YEAR FROM `date`) AS report_year,
+    client,
+    page,
+    getSourceLocStats(css) AS source_loc_stats
+  FROM
+    `httparchive.crawl.parsed_css` --TABLESAMPLE SYSTEM (0.01 PERCENT)
+  WHERE `date` IN ('2025-07-01', '2024-07-01', '2023-07-01', '2022-07-01', '2021-07-01', '2020-07-01', '2019-07-01')
+),
+
+-- Node counts summed over all stylesheets of a page. Grouping by client keeps
+-- the desktop and mobile crawls of the same URL apart instead of adding them up.
+per_page AS (
+  SELECT
+    report_year,
+    client,
+    page,
+    SUM(source_loc_stats.atRules) AS at_rules,
+    SUM(source_loc_stats.selectors) AS selectors,
+    SUM(source_loc_stats.declarations) AS declarations
+  FROM
+    basedata
+  GROUP BY
+    report_year,
+    client,
+    page
+)
+
+SELECT
+  report_year,
+  AVG(at_rules) AS avg_at_rules,
+  AVG(selectors) AS avg_selectors,
+  AVG(declarations) AS avg_declarations,
+  AVG(at_rules + selectors + declarations) AS avg_lines_of_code,
+  MAX(at_rules) AS max_at_rules,
+  MAX(selectors) AS max_selectors,
+  MAX(declarations) AS max_declarations,
+  MAX(at_rules + selectors + declarations) AS max_lines_of_code
+FROM
+  per_page
+GROUP BY
+  report_year
+ORDER BY
+  report_year;
diff --git a/sql/2025/css/stylesheet-metrics.sql b/sql/2025/css/stylesheet-metrics.sql
new file mode 100644
index 00000000000..e20972718e3
--- /dev/null
+++ b/sql/2025/css/stylesheet-metrics.sql
@@ -0,0 +1,36 @@
+#standardSQL
+# - percentage of mobile pages, per year
+# - average number of stylesheets per page, per year
+# - largest amount of stylesheets loaded, per year
+# - average CSS size per page, per year
+# - largest CSS size per page, per year
+WITH
+-- One row per crawled page and client with its CSS byte and stylesheet counts.
+basedata AS (
+  SELECT
+    EXTRACT(YEAR FROM `date`) AS report_year,
+    client,
+    -- The summary property was renamed from bytesCSS to bytesCss in 2022;
+    -- both spellings are read, and reqCSS/reqCss are handled the same way.
+    LAX_INT64(IFNULL(summary.bytesCss, summary.bytesCSS)) AS bytes_css,
+    LAX_INT64(IFNULL(summary.reqCss, summary.reqCSS)) AS remote_css,
+    -- A missing element_count.style value is treated as 0.
+    IFNULL(LAX_INT64(custom_metrics.element_count.style), 0) AS inline_css
+  FROM
+    `httparchive.crawl.pages` --TABLESAMPLE SYSTEM (0.01 PERCENT)
+  WHERE `date` IN ('2025-07-01', '2024-07-01', '2023-07-01', '2022-07-01', '2021-07-01', '2020-07-01', '2019-07-01')
+)
+
+SELECT
+  report_year,
+  COUNTIF(client = 'mobile') / COUNT(0) AS pct_mobile,
+  AVG(remote_css + inline_css) AS avg_stylesheets_per_page,
+  MAX(remote_css + inline_css) AS max_stylesheets_per_page,
+  AVG(bytes_css) AS avg_css_bytes_per_page,
+  MAX(bytes_css) AS max_css_bytes_per_page
+FROM
+  basedata
+GROUP BY
+  report_year
+ORDER BY
+  report_year;
diff --git a/sql/2025/css/stylesheet-percentile-metrics.sql b/sql/2025/css/stylesheet-percentile-metrics.sql
new file mode 100644
index 00000000000..38497f71160
--- /dev/null
+++ b/sql/2025/css/stylesheet-percentile-metrics.sql
@@ -0,0 +1,50 @@
+#standardSQL
+# - percentiles of the number of stylesheets per page, per year
+# - percentiles of CSS file size, per year
+WITH
+-- One row per crawled page and client with its CSS byte and stylesheet counts.
+basedata AS (
+  SELECT
+    EXTRACT(YEAR FROM `date`) AS report_year,
+    -- The summary property was renamed from bytesCSS to bytesCss in 2022;
+    -- both spellings are read, and reqCSS/reqCss are handled the same way.
+    LAX_INT64(IFNULL(summary.bytesCss, summary.bytesCSS)) AS bytes_css,
+    LAX_INT64(IFNULL(summary.reqCss, summary.reqCSS)) AS remote_css,
+    -- A missing element_count.style value is treated as 0.
+    IFNULL(LAX_INT64(custom_metrics.element_count.style), 0) AS inline_css
+  FROM
+    `httparchive.crawl.pages` --TABLESAMPLE SYSTEM (0.01 PERCENT)
+  WHERE `date` IN ('2025-07-01', '2024-07-01', '2023-07-01', '2022-07-01', '2021-07-01', '2020-07-01', '2019-07-01')
+),
+
+-- 1001 quantile boundaries per year, so OFFSET(p * 10) is the p-th percentile.
+-- Computed once per year rather than once per requested percentile.
+quantiles AS (
+  SELECT
+    report_year,
+    APPROX_QUANTILES(remote_css + inline_css, 1000) AS num_stylesheets_per_page,
+    APPROX_QUANTILES(bytes_css, 1000) AS css_bytes_per_page
+  FROM
+    basedata
+  GROUP BY
+    report_year
+)
+
+SELECT
+  report_year,
+  num_stylesheets_per_page[OFFSET(100)] AS num_stylesheets_per_page_p10,
+  num_stylesheets_per_page[OFFSET(250)] AS num_stylesheets_per_page_p25,
+  num_stylesheets_per_page[OFFSET(500)] AS num_stylesheets_per_page_p50,
+  num_stylesheets_per_page[OFFSET(750)] AS num_stylesheets_per_page_p75,
+  num_stylesheets_per_page[OFFSET(900)] AS num_stylesheets_per_page_p90,
+  num_stylesheets_per_page[OFFSET(1000)] AS num_stylesheets_per_page_p100,
+  css_bytes_per_page[OFFSET(100)] AS css_bytes_per_page_p10,
+  css_bytes_per_page[OFFSET(250)] AS css_bytes_per_page_p25,
+  css_bytes_per_page[OFFSET(500)] AS css_bytes_per_page_p50,
+  css_bytes_per_page[OFFSET(750)] AS css_bytes_per_page_p75,
+  css_bytes_per_page[OFFSET(900)] AS css_bytes_per_page_p90,
+  css_bytes_per_page[OFFSET(1000)] AS css_bytes_per_page_p100
+FROM
+  quantiles
+ORDER BY
+  report_year;