Skip to content

Commit 597a5d4

Browse files
dwsmart and tunetheweb authored
Fix for missing summary.bytes html doc (#4270)
* corrects filename for response_media_file_type_distribution.sql * adds use-efficient-cache-lifetimes.sql lighthouse metric query * group root and internal pages * get bytesHtmlDoc from requests table --------- Co-authored-by: Barry Pollard <barrypollard@google.com>
1 parent 0c42602 commit 597a5d4

File tree

1 file changed

+48
-16
lines changed

1 file changed

+48
-16
lines changed
Lines changed: 48 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,57 @@
1+
-- The summary.bytesHtmlDoc field in the pages table is incorrect: it is always zero (see: https://github.com/HTTPArchive/wptagent/issues/47),
2+
-- so we need to recalculate it by joining back to the requests table.
3+
-- To optimize performance, we first create a CTE that pre-filters and extracts the necessary data
4+
-- from the requests table, then join that CTE in the main query.
5+
-- Once that issue is fixed, we can revert to using summary.bytesHtmlDoc directly and remove the join and CTE.
6+
WITH HtmlDocRequests AS (
7+
SELECT
8+
page,
9+
client,
10+
date,
11+
-- Extract summary.respSize from the JSON and cast it to INT64 once, here in the CTE, so the main query can use it directly
12+
CAST(JSON_VALUE(summary.respSize) AS INT64) AS respSize_bytes
13+
FROM
14+
`httparchive.crawl.requests`
15+
WHERE
16+
-- Pre-filter the requests table to the crawl date and to main-document HTML requests before joining.
17+
-- This date MUST match the date in the main query's WHERE clause (p.date).
18+
date = '2025-07-01' AND
19+
type = 'html' AND
20+
is_main_document
21+
)
22+
23+
-- Main query: percentiles of page weight in KB per resource type, grouped by client and is_root_page; joins the CTE for the HTML document size
124
SELECT
225
percentile,
3-
client,
4-
is_root_page,
5-
APPROX_QUANTILES(CAST(JSON_VALUE(summary.bytesTotal) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS total_kbytes,
6-
APPROX_QUANTILES(CAST(JSON_VALUE(summary.bytesHtml) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS html_kbytes,
7-
APPROX_QUANTILES(CAST(JSON_VALUE(summary.bytesJS) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS js_kbytes,
8-
APPROX_QUANTILES(CAST(JSON_VALUE(summary.bytesCss) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS css_kbytes,
9-
APPROX_QUANTILES(CAST(JSON_VALUE(summary.bytesImg) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS img_kbytes,
10-
APPROX_QUANTILES(CAST(JSON_VALUE(summary.bytesOther) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS other_kbytes,
11-
APPROX_QUANTILES(CAST(JSON_VALUE(summary.bytesHtmlDoc) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS html_doc_kbytes,
12-
APPROX_QUANTILES(CAST(JSON_VALUE(summary.bytesFont) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS font_kbytes
26+
p.client,
27+
p.is_root_page,
28+
APPROX_QUANTILES(CAST(JSON_VALUE(p.summary.bytesTotal) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS total_kbytes,
29+
APPROX_QUANTILES(CAST(JSON_VALUE(p.summary.bytesHtml) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS html_kbytes,
30+
APPROX_QUANTILES(CAST(JSON_VALUE(p.summary.bytesJS) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS js_kbytes,
31+
APPROX_QUANTILES(CAST(JSON_VALUE(p.summary.bytesCss) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS css_kbytes,
32+
APPROX_QUANTILES(CAST(JSON_VALUE(p.summary.bytesImg) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS img_kbytes,
33+
APPROX_QUANTILES(CAST(JSON_VALUE(p.summary.bytesOther) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS other_kbytes,
34+
-- html_doc_kbytes is computed from the CTE's pre-cast respSize_bytes instead of summary.bytesHtmlDoc
35+
APPROX_QUANTILES(r.respSize_bytes / 1024, 1000)[OFFSET(percentile * 10)] AS html_doc_kbytes,
36+
APPROX_QUANTILES(CAST(JSON_VALUE(p.summary.bytesFont) AS INT64) / 1024, 1000)[OFFSET(percentile * 10)] AS font_kbytes
1337
FROM
14-
`httparchive.crawl.pages`,
38+
`httparchive.crawl.pages` p
39+
LEFT JOIN
40+
-- Join the CTE on page, client and date; the LEFT JOIN keeps pages that have no matching request row
41+
HtmlDocRequests r
42+
ON
43+
p.page = r.page AND
44+
p.client = r.client AND
45+
p.date = r.date,
1546
UNNEST([10, 25, 50, 75, 90, 100]) AS percentile
1647
WHERE
17-
date = '2025-07-01'
48+
-- Still required: the date filter inside the CTE only restricts the requests table, not 'pages'
49+
p.date = '2025-07-01'
1850
GROUP BY
1951
percentile,
20-
client,
21-
is_root_page
52+
p.client,
53+
p.is_root_page
2254
ORDER BY
23-
client,
24-
is_root_page,
55+
p.client,
56+
p.is_root_page,
2557
percentile

0 commit comments

Comments
 (0)