Skip to content

Commit b572b37

Browse files
dwsmart and tunetheweb authored
adds response_media_file_type.distribution.sql (#4247)
* adds media file type query * Update sql/2025/page-weight/response_media_file_type.distribution.sql Co-authored-by: Barry Pollard <barrypollard@google.com> * Update sql/2025/page-weight/response_media_file_type.distribution.sql Co-authored-by: Barry Pollard <barrypollard@google.com> * fix trailing comma * linted * Update sql/2025/page-weight/response_media_file_type.distribution.sql Co-authored-by: Barry Pollard <barrypollard@google.com> * linted --------- Co-authored-by: Barry Pollard <barrypollard@google.com>
1 parent 93a33ef commit b572b37

1 file changed

Lines changed: 53 additions & 0 deletions

File tree

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
-- Distribution (p10/p25/p50/p75/p90/p100) of response body size for image and
-- video requests, split by client, root vs. non-root page, and media format.
--
-- Output columns: client, percentile, is_root_page, media_format, resp_size.
WITH images_types AS (
  SELECT
    date,
    client,
    is_root_page,
    type,
    LOWER(JSON_VALUE(summary.mimeType)) AS mimeType,
    SAFE.INT64(summary.respBodySize) AS respBodySize,
    -- Only return the extension if the mimeType is missing or set to something generic like octet-stream.
    -- COALESCE is needed because JSON_VALUE yields NULL for an absent/null mimeType and `NULL = ''`
    -- is never TRUE, which would otherwise prevent the fallback to the extension for those requests.
    IF(
      COALESCE(JSON_VALUE(summary.mimeType), '') = '' OR LOWER(JSON_VALUE(summary.mimeType)) LIKE '%octet-stream%',
      LOWER(JSON_VALUE(summary.ext)),
      NULL
    ) AS usefulExt
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = '2025-07-01' AND
    type IN ('image', 'video')
)

SELECT
  client,
  percentile,
  is_root_page,
  -- This can all be changed to `JSON_VALUE(summary.format) AS media_format` next year
  -- and also remove need for `images_types` CTE, but for now we need this
  -- See clean up in https://github.com/HTTPArchive/wptagent/pull/45
  --
  -- Branches are evaluated top to bottom; the first match wins. A NULL mimeType or
  -- usefulExt simply fails its comparison, so unmatched rows end up in 'other/unknown'.
  CASE
    WHEN mimeType = 'image/avif' OR usefulExt = 'avif' THEN 'avif'
    WHEN mimeType = 'image/bmp' OR usefulExt = 'bmp' THEN 'bmp'
    WHEN mimeType = 'image/gif' OR usefulExt = 'gif' THEN 'gif'
    WHEN mimeType IN ('image/x-icon', 'image/vnd.microsoft.icon') OR usefulExt = 'ico' THEN 'ico'
    WHEN mimeType IN ('image/jpg', 'image/jpeg') OR usefulExt IN ('jpeg', 'jpg') THEN 'jpg'
    WHEN mimeType = 'image/png' OR usefulExt = 'png' THEN 'png'
    WHEN mimeType = 'image/svg+xml' OR usefulExt = 'svg' THEN 'svg'
    WHEN mimeType IN ('image/webp', 'webp') OR usefulExt = 'webp' THEN 'webp'
    -- Extension list mirrors the MIME types handled by this branch (mp4 and mpeg/mpg files).
    WHEN mimeType IN ('video/mp4', 'video/mpeg') OR usefulExt IN ('mp4', 'mpeg', 'mpg') THEN 'mpeg'
    WHEN mimeType = 'video/webm' OR usefulExt = 'webm' THEN 'webm'
    WHEN mimeType = 'video/quicktime' OR usefulExt = 'mov' THEN 'quicktime'
    WHEN mimeType = 'video/webp' THEN 'webp Video'
    ELSE 'other/unknown'
  END AS media_format,
  -- respBodySize is divided by 1024 (presumably bytes -> KB; confirm against the crawl schema).
  -- APPROX_QUANTILES(..., 1000) returns 1001 boundaries, so percentile * 10 is the matching
  -- offset (e.g. 50 -> 500, 100 -> 1000 which is the maximum).
  APPROX_QUANTILES(respBodySize / 1024, 1000)[OFFSET(percentile * 10)] AS resp_size
FROM
  images_types,
  UNNEST([10, 25, 50, 75, 90, 100]) AS percentile
GROUP BY
  client,
  percentile,
  media_format,
  is_root_page
ORDER BY
  media_format,
  client,
  is_root_page,
  percentile

0 commit comments

Comments
 (0)