#standardSQL
# Measuring img loaded bytes and dimensions
# bytes_and_dimensions.sql

# getSrcsetInfo: parses a JSON array string and returns one STRUCT per array
# entry, with numeric fields coerced to finite numbers and the image format
# normalised to a short name (jpg, png, webp, ... or 'unknown').
#
# Input:   responsiveImagesJsonString - JSON text expected to hold an array of
#          objects with url, mimeType, approximateResourceWidth,
#          approximateResourceHeight, byteSize and bitsPerPixel properties.
# Returns: NULL when the input is NULL, is not valid JSON, or is not a JSON
#          array; otherwise one STRUCT per object entry (non-object entries are
#          skipped). Missing or non-numeric measurements are reported as 0.
CREATE TEMPORARY FUNCTION getSrcsetInfo(responsiveImagesJsonString STRING)
RETURNS ARRAY<STRUCT<imgURL STRING, approximateResourceWidth INT64, approximateResourceHeight INT64, byteSize INT64, bitsPerPixel NUMERIC, isPixel BOOL, isDataURL BOOL, resourceFormat STRING>>
LANGUAGE js AS '''
  // Maps MIME subtypes and file extensions onto one short name per format.
  const subtypeMap = {
    'svg+xml': 'svg',
    'svgz': 'svg',
    'jpeg': 'jpg',
    'jfif': 'jpg',
    'x-png': 'png',
    'vnd.microsoft.icon': 'ico',
    'x-icon': 'ico',
    'jxr': 'jxr',
    'vnd.ms-photo': 'jxr',
    'hdp': 'jxr',
    'wdp': 'jxr',
    'jpf': 'jp2',
    'jpx': 'jp2',
    'jpm': 'jp2',
    'mj2': 'jp2',
    'x-jp2-container': 'jp2',
    'x-jp2-codestream': 'jp2',
    'x-jpeg2000-image': 'jp2',
    'heic': 'heif',
    'x-ms-bmp': 'bmp',
    'x-pict': 'pict',
    'tif': 'tiff',
    'x-tif': 'tiff',
    'x-tiff': 'tiff',
    'vnd.mozilla.apng': 'apng',
    // identities
    'apng': 'apng',
    'jpg': 'jpg',
    'jp2': 'jp2',
    'png': 'png',
    'gif': 'gif',
    'ico': 'ico',
    'webp': 'webp',
    'avif': 'avif',
    'tiff': 'tiff',
    'flif': 'flif',
    'heif': 'heif',
    'jxl': 'jxl',
    'avif-sequence': 'avif-sequence', // keep separate from single frames...
    'heic-sequence': 'heic-sequence',
    'bmp': 'bmp',
    'pict': 'pict'
  };

  // Looks a subtype up in subtypeMap. Only own keys count, so inherited
  // properties such as "constructor" or "toString" are never returned.
  function normalizeSubtype( subtype ) {
    if ( Object.prototype.hasOwnProperty.call( subtypeMap, subtype ) ) {
      return subtypeMap[ subtype ];
    }
    return 'unknown'; // switch between:
                      // `subtype`
                      //   to see everything, check if there's anything else worth capturing
                      // `'unknown'`
                      //   to make results manageable
  }

  // Returns the short format name for an image, trying in order:
  // the data: URL MIME type, the content type, then the URL file extension.
  function pithyType( { contentType, url } ) {
    const hasUrl = Boolean( url ) && typeof url === "string";

    // if it's a data url, take the mime type from there, done.
    if ( hasUrl ) {
      const match = url.toLowerCase().match( /^data:image\\/([\\w\\-\\.\\+]+)/ );
      if ( match && match[ 1 ] ) {
        return normalizeSubtype( match[ 1 ] );
      }
    }

    // if we get a content-type header, use it!
    if ( contentType && typeof contentType === "string" ) {
      const match = contentType.toLowerCase().match( /image\\/([\\w\\-\\.\\+]+)/ );
      if ( match && match[ 1 ] ) {
        return normalizeSubtype( match[ 1 ] );
      }
    }

    // otherwise fall back to extension in the URL
    if ( hasUrl ) {
      // Drop the query string and fragment BEFORE splitting, so that dots or
      // slashes inside them (a.jpg?v=1.2, a.jpg?next=/x) cannot be mistaken
      // for the file name or its extension.
      const path = url.split( /[?#]/ )[ 0 ];
      const segments = path.split( "/" );
      if ( segments.length > 1 ) {
        const pieces = segments[ segments.length - 1 ].split( "." );
        if ( pieces.length > 1 ) {
          return normalizeSubtype(
            pieces[ pieces.length - 1 ]
              .toLowerCase()
              .replace( /^(\\w+)&.*/, '$1' ) // strip "&"-style trailing params
          );
        }
      }
    }

    // otherwise throw up our hands
    return 'unknown';
  }

  // NaN and Infinity cannot be converted to INT64 / NUMERIC, so report 0.
  const finiteOrZero = n => ( Number.isFinite( n ) ? n : 0 );

  let parsed;
  try {
    parsed = JSON.parse( responsiveImagesJsonString );
  } catch ( e ) {
    // One malformed payload should not abort the whole query.
    return null;
  }
  if ( !Array.isArray( parsed ) ) {
    return null;
  }

  return parsed
    .filter( d => d && typeof d === "object" ) // skip null / scalar entries
    .map( d => ({
      imgURL: d.url,
      approximateResourceWidth: finiteOrZero( Math.floor( d.approximateResourceWidth || 0 ) ),
      approximateResourceHeight: finiteOrZero( Math.floor( d.approximateResourceHeight || 0 ) ),
      byteSize: finiteOrZero( Math.floor( d.byteSize || 0 ) ),
      bitsPerPixel: finiteOrZero( parseFloat( d.bitsPerPixel || 0 ) ),
      isPixel: d.approximateResourceWidth == 1 && d.approximateResourceHeight == 1,
      // Same "data:" scheme test pithyType uses (with colon, case-insensitive).
      isDataURL: typeof d.url === "string" && /^data:/i.test( d.url ),
      resourceFormat: pithyType({ contentType: d.mimeType, url: d.url })
    }) );
''';
| 120 | + |
# imgs: one row per entry returned by getSrcsetInfo for each page, with the
# derived megapixels and aspectRatio measurements.
WITH imgs AS (
  SELECT
    _TABLE_SUFFIX AS client,
    url AS pageURL,
    imgURL,
    approximateResourceWidth,
    approximateResourceHeight,
    byteSize,
    bitsPerPixel,
    isPixel,
    isDataURL,
    (approximateResourceWidth * approximateResourceHeight) / 1000000 AS megapixels,
    # getSrcsetInfo reports a missing height as 0, and a plain `/` raises a
    # division-by-zero error. The height filter lives in a later CTE, so it
    # cannot be relied on to run first; SAFE_DIVIDE yields NULL instead.
    SAFE_DIVIDE(approximateResourceWidth, approximateResourceHeight) AS aspectRatio,
    resourceFormat
  FROM
    `httparchive.pages.2024_06_01_*`
  CROSS JOIN
    UNNEST(getSrcsetInfo(JSON_QUERY(JSON_VALUE(payload, '$._responsive_images'), '$.responsive-images')))
),

# percentiles: per-client quantile arrays for every measurement, computed only
# over images wider and taller than one pixel.
# APPROX_QUANTILES(x, 1000) returns 1001 boundaries, so OFFSET(p * 10) below
# picks the p-th percentile.
percentiles AS (
  SELECT
    client,
    APPROX_QUANTILES(approximateResourceWidth, 1000) AS resourceWidthPercentiles,
    APPROX_QUANTILES(approximateResourceHeight, 1000) AS resourceHeightPercentiles,
    APPROX_QUANTILES(aspectRatio, 1000) AS aspectRatioPercentiles,
    APPROX_QUANTILES(megapixels, 1000) AS megapixelsPercentiles,
    APPROX_QUANTILES(byteSize, 1000) AS byteSizePercentiles,
    APPROX_QUANTILES(bitsPerPixel, 1000) AS bitsPerPixelPercentiles,
    COUNT(0) AS imgCount
  FROM
    imgs
  WHERE
    approximateResourceWidth > 1 AND
    approximateResourceHeight > 1
  GROUP BY
    client
)

# One output row per (client, percentile).
SELECT
  percentile,
  client,
  imgCount,
  resourceWidthPercentiles[OFFSET(percentile * 10)] AS resourceWidth,
  resourceHeightPercentiles[OFFSET(percentile * 10)] AS resourceHeight,
  aspectRatioPercentiles[OFFSET(percentile * 10)] AS aspectRatio,
  megapixelsPercentiles[OFFSET(percentile * 10)] AS megapixels,
  byteSizePercentiles[OFFSET(percentile * 10)] AS byteSize,
  bitsPerPixelPercentiles[OFFSET(percentile * 10)] AS bitsPerPixel
FROM
  percentiles
CROSS JOIN
  UNNEST([0, 10, 25, 50, 75, 90, 100]) AS percentile
ORDER BY
  imgCount DESC,
  client, # tiebreaker keeps each client's rows together if counts are equal
  percentile
0 commit comments