|
-- Temporary function to extract the max-age value from a Cache-Control response header.
--
-- response_headers: array of (name, value) header pairs.
-- Returns the integer that follows `max-age=` in the first Cache-Control header (in array
-- order) that carries such a directive. Returns NULL when no header matches or the digits
-- cannot be cast to INT64.
CREATE TEMPORARY FUNCTION GET_MAX_AGE(response_headers ARRAY<STRUCT<name STRING, value STRING>>) RETURNS INT64 AS (
  SAFE_CAST(
    REGEXP_EXTRACT(
      (
        SELECT
          -- Lowercase the value as well as the name so `Max-Age=` / `MAX-AGE=` are matched too
          LOWER(header.value)
        FROM
          UNNEST(response_headers) AS header WITH OFFSET AS header_pos
        WHERE
          LOWER(header.name) = 'cache-control' AND
          -- Skip Cache-Control headers without max-age so a later one can still be used
          REGEXP_CONTAINS(LOWER(header.value), r'max-age=\d+')
        ORDER BY
          -- Make the LIMIT 1 pick deterministic: first matching header wins
          header_pos
        LIMIT 1
      ),
      r'max-age=(\d+)'
    ) AS INT64
  )
);
| 18 | + |
-- Temporary function flagging responses that this analysis treats as "requires revalidation".
--
-- response_headers: array of (name, value) header pairs.
-- Returns TRUE when at least one header is either
--   * an ETag, Last-Modified or Expires header (any value), or
--   * a Cache-Control header whose value contains `must-revalidate` or `no-cache`.
-- Header names and Cache-Control values are compared case-insensitively.
CREATE TEMPORARY FUNCTION REQUIRES_REVALIDATION(response_headers ARRAY<STRUCT<name STRING, value STRING>>) RETURNS BOOL AS (
  EXISTS(
    SELECT 1
    FROM UNNEST(response_headers) AS h
    WHERE
      LOWER(h.name) IN ('etag', 'last-modified', 'expires')
      OR (
        LOWER(h.name) = 'cache-control'
        AND REGEXP_CONTAINS(LOWER(h.value), r'(must-revalidate|no-cache)')
      )
  )
);
| 30 | + |
-- Temporary function to check for dynamic content via Set-Cookie.
--
-- response_headers: array of (name, value) header pairs.
-- Returns TRUE when any header is named Set-Cookie (case-insensitive), FALSE otherwise.
CREATE TEMPORARY FUNCTION HAS_SET_COOKIE(response_headers ARRAY<STRUCT<name STRING, value STRING>>) RETURNS BOOL AS (
  EXISTS(
    SELECT 1
    FROM
      UNNEST(response_headers) AS h
    WHERE
      LOWER(h.name) = 'set-cookie'
  )
);
| 39 | + |
-- Temporary function to check for Vary headers that indicate dynamic content.
--
-- response_headers: array of (name, value) header pairs.
-- Returns TRUE when any Vary header (case-insensitive name) has a value containing
-- `user-agent` or `cookie` (case-insensitive substring match), FALSE otherwise.
CREATE TEMPORARY FUNCTION HAS_DYNAMIC_VARY(response_headers ARRAY<STRUCT<name STRING, value STRING>>) RETURNS BOOL AS (
  EXISTS(
    SELECT 1
    FROM
      UNNEST(response_headers) AS h
    WHERE
      LOWER(h.name) = 'vary' AND
      REGEXP_CONTAINS(LOWER(h.value), r'(user-agent|cookie)')
  )
);
| 48 | + |
-- Temporary function to detect presence of ETag.
--
-- response_headers: array of (name, value) header pairs.
-- Returns TRUE when any header is named ETag (case-insensitive), FALSE otherwise.
CREATE TEMPORARY FUNCTION HAS_ETAG(response_headers ARRAY<STRUCT<name STRING, value STRING>>) RETURNS BOOL AS (
  EXISTS(
    SELECT 1
    FROM
      UNNEST(response_headers) AS h
    WHERE
      LOWER(h.name) = 'etag'
  )
);
| 57 | + |
-- Temporary function to check if the page uses https.
--
-- url: page URL including its scheme.
-- Returns TRUE when the URL starts with the `https://` scheme prefix (case-insensitive),
-- FALSE for any other non-NULL string, and NULL for a NULL url.
CREATE TEMPORARY FUNCTION IS_HTTPS(url STRING) RETURNS BOOL AS (
  -- Match the full scheme separator so strings that merely begin with the letters
  -- "https" (e.g. 'httpsfoo.example') are not reported as HTTPS
  STARTS_WITH(LOWER(url), 'https://')
);
| 62 | + |
-- Candidate pages: one row per root page and main-document response of the selected
-- mobile crawls, carrying raw page metrics and the partial scores used for classification.
WITH potential_jamstack_sites AS (
  SELECT
    p.date,
    p.client,
    p.page AS url,
    IS_HTTPS(p.page) AS is_https,
    p.technologies,
    SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.TTFB') AS INT64) AS ttfb,
    SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.reqTotal') AS INT64) AS total_requests,
    SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.bytesTotal') AS INT64) AS bytes_total,
    SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.bytesJS') AS INT64) AS bytes_js,
    SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.bytesCss') AS INT64) AS bytes_css,
    GET_MAX_AGE(r.response_headers) AS max_age,
    REQUIRES_REVALIDATION(r.response_headers) AS req_revalidation,

    -- SSG score: highest score among the page's detected technologies (0 when none match)
    MAX(
      CASE
        WHEN tech.technology IN ('Next.js', 'Nuxt.js', 'Gatsby') THEN 30
        WHEN tech.technology = 'Astro' THEN 50
        WHEN tech.technology IN ('Nextra', 'Mintlify', 'Scully') THEN 70
        WHEN tech.technology IN (
          'Hugo', 'Jekyll', 'Docusaurus', 'Hexo', 'VuePress', 'Gridsome',
          'Eleventy', 'Pelican', 'Octopress', 'Retype', 'Bridgetown'
        ) THEN 100
        ELSE 0
      END
    ) AS ssg_score,

    -- PaaS score: highest score among the page's detected hosting platforms (0 when none match)
    MAX(
      CASE
        WHEN tech.technology IN ('Vercel', 'Netlify') THEN 30
        WHEN tech.technology IN ('GitHub Pages', 'Tiiny Host') THEN 100
        ELSE 0
      END
    ) AS paas_score,

    -- TTFB score: 50 up to 800, 25 up to 1800, otherwise (including a missing TTFB) 0.
    -- The second branch is only reached when the first did not match, so no lower bound is needed.
    CASE
      WHEN SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.TTFB') AS INT64) <= 800 THEN 50
      WHEN SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.TTFB') AS INT64) <= 1800 THEN 25
      ELSE 0
    END AS ttfb_score,

    -- Cache score: max-age of at least 604800 (7 * 24 * 3600) earns 100 without
    -- revalidation or 50 with it; an ETag header adds 10 on top
    (
      CASE
        WHEN GET_MAX_AGE(r.response_headers) >= 604800 AND NOT REQUIRES_REVALIDATION(r.response_headers) THEN 100
        WHEN GET_MAX_AGE(r.response_headers) >= 604800 AND REQUIRES_REVALIDATION(r.response_headers) THEN 50
        ELSE 0
      END
    ) +
    (CASE WHEN HAS_ETAG(r.response_headers) THEN 10 ELSE 0 END) AS cache_score,

    -- Penalties for dynamic content: -10 for Set-Cookie, -15 for a Vary on user-agent/cookie
    (CASE WHEN HAS_SET_COOKIE(r.response_headers) THEN -10 ELSE 0 END) +
    (CASE WHEN HAS_DYNAMIC_VARY(r.response_headers) THEN -15 ELSE 0 END) AS dynamic_penalty
  FROM
    `httparchive.all.pages` AS p
  -- INNER JOIN states what the query already did: the WHERE filters on r discard
  -- every page without a matching request row
  INNER JOIN
    `httparchive.all.requests` AS r
  ON
    p.date = r.date AND p.client = r.client AND p.page = r.page
  -- One row per detected technology; pages with an empty technologies array drop out
  CROSS JOIN
    UNNEST(p.technologies) AS tech
  WHERE
    p.date IN ('2022-06-01', '2023-06-01', '2024-06-01') AND
    p.client = 'mobile' AND
    p.is_root_page AND
    -- Same constants repeated on the requests side. They are implied by the join keys,
    -- but constant predicates let the engine restrict the requests scan directly.
    -- NOTE(review): assumes requests is partitioned/clustered on date and client - confirm
    r.date IN ('2022-06-01', '2023-06-01', '2024-06-01') AND
    r.client = 'mobile' AND
    r.is_root_page AND
    r.is_main_document
  GROUP BY
    p.date, p.client, p.page, p.technologies, r.response_headers, p.summary
),
-- Combine all the information: add total_score and the classification derived from it
total_sites AS (
  SELECT
    date,
    client,
    url,
    technologies,
    is_https,
    total_requests,
    bytes_total,
    bytes_js,
    bytes_css,
    ssg_score,
    paas_score,
    ttfb_score,
    max_age,
    req_revalidation,
    cache_score,
    dynamic_penalty,
    total_score,

    -- >= 100 is 'jamstack', 50-99 is 'jamstacky', anything else (including a NULL
    -- score) is 'no-jamstack'
    CASE
      WHEN total_score >= 100 THEN 'jamstack'
      WHEN total_score >= 50 THEN 'jamstacky'
      ELSE 'no-jamstack'
    END AS is_jamstack
  FROM (
    -- Compute the sum once so the classification above can refer to it by name.
    -- dynamic_penalty is zero or negative, so adding it subtracts the penalties.
    SELECT
      site.*,
      (
        site.cache_score + site.ttfb_score + site.ssg_score + site.paas_score + site.dynamic_penalty
      ) AS total_score
    FROM
      potential_jamstack_sites AS site
  )
),
| 185 | + |
-- Pages classified as 'jamstack', expanded to one distinct row per (page, technology)
filtered_sites AS (
  -- DISTINCT over the full column list replaces a GROUP BY on exactly these columns.
  -- No ORDER BY here: row order of a CTE is not preserved, and the final SELECT sorts.
  SELECT DISTINCT
    date,
    url,
    tech.technology AS technology,
    is_jamstack,
    bytes_js,
    bytes_css,
    bytes_total,
    total_requests
  FROM
    total_sites
  CROSS JOIN
    UNNEST(technologies) AS tech
  WHERE
    -- Keep only technologies that have at least one category entry
    EXISTS (
      SELECT 1
      FROM
        UNNEST(tech.categories) AS category
    ) AND
    is_jamstack = 'jamstack'
)
| 218 | + |
-- Per crawl date and technology: approximate medians of page weight and request count
-- across the pages classified as jamstack, for the three compared technologies.
SELECT
  date,
  technology,
  -- OFFSET(500) of 1000 quantile buckets is the approximate median; sizes are bytes / 1024
  APPROX_QUANTILES(ROUND(bytes_js / 1024, 2), 1000)[OFFSET(500)] AS median_js_kb,
  APPROX_QUANTILES(ROUND(bytes_css / 1024, 2), 1000)[OFFSET(500)] AS median_css_kb,
  APPROX_QUANTILES(ROUND(bytes_total / 1024, 2), 1000)[OFFSET(500)] AS median_total_weight_kb,
  APPROX_QUANTILES(total_requests, 1000)[OFFSET(500)] AS median_requests,
  COUNT(DISTINCT url) AS pages
FROM filtered_sites
WHERE technology IN ('Hugo', 'Next.js', 'Astro')
GROUP BY
  date,
  technology
ORDER BY
  date ASC,
  pages DESC
0 commit comments