Skip to content

Commit 0a512e8

Browse files
guacamneumegenkevinfarrugiatunetheweb
authored
Jamstack 2024 chapter (#3834)
* Add jamstack 2024 sql queries
* Add jamstack queries
* Update jamstack-overview query
* Update jamstack queries
* Added Jamstack content
* small tweaks and added authors, reviewers, editors, and analysts
* small content tweak
* Resolved SQL lint issue
* Fix sql lint issue
* added contributors
* Added bio
* Formatting
* Normalise numbers to 2 significant figures
* Retake images
* Chapter quotes and prerendered
* Rename images
* Retake CSS size image
* Final tweaks

---------

Co-authored-by: Mike Neumegen <mneumegen@gmail.com>
Co-authored-by: Kevin Farrugia <hello@imkev.dev>
Co-authored-by: Barry Pollard <barrypollard@google.com>
1 parent e753d61 commit 0a512e8

23 files changed

Lines changed: 1960 additions & 14 deletions
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
-- Temporary function to extract the max-age value from a Cache-Control header.
--
-- response_headers: array of (name, value) response header pairs.
-- Returns: the digits following `max-age=` as INT64, or NULL when no
--   Cache-Control header carries a max-age directive or the digits do not
--   fit in INT64 (SAFE_CAST swallows the conversion error).
--
-- Header names and directive names are matched case-insensitively, so
-- `Cache-Control: Max-Age=60` is recognised. When several Cache-Control
-- headers are present, the first one (by array position) that contains a
-- max-age directive is used, which keeps the result deterministic.
CREATE TEMPORARY FUNCTION GET_MAX_AGE(response_headers ARRAY<STRUCT<name STRING, value STRING>>) RETURNS INT64 AS (
  SAFE_CAST(
    REGEXP_EXTRACT(
      (
        SELECT
          header.value
        FROM
          UNNEST(response_headers) AS header WITH OFFSET AS header_position
        WHERE
          LOWER(header.name) = 'cache-control' AND
          -- Skip Cache-Control headers without max-age so a later header can supply it.
          REGEXP_CONTAINS(header.value, r'(?i)max-age=\d+')
        ORDER BY
          header_position
        LIMIT 1
      ),
      r'(?i)max-age=(\d+)'
    ) AS INT64
  )
);
18+
19+
-- Temporary function flagging responses treated as needing revalidation.
--
-- response_headers: array of (name, value) response header pairs.
-- Returns TRUE when at least one header is either
--   * a Cache-Control header whose value contains `must-revalidate` or `no-cache`, or
--   * an ETag, Last-Modified or Expires header (any value);
-- otherwise FALSE. Names and values are compared in lower case.
CREATE TEMPORARY FUNCTION REQUIRES_REVALIDATION(response_headers ARRAY<STRUCT<name STRING, value STRING>>) RETURNS BOOL AS (
  EXISTS(
    SELECT 1
    FROM (
      -- Normalise once so the predicate below reads cleanly.
      SELECT
        LOWER(h.name) AS header_name,
        LOWER(h.value) AS header_value
      FROM
        UNNEST(response_headers) AS h
    )
    WHERE
      header_name IN ('etag', 'last-modified', 'expires') OR
      (header_name = 'cache-control' AND REGEXP_CONTAINS(header_value, r'(must-revalidate|no-cache)'))
  )
);
30+
31+
-- Temporary function reporting whether the response sets a cookie, which the
-- scoring below treats as a sign of dynamic content.
--
-- response_headers: array of (name, value) response header pairs.
-- Returns TRUE when any header name equals `set-cookie` (case-insensitive),
-- FALSE otherwise, including for an empty array.
CREATE TEMPORARY FUNCTION HAS_SET_COOKIE(response_headers ARRAY<STRUCT<name STRING, value STRING>>) RETURNS BOOL AS (
  EXISTS(
    SELECT
      1
    FROM
      UNNEST(response_headers) AS h
    WHERE
      LOWER(h.name) = 'set-cookie'
  )
);
39+
40+
-- Temporary function reporting whether a Vary header names a request header
-- that the scoring below treats as a sign of dynamic content.
--
-- response_headers: array of (name, value) response header pairs.
-- Returns TRUE when any `vary` header (name compared case-insensitively) has a
-- lower-cased value containing `user-agent` or `cookie`; FALSE otherwise.
CREATE TEMPORARY FUNCTION HAS_DYNAMIC_VARY(response_headers ARRAY<STRUCT<name STRING, value STRING>>) RETURNS BOOL AS (
  EXISTS(
    SELECT 1
    FROM (
      SELECT
        LOWER(h.name) AS header_name,
        LOWER(h.value) AS header_value
      FROM
        UNNEST(response_headers) AS h
    )
    WHERE
      header_name = 'vary' AND
      REGEXP_CONTAINS(header_value, r'(user-agent|cookie)')
  )
);
48+
49+
-- Temporary function reporting whether the response carries an ETag header.
--
-- response_headers: array of (name, value) response header pairs.
-- Returns TRUE when any header name equals `etag` (case-insensitive),
-- FALSE otherwise, including for an empty array.
CREATE TEMPORARY FUNCTION HAS_ETAG(response_headers ARRAY<STRUCT<name STRING, value STRING>>) RETURNS BOOL AS (
  EXISTS(
    SELECT
      1
    FROM
      UNNEST(response_headers) AS h
    WHERE
      LOWER(h.name) = 'etag'
  )
);
57+
58+
-- Temporary function reporting whether a URL begins with `https`.
--
-- url: page URL.
-- Returns TRUE when the lower-cased URL starts with the five characters
-- `https`, FALSE when it does not, and NULL for a NULL url.
CREATE TEMPORARY FUNCTION IS_HTTPS(url STRING) RETURNS BOOL AS (
  STARTS_WITH(LOWER(url), 'https')
);
62+
63+
-- potential_jamstack_sites: one row per (date, client, page, technologies,
-- response_headers, summary) group for mobile root pages on the three crawl
-- dates, joined to the page's main-document request, with the heuristic
-- sub-scores that are later summed into a Jamstack score.
--
-- The pages table is expanded to one row per detected technology so the
-- SSG/PaaS scores can take the MAX across a page's technologies; the GROUP BY
-- collapses that expansion again. Pages with no detected technology produce
-- no rows.
WITH potential_jamstack_sites AS (
  SELECT
    p.date,
    p.client,
    p.page AS url,
    IS_HTTPS(p.page) AS is_https,
    p.technologies,
    SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.TTFB') AS INT64) AS ttfb,
    SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.reqTotal') AS INT64) AS total_requests,
    SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.bytesTotal') AS INT64) AS bytes_total,
    SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.bytesJS') AS INT64) AS bytes_js,
    SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.bytesCss') AS INT64) AS bytes_css,
    GET_MAX_AGE(r.response_headers) AS max_age,
    REQUIRES_REVALIDATION(r.response_headers) AS req_revalidation,

    -- SSG score: highest weight among the page's detected static site generators.
    MAX(
      CASE
        WHEN tech.technology IN (
          'Hugo', 'Jekyll', 'Docusaurus', 'Hexo', 'VuePress', 'Gridsome',
          'Eleventy', 'Pelican', 'Octopress', 'Retype', 'Bridgetown'
        ) THEN 100
        WHEN tech.technology IN ('Nextra', 'Mintlify', 'Scully') THEN 70
        WHEN tech.technology = 'Astro' THEN 50
        WHEN tech.technology IN ('Next.js', 'Nuxt.js', 'Gatsby') THEN 30
        ELSE 0
      END
    ) AS ssg_score,

    -- PaaS score: highest weight among the page's detected hosting platforms.
    MAX(
      CASE
        WHEN tech.technology IN ('GitHub Pages', 'Tiiny Host') THEN 100
        WHEN tech.technology IN ('Vercel', 'Netlify') THEN 30
        ELSE 0
      END
    ) AS paas_score,

    -- TTFB score: 50 up to 800, 25 up to 1800, otherwise 0 (a missing or
    -- non-numeric TTFB also scores 0). Units presumably milliseconds - TODO confirm.
    CASE
      WHEN SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.TTFB') AS INT64) <= 800 THEN 50
      WHEN SAFE_CAST(JSON_EXTRACT_SCALAR(p.summary, '$.TTFB') AS INT64) <= 1800 THEN 25
      ELSE 0
    END AS ttfb_score,

    -- Cache score: max-age of at least 604800 seconds (7 days) earns 100 without
    -- revalidation or 50 with it; an ETag adds a further 10.
    (
      CASE
        WHEN GET_MAX_AGE(r.response_headers) >= 604800 AND NOT REQUIRES_REVALIDATION(r.response_headers) THEN 100
        WHEN GET_MAX_AGE(r.response_headers) >= 604800 AND REQUIRES_REVALIDATION(r.response_headers) THEN 50
        ELSE 0
      END
    ) +
    (CASE WHEN HAS_ETAG(r.response_headers) THEN 10 ELSE 0 END) AS cache_score,

    -- Penalties for dynamic content: -10 for Set-Cookie, -15 for a Vary on
    -- User-Agent or Cookie.
    (CASE WHEN HAS_SET_COOKIE(r.response_headers) THEN -10 ELSE 0 END) +
    (CASE WHEN HAS_DYNAMIC_VARY(r.response_headers) THEN -15 ELSE 0 END) AS dynamic_penalty
  FROM
    `httparchive.all.pages` AS p
  CROSS JOIN
    UNNEST(p.technologies) AS tech
  -- INNER JOIN: the r.* filters in WHERE already discard pages without a
  -- matching main-document request, so an outer join would only mislead.
  INNER JOIN
    `httparchive.all.requests` AS r
  ON
    p.date = r.date AND p.client = r.client AND p.page = r.page
  WHERE
    p.date IN ('2022-06-01', '2023-06-01', '2024-06-01') AND
    p.client = 'mobile' AND
    p.is_root_page AND
    -- Repeat the date/client predicates on the requests side so the scan of
    -- that table is restricted directly rather than only via the join equality.
    r.date IN ('2022-06-01', '2023-06-01', '2024-06-01') AND
    r.client = 'mobile' AND
    r.is_root_page AND
    r.is_main_document
  GROUP BY
    p.date, p.client, p.page, p.technologies, r.response_headers, p.summary
),
148+
-- total_sites: carries the per-page columns forward and adds
--   total_score = cache_score + ttfb_score + ssg_score + paas_score + dynamic_penalty
--   (dynamic_penalty is zero or negative), and
--   is_jamstack = 'jamstack' (score >= 100), 'jamstacky' (50 <= score < 100)
--   or 'no-jamstack' (anything else).
total_sites AS (
  SELECT
    site.date,
    site.client,
    site.url,
    site.technologies,
    site.is_https,
    site.total_requests,
    site.bytes_total,
    site.bytes_js,
    site.bytes_css,
    site.ssg_score,
    site.paas_score,
    site.ttfb_score,
    site.max_age,
    site.req_revalidation,
    site.cache_score,
    site.dynamic_penalty,
    site.cache_score + site.ttfb_score + site.ssg_score + site.paas_score + site.dynamic_penalty AS total_score,
    -- Branches are evaluated top-down, so the second one only sees scores below 100.
    CASE
      WHEN site.cache_score + site.ttfb_score + site.ssg_score + site.paas_score + site.dynamic_penalty >= 100
        THEN 'jamstack'
      WHEN site.cache_score + site.ttfb_score + site.ssg_score + site.paas_score + site.dynamic_penalty >= 50
        THEN 'jamstacky'
      ELSE 'no-jamstack'
    END AS is_jamstack
  FROM
    potential_jamstack_sites AS site
),
185+
186+
-- filtered_sites: distinct (date, url, technology) rows, with the page's size
-- and request counts, for pages classified as 'jamstack'. Each page is
-- expanded to one row per detected technology that has at least one category.
-- No ORDER BY here: the final query applies its own ordering.
filtered_sites AS (
  SELECT DISTINCT
    date,
    url,
    tech.technology AS technology,
    is_jamstack,
    bytes_js,
    bytes_css,
    bytes_total,
    total_requests
  FROM
    total_sites
  CROSS JOIN
    UNNEST(technologies) AS tech
  WHERE
    -- A NULL or empty categories array yields NULL/0 here and is filtered out.
    ARRAY_LENGTH(tech.categories) > 0 AND
    is_jamstack = 'jamstack'
)
218+
219+
-- Per crawl date and technology (Hugo, Next.js, Astro only): approximate
-- medians of JS, CSS and total page weight in KB (bytes / 1024, rounded to two
-- decimals), the approximate median request count, and the number of distinct
-- pages. The median is element 500 of 1000 approximate quantile buckets.
SELECT
  date,
  technology,
  APPROX_QUANTILES(ROUND(bytes_js / 1024, 2), 1000)[OFFSET(500)] AS median_js_kb,
  APPROX_QUANTILES(ROUND(bytes_css / 1024, 2), 1000)[OFFSET(500)] AS median_css_kb,
  APPROX_QUANTILES(ROUND(bytes_total / 1024, 2), 1000)[OFFSET(500)] AS median_total_weight_kb,
  APPROX_QUANTILES(total_requests, 1000)[OFFSET(500)] AS median_requests,
  COUNT(DISTINCT url) AS pages
FROM
  filtered_sites
WHERE
  technology IN ('Hugo', 'Next.js', 'Astro')
GROUP BY
  date,
  technology
ORDER BY
  date ASC,
  pages DESC

0 commit comments

Comments
 (0)