Skip to content

Commit e7675e1

Browse files
committed
Merge branch 'main' of github.com:HTTPArchive/almanac.httparchive.org into production
2 parents e3d248b + 83da7bf commit e7675e1

12 files changed

Lines changed: 65 additions & 66 deletions

File tree

Lines changed: 55 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,70 @@
11
#standardSQL
22
# Internal and external link metrics by quantile and rank
3-
4-
CREATE TEMPORARY FUNCTION getOutgoingLinkMetrics(payload STRING)
5-
RETURNS STRUCT<
6-
same_site INT64,
7-
same_property INT64,
8-
other_property INT64
9-
> LANGUAGE js AS '''
10-
var result = {same_site: 0,
11-
same_property: 0,
12-
other_property: 0};
13-
14-
try {
15-
var $ = JSON.parse(payload);
16-
var wpt_bodies = JSON.parse($._wpt_bodies);
17-
18-
if (!wpt_bodies){
19-
return result;
20-
}
21-
22-
var anchors = wpt_bodies.anchors;
23-
24-
if (anchors){
25-
result.same_site = anchors.rendered.same_site;
26-
result.same_property = anchors.rendered.same_property;
27-
result.other_property = anchors.rendered.other_property;
28-
}
29-
30-
} catch (e) {}
31-
32-
return result;
33-
''';
34-
353
WITH page_metrics AS (
364
SELECT
375
client,
386
page,
39-
getOutgoingLinkMetrics(payload) AS outgoing_link_metrics,
40-
JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._wpt_bodies'), '$.is_root_page') AS is_root_page
41-
FROM
42-
`httparchive.all.pages`
7+
is_root_page,
8+
IF(rank <= rank_bucket, rank_bucket, NULL) AS rank,
9+
ANY_VALUE(custom_metrics.wpt_bodies.anchors) AS anchors
10+
FROM httparchive.crawl.pages,
11+
UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_bucket
4312
WHERE
44-
DATE = '2024-06-01'
13+
date = '2024-06-01'
14+
GROUP BY
15+
client,
16+
page,
17+
is_root_page,
18+
rank
19+
HAVING rank IS NOT NULL
20+
), metric_details AS (
21+
SELECT
22+
client,
23+
is_root_page,
24+
percentile,
25+
rank,
26+
APPROX_QUANTILES(INT64(anchors.rendered.same_site), 1000)[OFFSET(percentile * 10)] AS outgoing_links_same_site,
27+
APPROX_QUANTILES(INT64(anchors.rendered.same_property), 1000)[OFFSET(percentile * 10)] AS outgoing_links_same_property,
28+
APPROX_QUANTILES(INT64(anchors.rendered.other_property), 1000)[OFFSET(percentile * 10)] AS outgoing_links_other_property
29+
FROM page_metrics,
30+
UNNEST([10, 25, 50, 75, 90, 100]) AS percentile
31+
GROUP BY
32+
client,
33+
is_root_page,
34+
rank,
35+
percentile
36+
ORDER BY
37+
client,
38+
is_root_page,
39+
rank,
40+
percentile
41+
), page_counts AS (
42+
SELECT
43+
client,
44+
is_root_page,
45+
rank,
46+
COUNT(DISTINCT page) AS total_pages
47+
FROM page_metrics
48+
GROUP BY
49+
client,
50+
is_root_page,
51+
rank
4552
)
4653

4754
SELECT
4855
client,
49-
CASE
50-
WHEN is_root_page = 'false' THEN 'Secondary Page'
51-
ELSE 'Homepage'
52-
END AS page_type,
56+
is_root_page,
57+
rank,
58+
total_pages,
5359
percentile,
54-
rank_grouping,
55-
CASE
56-
WHEN rank_grouping = 100000000 THEN 'all'
57-
ELSE FORMAT("%'d", rank_grouping)
58-
END AS ranking,
59-
COUNT(DISTINCT page) AS pages,
60-
APPROX_QUANTILES(outgoing_link_metrics.same_site, 1000)[OFFSET(percentile * 10)] AS outgoing_links_same_site,
61-
APPROX_QUANTILES(outgoing_link_metrics.same_property, 1000)[OFFSET(percentile * 10)] AS outgoing_links_same_property,
62-
APPROX_QUANTILES(outgoing_link_metrics.other_property, 1000)[OFFSET(percentile * 10)] AS outgoing_links_other_property
63-
FROM
64-
page_metrics,
65-
UNNEST([10, 25, 50, 75, 90, 100]) AS percentile,
66-
UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
67-
GROUP BY
68-
client,
69-
page_type,
70-
rank_grouping,
71-
percentile
60+
outgoing_links_same_site,
61+
outgoing_links_same_property,
62+
outgoing_links_other_property
63+
FROM metric_details
64+
LEFT JOIN page_counts
65+
USING (client, is_root_page, rank)
7266
ORDER BY
7367
client,
74-
page_type,
75-
rank_grouping,
68+
is_root_page,
69+
rank,
7670
percentile

src/config/contributors.json

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -626,11 +626,13 @@
626626
},
627627
"b_atish": {
628628
"name": "Beatriz González Mellídez",
629+
"avatar_url": "756480",
629630
"teams": {
630631
"2024": [
631632
"reviewers"
632633
]
633634
},
635+
"github": "bgonzalez",
634636
"twitter": "b_atish",
635637
"linkedin": "beatrizgonzalezm",
636638
"website": "https://medium.com/@b_atish"
@@ -1985,8 +1987,7 @@
19851987
"analysts",
19861988
"authors"
19871989
]
1988-
},
1989-
"twitter": "SilentJMA"
1990+
}
19901991
},
19911992
"jroakes": {
19921993
"avatar_url": "10191545",
@@ -3047,6 +3048,9 @@
30473048
],
30483049
"2022": [
30493050
"designers"
3051+
],
3052+
"2024": [
3053+
"designers"
30503054
]
30513055
}
30523056
},
@@ -3088,6 +3092,7 @@
30883092
"avatar_url": "116832",
30893093
"github": "mgifford",
30903094
"linkedin": "mgifford",
3095+
"bluesky": "mgifford.bsky.social",
30913096
"mastodon": "https://mastodon.social/@mgifford",
30923097
"name": "Mike Gifford",
30933098
"teams": {

src/config/last_updated.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -758,8 +758,8 @@
758758
},
759759
"en/2024/chapters/cdn.html": {
760760
"date_published": "2024-11-11T00:00:00.000Z",
761-
"date_modified": "2024-11-11T00:00:00.000Z",
762-
"hash": "65d8d20990cc3ff5ab6975659151646b"
761+
"date_modified": "2024-11-13T00:00:00.000Z",
762+
"hash": "a518330b7083e972202dcdfcfe34fbad"
763763
},
764764
"en/2024/chapters/cms.html": {
765765
"date_published": "2024-11-11T00:00:00.000Z",

src/content/en/2024/cdn.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ analysts: [pgjaiganesh, AlexMoening]
99
translators: []
1010
results: https://docs.google.com/spreadsheets/d/15YXQQjyoQ0Bnfw9KNSz_YuGDiCfW978_WKEHvDXjdm4/
1111
joeviggiano_bio: Joe Viggiano is a Principal Solutions Architect at Amazon Web Services helping Media & Entertainment customers deliver media content at scale.
12-
pgjaiganesh_bio: Jaiganesh Girinathan is a Principal Edge Solutions Architect at Amazon Web Services.
12+
pgjaiganesh_bio: Jaiganesh Girinathan is a Principal Solutions Architect at Amazon Web Services with the mission to help customers deliver a fast and secure digital experience.
1313
AlexMoening_bio: Alex Moening is a Senior Edge Solutions Architect at Amazon Web Services.
1414
featured_quote: The benefits of utilizing CDNs have expanded beyond simple performance improvements. In 2024, CDNs play a crucial role in enabling global scalability, enhancing security postures, and facilitating the deployment of complex, distributed applications. By pushing more logic to the edge, businesses can create more responsive and personalized user experiences while reducing the load on origin servers.
1515
featured_stat_1: 70%
-29.1 KB
Binary file not shown.
33.1 KB
Loading
-78 KB
Binary file not shown.
-56.7 KB
Loading
-2.5 KB
Binary file not shown.
-2.73 KB
Loading

0 commit comments

Comments
 (0)