Skip to content

Commit a58c704

Browse files
committed
Merge branch 'main' of github.com:HTTPArchive/almanac.httparchive.org into production
2 parents f79361b + 91885c1 commit a58c704

96 files changed

Lines changed: 1498 additions & 250 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

sql/2024/seo/robots-txt-size-2024.sql

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,31 @@
11
#standardSQL
22
# Robots.txt size
3-
CREATE TEMPORARY FUNCTION getRobotsSize(payload STRING)
4-
RETURNS FLOAT64 LANGUAGE js AS '''
5-
try {
6-
var $ = JSON.parse(payload);
7-
var robots = JSON.parse($._robots_txt);
8-
return robots['size']/1024;
9-
} catch (e) {
10-
return 0;
11-
}
12-
''';
13-
143
SELECT
154
client,
165
COUNT(DISTINCT(site)) AS sites,
6+
SAFE_DIVIDE(COUNTIF(robots_size = 0), COUNT(DISTINCT(site))) AS pct_0,
177
SAFE_DIVIDE(COUNTIF(robots_size > 0 AND robots_size <= 100), COUNT(DISTINCT(site))) AS pct_0_100,
188
SAFE_DIVIDE(COUNTIF(robots_size > 100 AND robots_size <= 200), COUNT(DISTINCT(site))) AS pct_100_200,
199
SAFE_DIVIDE(COUNTIF(robots_size > 200 AND robots_size <= 300), COUNT(DISTINCT(site))) AS pct_200_300,
2010
SAFE_DIVIDE(COUNTIF(robots_size > 300 AND robots_size <= 400), COUNT(DISTINCT(site))) AS pct_300_400,
2111
SAFE_DIVIDE(COUNTIF(robots_size > 400 AND robots_size <= 500), COUNT(DISTINCT(site))) AS pct_400_500,
2212
SAFE_DIVIDE(COUNTIF(robots_size > 500), COUNT(DISTINCT(site))) AS pct_gt500,
23-
SAFE_DIVIDE(COUNTIF(robots_size = 0), COUNT(DISTINCT(site))) AS pct_missing,
13+
SAFE_DIVIDE(COUNTIF(robots_size IS NULL), COUNT(DISTINCT(site))) AS pct_missing,
2414
COUNTIF(robots_size > 500) AS count_gt500,
25-
COUNTIF(robots_size = 0) AS count_missing
15+
COUNTIF(robots_size IS NULL) AS count_missing
2616
FROM (
2717
SELECT
2818
client,
29-
page AS site,
30-
getRobotsSize(payload) AS robots_size
19+
root_page AS site,
20+
custom_metrics.robots_txt,
21+
FLOAT64(custom_metrics.robots_txt.size_kib) AS robots_size
3122
FROM
32-
`httparchive.all.pages`
33-
WHERE date = '2024-06-01'
34-
) -- noqa: L062
23+
`httparchive.crawl.pages`
24+
WHERE
25+
date = '2024-06-01' AND
26+
is_root_page AND -- no need to crawl inner pages for this one
27+
custom_metrics.robots_txt.status IS NOT NULL
28+
)
3529
GROUP BY
3630
client
3731
ORDER BY

src/config/2024.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,7 @@
7575
"part": "II",
7676
"chapter_number": "9",
7777
"title": "SEO",
78-
"slug": "seo",
79-
"todo": true
78+
"slug": "seo"
8079
},
8180
{
8281
"part": "II",

src/config/contributors.json

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,7 +1191,10 @@
11911191
},
11921192
"dwsmart": {
11931193
"avatar_url": "11179452",
1194+
"bluesky": "tamethebots.com",
11941195
"github": "dwsmart",
1196+
"linkedin": "davewsmart",
1197+
"mastodon": "https://seocommunity.social/@dwsmart",
11951198
"name": "Dave Smart",
11961199
"teams": {
11971200
"2020": [
@@ -1203,9 +1206,11 @@
12031206
"2022": [
12041207
"authors",
12051208
"reviewers"
1209+
],
1210+
"2024": [
1211+
"authors"
12061212
]
12071213
},
1208-
"twitter": "davewsmart",
12091214
"website": "https://tamethebots.com"
12101215
},
12111216
"dsottimano": {
@@ -2044,7 +2049,9 @@
20442049
},
20452050
"fellowhuman1101": {
20462051
"avatar_url": "52051775",
2052+
"bluesky": "not-a-robot.com",
20472053
"github": "fellowhuman1101",
2054+
"linkedin": "jamie-indigo",
20482055
"name": "Jamie Indigo",
20492056
"teams": {
20502057
"2020": [
@@ -2056,6 +2063,10 @@
20562063
],
20572064
"2022": [
20582065
"authors"
2066+
],
2067+
"2024": [
2068+
"authors",
2069+
"leads"
20592070
]
20602071
},
20612072
"twitter": "Jammer_Volts",
@@ -3023,13 +3034,22 @@
30233034
},
30243035
"MichaelLewittes": {
30253036
"avatar_url": "96250205",
3037+
"bluesky": "michaellewittes.bsky.social",
30263038
"github": "MichaelLewittes",
3039+
"linkedin": "michael-lewittes-a22b831",
3040+
"mastodon": "https://seocommunity.social/@MichaelLewittes",
30273041
"name": "Michael Lewittes",
30283042
"teams": {
30293043
"2022": [
30303044
"editors"
3045+
],
3046+
"2024": [
3047+
"authors",
3048+
"editors"
30313049
]
3032-
}
3050+
},
3051+
"twitter": "MichaelLewittes",
3052+
"website": "https://www.ranktify.com/team"
30333053
},
30343054
"MichaelSolati": {
30353055
"avatar_url": "11811422",
@@ -4871,5 +4891,42 @@
48714891
"reviewers"
48724892
]
48734893
}
4894+
},
4895+
"mikaelaraujo": {
4896+
"avatar_url": "4764075",
4897+
"bluesky": "mikaelaraujo.bsky.social",
4898+
"github": "mikaelaraujo",
4899+
"linkedin": "mikael-araujo",
4900+
"name": "Mikael Araújo",
4901+
"teams": {
4902+
"2024": [
4903+
"authors"
4904+
]
4905+
},
4906+
"threads": "@mikaelaraujo",
4907+
"twitter": "miknaraujo",
4908+
"website": "https://www.mikaelaraujo.com"
4909+
},
4910+
"henryp25": {
4911+
"avatar_url": "62102954",
4912+
"github": "henryp25",
4913+
"linkedin": "henry-price-9ab362b4",
4914+
"name": "Henry Price",
4915+
"teams": {
4916+
"2024": [
4917+
"analysts"
4918+
]
4919+
}
4920+
},
4921+
"cnichols013": {
4922+
"avatar_url": "73146375",
4923+
"github": "cnichols013",
4924+
"linkedin": "chris-nichols",
4925+
"name": "Chris Nichols",
4926+
"teams": {
4927+
"2024": [
4928+
"analysts"
4929+
]
4930+
}
48744931
}
48754932
}

0 commit comments

Comments
 (0)