Skip to content

Commit cac1042

Browse files
dwsmartfellowhuman1101tunetheweb
authored
SEO 2024 chapter (#3939)
* enable 2024 chapter * adding our authors, analysts & editors * Correct Mikael's threads link * adding henryp25 linkedin * initial commit, up to robots directives * to beginning of IndexIfEmbedded * amend fellowhuman1101 & cnichols013. * adds Jamie's bio and to end of indexIfEmbedded tag converted * images so far * updates fellowhuman1101 bluesky link * more chart images * up to canonical * done up to Name attributes in `follow` robots meta tag. chart * more chart images * up to header tags * more figure images * more fig images * more figure images * up to amp * more figure images * all content converted, pre-check * Adding MichaelLewittes details * adds doi * use - instead of * for lists * fixes lighthouse link * fixes 24 valid rules link * homepage(s) to home page(s) * missing word * clear old figures * regenerated and squooshed fig images * Updated as per @barry_pollard * some more significant figure formatting. * adds MichaelLewittes as an author * significant figures fixed * robots.txt mentions now code formatted * fixed header and caption casing * Update seo.md homepage fix Code formatting robots.txt, UAs, etc. e.g. changed to 'for example' Added the really interesting point that "follow" and "index" have no function Changed word in for invalid elements in head formatted canonical, hreflang, etc. Fixed eager loading image wording Added contextual text and links between figures 12 & 13 * Update seo.md moved appropriate content to img loading from iframe changed title tag to title element changed wording in title tag word count code format VideoObject, WebSite, SearchAction, WebPage, ListItem Add a link to PageRank Added a link to Crux link Inner pages are all secondary pages now. Added sentence to end of intro "This year, we have analyzed one inner page per site crawled, on top of the home pages this chapter usually analyzes. 
Since home pages are often quite different to inner pages, this unlocks new insights and allows us to compare home versus inner page behaviors." * adds new paragraph and amends text for `robots.txt` size * adds Barry Pollard as a reviewer * Retake images * fix MichaelLewittes_bio * revert home page back to homepage as per Michael. * Final editing fixes from MichaelLewittes * fix linting issues * More Linting * adds final new line * adds new line to end of sql file * Homepage -> home page * Smart quotes * Web Almanac links * Other links * Misc technical edits * Featured quotes/stats * Linting * Date updates --------- Co-authored-by: Jamie Indigo <52051775+fellowhuman1101@users.noreply.github.com> Co-authored-by: Barry Pollard <barrypollard@google.com>
1 parent 605d8e9 commit cac1042

63 files changed

Lines changed: 1283 additions & 35 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

sql/2024/seo/robots-txt-size-2024.sql

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,31 @@
11
#standardSQL
22
# Robots.txt size
3-
CREATE TEMPORARY FUNCTION getRobotsSize(payload STRING)
4-
RETURNS FLOAT64 LANGUAGE js AS '''
5-
try {
6-
var $ = JSON.parse(payload);
7-
var robots = JSON.parse($._robots_txt);
8-
return robots['size']/1024;
9-
} catch (e) {
10-
return 0;
11-
}
12-
''';
13-
143
SELECT
154
client,
165
COUNT(DISTINCT(site)) AS sites,
6+
SAFE_DIVIDE(COUNTIF(robots_size = 0), COUNT(DISTINCT(site))) AS pct_0,
177
SAFE_DIVIDE(COUNTIF(robots_size > 0 AND robots_size <= 100), COUNT(DISTINCT(site))) AS pct_0_100,
188
SAFE_DIVIDE(COUNTIF(robots_size > 100 AND robots_size <= 200), COUNT(DISTINCT(site))) AS pct_100_200,
199
SAFE_DIVIDE(COUNTIF(robots_size > 200 AND robots_size <= 300), COUNT(DISTINCT(site))) AS pct_200_300,
2010
SAFE_DIVIDE(COUNTIF(robots_size > 300 AND robots_size <= 400), COUNT(DISTINCT(site))) AS pct_300_400,
2111
SAFE_DIVIDE(COUNTIF(robots_size > 400 AND robots_size <= 500), COUNT(DISTINCT(site))) AS pct_400_500,
2212
SAFE_DIVIDE(COUNTIF(robots_size > 500), COUNT(DISTINCT(site))) AS pct_gt500,
23-
SAFE_DIVIDE(COUNTIF(robots_size = 0), COUNT(DISTINCT(site))) AS pct_missing,
13+
SAFE_DIVIDE(COUNTIF(robots_size IS NULL), COUNT(DISTINCT(site))) AS pct_missing,
2414
COUNTIF(robots_size > 500) AS count_gt500,
25-
COUNTIF(robots_size = 0) AS count_missing
15+
COUNTIF(robots_size IS NULL) AS count_missing
2616
FROM (
2717
SELECT
2818
client,
29-
page AS site,
30-
getRobotsSize(payload) AS robots_size
19+
root_page AS site,
20+
custom_metrics.robots_txt,
21+
FLOAT64(custom_metrics.robots_txt.size_kib) AS robots_size
3122
FROM
32-
`httparchive.all.pages`
33-
WHERE date = '2024-06-01'
34-
) -- noqa: L062
23+
`httparchive.crawl.pages`
24+
WHERE
25+
date = '2024-06-01' AND
26+
is_root_page AND -- no need to crawl inner pages for this one
27+
custom_metrics.robots_txt.status IS NOT NULL
28+
)
3529
GROUP BY
3630
client
3731
ORDER BY

src/config/2024.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,7 @@
7575
"part": "II",
7676
"chapter_number": "9",
7777
"title": "SEO",
78-
"slug": "seo",
79-
"todo": true
78+
"slug": "seo"
8079
},
8180
{
8281
"part": "II",

src/config/contributors.json

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,7 +1191,10 @@
11911191
},
11921192
"dwsmart": {
11931193
"avatar_url": "11179452",
1194+
"bluesky": "tamethebots.com",
11941195
"github": "dwsmart",
1196+
"linkedin": "davewsmart",
1197+
"mastodon": "https://seocommunity.social/@dwsmart",
11951198
"name": "Dave Smart",
11961199
"teams": {
11971200
"2020": [
@@ -1203,9 +1206,11 @@
12031206
"2022": [
12041207
"authors",
12051208
"reviewers"
1209+
],
1210+
"2024": [
1211+
"authors"
12061212
]
12071213
},
1208-
"twitter": "davewsmart",
12091214
"website": "https://tamethebots.com"
12101215
},
12111216
"dsottimano": {
@@ -2044,7 +2049,9 @@
20442049
},
20452050
"fellowhuman1101": {
20462051
"avatar_url": "52051775",
2052+
"bluesky": "not-a-robot.com",
20472053
"github": "fellowhuman1101",
2054+
"linkedin": "jamie-indigo",
20482055
"name": "Jamie Indigo",
20492056
"teams": {
20502057
"2020": [
@@ -2056,6 +2063,10 @@
20562063
],
20572064
"2022": [
20582065
"authors"
2066+
],
2067+
"2024": [
2068+
"authors",
2069+
"leads"
20592070
]
20602071
},
20612072
"twitter": "Jammer_Volts",
@@ -3023,13 +3034,22 @@
30233034
},
30243035
"MichaelLewittes": {
30253036
"avatar_url": "96250205",
3037+
"bluesky": "michaellewittes.bsky.social",
30263038
"github": "MichaelLewittes",
3039+
"linkedin": "michael-lewittes-a22b831",
3040+
"mastodon": "https://seocommunity.social/@MichaelLewittes",
30273041
"name": "Michael Lewittes",
30283042
"teams": {
30293043
"2022": [
30303044
"editors"
3045+
],
3046+
"2024": [
3047+
"authors",
3048+
"editors"
30313049
]
3032-
}
3050+
},
3051+
"twitter": "MichaelLewittes",
3052+
"website": "https://www.ranktify.com/team"
30333053
},
30343054
"MichaelSolati": {
30353055
"avatar_url": "11811422",
@@ -4871,5 +4891,42 @@
48714891
"reviewers"
48724892
]
48734893
}
4894+
},
4895+
"mikaelaraujo": {
4896+
"avatar_url": "4764075",
4897+
"bluesky": "mikaelaraujo.bsky.social",
4898+
"github": "mikaelaraujo",
4899+
"linkedin": "mikael-araujo",
4900+
"name": "Mikael Araújo",
4901+
"teams": {
4902+
"2024": [
4903+
"authors"
4904+
]
4905+
},
4906+
"threads": "@mikaelaraujo",
4907+
"twitter": "miknaraujo",
4908+
"website": "https://www.mikaelaraujo.com"
4909+
},
4910+
"henryp25": {
4911+
"avatar_url": "62102954",
4912+
"github": "henryp25",
4913+
"linkedin": "henry-price-9ab362b4",
4914+
"name": "Henry Price",
4915+
"teams": {
4916+
"2024": [
4917+
"analysts"
4918+
]
4919+
}
4920+
},
4921+
"cnichols013": {
4922+
"avatar_url": "73146375",
4923+
"github": "cnichols013",
4924+
"linkedin": "chris-nichols",
4925+
"name": "Chris Nichols",
4926+
"teams": {
4927+
"2024": [
4928+
"analysts"
4929+
]
4930+
}
48744931
}
48754932
}

src/config/last_updated.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -837,8 +837,8 @@
837837
"hash": "232a286d67940eaed02f935fd9ce1db6"
838838
},
839839
"en/2024/chapters/seo.html": {
840-
"date_published": "2024-11-11T00:00:00.000Z",
841-
"date_modified": "2024-11-16T00:00:00.000Z",
840+
"date_published": "2024-12-02T00:00:00.000Z",
841+
"date_modified": "2024-12-02T00:00:00.000Z",
842842
"hash": "d7bb5659e4444ac7702888c7b11880cb"
843843
},
844844
"en/2024/chapters/structured-data.html": {

0 commit comments

Comments
 (0)