Skip to content
This repository was archived by the owner on Apr 24, 2025. It is now read-only.

Commit a392744

Browse files
committed
verifies all links within a page, prints broken links
1 parent a645292 commit a392744

2 files changed

Lines changed: 47 additions & 0 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
bs4
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import requests
2+
import bs4
3+
4+
def verify(url, *, timeout=10.0):
    """Check the absolute links on a page and print which ones are broken.

    Fetches ``url``, collects the ``href`` of every ``<a>`` tag, and requests
    each link that starts with ``http``. For every checked link one line is
    printed (``Good: <link>`` or ``Broken: <link>``), followed by a summary
    line with both totals. Links that do not start with ``http`` (relative
    paths, ``#fragment``, ``mailto:`` ...) are skipped and not counted.

    If the page itself cannot be fetched, a single
    ``There was a problem: ...`` line is printed and no links are checked.

    Args:
        url (str): url of page to check
        timeout (float): seconds to wait on each HTTP request before the
            request is treated as failed.

    Returns:
        None
    """
    # The request itself must be inside the try: connection errors, DNS
    # failures and timeouts are raised by requests.get, not raise_for_status.
    try:
        page = requests.get(url, timeout=timeout)
        page.raise_for_status()
    except requests.RequestException as exc:
        print('There was a problem: %s' % (exc))
        return

    soup = bs4.BeautifulSoup(page.text, 'html.parser')
    page_links = [a.get('href') for a in soup.select('a') if a.get('href')]

    good_count = 0
    broken_count = 0

    for link in page_links:
        if not link.startswith('http'):
            continue

        # A link that cannot be reached at all is a broken link; it must not
        # abort the scan of the remaining links.
        try:
            requests.get(link, timeout=timeout).raise_for_status()
        except requests.RequestException:
            print(f'Broken: {link}')
            broken_count += 1
        else:
            print(f'Good: {link}')
            good_count += 1

    print(f'{good_count} Good. {broken_count} Broken')
43+
44+
45+
if __name__ == "__main__":
    # When run as a script, check the sample chapter page.
    target_url = 'https://automatetheboringstuff.com/chapter11/'
    verify(target_url)

0 commit comments

Comments
 (0)