File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+ """
2+ Basic example of a scraping pipeline using SmartScraperGraph
3+ """
4+ import os
5+ import json
6+ from dotenv import load_dotenv
7+ from scrapegraphai.graphs import SmartScraperGraph
8+ from scrapegraphai.utils import prettify_exec_info
9+
10+ load_dotenv()
11+
# ************************************************
# Define the configuration for the graph
# ************************************************

# Configuration dictionary handed to SmartScraperGraph via its `config` argument.
# The API key is read from the SCRAPEGRAPH_API_KEY environment variable;
# os.getenv returns None when the variable is not set.
# NOTE(review): the key is presumably supplied through a .env file loaded by
# load_dotenv() earlier in the script -- confirm for your environment.
graph_config = {
    "llm": {
        "model": "scrapegraphai/smart-scraper",
        "api_key": os.getenv("SCRAPEGRAPH_API_KEY"),
    },
    "verbose": True,
    "headless": False,
}
25+
# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

# Build the graph from a natural-language prompt, the page to read and the
# configuration dictionary defined earlier in this script.
smart_scraper_graph = SmartScraperGraph(
    prompt="Extract me all the articles",
    source="https://www.wired.com",
    config=graph_config,
)

# Execute the graph and print whatever it returns as indented JSON.
result = smart_scraper_graph.run()
print(json.dumps(result, indent=4))
38+
# ************************************************
# Get graph execution info
# ************************************************

# Fetch the execution info recorded by the graph and print it after
# formatting it with prettify_exec_info.
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Original file line number Diff line number Diff line change @@ -43,7 +43,8 @@ dependencies = [
4343 "transformers>=4.44.2",
4444 "googlesearch-python>=1.2.5",
4545 "simpleeval>=1.0.0",
46- "async_timeout>=4.0.3"
46+ "async_timeout>=4.0.3",
47+ "scrapegraph-py>=0.0.4"
4748]
4849
4950license = "MIT"
You can’t perform that action at this time.
0 commit comments