Skip to content

Commit e8b2a28

Browse files
VinciGit00 and claude committed
feat: add scrapegraph-py PR #84 SDK compatibility
Support both the v2 Client API (PR #82) and the newer ScrapeGraphAI API (PR #84) which uses Pydantic request models and ApiResult[T] wrappers. - Add scrapegraph_py_compat helper with runtime API detection - Route smart_scraper_graph through the compat layer - Add v3-style examples for extract, search, and scrape Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c0f5fd5 commit e8b2a28

5 files changed

Lines changed: 194 additions & 13 deletions

File tree

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""
2+
Scrape a webpage as markdown using the scrapegraph-py v3 API (PR #84).
3+
Uses ScrapeGraphAI client + ScrapeRequest model + ApiResult wrapper.
4+
"""
5+
6+
import json
7+
import os
8+
9+
from dotenv import load_dotenv
10+
from scrapegraph_py import ScrapeGraphAI, ScrapeRequest
11+
12+
load_dotenv()
13+
14+
api_key = os.getenv("SGAI_API_KEY") or os.getenv("SCRAPEGRAPH_API_KEY")
15+
if not api_key:
16+
raise ValueError("SGAI_API_KEY not found in environment variables")
17+
18+
with ScrapeGraphAI(api_key=api_key) as sgai:
19+
result = sgai.scrape(ScrapeRequest(url="https://example.com"))
20+
21+
if result.status == "success":
22+
print(json.dumps(result.data.model_dump(by_alias=True), indent=2, default=str))
23+
else:
24+
raise RuntimeError(result.error)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""
2+
Search the web using the scrapegraph-py v3 API (PR #84).
3+
Uses ScrapeGraphAI client + SearchRequest model + ApiResult wrapper.
4+
"""
5+
6+
import json
7+
import os
8+
9+
from dotenv import load_dotenv
10+
from scrapegraph_py import ScrapeGraphAI, SearchRequest
11+
12+
load_dotenv()
13+
14+
api_key = os.getenv("SGAI_API_KEY") or os.getenv("SCRAPEGRAPH_API_KEY")
15+
if not api_key:
16+
raise ValueError("SGAI_API_KEY not found in environment variables")
17+
18+
with ScrapeGraphAI(api_key=api_key) as sgai:
19+
result = sgai.search(SearchRequest(query="Extract webpage information"))
20+
21+
if result.status == "success":
22+
print(json.dumps(result.data.model_dump(by_alias=True), indent=2, default=str))
23+
else:
24+
raise RuntimeError(result.error)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"""
2+
Extract structured data using the scrapegraph-py v3 API (PR #84).
3+
Uses ScrapeGraphAI client + ExtractRequest model + ApiResult wrapper.
4+
"""
5+
6+
import json
7+
import os
8+
9+
from dotenv import load_dotenv
10+
from scrapegraph_py import ExtractRequest, ScrapeGraphAI
11+
12+
load_dotenv()
13+
14+
api_key = os.getenv("SGAI_API_KEY") or os.getenv("SCRAPEGRAPH_API_KEY")
15+
if not api_key:
16+
raise ValueError("SGAI_API_KEY not found in environment variables")
17+
18+
with ScrapeGraphAI(api_key=api_key) as sgai:
19+
result = sgai.extract(
20+
ExtractRequest(
21+
url="https://scrapegraphai.com",
22+
prompt="Extract the founders' informations",
23+
)
24+
)
25+
26+
if result.status == "success":
27+
print(json.dumps(result.data.model_dump(by_alias=True), indent=2, default=str))
28+
else:
29+
raise RuntimeError(result.error)

scrapegraphai/graphs/smart_scraper_graph.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -77,21 +77,16 @@ def _create_graph(self) -> BaseGraph:
7777
BaseGraph: A graph instance representing the web scraping workflow.
7878
"""
7979
if self.llm_model == "scrapegraphai/smart-scraper":
80-
try:
81-
from scrapegraph_py import Client
82-
except ImportError:
83-
raise ImportError(
84-
"scrapegraph_py is not installed. Please install it using 'pip install scrapegraph-py'."
85-
)
80+
from ..integrations.scrapegraph_py_compat import extract as sgai_extract
8681

87-
with Client(api_key=self.config.get("api_key")) as sgai_client:
88-
# Extract request (v2 API)
89-
response = sgai_client.extract(
90-
url=self.source,
91-
prompt=self.prompt,
92-
output_schema=self.schema,
93-
)
82+
response = sgai_extract(
83+
api_key=self.config.get("api_key"),
84+
url=self.source,
85+
prompt=self.prompt,
86+
schema=self.schema,
87+
)
9488

89+
if isinstance(response, dict):
9590
if "id" in response:
9691
logger.info(f"Request ID: {response['id']}")
9792
if "data" in response:
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""
2+
Compatibility layer for scrapegraph-py SDK.
3+
4+
Supports both the v2 `Client` API (PR #82) and the newer `ScrapeGraphAI`
5+
API (PR #84) which uses Pydantic request models and an ApiResult wrapper.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
from typing import Any, Optional, Type
11+
12+
from pydantic import BaseModel
13+
14+
15+
def _detect_api() -> str:
    """Return "v3" when the ScrapeGraphAI client is importable, else "v2".

    Raises:
        ImportError: when neither SDK flavor is installed.
    """
    # Prefer the newer PR #84 entry point when both are available.
    try:
        from scrapegraph_py import ScrapeGraphAI  # noqa: F401
    except ImportError:
        pass
    else:
        return "v3"

    try:
        from scrapegraph_py import Client  # noqa: F401
    except ImportError as e:
        raise ImportError(
            "scrapegraph_py is not installed. Install it with 'pip install scrapegraph-py'."
        ) from e
    return "v2"
30+
31+
32+
def _schema_to_dict(schema: Type[BaseModel] | dict | None) -> Optional[dict]:
    """Normalize *schema* to a plain JSON-schema dict.

    Accepts a Pydantic model class, an already-built schema dict, or None.
    (The annotation now admits dict, which the body has always handled.)

    Returns:
        The JSON schema as a dict, or None for None / unsupported inputs —
        callers treat None as "no schema supplied".
    """
    if schema is None:
        return None
    if isinstance(schema, dict):
        # Already a JSON schema; pass through unchanged.
        return schema
    if isinstance(schema, type) and issubclass(schema, BaseModel):
        return schema.model_json_schema()
    # Unsupported input (e.g. a non-model class): silently treat as "no schema".
    return None
40+
41+
42+
def _unwrap_result(result: Any) -> dict:
    """Convert a v3 ApiResult into the plain dict shape the v2 SDK returned.

    Values that do not look like an ApiResult (e.g. v2 responses) pass
    through untouched.

    Raises:
        RuntimeError: when the ApiResult reports a non-success status.
    """
    # Duck-type the v3 ApiResult wrapper instead of importing its class.
    looks_wrapped = hasattr(result, "status") and hasattr(result, "data")
    if not looks_wrapped:
        return result

    if result.status != "success":
        message = getattr(result, "error", "scrapegraph-py request failed")
        raise RuntimeError(message)

    payload = result.data
    if hasattr(payload, "model_dump"):
        # Pydantic model: serialize using API field aliases, dropping nulls.
        return payload.model_dump(by_alias=True, exclude_none=True)
    if isinstance(payload, dict):
        return payload
    return {"data": payload}
53+
54+
55+
def extract(
    api_key: Optional[str],
    url: str,
    prompt: str,
    schema: Optional[Type[BaseModel]] = None,
) -> dict:
    """Call the scrapegraph-py extract endpoint across SDK versions."""
    if _detect_api() == "v3":
        from scrapegraph_py import ExtractRequest, ScrapeGraphAI

        request_fields: dict[str, Any] = {"url": url, "prompt": prompt}
        json_schema = _schema_to_dict(schema)
        if json_schema is not None:
            # The v3 request model exposes the schema under the `schema_` alias.
            request_fields["schema_"] = json_schema
        with ScrapeGraphAI(api_key=api_key) as client:
            return _unwrap_result(client.extract(ExtractRequest(**request_fields)))

    # v2 fallback: the legacy Client takes plain keyword arguments.
    from scrapegraph_py import Client

    with Client(api_key=api_key) as client:
        return client.extract(url=url, prompt=prompt, output_schema=schema)
78+
79+
80+
def scrape(api_key: Optional[str], url: str) -> dict:
    """Call the scrapegraph-py scrape endpoint across SDK versions."""
    if _detect_api() == "v3":
        from scrapegraph_py import ScrapeGraphAI, ScrapeRequest

        with ScrapeGraphAI(api_key=api_key) as client:
            return _unwrap_result(client.scrape(ScrapeRequest(url=url)))

    # v2 fallback: the legacy Client takes plain keyword arguments.
    from scrapegraph_py import Client

    with Client(api_key=api_key) as client:
        return client.scrape(url=url)
94+
95+
96+
def search(api_key: Optional[str], query: str) -> dict:
    """Call the scrapegraph-py search endpoint across SDK versions."""
    if _detect_api() == "v3":
        from scrapegraph_py import ScrapeGraphAI, SearchRequest

        with ScrapeGraphAI(api_key=api_key) as client:
            return _unwrap_result(client.search(SearchRequest(query=query)))

    # v2 fallback: the legacy Client takes plain keyword arguments.
    from scrapegraph_py import Client

    with Client(api_key=api_key) as client:
        return client.search(query=query)

0 commit comments

Comments
 (0)