Skip to content

Commit dc03c40

Browse files
committed
chore: sync with scrapegraph-py 2.1.1 SDK surface
The SDK's PR #88 (ergonomic-api) made `ScrapeGraphAI` methods take plain kwargs instead of `*Request` model objects, and dropped the old `Client` class entirely. This commit aligns the integration shim and example scripts with that surface and bumps the dep pin from `>=2.0.0` to `>=2.1.1`.
1 parent 69035c6 commit dc03c40

8 files changed

Lines changed: 71 additions & 104 deletions

File tree

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,23 @@
11
"""
2-
Scrape a webpage as clean markdown using scrapegraph-py v2 API.
3-
Replaces the old markdownify() call with scrape().
2+
Scrape a webpage as clean markdown using scrapegraph-py.
43
"""
54

65
import json
76
import os
87

98
from dotenv import load_dotenv
10-
from scrapegraph_py import Client
9+
from scrapegraph_py import ScrapeGraphAI
1110

1211
load_dotenv()
1312

14-
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
13+
api_key = os.getenv("SGAI_API_KEY") or os.getenv("SCRAPEGRAPH_API_KEY")
1514
if not api_key:
16-
raise ValueError("SCRAPEGRAPH_API_KEY environment variable not found")
15+
raise ValueError("SGAI_API_KEY environment variable not found")
1716

18-
with Client(api_key=api_key) as client:
19-
response = client.scrape(url="https://example.com")
20-
print(json.dumps(response, indent=2))
17+
with ScrapeGraphAI(api_key=api_key) as sgai:
18+
result = sgai.scrape("https://example.com")
19+
20+
if result.status != "success":
21+
raise RuntimeError(result.error)
22+
23+
print(json.dumps(result.data.model_dump(by_alias=True), indent=2, default=str))

examples/markdownify/markdownify_scrapegraphai_v3.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
"""
2-
Scrape a webpage as markdown using the scrapegraph-py v3 API (PR #84).
3-
Uses ScrapeGraphAI client + ScrapeRequest model + ApiResult wrapper.
2+
Scrape a webpage as markdown using the scrapegraph-py SDK (>=2.1.1).
3+
Uses the ScrapeGraphAI client with ergonomic kwargs and ApiResult wrapper.
44
"""
55

66
import json
77
import os
88

99
from dotenv import load_dotenv
10-
from scrapegraph_py import ScrapeGraphAI, ScrapeRequest
10+
from scrapegraph_py import ScrapeGraphAI
1111

1212
load_dotenv()
1313

@@ -16,7 +16,7 @@
1616
raise ValueError("SGAI_API_KEY not found in environment variables")
1717

1818
with ScrapeGraphAI(api_key=api_key) as sgai:
19-
result = sgai.scrape(ScrapeRequest(url="https://example.com"))
19+
result = sgai.scrape("https://example.com")
2020

2121
if result.status == "success":
2222
print(json.dumps(result.data.model_dump(by_alias=True), indent=2, default=str))
Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,23 @@
11
"""
2-
Search the web and extract AI-structured results using scrapegraph-py v2 API.
3-
Replaces the old searchscraper() call with search().
2+
Search the web and extract AI-structured results using scrapegraph-py.
43
"""
54

65
import json
76
import os
87

98
from dotenv import load_dotenv
10-
from scrapegraph_py import Client
9+
from scrapegraph_py import ScrapeGraphAI
1110

1211
load_dotenv()
1312

14-
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
13+
api_key = os.getenv("SGAI_API_KEY") or os.getenv("SCRAPEGRAPH_API_KEY")
1514
if not api_key:
16-
raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")
15+
raise ValueError("SGAI_API_KEY not found in environment variables")
1716

18-
with Client(api_key=api_key) as client:
19-
response = client.search(query="Extract webpage information")
20-
print(json.dumps(response, indent=2))
17+
with ScrapeGraphAI(api_key=api_key) as sgai:
18+
result = sgai.search("Extract webpage information")
19+
20+
if result.status != "success":
21+
raise RuntimeError(result.error)
22+
23+
print(json.dumps(result.data.model_dump(by_alias=True), indent=2, default=str))

examples/search_graph/scrapegraphai/searchscraper_scrapegraphai_v3.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
"""
2-
Search the web using the scrapegraph-py v3 API (PR #84).
3-
Uses ScrapeGraphAI client + SearchRequest model + ApiResult wrapper.
2+
Search the web using the scrapegraph-py SDK (>=2.1.1).
3+
Uses the ScrapeGraphAI client with ergonomic kwargs and ApiResult wrapper.
44
"""
55

66
import json
77
import os
88

99
from dotenv import load_dotenv
10-
from scrapegraph_py import ScrapeGraphAI, SearchRequest
10+
from scrapegraph_py import ScrapeGraphAI
1111

1212
load_dotenv()
1313

@@ -16,7 +16,7 @@
1616
raise ValueError("SGAI_API_KEY not found in environment variables")
1717

1818
with ScrapeGraphAI(api_key=api_key) as sgai:
19-
result = sgai.search(SearchRequest(query="Extract webpage information"))
19+
result = sgai.search("Extract webpage information")
2020

2121
if result.status == "success":
2222
print(json.dumps(result.data.model_dump(by_alias=True), indent=2, default=str))
Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,26 @@
11
"""
2-
Extract structured data from a webpage using scrapegraph-py v2 API.
3-
Replaces the old smartscraper() call with extract().
2+
Extract structured data from a webpage using scrapegraph-py.
43
"""
54

65
import json
76
import os
87

98
from dotenv import load_dotenv
10-
from scrapegraph_py import Client
9+
from scrapegraph_py import ScrapeGraphAI
1110

1211
load_dotenv()
1312

14-
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
13+
api_key = os.getenv("SGAI_API_KEY") or os.getenv("SCRAPEGRAPH_API_KEY")
1514
if not api_key:
16-
raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")
15+
raise ValueError("SGAI_API_KEY not found in environment variables")
1716

18-
with Client(api_key=api_key) as client:
19-
response = client.extract(
17+
with ScrapeGraphAI(api_key=api_key) as sgai:
18+
result = sgai.extract(
19+
"Extract the founders' informations",
2020
url="https://scrapegraphai.com",
21-
prompt="Extract the founders' informations",
2221
)
23-
print(json.dumps(response, indent=2))
22+
23+
if result.status != "success":
24+
raise RuntimeError(result.error)
25+
26+
print(json.dumps(result.data.model_dump(by_alias=True), indent=2, default=str))

examples/smart_scraper_graph/scrapegraphai/smartscraper_scrapegraphai_v3.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
"""
2-
Extract structured data using the scrapegraph-py v3 API (PR #84).
3-
Uses ScrapeGraphAI client + ExtractRequest model + ApiResult wrapper.
2+
Extract structured data using the scrapegraph-py SDK (>=2.1.1).
3+
Uses the ScrapeGraphAI client with ergonomic kwargs and ApiResult wrapper.
44
"""
55

66
import json
77
import os
88

99
from dotenv import load_dotenv
10-
from scrapegraph_py import ExtractRequest, ScrapeGraphAI
10+
from scrapegraph_py import ScrapeGraphAI
1111

1212
load_dotenv()
1313

@@ -17,10 +17,8 @@
1717

1818
with ScrapeGraphAI(api_key=api_key) as sgai:
1919
result = sgai.extract(
20-
ExtractRequest(
21-
url="https://scrapegraphai.com",
22-
prompt="Extract the founders' informations",
23-
)
20+
"Extract the founders' informations",
21+
url="https://scrapegraphai.com",
2422
)
2523

2624
if result.status == "success":

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ dependencies = [
3232
"jsonschema>=4.25.1",
3333
"duckduckgo-search>=8.1.1",
3434
"pydantic>=2.12.5",
35-
"scrapegraph-py>=2.0.0",
35+
"scrapegraph-py>=2.1.1",
3636
]
3737

3838
readme = "README.md"
Lines changed: 23 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
"""
2-
Compatibility layer for scrapegraph-py SDK.
2+
Compatibility layer for the scrapegraph-py SDK (>=2.1.1).
33
4-
Supports both the v2 `Client` API (PR #82) and the newer `ScrapeGraphAI`
5-
API (PR #84) which uses Pydantic request models and an ApiResult wrapper.
4+
The SDK exposes `ScrapeGraphAI` with ergonomic kwargs (PR #88) and wraps
5+
results in `ApiResult[T]`. This module hides those details from the rest
6+
of the codebase so call sites stay terse.
67
"""
78

89
from __future__ import annotations
@@ -12,23 +13,6 @@
1213
from pydantic import BaseModel
1314

1415

15-
def _detect_api() -> str:
16-
try:
17-
from scrapegraph_py import ScrapeGraphAI # noqa: F401
18-
19-
return "v3"
20-
except ImportError:
21-
pass
22-
try:
23-
from scrapegraph_py import Client # noqa: F401
24-
25-
return "v2"
26-
except ImportError as e:
27-
raise ImportError(
28-
"scrapegraph_py is not installed. Install it with 'pip install scrapegraph-py'."
29-
) from e
30-
31-
3216
def _schema_to_dict(schema: Optional[Type[BaseModel]]) -> Optional[dict]:
3317
if schema is None:
3418
return None
@@ -52,58 +36,34 @@ def _unwrap_result(result: Any) -> dict:
5236
return result
5337

5438

39+
def _client(api_key: Optional[str]):
40+
from scrapegraph_py import ScrapeGraphAI
41+
42+
return ScrapeGraphAI(api_key=api_key)
43+
44+
5545
def extract(
5646
api_key: Optional[str],
5747
url: str,
5848
prompt: str,
5949
schema: Optional[Type[BaseModel]] = None,
6050
) -> dict:
61-
"""Call the scrapegraph-py extract endpoint across SDK versions."""
62-
api = _detect_api()
63-
64-
if api == "v3":
65-
from scrapegraph_py import ExtractRequest, ScrapeGraphAI
66-
67-
kwargs: dict[str, Any] = {"url": url, "prompt": prompt}
68-
schema_dict = _schema_to_dict(schema)
69-
if schema_dict is not None:
70-
kwargs["schema_"] = schema_dict
71-
with ScrapeGraphAI(api_key=api_key) as client:
72-
return _unwrap_result(client.extract(ExtractRequest(**kwargs)))
73-
74-
from scrapegraph_py import Client
75-
76-
with Client(api_key=api_key) as client:
77-
return client.extract(url=url, prompt=prompt, output_schema=schema)
51+
"""Call the scrapegraph-py extract endpoint."""
52+
kwargs: dict[str, Any] = {"url": url}
53+
schema_dict = _schema_to_dict(schema)
54+
if schema_dict is not None:
55+
kwargs["schema"] = schema_dict
56+
with _client(api_key) as client:
57+
return _unwrap_result(client.extract(prompt, **kwargs))
7858

7959

8060
def scrape(api_key: Optional[str], url: str) -> dict:
81-
"""Call the scrapegraph-py scrape endpoint across SDK versions."""
82-
api = _detect_api()
83-
84-
if api == "v3":
85-
from scrapegraph_py import ScrapeGraphAI, ScrapeRequest
86-
87-
with ScrapeGraphAI(api_key=api_key) as client:
88-
return _unwrap_result(client.scrape(ScrapeRequest(url=url)))
89-
90-
from scrapegraph_py import Client
91-
92-
with Client(api_key=api_key) as client:
93-
return client.scrape(url=url)
61+
"""Call the scrapegraph-py scrape endpoint."""
62+
with _client(api_key) as client:
63+
return _unwrap_result(client.scrape(url))
9464

9565

9666
def search(api_key: Optional[str], query: str) -> dict:
97-
"""Call the scrapegraph-py search endpoint across SDK versions."""
98-
api = _detect_api()
99-
100-
if api == "v3":
101-
from scrapegraph_py import ScrapeGraphAI, SearchRequest
102-
103-
with ScrapeGraphAI(api_key=api_key) as client:
104-
return _unwrap_result(client.search(SearchRequest(query=query)))
105-
106-
from scrapegraph_py import Client
107-
108-
with Client(api_key=api_key) as client:
109-
return client.search(query=query)
67+
"""Call the scrapegraph-py search endpoint."""
68+
with _client(api_key) as client:
69+
return _unwrap_result(client.search(query))

0 commit comments

Comments (0)