Skip to content

Commit 518945d

Browse files
use custom api for tracing
1 parent 7dc1956 commit 518945d

1 file changed

Lines changed: 94 additions & 59 deletions

File tree

scrapegraphai/telemetry/telemetry.py

Lines changed: 94 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,19 @@
44
import json
55
import logging
66
import os
7-
import platform
87
import threading
98
import uuid
109
from typing import Any, Callable, Dict
1110
from urllib import request
11+
from pydantic import BaseModel, Field
1212

13-
# Load version
1413
VERSION = importlib.metadata.version("scrapegraphai")
15-
STR_VERSION = ".".join([str(i) for i in VERSION])
16-
17-
# 🚀 Your proxy service endpoint (instead of PostHog)
18-
PROXY_URL = "https://scrapegraph-proxy.onrender.com/capture/"
19-
14+
TRACK_URL = "https://sgai-oss-tracing.onrender.com/v1/telemetry"
2015
TIMEOUT = 2
2116
DEFAULT_CONFIG_LOCATION = os.path.expanduser("~/.scrapegraphai.conf")
2217

2318
logger = logging.getLogger(__name__)
2419

25-
# Everything below remains mostly same
2620
def _load_config(config_location: str) -> configparser.ConfigParser:
2721
config = configparser.ConfigParser()
2822
try:
@@ -70,16 +64,6 @@ def _check_config_and_environ_for_telemetry_flag(default_value: bool, config_obj
7064
MAX_COUNT_SESSION = 1000
7165

7266

73-
BASE_PROPERTIES = {
74-
"os_type": os.name,
75-
"os_version": platform.platform(),
76-
"python_version": f"{platform.python_version()}/{platform.python_implementation()}",
77-
"distinct_id": g_anonymous_id,
78-
"scrapegraphai_version": VERSION,
79-
"telemetry_version": "0.0.4-proxy",
80-
}
81-
82-
8367
def disable_telemetry():
8468
global g_telemetry_enabled
8569
g_telemetry_enabled = False
@@ -95,44 +79,93 @@ def is_telemetry_enabled() -> bool:
9579
return False
9680

9781

98-
# ⭐ UPDATED FOR PROXY — send without API key
99-
def _send_event_json(event_json: dict):
82+
class TelemetryEvent(BaseModel):
    """Payload model for one telemetry submission.

    All six fields are required strings. Each declares a minimum and maximum
    length through ``Field``, so constructing the model with a missing value
    or one outside those bounds fails validation.
    """

    user_prompt: str = Field(max_length=4096, min_length=1)
    # NOTE(review): this is the only field whose lower bound is 512 rather
    # than 1, so shorter schema strings are rejected -- confirm it is intended.
    json_schema: str = Field(max_length=16384, min_length=512)
    website_content: str = Field(max_length=65536, min_length=1)
    llm_response: str = Field(max_length=32768, min_length=1)
    llm_model: str = Field(max_length=256, min_length=1)
    url: str = Field(max_length=2048, min_length=1)
89+
90+
91+
def _build_valid_telemetry_event(
    prompt: str | None,
    schema: dict | None,
    content: str | None,
    response: dict | str | None,
    llm_model: str | None,
    source: list[str] | None,
) -> TelemetryEvent | None:
    """Assemble a TelemetryEvent from raw run data.

    Args:
        prompt: Passed through as ``user_prompt``.
        schema: Dict values are JSON-encoded; other non-None values are
            converted with ``str()``.
        content: Passed through as ``website_content``.
        response: Normalised the same way as ``schema``.
        llm_model: Model name; a falsy value is replaced by ``"unknown"``.
        source: Only the first element of a non-empty list is used, as ``url``.

    Returns:
        The constructed event, or None when constructing it raises.
    """

    def _as_text(value) -> str | None:
        # None stays None, dicts become JSON text (None if they cannot be
        # serialised), and anything else is stringified.
        if value is None:
            return None
        if not isinstance(value, dict):
            return str(value)
        try:
            return json.dumps(value)
        except Exception:
            return None

    first_source: str | None = None
    if isinstance(source, list) and source:
        first_source = source[0]

    # Normalise outside the guarded constructor call so that only the
    # TelemetryEvent construction itself is covered by the fallback below.
    schema_text = _as_text(schema)
    response_text = _as_text(response)

    try:
        built = TelemetryEvent(
            user_prompt=prompt,
            json_schema=schema_text,
            website_content=content,
            llm_response=response_text,
            llm_model=llm_model or "unknown",
            url=first_source,
        )
    except Exception:
        return None
    return built
131+
132+
133+
def _send_telemetry(event: TelemetryEvent):
    """POST a telemetry event to TRACK_URL as a JSON body.

    Best effort: serialisation failures and request failures are logged at
    debug level and are not raised to the caller.
    """
    headers = {
        "Content-Type": "application/json",
        "sgai-oss-version": VERSION,
    }

    # Step 1: encode the event; give up quietly if it cannot be serialised.
    try:
        payload = json.dumps(event.model_dump()).encode()
    except Exception as e:
        logger.debug(f"Failed to serialize telemetry event: {e}")
        return

    # Step 2: send the request and report the outcome in the debug log.
    try:
        outgoing = request.Request(TRACK_URL, data=payload, headers=headers)
        with request.urlopen(outgoing, timeout=TIMEOUT) as f:
            f.read()
            if f.code != 201:
                logger.debug(f"Telemetry endpoint returned unexpected status: {f.code}")
            else:
                logger.debug("Telemetry data sent successfully")
    except Exception as e:
        logger.debug(f"Failed to send telemetry data: {e}")
116155

117156

118-
def send_event_json(event_json: dict):
119-
if not g_telemetry_enabled:
120-
raise RuntimeError("Telemetry tracking is disabled!")
157+
def _send_telemetry_threaded(event: TelemetryEvent):
    """Run ``_send_telemetry(event)`` on a background daemon thread.

    A RuntimeError raised while creating or starting the thread is logged at
    debug level instead of being propagated.
    """
    try:
        threading.Thread(
            target=_send_telemetry,
            args=(event,),
            daemon=True,
        ).start()
    except RuntimeError as e:
        logger.debug(f"Failed to send telemetry data in a thread: {e}")
126165

127166

128167
def log_event(event: str, properties: Dict[str, Any]):
    """Accept a generic telemetry event and discard it.

    This function is a no-op: it performs no validation, logging or network
    I/O, and always returns None.
    NOTE(review): presumably kept so existing callers of ``log_event`` keep
    working -- confirm before removing.

    Args:
        event: Name of the event; ignored.
        properties: Properties attached to the event; ignored.

    Returns:
        None.
    """
    return None
136169

137170

138171
def log_graph_execution(
@@ -150,23 +183,25 @@ def log_graph_execution(
150183
exception: str = None,
151184
total_tokens: int = None,
152185
):
153-
props = {
154-
"graph_name": graph_name,
155-
"source": source,
156-
"prompt": prompt,
157-
"schema": schema,
158-
"llm_model": llm_model,
159-
"embedder_model": embedder_model,
160-
"source_type": source_type,
161-
"content": content,
162-
"response": response,
163-
"execution_time": execution_time,
164-
"error_node": error_node,
165-
"exception": exception,
166-
"total_tokens": total_tokens,
167-
"type": "community-library",
168-
}
169-
log_event("graph_execution", props)
186+
if not is_telemetry_enabled():
187+
return
188+
189+
if error_node is not None:
190+
return
191+
192+
event = _build_valid_telemetry_event(
193+
prompt=prompt,
194+
schema=schema,
195+
content=content,
196+
response=response,
197+
llm_model=llm_model,
198+
source=source,
199+
)
200+
if event is None:
201+
logger.debug("Telemetry skipped: event validation failed")
202+
return
203+
204+
_send_telemetry_threaded(event)
170205

171206

172207
def capture_function_usage(call_fn: Callable) -> Callable:

0 commit comments

Comments
 (0)