Skip to content

Commit a497af7

Browse files
change the tracing from posthog to custom apis
1 parent 739b05a commit a497af7

1 file changed

Lines changed: 113 additions & 89 deletions

File tree

scrapegraphai/telemetry/telemetry.py

Lines changed: 113 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -16,22 +16,20 @@
1616
"""
1717

1818
import configparser
19-
import functools
2019
import importlib.metadata
2120
import json
2221
import logging
2322
import os
24-
import platform
2523
import threading
2624
import uuid
27-
from typing import Callable, Dict
25+
from typing import Any, Callable
2826
from urllib import request
27+
from urllib.error import HTTPError, URLError
28+
29+
from pydantic import BaseModel, Field, ValidationError
2930

3031
VERSION = importlib.metadata.version("scrapegraphai")
31-
STR_VERSION = ".".join([str(i) for i in VERSION])
32-
HOST = "https://eu.i.posthog.com"
33-
TRACK_URL = f"{HOST}/capture/" # https://posthog.com/docs/api/post-only-endpoints
34-
API_KEY = "phc_orsfU4aHhtpTSLVcUE2hdUkQDLM4OEQZndKGFBKMEtn"
32+
TRACK_URL = "https://sgai-oss-tracing.onrender.com/v1/telemetry"
3533
TIMEOUT = 2
3634
DEFAULT_CONFIG_LOCATION = os.path.expanduser("~/.scrapegraphai.conf")
3735

@@ -43,7 +41,8 @@ def _load_config(config_location: str) -> configparser.ConfigParser:
4341
try:
4442
with open(config_location) as f:
4543
config.read_file(f)
46-
except Exception:
44+
except (OSError, configparser.Error) as e:
45+
logger.debug(f"Unable to load config file: {e}")
4746
config["DEFAULT"] = {}
4847
else:
4948
if "DEFAULT" not in config:
@@ -54,8 +53,8 @@ def _load_config(config_location: str) -> configparser.ConfigParser:
5453
try:
5554
with open(config_location, "w") as f:
5655
config.write(f)
57-
except Exception:
58-
pass
56+
except OSError as e:
57+
logger.debug(f"Unable to write config file: {e}")
5958
return config
6059

6160

@@ -68,8 +67,7 @@ def _check_config_and_environ_for_telemetry_flag(
6867
telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled")
6968
except ValueError as e:
7069
logger.debug(
71-
f"""Unable to parse value for
72-
`telemetry_enabled` from config. Encountered {e}"""
70+
f"Unable to parse value for `telemetry_enabled` from config. Encountered {e}"
7371
)
7472
if os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED") is not None:
7573
env_value = os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED")
@@ -78,8 +76,7 @@ def _check_config_and_environ_for_telemetry_flag(
7876
telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled")
7977
except ValueError as e:
8078
logger.debug(
81-
f"""Unable to parse value for `SCRAPEGRAPHAI_TELEMETRY_ENABLED`
82-
from environment. Encountered {e}"""
79+
f"Unable to parse value for `SCRAPEGRAPHAI_TELEMETRY_ENABLED` from environment. Encountered {e}"
8380
)
8481
return telemetry_enabled
8582

@@ -90,15 +87,6 @@ def _check_config_and_environ_for_telemetry_flag(
9087
CALL_COUNTER = 0
9188
MAX_COUNT_SESSION = 1000
9289

93-
BASE_PROPERTIES = {
94-
"os_type": os.name,
95-
"os_version": platform.platform(),
96-
"python_version": f"{platform.python_version()}/{platform.python_implementation()}",
97-
"distinct_id": g_anonymous_id,
98-
"scrapegraphai_version": VERSION,
99-
"telemetry_version": "0.0.3",
100-
}
101-
10290

10391
def disable_telemetry():
10492
"""
@@ -128,49 +116,100 @@ def is_telemetry_enabled() -> bool:
128116
return False
129117

130118

131-
def _send_event_json(event_json: dict):
119+
class TelemetryEvent(BaseModel):
120+
"""Validated telemetry payload matching the tracing API schema."""
121+
122+
user_prompt: str = Field(min_length=1, max_length=4096)
123+
json_schema: str = Field(min_length=512, max_length=16384)
124+
website_content: str = Field(min_length=1, max_length=65536)
125+
llm_response: str = Field(min_length=1, max_length=32768)
126+
llm_model: str = Field(min_length=1, max_length=256)
127+
url: str = Field(min_length=1, max_length=2048)
128+
129+
130+
def _build_valid_telemetry_event(
131+
prompt: str | None,
132+
schema: dict | None,
133+
content: str | None,
134+
response: dict | str | None,
135+
llm_model: str | None,
136+
source: list[str] | None,
137+
) -> TelemetryEvent | None:
138+
"""Build and validate a TelemetryEvent. Returns None if validation fails."""
139+
url: str | None = source[0] if isinstance(source, list) and source else None
140+
141+
json_schema: str | None = None
142+
if isinstance(schema, dict):
143+
try:
144+
json_schema = json.dumps(schema)
145+
except (TypeError, ValueError):
146+
json_schema = None
147+
elif schema is not None:
148+
json_schema = str(schema)
149+
150+
llm_response: str | None = None
151+
if isinstance(response, dict):
152+
try:
153+
llm_response = json.dumps(response)
154+
except (TypeError, ValueError):
155+
llm_response = None
156+
elif response is not None:
157+
llm_response = str(response)
158+
159+
try:
160+
return TelemetryEvent(
161+
user_prompt=prompt,
162+
json_schema=json_schema,
163+
website_content=content,
164+
llm_response=llm_response,
165+
llm_model=llm_model or "unknown",
166+
url=url,
167+
)
168+
except (ValidationError, TypeError):
169+
return None
170+
171+
172+
def _send_telemetry(event: TelemetryEvent):
173+
"""Send telemetry event to the tracing endpoint."""
132174
headers = {
133175
"Content-Type": "application/json",
134-
"Authorization": f"Bearer {API_KEY}",
135-
"User-Agent": f"scrapegraphai/{STR_VERSION}",
176+
"sgai-oss-version": VERSION,
136177
}
137178
try:
138-
data = json.dumps(event_json).encode()
179+
data = json.dumps(event.model_dump()).encode()
180+
except (TypeError, ValueError) as e:
181+
logger.debug(f"Failed to serialize telemetry event: {e}")
182+
return
183+
184+
try:
139185
req = request.Request(TRACK_URL, data=data, headers=headers)
140186
with request.urlopen(req, timeout=TIMEOUT) as f:
141-
res = f.read()
142-
if f.code != 200:
143-
raise RuntimeError(res)
144-
except Exception as e:
145-
logger.debug(f"Failed to send telemetry data: {e}")
146-
else:
147-
logger.debug(f"Telemetry data sent: {data}")
187+
f.read()
188+
if f.code == 201:
189+
logger.debug("Telemetry data sent successfully")
190+
else:
191+
logger.debug(f"Telemetry endpoint returned unexpected status: {f.code}")
192+
except HTTPError as e:
193+
logger.debug(f"Failed to send telemetry data (HTTP {e.code}): {e.reason}")
194+
except URLError as e:
195+
logger.debug(f"Failed to send telemetry data (URL error): {e.reason}")
196+
except OSError as e:
197+
logger.debug(f"Failed to send telemetry data (OS error): {e}")
148198

149199

150-
def send_event_json(event_json: dict):
151-
"""
152-
fucntion for sending event json
153-
"""
154-
if not g_telemetry_enabled:
155-
raise RuntimeError("Telemetry tracking is disabled!")
200+
def _send_telemetry_threaded(event: TelemetryEvent):
201+
"""Send telemetry in a background daemon thread."""
156202
try:
157-
th = threading.Thread(target=_send_event_json, args=(event_json,))
203+
th = threading.Thread(target=_send_telemetry, args=(event,))
204+
th.daemon = True
158205
th.start()
159-
except Exception as e:
206+
except RuntimeError as e:
160207
logger.debug(f"Failed to send telemetry data in a thread: {e}")
161208

162209

163-
def log_event(event: str, properties: Dict[str, any]):
164-
"""
165-
function for logging the events
166-
"""
167-
if is_telemetry_enabled():
168-
event_json = {
169-
"api_key": API_KEY,
170-
"event": event,
171-
"properties": {**BASE_PROPERTIES, **properties},
172-
}
173-
send_event_json(event_json)
210+
def log_event(event: str, properties: dict[str, Any]):
211+
"""No-op stub kept for backwards compatibility."""
212+
logger.debug(f"log_event called with event={event} (no-op)")
174213

175214

176215
def log_graph_execution(
@@ -191,42 +230,27 @@ def log_graph_execution(
191230
"""
192231
function for logging the graph execution
193232
"""
194-
properties = {
195-
"graph_name": graph_name,
196-
"source": source,
197-
"prompt": prompt,
198-
"schema": schema,
199-
"llm_model": llm_model,
200-
"embedder_model": embedder_model,
201-
"source_type": source_type,
202-
"content": content,
203-
"response": response,
204-
"execution_time": execution_time,
205-
"error_node": error_node,
206-
"exception": exception,
207-
"total_tokens": total_tokens,
208-
"type": "community-library",
209-
}
210-
log_event("graph_execution", properties)
233+
if not is_telemetry_enabled():
234+
return
211235

236+
if error_node is not None:
237+
return
212238

213-
def capture_function_usage(call_fn: Callable) -> Callable:
214-
"""
215-
function that captures the usage
216-
"""
239+
event = _build_valid_telemetry_event(
240+
prompt=prompt,
241+
schema=schema,
242+
content=content,
243+
response=response,
244+
llm_model=llm_model,
245+
source=source,
246+
)
247+
if event is None:
248+
logger.debug("Telemetry skipped: event validation failed")
249+
return
217250

218-
@functools.wraps(call_fn)
219-
def wrapped_fn(*args, **kwargs):
220-
try:
221-
return call_fn(*args, **kwargs)
222-
finally:
223-
if is_telemetry_enabled():
224-
try:
225-
function_name = call_fn.__name__
226-
log_event("function_usage", {"function_name": function_name})
227-
except Exception as e:
228-
logger.debug(
229-
f"Failed to send telemetry for function usage. Encountered: {e}"
230-
)
231-
232-
return wrapped_fn
251+
_send_telemetry_threaded(event)
252+
253+
254+
def capture_function_usage(call_fn: Callable) -> Callable:
255+
"""Passthrough decorator kept for backwards compatibility."""
256+
return call_fn

0 commit comments

Comments
 (0)