1616"""
1717
1818import configparser
19- import functools
2019import importlib .metadata
2120import json
2221import logging
2322import os
24- import platform
2523import threading
2624import uuid
27- from typing import Callable , Dict
25+ from typing import Any , Callable
2826from urllib import request
27+ from urllib .error import HTTPError , URLError
28+
29+ from pydantic import BaseModel , Field , ValidationError
2930
# Version of the installed `scrapegraphai` distribution; sent with each
# telemetry request in the `sgai-oss-version` header.
VERSION = importlib.metadata.version("scrapegraphai")

# Endpoint that telemetry payloads are posted to.
TRACK_URL = "https://sgai-oss-tracing.onrender.com/v1/telemetry"

# Timeout, in seconds, passed to `urlopen` when sending telemetry.
TIMEOUT = 2

# Default path of the configuration file, resolved inside the user's home.
DEFAULT_CONFIG_LOCATION = os.path.expanduser("~/.scrapegraphai.conf")
3735
@@ -43,7 +41,8 @@ def _load_config(config_location: str) -> configparser.ConfigParser:
4341 try :
4442 with open (config_location ) as f :
4543 config .read_file (f )
46- except Exception :
44+ except (OSError , configparser .Error ) as e :
45+ logger .debug (f"Unable to load config file: { e } " )
4746 config ["DEFAULT" ] = {}
4847 else :
4948 if "DEFAULT" not in config :
@@ -54,8 +53,8 @@ def _load_config(config_location: str) -> configparser.ConfigParser:
5453 try :
5554 with open (config_location , "w" ) as f :
5655 config .write (f )
57- except Exception :
58- pass
56+ except OSError as e :
57+ logger . debug ( f"Unable to write config file: { e } " )
5958 return config
6059
6160
@@ -68,8 +67,7 @@ def _check_config_and_environ_for_telemetry_flag(
6867 telemetry_enabled = config_obj .getboolean ("DEFAULT" , "telemetry_enabled" )
6968 except ValueError as e :
7069 logger .debug (
71- f"""Unable to parse value for
72- `telemetry_enabled` from config. Encountered { e } """
70+ f"Unable to parse value for `telemetry_enabled` from config. Encountered { e } "
7371 )
7472 if os .environ .get ("SCRAPEGRAPHAI_TELEMETRY_ENABLED" ) is not None :
7573 env_value = os .environ .get ("SCRAPEGRAPHAI_TELEMETRY_ENABLED" )
@@ -78,8 +76,7 @@ def _check_config_and_environ_for_telemetry_flag(
7876 telemetry_enabled = config_obj .getboolean ("DEFAULT" , "telemetry_enabled" )
7977 except ValueError as e :
8078 logger .debug (
81- f"""Unable to parse value for `SCRAPEGRAPHAI_TELEMETRY_ENABLED`
82- from environment. Encountered { e } """
79+ f"Unable to parse value for `SCRAPEGRAPHAI_TELEMETRY_ENABLED` from environment. Encountered { e } "
8380 )
8481 return telemetry_enabled
8582
@@ -90,15 +87,6 @@ def _check_config_and_environ_for_telemetry_flag(
9087CALL_COUNTER = 0
9188MAX_COUNT_SESSION = 1000
9289
93- BASE_PROPERTIES = {
94- "os_type" : os .name ,
95- "os_version" : platform .platform (),
96- "python_version" : f"{ platform .python_version ()} /{ platform .python_implementation ()} " ,
97- "distinct_id" : g_anonymous_id ,
98- "scrapegraphai_version" : VERSION ,
99- "telemetry_version" : "0.0.3" ,
100- }
101-
10290
10391def disable_telemetry ():
10492 """
@@ -128,49 +116,100 @@ def is_telemetry_enabled() -> bool:
128116 return False
129117
130118
131- def _send_event_json (event_json : dict ):
class TelemetryEvent(BaseModel):
    """Validated telemetry payload matching the tracing API schema.

    Every field is a required string with a minimum and maximum length.
    Constructing an instance with a value outside those bounds raises
    ``pydantic.ValidationError``, so an instance that exists is always
    within the limits declared here.
    """

    # Prompt text for the run.
    user_prompt: str = Field(
        min_length=1,
        max_length=4096,
    )
    # Output schema rendered as text.
    # NOTE(review): the 512-character minimum rejects short schemas --
    # confirm this lower bound really mirrors the server-side schema.
    json_schema: str = Field(
        min_length=512,
        max_length=16384,
    )
    # Content the run operated on.
    website_content: str = Field(
        min_length=1,
        max_length=65536,
    )
    # Model output rendered as text.
    llm_response: str = Field(
        min_length=1,
        max_length=32768,
    )
    # Identifier of the model that produced the response.
    llm_model: str = Field(
        min_length=1,
        max_length=256,
    )
    # Source URL associated with the run.
    url: str = Field(
        min_length=1,
        max_length=2048,
    )
128+
129+
def _telemetry_text(value: object) -> str | None:
    """Render *value* as text for a telemetry field.

    Dicts are JSON-encoded (``None`` when they cannot be serialized), any
    other non-``None`` value goes through ``str()``, and ``None`` stays
    ``None``.
    """
    if value is None:
        return None
    if not isinstance(value, dict):
        return str(value)
    try:
        return json.dumps(value)
    except (TypeError, ValueError):
        return None


def _build_valid_telemetry_event(
    prompt: str | None,
    schema: dict | None,
    content: str | None,
    response: dict | str | None,
    llm_model: str | None,
    source: list[str] | None,
) -> TelemetryEvent | None:
    """Assemble a ``TelemetryEvent`` from raw graph-execution values.

    Args:
        prompt: Prompt text, used as ``user_prompt``.
        schema: Output schema; dicts are JSON-encoded, other values are
            converted with ``str()``.
        content: Text used as ``website_content``.
        response: Model output; dicts are JSON-encoded, other values are
            converted with ``str()``.
        llm_model: Model identifier; falls back to ``"unknown"`` when falsy.
        source: List of sources; only its first element is used, as ``url``.

    Returns:
        The validated event, or ``None`` when any field is missing or fails
        the model's validation.
    """
    # Only a non-empty list yields a URL; anything else leaves it unset and
    # the event is then rejected by validation below.
    first_source: str | None = (
        source[0] if isinstance(source, list) and source else None
    )
    schema_text = _telemetry_text(schema)
    response_text = _telemetry_text(response)

    try:
        built = TelemetryEvent(
            user_prompt=prompt,
            json_schema=schema_text,
            website_content=content,
            llm_response=response_text,
            llm_model=llm_model or "unknown",
            url=first_source,
        )
    except (ValidationError, TypeError):
        return None
    return built
170+
171+
def _send_telemetry(event: TelemetryEvent):
    """Send a telemetry event to the tracing endpoint (best effort).

    The event is serialized to JSON and POSTed to ``TRACK_URL`` with a
    ``TIMEOUT``-second limit. Every failure is logged at debug level and
    swallowed: telemetry must never raise into the caller or leave an
    unhandled traceback in the thread that runs it.

    Args:
        event: The validated payload to send.
    """
    # Function-scope import so this block does not rely on a file-level one.
    from http.client import HTTPException

    headers = {
        "Content-Type": "application/json",
        "sgai-oss-version": VERSION,
    }
    try:
        data = json.dumps(event.model_dump()).encode()
    except (TypeError, ValueError) as e:
        logger.debug(f"Failed to serialize telemetry event: {e}")
        return

    try:
        req = request.Request(TRACK_URL, data=data, headers=headers)
        with request.urlopen(req, timeout=TIMEOUT) as f:
            f.read()
            # The endpoint signals success with 201; other statuses are
            # only noted, never treated as errors.
            if f.code == 201:
                logger.debug("Telemetry data sent successfully")
            else:
                logger.debug(f"Telemetry endpoint returned unexpected status: {f.code}")
    # Ordered most-specific first: HTTPError < URLError < OSError.
    except HTTPError as e:
        logger.debug(f"Failed to send telemetry data (HTTP {e.code}): {e.reason}")
    except URLError as e:
        logger.debug(f"Failed to send telemetry data (URL error): {e.reason}")
    except OSError as e:
        logger.debug(f"Failed to send telemetry data (OS error): {e}")
    except HTTPException as e:
        # Protocol-level failures (e.g. IncompleteRead while reading the
        # body, BadStatusLine) are not OSError subclasses and would
        # otherwise escape.
        logger.debug(f"Failed to send telemetry data (HTTP protocol error): {e}")
148198
149199
150- def send_event_json (event_json : dict ):
151- """
152- fucntion for sending event json
153- """
154- if not g_telemetry_enabled :
155- raise RuntimeError ("Telemetry tracking is disabled!" )
def _send_telemetry_threaded(event: TelemetryEvent):
    """Dispatch ``_send_telemetry(event)`` on a background daemon thread.

    The thread is a daemon, so a pending send does not keep the interpreter
    alive at exit. A ``RuntimeError`` while creating or starting the thread
    is logged at debug level and otherwise ignored.
    """
    try:
        worker = threading.Thread(
            target=_send_telemetry,
            args=(event,),
            daemon=True,
        )
        worker.start()
    except RuntimeError as err:
        logger.debug(f"Failed to send telemetry data in a thread: {err}")
161208
162209
163- def log_event (event : str , properties : Dict [str , any ]):
164- """
165- function for logging the events
166- """
167- if is_telemetry_enabled ():
168- event_json = {
169- "api_key" : API_KEY ,
170- "event" : event ,
171- "properties" : {** BASE_PROPERTIES , ** properties },
172- }
173- send_event_json (event_json )
def log_event(event: str, properties: dict[str, Any]):
    """Compatibility stub: accept an event and send nothing.

    Args:
        event: Event name; only echoed in a debug log line.
        properties: Accepted for signature compatibility and ignored.
    """
    message = f"log_event called with event={event} (no-op)"
    logger.debug(message)
174213
175214
176215def log_graph_execution (
@@ -191,42 +230,27 @@ def log_graph_execution(
191230 """
192231 function for logging the graph execution
193232 """
194- properties = {
195- "graph_name" : graph_name ,
196- "source" : source ,
197- "prompt" : prompt ,
198- "schema" : schema ,
199- "llm_model" : llm_model ,
200- "embedder_model" : embedder_model ,
201- "source_type" : source_type ,
202- "content" : content ,
203- "response" : response ,
204- "execution_time" : execution_time ,
205- "error_node" : error_node ,
206- "exception" : exception ,
207- "total_tokens" : total_tokens ,
208- "type" : "community-library" ,
209- }
210- log_event ("graph_execution" , properties )
233+ if not is_telemetry_enabled ():
234+ return
211235
236+ if error_node is not None :
237+ return
212238
213- def capture_function_usage (call_fn : Callable ) -> Callable :
214- """
215- function that captures the usage
216- """
239+ event = _build_valid_telemetry_event (
240+ prompt = prompt ,
241+ schema = schema ,
242+ content = content ,
243+ response = response ,
244+ llm_model = llm_model ,
245+ source = source ,
246+ )
247+ if event is None :
248+ logger .debug ("Telemetry skipped: event validation failed" )
249+ return
217250
218- @functools .wraps (call_fn )
219- def wrapped_fn (* args , ** kwargs ):
220- try :
221- return call_fn (* args , ** kwargs )
222- finally :
223- if is_telemetry_enabled ():
224- try :
225- function_name = call_fn .__name__
226- log_event ("function_usage" , {"function_name" : function_name })
227- except Exception as e :
228- logger .debug (
229- f"Failed to send telemetry for function usage. Encountered: { e } "
230- )
231-
232- return wrapped_fn
251+ _send_telemetry_threaded (event )
252+
253+
def capture_function_usage(call_fn: Callable) -> Callable:
    """Return *call_fn* untouched.

    Kept so existing ``@capture_function_usage`` decorations keep working;
    the function is not wrapped and nothing is recorded.
    """
    passthrough = call_fn
    return passthrough
0 commit comments