44import json
55import logging
66import os
7- import platform
87import threading
98import uuid
109from typing import Callable , Dict
1110from urllib import request
11+ from pydantic import BaseModel , Field
1212
13- # Load version
1413VERSION = importlib .metadata .version ("scrapegraphai" )
15- STR_VERSION = "." .join ([str (i ) for i in VERSION ])
16-
17- # 🚀 Your proxy service endpoint (instead of PostHog)
18- PROXY_URL = "https://scrapegraph-proxy.onrender.com/capture/"
19-
14+ TRACK_URL = "https://sgai-oss-tracing.onrender.com/v1/telemetry"
2015TIMEOUT = 2
2116DEFAULT_CONFIG_LOCATION = os .path .expanduser ("~/.scrapegraphai.conf" )
2217
2318logger = logging .getLogger (__name__ )
2419
25- # Everything below remains mostly same
2620def _load_config (config_location : str ) -> configparser .ConfigParser :
2721 config = configparser .ConfigParser ()
2822 try :
@@ -70,16 +64,6 @@ def _check_config_and_environ_for_telemetry_flag(default_value: bool, config_obj
7064MAX_COUNT_SESSION = 1000
7165
7266
73- BASE_PROPERTIES = {
74- "os_type" : os .name ,
75- "os_version" : platform .platform (),
76- "python_version" : f"{ platform .python_version ()} /{ platform .python_implementation ()} " ,
77- "distinct_id" : g_anonymous_id ,
78- "scrapegraphai_version" : VERSION ,
79- "telemetry_version" : "0.0.4-proxy" ,
80- }
81-
82-
8367def disable_telemetry ():
8468 global g_telemetry_enabled
8569 g_telemetry_enabled = False
@@ -95,44 +79,93 @@ def is_telemetry_enabled() -> bool:
9579 return False
9680
9781
98- # ⭐ UPDATED FOR PROXY — send without API key
99- def _send_event_json (event_json : dict ):
class TelemetryEvent(BaseModel):
    """Validated payload for a single telemetry record.

    All six fields are required strings. Each one carries a minimum and a
    maximum length constraint, so constructing the model with a missing,
    non-string, too-short or too-long value fails validation.

    Length bounds (min / max characters):
        user_prompt:     1 / 4096
        json_schema:     512 / 16384
        website_content: 1 / 65536
        llm_response:    1 / 32768
        llm_model:       1 / 256
        url:             1 / 2048
    """

    user_prompt: str = Field(min_length=1, max_length=4096)
    # NOTE(review): unlike the other fields, the lower bound here is 512, so
    # short schemas are rejected — confirm this is intentional.
    json_schema: str = Field(min_length=512, max_length=16384)
    website_content: str = Field(min_length=1, max_length=65536)
    llm_response: str = Field(min_length=1, max_length=32768)
    llm_model: str = Field(min_length=1, max_length=256)
    url: str = Field(min_length=1, max_length=2048)
90+
def _build_valid_telemetry_event(
    prompt: str | None,
    schema: dict | None,
    content: str | None,
    response: dict | str | None,
    llm_model: str | None,
    source: list[str] | None,
) -> TelemetryEvent | None:
    """Assemble a ``TelemetryEvent`` from raw graph-execution values.

    Args:
        prompt: Passed through as ``user_prompt``.
        schema: Dicts are JSON-encoded; any other non-``None`` value is
            converted with ``str()``.
        content: Passed through as ``website_content``.
        response: Converted with the same rule as ``schema``.
        llm_model: Falls back to ``"unknown"`` when falsy.
        source: When this is a non-empty list, its first element is used
            as ``url``; otherwise ``url`` is ``None``.

    Returns:
        The validated event, or ``None`` when model construction raises
        (for example because a field is ``None`` or out of bounds).
    """

    def _as_text(value):
        """Return ``value`` as text: JSON for dicts, ``str()`` otherwise."""
        if value is None:
            return None
        if not isinstance(value, dict):
            return str(value)
        try:
            return json.dumps(value)
        except Exception:
            # A dict that cannot be serialized is treated as absent.
            return None

    first_source = None
    if isinstance(source, list) and source:
        first_source = source[0]

    # Conversions happen before the validation ``try`` so that only model
    # construction errors are swallowed below.
    schema_text = _as_text(schema)
    response_text = _as_text(response)

    try:
        return TelemetryEvent(
            user_prompt=prompt,
            json_schema=schema_text,
            website_content=content,
            llm_response=response_text,
            llm_model=llm_model or "unknown",
            url=first_source,
        )
    except Exception:
        return None
131+
132+
def _send_telemetry(event: TelemetryEvent):
    """Send one telemetry event to the tracing endpoint.

    The event is dumped to a dict, JSON-encoded and sent to ``TRACK_URL``
    with a ``TIMEOUT``-second limit. The function never raises: both
    serialization and transport failures are logged at DEBUG level and
    then dropped.

    Args:
        event: The already-validated event to transmit.
    """
    headers = {
        "Content-Type": "application/json",
        "sgai-oss-version": VERSION,
    }

    try:
        data = json.dumps(event.model_dump()).encode()
    except Exception as e:
        # Deliberately broad: a payload that cannot be serialized must not
        # propagate an error out of the telemetry path.
        logger.debug("Failed to serialize telemetry event: %s", e)
        return

    try:
        # Supplying ``data`` makes urllib issue a POST request.
        req = request.Request(TRACK_URL, data=data, headers=headers)
        with request.urlopen(req, timeout=TIMEOUT) as f:
            f.read()
            if f.code == 201:
                logger.debug("Telemetry data sent successfully")
            else:
                logger.debug(
                    "Telemetry endpoint returned unexpected status: %s", f.code
                )
    except Exception as e:
        # Covers timeouts, connection errors and the HTTPError that urlopen
        # raises for error statuses.
        logger.debug("Failed to send telemetry data: %s", e)
116155
117156
118- def send_event_json (event_json : dict ):
119- if not g_telemetry_enabled :
120- raise RuntimeError ("Telemetry tracking is disabled!" )
def _send_telemetry_threaded(event: TelemetryEvent):
    """Dispatch ``_send_telemetry(event)`` on a background daemon thread.

    The thread is a daemon, so a pending telemetry request does not keep
    the interpreter alive at exit. A ``RuntimeError`` raised while starting
    the thread is logged at DEBUG level and otherwise ignored.

    Args:
        event: The validated event handed to ``_send_telemetry``.
    """
    try:
        th = threading.Thread(target=_send_telemetry, args=(event,), daemon=True)
        th.start()
    except RuntimeError as e:
        logger.debug("Failed to send telemetry data in a thread: %s", e)
126165
127166
def log_event(event: str, properties: Dict[str, object]):
    """Accept a generic telemetry event and do nothing with it.

    This function is a no-op: both arguments are ignored and nothing is
    sent. NOTE(review): presumably kept so existing callers of
    ``log_event`` keep working — confirm before removing.

    Args:
        event: Event name (unused).
        properties: Event properties (unused).

    Returns:
        None.
    """
    return None
136169
137170
138171def log_graph_execution (
@@ -150,23 +183,25 @@ def log_graph_execution(
150183 exception : str = None ,
151184 total_tokens : int = None ,
152185):
153- props = {
154- "graph_name" : graph_name ,
155- "source" : source ,
156- "prompt" : prompt ,
157- "schema" : schema ,
158- "llm_model" : llm_model ,
159- "embedder_model" : embedder_model ,
160- "source_type" : source_type ,
161- "content" : content ,
162- "response" : response ,
163- "execution_time" : execution_time ,
164- "error_node" : error_node ,
165- "exception" : exception ,
166- "total_tokens" : total_tokens ,
167- "type" : "community-library" ,
168- }
169- log_event ("graph_execution" , props )
186+ if not is_telemetry_enabled ():
187+ return
188+
189+ if error_node is not None :
190+ return
191+
192+ event = _build_valid_telemetry_event (
193+ prompt = prompt ,
194+ schema = schema ,
195+ content = content ,
196+ response = response ,
197+ llm_model = llm_model ,
198+ source = source ,
199+ )
200+ if event is None :
201+ logger .debug ("Telemetry skipped: event validation failed" )
202+ return
203+
204+ _send_telemetry_threaded (event )
170205
171206
172207def capture_function_usage (call_fn : Callable ) -> Callable :
0 commit comments