Skip to content

Commit b7c05c2

Browse files
fabclmntFabiana Clemente
andauthored
feat: add analytics (#335)
* feat: add telemetry features. * feat: add analytics to streamlit app * docs: Add analytics information to the documentation. * fix: fix issues related with imports * fix: add request * fix: remove print * fix: cleaning code --------- Co-authored-by: Fabiana Clemente <fabianaclemente@Fabianas-MacBook-Air.local>
1 parent 6466686 commit b7c05c2

7 files changed

Lines changed: 121 additions & 0 deletions

File tree

docs/support/analytics.md

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
2+
# Analytics & Telemetry
3+
4+
## Overview
5+
6+
`ydata-synthetic` is a powerful library designed to generate synthetic data.
7+
As part of our ongoing efforts to improve user experience and functionality, `ydata-synthetic`
8+
includes a telemetry feature. This feature collects anonymous usage data, helping us understand
9+
how the library is used and identify areas for improvement.
10+
11+
The primary goal of collecting telemetry data is to:
12+
13+
- Enhance the functionality and performance of the ydata-synthetic library
14+
- Prioritize new features based on user engagement
15+
- Identify common issues and bugs to improve overall user experience
16+
17+
### Data Collected
18+
The telemetry system collects non-personal, anonymous information such as:
19+
20+
- Python version
21+
- `ydata-synthetic` version
22+
- Frequency of use of `ydata-synthetic` features
23+
- Errors or exceptions thrown within the library
24+
25+
## Disabling usage analytics
26+
27+
We respect your choice to not participate in our telemetry collection.
28+
If you prefer to disable telemetry, you can do so by setting an environment
29+
variable on your system. Disabling telemetry will not affect the functionality
30+
of the `ydata-synthetic` library, except for the ability to contribute to its usage analytics.
31+
32+
### Set an Environment Variable
33+
In your notebook or script, make sure to set the `YDATA_SYNTHETIC_NO_ANALYTICS`
34+
environment variable to `True`.
35+
36+
````python
37+
import os
38+
39+
os.environ['YDATA_SYNTHETIC_NO_ANALYTICS']='True'
40+
````
41+
42+
43+
44+

mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ nav:
3434
- Help & Troubleshooting: 'support/help-troubleshooting.md'
3535
- Contribution Guidelines: 'support/contribute.md'
3636
- Contribution Guidelines: 'support/contribute.md'
37+
- Analytics: 'support/analytics.md'
3738
- Reference:
3839
- Changelog: 'reference/changelog.md'
3940
- API:

src/ydata_synthetic/streamlit_app/pages/1_Train_a_synthesizer.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,24 @@
11
from typing import Union
22
import os
33
import json
4+
import logging
5+
46
import streamlit as st
57

68
from ydata.sdk.synthesizers import RegularSynthesizer
79
from ydata.sdk.common.client import get_client
810

11+
from ydata_synthetic.utils.logger import SynthesizersLogger
912
from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
1013
from ydata_synthetic.synthesizers.regular.model import Model
1114

1215
from ydata_synthetic.streamlit_app.pages.functions.load_data import upload_file
1316
from ydata_synthetic.streamlit_app.pages.functions.train import DataType, __CONDITIONAL_MODELS
1417
from ydata_synthetic.streamlit_app.pages.functions.train import init_synth, advanced_setttings, training_parameters
1518

19+
logger = SynthesizersLogger(name='streamlitSynthesizer.logger')
20+
logger.setLevel(logging.INFO)
21+
1622
def get_available_models(type: Union[str, DataType]):
1723

1824
dtype = DataType(type)
@@ -114,6 +120,8 @@ def run():
114120

115121
st.subheader("3. Train your synthesizer")
116122
if st.button('Click here to start the training process', disabled=not valid_token):
123+
124+
logger.info_def_report(model='fabric')
117125
model = RegularSynthesizer()
118126
with st.spinner("Please wait while your synthesizer trains..."):
119127
dtypes = {}

src/ydata_synthetic/synthesizers/regular/model.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Main synthesizer class
33
"""
44
from enum import Enum, unique
5+
import logging
56

67
from joblib import load
78

@@ -17,6 +18,10 @@
1718
from ydata_synthetic.synthesizers.regular.ctgan.model import CTGAN
1819
from ydata_synthetic.synthesizers.regular.gmm.model import GMM
1920

21+
from ydata_synthetic.utils.logger import SynthesizersLogger
22+
23+
logger = SynthesizersLogger(name='regularsynthesizer.logger')
24+
logger.setLevel(logging.INFO)
2025

2126
@unique
2227
class Model(Enum):
@@ -54,6 +59,8 @@ def __new__(cls, modelname: str, model_parameters =None, **kwargs):
5459
model=Model(modelname).function(**kwargs)
5560
else:
5661
model=Model(modelname).function(model_parameters, **kwargs)
62+
63+
logger.info_def_report(model=modelname)
5764
return model
5865

5966
@staticmethod

src/ydata_synthetic/synthesizers/timeseries/model.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,18 @@
33
"""
44
from enum import Enum, unique
55
import os
6+
import logging
67
from joblib import load
78

89
from tensorflow import config as tfconfig
910

1011
from ydata_synthetic.synthesizers.timeseries.timegan.model import TimeGAN
1112
from ydata_synthetic.synthesizers.timeseries.doppelganger.model import DoppelGANger
1213

14+
from ydata_synthetic.utils.logger import SynthesizersLogger
15+
16+
logger = SynthesizersLogger(name='timseriesSynthesizer.logger')
17+
logger.setLevel(logging.INFO)
1318

1419
@unique
1520
class Model(Enum):
@@ -28,6 +33,7 @@ def function(self):
2833
class TimeSeriesSynthesizer():
2934
"Abstraction class "
3035
def __new__(cls, modelname: str, model_parameters=None, **kwargs):
    """Report the requested model, then build it.

    Args:
        modelname: Value used to look up the matching ``Model`` member.
        model_parameters: Forwarded as the first positional argument to the
            selected model's factory.
        **kwargs: Forwarded unchanged to the selected model's factory.

    Returns:
        Whatever ``Model(modelname).function`` returns when called with
        ``model_parameters`` and ``**kwargs``.
    """
    # The usage report is emitted before the model is looked up or built.
    logger.info_def_report(model=modelname)

    synthesizer_factory = Model(modelname).function
    return synthesizer_factory(model_parameters, **kwargs)
3238

3339
@staticmethod
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""
2+
ydata-synthetic logger
3+
"""
4+
from __future__ import absolute_import, division, print_function
5+
6+
import logging
7+
8+
from ydata_synthetic.utils.utils import analytics_features
9+
10+
class SynthesizersLogger(logging.Logger):
    """Logger that tags messages with ``[SYNTHESIZER]`` and reports model usage.

    NOTE(review): instances created by calling this class directly (rather
    than through ``logging.getLogger``) have no parent logger, so records are
    only visible once a handler is attached to the instance itself.
    """

    def __init__(self, name, level=logging.INFO):
        """Create the logger.

        Args:
            name: Logger name.
            level: Initial threshold; defaults to ``logging.INFO``.
        """
        super().__init__(name, level)

    def info(self, msg: object, *args, **kwargs) -> None:
        """Log ``msg`` at INFO level, prefixed with ``[SYNTHESIZER] - ``.

        Args:
            msg: Message (or %-style format string) to log. A trailing
                period is appended.
            *args: Lazy %-formatting arguments, forwarded to
                ``logging.Logger.info``.
            **kwargs: Standard logging keyword arguments (``exc_info``,
                ``extra``, ...), forwarded to ``logging.Logger.info``.
        """
        # Forwarding *args/**kwargs keeps this override compatible with the
        # base-class signature; without it `logger.info("x=%s", x)` raises
        # TypeError.
        super().info(f'[SYNTHESIZER] - {msg}.', *args, **kwargs)

    def info_def_report(self, model: str):
        """Send the usage-analytics report for ``model`` and log its creation.

        Args:
            model: Name of the model the synthesizer is being created with.
        """
        analytics_features(model=model)

        # Calls the base-class ``info`` directly, so the prefix added by the
        # ``info`` override above is not applied a second time.
        super().info(f'[SYNTHESIZER] Creating a synthetic data generator with the following model - {model}.')

src/ydata_synthetic/utils/utils.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""
2+
Utility functions that are common to ydata-synthetic project
3+
"""
4+
import os
5+
import subprocess
6+
import platform
7+
import requests
8+
9+
from ydata_synthetic.version import __version__
10+
def analytics_features(model: str):
    """Send one anonymous, best-effort usage report for ``model``.

    The report carries the package version, the ``major.minor`` Python
    version, the model name, the operating system name and whether
    ``nvidia-smi`` could be executed. Nothing is sent when the
    ``YDATA_SYNTHETIC_NO_ANALYTICS`` environment variable opts out.

    Args:
        model: Name of the synthesizer model being created.

    Returns:
        None. Failures while sending the report are ignored.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from urllib.parse import urlencode

    endpoint = "https://packages.ydata.ai/ydata-synthetic?"

    # `bool("False")` is True, so the raw string must be parsed explicitly.
    # Unset, empty and explicit "off" spellings keep analytics enabled; any
    # other value (e.g. "True", "1", "yes") is treated as an opt-out.
    opt_out = os.getenv("YDATA_SYNTHETIC_NO_ANALYTICS", "").strip().lower()
    if opt_out not in ("", "0", "false", "no", "off"):
        return

    package_version = __version__

    # A GPU is assumed present when `nvidia-smi` runs successfully; a missing
    # binary (OSError) or a non-zero exit (SubprocessError) means "no GPU".
    try:
        subprocess.check_output(["nvidia-smi"])
        gpu_present = True
    except (OSError, subprocess.SubprocessError):
        gpu_present = False

    # Only major.minor is reported, e.g. "3.10".
    python_version = ".".join(platform.python_version().split(".")[:2])

    try:
        # urlencode escapes characters that are unsafe in a query string.
        query = urlencode(
            {
                "version": package_version,
                "python_version": python_version,
                "model": model,
                "os": platform.system(),
                "gpu": str(gpu_present),
            }
        )
        # The timeout stops an unreachable endpoint from blocking the caller.
        requests.get(f"{endpoint}{query}", timeout=5)
    except Exception:
        # Deliberately broad: telemetry must never break the library.
        pass

0 commit comments

Comments
 (0)