
Commit 8454559

Author: Francisco Santos
Message: pr review
1 parent 34e0d85

7 files changed

Lines changed: 93 additions & 92 deletions


.gitignore

Lines changed: 3 additions & 1 deletion
@@ -373,4 +373,6 @@ DerivedData/
 
 # User created
 VERSION
-version.py
+version.py
+local_test_*.py
+local_test_*.ipynb

src/ydata_synthetic/postprocessing/regular/inverse_preprocesser.py

Lines changed: 13 additions & 12 deletions
@@ -1,45 +1,46 @@
 # Inverts all preprocessing pipelines provided in the preprocessing examples
 from typing import Union
 
-import pandas as pd
+from pandas import DataFrame, concat
 
 from sklearn.pipeline import Pipeline
 from sklearn.compose import ColumnTransformer
-from sklearn.preprocessing import PowerTransformer, OneHotEncoder, StandardScaler
+from sklearn.preprocessing import PowerTransformer, OneHotEncoder, StandardScaler, MinMaxScaler
 
 
-def inverse_transform(data: pd.DataFrame, processor: Union[Pipeline, ColumnTransformer, PowerTransformer, OneHotEncoder, StandardScaler]) -> pd.DataFrame:
+def inverse_transform(data: DataFrame, processor: Union[Pipeline, ColumnTransformer, PowerTransformer,
+                                                        OneHotEncoder, StandardScaler, MinMaxScaler]) -> DataFrame:
     """Inverts data transformations taking place in a standard sklearn processor.
     Supported processes are sklearn pipelines, column transformers or base estimators like standard scalers.
 
     Args:
-        data (pd.DataFrame): The data object that needs inversion of preprocessing
+        data (DataFrame): The data object that needs inversion of preprocessing
         processor (Union[Pipeline, ColumnTransformer, BaseEstimator]): The processor applied on the original data
 
     Returns:
-        inv_data (pd.DataFrame): The data object after inverting preprocessing"""
+        inv_data (DataFrame): The data object after inverting preprocessing"""
     inv_data = data.copy()
-    if isinstance(processor, (PowerTransformer, OneHotEncoder, StandardScaler, Pipeline)):
-        inv_data = pd.DataFrame(processor.inverse_transform(data), columns=processor.feature_names_in_)
+    if isinstance(processor, (PowerTransformer, OneHotEncoder, StandardScaler, MinMaxScaler, Pipeline)):
+        inv_data = DataFrame(processor.inverse_transform(data), columns=processor.feature_names_in_ if hasattr(processor, "feature_names_in_") else None)
     elif isinstance(processor, ColumnTransformer):
         output_indices = processor.output_indices_
-        assert isinstance(data, pd.DataFrame), "The data to be inverted from a ColumnTransformer has to be a Pandas DataFrame."
+        assert isinstance(data, DataFrame), "The data to be inverted from a ColumnTransformer has to be a Pandas DataFrame."
         for t_name, t, t_cols in processor.transformers_[::-1]:
             slice_ = output_indices[t_name]
             t_indices = list(range(slice_.start, slice_.stop, 1 if slice_.step is None else slice_.step))
             if t == 'drop':
                 continue
             elif t == 'passthrough':
-                inv_cols = pd.DataFrame(data.iloc[:,t_indices].values, columns = t_cols, index = data.index)
+                inv_cols = DataFrame(data.iloc[:,t_indices].values, columns = t_cols, index = data.index)
                 inv_col_names = inv_cols.columns
             else:
-                inv_cols = pd.DataFrame(t.inverse_transform(data.iloc[:,t_indices].values), columns = t_cols, index = data.index)
+                inv_cols = DataFrame(t.inverse_transform(data.iloc[:,t_indices].values), columns = t_cols, index = data.index)
                 inv_col_names = inv_cols.columns
             if set(inv_col_names).issubset(set(inv_data.columns)):
                 inv_data[inv_col_names] = inv_cols[inv_col_names]
             else:
-                inv_data = pd.concat([inv_data, inv_cols], axis=1)
+                inv_data = concat([inv_data, inv_cols], axis=1)
     else:
         print('The provided data processor is not supported and cannot be inverted with this method.')
         return None
-    return inv_data[processor.feature_names_in_]
+    return inv_data[processor.feature_names_in_] if hasattr(processor, "feature_names_in_") else inv_data
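
For context, a minimal round-trip sketch of the updated inverse_transform. It assumes a StandardScaler fitted on a DataFrame (so scikit-learn >= 1.0 sets feature_names_in_); the sample frame and column names are illustrative, not part of this commit, and the import path follows the ydata_synthetic.postprocessing.regular.inverse_preprocesser module referenced elsewhere in this diff.

from pandas import DataFrame
from sklearn.preprocessing import StandardScaler

from ydata_synthetic.postprocessing.regular.inverse_preprocesser import inverse_transform

# Toy frame standing in for synthesizer output.
df = DataFrame({"open": [1.0, 2.0, 3.0], "close": [2.0, 4.0, 6.0]})
scaler = StandardScaler().fit(df)
scaled = DataFrame(scaler.transform(df), columns=df.columns)

# Inverts the scaling; because the scaler was fitted on a DataFrame,
# feature_names_in_ is set and the original column order is restored.
recovered = inverse_transform(scaled, scaler)
print(recovered)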
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+from typing import Union, List
+
+from ydata_synthetic.postprocessing.regular import inverse_preprocesser
+
+from sklearn.pipeline import Pipeline
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import PowerTransformer, OneHotEncoder, StandardScaler, MinMaxScaler
+
+from pandas import DataFrame
+
+def inverse_transform(data: List, processor: Union[Pipeline, ColumnTransformer, PowerTransformer, OneHotEncoder,
+                                                   StandardScaler, MinMaxScaler]):
+    if isinstance(data, list):
+        data = DataFrame(data)
+        return inverse_preprocesser.inverse_transform(data, processor).values.tolist()
+    else:
+        return inverse_preprocesser.inverse_transform(data, processor)
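
A hedged usage sketch of the new list-aware wrapper follows. Since the diff does not show the new file's path, the wrapper is inlined here rather than imported; the records and scaler are illustrative.

from pandas import DataFrame
from sklearn.preprocessing import MinMaxScaler

from ydata_synthetic.postprocessing.regular import inverse_preprocesser

def inverse_transform(data, processor):
    # Same logic as the wrapper above, inlined because its module path is not shown.
    if isinstance(data, list):
        return inverse_preprocesser.inverse_transform(DataFrame(data), processor).values.tolist()
    return inverse_preprocesser.inverse_transform(data, processor)

# Fitting on a plain list means no feature_names_in_ is set, so the base
# function skips column reordering and the wrapper hands back a list again.
records = [[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]]
scaler = MinMaxScaler().fit(records)
scaled = scaler.transform(records).tolist()

print(inverse_transform(scaled, scaler))  # approximately recovers `records`
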
src/ydata_synthetic/preprocessing/timeseries/__init__.py

Lines changed: 0 additions & 2 deletions
@@ -1,7 +1,5 @@
 from ydata_synthetic.preprocessing.timeseries.stock import transformations as processed_stock
-from ydata_synthetic.preprocessing.timeseries.stock_univariate import transformations as processed_stock_univariate
 
 __all__ = [
     "processed_stock",
-    "processed_stock_univariate"
 ]

src/ydata_synthetic/preprocessing/timeseries/stock.py

Lines changed: 15 additions & 2 deletions
@@ -2,12 +2,25 @@
 Get the stock data from Yahoo finance data
 Data from the period 01 January 2017 - 24 January 2021
 """
+from typing import Union, List
+
 import pandas as pd
 
 from ydata_synthetic.preprocessing.timeseries.utils import real_data_loading
 
-def transformations(path, seq_len: int):
-    stock_df = pd.read_csv(path)
+def transformations(path, seq_len: int, cols: Union[str, List] = None):
+    """Apply min max scaling and roll windows of a temporal dataset.
+
+    Args:
+        path (str): path to a csv temporal dataframe
+        seq_len (int): length of the rolled sequences
+        cols (Union[str, List]): Column or list of columns to be used"""
+    if isinstance(cols, str):
+        cols = [cols]
+    if isinstance(cols, list):
+        stock_df = pd.read_csv(path)[cols]
+    else:
+        stock_df = pd.read_csv(path)
     try:
         stock_df = stock_df.set_index('Date').sort_index()
     except:
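
A short usage sketch of the extended signature, which folds the deleted stock_univariate variant into this loader; 'stock.csv' and the column names below are placeholders, not files shipped with the repo.

from ydata_synthetic.preprocessing.timeseries.stock import transformations

# A single column name is promoted to a one-element list internally,
# covering the univariate case previously handled by stock_univariate.
univariate = transformations('stock.csv', seq_len=24, cols='Close')

# A list of columns keeps the multivariate behaviour.
multivariate = transformations('stock.csv', seq_len=24, cols=['Open', 'Close'])

# Omitting cols loads every column, matching the pre-commit behaviour.
full = transformations('stock.csv', seq_len=24)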

src/ydata_synthetic/preprocessing/timeseries/stock_univariate.py

Lines changed: 0 additions & 18 deletions
This file was deleted.

src/ydata_synthetic/synthesizers/timeseries/tscwgan/model.py

Lines changed: 45 additions & 57 deletions
@@ -25,6 +25,7 @@ class TSCWGAN(BaseModel):
     def __init__(self, model_parameters, gradient_penalty_weight=10):
         """Create a base TSCWGAN."""
         self.gradient_penalty_weight = gradient_penalty_weight
+        self.cond_dim = model_parameters.condition
         super().__init__(model_parameters)
 
     def define_gan(self):
@@ -170,91 +171,78 @@ def __init__(self, batch_size):
         self.batch_size = batch_size
 
     def build_model(self, input_shape, dim, data_dim):
-        # Define blocks
-        input_to_latent = Sequential(layers=[
+        # Define input - Expected input shape is (batch_size, seq_len, noise_dim). noise_dim = Z + cond
+        noise_input = Input(shape = input_shape, batch_size = self.batch_size)
+
+        # Compose model
+        proc_input = Sequential(layers=[
             Conv1D(filters=dim, kernel_size=1, input_shape = input_shape),
             LeakyReLU(),
             Conv1D(dim, kernel_size=5, dilation_rate=2, padding="same"),
             LeakyReLU()
-        ], name='input_to_latent')
+        ], name='input_to_latent')(noise_input)
+
         block_cnn = Sequential(layers=[
             Conv1D(filters=dim, kernel_size=3, dilation_rate=2, padding="same"),
             LeakyReLU()
         ], name='block_cnn')
-        block_shift = Sequential(layers=[
+        for i in range(3):
+            if i == 0:
+                cnn_block_i = proc_input
+                cnn_block_o = block_cnn(proc_input)
+            else:
+                cnn_block_o = block_cnn(cnn_block_i)
+            cnn_block_i = Add()([cnn_block_i, cnn_block_o])
+
+        shift = Sequential(layers=[
             Conv1D(filters=10, kernel_size=3, dilation_rate=2, padding="same"),
             LeakyReLU(),
             Flatten(),
             Dense(dim*2),
             LeakyReLU()
-        ], name='block_shift')
+        ], name='block_shift')(cnn_block_i)
+
         block = Sequential(layers=[
             Dense(dim*2),
             LeakyReLU()
         ], name='block')
-        latent_to_output = Sequential([
-            Dense(data_dim)
-        ], name='latent_to_ouput')
+        for i in range(3):
+            if i == 0:
+                block_i = shift
+                block_o = block(shift)
+            else:
+                block_o = block(block_i)
+            block_i = Add()([block_i, block_o])
 
-        # Define input - Expected input shape is (batch_size, seq_len, noise_dim). noise_dim = Z + cond
-        noise_input = Input(shape = input_shape, batch_size = self.batch_size)
-
-        # Compose model
-        x = input_to_latent(noise_input)
-        x_block = block_cnn(x)
-        x = Add()([x_block, x])
-        x_block = block_cnn(x)
-        x = Add()([x_block, x])
-        x_block = block_cnn(x)
-        x = Add()([x_block, x])
-        x = block_shift(x)
-        x_block = block(x)
-        x = Add()([x_block, x])
-        x_block = block(x)
-        x = Add()([x_block, x])
-        x_block = block(x)
-        x = Add()([x_block, x])
-        x = latent_to_output(x)
-        # Output - Expected shape is (batch_size, seq_len, data_dim). data_dim does not include conditions
-        return Model(inputs=noise_input, outputs=x, name='SkipConnectionGenerator')
+        output = Dense(data_dim, name='latent_to_output')(block_i)
+        return Model(inputs = noise_input, outputs = output, name='SkipConnectionGenerator')
 
 class Critic(Model):
     """Conditional Wasserstein Critic with skip connections."""
     def __init__(self, batch_size):
         self.batch_size = batch_size
 
     def build_model(self, input_shape, dim):
-        # Define blocks
-        ts_to_latent = Sequential(layers=[
+        # Define input - Expected input shape is X + condition
+        record_input = Input(shape = input_shape, batch_size = self.batch_size)
+
+        # Compose model
+        proc_record = Sequential(layers=[
             Dense(dim*2,),
             LeakyReLU()
-        ], name='ts_to_latent')
+        ], name='ts_to_latent')(record_input)
+
         block = Sequential(layers=[
             Dense(dim*2),
             LeakyReLU()
         ], name='block')
-        latent_to_score = Sequential(layers=[
-            Dense(1)
-        ], name='latent_to_score')
-
-        # Define input - Expected input shape is X + condition
-        record_input = Input(shape = input_shape, batch_size = self.batch_size)
-
-        # Compose model
-        x = ts_to_latent(record_input)
-        x_block = block(x)
-        x = Add()([x_block, x])
-        x_block = block(x)
-        x = Add()([x_block, x])
-        x_block = block(x)
-        x = Add()([x_block, x])
-        x_block = block(x)
-        x = Add()([x_block, x])
-        x_block = block(x)
-        x = Add()([x_block, x])
-        x_block = block(x)
-        x = Add()([x_block, x])
-        x_block = block(x)
-        x = Add()([x_block, x])
-        x = latent_to_score(x)
-        return Model(inputs=record_input, outputs=x, name='SkipConnectionCritic')
+        for i in range(7):
+            if i == 0:
+                block_i = proc_record
+                block_o = block(proc_record)
+            else:
+                block_o = block(block_i)
+            block_i = Add()([block_i, block_o])
+
+        output = Dense(1, name = 'latent_to_score')(block_i)
+        return Model(inputs=record_input, outputs=output, name='SkipConnectionCritic')
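
The unrolled Add chains in both networks are now loops over a single shared block. As a standalone sketch of that pattern (shapes and depth here are illustrative, not this repo's hyperparameters), the committed loop is equivalent to repeatedly wrapping one block in a residual connection:

from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import Add, Dense, Input, LeakyReLU

inputs = Input(shape=(32,))
# One shared Sequential block: its weights are reused on every pass,
# just as 'block' and 'block_cnn' are reused in the generator and critic.
block = Sequential([Dense(32), LeakyReLU()], name='block')

x = inputs
for _ in range(3):
    # Residual step: x <- x + block(x), the same graph the unrolled
    # x_block = block(x); x = Add()([x_block, x]) chain produced.
    x = Add()([x, block(x)])

model = Model(inputs=inputs, outputs=Dense(1)(x), name='residual_sketch')
model.summary()

The i == 0 special case in the committed loops only seeds block_i before the first Add; folding it into x = Add()([x, block(x)]) as above is an equivalent, slightly tighter formulation.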
