From ca53aaa1f7ce283242f017bdd0a1fc8ec5c51895 Mon Sep 17 00:00:00 2001 From: Francisco Santos Date: Mon, 8 Nov 2021 16:50:54 +0000 Subject: [PATCH 1/3] PATEGAN base implementation Remove duplicate test files after renaming Use BaseModel variables --- requirements.txt | 1 + .../synthesizers/regular/__init__.py | 2 + .../synthesizers/regular/pategan/__init__.py | 0 .../synthesizers/regular/pategan/model.py | 256 ++++++++++++++++++ 4 files changed, 259 insertions(+) create mode 100644 src/ydata_synthetic/synthesizers/regular/pategan/__init__.py create mode 100644 src/ydata_synthetic/synthesizers/regular/pategan/model.py diff --git a/requirements.txt b/requirements.txt index 3f806133..b140d5ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ pmlb==1.0.* tqdm<5.0 typeguard==2.13.* pytest==6.2.* +tensorflow_probability==0.12.* diff --git a/src/ydata_synthetic/synthesizers/regular/__init__.py b/src/ydata_synthetic/synthesizers/regular/__init__.py index ee1497bd..7acda5d9 100644 --- a/src/ydata_synthetic/synthesizers/regular/__init__.py +++ b/src/ydata_synthetic/synthesizers/regular/__init__.py @@ -5,6 +5,7 @@ from ydata_synthetic.synthesizers.regular.dragan.model import DRAGAN from ydata_synthetic.synthesizers.regular.cramergan.model import CRAMERGAN from ydata_synthetic.synthesizers.regular.cwgangp.model import CWGANGP +from ydata_synthetic.synthesizers.regular.pategan.model import PATEGAN __all__ = [ "VanilllaGAN", @@ -14,4 +15,5 @@ "DRAGAN", "CRAMERGAN", "CWGANGP" + "PATEGAN" ] diff --git a/src/ydata_synthetic/synthesizers/regular/pategan/__init__.py b/src/ydata_synthetic/synthesizers/regular/pategan/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/ydata_synthetic/synthesizers/regular/pategan/model.py b/src/ydata_synthetic/synthesizers/regular/pategan/model.py new file mode 100644 index 00000000..28d9ca25 --- /dev/null +++ b/src/ydata_synthetic/synthesizers/regular/pategan/model.py @@ -0,0 +1,256 @@ +"PATEGAN implementation supporting Differential Privacy budget specification." +# pylint: disable = W0622, E0401 +from math import log +from typing import List, NamedTuple, Optional + +import tqdm +from tensorflow import (GradientTape, clip_by_value, concat, constant, + expand_dims, ones_like, tensor_scatter_nd_update, + transpose, zeros, zeros_like) +from tensorflow.data import Dataset +from tensorflow.dtypes import cast, float64, int64 +from tensorflow.keras import Model +from tensorflow.keras.layers import Dense, Input, ReLU +from tensorflow.keras.losses import BinaryCrossentropy +from tensorflow.keras.optimizers import Adam +from tensorflow.math import abs, exp, pow, reduce_sum, square +from tensorflow.random import uniform +from tensorflow_probability import distributions + +from ydata_synthetic.synthesizers import TrainParameters +from ydata_synthetic.synthesizers.gan import BaseModel +from ydata_synthetic.utils.gumbel_softmax import ActivationInterface + + +# pylint: disable=R0902 +class PATEGAN(BaseModel): + "A basic PATEGAN synthesizer implementation with configurable differential privacy budget." 
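+    # How it works (PATE-GAN, Jordon et al., ICLR 2019): the real data is split into
+    # disjoint partitions, one per teacher discriminator; a student discriminator is
+    # trained only on noisy majority votes of the teachers; and the generator is
+    # trained against the student. Real records therefore never reach the generator's
+    # gradients directly, and the (target_epsilon, target_delta) DP budget is tracked
+    # with the moments accountant.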
+ + __MODEL__='PATEGAN' + + def __init__(self, model_parameters, n_teachers: int, target_delta: float, target_epsilon: float): + super().__init__(model_parameters) + self.n_teachers = n_teachers + self.target_epsilon = target_epsilon + self.target_delta = target_delta + + # pylint: disable=W0201 + def define_gan(self, processor_info: Optional[NamedTuple] = None): + def discriminator(): + return Discriminator(self.batch_size).build_model((self.data_dim,), self.layers_dim) + + self.generator = Generator(self.batch_size). \ + build_model(input_shape=(self.noise_dim,), dim=self.layers_dim, data_dim=self.data_dim, + processor_info=processor_info) + self.s_discriminator = discriminator() + self.t_discriminators = [discriminator() for i in range(self.n_teachers)] + + generator_optimizer = Adam(learning_rate=self.g_lr) + discriminator_optimizer = Adam(learning_rate=self.d_lr) + + loss_fn = BinaryCrossentropy(from_logits=True) + self.generator.compile(loss=loss_fn, optimizer=generator_optimizer) + self.s_discriminator.compile(loss=loss_fn, optimizer=discriminator_optimizer) + for teacher in self.t_discriminators: + teacher.compile(loss=loss_fn, optimizer=discriminator_optimizer) + + # pylint: disable = C0103 + @staticmethod + def _moments_acc(n_teachers, votes, lap_scale, l_list): + q = (2 + lap_scale * abs(2 * votes - n_teachers))/(4 * exp(lap_scale * abs(2 * votes - n_teachers))) + + update = [] + for l in l_list: + clip = 2 * square(lap_scale) * l * (l + 1) + t = (1 - q) * pow((1 - q) / (1 - exp(2 * lap_scale) * q), l) + q * exp(2 * lap_scale * l) + update.append(reduce_sum(clip_by_value(t, clip_value_min=-clip, clip_value_max=clip))) + return cast(update, dtype=float64) + + def get_data_loader(self, data) -> List[Dataset]: + "Obtain a List of TF Datasets corresponding to partitions for each teacher in n_teachers." + loader = [] + SHUFFLE_BUFFER_SIZE = 100 + + for teacher_id in range(self.n_teachers): + start_id = int(teacher_id * len(data) / self.n_teachers) + end_id = int((teacher_id + 1) * len(data) / self.n_teachers if \ + teacher_id != (self.n_teachers - 1) else len(data)) + loader.append(Dataset.from_tensor_slices(data[start_id:end_id:])\ + .batch(self.batch_size).shuffle(SHUFFLE_BUFFER_SIZE)) + return loader + + # pylint:disable=R0913 + def train(self, data, class_ratios, train_arguments: TrainParameters, num_cols: List[str], cat_cols: List[str]): + """ + Args: + data: A pandas DataFrame or a Numpy array with the data to be synthesized + class_ratios: + train_arguments: GAN training arguments. 
+ num_cols: List of columns of the data object to be handled as numerical + cat_cols: List of columns of the data object to be handled as categorical + """ + super().train(data, num_cols, cat_cols) + + data = self.processor.transform(data) + self.data_dim = data.shape[1] + self.define_gan(self.processor.col_transform_info) + + self.class_ratios = class_ratios + + alpha = cast([0.0 for _ in range(train_arguments.num_moments)], float64) + l_list = 1 + cast(range(train_arguments.num_moments), float64) + + # print("initial alpha", l_list.shape) + + cross_entropy = BinaryCrossentropy(from_logits=True) + + generator_optimizer = Adam(learning_rate=train_arguments.lr) + disc_opt_stu = Adam(learning_rate=train_arguments.lr) + disc_opt_t = [Adam(learning_rate=train_arguments.lr) for i in range(self.n_teachers)] + + train_loader = self.get_data_loader(data, self.batch_size) + + steps = 0 + epsilon = 0 + + category_samples = distributions.Categorical(probs=self.class_ratios, dtype=float64) + + while epsilon < self.target_epsilon: + # train the teacher descriminator + for t_2 in range(train_arguments.num_teacher_iters): + for i in range(self.n_teachers): + inputs, categories = None, None + for b, data_ in enumerate(train_loader[i]): + inputs, categories = data_, b # categories = 0, data_ holds the first batch, why do we do this? + #categories will give zero value in each loop as the loop break after running the first time + #inputs will have only the first batch of data + break + + with GradientTape() as disc_tape: + # train with real + dis_data = concat([inputs, zeros((self.batch_size, 1), dtype=float64)], 1) # Why do we append a column of zeros instead of categories? + # print("1st batch data", dis_data.shape) + real_output = self.t_discriminators[i](dis_data, training=True) + # print(real_output.shape, tf.ones.shape) + + # train with fake + z = uniform([self.batch_size, self.noise_dim], dtype=float64) + # print("uniformly distributed noise", z.shape) + + sample = expand_dims(category_samples.sample(self.batch_size), axis=1) + # print("category", sample.shape) + + fake = self.generator(concat([z, sample], 1)) + # print('fake', fake.shape) + + fake_output = self.t_discriminators[i](concat([fake, sample], 1), training=True) + # print('fake_output_dis', fake_output.shape) + + # print("watch", disc_tape.watch(self.teacher_disc[i].trainable_variables) + real_loss_disc = cross_entropy(ones_like(real_output), real_output) + fake_loss_disc = cross_entropy(zeros_like(fake_output), fake_output) + + disc_loss = real_loss_disc + fake_loss_disc + # print(disc_loss, real_loss_disc, fake_loss_disc) + + disc_grad = disc_tape.gradient(disc_loss, self.t_discriminators[i].trainable_variables) + # print(gradients_of_discriminator) + + disc_opt_t[i].apply_gradients(zip(disc_grad, self.t_discriminators[i].trainable_variables)) + + # train the student discriminator + for t_3 in range(train_arguments.num_student_iters): + z = uniform([self.batch_size, self.noise_dim], dtype=float64) + + sample = expand_dims(category_samples.sample(self.batch_size), axis=1) + # print("category_stu", sample.shape) + + with GradientTape() as stu_tape: + fake = self.generator(concat([z, sample], 1)) + # print('fake_stu', fake.shape) + + predictions, clean_votes = self._pate_voting( + concat([fake, sample], 1), self.t_discriminators, train_arguments.lap_scale) + # print("noisy_labels", predictions.shape, "clean_votes", clean_votes.shape) + outputs = self.s_discriminator(concat([fake, sample], 1)) + + # update the moments + alpha = alpha + 
self._moments_acc(self.n_teachers, clean_votes, train_arguments.lap_scale, l_list)
+                    # print("final_alpha", alpha)
+
+                    stu_loss = cross_entropy(predictions, outputs)
+                    gradients_of_stu = stu_tape.gradient(stu_loss, self.s_discriminator.trainable_variables)
+                    # print(gradients_of_stu)
+
+                    disc_opt_stu.apply_gradients(zip(gradients_of_stu, self.s_discriminator.trainable_variables))
+
+            # train the generator
+            z = uniform([self.batch_size, self.noise_dim], dtype=float64)
+
+            sample_g = expand_dims(category_samples.sample(self.batch_size), axis=1)
+
+            with GradientTape() as gen_tape:
+                fake = self.generator(concat([z, sample_g], 1))
+                output = self.s_discriminator(concat([fake, sample_g], 1))
+
+                loss_gen = cross_entropy(ones_like(output), output)
+            gradients_of_generator = gen_tape.gradient(loss_gen, self.generator.trainable_variables)
+            generator_optimizer.apply_gradients(zip(gradients_of_generator, self.generator.trainable_variables))
+
+            # Calculate the current privacy cost
+            epsilon = min((alpha - log(train_arguments.delta)) / l_list)
+            if steps % train_arguments.sample_interval == 0:
+                print("Step : ", steps, "Loss SD : ", stu_loss, "Loss G : ", loss_gen, "Epsilon : ", epsilon)
+
+            steps += 1
+            # self.generator.summary()
+
+    def _pate_voting(self, data, netTD, lap_scale):
+        # TODO: Validate the logic against original article
+        ## Collect the teachers' votes (1/0) from netTD for each record in data and store them in results
+        results = zeros([len(netTD), self.batch_size], dtype=int64)
+        # print(results)
+        for i in range(len(netTD)):
+            output = netTD[i](data, training=True)
+            pred = transpose(cast((output > 0.5), int64))
+            # print(pred)
+            results = tensor_scatter_nd_update(results, constant([[i]]), pred)
+            # print(results)
+
+        #store the sum of the votes attributed by each discriminator to each record (values between 0 and len(netTD))
+        clean_votes = expand_dims(cast(reduce_sum(results, 0), dtype=float64), 1)
+        # print("clean_votes",clean_votes)
+        noise_sample = distributions.Laplace(loc=0, scale=1/lap_scale).sample(clean_votes.shape)
+        # print("noise_sample", noise_sample)
+        noisy_results = clean_votes + cast(noise_sample, float64)
+        noisy_labels = cast((noisy_results > len(netTD)/2), float64)
+
+        return noisy_labels, clean_votes
+
+
+class Discriminator(Model):
+    def __init__(self, batch_size):
+        self.batch_size = batch_size
+
+    def build_model(self, input_shape, dim):
+        input = Input(shape=input_shape, batch_size=self.batch_size)
+        x = Dense(dim * 4)(input)
+        x = ReLU()(x)
+        x = Dense(dim * 2)(x)
+        x = Dense(1)(x)
+        return Model(inputs=input, outputs=x)
+
+
+class Generator(Model):
+    def __init__(self, batch_size):
+        self.batch_size = batch_size
+
+    def build_model(self, input_shape, dim, data_dim, processor_info: Optional[NamedTuple] = None):
+        input = Input(shape=input_shape, batch_size = self.batch_size)
+        x = Dense(dim)(input)
+        x = ReLU()(x)
+        x = Dense(dim * 2)(x)
+        x = Dense(data_dim)(x)
+        if processor_info:
+            x = ActivationInterface(processor_info, 'ActivationInterface')(x)
+        return Model(inputs=input, outputs=x)

From 48f21e9476540b55571410bfc34745b87163d49b Mon Sep 17 00:00:00 2001
From: Francisco Santos
Date: Thu, 21 Apr 2022 17:34:44 +0100
Subject: [PATCH 2/3] feat: Integrate PATEGAN

---
 examples/regular/pategan_example.py       |  52 ++++++
 .../preprocessing/regular/processor.py    |   1 +
 .../synthesizers/regular/pategan/model.py | 153 ++++++------------
 3 files changed, 104 insertions(+), 102 deletions(-)
 create mode 100644 examples/regular/pategan_example.py

diff --git 
a/examples/regular/pategan_example.py b/examples/regular/pategan_example.py new file mode 100644 index 00000000..9fbf885a --- /dev/null +++ b/examples/regular/pategan_example.py @@ -0,0 +1,52 @@ +from pmlb import fetch_data + +from ydata_synthetic.synthesizers.regular import PATEGAN +from ydata_synthetic.synthesizers import ModelParameters, TrainParameters + +model = PATEGAN + +#Load data and define the data processor parameters +data = fetch_data('adult') +num_cols = ['age', 'fnlwgt', 'capital-gain', 'capital-loss', 'hours-per-week'] +cat_cols = ['workclass','education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', + 'native-country', 'target'] + +print(data.head()) + +#Defining the training parameters + +noise_dim = 128 +dim = 128 +batch_size = 50 + +log_step = 100 +epochs = 300+1 +learning_rate = [5e-4, 3e-3] +beta_1 = 0.5 +beta_2 = 0.9 +models_dir = './cache' + +gan_args = ModelParameters(batch_size=batch_size, + lr=learning_rate, + betas=(beta_1, beta_2), + noise_dim=noise_dim, + layers_dim=dim) + +# PATEGAN specific arguments +n_moments = 100 +n_teacher_iters = 5 +n_student_iters = 5 +n_teachers = min(int(len(data)/1e3), 100) +## Privacy/utility tradeoff specification +target_delta = 1e-3 +target_epsilon = 1e-1 +lap_scale = 1e-4 + +synthesizer = model(gan_args, n_teachers, target_delta, target_epsilon) +synthesizer.train(data, num_cols, cat_cols, + n_teacher_iters, n_student_iters, n_moments, lap_scale) + +synthesizer.save('pate_test.pkl') + +synthesizer = model.load('pate_test.pkl') +synth_data = synthesizer.sample(1000) diff --git a/src/ydata_synthetic/preprocessing/regular/processor.py b/src/ydata_synthetic/preprocessing/regular/processor.py index cdbabb97..7154ab2a 100644 --- a/src/ydata_synthetic/preprocessing/regular/processor.py +++ b/src/ydata_synthetic/preprocessing/regular/processor.py @@ -22,6 +22,7 @@ class RegularModels(Enum): WGAN = 'WGAN' WGAN_GP = 'WGAN_GP' CWGAN_GP = 'CWGAN_GP' + PATEGAN = 'PATEGAN' @typechecked diff --git a/src/ydata_synthetic/synthesizers/regular/pategan/model.py b/src/ydata_synthetic/synthesizers/regular/pategan/model.py index 28d9ca25..00116954 100644 --- a/src/ydata_synthetic/synthesizers/regular/pategan/model.py +++ b/src/ydata_synthetic/synthesizers/regular/pategan/model.py @@ -3,9 +3,7 @@ from math import log from typing import List, NamedTuple, Optional -import tqdm -from tensorflow import (GradientTape, clip_by_value, concat, constant, - expand_dims, ones_like, tensor_scatter_nd_update, +from tensorflow import (GradientTape, clip_by_value, constant, expand_dims, ones_like, tensor_scatter_nd_update, transpose, zeros, zeros_like) from tensorflow.data import Dataset from tensorflow.dtypes import cast, float64, int64 @@ -19,7 +17,7 @@ from ydata_synthetic.synthesizers import TrainParameters from ydata_synthetic.synthesizers.gan import BaseModel -from ydata_synthetic.utils.gumbel_softmax import ActivationInterface +from ydata_synthetic.utils.gumbel_softmax import GumbelSoftmaxActivation # pylint: disable=R0902 @@ -28,20 +26,20 @@ class PATEGAN(BaseModel): __MODEL__='PATEGAN' - def __init__(self, model_parameters, n_teachers: int, target_delta: float, target_epsilon: float): + def __init__(self, model_parameters, n_teachers: int, target_delta: float = 1e-5, target_epsilon: float = 5e-2): super().__init__(model_parameters) self.n_teachers = n_teachers self.target_epsilon = target_epsilon self.target_delta = target_delta # pylint: disable=W0201 - def define_gan(self, processor_info: Optional[NamedTuple] = None): + 
def define_gan(self, activation_info: Optional[NamedTuple] = None): def discriminator(): return Discriminator(self.batch_size).build_model((self.data_dim,), self.layers_dim) self.generator = Generator(self.batch_size). \ build_model(input_shape=(self.noise_dim,), dim=self.layers_dim, data_dim=self.data_dim, - processor_info=processor_info) + activation_info=activation_info) self.s_discriminator = discriminator() self.t_discriminators = [discriminator() for i in range(self.n_teachers)] @@ -76,18 +74,21 @@ def get_data_loader(self, data) -> List[Dataset]: end_id = int((teacher_id + 1) * len(data) / self.n_teachers if \ teacher_id != (self.n_teachers - 1) else len(data)) loader.append(Dataset.from_tensor_slices(data[start_id:end_id:])\ - .batch(self.batch_size).shuffle(SHUFFLE_BUFFER_SIZE)) + .batch(self.batch_size).shuffle(SHUFFLE_BUFFER_SIZE).repeat().as_numpy_iterator()) return loader # pylint:disable=R0913 - def train(self, data, class_ratios, train_arguments: TrainParameters, num_cols: List[str], cat_cols: List[str]): + def train(self, data, num_cols: List[str], cat_cols: List[str], n_teacher_iters: int = 5, n_student_iters: int = 5, + n_moments: int = 100, lap_scale: float = 1e-4): """ Args: data: A pandas DataFrame or a Numpy array with the data to be synthesized - class_ratios: - train_arguments: GAN training arguments. num_cols: List of columns of the data object to be handled as numerical cat_cols: List of columns of the data object to be handled as categorical + n_teacher_iters: Number of train steps of each teacher discriminator per global step + n_student_iters: Number of train steps of the student discriminator per global step + n_moments: Number of moments accounted in the privacy budget computations + lap_scale: Inverse laplace noise scale multiplier """ super().train(data, num_cols, cat_cols) @@ -95,133 +96,81 @@ def train(self, data, class_ratios, train_arguments: TrainParameters, num_cols: self.data_dim = data.shape[1] self.define_gan(self.processor.col_transform_info) - self.class_ratios = class_ratios + alpha = cast([0.0 for _ in range(n_moments)], float64) + l_list = 1 + cast(range(n_moments), float64) + lap_scale = cast(lap_scale, float64) - alpha = cast([0.0 for _ in range(train_arguments.num_moments)], float64) - l_list = 1 + cast(range(train_arguments.num_moments), float64) - - # print("initial alpha", l_list.shape) - - cross_entropy = BinaryCrossentropy(from_logits=True) - - generator_optimizer = Adam(learning_rate=train_arguments.lr) - disc_opt_stu = Adam(learning_rate=train_arguments.lr) - disc_opt_t = [Adam(learning_rate=train_arguments.lr) for i in range(self.n_teachers)] - - train_loader = self.get_data_loader(data, self.batch_size) + train_loaders = self.get_data_loader(data) steps = 0 epsilon = 0 - category_samples = distributions.Categorical(probs=self.class_ratios, dtype=float64) - while epsilon < self.target_epsilon: # train the teacher descriminator - for t_2 in range(train_arguments.num_teacher_iters): - for i in range(self.n_teachers): - inputs, categories = None, None - for b, data_ in enumerate(train_loader[i]): - inputs, categories = data_, b # categories = 0, data_ holds the first batch, why do we do this? 
- #categories will give zero value in each loop as the loop break after running the first time - #inputs will have only the first batch of data - break + for t_2 in range(n_teacher_iters): + for train_loader, t_discriminator in zip(train_loaders, self.t_discriminators): + z = uniform([self.batch_size, self.noise_dim], dtype=float64) with GradientTape() as disc_tape: - # train with real - dis_data = concat([inputs, zeros((self.batch_size, 1), dtype=float64)], 1) # Why do we append a column of zeros instead of categories? - # print("1st batch data", dis_data.shape) - real_output = self.t_discriminators[i](dis_data, training=True) - # print(real_output.shape, tf.ones.shape) - - # train with fake - z = uniform([self.batch_size, self.noise_dim], dtype=float64) - # print("uniformly distributed noise", z.shape) - - sample = expand_dims(category_samples.sample(self.batch_size), axis=1) - # print("category", sample.shape) - - fake = self.generator(concat([z, sample], 1)) - # print('fake', fake.shape) - - fake_output = self.t_discriminators[i](concat([fake, sample], 1), training=True) - # print('fake_output_dis', fake_output.shape) + # loss on real data + real_batch=train_loader.next() + real_output = t_discriminator(real_batch, training=True) + real_loss_disc = t_discriminator.loss(ones_like(real_output), real_output) - # print("watch", disc_tape.watch(self.teacher_disc[i].trainable_variables) - real_loss_disc = cross_entropy(ones_like(real_output), real_output) - fake_loss_disc = cross_entropy(zeros_like(fake_output), fake_output) + # loss on fake data + fake = self.generator(z) + fake_output = t_discriminator(fake, training=True) + fake_loss_disc = t_discriminator.loss(zeros_like(fake_output), fake_output) + # compute and apply gradients disc_loss = real_loss_disc + fake_loss_disc - # print(disc_loss, real_loss_disc, fake_loss_disc) - - disc_grad = disc_tape.gradient(disc_loss, self.t_discriminators[i].trainable_variables) - # print(gradients_of_discriminator) - - disc_opt_t[i].apply_gradients(zip(disc_grad, self.t_discriminators[i].trainable_variables)) + disc_grad = disc_tape.gradient(disc_loss, t_discriminator.trainable_variables) + t_discriminator.optimizer.apply_gradients(zip(disc_grad, t_discriminator.trainable_variables)) # train the student discriminator - for t_3 in range(train_arguments.num_student_iters): + for t_3 in range(n_student_iters): z = uniform([self.batch_size, self.noise_dim], dtype=float64) - sample = expand_dims(category_samples.sample(self.batch_size), axis=1) - # print("category_stu", sample.shape) - with GradientTape() as stu_tape: - fake = self.generator(concat([z, sample], 1)) - # print('fake_stu', fake.shape) + # student discriminator loss + fake = self.generator(z) + predictions, clean_votes = self._pate_voting(fake, self.t_discriminators, lap_scale) + outputs = self.s_discriminator(fake) + stu_loss = self.s_discriminator.loss(predictions, outputs) - predictions, clean_votes = self._pate_voting( - concat([fake, sample], 1), self.t_discriminators, train_arguments.lap_scale) - # print("noisy_labels", predictions.shape, "clean_votes", clean_votes.shape) - outputs = self.s_discriminator(concat([fake, sample], 1)) - - # update the moments - alpha = alpha + self._moments_acc(self.n_teachers, clean_votes, train_arguments.lap_scale, l_list) - # print("final_alpha", alpha) - - stu_loss = cross_entropy(predictions, outputs) + # compute and apply gradients gradients_of_stu = stu_tape.gradient(stu_loss, self.s_discriminator.trainable_variables) - # print(gradients_of_stu) + 
self.s_discriminator.optimizer.apply_gradients(zip(gradients_of_stu, self.s_discriminator.trainable_variables))
 
-                    disc_opt_stu.apply_gradients(zip(gradients_of_stu, self.s_discriminator.trainable_variables))
+                # update the moments
+                alpha = alpha + self._moments_acc(self.n_teachers, clean_votes, lap_scale, l_list)
 
             # train the generator
             z = uniform([self.batch_size, self.noise_dim], dtype=float64)
-
-            sample_g = expand_dims(category_samples.sample(self.batch_size), axis=1)
-
             with GradientTape() as gen_tape:
-                fake = self.generator(concat([z, sample_g], 1))
-                output = self.s_discriminator(concat([fake, sample_g], 1))
+                fake = self.generator(z)
+                output = self.s_discriminator(fake)
+                loss_gen = self.generator.loss(ones_like(output), output)
 
-                loss_gen = cross_entropy(ones_like(output), output)
+            # compute and apply gradients
             gradients_of_generator = gen_tape.gradient(loss_gen, self.generator.trainable_variables)
-            generator_optimizer.apply_gradients(zip(gradients_of_generator, self.generator.trainable_variables))
+            self.generator.optimizer.apply_gradients(zip(gradients_of_generator, self.generator.trainable_variables))
 
             # Calculate the current privacy cost
-            epsilon = min((alpha - log(train_arguments.delta)) / l_list)
-            if steps % train_arguments.sample_interval == 0:
-                print("Step : ", steps, "Loss SD : ", stu_loss, "Loss G : ", loss_gen, "Epsilon : ", epsilon)
+            epsilon = min((alpha - log(self.target_delta)) / l_list).numpy()
+            print(f"Step : {steps} Loss SD : {stu_loss:.2e} Loss G : {loss_gen:.2e} Epsilon : {epsilon:.2e}")
 
             steps += 1
-            # self.generator.summary()
 
     def _pate_voting(self, data, netTD, lap_scale):
-        # TODO: Validate the logic against original article
-        ## Collect the teachers' votes (1/0) from netTD for each record in data and store them in results
         results = zeros([len(netTD), self.batch_size], dtype=int64)
-        # print(results)
         for i in range(len(netTD)):
             output = netTD[i](data, training=True)
             pred = transpose(cast((output > 0.5), int64))
-            # print(pred)
             results = tensor_scatter_nd_update(results, constant([[i]]), pred)
-            # print(results)
 
-        #store the sum of the votes attributed by each discriminator to each record (values between 0 and len(netTD))
         clean_votes = expand_dims(cast(reduce_sum(results, 0), dtype=float64), 1)
-        # print("clean_votes",clean_votes)
         noise_sample = distributions.Laplace(loc=0, scale=1/lap_scale).sample(clean_votes.shape)
-        # print("noise_sample", noise_sample)
         noisy_results = clean_votes + cast(noise_sample, float64)
         noisy_labels = cast((noisy_results > len(netTD)/2), float64)
 
         return noisy_labels, clean_votes
@@ -245,12 +194,12 @@ class Generator(Model):
     def __init__(self, batch_size):
         self.batch_size = batch_size
 
-    def build_model(self, input_shape, dim, data_dim, processor_info: Optional[NamedTuple] = None):
+    def build_model(self, input_shape, dim, data_dim, activation_info: Optional[NamedTuple] = None, tau: Optional[float] = None):
         input = Input(shape=input_shape, batch_size = self.batch_size)
         x = Dense(dim)(input)
         x = ReLU()(x)
         x = Dense(dim * 2)(x)
         x = Dense(data_dim)(x)
-        if processor_info:
-            x = ActivationInterface(processor_info, 'ActivationInterface')(x)
+        if activation_info:
+            x = GumbelSoftmaxActivation(activation_info, tau=tau)(x)
         return Model(inputs=input, outputs=x)

From da061dfbd86932453caaad822f31379eb725e99f Mon Sep 17 00:00:00 2001
From: Francisco Santos
Date: Mon, 16 May 2022 20:14:49 +0100
Subject: [PATCH 3/3] update and add comparison example

---
 examples/regular/pategan_comparison.ipynb | 438 ++++++++++++++++++
 examples/regular/pategan_example.py       | 
4 +- .../synthesizers/regular/__init__.py | 2 +- .../synthesizers/regular/pategan/model.py | 11 +- 4 files changed, 446 insertions(+), 9 deletions(-) create mode 100644 examples/regular/pategan_comparison.ipynb diff --git a/examples/regular/pategan_comparison.ipynb b/examples/regular/pategan_comparison.ipynb new file mode 100644 index 00000000..80527ac6 --- /dev/null +++ b/examples/regular/pategan_comparison.ipynb @@ -0,0 +1,438 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PATEGAN example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we will be comparing the YData-Synthetic PATE-GAN implementation to the one from the original authors in the [mlforhealthlabpub](https://github.com/vanderschaarlab/mlforhealthlabpub/tree/main/alg/pategan) package. Since this package has a lot of dependencies and uses TensorFlow 1, we recommend that you create a new environment and follow their setup instructions available [here](https://github.com/vanderschaarlab/mlforhealthlabpub/blob/main/doc/install.md).\n", + "\n", + "## Introduction\n", + "To run this comparison we have executed `mlforhealthlabpub`'s implementation via the main script, together with their fake dataset script used for random dataset generation. With this utility script we have produced a train dataset used to train both synthesizers. Both synthesizers are defined according to the same set of parameters. After producing two versions of synthetic datasets we will use [Pandas Profiling](https://github.com/ydataai/pandas-profiling) to compare the outputs regarding fidelity.\n", + "\n", + "### Import the required packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-05-16 18:19:02.584078: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import pandas_profiling as pp\n", + "\n", + "from ydata_synthetic.synthesizers.regular import PATEGAN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load the train dataset and the synthetic dataset from the original dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "dir = '../../data/'\n", + "train_data = pd.read_csv(dir+'train_dataset.csv', index_col=0)\n", + "orig_synth = pd.read_csv(dir+'orig_synth.csv', index_col=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train the YData-Synthetic synthesizer" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-05-16 18:19:03.613787: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set\n", + "2022-05-16 18:19:03.614447: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1\n", + "2022-05-16 18:19:03.643719: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: \n", + "pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2070 Super computeCapability: 7.5\n", + "coreClock: 1.38GHz coreCount: 40 deviceMemorySize: 7.79GiB deviceMemoryBandwidth: 417.29GiB/s\n", + "2022-05-16 18:19:03.643755: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] 
Successfully opened dynamic library libcudart.so.11.0\n", + "2022-05-16 18:19:03.646305: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11\n", + "2022-05-16 18:19:03.646372: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11\n", + "2022-05-16 18:19:03.647028: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10\n", + "2022-05-16 18:19:03.647195: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10\n", + "2022-05-16 18:19:03.648253: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.10\n", + "2022-05-16 18:19:03.648816: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.11\n", + "2022-05-16 18:19:03.648911: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8\n", + "2022-05-16 18:19:03.649488: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:AutoGraph could not transform > and will run it as-is.\n", + "Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", + "Cause: module 'gast' has no attribute 'Index'\n", + "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n", + "WARNING: AutoGraph could not transform > and will run it as-is.\n", + "Please report this to the TensorFlow team. 
When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", + "Cause: module 'gast' has no attribute 'Index'\n", + "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-05-16 18:19:03.666574: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2022-05-16 18:19:03.667386: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set\n", + "2022-05-16 18:19:03.667959: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: \n", + "pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2070 Super computeCapability: 7.5\n", + "coreClock: 1.38GHz coreCount: 40 deviceMemorySize: 7.79GiB deviceMemoryBandwidth: 417.29GiB/s\n", + "2022-05-16 18:19:03.668011: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0\n", + "2022-05-16 18:19:03.668032: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11\n", + "2022-05-16 18:19:03.668045: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11\n", + "2022-05-16 18:19:03.668059: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10\n", + "2022-05-16 18:19:03.668072: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10\n", + "2022-05-16 18:19:03.668085: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.10\n", + "2022-05-16 18:19:03.668098: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.11\n", + "2022-05-16 18:19:03.668111: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8\n", + "2022-05-16 18:19:03.668726: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0\n", + "2022-05-16 18:19:03.668769: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0\n", + "2022-05-16 18:19:04.046932: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1261] Device interconnect StreamExecutor with strength 1 edge matrix:\n", + "2022-05-16 18:19:04.046951: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1267] 0 \n", + "2022-05-16 18:19:04.046955: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1280] 0: N \n", + "2022-05-16 18:19:04.047834: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1406] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6713 MB memory) -> physical GPU (device: 0, name: NVIDIA GeForce RTX 2070 Super, pci bus id: 0000:01:00.0, compute capability: 7.5)\n", + "2022-05-16 18:19:04.312378: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11\n", + "2022-05-16 18:19:04.741514: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] 
Successfully opened dynamic library libcublasLt.so.11\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Step : 0 Loss SD : 6.87e-01 Loss G : 7.65e-01 Epsilon : 8.44e-01\n",
+      "Step : 1 Loss SD : 7.01e-01 Loss G : 7.10e-01 Epsilon : 1.11e+00\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Synthetic data generation: 100%|██████████| 157/157 [00:00<00:00, 734.24it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from ydata_synthetic.synthesizers import ModelParameters\n",
+    "\n",
+    "num_cols = train_data.columns.to_list()\n",
+    "cat_cols = []\n",
+    "\n",
+    "#Defining the training parameters\n",
+    "noise_dim = 128\n",
+    "dim = 4*len(train_data.columns)\n",
+    "batch_size = 64\n",
+    "\n",
+    "log_step = 100\n",
+    "learning_rate = [5e-4, 3e-3]\n",
+    "beta_1 = 0.5\n",
+    "beta_2 = 0.9\n",
+    "models_dir = './cache'\n",
+    "\n",
+    "gan_args = ModelParameters(batch_size=batch_size,\n",
+    "                           lr=learning_rate,\n",
+    "                           betas=(beta_1, beta_2),\n",
+    "                           noise_dim=noise_dim,\n",
+    "                           layers_dim=dim)\n",
+    "\n",
+    "# PATEGAN specific arguments\n",
+    "n_moments = 20\n",
+    "n_teacher_iters = 1\n",
+    "n_student_iters = 1\n",
+    "n_teachers = 10\n",
+    "## Privacy/utility tradeoff specification\n",
+    "target_delta = 1e-5\n",
+    "target_epsilon = 1\n",
+    "lap_scale = 1e-2\n",
+    "\n",
+    "model = PATEGAN\n",
+    "\n",
+    "synthesizer = model(gan_args, n_teachers, target_delta, target_epsilon)\n",
+    "synthesizer.train(train_data, num_cols, cat_cols,\n",
+    "                  n_teacher_iters, n_student_iters, n_moments, lap_scale)\n",
+    "\n",
+    "synthesizer.save('pate_test.pkl')\n",
+    "\n",
+    "synthesizer = model.load('pate_test.pkl')\n",
+    "ydata_synth = synthesizer.sample(train_data.shape[0])\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Profiling the synthetic samples"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_prof = pp.ProfileReport(train_data, title='Train data')\n",
+    "\n",
+    "orig_prof = pp.ProfileReport(orig_synth, title='Original PATEGAN implementation synthetic samples')\n",
+    "ydata_prof = pp.ProfileReport(ydata_synth, title='YData-Synthetic PATEGAN implementation synthetic samples')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "74199f13341e4415aefcbdfa82a9df6a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Summarize dataset:   0%|          | 0/5 [00:00

[... remainder of the notebook JSON, the pategan_example.py and regular/__init__.py hunks, and the start of the model.py hunk are truncated in the source ...]

             pred = transpose(cast((output > 0.5), int64))
             results = tensor_scatter_nd_update(results, constant([[i]]), pred)
@@ -184,8 +183,7 @@ def __init__(self, batch_size):
     def build_model(self, input_shape, dim):
         input = Input(shape=input_shape, batch_size=self.batch_size)
         x = Dense(dim * 4)(input)
-        x = ReLU()(x)
-        x = Dense(dim * 2)(x)
+        x = Activation('relu')(x)
         x = Dense(1)(x)
         return Model(inputs=input, outputs=x)
@@ -197,8 +195,9 @@ def __init__(self, batch_size):
     def build_model(self, input_shape, dim, data_dim, activation_info: Optional[NamedTuple] = None, tau: Optional[float] = None):
         input = Input(shape=input_shape, batch_size = self.batch_size)
         x = Dense(dim)(input)
-        x = ReLU()(x)
+        x = Activation('tanh')(x)
         x = Dense(dim * 2)(x)
+        x = Activation('tanh')(x)
         x = Dense(data_dim)(x)
         if activation_info:
             x = GumbelSoftmaxActivation(activation_info, tau=tau)(x)
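For reference, the privacy accounting that this series implements in _pate_voting, _moments_acc and the epsilon check of train() can be followed end-to-end in the minimal NumPy sketch below. The helper names and the random stand-in teacher votes are illustrative only, not part of the ydata-synthetic API; the formulas mirror the TensorFlow code above.

import numpy as np

rng = np.random.default_rng(0)
n_teachers, batch_size = 10, 64
lap_scale, target_delta = 1e-2, 1e-5
l_list = 1 + np.arange(20, dtype=np.float64)   # moments 1..n_moments
alpha = np.zeros_like(l_list)                  # accumulated log moments

def noisy_majority(teacher_votes, lap_scale):
    # Mirrors _pate_voting: sum the binary teacher votes per record, add
    # Laplace(1/lap_scale) noise, and threshold at half the ensemble size.
    clean_votes = teacher_votes.sum(axis=0).astype(np.float64)
    noisy = clean_votes + rng.laplace(scale=1.0 / lap_scale, size=clean_votes.shape)
    labels = (noisy > teacher_votes.shape[0] / 2).astype(np.float64)
    return labels, clean_votes

def moments_update(n_teachers, clean_votes, lap_scale, l_list):
    # Mirrors _moments_acc: per-moment privacy-loss increment for one batch.
    x = lap_scale * np.abs(2 * clean_votes - n_teachers)
    q = (2 + x) / (4 * np.exp(x))
    update = np.empty_like(l_list)
    for k, l in enumerate(l_list):
        clip = 2 * lap_scale ** 2 * l * (l + 1)
        t = (1 - q) * ((1 - q) / (1 - np.exp(2 * lap_scale) * q)) ** l \
            + q * np.exp(2 * lap_scale * l)
        update[k] = np.clip(t, -clip, clip).sum()
    return update

# One simulated student step on random stand-in votes for a generated batch.
teacher_votes = rng.integers(0, 2, size=(n_teachers, batch_size))
labels, clean_votes = noisy_majority(teacher_votes, lap_scale)
alpha += moments_update(n_teachers, clean_votes, lap_scale, l_list)

# Privacy cost so far; train() loops while this stays below target_epsilon.
epsilon = ((alpha - np.log(target_delta)) / l_list).min()
print(f"epsilon after one student step: {epsilon:.3f}")

Because each per-record moment increment is clipped at 2*lap_scale^2*l*(l+1), a smaller lap_scale (i.e. more Laplace noise per vote) slows the growth of alpha and therefore buys more training steps before epsilon crosses target_epsilon, which is exactly the privacy/utility trade-off the example scripts expose through lap_scale and target_epsilon.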