From 2bd4db57ae4afb54706bc91f66c9fd077a1e9de2 Mon Sep 17 00:00:00 2001 From: Dawith Date: Sun, 1 Mar 2026 23:19:05 -0500 Subject: [PATCH 01/18] Latent space representation with semantic and variational embedding spaces defined --- model/latent.py | 96 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 model/latent.py diff --git a/model/latent.py b/model/latent.py new file mode 100644 index 0000000..e948f27 --- /dev/null +++ b/model/latent.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- + +from keras import Input, Model +from keras.layers import Dense + +class Semantic(Model): + def __init__(self, mlp_dim, semantic_dims): + self.name = "Semantic Embedding Block" + self.latent = self._build(mlp_dim, semantic_dims) + + def _build(mlp_dim, semantic_dims): + """ + Embedding space for semantically meaningful variables. Everything is + laterally spaced out. + + Args: + dims (list): List of dimensions for each variable. The final + dimension is reserved for regression variables. + + Returns: + Model: Keras Model object that contains just a single layer deep + model, with a structured latent space that maps to + semantically meaningful variables. + """ + inputs = Input(shape=(mpl_dim,)) + targets = [Input(shape=(dim,)) for dim in semantic_dims] + + # One-hot encoding spaces + # Sample type, growth, treatment, dose value, dose unit + one_hots = [Dense(dim, activation="softmax")(inputs) + for dim in semantic_dims[:-1]] + + # Regression spaces + # Min. frequency, Max. frequency, baseline time, treatment time, + # loop dt + reg = Dense(dim[-1], activation=None)(inputs) + + # Compute categorical_crossentropy error against targets for + # categorical variables + errors = [ + keras.losses.categorical_crossentropy(target, pred) + for target, pred in zip(targets[:-1], one_hots) + ] + + # Compute MAE + errors.append(keras.losses.mean_absolute_error(targets[-1], reg)) + + # Combine everything into single dense layer + concat = keras.layers.concatenate(one_hots + [reg]) + output = Dense(mlp_dim, activation="relu")(concat) + + return Model(inputs, [output, errors]) + + def call(self, inputs): + return self.latent(inputs) + + def summary(self): + return self.latent.summary() + +class Variational(Model): + def __init__(self, name): + self.name = name + + def _build(dim): + inputs = Input(shape=(dim,)) + x = Dense(dim, activation="relu")(inputs) + z1 = Dense(dim, activation="relu")(x) + z2 = Dense(dim, activation="relu")(x) + z = Sampling()([z_mean, z_log_var, z]) + + return Model(inputs, [z_mean, z_log_var, z]) + + def call(self, inputs): + return self.latent(inputs) + + def summary(self): + return self.latent.summary() + +class Sampling(layers.Layer): + def call(self, inputs): + z_mean, z_log_var = inputs + + # Reparameterization trick + eps = tf.random.normal(shape=tf.shape(z_mean)) + z = z_mean + tf.exp(0.5 * z_log_var) * eps + + # KL divergence term + kl = -0.5 * tf.reduce_sum( + 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), + axis=1 + ) + self.add_loss(tf.reduce_mean(kl)) + + return z + +# EOF From d19465b4f0702f9046db6f980dda8605556bee22 Mon Sep 17 00:00:00 2001 From: Dawith Date: Sun, 1 Mar 2026 23:19:50 -0500 Subject: [PATCH 02/18] Encoder separated out from the model.py for clarity --- model/encoder.py | 123 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 model/encoder.py diff --git a/model/encoder.py b/model/encoder.py new file mode 100644 index 0000000..9775e55 --- /dev/null +++ b/model/encoder.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +""" +Model Encoder block +""" + +# Third party module imports +from keras import Input, Model +from keras.layers import BatchNormalization, Dense, Dropout, GlobalAveragePooling1D + +# Local module imports +from model.transformer import TimeseriesTransformerBuilder as TSTFBuilder +from model.latent import Semantic, Variational + +class Encoder(Model): + """ + Encoder block that inherits keras Model class. + + Args: + input_shape (tuple): Shape of the input tensor. + head_size (int): Number of features in each attention head. + num_heads (int) Number of attention heads. + ff_dim (int): Number of neurons in the feedforward neural network. + num_Transformer_blocks (int): Number of Transformer blocks. + mlp_units (List(int)): Number of neurons in each layer of the MLP. + n_classes (int): Number of output classes. + dropout (float): Dropout rate. + mlp_dropout (float): Dropout rate in the MLP. + + Attributes: + timeseriestransformer: Keras model, the TimeSeriesTransformer model. + """ + + def __init__(self, input_shape, head_size, num_heads, ff_dim, + num_Transformer_blocks, mlp_units, + dropout=0, mlp_dropout=0): + self.tstfbuilder = TSTFBuilder() + + super(Encoder, self).__init__() + self.encoder = self._modelstack( + input_shape, + head_size, + num_heads, + ff_dim, + num_Transformer_blocks, + mlp_units, + dropout, + mlp_dropout + ) + + def _modelstack(self, input_shape, head_size, num_heads, ff_dim, + num_Transformer_blocks, mlp_units, n_classes, + dropout, mlp_dropout): + """ + Creates a Timeseries Transformer model. This consists of a series of + Transformer blocks followed by an MLP. + + Args: + input_shape: tuple, shape of the input tensor. + head_size: int, the number of features in each attention head. + num_heads: int, the number of attention heads. + ff_dim: int, the number of neurons in the feedforward neural + network. + num_Transformer_blocks: int, the number of Transformer blocks. + mlp_units: list of ints, the number of neurons in each layer of + the MLP. + n_classes: list of ints, the number of output classes. + dropout: float, dropout rate. + mlp_dropout: float, dropout rate in the MLP. + + Returns: + A Keras model. + """ + + inputs = Input(shape=input_shape) + x = BatchNormalization()(inputs) + + # Transformer blocks + for _ in range(num_Transformer_blocks): + x = self.tstfbuilder.build_transformerblock( + x, + head_size, + num_heads, + ff_dim, + dropout + ) + + # Pooling and simple DNN block + x = GlobalAveragePooling1D(data_format="channels_first")(x) + for dim in mlp_units: + x = Dense(dim, activation="relu")(x) + x = Dropout(mlp_dropout)(x) + + # Two separate latent spaces supported + y = Dense(n_classes[0], activation="relu")(x) + z = Dense(n_classes[1], activation="relu")(x) + + return Model(inputs, [y, z]) + + def call(self, inputs): + """ + Calls the TimeSeriesTransformer model on a batch of inputs. + + Args: + inputs: Tensor, batch of input data. + + Returns: + Tensor, resulting output of the TimeSeriesTransformer model. + """ + return self.encoder(inputs) + + def summary(self): + """ + Prints a summary of the TimeSeriesTransformer model. + + Args: + None. + + Returns: + None. + """ + self.encoder.summary() + +# EOF From c5aa13bcaf3b14be196103bd268330d08128bf0f Mon Sep 17 00:00:00 2001 From: Dawith Date: Sun, 1 Mar 2026 23:20:22 -0500 Subject: [PATCH 03/18] Decoder separated out from model.py for clarity, work in progress to fix compatibility with new embedding structure --- model/decoder.py | 101 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 model/decoder.py diff --git a/model/decoder.py b/model/decoder.py new file mode 100644 index 0000000..65594c6 --- /dev/null +++ b/model/decoder.py @@ -0,0 +1,101 @@ +# -*- encoding: utf-9 -*- + +class DecoderModel(Model): + + def __init__(self, input_shape, head_size, num_heads, ff_dim, + num_Transformer_blocks, mlp_units, n_classes, + dropout=0, mlp_dropout=0): + """ + Initializes the TimeSeriesTransformer class. This class is a + wrapper around a Keras model that consists of a series of + Transformer blocks followed by an MLP. + + Args: + input_shape: tuple, shape of the input tensor. + head_size: int, the number of features in each attention head. + num_heads: int, the number of attention heads. + ff_dim: int, the number of neurons in the feedforward neural + network. + num_Transformer_blocks: int, the number of Transformer blocks. + mlp_units: list of ints, the number of neurons in each layer of + the MLP. + n_classes: int, the number of output classes. + dropout: float, dropout rate. + mlp_dropout: float, dropout rate in the MLP. + + Attributes: + timeseriestransformer: Keras model, the TimeSeriesTransformer + model. + """ + self.tstfbuilder = TSTFBuilder() + + super(DecoderModel, self).__init__() + self.decoder = self._modelstack( + input_shape, head_size, num_heads, ff_dim, + num_Transformer_blocks, mlp_units, n_classes, + dropout, mlp_dropout) + + def _modelstack(self, input_shape, head_size, num_heads, ff_dim, + num_Transformer_blocks, mlp_units, + dropout, mlp_dropout): + """ + Creates a Timeseries Transformer model. This consists of a series of + Transformer blocks followed by an MLP. + + Args: + input_shape: tuple, shape of the input tensor. + head_size: int, the number of features in each attention head. + num_heads: int, the number of attention heads. + ff_dim: int, the number of neurons in the feedforward neural + network. + num_Transformer_blocks: int, the number of Transformer blocks. + mlp_units: list of ints, the number of neurons in each layer of + the MLP. + dropout: float, dropout rate. + mlp_dropout: float, dropout rate in the MLP. + + Returns: + A Keras model. + """ + + inputs = Input((mlp_units[-1],)) + full_dimension = input_shape[0] * input_shape[1] + x = Dense(full_dimension, activation="relu")(inputs) + x = Reshape((input_shape[0], input_shape[1]))(x) + + for _ in range(num_Transformer_blocks): + x = self.tstfbuilder.build_transformerblock( + x, + head_size, + num_heads, + ff_dim, + dropout + ) + + # final layer with corrected shape + x = Conv1D(filters=input_shape[1], + kernel_size=1, + padding="valid", + activation=linear)(x) + + return Model(inputs, x) + + def call(self, inputs): + """ + Calls the TimeSeriesTransformer model on a batch of inputs. + + Args: + inputs (Tensor): batch of input data. + + Returns: + (Tensor) Decoded reconstruction of the spectral data. + """ + return self.decoder(inputs) + + def summary(self): + """ + Prints the Model summary. + """ + self.decoder.summary() + +# EOF From 7f9add4ddcbc18cce6971301576ebbe1ca806d8c Mon Sep 17 00:00:00 2001 From: Dawith Date: Sun, 1 Mar 2026 23:20:53 -0500 Subject: [PATCH 04/18] Second input bundle for training the latent space added --- model/trainingwheel.py | 44 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 model/trainingwheel.py diff --git a/model/trainingwheel.py b/model/trainingwheel.py new file mode 100644 index 0000000..2a612db --- /dev/null +++ b/model/trainingwheel.py @@ -0,0 +1,44 @@ +# -*- encoding: utf-8 -*- + +# Third party module imports +from keras import Input, Model + +class TrainingWheel(Model): + def __init__(self, sem_input_shapes): + self.inputs = self._semantic_input(sem_input_shapes) + + def _semantic_input(self, sem_input_shapes): + """ + Create a keras Model block that takes in the semantic inputs and just + forwards the input. The whole point of it is so that at autoencoder + build, the semantic input can be easily inserted for training and + removed for inference. + + Args: + sem_input_shapes (List[int]): List of integers representing the + shapes of the semantic inputs. + """ + inputs = [Input(shape=(input_shape,)) + for input_shape in sem_input_shapes] + + return Model(inputs, inputs) + + def call(self, inputs): + """ + Calls the model on a batch of inputs. + + Args: + inputs (Tensor): Batch of input data. + + Returns: + (Tensor) Same input data passed through the model. + """ + return self.inputs(inputs) + + def summary(self): + """ + Prints Model summary. + """ + self.inputs.summary() + +# EOF From 54c6b74f46f782dc88bb7ff56f411282202a0aa1 Mon Sep 17 00:00:00 2001 From: Dawith Date: Mon, 2 Mar 2026 13:47:08 -0500 Subject: [PATCH 05/18] Decoder made compatible with new three-part autoencoder w/ regularization --- model/decoder.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/model/decoder.py b/model/decoder.py index 65594c6..506a12b 100644 --- a/model/decoder.py +++ b/model/decoder.py @@ -1,9 +1,17 @@ -# -*- encoding: utf-9 -*- +# -*- encoding: utf-8 -*- -class DecoderModel(Model): +# 3rd party module imports +from keras import Model +from keras.layers import Input, Dense, Reshape, Conv1D + +# Local module imports +from model.activation import sublinear, linear +from model.transformer import TimeseriesTransformerBuilder as TSTFBuilder + +class Decoder(Model): def __init__(self, input_shape, head_size, num_heads, ff_dim, - num_Transformer_blocks, mlp_units, n_classes, + num_Transformer_blocks, mlp_units, dropout=0, mlp_dropout=0): """ Initializes the TimeSeriesTransformer class. This class is a @@ -29,10 +37,10 @@ def __init__(self, input_shape, head_size, num_heads, ff_dim, """ self.tstfbuilder = TSTFBuilder() - super(DecoderModel, self).__init__() + super(Decoder, self).__init__() self.decoder = self._modelstack( input_shape, head_size, num_heads, ff_dim, - num_Transformer_blocks, mlp_units, n_classes, + num_Transformer_blocks, mlp_units, dropout, mlp_dropout) def _modelstack(self, input_shape, head_size, num_heads, ff_dim, @@ -58,12 +66,12 @@ def _modelstack(self, input_shape, head_size, num_heads, ff_dim, A Keras model. """ - inputs = Input((mlp_units[-1],)) + inputs = Input(shape=(mlp_units[-1],), name="decoder_input") full_dimension = input_shape[0] * input_shape[1] - x = Dense(full_dimension, activation="relu")(inputs) - x = Reshape((input_shape[0], input_shape[1]))(x) + x = Dense(full_dimension, activation="relu", name="dec_dense1")(inputs) + x = Reshape((input_shape[0], input_shape[1]), name="dec_reshape")(x) - for _ in range(num_Transformer_blocks): + for i in range(num_Transformer_blocks): x = self.tstfbuilder.build_transformerblock( x, head_size, @@ -76,9 +84,10 @@ def _modelstack(self, input_shape, head_size, num_heads, ff_dim, x = Conv1D(filters=input_shape[1], kernel_size=1, padding="valid", - activation=linear)(x) + activation=linear, + name="dec_conv1d")(x) - return Model(inputs, x) + return Model(inputs, x, name="decoder") def call(self, inputs): """ From ed99b8b5259834292f7e31c7324f36058a7a60d7 Mon Sep 17 00:00:00 2001 From: Dawith Date: Mon, 2 Mar 2026 13:47:42 -0500 Subject: [PATCH 06/18] Interface with embedding fixed for compatibility --- model/encoder.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/model/encoder.py b/model/encoder.py index 9775e55..d803f1d 100644 --- a/model/encoder.py +++ b/model/encoder.py @@ -9,7 +9,6 @@ # Local module imports from model.transformer import TimeseriesTransformerBuilder as TSTFBuilder -from model.latent import Semantic, Variational class Encoder(Model): """ @@ -48,8 +47,7 @@ def __init__(self, input_shape, head_size, num_heads, ff_dim, ) def _modelstack(self, input_shape, head_size, num_heads, ff_dim, - num_Transformer_blocks, mlp_units, n_classes, - dropout, mlp_dropout): + num_Transformer_blocks, mlp_units, dropout, mlp_dropout): """ Creates a Timeseries Transformer model. This consists of a series of Transformer blocks followed by an MLP. @@ -91,10 +89,8 @@ def _modelstack(self, input_shape, head_size, num_heads, ff_dim, x = Dropout(mlp_dropout)(x) # Two separate latent spaces supported - y = Dense(n_classes[0], activation="relu")(x) - z = Dense(n_classes[1], activation="relu")(x) - return Model(inputs, [y, z]) + return Model(inputs, x) def call(self, inputs): """ From 58351f8ca7e93ec5dba373294ccbb5e9cd35a5c0 Mon Sep 17 00:00:00 2001 From: Dawith Date: Mon, 2 Mar 2026 13:48:06 -0500 Subject: [PATCH 07/18] Latent block with two parallel sub-components built --- model/latent.py | 99 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 28 deletions(-) diff --git a/model/latent.py b/model/latent.py index e948f27..a5d0721 100644 --- a/model/latent.py +++ b/model/latent.py @@ -1,14 +1,47 @@ # -*- coding: utf-8 -*- +# 3rd party module imports +import keras from keras import Input, Model -from keras.layers import Dense +from keras.layers import Dense, Layer +import keras.ops as ops + +# Local module imports +from model.losses import categorical_crossentropy, mean_absolute_error + +class Latent(Model): + def __init__(self, mlp_dim, semantic_dims, var_dim): + super(Latent, self).__init__() + self.name = "Semantic Embedding Block" + self.semantic = Semantic(mlp_dim, semantic_dims) + self.variational = Variational(mlp_dim, var_dim) + self.latent = self._build(mlp_dim, semantic_dims, var_dim) + + def _build(self, mlp_dim, semantic_dims, var_dim): + """ + """ + + inputs = Input(shape=(mlp_dim,)) + sem_inputs = [Input(shape=(dim,)) for dim in semantic_dims] + s, s_err = self.semantic([inputs] + sem_inputs) + v, kl_loss = self.variational(inputs) + x = Dense(mlp_dim, activation="relu")(ops.concatenate([s, v], axis=-1)) + + return Model([inputs, *sem_inputs], [x, s_err, kl_loss], name="latent") + + def call(self, inputs): + return self.latent(inputs) + + def summary(self): + return self.latent.summary() class Semantic(Model): def __init__(self, mlp_dim, semantic_dims): self.name = "Semantic Embedding Block" + super(Semantic, self).__init__() self.latent = self._build(mlp_dim, semantic_dims) - def _build(mlp_dim, semantic_dims): + def _build(self, mlp_dim, semantic_dims): """ Embedding space for semantically meaningful variables. Everything is laterally spaced out. @@ -22,7 +55,12 @@ def _build(mlp_dim, semantic_dims): model, with a structured latent space that maps to semantically meaningful variables. """ - inputs = Input(shape=(mpl_dim,)) + + # Compute inverse log of dimensions to get weights + class_counts = ops.array(semantic_dims[:-1], dtype="float32") + weights = 1/ops.log(class_counts) + + inputs = Input(shape=(mlp_dim,)) targets = [Input(shape=(dim,)) for dim in semantic_dims] # One-hot encoding spaces @@ -33,23 +71,24 @@ def _build(mlp_dim, semantic_dims): # Regression spaces # Min. frequency, Max. frequency, baseline time, treatment time, # loop dt - reg = Dense(dim[-1], activation=None)(inputs) + reg = Dense(semantic_dims[-1], activation=None)(inputs) # Compute categorical_crossentropy error against targets for # categorical variables errors = [ - keras.losses.categorical_crossentropy(target, pred) - for target, pred in zip(targets[:-1], one_hots) + categorical_crossentropy(target, pred) * weight + for target, pred, weight in zip(targets[:-1], one_hots, weights) ] - # Compute MAE - errors.append(keras.losses.mean_absolute_error(targets[-1], reg)) + # Compute MAE, with normalization by approximate range of values (~4) + errors.append(mean_absolute_error(targets[-1], reg) / 4.) + error = ops.sum(ops.stack(errors)) # Combine everything into single dense layer - concat = keras.layers.concatenate(one_hots + [reg]) + concat = keras.layers.concatenate(one_hots + [reg], axis=-1) output = Dense(mlp_dim, activation="relu")(concat) - return Model(inputs, [output, errors]) + return Model([inputs, *targets], [output, error], name="semantic") def call(self, inputs): return self.latent(inputs) @@ -58,17 +97,23 @@ def summary(self): return self.latent.summary() class Variational(Model): - def __init__(self, name): - self.name = name + def __init__(self, dim, var_dim): + self.name = "Variational Embedding Block" + super(Variational, self).__init__() + self.latent = self._build(dim, var_dim) - def _build(dim): + def _build(self, dim, var_dim): inputs = Input(shape=(dim,)) - x = Dense(dim, activation="relu")(inputs) - z1 = Dense(dim, activation="relu")(x) - z2 = Dense(dim, activation="relu")(x) - z = Sampling()([z_mean, z_log_var, z]) + x = Dense(var_dim, activation="relu")(inputs) + z_mean = Dense(var_dim, activation=None)(x) + z_log_var = Dense(var_dim, activation=None)(x) + z = Sampling()([z_mean, z_log_var]) + kl_loss = -0.5 * ops.sum( + 1 + z_log_var - ops.square(z_mean) - ops.exp(z_log_var), + axis=1 + ) - return Model(inputs, [z_mean, z_log_var, z]) + return Model(inputs, [z, kl_loss], name="variational") def call(self, inputs): return self.latent(inputs) @@ -76,21 +121,19 @@ def call(self, inputs): def summary(self): return self.latent.summary() -class Sampling(layers.Layer): +class Sampling(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.seed_gen = keras.random.SeedGenerator(1337) + def call(self, inputs): z_mean, z_log_var = inputs # Reparameterization trick - eps = tf.random.normal(shape=tf.shape(z_mean)) - z = z_mean + tf.exp(0.5 * z_log_var) * eps + eps = keras.random.normal(shape=ops.shape(z_mean), seed=self.seed_gen) + z = z_mean + ops.exp(0.5 * z_log_var) * eps - # KL divergence term - kl = -0.5 * tf.reduce_sum( - 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), - axis=1 - ) - self.add_loss(tf.reduce_mean(kl)) + return z - return z # EOF From 352fcd316030ff9c94dc403cbedabc92f789e44a Mon Sep 17 00:00:00 2001 From: Dawith Date: Mon, 2 Mar 2026 13:48:27 -0500 Subject: [PATCH 08/18] Major revision to CompoundModel class to use the new three-part structure --- model/model.py | 265 ++++++++++++++++--------------------------------- 1 file changed, 87 insertions(+), 178 deletions(-) diff --git a/model/model.py b/model/model.py index 540479c..1d455e5 100644 --- a/model/model.py +++ b/model/model.py @@ -6,19 +6,25 @@ """ +# Built-in module imports +from typing import Optional + +# 3rd party module imports from keras import Input, Model -from keras.layers import BatchNormalization, Conv1D, Dense, Dropout, Reshape, \ - GlobalAveragePooling1D, LayerNormalization, Masking, Conv2D, \ - MultiHeadAttention, concatenate +from keras.layers import Dense, Dropout, Reshape +import keras.ops as ops -from model.activation import sublinear, linear +# Local module imports from model.transformer import TimeseriesTransformerBuilder as TSTFBuilder +from model.encoder import Encoder +from model.decoder import Decoder +from model.latent import Latent class CompoundModel(Model): def __init__(self, input_shape, head_size, num_heads, ff_dim, - num_Transformer_blocks, mlp_units, n_classes, - dropout=0, mlp_dropout=0): + num_transformer_blocks, mlp_units, semantic_dims, + var_dims, dropout=0, mlp_dropout=0): """ Initializes the TimeSeriesTransformer class. This class is a wrapper around a Keras model that consists of a series of @@ -30,79 +36,68 @@ def __init__(self, input_shape, head_size, num_heads, ff_dim, num_heads: int, the number of attention heads. ff_dim: int, the number of neurons in the feedforward neural network. - num_Transformer_blocks: int, the number of Transformer blocks. + num_transformer_blocks: int, the number of Transformer blocks. mlp_units: list of ints, the number of neurons in each layer of the MLP. - n_classes: int, the number of output classes. dropout: float, dropout rate. mlp_dropout: float, dropout rate in the MLP. Attributes: - timeseriestransformer: Keras model, the TimeSeriesTransformer + autoencoder: Keras model, the TimeSeriesTransformer model. """ - self.tstfbuilder = TSTFBuilder() - super(CompoundModel, self).__init__() - self.timeseriestransformer = self._modelstack( - input_shape, head_size, num_heads, ff_dim, - num_Transformer_blocks, mlp_units, n_classes, - dropout, mlp_dropout) - - def _modelstack(self, input_shape, head_size, num_heads, ff_dim, - num_Transformer_blocks, mlp_units, n_classes, - dropout, mlp_dropout): - """ - Creates a Timeseries Transformer model. This consists of a series of - Transformer blocks followed by an MLP. - - Args: - input_shape: tuple, shape of the input tensor. - head_size: int, the number of features in each attention head. - num_heads: int, the number of attention heads. - ff_dim: int, the number of neurons in the feedforward neural - network. - num_Transformer_blocks: int, the number of Transformer blocks. - mlp_units: list of ints, the number of neurons in each layer of - the MLP. - n_classes: list of ints, the number of output classes. - dropout: float, dropout rate. - mlp_dropout: float, dropout rate in the MLP. - - Returns: - A Keras model. - """ - - inputs = Input(shape=input_shape) - #x = inputs - #inputs = Masking(mask_value=pad_value)(inputs) - x = BatchNormalization()(inputs) - - # Transformer blocks - for _ in range(num_Transformer_blocks): - x = self.tstfbuilder.build_transformerblock( - x, + self.encoder = Encoder( + input_shape, + head_size, + num_heads, + ff_dim, + num_transformer_blocks, + mlp_units, + dropout, + mlp_dropout + ) + + self.latent = Latent( + mlp_units[-1], + semantic_dims, + var_dims + ) + + self.decoder = Decoder( + input_shape, head_size, num_heads, ff_dim, - dropout - ) + num_transformer_blocks, + mlp_units, + dropout, + mlp_dropout + ) + self.autoencoder = self._modelstack(input_shape, semantic_dims) + - # Pooling and simple DNN block - x = GlobalAveragePooling1D(data_format="channels_first")(x) - for dim in mlp_units: - x = Dense(dim, activation="relu")(x) - x = Dropout(mlp_dropout)(x) + def _modelstack(self, input_shape, semantic_dims): - # Two separate latent spaces supported - #y = Dense(n_classes[0], activation="softmax")(x) - #z = Dense(n_classes[1], activation="softmax")(x) - y = Dense(n_classes[0], activation="relu")(x) - z = Dense(n_classes[1], activation="relu")(x) + # Define overall model inputs + encoder_input = Input(shape=input_shape, name="Encoder_input") + semantic_inputs = [Input(shape=(semantic_dim,), name=f"Semantic_input_{i}") + for i, semantic_dim in enumerate(semantic_dims)] + inputs = [encoder_input] + semantic_inputs - return Model(inputs, [y, z]) + # Encoder-Latent space-Decoder stack + encoding = self.encoder(encoder_input) + embedding, sem_loss, kl_loss = self.latent([encoding] + semantic_inputs) + decoding = self.decoder(embedding) - def call(self, inputs): + # Combine Losses + recon_loss = ops.mean(ops.abs(decoding - encoder_input)) + model = Model(inputs, [decoding, sem_loss, kl_loss, recon_loss], + name="Autoencoder") + + return model + + def call(self, inputs, training: Optional[bool] = None): """ Calls the TimeSeriesTransformer model on a batch of inputs. @@ -112,9 +107,19 @@ def call(self, inputs): Returns: Tensor, resulting output of the TimeSeriesTransformer model. """ - return self.timeseriestransformer(inputs) + outputs, sem_loss, kl_loss, recon_loss = self.autoencoder( + inputs, + training=training + ) + + recon_weight = ops.cast(1./4., dtype=recon_loss.dtype) + self.add_loss(sem_loss) + self.add_loss(kl_loss) + self.add_loss(recon_loss * recon_weight) - def summary(self): + return outputs + + def summary(self, *args, **kwargs): """ Prints a summary of the TimeSeriesTransformer model. @@ -124,123 +129,27 @@ def summary(self): Returns: None. """ - self.timeseriestransformer.summary() + self.autoencoder.summary(*args, **kwargs) -class DecoderModel(Model): - - def __init__(self, input_shape, head_size, num_heads, ff_dim, - num_Transformer_blocks, mlp_units, n_classes, - dropout=0, mlp_dropout=0): - """ - Initializes the TimeSeriesTransformer class. This class is a - wrapper around a Keras model that consists of a series of - Transformer blocks followed by an MLP. + def kl(self): + beta = ( + self.beta_anneal(epoch, total_epochs) + * self.beta_cyclical(epoch, cycle) + ) + capacity = self.kl_capacity(epoch, max_cap, total_epochs) + weight = 1 + kl_modified = beta * ops.abs(kl_loss - capacity) * weight - Args: - input_shape: tuple, shape of the input tensor. - head_size: int, the number of features in each attention head. - num_heads: int, the number of attention heads. - ff_dim: int, the number of neurons in the feedforward neural - network. - num_Transformer_blocks: int, the number of Transformer blocks. - mlp_units: list of ints, the number of neurons in each layer of - the MLP. - n_classes: int, the number of output classes. - dropout: float, dropout rate. - mlp_dropout: float, dropout rate in the MLP. - Attributes: - timeseriestransformer: Keras model, the TimeSeriesTransformer - model. - """ - self.tstfbuilder = TSTFBuilder() + def beta_anneal(self, epoch, total_epochs): + return ops.minimum(1.0, epoch / total_epochs) - super(DecoderModel, self).__init__() - self.timeseriestransdecoder = self._modelstack( - input_shape, head_size, num_heads, ff_dim, - num_Transformer_blocks, mlp_units, n_classes, - dropout, mlp_dropout) + def beta_cyclical(self, epoch, cycle): + cycle_pos = (step % cycle) / cycle + return ops.minimum(1.0, 2 * cycle_pos) - def _modelstack(self, input_shape, head_size, num_heads, ff_dim, - num_Transformer_blocks, mlp_units, n_classes, - dropout, mlp_dropout): - """ - Creates a Timeseries Transformer model. This consists of a series of - Transformer blocks followed by an MLP. - - Args: - input_shape: tuple, shape of the input tensor. - head_size: int, the number of features in each attention head. - num_heads: int, the number of attention heads. - ff_dim: int, the number of neurons in the feedforward neural - network. - num_Transformer_blocks: int, the number of Transformer blocks. - mlp_units: list of ints, the number of neurons in each layer of - the MLP. - n_classes: list of ints, the number of output classes. - dropout: float, dropout rate. - mlp_dropout: float, dropout rate in the MLP. - - Returns: - A Keras model. - """ - - shape0 = n_classes[0] - shape1 = n_classes[1] - x0 = Input(shape=(shape0,)) - x1 = Input(shape=(shape1,)) - inputs = [x0, x1] - x0 = Dense(n_classes[0], activation="relu")(x0) - x1 = Dense(n_classes[1], activation="relu")(x1) - x = concatenate([x0, x1], axis=-1) - full_dimension = input_shape[0] * input_shape[1] - x = Dense(full_dimension, activation="relu")(x) - x = Reshape((input_shape[0], input_shape[1]))(x) - - """ - for dim in mlp_units: - x = Dense(dim, activation="relu")(x) - x = Dropout(mlp_dropout)(x) - """ - for _ in range(num_Transformer_blocks): - x = self.tstfbuilder.build_transformerblock( - x, - head_size, - num_heads, - ff_dim, - dropout - ) - - # final layer with corrected shape - x = Conv1D(filters=input_shape[1], - kernel_size=1, - padding="valid", - activation=linear)(x) - - return Model(inputs, x) - - def call(self, inputs): - """ - Calls the TimeSeriesTransformer model on a batch of inputs. - - Args: - inputs: Tensor, batch of input data. - - Returns: - Tensor, resulting output of the TimeSeriesTransformer model. - """ - return self.timeseriestransdecoder(inputs) - - def summary(self): - """ - Prints a summary of the TimeSeriesTransformer model. - - Args: - None. - - Returns: - None. - """ - self.timeseriestransdecoder.summary() + def kl_capacity(self, epoch, max_cap, total_epochs): + cap = max_cap * epoch / total_epochs + return ops.minimum(max_cap, cap) # EOF From b04a7c3779669582292be281cdb13b6a4ca112ab Mon Sep 17 00:00:00 2001 From: Dawith Date: Mon, 2 Mar 2026 13:48:50 -0500 Subject: [PATCH 09/18] Revised training module to properly invoke the target model --- train/autoencoder_train.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/train/autoencoder_train.py b/train/autoencoder_train.py index e9e7402..d58440c 100644 --- a/train/autoencoder_train.py +++ b/train/autoencoder_train.py @@ -17,8 +17,6 @@ from model.metrics import MutualInformation, mutual_information from visualize.visualize import confusion_matrix from visualize.plot import roc_plot -from train.encoder_train import build_encoder -from train.decoder_train import build_decoder def autoencoder_workflow(params, shape, n_classes, train_set, validation_set, test_set, @@ -47,18 +45,26 @@ def autoencoder_workflow(params, shape, n_classes, save_autoencoder(params, model, path) -def build_autoencoder(params, shape, n_classes): - autoencoder_params = params["autoencoder_params"] - #mi = MutualInformation() +def build_autoencoder(params, shape, semantic_dims): + params = params["autoencoder_params"] mse = MeanSquaredError() - encoder_model = build_encoder(params, shape, n_classes) - decoder_model = build_decoder(params, shape, n_classes) - model = keras.Sequential([encoder_model, decoder_model]) + model = CompoundModel( + shape, + params["head_size"], + params["num_heads"], + params["ff_dim"], + params["num_transformer_blocks"], + params["mlp_units"], + semantic_dims, + params["var_dims"], + dropout=params["dropout"], + mlp_dropout=params["mlp_dropout"] + ) + model.build(shape) model.compile( optimizer=keras.optimizers.Adam(learning_rate=4e-4), - loss=autoencoder_params["loss"], - metrics=[mse]#, mutual_information] + metrics=params["metrics"] ) return model @@ -80,8 +86,10 @@ def train_autoencoder(params, model, train_set, validation_set, path): start = time.time() model.fit( - x=train_set, y=train_set, - validation_data=(validation_set, validation_set), + x=[train_set[0], train_set[1][0], train_set[1][1]], y=train_set[0], + validation_data=( + [validation_set[0], validation_set[1][0], validation_set[1][1]], + validation_set[0]), batch_size=params["batch_size"], epochs=params["epochs"], verbose=log_level, @@ -95,7 +103,7 @@ def test_autoencoder(model: Model, test: List, metrics: dict): """ """ - test_eval = model.evaluate(test, test) + test_eval = model.evaluate(test[0], test[0]) if len(metrics.keys()) == 1: metrics[metrics.keys()[0]] = test_eval else: From 4a224ba9fb6cb5a1089bff94b8e10755555ef821 Mon Sep 17 00:00:00 2001 From: Dawith Date: Mon, 2 Mar 2026 13:49:15 -0500 Subject: [PATCH 10/18] Minor fixes but these will be deprecated --- train/decoder_train.py | 9 ++++----- train/encoder_train.py | 7 ++++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/train/decoder_train.py b/train/decoder_train.py index 2e4c6ff..d10386c 100644 --- a/train/decoder_train.py +++ b/train/decoder_train.py @@ -7,7 +7,7 @@ import typing from typing import List -from model.model import DecoderModel +from model.model import Decoder from visualize.plot import spectra_plot def decoder_workflow(params, train_set, validation_set, test_set, @@ -26,26 +26,25 @@ def decoder_workflow(params, train_set, validation_set, test_set, spectra_plot(test_predict[0], name=f"{target}-{treatment}-predict") spectra_plot(test_set[0][0], name=f"{target}-{treatment}-true") -def build_decoder(params, input_shape, n_classes): +def build_decoder(params, input_shape, semantic_dims): """ """ params = params["decoder_params"] - decoder = DecoderModel( + decoder = Decoder( input_shape, params["head_size"], params["num_heads"], params["ff_dim"], params["num_transformer_blocks"], params["mlp_units"], - n_classes, params["dropout"], params["mlp_dropout"] ) decoder.compile( optimizer=keras.optimizers.Adam(learning_rate=4e-4), - loss=params["loss"], + #loss=params["loss"], metrics=params["metrics"] ) diff --git a/train/encoder_train.py b/train/encoder_train.py index ceab077..2013e8d 100644 --- a/train/encoder_train.py +++ b/train/encoder_train.py @@ -34,7 +34,7 @@ def encoder_workflow(params, shape, n_classes, save_encoder(model, path) -def build_encoder(params, input_shape, n_classes): +def build_encoder(params, input_shape, semantic_dims): log_level = params["log_level"] params = params["encoder_params"] model = CompoundModel( @@ -44,14 +44,15 @@ def build_encoder(params, input_shape, n_classes): params["ff_dim"], params["num_transformer_blocks"], params["mlp_units"], - n_classes, + semantic_dims, + params["var_dims"], dropout=params["dropout"], mlp_dropout=params["mlp_dropout"] ) model.compile( optimizer=keras.optimizers.Adam(learning_rate=4e-4), - loss=params["loss"], + #loss=params["loss"], metrics=params["metrics"] ) if log_level == 1: From d24d7a8eb5bb700a43c493a307fec57fa0016331 Mon Sep 17 00:00:00 2001 From: Dawith Date: Mon, 2 Mar 2026 13:49:38 -0500 Subject: [PATCH 11/18] Inputs for autoencoder training corrected --- train_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/train_model.py b/train_model.py index d7b79e2..eae5575 100644 --- a/train_model.py +++ b/train_model.py @@ -120,9 +120,9 @@ def main(): params, shape, n_classes, - train_set[0], - validation_set[0], - test_set[0], + train_set, + validation_set, + test_set, categories, keys, path From 3fddf624c79ccf20e2a72f4617bd5a398e86e24c Mon Sep 17 00:00:00 2001 From: Dawith Date: Mon, 2 Mar 2026 13:49:49 -0500 Subject: [PATCH 12/18] name param added --- model/transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/transformer.py b/model/transformer.py index 5a03188..e3de135 100644 --- a/model/transformer.py +++ b/model/transformer.py @@ -38,7 +38,7 @@ def __init__(self): """ def build_transformerblock(self, inputs, head_size, num_heads, - ff_dim, dropout): + ff_dim, dropout, name=None): """ Constructs the transformer block. A transformer block consists of the following steps: From ce47e3f7f375d34072b1ac8b9f2f1c66edb340fa Mon Sep 17 00:00:00 2001 From: Dawith Date: Mon, 2 Mar 2026 13:50:29 -0500 Subject: [PATCH 13/18] Custom loss functions using keras.ops only --- model/losses.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 model/losses.py diff --git a/model/losses.py b/model/losses.py new file mode 100644 index 0000000..d08a5f1 --- /dev/null +++ b/model/losses.py @@ -0,0 +1,10 @@ +# -*- encoding: utf-8 -*- + +import keras.ops as ops + +def categorical_crossentropy(y_true, y_pred): + y_pred = ops.clip(y_pred, 1e-7, 1.0) + return -ops.sum(y_true * ops.log(y_pred), axis=-1) + +def mean_absolute_error(y_true, y_pred): + return ops.mean(ops.abs(y_pred - y_true), axis=-1) From b21d2b32282d3804ca5e59c4b34ff61eb64918a6 Mon Sep 17 00:00:00 2001 From: Dawith Date: Mon, 2 Mar 2026 13:50:52 -0500 Subject: [PATCH 14/18] Metrics --- model/metrics.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 model/metrics.py diff --git a/model/metrics.py b/model/metrics.py new file mode 100644 index 0000000..584cdc1 --- /dev/null +++ b/model/metrics.py @@ -0,0 +1,97 @@ +import keras +import keras.ops as ops + +def mutual_information(y_true, y_pred): + return jax_mi(y_true, y_pred) + +def jax_mi(y_true, y_pred, num_bins=20, eps=1e-8): + y_true = keras.ops.reshape(y_true, (-1,)) + y_pred = keras.ops.reshape(y_pred, (-1,)) + + y_min, y_max = ops.min(y_true), ops.max(y_true) + p_min, p_max = ops.min(y_pred), ops.max(y_pred) + + y_edges = ops.linspace(y_min, y_max, num_bins + 1) + p_edges = ops.linspace(p_min, p_max, num_bins + 1) + + def digitize(values, edges): + cmp = values[:, None] >= edges[None, :] + return ops.sum(cmp, axis=1) - 1 + + y_bin = ops.clip(digitize(y_true, y_edges), 0, num_bins - 1) + p_bin = ops.clip(digitize(y_pred, p_edges), 0, num_bins - 1) + + # Compute joint histogram WITHOUT one-hot + joint = ops.zeros((num_bins, num_bins)) + joint = joint.at[y_bin, p_bin].add(1.0) + + joint = joint / (ops.sum(joint) + eps) + + py = ops.sum(joint, axis=1, keepdims=True) + pp = ops.sum(joint, axis=0, keepdims=True) + + ratio = (joint + eps) / (py * pp + eps) + mi = ops.sum(joint * ops.log(ratio)) + + return mi + + +def flatten(x): + return keras.ops.reshape(x,(-1,)) + +def mutual_info(y_true, y_pred, num_bins=20): + eps = 1E-8 + + y_true = flatten(y_true) + y_pred = flatten(y_pred) + + y_min, y_max = (ops.min(y_true), ops.max(y_true)) + p_min, p_max = (ops.min(y_pred), ops.max(y_pred)) + + y_edges = ops.linspace(y_min, y_max, num_bins + 1) + p_edges = ops.linspace(p_min, p_max, num_bins + 1) + + def digitize(values, edges): + cmp = values[:, None] >= edges[None, :] + return ops.sum(cmp, axis=1) - 1 + + y_bin = ops.clip(digitize(y_true, y_edges), 0, num_bins - 1) + p_bin = ops.clip(digitize(y_pred, p_edges), 0, num_bins - 1) + + # Compute joint histogram + # Convert bin indices to one-hot + y_oh = ops.one_hot(y_bin, num_bins) + p_oh = ops.one_hot(p_bin, num_bins) + + joint = ops.sum(y_oh[:, :, None] * p_oh[:, None, :], axis=0) + joint = joint / ops.sum(joint) + + # Marginals + py = ops.sum(joint, axis=1) + pp = ops.sum(joint, axis=0) + + # Compute MI + py = ops.reshape(py, (-1, 1)) + pp = ops.reshape(pp, (1, -1)) + + denom = py * pp + ratio = (joint + eps) / (denom + eps) + mi = ops.sum(joint * ops.log(ratio)) + + return mi + +class MutualInformation(keras.metrics.Metric): + def __init__(self, num_bins=20, name="mutual_information", **kwargs): + super().__init__(name=name, **kwargs) + self.num_bins = num_bins + self.mi = self.add_weight(shape=(), initializer="zeros") + + def update_state(self, y_true, y_pred, sample_weight=None): + value = mutual_info(y_true, y_pred, self.num_bins) + if sample_weight is not None: + value = value * sample_weight + self.mi.assign(value) + + def result(self): + return self.mi + From 62535c9f95dbfe222eab7d923987df1063674e88 Mon Sep 17 00:00:00 2001 From: Dawith Date: Wed, 4 Mar 2026 15:06:03 -0500 Subject: [PATCH 15/18] Loss computation set up --- model/model.py | 68 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/model/model.py b/model/model.py index 1d455e5..d1d7e42 100644 --- a/model/model.py +++ b/model/model.py @@ -47,6 +47,11 @@ def __init__(self, input_shape, head_size, num_heads, ff_dim, model. """ super(CompoundModel, self).__init__() + self.cycle_len = 25 + self.total_epoch = None + self.max_cap = 5.0 + self.beta = 0.01 + self.encoder = Encoder( input_shape, head_size, @@ -111,12 +116,11 @@ def call(self, inputs, training: Optional[bool] = None): inputs, training=training ) - - recon_weight = ops.cast(1./4., dtype=recon_loss.dtype) + + self.recon_weight = ops.cast(1./4., dtype=recon_loss.dtype) self.add_loss(sem_loss) - self.add_loss(kl_loss) - self.add_loss(recon_loss * recon_weight) - + self.add_loss(self.beta*kl_loss) + self.add_loss(recon_loss * self.recon_weight) return outputs def summary(self, *args, **kwargs): @@ -131,16 +135,6 @@ def summary(self, *args, **kwargs): """ self.autoencoder.summary(*args, **kwargs) - def kl(self): - beta = ( - self.beta_anneal(epoch, total_epochs) - * self.beta_cyclical(epoch, cycle) - ) - capacity = self.kl_capacity(epoch, max_cap, total_epochs) - weight = 1 - kl_modified = beta * ops.abs(kl_loss - capacity) * weight - - def beta_anneal(self, epoch, total_epochs): return ops.minimum(1.0, epoch / total_epochs) @@ -152,4 +146,48 @@ def kl_capacity(self, epoch, max_cap, total_epochs): cap = max_cap * epoch / total_epochs return ops.minimum(max_cap, cap) + def kl_weight(self, epoch, step, total_epochs, cycle, max_cap): + beta = ( + self.beta_anneal(epoch, total_epochs) * + self.beta_cyclical(epoch, cycle) + ) + capacity = self.kl_capacity(epoch, max_cap, total_epochs) + return beta, capacity + + """ + def train_step(self, data, sample_weight=None): + + with ops.GradientTape() as tape: + outputs, sem_loss, kl_loss, recon_loss = self( + data, training=True + ) + self.recon_weight = ops.cast(1./4., dtype=ops.float32) + epoch = ops.cast( + self.optimizer.iterations // self.steps_per_epoch, + kl_loss.dtype + ) + step = ops.cast(self.optimizer.iterations, kl_loss.dtype) + beta, capacity = self.kl_weight( + epoch=epoch, + step=step, + total_epochs=self.total_epoch, + cycle=self.cycle_len, + max_cap=self.max_cap + ) + kl_term = beta * ops.abs(kl_loss - capacity) + total_loss = sem_loss + kl_term + recon_loss * self.recon_weight + + trainable_vars = self.trainable_variables + grads = tape.gradient(total_loss, trainable_vars) + self.optimizer.apply_gradients(zip(grads, trainable_vars)) + + return { + "loss": total_loss, + "sem_loss": sem_loss, + "kl_loss": kl_loss, + "recon_loss": recon_loss, + "beta": beta, + "capacity": capacity + }""" + # EOF From ca40fae6244940d154bacbc7c8db6339bdd18f85 Mon Sep 17 00:00:00 2001 From: Dawith Date: Wed, 4 Mar 2026 15:06:30 -0500 Subject: [PATCH 16/18] ETL now has option to specify dataset for training --- pipe/etl.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pipe/etl.py b/pipe/etl.py index a6499be..35085a0 100644 --- a/pipe/etl.py +++ b/pipe/etl.py @@ -38,7 +38,7 @@ def etl(spark: SparkSession, split: list=None) -> DataFrame: data = split_sets(data, split=split) return data -def read(spark: SparkSession, split=None) -> DataFrame: +def read(spark: SparkSession, split=None, parquet=None) -> DataFrame: """ Reads the processed data from a Parquet file and splits it into training, validation, and test sets. @@ -50,7 +50,10 @@ def read(spark: SparkSession, split=None) -> DataFrame: DataFrame: The split datasets and category dictionary. """ - data = spark.read.parquet("/app/workdir/parquet/data.parquet") + if parquet is None: + data = spark.read.parquet("/app/workdir/parquet/data.parquet") + else: + data = spark.read.parquet(f"/app/workdir/parquet/{parquet}") data = split_sets(data, split=split) return data From 3b13606dad2a6796661f3bf58eb9d44149c76c7f Mon Sep 17 00:00:00 2001 From: Dawith Date: Wed, 4 Mar 2026 15:07:00 -0500 Subject: [PATCH 17/18] Training parameter set revised --- train/autoencoder_train.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/train/autoencoder_train.py b/train/autoencoder_train.py index d58440c..e00a6a8 100644 --- a/train/autoencoder_train.py +++ b/train/autoencoder_train.py @@ -61,6 +61,7 @@ def build_autoencoder(params, shape, semantic_dims): dropout=params["dropout"], mlp_dropout=params["mlp_dropout"] ) + model.total_epoch = params["epochs"] model.build(shape) model.compile( optimizer=keras.optimizers.Adam(learning_rate=4e-4), @@ -103,7 +104,8 @@ def test_autoencoder(model: Model, test: List, metrics: dict): """ """ - test_eval = model.evaluate(test[0], test[0]) + test = [test[0], test[1][0], test[1][1]] + test_eval = model.evaluate(test, test) if len(metrics.keys()) == 1: metrics[metrics.keys()[0]] = test_eval else: @@ -115,7 +117,7 @@ def test_autoencoder(model: Model, test: List, metrics: dict): return metrics, test_predict def evaluate_autoencoder(params, test_predict, test_set, categories, keys, path): - plt.pcolor(test_set) + plt.pcolor(test_set[0]) plt.savefig(path / params["timestamp"] / "original.png") plt.close() plt.pcolor(test_predict) From 9b3009190087b661189310eda81104861cd67af6 Mon Sep 17 00:00:00 2001 From: Dawith Date: Wed, 4 Mar 2026 15:07:12 -0500 Subject: [PATCH 18/18] Training parameters revised --- train_model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/train_model.py b/train_model.py index eae5575..549f492 100644 --- a/train_model.py +++ b/train_model.py @@ -74,9 +74,10 @@ def main(): #keras.distribution.set_distribution(parallel) if params["load_from_scratch"]: - data = etl(spark, split=params["encoder_params"]["split"]) + data = etl(spark, split=params["autoencoder_params"]["split"]) else: - data = read(spark, split=params["encoder_params"]["split"]) + data = read(spark, split=params["autoencoder_params"]["split"], + parquet=params["dataset"]) (train_set, validation_set, test_set, categories) = data