From 2bd4db57ae4afb54706bc91f66c9fd077a1e9de2 Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Sun, 1 Mar 2026 23:19:05 -0500
Subject: [PATCH 01/18] Latent space representation with semantic and
 variational embedding spaces defined

---
 model/latent.py | 96 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 model/latent.py

diff --git a/model/latent.py b/model/latent.py
new file mode 100644
index 0000000..e948f27
--- /dev/null
+++ b/model/latent.py
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+
+from keras import Input, Model
+from keras.layers import Dense
+
+class Semantic(Model):
+    def __init__(self, mlp_dim, semantic_dims):
+        self.name = "Semantic Embedding Block"
+        self.latent = self._build(mlp_dim, semantic_dims)
+
+    def _build(mlp_dim, semantic_dims):
+        """
+        Embedding space for semantically meaningful variables. Everything is
+        laterally spaced out.
+        
+        Args:
+            semantic_dims (list): List of dimensions for each variable. The final
+                    dimension is reserved for regression variables.
+
+        Returns:
+            Model: Keras Model object that contains just a single layer deep
+                    model, with a structured latent space that maps to
+                    semantically meaningful variables.
+        """
+        inputs = Input(shape=(mpl_dim,))
+        targets = [Input(shape=(dim,)) for dim in semantic_dims]
+
+        # One-hot encoding spaces
+        # Sample type, growth, treatment, dose value, dose unit
+        one_hots = [Dense(dim, activation="softmax")(inputs)
+                    for dim in semantic_dims[:-1]]
+
+        # Regression spaces
+        # Min. frequency, Max. frequency, baseline time, treatment time,
+        # loop dt
+        reg = Dense(dim[-1], activation=None)(inputs)
+
+        # Compute categorical_crossentropy error against targets for
+        # categorical variables
+        errors = [
+            keras.losses.categorical_crossentropy(target, pred)
+            for target, pred in zip(targets[:-1], one_hots)
+        ]
+        
+        # Compute MAE
+        errors.append(keras.losses.mean_absolute_error(targets[-1], reg))
+
+        # Combine everything into single dense layer
+        concat = keras.layers.concatenate(one_hots + [reg])
+        output = Dense(mlp_dim, activation="relu")(concat)
+
+        return Model(inputs, [output, errors])
+
+    def call(self, inputs):
+        return self.latent(inputs)
+
+    def summary(self):
+        return self.latent.summary()
+
+class Variational(Model):
+    def __init__(self, name):
+        self.name = name
+
+    def _build(dim):
+        inputs = Input(shape=(dim,))
+        x = Dense(dim, activation="relu")(inputs)
+        z1 = Dense(dim, activation="relu")(x)
+        z2 = Dense(dim, activation="relu")(x)
+        z = Sampling()([z_mean, z_log_var, z])
+
+        return Model(inputs, [z_mean, z_log_var, z])
+
+    def call(self, inputs):
+        return self.latent(inputs)
+
+    def summary(self):
+        return self.latent.summary()
+
+class Sampling(layers.Layer):
+    def call(self, inputs):
+        z_mean, z_log_var = inputs
+
+        # Reparameterization trick
+        eps = tf.random.normal(shape=tf.shape(z_mean))
+        z = z_mean + tf.exp(0.5 * z_log_var) * eps
+
+        # KL divergence term
+        kl = -0.5 * tf.reduce_sum(
+        1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var),
+            axis=1
+                    )
+        self.add_loss(tf.reduce_mean(kl))
+
+    return z
+
+# EOF

From d19465b4f0702f9046db6f980dda8605556bee22 Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Sun, 1 Mar 2026 23:19:50 -0500
Subject: [PATCH 02/18] Encoder separated out from the model.py for clarity

---
 model/encoder.py | 123 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100644 model/encoder.py

diff --git a/model/encoder.py b/model/encoder.py
new file mode 100644
index 0000000..9775e55
--- /dev/null
+++ b/model/encoder.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+"""
+Model Encoder block
+"""
+
+# Third party module imports
+from keras import Input, Model
+from keras.layers import BatchNormalization, Dense, Dropout, GlobalAveragePooling1D
+
+# Local module imports
+from model.transformer import TimeseriesTransformerBuilder as TSTFBuilder
+from model.latent import Semantic, Variational
+
+class Encoder(Model):
+    """
+    Encoder block that inherits keras Model class. 
+
+    Args:
+        input_shape (tuple): Shape of the input tensor.
+        head_size (int): Number of features in each attention head.
+        num_heads (int): Number of attention heads.
+        ff_dim (int): Number of neurons in the feedforward neural network.
+        num_Transformer_blocks (int): Number of Transformer blocks.
+        mlp_units (List(int)): Number of neurons in each layer of the MLP.
+        n_classes (int): Number of output classes.
+        dropout (float): Dropout rate.
+        mlp_dropout (float): Dropout rate in the MLP.
+
+    Attributes:
+        timeseriestransformer: Keras model, the TimeSeriesTransformer model.
+    """
+    
+    def __init__(self, input_shape, head_size, num_heads, ff_dim,
+                 num_Transformer_blocks, mlp_units,
+                 dropout=0, mlp_dropout=0):
+        self.tstfbuilder = TSTFBuilder()
+
+        super(Encoder, self).__init__()
+        self.encoder = self._modelstack(
+                input_shape,
+                head_size,
+                num_heads,
+                ff_dim,
+                num_Transformer_blocks,
+                mlp_units,
+                dropout,
+                mlp_dropout
+        )
+
+    def _modelstack(self, input_shape, head_size, num_heads, ff_dim,
+                   num_Transformer_blocks, mlp_units, n_classes,
+                    dropout, mlp_dropout):
+        """
+        Creates a Timeseries Transformer model. This consists of a series of
+        Transformer blocks followed by an MLP.
+
+        Args:
+            input_shape: tuple, shape of the input tensor.
+            head_size: int, the number of features in each attention head.
+            num_heads: int, the number of attention heads.
+            ff_dim: int, the number of neurons in the feedforward neural
+                network.
+            num_Transformer_blocks: int, the number of Transformer blocks.
+            mlp_units: list of ints, the number of neurons in each layer of
+                the MLP.
+            n_classes: list of ints, the number of output classes.
+            dropout: float, dropout rate.
+            mlp_dropout: float, dropout rate in the MLP.
+
+        Returns:
+            A Keras model.
+        """
+
+        inputs = Input(shape=input_shape)
+        x = BatchNormalization()(inputs)
+
+        # Transformer blocks
+        for _ in range(num_Transformer_blocks):
+            x = self.tstfbuilder.build_transformerblock(
+                x,
+                head_size,
+                num_heads,
+                ff_dim,
+                dropout
+            )
+
+        # Pooling and simple DNN block
+        x = GlobalAveragePooling1D(data_format="channels_first")(x)
+        for dim in mlp_units:
+            x = Dense(dim, activation="relu")(x)
+            x = Dropout(mlp_dropout)(x)
+
+        # Two separate latent spaces supported
+        y = Dense(n_classes[0], activation="relu")(x)
+        z = Dense(n_classes[1], activation="relu")(x)
+
+        return Model(inputs, [y, z])
+
+    def call(self, inputs):
+        """
+        Calls the TimeSeriesTransformer model on a batch of inputs.
+
+        Args:
+            inputs: Tensor, batch of input data.
+
+        Returns:
+            Tensor, resulting output of the TimeSeriesTransformer model.
+        """
+        return self.encoder(inputs)
+
+    def summary(self):
+        """
+        Prints a summary of the TimeSeriesTransformer model.
+
+        Args:
+            None.
+
+        Returns:
+            None.
+        """
+        self.encoder.summary()
+
+# EOF

From c5aa13bcaf3b14be196103bd268330d08128bf0f Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Sun, 1 Mar 2026 23:20:22 -0500
Subject: [PATCH 03/18] Decoder separated out from model.py for clarity, work
 in progress to fix compatibility with new embedding structure

---
 model/decoder.py | 101 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 model/decoder.py

diff --git a/model/decoder.py b/model/decoder.py
new file mode 100644
index 0000000..65594c6
--- /dev/null
+++ b/model/decoder.py
@@ -0,0 +1,101 @@
+# -*- encoding: utf-9 -*-
+
+class DecoderModel(Model):
+    
+    def __init__(self, input_shape, head_size, num_heads, ff_dim,
+                 num_Transformer_blocks, mlp_units, n_classes,
+                 dropout=0, mlp_dropout=0):
+        """
+        Initializes the TimeSeriesTransformer class. This class is a
+        wrapper around a Keras model that consists of a series of
+        Transformer blocks followed by an MLP.
+
+        Args:
+            input_shape: tuple, shape of the input tensor.
+            head_size: int, the number of features in each attention head.
+            num_heads: int, the number of attention heads.
+            ff_dim: int, the number of neurons in the feedforward neural
+                network.
+            num_Transformer_blocks: int, the number of Transformer blocks.
+            mlp_units: list of ints, the number of neurons in each layer of
+                the MLP.
+            n_classes: int, the number of output classes.
+            dropout: float, dropout rate.
+            mlp_dropout: float, dropout rate in the MLP.
+
+        Attributes:
+            timeseriestransformer: Keras model, the TimeSeriesTransformer
+                model.
+        """
+        self.tstfbuilder = TSTFBuilder()
+
+        super(DecoderModel, self).__init__()
+        self.decoder = self._modelstack(
+                input_shape, head_size, num_heads, ff_dim,
+                num_Transformer_blocks, mlp_units, n_classes,
+                dropout, mlp_dropout)
+
+    def _modelstack(self, input_shape, head_size, num_heads, ff_dim,
+                   num_Transformer_blocks, mlp_units,
+                    dropout, mlp_dropout):
+        """
+        Creates a Timeseries Transformer model. This consists of a series of
+        Transformer blocks followed by an MLP.
+
+        Args:
+            input_shape: tuple, shape of the input tensor.
+            head_size: int, the number of features in each attention head.
+            num_heads: int, the number of attention heads.
+            ff_dim: int, the number of neurons in the feedforward neural
+                network.
+            num_Transformer_blocks: int, the number of Transformer blocks.
+            mlp_units: list of ints, the number of neurons in each layer of
+                the MLP.
+            dropout: float, dropout rate.
+            mlp_dropout: float, dropout rate in the MLP.
+
+        Returns:
+            A Keras model.
+        """
+
+        inputs = Input((mlp_units[-1],))
+        full_dimension = input_shape[0] * input_shape[1]
+        x = Dense(full_dimension, activation="relu")(inputs)
+        x = Reshape((input_shape[0], input_shape[1]))(x)
+
+        for _ in range(num_Transformer_blocks):
+            x = self.tstfbuilder.build_transformerblock(
+                x,
+                head_size,
+                num_heads,
+                ff_dim,
+                dropout
+            )
+
+        # final layer with corrected shape
+        x = Conv1D(filters=input_shape[1],
+                   kernel_size=1,
+                   padding="valid",
+                   activation=linear)(x)
+
+        return Model(inputs, x)
+
+    def call(self, inputs):
+        """
+        Calls the decoder model on a batch of inputs.
+
+        Args:
+            inputs (Tensor): batch of input data.
+
+        Returns:
+            (Tensor) Decoded reconstruction of the spectral data.
+        """
+        return self.decoder(inputs)
+
+    def summary(self):
+        """
+        Prints the Model summary.
+        """
+        self.decoder.summary()
+
+# EOF

From 7f9add4ddcbc18cce6971301576ebbe1ca806d8c Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Sun, 1 Mar 2026 23:20:53 -0500
Subject: [PATCH 04/18] Second input bundle for training the latent space added

---
 model/trainingwheel.py | 44 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 model/trainingwheel.py

diff --git a/model/trainingwheel.py b/model/trainingwheel.py
new file mode 100644
index 0000000..2a612db
--- /dev/null
+++ b/model/trainingwheel.py
@@ -0,0 +1,44 @@
+# -*- encoding: utf-8 -*-
+
+# Third party module imports
+from keras import Input, Model
+
+class TrainingWheel(Model):
+    def __init__(self, sem_input_shapes):
+        self.inputs = self._semantic_input(sem_input_shapes)
+
+    def _semantic_input(self, sem_input_shapes):
+        """
+        Create a keras Model block that takes in the semantic inputs and just
+        forwards the input. The whole point of it is so that at autoencoder
+        build, the semantic input can be easily inserted for training and
+        removed for inference.
+
+        Args:
+            sem_input_shapes (List[int]): List of integers representing the
+                    shapes of the semantic inputs.
+        """
+        inputs = [Input(shape=(input_shape,))
+                  for input_shape in sem_input_shapes]
+
+        return Model(inputs, inputs)
+
+    def call(self, inputs):
+        """
+        Calls the model on a batch of inputs.
+
+        Args:
+            inputs (Tensor): Batch of input data.
+
+        Returns:
+            (Tensor) Same input data passed through the model.
+        """
+        return self.inputs(inputs)
+
+    def summary(self):
+        """
+        Prints Model summary.
+        """
+        self.inputs.summary()
+
+# EOF

From 54c6b74f46f782dc88bb7ff56f411282202a0aa1 Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Mon, 2 Mar 2026 13:47:08 -0500
Subject: [PATCH 05/18] Decoder made compatible with new three-part autoencoder
 w/ regularization

---
 model/decoder.py | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/model/decoder.py b/model/decoder.py
index 65594c6..506a12b 100644
--- a/model/decoder.py
+++ b/model/decoder.py
@@ -1,9 +1,17 @@
-# -*- encoding: utf-9 -*-
+# -*- encoding: utf-8 -*-
 
-class DecoderModel(Model):
+# 3rd party module imports
+from keras import Model
+from keras.layers import Input, Dense, Reshape, Conv1D
+
+# Local module imports
+from model.activation import sublinear, linear
+from model.transformer import TimeseriesTransformerBuilder as TSTFBuilder
+
+class Decoder(Model):
     
     def __init__(self, input_shape, head_size, num_heads, ff_dim,
-                 num_Transformer_blocks, mlp_units, n_classes,
+                 num_Transformer_blocks, mlp_units,
                  dropout=0, mlp_dropout=0):
         """
         Initializes the TimeSeriesTransformer class. This class is a
@@ -29,10 +37,10 @@ def __init__(self, input_shape, head_size, num_heads, ff_dim,
         """
         self.tstfbuilder = TSTFBuilder()
 
-        super(DecoderModel, self).__init__()
+        super(Decoder, self).__init__()
         self.decoder = self._modelstack(
                 input_shape, head_size, num_heads, ff_dim,
-                num_Transformer_blocks, mlp_units, n_classes,
+                num_Transformer_blocks, mlp_units,
                 dropout, mlp_dropout)
 
     def _modelstack(self, input_shape, head_size, num_heads, ff_dim,
@@ -58,12 +66,12 @@ def _modelstack(self, input_shape, head_size, num_heads, ff_dim,
             A Keras model.
         """
 
-        inputs = Input((mlp_units[-1],))
+        inputs = Input(shape=(mlp_units[-1],), name="decoder_input")
         full_dimension = input_shape[0] * input_shape[1]
-        x = Dense(full_dimension, activation="relu")(inputs)
-        x = Reshape((input_shape[0], input_shape[1]))(x)
+        x = Dense(full_dimension, activation="relu", name="dec_dense1")(inputs)
+        x = Reshape((input_shape[0], input_shape[1]), name="dec_reshape")(x)
 
-        for _ in range(num_Transformer_blocks):
+        for i in range(num_Transformer_blocks):
             x = self.tstfbuilder.build_transformerblock(
                 x,
                 head_size,
@@ -76,9 +84,10 @@ def _modelstack(self, input_shape, head_size, num_heads, ff_dim,
         x = Conv1D(filters=input_shape[1],
                    kernel_size=1,
                    padding="valid",
-                   activation=linear)(x)
+                   activation=linear,
+                   name="dec_conv1d")(x)
 
-        return Model(inputs, x)
+        return Model(inputs, x, name="decoder")
 
     def call(self, inputs):
         """

From ed99b8b5259834292f7e31c7324f36058a7a60d7 Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Mon, 2 Mar 2026 13:47:42 -0500
Subject: [PATCH 06/18] Interface with embedding fixed for compatibility

---
 model/encoder.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/model/encoder.py b/model/encoder.py
index 9775e55..d803f1d 100644
--- a/model/encoder.py
+++ b/model/encoder.py
@@ -9,7 +9,6 @@
 
 # Local module imports
 from model.transformer import TimeseriesTransformerBuilder as TSTFBuilder
-from model.latent import Semantic, Variational
 
 class Encoder(Model):
     """
@@ -48,8 +47,7 @@ def __init__(self, input_shape, head_size, num_heads, ff_dim,
         )
 
     def _modelstack(self, input_shape, head_size, num_heads, ff_dim,
-                   num_Transformer_blocks, mlp_units, n_classes,
-                    dropout, mlp_dropout):
+                    num_Transformer_blocks, mlp_units, dropout, mlp_dropout):
         """
         Creates a Timeseries Transformer model. This consists of a series of
         Transformer blocks followed by an MLP.
@@ -91,10 +89,8 @@ def _modelstack(self, input_shape, head_size, num_heads, ff_dim,
             x = Dropout(mlp_dropout)(x)
 
         # Two separate latent spaces supported
-        y = Dense(n_classes[0], activation="relu")(x)
-        z = Dense(n_classes[1], activation="relu")(x)
 
-        return Model(inputs, [y, z])
+        return Model(inputs, x)
 
     def call(self, inputs):
         """

From 58351f8ca7e93ec5dba373294ccbb5e9cd35a5c0 Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Mon, 2 Mar 2026 13:48:06 -0500
Subject: [PATCH 07/18] Latent block with two parallel sub-components built

---
 model/latent.py | 99 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 71 insertions(+), 28 deletions(-)

diff --git a/model/latent.py b/model/latent.py
index e948f27..a5d0721 100644
--- a/model/latent.py
+++ b/model/latent.py
@@ -1,14 +1,47 @@
 # -*- coding: utf-8 -*-
 
+# 3rd party module imports
+import keras
 from keras import Input, Model
-from keras.layers import Dense
+from keras.layers import Dense, Layer
+import keras.ops as ops
+
+# Local module imports
+from model.losses import categorical_crossentropy, mean_absolute_error
+
+class Latent(Model):
+    def __init__(self, mlp_dim, semantic_dims, var_dim):
+        super(Latent, self).__init__()
+        self.name = "Semantic Embedding Block"
+        self.semantic = Semantic(mlp_dim, semantic_dims)
+        self.variational = Variational(mlp_dim, var_dim)
+        self.latent = self._build(mlp_dim, semantic_dims, var_dim)
+
+    def _build(self, mlp_dim, semantic_dims, var_dim):
+        """
+        """
+
+        inputs = Input(shape=(mlp_dim,))
+        sem_inputs = [Input(shape=(dim,)) for dim in semantic_dims]
+        s, s_err = self.semantic([inputs] + sem_inputs)
+        v, kl_loss = self.variational(inputs)
+        x = Dense(mlp_dim, activation="relu")(ops.concatenate([s, v], axis=-1))
+
+        return Model([inputs, *sem_inputs], [x, s_err, kl_loss], name="latent")
+
+    def call(self, inputs):
+        return self.latent(inputs)
+
+    def summary(self):
+        return self.latent.summary()
 
 class Semantic(Model):
     def __init__(self, mlp_dim, semantic_dims):
         self.name = "Semantic Embedding Block"
+        super(Semantic, self).__init__()
         self.latent = self._build(mlp_dim, semantic_dims)
 
-    def _build(mlp_dim, semantic_dims):
+    def _build(self, mlp_dim, semantic_dims):
         """
         Embedding space for semantically meaningful variables. Everything is
         laterally spaced out.
@@ -22,7 +55,12 @@ def _build(mlp_dim, semantic_dims):
                     model, with a structured latent space that maps to
                     semantically meaningful variables.
         """
-        inputs = Input(shape=(mpl_dim,))
+
+        # Compute inverse log of dimensions to get weights
+        class_counts = ops.array(semantic_dims[:-1], dtype="float32")
+        weights = 1/ops.log(class_counts)
+
+        inputs = Input(shape=(mlp_dim,))
         targets = [Input(shape=(dim,)) for dim in semantic_dims]
 
         # One-hot encoding spaces
@@ -33,23 +71,24 @@ def _build(mlp_dim, semantic_dims):
         # Regression spaces
         # Min. frequency, Max. frequency, baseline time, treatment time,
         # loop dt
-        reg = Dense(dim[-1], activation=None)(inputs)
+        reg = Dense(semantic_dims[-1], activation=None)(inputs)
 
         # Compute categorical_crossentropy error against targets for
         # categorical variables
         errors = [
-            keras.losses.categorical_crossentropy(target, pred)
-            for target, pred in zip(targets[:-1], one_hots)
+            categorical_crossentropy(target, pred) * weight
+            for target, pred, weight in zip(targets[:-1], one_hots, weights)
         ]
         
-        # Compute MAE
-        errors.append(keras.losses.mean_absolute_error(targets[-1], reg))
+        # Compute MAE, with normalization by approximate range of values (~4)
+        errors.append(mean_absolute_error(targets[-1], reg) / 4.)
+        error = ops.sum(ops.stack(errors))
 
         # Combine everything into single dense layer
-        concat = keras.layers.concatenate(one_hots + [reg])
+        concat = keras.layers.concatenate(one_hots + [reg], axis=-1)
         output = Dense(mlp_dim, activation="relu")(concat)
 
-        return Model(inputs, [output, errors])
+        return Model([inputs, *targets], [output, error], name="semantic")
 
     def call(self, inputs):
         return self.latent(inputs)
@@ -58,17 +97,23 @@ def summary(self):
         return self.latent.summary()
 
 class Variational(Model):
-    def __init__(self, name):
-        self.name = name
+    def __init__(self, dim, var_dim):
+        self.name = "Variational Embedding Block"
+        super(Variational, self).__init__()
+        self.latent = self._build(dim, var_dim)
 
-    def _build(dim):
+    def _build(self, dim, var_dim):
         inputs = Input(shape=(dim,))
-        x = Dense(dim, activation="relu")(inputs)
-        z1 = Dense(dim, activation="relu")(x)
-        z2 = Dense(dim, activation="relu")(x)
-        z = Sampling()([z_mean, z_log_var, z])
+        x = Dense(var_dim, activation="relu")(inputs)
+        z_mean = Dense(var_dim, activation=None)(x)
+        z_log_var = Dense(var_dim, activation=None)(x)
+        z = Sampling()([z_mean, z_log_var])
+        kl_loss = -0.5 * ops.sum(
+            1 + z_log_var - ops.square(z_mean) - ops.exp(z_log_var),
+            axis=1
+        )
 
-        return Model(inputs, [z_mean, z_log_var, z])
+        return Model(inputs, [z, kl_loss], name="variational")
 
     def call(self, inputs):
         return self.latent(inputs)
@@ -76,21 +121,19 @@ def call(self, inputs):
     def summary(self):
         return self.latent.summary()
 
-class Sampling(layers.Layer):
+class Sampling(Layer):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.seed_gen = keras.random.SeedGenerator(1337)
+
     def call(self, inputs):
         z_mean, z_log_var = inputs
 
         # Reparameterization trick
-        eps = tf.random.normal(shape=tf.shape(z_mean))
-        z = z_mean + tf.exp(0.5 * z_log_var) * eps
+        eps = keras.random.normal(shape=ops.shape(z_mean), seed=self.seed_gen)
+        z = z_mean + ops.exp(0.5 * z_log_var) * eps
 
-        # KL divergence term
-        kl = -0.5 * tf.reduce_sum(
-        1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var),
-            axis=1
-                    )
-        self.add_loss(tf.reduce_mean(kl))
+        return z
 
-    return z
 
 # EOF

From 352fcd316030ff9c94dc403cbedabc92f789e44a Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Mon, 2 Mar 2026 13:48:27 -0500
Subject: [PATCH 08/18] Major revision to CompoundModel class to use the new
 three-part structure

---
 model/model.py | 265 ++++++++++++++++---------------------------------
 1 file changed, 87 insertions(+), 178 deletions(-)

diff --git a/model/model.py b/model/model.py
index 540479c..1d455e5 100644
--- a/model/model.py
+++ b/model/model.py
@@ -6,19 +6,25 @@
 
 """
 
+# Built-in module imports
+from typing import Optional
+
+# 3rd party module imports
 from keras import Input, Model
-from keras.layers import BatchNormalization, Conv1D, Dense, Dropout, Reshape, \
-        GlobalAveragePooling1D, LayerNormalization, Masking, Conv2D, \
-        MultiHeadAttention, concatenate
+from keras.layers import Dense, Dropout, Reshape
+import keras.ops as ops
 
-from model.activation import sublinear, linear
+# Local module imports
 from model.transformer import TimeseriesTransformerBuilder as TSTFBuilder
+from model.encoder import Encoder
+from model.decoder import Decoder
+from model.latent import Latent
 
 class CompoundModel(Model):
     
     def __init__(self, input_shape, head_size, num_heads, ff_dim,
-                 num_Transformer_blocks, mlp_units, n_classes,
-                 dropout=0, mlp_dropout=0):
+                 num_transformer_blocks, mlp_units, semantic_dims,
+                 var_dims, dropout=0, mlp_dropout=0):
         """
         Initializes the TimeSeriesTransformer class. This class is a
         wrapper around a Keras model that consists of a series of
@@ -30,79 +36,68 @@ def __init__(self, input_shape, head_size, num_heads, ff_dim,
             num_heads: int, the number of attention heads.
             ff_dim: int, the number of neurons in the feedforward neural
                 network.
-            num_Transformer_blocks: int, the number of Transformer blocks.
+            num_transformer_blocks: int, the number of Transformer blocks.
             mlp_units: list of ints, the number of neurons in each layer of
                 the MLP.
-            n_classes: int, the number of output classes.
             dropout: float, dropout rate.
             mlp_dropout: float, dropout rate in the MLP.
 
         Attributes:
-            timeseriestransformer: Keras model, the TimeSeriesTransformer
+            autoencoder: Keras model, the TimeSeriesTransformer
                 model.
         """
-        self.tstfbuilder = TSTFBuilder()
-
         super(CompoundModel, self).__init__()
-        self.timeseriestransformer = self._modelstack(
-                input_shape, head_size, num_heads, ff_dim,
-                num_Transformer_blocks, mlp_units, n_classes,
-                dropout, mlp_dropout)
-
-    def _modelstack(self, input_shape, head_size, num_heads, ff_dim,
-                   num_Transformer_blocks, mlp_units, n_classes,
-                    dropout, mlp_dropout):
-        """
-        Creates a Timeseries Transformer model. This consists of a series of
-        Transformer blocks followed by an MLP.
-
-        Args:
-            input_shape: tuple, shape of the input tensor.
-            head_size: int, the number of features in each attention head.
-            num_heads: int, the number of attention heads.
-            ff_dim: int, the number of neurons in the feedforward neural
-                network.
-            num_Transformer_blocks: int, the number of Transformer blocks.
-            mlp_units: list of ints, the number of neurons in each layer of
-                the MLP.
-            n_classes: list of ints, the number of output classes.
-            dropout: float, dropout rate.
-            mlp_dropout: float, dropout rate in the MLP.
-
-        Returns:
-            A Keras model.
-        """
-
-        inputs = Input(shape=input_shape)
-        #x = inputs
-        #inputs = Masking(mask_value=pad_value)(inputs)
-        x = BatchNormalization()(inputs)
-
-        # Transformer blocks
-        for _ in range(num_Transformer_blocks):
-            x = self.tstfbuilder.build_transformerblock(
-                x,
+        self.encoder = Encoder(
+                input_shape,
+                head_size,
+                num_heads,
+                ff_dim,
+                num_transformer_blocks,
+                mlp_units,
+                dropout,
+                mlp_dropout
+        )
+
+        self.latent = Latent(
+                mlp_units[-1],
+                semantic_dims,
+                var_dims
+        )
+
+        self.decoder = Decoder(
+                input_shape,
                 head_size,
                 num_heads,
                 ff_dim,
-                dropout
-            )
+                num_transformer_blocks,
+                mlp_units,
+                dropout,
+                mlp_dropout
+        )
+        self.autoencoder = self._modelstack(input_shape, semantic_dims)
+        
 
-        # Pooling and simple DNN block
-        x = GlobalAveragePooling1D(data_format="channels_first")(x)
-        for dim in mlp_units:
-            x = Dense(dim, activation="relu")(x)
-            x = Dropout(mlp_dropout)(x)
+    def _modelstack(self, input_shape, semantic_dims):
 
-        # Two separate latent spaces supported
-        #y = Dense(n_classes[0], activation="softmax")(x)
-        #z = Dense(n_classes[1], activation="softmax")(x)
-        y = Dense(n_classes[0], activation="relu")(x)
-        z = Dense(n_classes[1], activation="relu")(x)
+        # Define overall model inputs
+        encoder_input = Input(shape=input_shape, name="Encoder_input")
+        semantic_inputs = [Input(shape=(semantic_dim,), name=f"Semantic_input_{i}")
+                           for i, semantic_dim in enumerate(semantic_dims)]
+        inputs = [encoder_input] + semantic_inputs
 
-        return Model(inputs, [y, z])
+        # Encoder-Latent space-Decoder stack
+        encoding = self.encoder(encoder_input)
+        embedding, sem_loss, kl_loss = self.latent([encoding] + semantic_inputs)
+        decoding = self.decoder(embedding)
 
-    def call(self, inputs):
+        # Combine Losses
+        recon_loss = ops.mean(ops.abs(decoding - encoder_input))
+        model = Model(inputs, [decoding, sem_loss, kl_loss, recon_loss],
+                      name="Autoencoder")
+
+        return model
+
+    def call(self, inputs, training: Optional[bool] = None):
         """
         Calls the TimeSeriesTransformer model on a batch of inputs.
 
@@ -112,9 +107,19 @@ def call(self, inputs):
         Returns:
             Tensor, resulting output of the TimeSeriesTransformer model.
         """
-        return self.timeseriestransformer(inputs)
+        outputs, sem_loss, kl_loss, recon_loss = self.autoencoder(
+                inputs,
+                training=training
+        )
+
+        recon_weight = ops.cast(1./4., dtype=recon_loss.dtype)
+        self.add_loss(sem_loss)
+        self.add_loss(kl_loss)
+        self.add_loss(recon_loss * recon_weight)
 
-    def summary(self):
+        return outputs
+
+    def summary(self, *args, **kwargs):
         """
         Prints a summary of the TimeSeriesTransformer model.
 
@@ -124,123 +129,27 @@ def summary(self):
         Returns:
             None.
         """
-        self.timeseriestransformer.summary()
+        self.autoencoder.summary(*args, **kwargs)
 
-class DecoderModel(Model):
-    
-    def __init__(self, input_shape, head_size, num_heads, ff_dim,
-                 num_Transformer_blocks, mlp_units, n_classes,
-                 dropout=0, mlp_dropout=0):
-        """
-        Initializes the TimeSeriesTransformer class. This class is a
-        wrapper around a Keras model that consists of a series of
-        Transformer blocks followed by an MLP.
+    def kl(self):
+        beta = (
+            self.beta_anneal(epoch, total_epochs)
+            * self.beta_cyclical(epoch, cycle)
+        )
+        capacity = self.kl_capacity(epoch, max_cap, total_epochs)
+        weight = 1
+        kl_modified = beta * ops.abs(kl_loss - capacity) * weight
 
-        Args:
-            input_shape: tuple, shape of the input tensor.
-            head_size: int, the number of features in each attention head.
-            num_heads: int, the number of attention heads.
-            ff_dim: int, the number of neurons in the feedforward neural
-                network.
-            num_Transformer_blocks: int, the number of Transformer blocks.
-            mlp_units: list of ints, the number of neurons in each layer of
-                the MLP.
-            n_classes: int, the number of output classes.
-            dropout: float, dropout rate.
-            mlp_dropout: float, dropout rate in the MLP.
 
-        Attributes:
-            timeseriestransformer: Keras model, the TimeSeriesTransformer
-                model.
-        """
-        self.tstfbuilder = TSTFBuilder()
+    def beta_anneal(self, epoch, total_epochs):
+        return ops.minimum(1.0, epoch / total_epochs)
 
-        super(DecoderModel, self).__init__()
-        self.timeseriestransdecoder = self._modelstack(
-                input_shape, head_size, num_heads, ff_dim,
-                num_Transformer_blocks, mlp_units, n_classes,
-                dropout, mlp_dropout)
+    def beta_cyclical(self, epoch, cycle):
+        cycle_pos = (epoch % cycle) / cycle  # fraction of the cycle elapsed
+        return ops.minimum(1.0, 2 * cycle_pos)  # ramps up, capped at 1.0
 
-    def _modelstack(self, input_shape, head_size, num_heads, ff_dim,
-                   num_Transformer_blocks, mlp_units, n_classes,
-                    dropout, mlp_dropout):
-        """
-        Creates a Timeseries Transformer model. This consists of a series of
-        Transformer blocks followed by an MLP.
-
-        Args:
-            input_shape: tuple, shape of the input tensor.
-            head_size: int, the number of features in each attention head.
-            num_heads: int, the number of attention heads.
-            ff_dim: int, the number of neurons in the feedforward neural
-                network.
-            num_Transformer_blocks: int, the number of Transformer blocks.
-            mlp_units: list of ints, the number of neurons in each layer of
-                the MLP.
-            n_classes: list of ints, the number of output classes.
-            dropout: float, dropout rate.
-            mlp_dropout: float, dropout rate in the MLP.
-
-        Returns:
-            A Keras model.
-        """
-
-        shape0 = n_classes[0]
-        shape1 = n_classes[1]
-        x0 = Input(shape=(shape0,))
-        x1 = Input(shape=(shape1,))
-        inputs = [x0, x1]
-        x0 = Dense(n_classes[0], activation="relu")(x0)
-        x1 = Dense(n_classes[1], activation="relu")(x1)
-        x = concatenate([x0, x1], axis=-1)
-        full_dimension = input_shape[0] * input_shape[1]
-        x = Dense(full_dimension, activation="relu")(x)
-        x = Reshape((input_shape[0], input_shape[1]))(x)
-
-        """
-        for dim in mlp_units:
-            x = Dense(dim, activation="relu")(x)
-            x = Dropout(mlp_dropout)(x)
-        """
-        for _ in range(num_Transformer_blocks):
-            x = self.tstfbuilder.build_transformerblock(
-                x,
-                head_size,
-                num_heads,
-                ff_dim,
-                dropout
-            )
-
-        # final layer with corrected shape
-        x = Conv1D(filters=input_shape[1],
-                   kernel_size=1,
-                   padding="valid",
-                   activation=linear)(x)
-
-        return Model(inputs, x)
-
-    def call(self, inputs):
-        """
-        Calls the TimeSeriesTransformer model on a batch of inputs.
-
-        Args:
-            inputs: Tensor, batch of input data.
-
-        Returns:
-            Tensor, resulting output of the TimeSeriesTransformer model.
-        """
-        return self.timeseriestransdecoder(inputs)
-
-    def summary(self):
-        """
-        Prints a summary of the TimeSeriesTransformer model.
-
-        Args:
-            None.
-
-        Returns:
-            None.
-        """
-        self.timeseriestransdecoder.summary()
+    def kl_capacity(self, epoch, max_cap, total_epochs):
+        cap = max_cap * epoch / total_epochs
+        return ops.minimum(max_cap, cap)
 
 # EOF

From b04a7c3779669582292be281cdb13b6a4ca112ab Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Mon, 2 Mar 2026 13:48:50 -0500
Subject: [PATCH 09/18] Revised training module to properly invoke the target
 model

---
 train/autoencoder_train.py | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/train/autoencoder_train.py b/train/autoencoder_train.py
index e9e7402..d58440c 100644
--- a/train/autoencoder_train.py
+++ b/train/autoencoder_train.py
@@ -17,8 +17,6 @@
 from model.metrics import MutualInformation, mutual_information
 from visualize.visualize import confusion_matrix
 from visualize.plot import roc_plot
-from train.encoder_train import build_encoder
-from train.decoder_train import build_decoder
 
 def autoencoder_workflow(params, shape, n_classes,
                          train_set, validation_set, test_set,
@@ -47,18 +45,26 @@ def autoencoder_workflow(params, shape, n_classes,
 
     save_autoencoder(params, model, path)
 
-def build_autoencoder(params, shape, n_classes):
-    autoencoder_params = params["autoencoder_params"]
-    #mi = MutualInformation()
+def build_autoencoder(params, shape, semantic_dims):
+    params = params["autoencoder_params"]
     mse = MeanSquaredError()
 
-    encoder_model = build_encoder(params, shape, n_classes)
-    decoder_model = build_decoder(params, shape, n_classes)
-    model = keras.Sequential([encoder_model, decoder_model])
+    model = CompoundModel(
+        shape,
+        params["head_size"],
+        params["num_heads"],
+        params["ff_dim"],
+        params["num_transformer_blocks"],
+        params["mlp_units"],
+        semantic_dims,
+        params["var_dims"],
+        dropout=params["dropout"],
+        mlp_dropout=params["mlp_dropout"]
+    )
+    model.build(shape)
     model.compile(
         optimizer=keras.optimizers.Adam(learning_rate=4e-4),
-        loss=autoencoder_params["loss"],
-        metrics=[mse]#, mutual_information]
+        metrics=params["metrics"]
     )
 
     return model
@@ -80,8 +86,10 @@ def train_autoencoder(params, model, train_set, validation_set, path):
     
     start = time.time()
     model.fit(
-        x=train_set, y=train_set,
-        validation_data=(validation_set, validation_set),
+        x=[train_set[0], train_set[1][0], train_set[1][1]], y=train_set[0],
+        validation_data=(
+            [validation_set[0], validation_set[1][0], validation_set[1][1]],
+            validation_set[0]),
         batch_size=params["batch_size"],
         epochs=params["epochs"],
         verbose=log_level,
@@ -95,7 +103,7 @@ def test_autoencoder(model: Model, test: List, metrics: dict):
     """
     """
 
-    test_eval = model.evaluate(test, test)
+    test_eval = model.evaluate(test[0], test[0])
     if len(metrics.keys()) == 1:
         metrics[metrics.keys()[0]] = test_eval
     else:

From 4a224ba9fb6cb5a1089bff94b8e10755555ef821 Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Mon, 2 Mar 2026 13:49:15 -0500
Subject: [PATCH 10/18] Minor fixes but these will be deprecated

---
 train/decoder_train.py | 9 ++++-----
 train/encoder_train.py | 7 ++++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/train/decoder_train.py b/train/decoder_train.py
index 2e4c6ff..d10386c 100644
--- a/train/decoder_train.py
+++ b/train/decoder_train.py
@@ -7,7 +7,7 @@
 import typing
 from typing import List
 
-from model.model import DecoderModel
+from model.model import Decoder
 from visualize.plot import spectra_plot
 
 def decoder_workflow(params, train_set, validation_set, test_set,
@@ -26,26 +26,25 @@ def decoder_workflow(params, train_set, validation_set, test_set,
     spectra_plot(test_predict[0], name=f"{target}-{treatment}-predict")
     spectra_plot(test_set[0][0], name=f"{target}-{treatment}-true")
 
-def build_decoder(params, input_shape, n_classes):
+def build_decoder(params, input_shape, semantic_dims):
     """
     """
 
     params = params["decoder_params"]
-    decoder = DecoderModel(
+    decoder = Decoder(
             input_shape,
             params["head_size"],
             params["num_heads"],
             params["ff_dim"],
             params["num_transformer_blocks"],
             params["mlp_units"],
-            n_classes,
             params["dropout"],
             params["mlp_dropout"]
     )
 
     decoder.compile(
         optimizer=keras.optimizers.Adam(learning_rate=4e-4),
-        loss=params["loss"],
+        #loss=params["loss"],
         metrics=params["metrics"]
     )
 
diff --git a/train/encoder_train.py b/train/encoder_train.py
index ceab077..2013e8d 100644
--- a/train/encoder_train.py
+++ b/train/encoder_train.py
@@ -34,7 +34,7 @@ def encoder_workflow(params, shape, n_classes,
 
     save_encoder(model, path)
 
-def build_encoder(params, input_shape, n_classes):
+def build_encoder(params, input_shape, semantic_dims):
     log_level = params["log_level"]
     params = params["encoder_params"]
     model = CompoundModel(
@@ -44,14 +44,15 @@ def build_encoder(params, input_shape, n_classes):
         params["ff_dim"],
         params["num_transformer_blocks"],
         params["mlp_units"],
-        n_classes,
+        semantic_dims,
+        params["var_dims"],
         dropout=params["dropout"],
         mlp_dropout=params["mlp_dropout"]
     )
 
     model.compile(
         optimizer=keras.optimizers.Adam(learning_rate=4e-4),
-        loss=params["loss"],
+        #loss=params["loss"],
         metrics=params["metrics"]
     )
     if log_level == 1:

From d24d7a8eb5bb700a43c493a307fec57fa0016331 Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Mon, 2 Mar 2026 13:49:38 -0500
Subject: [PATCH 11/18] Inputs for autoencoder training corrected

---
 train_model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/train_model.py b/train_model.py
index d7b79e2..eae5575 100644
--- a/train_model.py
+++ b/train_model.py
@@ -120,9 +120,9 @@ def main():
             params,
             shape,
             n_classes,
-            train_set[0],
-            validation_set[0],
-            test_set[0],
+            train_set,
+            validation_set,
+            test_set,
             categories,
             keys,
             path

From 3fddf624c79ccf20e2a72f4617bd5a398e86e24c Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Mon, 2 Mar 2026 13:49:49 -0500
Subject: [PATCH 12/18] name param added

---
 model/transformer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/model/transformer.py b/model/transformer.py
index 5a03188..e3de135 100644
--- a/model/transformer.py
+++ b/model/transformer.py
@@ -38,7 +38,7 @@ def __init__(self):
         """
 
     def build_transformerblock(self, inputs, head_size, num_heads,
-                           ff_dim, dropout):
+                           ff_dim, dropout, name=None):
         """
         Constructs the transformer block. A transformer block consists of the
         following steps:

From ce47e3f7f375d34072b1ac8b9f2f1c66edb340fa Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Mon, 2 Mar 2026 13:50:29 -0500
Subject: [PATCH 13/18] Custom loss functions using keras.ops only

---
 model/losses.py | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 model/losses.py

diff --git a/model/losses.py b/model/losses.py
new file mode 100644
index 0000000..d08a5f1
--- /dev/null
+++ b/model/losses.py
@@ -0,0 +1,10 @@
+# -*- encoding: utf-8 -*-
+
+import keras.ops as ops
+
+def categorical_crossentropy(y_true, y_pred):
+    y_pred = ops.clip(y_pred, 1e-7, 1.0)
+    return -ops.sum(y_true * ops.log(y_pred), axis=-1)
+
+def mean_absolute_error(y_true, y_pred):
+    return ops.mean(ops.abs(y_pred - y_true), axis=-1)

From b21d2b32282d3804ca5e59c4b34ff61eb64918a6 Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Mon, 2 Mar 2026 13:50:52 -0500
Subject: [PATCH 14/18] Mutual information metrics added

---
 model/metrics.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)
 create mode 100644 model/metrics.py

diff --git a/model/metrics.py b/model/metrics.py
new file mode 100644
index 0000000..584cdc1
--- /dev/null
+++ b/model/metrics.py
@@ -0,0 +1,97 @@
+import keras
+import keras.ops as ops
+
+def mutual_information(y_true, y_pred):
+    return jax_mi(y_true, y_pred)
+
+def jax_mi(y_true, y_pred, num_bins=20, eps=1e-8):
+    y_true = keras.ops.reshape(y_true, (-1,))  # flatten to 1-D
+    y_pred = keras.ops.reshape(y_pred, (-1,))
+
+    y_min, y_max = ops.min(y_true), ops.max(y_true)
+    p_min, p_max = ops.min(y_pred), ops.max(y_pred)
+
+    y_edges = ops.linspace(y_min, y_max, num_bins + 1)  # num_bins + 1 edges
+    p_edges = ops.linspace(p_min, p_max, num_bins + 1)
+
+    def digitize(values, edges):
+        cmp = values[:, None] >= edges[None, :]
+        return ops.sum(cmp, axis=1) - 1  # (number of edges <= value) - 1
+
+    y_bin = ops.clip(digitize(y_true, y_edges), 0, num_bins - 1)
+    p_bin = ops.clip(digitize(y_pred, p_edges), 0, num_bins - 1)
+
+    # Compute joint histogram WITHOUT one-hot (indexed add into a zero grid)
+    joint = ops.zeros((num_bins, num_bins))
+    joint = joint.at[y_bin, p_bin].add(1.0)  # NOTE(review): JAX-only syntax?
+
+    joint = joint / (ops.sum(joint) + eps)
+
+    py = ops.sum(joint, axis=1, keepdims=True)  # marginal over y_true bins
+    pp = ops.sum(joint, axis=0, keepdims=True)  # marginal over y_pred bins
+
+    ratio = (joint + eps) / (py * pp + eps)
+    mi = ops.sum(joint * ops.log(ratio))
+
+    return mi
+
+
+def flatten(x):
+    return keras.ops.reshape(x,(-1,))
+
+def mutual_info(y_true, y_pred, num_bins=20):
+    eps = 1E-8  # added to numerator and denominator of the log ratio
+
+    y_true = flatten(y_true)
+    y_pred = flatten(y_pred)
+
+    y_min, y_max = (ops.min(y_true), ops.max(y_true))
+    p_min, p_max = (ops.min(y_pred), ops.max(y_pred))
+
+    y_edges = ops.linspace(y_min, y_max, num_bins + 1)
+    p_edges = ops.linspace(p_min, p_max, num_bins + 1)
+    
+    def digitize(values, edges):
+        cmp = values[:, None] >= edges[None, :]
+        return ops.sum(cmp, axis=1) - 1  # (number of edges <= value) - 1
+
+    y_bin = ops.clip(digitize(y_true, y_edges), 0, num_bins - 1)
+    p_bin = ops.clip(digitize(y_pred, p_edges), 0, num_bins - 1)
+
+    # Compute joint histogram: bin indices are converted to one-hot rows,
+    # then the per-sample outer products are summed over all samples
+    y_oh = ops.one_hot(y_bin, num_bins)
+    p_oh = ops.one_hot(p_bin, num_bins)
+
+    joint = ops.sum(y_oh[:, :, None] * p_oh[:, None, :], axis=0)
+    joint = joint / ops.sum(joint)
+
+    # Marginals
+    py = ops.sum(joint, axis=1)
+    pp = ops.sum(joint, axis=0)
+
+    # Compute MI; marginals are reshaped so py * pp broadcasts to a 2-D grid
+    py = ops.reshape(py, (-1, 1))
+    pp = ops.reshape(pp, (1, -1))
+
+    denom = py * pp
+    ratio = (joint + eps) / (denom + eps)
+    mi = ops.sum(joint * ops.log(ratio))
+
+    return mi
+
+class MutualInformation(keras.metrics.Metric):
+    def __init__(self, num_bins=20, name="mutual_information", **kwargs):
+        super().__init__(name=name, **kwargs)
+        self.num_bins = num_bins
+        self.mi = self.add_weight(shape=(), initializer="zeros")  # scalar
+
+    def update_state(self, y_true, y_pred, sample_weight=None):
+        value = mutual_info(y_true, y_pred, self.num_bins)
+        if sample_weight is not None:
+            value = value * sample_weight  # NOTE(review): assumes a scalar
+        self.mi.assign(value)  # overwrites the stored value; no accumulation
+
+    def result(self):
+        return self.mi
+

From 62535c9f95dbfe222eab7d923987df1063674e88 Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Wed, 4 Mar 2026 15:06:03 -0500
Subject: [PATCH 15/18] Loss computation set up

---
 model/model.py | 68 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 53 insertions(+), 15 deletions(-)

diff --git a/model/model.py b/model/model.py
index 1d455e5..d1d7e42 100644
--- a/model/model.py
+++ b/model/model.py
@@ -47,6 +47,11 @@ def __init__(self, input_shape, head_size, num_heads, ff_dim,
                 model.
         """
         super(CompoundModel, self).__init__()
+        self.cycle_len = 25
+        self.total_epoch = None
+        self.max_cap = 5.0
+        self.beta = 0.01
+
         self.encoder = Encoder(
                 input_shape,
                 head_size,
@@ -111,12 +116,11 @@ def call(self, inputs, training: Optional[bool] = None):
                 inputs,
                 training=training
         )
-
-        recon_weight = ops.cast(1./4., dtype=recon_loss.dtype)
+        
+        self.recon_weight = ops.cast(1./4., dtype=recon_loss.dtype)
         self.add_loss(sem_loss)
-        self.add_loss(kl_loss)
-        self.add_loss(recon_loss * recon_weight)
-
+        self.add_loss(self.beta*kl_loss)
+        self.add_loss(recon_loss * self.recon_weight)
         return outputs
 
     def summary(self, *args, **kwargs):
@@ -131,16 +135,6 @@ def summary(self, *args, **kwargs):
         """
         self.autoencoder.summary(*args, **kwargs)
 
-    def kl(self):
-        beta = (
-            self.beta_anneal(epoch, total_epochs)
-            * self.beta_cyclical(epoch, cycle)
-        )
-        capacity = self.kl_capacity(epoch, max_cap, total_epochs)
-        weight = 1
-        kl_modified = beta * ops.abs(kl_loss - capacity) * weight
-
-
     def beta_anneal(self, epoch, total_epochs):
         return ops.minimum(1.0, epoch / total_epochs)
 
@@ -152,4 +146,48 @@ def kl_capacity(self, epoch, max_cap, total_epochs):
         cap = max_cap * epoch / total_epochs
         return ops.minimum(max_cap, cap)
 
+    def kl_weight(self, epoch, step, total_epochs, cycle, max_cap):
+        beta = (  # product of the annealing and cyclical schedules
+            self.beta_anneal(epoch, total_epochs) *
+            self.beta_cyclical(epoch, cycle)
+        )
+        capacity = self.kl_capacity(epoch, max_cap, total_epochs)
+        return beta, capacity  # NOTE(review): `step` is never read; confirm
+
+    """
+    def train_step(self, data, sample_weight=None):
+
+        with ops.GradientTape() as tape:
+            outputs, sem_loss, kl_loss, recon_loss = self(
+                    data, training=True
+            )
+            self.recon_weight = ops.cast(1./4., dtype=ops.float32)
+            epoch = ops.cast(
+                    self.optimizer.iterations // self.steps_per_epoch,
+                    kl_loss.dtype
+            )
+            step = ops.cast(self.optimizer.iterations, kl_loss.dtype)
+            beta, capacity = self.kl_weight(
+                    epoch=epoch,
+                    step=step,
+                    total_epochs=self.total_epoch,
+                    cycle=self.cycle_len,
+                    max_cap=self.max_cap
+            )
+            kl_term = beta * ops.abs(kl_loss - capacity)
+            total_loss = sem_loss + kl_term + recon_loss * self.recon_weight
+
+        trainable_vars = self.trainable_variables
+        grads = tape.gradient(total_loss, trainable_vars)
+        self.optimizer.apply_gradients(zip(grads, trainable_vars))
+
+        return {
+            "loss": total_loss,
+            "sem_loss": sem_loss,
+            "kl_loss": kl_loss,
+            "recon_loss": recon_loss,
+            "beta": beta,
+            "capacity": capacity
+        }"""
+
 # EOF

From ca40fae6244940d154bacbc7c8db6339bdd18f85 Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Wed, 4 Mar 2026 15:06:30 -0500
Subject: [PATCH 16/18] ETL now has option to specify dataset for training

---
 pipe/etl.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pipe/etl.py b/pipe/etl.py
index a6499be..35085a0 100644
--- a/pipe/etl.py
+++ b/pipe/etl.py
@@ -38,7 +38,7 @@ def etl(spark: SparkSession, split: list=None) -> DataFrame:
             data = split_sets(data, split=split)
     return data
 
-def read(spark: SparkSession, split=None) -> DataFrame:
+def read(spark: SparkSession, split=None, parquet=None) -> DataFrame:
     """
     Reads the processed data from a Parquet file and splits it into training,
     validation, and test sets.
@@ -50,7 +50,10 @@ def read(spark: SparkSession, split=None) -> DataFrame:
         DataFrame: The split datasets and category dictionary.
     """
 
-    data = spark.read.parquet("/app/workdir/parquet/data.parquet")
+    if parquet is None:
+        data = spark.read.parquet("/app/workdir/parquet/data.parquet")
+    else:
+        data = spark.read.parquet(f"/app/workdir/parquet/{parquet}")
     data = split_sets(data, split=split)
     return data
 

From 3b13606dad2a6796661f3bf58eb9d44149c76c7f Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Wed, 4 Mar 2026 15:07:00 -0500
Subject: [PATCH 17/18] Training parameter set revised

---
 train/autoencoder_train.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/train/autoencoder_train.py b/train/autoencoder_train.py
index d58440c..e00a6a8 100644
--- a/train/autoencoder_train.py
+++ b/train/autoencoder_train.py
@@ -61,6 +61,7 @@ def build_autoencoder(params, shape, semantic_dims):
         dropout=params["dropout"],
         mlp_dropout=params["mlp_dropout"]
     )
+    model.total_epoch = params["epochs"]
     model.build(shape)
     model.compile(
         optimizer=keras.optimizers.Adam(learning_rate=4e-4),
@@ -103,7 +104,8 @@ def test_autoencoder(model: Model, test: List, metrics: dict):
     """
     """
 
-    test_eval = model.evaluate(test[0], test[0])
+    test = [test[0], test[1][0], test[1][1]]
+    test_eval = model.evaluate(test, test)
     if len(metrics.keys()) == 1:
         metrics[metrics.keys()[0]] = test_eval
     else:
@@ -115,7 +117,7 @@ def test_autoencoder(model: Model, test: List, metrics: dict):
     return metrics, test_predict
 
 def evaluate_autoencoder(params, test_predict, test_set, categories, keys, path):
-    plt.pcolor(test_set)
+    plt.pcolor(test_set[0])
     plt.savefig(path / params["timestamp"] / "original.png")
     plt.close()
     plt.pcolor(test_predict)

From 9b3009190087b661189310eda81104861cd67af6 Mon Sep 17 00:00:00 2001
From: Dawith <cephalopodmaster@protonmail.com>
Date: Wed, 4 Mar 2026 15:07:12 -0500
Subject: [PATCH 18/18] Training parameters revised

---
 train_model.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/train_model.py b/train_model.py
index eae5575..549f492 100644
--- a/train_model.py
+++ b/train_model.py
@@ -74,9 +74,10 @@ def main():
     #keras.distribution.set_distribution(parallel)
     
     if params["load_from_scratch"]:
-        data = etl(spark, split=params["encoder_params"]["split"])
+        data = etl(spark, split=params["autoencoder_params"]["split"])
     else:
-        data = read(spark, split=params["encoder_params"]["split"])
+        data = read(spark, split=params["autoencoder_params"]["split"],
+                    parquet=params["dataset"])
         
     (train_set, validation_set, test_set, categories) = data