From 465dd25d896e09b716d8c833e132e579fae83e07 Mon Sep 17 00:00:00 2001
From: Dawith
Date: Tue, 21 Oct 2025 12:42:34 -0400
Subject: [PATCH] working on decoder

---
 model/model.py       | 137 +++++++++++++++++++++++++++++--------------
 model/transformer.py |  32 +++++++++-
 2 files changed, 121 insertions(+), 48 deletions(-)

diff --git a/model/model.py b/model/model.py
index 2ee911c..ceea214 100644
--- a/model/model.py
+++ b/model/model.py
@@ -93,37 +93,109 @@ def _modelstack(self, input_shape, head_size, num_heads, ff_dim,
 
         return Model(inputs, [y, z])
 
-    def _transformerblocks(self, inputs, head_size, num_heads,
-                           ff_dim, dropout):
+    def call(self, inputs):
         """
-        Constructs the transformer block. This consists of multi-head
-        attention, dropout, layer normalization, a residual connection,
-        a feedforward neural network, and another residual connection.
+        Calls the TimeSeriesTransformer model on a batch of inputs.
 
         Args:
             inputs: Tensor, batch of input data.
+
+        Returns:
+            Tensor, resulting output of the TimeSeriesTransformer model.
+        """
+        return self.timeseriestransformer(inputs)
+
+    def summary(self):
+        """
+        Prints a summary of the TimeSeriesTransformer model.
+
+        Args:
+            None.
+
+        Returns:
+            None.
+        """
+        self.timeseriestransformer.summary()
+
+class DecoderModel(Model):
+
+    def __init__(self, input_shape, head_size, num_heads, ff_dim,
+                 num_Transformer_blocks, mlp_units, n_classes,
+                 dropout=0, mlp_dropout=0):
+        """
+        Initializes the DecoderModel class. This class is a wrapper
+        around a Keras model that consists of a series of decoder
+        blocks followed by an MLP.
+
+        Args:
+            input_shape: tuple, shape of the input tensor.
+            head_size: int, the number of features in each attention head.
+            num_heads: int, the number of attention heads.
+            ff_dim: int, the number of neurons in the feedforward neural
+                network.
+            num_Transformer_blocks: int, the number of Transformer blocks.
+            mlp_units: list of ints, the number of neurons in each layer of
+                the MLP.
+            n_classes: list of ints, the number of output classes.
+            dropout: float, dropout rate.
+            mlp_dropout: float, dropout rate in the MLP.
+
+        Attributes:
+            timeseriestransdecoder: Keras model, the Timeseries Transformer
+                decoder model.
+        """
+        super(DecoderModel, self).__init__()
+
+        self.tstfbuilder = TSTFBuilder()
+        self.timeseriestransdecoder = self._modelstack(
+            input_shape, head_size, num_heads, ff_dim,
+            num_Transformer_blocks, mlp_units, n_classes,
+            dropout, mlp_dropout)
+
+    def _modelstack(self, input_shape, head_size, num_heads, ff_dim,
+                    num_Transformer_blocks, mlp_units, n_classes,
+                    dropout, mlp_dropout):
+        """
+        Creates the Timeseries Transformer decoder model. This consists of a
+        series of decoder blocks followed by an MLP.
+
+        Args:
+            input_shape: tuple, shape of the input tensor.
             head_size: int, the number of features in each attention head.
             num_heads: int, the number of attention heads.
             ff_dim: int, the number of neurons in the feedforward neural
                 network.
+            num_Transformer_blocks: int, the number of Transformer blocks.
+            mlp_units: list of ints, the number of neurons in each layer of
+                the MLP.
+            n_classes: list of ints, the number of output classes.
             dropout: float, dropout rate.
+            mlp_dropout: float, dropout rate in the MLP.
 
         Returns:
-            A model layer.
- """ - x = MultiHeadAttention( - key_dim=head_size, num_heads=num_heads, - dropout=dropout)(inputs, inputs) - x = Dropout(dropout)(x) - x = LayerNormalization(epsilon=1e-6)(x) - res = x + inputs - - x = Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(res) - x = Dropout(dropout)(x) - x = Conv1D(filters=inputs.shape[-1], kernel_size=1)(x) - outputs = Dropout(dropout)(x) + res - - return outputs + A Keras model. + """ + + x1, x2 = Input(shape=input_shape) + x1 = Dense(n_classes[0], activation="relu")(x1) + x2 = Dense(n_classes[1], activation="relu")(x2) + x = (x1 + x2) + x = GlobalAveragePooling1D(data_format="channels_first")(x) + + for dim in mlp_units: + x = Dense(dim, activation="relu")(x) + x = Dropout(mlp_dropout)(x) + + for _ in range(num_Transformer_blocks): + x = self.tstfbuilder.build_decoderblock( + x, + head_size, + num_heads, + ff_dim, + dropout + ) + + return Model(inputs, z) def call(self, inputs): """ @@ -149,29 +221,4 @@ def summary(self): """ self.timeseriestransformer.summary() - ''' - def compile(self, loss="sparse_categorical_crossentropy", - optimizer="adam", - metrics=["sparse_categorical_accuracy"]): - """ - Compiles the TimeSeriesTransformer model. - - Args: - loss: str, loss function. - optimizer: str, optimizer. - metrics: list of str, evaluation metrics. - - Returns: - None. - """ - - super() - self.timeseriestransformer.compile( - loss="sparse_categorical_crossentropy", - optimizer="adam", - metrics=["sparse_categorical_accuracy"]) - - return - ''' - # EOF diff --git a/model/transformer.py b/model/transformer.py index 44e0a64..917f859 100644 --- a/model/transformer.py +++ b/model/transformer.py @@ -40,9 +40,14 @@ def __init__(self): def build_transformerblock(self, inputs, head_size, num_heads, ff_dim, dropout): """ - Constructs the transformer block. This consists of multi-head - attention, dropout, layer normalization, a residual connection, - a feedforward neural network, and another residual connection. + Constructs the transformer block. A transformer block consists of the + following steps: + 1. multi-head attention + 2. dropout + 3. layer normalization + 4. residual connection + 5. feedforward neural network + 6. residual connection Args: inputs: Tensor, batch of input data. @@ -69,6 +74,27 @@ def build_transformerblock(self, inputs, head_size, num_heads, return outputs + def build_decoderblock(self, inputs, head_size, num_heads, ff_dim, + dropout): + """ + Constructs the decoder block. This consists of masked multi-head + attention, dropout, layer normalization, a residual connection, + a feedforward neural network, and another residual connection, but in + the reverse order as the encoder block. + """ + + x = LayerNormalization(epsilon=1e-6)(inputs) + x = Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(inputs) + x = Dropout(dropout)(x) + x = Conv1D(filters=inputs.shape[-1], kernel_size=1)(x) + x = Dropout(dropout)(x) + res = x + inputs + outputs = MultiHeadAttention( + key_dim=head_size, num_heads=num_heads, + dropout=dropout)(res, res, use_causal_mask=True) + + return outputs + def call(self, inputs): """ Calls the TimeSeriesTransformer model on a batch of inputs.