# Recovered from git patch 0ea663b7812eca2fe6e85aa0c1879b5b75a759c8
# ("Data loading portion of train.py", maelstrom, 2024-12-07).
"""
train.py

Launches the training process for the model.
"""

import csv
from pathlib import Path
from typing import List

from pyspark.sql import SparkSession

from pipe.pipe import SpectrogramPipe

# Directory that holds train.csv; the same directory is handed to the
# spectrogram pipe.
DEFAULT_TRAIN_DIR = Path("/app/datadump/train")


def _load_labels(csv_path: Path) -> List[List[str]]:
    """Return the comma-separated fields of every non-blank line in *csv_path*.

    Each line is stripped of surrounding whitespace before parsing, so plain
    unquoted rows yield exactly what ``line.strip().split(",")`` would.
    Quoted fields that contain commas are kept intact, and blank lines are
    skipped instead of producing a spurious ``[""]`` row.

    No header handling is done: if the file starts with a header line it is
    returned like any other row.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(csv_path, "r", encoding="utf-8") as file:
        stripped_lines = (line.strip() for line in file)
        # csv.reader yields [] for an empty line; drop those rows.
        return [row for row in csv.reader(stripped_lines) if row]


def main(path: Path = DEFAULT_TRAIN_DIR) -> None:
    """Load the training labels and run the spectrogram pipe over *path*.

    Reads ``train.csv`` from *path*, creates (or reuses) a Spark session named
    "train", passes the directory and the label rows to
    ``SpectrogramPipe.spectrogram_pipe`` and prints ``head()`` of the result.

    Args:
        path: directory containing ``train.csv``. Defaults to the container
            data directory.
    """
    labels = _load_labels(path / "train.csv")

    spark = SparkSession.builder.appName("train").getOrCreate()
    try:
        pipe = SpectrogramPipe(spark)
        # NOTE(review): presumably returns a Spark DataFrame - confirm
        # against pipe.pipe.SpectrogramPipe.
        data = pipe.spectrogram_pipe(path, labels)
        print(data.head())
    finally:
        # Release the Spark session even when the pipe raises.
        spark.stop()


if __name__ == "__main__":
    main()

# EOF