"""
train.py

Launches the training process for the model.
"""

from pathlib import Path

from pipe.pipe import SpectrogramPipe
from pyspark.sql import SparkSession


def main():
    """Load the training labels, run the spectrogram pipe, and print a preview.

    Reads ``train.csv`` from the training data directory, turning each line
    into a list of comma-separated fields. Every line is kept, including a
    header row if the file has one. The directory path and the parsed rows
    are handed to ``SpectrogramPipe.spectrogram_pipe`` and ``head()`` of the
    result is printed.

    The Spark session is stopped before returning, even when the pipeline
    raises, so the script does not leave a session running.

    Raises:
        FileNotFoundError: If ``train.csv`` is missing from the data directory.
    """
    path = Path("/app/datadump/train")

    # Explicit encoding so parsing does not depend on the container's locale.
    # NOTE(review): this is a plain comma split; quoted fields that contain
    # commas are not handled. Switch to the ``csv`` module if the label file
    # can contain them.
    with open(path / "train.csv", "r", encoding="utf-8") as file:
        labels = [line.strip().split(",") for line in file]

    spark = SparkSession.builder.appName("train").getOrCreate()
    try:
        pipe = SpectrogramPipe(spark)
        # NOTE(review): the result is presumably a DataFrame-like object that
        # exposes head(); confirm against SpectrogramPipe.
        data = pipe.spectrogram_pipe(path, labels)
        print(data.head())
    finally:
        # Release the session's resources even if the pipeline fails.
        spark.stop()


if __name__ == "__main__":
    main()

# EOF