From 0ea663b7812eca2fe6e85aa0c1879b5b75a759c8 Mon Sep 17 00:00:00 2001
From: maelstrom <lim185@purdue.edu>
Date: Sat, 7 Dec 2024 21:59:02 -0500
Subject: [PATCH] Data loading portion of train.py

---
 train.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 train.py

diff --git a/train.py b/train.py
new file mode 100644
index 0000000..c82e0ef
--- /dev/null
+++ b/train.py
@@ -0,0 +1,30 @@
+"""
+train.py
+
+Launches the training process for the model.
+"""
+
+from pathlib import Path
+
+from pipe.pipe import SpectrogramPipe
+from pyspark.sql import SparkSession
+
+def main():
+    """Load the training labels and print ``head()`` of the piped data."""
+    path = Path("/app/datadump/train")
+
+    # One list of comma-separated fields per line; blank lines are skipped
+    # so they do not turn into bogus [""] label rows.
+    with open(path / "train.csv", "r", encoding="utf-8") as file:
+        labels = [row.strip().split(",") for row in file if row.strip()]
+
+    spark = SparkSession.builder.appName("train").getOrCreate()
+    pipe = SpectrogramPipe(spark)
+    data = pipe.spectrogram_pipe(path, labels)
+    print(data.head())
+
+
+if __name__ == "__main__":  # run main() only when executed as a script
+    main()
+
+# EOF