Skip to content

Commit

Permalink
Testing with parquet file for training completed
Browse files Browse the repository at this point in the history
  • Loading branch information
lim185 committed Sep 30, 2025
1 parent 9ca02e2 commit 3e44f19
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pipe/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def read(spark: SparkSession) -> DataFrame:
"""

data = spark.read.parquet("/app/workdir/parquet/data.parquet")
- data = split(data)
+ data = split_sets(data)
return data

def split_sets(data: DataFrame, split=[0.99, 0.005, 0.005]) -> tuple:
Expand Down
2 changes: 1 addition & 1 deletion train.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def main():

keys = ["treatment", "target"]

- load_from_scratch = True
+ load_from_scratch = False
if load_from_scratch:
data = etl(spark, split=SPLIT)
else:
Expand Down

0 comments on commit 3e44f19

Please sign in to comment.