Skip to content

Commit

Permalink
Deleted unused old code
Browse files: browse the repository at this point in the history
  • Loading branch information
lim185 committed Oct 14, 2025
1 parent 6601688 commit e769f39
Showing 1 changed file with 0 additions and 26 deletions.
26 changes: 0 additions & 26 deletions pipe/transform.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -80,22 +80,6 @@ def onehot(dataframe: DataFrame, keys: list) -> DataFrame:
result = result.withColumnRenamed(column_name, f"{column_name}_str")
result = result.withColumnRenamed(f"{column_name}_encoded", column_name)

"""
bundle = {key: [
arr.tolist()
for arr in OneHotEncoder(sparse_output=False) \
.fit_transform(dataframe.select(key).collect())
] for key in keys
}
bundle = [dict(zip(bundle.keys(), values))
for values in zip(*bundle.values())]
schema = types.StructType([
types.StructField(key, types.ArrayType(types.FloatType()), True)
for key in keys
])
return bundle, schema"""
return result

def transform(spark: SparkSession, dataframe: DataFrame, keys: list) \
Expand All @@ -107,16 +91,6 @@ def transform(spark: SparkSession, dataframe: DataFrame, keys: list) \
"index", functions.monotonically_increasing_id()
)

"""
bundle, schema = onehot(dataframe, keys)
newframe = spark.createDataFrame(bundle, schema=schema).withColumn(
"index", functions.monotonically_increasing_id()
)
for key in keys:
dataframe = dataframe.withColumnRenamed(key, f"{key}_str")
dataframe = dataframe.join(newframe, on="index", how="inner")
"""
dataframe = onehot(dataframe, keys)

return dataframe

0 comments on commit e769f39

Please sign in to comment.