Skip to content

Commit

Permalink
Function descriptions added
Browse files Browse the repository at this point in the history
  • Loading branch information
lim185 committed Sep 30, 2025
1 parent 940d227 commit 825ff18
Showing 1 changed file with 31 additions and 3 deletions.
34 changes: 31 additions & 3 deletions pipe/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
from pyspark.sql import SparkSession, Row, DataFrame
import scipy as sp

def extract(spark):
def extract(spark: SparkSession) -> DataFrame:
"""
First step of the ETL pipeline. It reads the list of .mat files from
a CSV list, opens and pulls the spectrogram from each respective file.
Args:
spark (SparkSession): Spark session object.
"""

path = Path("/app/workdir")
Expand All @@ -33,13 +35,34 @@ def extract(spark):

def image_pipe(spark: SparkSession, imagepath: Path, namepattern: str,
               stacksize: int) -> np.ndarray:
    """
    Loads a stack of images from a path based on the given name pattern.

    Args:
        spark (SparkSession): Spark session object. Not used by this
            function; kept so the signature stays unchanged for callers.
        imagepath (Path): Path to the directory holding the image files.
        namepattern (str): Name pattern for the image files. It is expanded
            with ``namepattern.format(i)`` for ``i`` in
            ``range(stacksize)``.
        stacksize (int): Number of images in the stack.

    Returns:
        images: 3D numpy array of stacked images with shape
            ``(stacksize, 800, 800)``.

    Raises:
        FileNotFoundError: If one of the expected images cannot be read.
    """

    images = np.zeros((stacksize, 800, 800))
    for i in range(stacksize):
        # cv.imread takes a string file name, so convert the Path explicitly.
        filename = str(imagepath / namepattern.format(i))
        # Flag -1 is passed through unchanged from the original call
        # (cv.IMREAD_UNCHANGED in the OpenCV documentation).
        frame = cv.imread(filename, -1)
        if frame is None:
            # cv.imread signals an unreadable file by returning None rather
            # than raising; fail loudly instead of storing that in the stack.
            raise FileNotFoundError(f"Could not read image: {filename}")
        images[i, :, :] = frame

    return images

class SpectrogramReader:
"""
Class to read spectrograms and metadata from different file formats based
on user specified filetype.
Args:
spark (SparkSession): Spark session object.
filetype (str): File format type. Supported types are 'hdf5',
'shards', and 'matfiles'.
"""

def __init__(self, spark: SparkSession, filetype: str = "hdf5"):
self.spark = spark
Expand Down Expand Up @@ -86,7 +109,8 @@ def spectrogram_read_matfiles(self, specpath: Path, labels:list,
labels (list): List of target labels.
Returns:
DataFrame: Spark DataFrame containing the spectrograms and
associated metadata.
"""
spectrograms = []
row = {}
Expand Down Expand Up @@ -127,6 +151,8 @@ def spectrogram_read_hdf5(self, specpath: Path, labels: list,
specpath (Path): Path to the spectrogram files.
Returns:
DataFrame: Spark DataFrame containing the spectrograms and
associated metadata.
"""

metadata = self.metadata_pipe(specpath, labels)
Expand Down Expand Up @@ -156,6 +182,8 @@ def spectrogram_read_shards(self, specpath: Path, namepattern: str,
spectrogram.
Returns:
DataFrame: Spark DataFrame containing the spectrograms and
associated metadata.
"""

return
Expand Down

0 comments on commit 825ff18

Please sign in to comment.