Skip to content

Commit

Permalink
Pipe rewritten to identify files by tag
Browse files Browse the repository at this point in the history
  • Loading branch information
Dawith committed Dec 8, 2024
1 parent b300665 commit a1836f2
Showing 1 changed file with 12 additions and 15 deletions.
27 changes: 12 additions & 15 deletions pipe/pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
pipe.py
"""

import os
from pathlib import Path
import typing

import cv2 as cv
import h5py
import numpy as np
from pyspark.sql import SparkSession, Row

Expand All @@ -26,35 +28,30 @@ def __init__(self, spark: SparkSession, filetype: str = "hdf5"):
elif filetype == "shards":
self.spectrogram_pipe = self.spectrogram_pipe_shards
else:
raise ValueError(s"Invalid filetype {filetype}.")
raise ValueError

def spectrogram_pipe_hdf5(self, specpath: Path, freq_samples: int)
-> np.ndarray:
def spectrogram_pipe_hdf5(self, specpath: Path, labels: list,
namepattern:str="averaged_spectrogram{}.hdf5"
) -> np.ndarray:
"""
Loads the spectrograms for each stack iteration from hdf5 data files,
and turns them into a spark-friendly format.
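Args:
specpath (Path): Path to the spectrogram files.
labels (list): Tags substituted into namepattern to build the name
of each spectrogram file to load.
namepattern (str): Name pattern for the spectrogram files.
stacksize (int): Number of spectrograms in the stack.
freq_samples (int): Number of frequency samples in each
spectrogram.
Returns:
Spark DataFrame with one row per label, holding the label and the
"spectrogram" dataset of the matching file as a nested list.
"""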

spectrograms = []
for filename in os.listdir(specpath):
if not filename.endswith(".hdf5"):
continue
for label in labels:
filename = namepattern.format(label)
with h5py.File(specpath/filename, 'r') as f:
spectrograms.append(Row(label=filename,
spectrogram=f['spectrogram'][:]))

# Turn spectrogram into a spark dataframe.
spectrograms.append(
Row(label=label,
spectrogram=f['spectrogram'][:].tolist()))

return spectrograms
return self.spark.createDataFrame(spectrograms)

def spectrogram_pipe_shards(self, specpath: Path, namepattern: str,
stacksize: int, freq_samples: int) -> np.ndarray:
Expand Down

0 comments on commit a1836f2

Please sign in to comment.