Skip to content

Commit

Permalink
Metadata now part of spectrogram data
Browse files Browse the repository at this point in the history
  • Loading branch information
lim185 committed Dec 8, 2024
1 parent a1836f2 commit cbacea4
Showing 1 changed file with 26 additions and 3 deletions.
29 changes: 26 additions & 3 deletions pipe/pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,28 @@ def __init__(self, spark: SparkSession, filetype: str = "hdf5"):
else:
raise ValueError

def metadata_pipe(self, metapath: Path, labels: list,
                  namepattern: str = "metadata{}.json") -> dict:
    """
    Load the metadata for each target label from a set of JSON files and
    return it as a hierarchical dictionary keyed by label.

    Args:
        metapath (Path): Directory containing the metadata files.
        labels (list): List of target labels. Each label is substituted
            into ``namepattern`` to build the file name for that label.
        namepattern (str): Name pattern for the metadata files; must
            contain one ``{}`` placeholder for the label.

    Returns:
        metadata: Dictionary mapping each label to the parsed content of
            its JSON file. Empty if ``labels`` is empty.

    Raises:
        FileNotFoundError: If the metadata file for a label is missing.
        json.JSONDecodeError: If a metadata file is not valid JSON.
    """

    metadata = {}
    for label in labels:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, so non-ASCII metadata loads the same everywhere.
        with open(metapath / namepattern.format(label), "r",
                  encoding="utf-8") as f:
            metadata[label] = json.load(f)

    return metadata

def spectrogram_pipe_hdf5(self, specpath: Path, labels: list,
namepattern:str="averaged_spectrogram{}.hdf5"
) -> np.ndarray:
Expand All @@ -43,13 +65,14 @@ def spectrogram_pipe_hdf5(self, specpath: Path, labels: list,
Returns:
"""

metadata = self.metadata_pipe(specpath, labels)
spectrograms = []
for label in labels:
filename = namepattern.format(label)
meta = metadata[label]
with h5py.File(specpath/filename, 'r') as f:
spectrograms.append(
Row(label=label,
spectrogram=f['spectrogram'][:].tolist()))
meta["spectrogram"] = f['spectrogram'][:].tolist()
spectrograms.append(Row(**meta))

return self.spark.createDataFrame(spectrograms)

Expand Down

0 comments on commit cbacea4

Please sign in to comment.