diff --git a/src/samplers.py b/src/samplers.py index 699bbf1..7723ae1 100644 --- a/src/samplers.py +++ b/src/samplers.py @@ -1,11 +1,13 @@ """Samplers for training, validation, and testing.""" -import numpy as np -import pandas as pd import abc +import numpy as np + + class Sampler(abc.ABC): """Abstract base class for data samplers.""" + def __init__(self): """Blank constructor.""" pass @@ -13,7 +15,7 @@ def __init__(self): @abc.abstractmethod def create_samplers(self): """Create training, test, and validation samplers. - + This should return a dictionary with "train", "val", "test" as keys and indices of datapoints as values. """ @@ -24,8 +26,10 @@ def name(): """Name of the sampling method.""" pass + class RandomSampler(Sampler): """Perform uniform random sampling on datapoints.""" + def __init__(self, seed, dataset_size): """Initialize sampler. @@ -66,21 +70,21 @@ def create_samplers(self, sample_config): train_size = int(np.ceil(sample_config["train"] * self.dataset_size)) train_idx = idx_array[:train_size] val_size = int(np.ceil(sample_config["val"] * self.dataset_size)) - val_idx = idx_array[train_size: train_size + val_size] - test_idx = idx_array[train_size + val_size:] + val_idx = idx_array[train_size : train_size + val_size] + test_idx = idx_array[train_size + val_size :] # Create samples samples = {"train": train_idx, "val": val_idx, "test": test_idx} - + return samples - + @staticmethod def name(): + """Name of the sampling method.""" return "random" + if __name__ == "__main__": rs = RandomSampler(0, 100) samples = rs.create_samplers({"train": 0.6, "val": 0.2, "test": 0.2}) print(samples) - -