From b8c319b7d957d1764175b79a62cb79bed7654914 Mon Sep 17 00:00:00 2001
From: Gaurav S Deshmukh
Date: Mon, 25 Sep 2023 16:55:15 -0400
Subject: [PATCH] Fixed codestyle

---
 src/train.py                      | 27 +++++++-----
 src/utils.py                      | 19 +++++----
 workflows/basic_train_val_test.py | 68 +++++++++++++++----------------
 3 files changed, 63 insertions(+), 51 deletions(-)

diff --git a/src/train.py b/src/train.py
index b676420..a105c74 100644
--- a/src/train.py
+++ b/src/train.py
@@ -12,6 +12,7 @@
 
 class Standardizer:
     """Class to standardize targets."""
+
     def __init__(self):
         """
         Class to standardize outputs.
@@ -20,7 +21,7 @@ def __init__(self):
         """
         self.mean = 0
         self.std = 0.1
-    
+
     def initialize(self, X):
         """Initialize mean and std based on the given tensor.
 
@@ -67,7 +68,7 @@ def restore(self, Z):
         """
         X = self.mean + Z * self.std
         return X
-    
+
     def restore_cont(self, Z):
         """
         Restore a standardized contribution to the non-standardized contribution.
@@ -335,7 +336,7 @@ def predict(self, dataset, indices, return_targets=False):
         Returns
         -------
         prediction_dict: dict
-            Dictionary containing "targets", "predictions", "contributions" and 
+            Dictionary containing "targets", "predictions", "contributions" and
             "indices" (copy of predict_idx).
         """
         # Create arrays
@@ -364,10 +365,14 @@ def predict(self, dataset, indices, return_targets=False):
 
             # Compute prediction
             pred_dict = self.model(nn_input)
-            predictions[i] = self.standardizer.restore(pred_dict["output"].cpu().detach())
+            predictions[i] = self.standardizer.restore(
+                pred_dict["output"].cpu().detach()
+            )
             conts_std = pred_dict["contributions"].cpu().detach()
-            contributions[i, :] = self.standardizer.restore_cont(conts_std).numpy().flatten()
-            #contributions[i, :] = pred_dict["contributions"].cpu().detach().numpy().flatten()
+            contributions[i, :] = (
+                self.standardizer.restore_cont(conts_std).numpy().flatten()
+            )
+            # contributions[i, :] = pred_dict["contributions"].cpu().detach().numpy().flatten()
 
         predictions_dict = {
             "targets": targets,
@@ -487,10 +492,12 @@ def train(self, epochs, dataloader_dict, verbose=False):
 
             # Print, if verbose
             if verbose:
-                print(f"Epoch: [{i}] Training loss: [{train_loss:.3f}] " + \
-                      f"Training metric: [{train_metric:.3f}] " +\
-                      f"Validation loss: [{val_loss:.3f}] " +\
-                      f"Validation metric: [{val_metric:.3f}]")
+                print(
+                    f"Epoch: [{i}] Training loss: [{train_loss:.3f}] "
+                    + f"Training metric: [{train_metric:.3f}] "
+                    + f"Validation loss: [{val_loss:.3f}] "
+                    + f"Validation metric: [{val_metric:.3f}]"
+                )
 
         # Load the best model
         self.load(best_status=True)
diff --git a/src/utils.py b/src/utils.py
index a834949..de98478 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -50,6 +50,7 @@ def partition_structure(atoms, z_cutoffs):
 
     return part_atoms
 
+
 def partition_structure_by_layers(atoms, layer_cutoffs):
     """Partition atomic structue into bulk, surface, and/or adsorbates by layers.
 
@@ -176,6 +177,7 @@ def featurize_atoms(
         "edge_indices": edge_indices,
     }
 
+
 def create_dataloaders(proc_data, sample_idx, batch_size, num_proc=0):
     """Create training, validation, and/or test dataloaders.
 
@@ -190,7 +192,7 @@ def create_dataloaders(proc_data, sample_idx, batch_size, num_proc=0):
         Batch size
     num_proc: int (default = 0)
         Number of cores to be used for parallelization. Defaults to serial.
-    
+
     Returns
     -------
     dataloader_dict: dict
@@ -200,15 +202,18 @@ def create_dataloaders(proc_data, sample_idx, batch_size, num_proc=0):
     dataloader_dict = {"train": [], "val": [], "test": []}
 
     for key in dataloader_dict.keys():
-        if sample_idx[key].shape[0] > 0.:
+        if sample_idx[key].shape[0] > 0.0:
             sampler = SubsetRandomSampler(sample_idx[key])
-            dataloader_dict[key] = DataLoader(dataset=proc_data,
-                                              batch_size=batch_size,
-                                              sampler=sampler,
-                                              num_workers=num_proc)
-    
+            dataloader_dict[key] = DataLoader(
+                dataset=proc_data,
+                batch_size=batch_size,
+                sampler=sampler,
+                num_workers=num_proc,
+            )
+
     return dataloader_dict
 
+
 if __name__ == "__main__":
     from ase.io import read
 
diff --git a/workflows/basic_train_val_test.py b/workflows/basic_train_val_test.py
index 0aba6c5..50b2a65 100644
--- a/workflows/basic_train_val_test.py
+++ b/workflows/basic_train_val_test.py
@@ -5,8 +5,8 @@
 from ..src.constants import REPO_PATH
 from ..src.data import AtomsDataset
 from ..src.samplers import RandomSampler
-from ..src.utils import create_dataloaders
 from ..src.train import Model
+from ..src.utils import create_dataloaders
 
 # Set seeds
 seed = 0
@@ -45,42 +45,42 @@
     "metric_function": "mae",
     "learning_rate": 0.001,
     "optimizer": "adam",
-    "lr_milestones": [75]
+    "lr_milestones": [75],
 }
 
 partition_configs = [
-        {
-            "n_conv": 3,
-            "n_hidden": 1,
-            "hidden_size": 20,
-            "conv_size": 20,
-            "dropout": 0.1,
-            "num_node_features": dataset[0][0].num_node_features,
-            "num_edge_features": dataset[0][0].num_edge_features,
-            "conv_type": "CGConv",
-        },
-        {
-            "n_conv": 5,
-            "n_hidden": 2,
-            "hidden_size": 50,
-            "conv_size": 50,
-            "dropout": 0.1,
-            "num_node_features": dataset[0][1].num_node_features,
-            "num_edge_features": dataset[0][1].num_edge_features,
-            "conv_type": "CGConv",
-        },
-        {
-            "n_conv": 5,
-            "n_hidden": 2,
-            "hidden_size": 50,
-            "conv_size": 50,
-            "dropout": 0.1,
-            "num_node_features": dataset[0][2].num_node_features,
-            "num_edge_features": dataset[0][2].num_edge_features,
-            "conv_type": "CGConv",
-        },
+    {
+        "n_conv": 3,
+        "n_hidden": 1,
+        "hidden_size": 20,
+        "conv_size": 20,
+        "dropout": 0.1,
+        "num_node_features": dataset[0][0].num_node_features,
+        "num_edge_features": dataset[0][0].num_edge_features,
+        "conv_type": "CGConv",
+    },
+    {
+        "n_conv": 5,
+        "n_hidden": 2,
+        "hidden_size": 50,
+        "conv_size": 50,
+        "dropout": 0.1,
+        "num_node_features": dataset[0][1].num_node_features,
+        "num_edge_features": dataset[0][1].num_edge_features,
+        "conv_type": "CGConv",
+    },
+    {
+        "n_conv": 5,
+        "n_hidden": 2,
+        "hidden_size": 50,
+        "conv_size": 50,
+        "dropout": 0.1,
+        "num_node_features": dataset[0][2].num_node_features,
+        "num_edge_features": dataset[0][2].num_edge_features,
+        "conv_type": "CGConv",
+    },
 ]
-model_path = REPO_PATH / "trained_models" / "S_binary_calcs" 
+model_path = REPO_PATH / "trained_models" / "S_binary_calcs"
 model = Model(global_config, partition_configs, model_path)
 model.init_standardizer([dataset[i][0].y for i in sample_idx["train"]])
 results_dict = model.train(100, dataloader_dict, verbose=True)
@@ -91,4 +91,4 @@
 
 # Make predictions on a structure
 pred_dict = model.predict(dataset, [0, 100, 200, 500], return_targets=True)
-print(pred_dict)
\ No newline at end of file
+print(pred_dict)