From 4768241dba1bae5d693fc68e9d4047635c100ff8 Mon Sep 17 00:00:00 2001
From: Gaurav S Deshmukh
Date: Sun, 24 Sep 2023 00:32:53 -0400
Subject: [PATCH 1/3] Added Model class

---
 src/data.py     |  1 +
 src/models.py   |  2 +-
 src/samplers.py | 12 ++++++++++--
 src/utils.py    | 35 +++++++++++++++++++++++++++++++++++
 4 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/src/data.py b/src/data.py
index 01e92a1..aa1a23d 100644
--- a/src/data.py
+++ b/src/data.py
@@ -352,6 +352,7 @@ def load_datapoints(atoms, process_dict):
     #     ])
     print(dataset[0][-1].x)
     print(dataset.df_name_idx.head())
+    print(dataset[0][-1].name)
 
     # Create datapoint
     atoms = read(data_root_path / "Pt_3_Rh_9_-7-7-S.cif")
diff --git a/src/models.py b/src/models.py
index 8ff3fd6..6f2d727 100644
--- a/src/models.py
+++ b/src/models.py
@@ -110,7 +110,7 @@ def init_conv_layers(self):
                 gnn.CGConv(
                     channels=self.conv_size[i],
                     dim=self.num_edge_features[i],
-                    batch_norm=True,
+                    batch_norm=False,
                 ),
                 nn.LeakyReLU(inplace=True),
             ]
diff --git a/src/samplers.py b/src/samplers.py
index d6d79ef..ffa364b 100644
--- a/src/samplers.py
+++ b/src/samplers.py
@@ -67,9 +67,17 @@ def create_samplers(self, sample_config):
         randomizer.shuffle(idx_array)
 
         # Get indices
-        train_size = int(np.ceil(sample_config["train"] * self.dataset_size))
+        # Values < 1. are treated as fractions of the dataset; values >= 1. are
+        # assumed to be absolute counts, so the sizes are always defined.
+        if sample_config["train"] < 1.:
+            train_size = int(np.ceil(sample_config["train"] * self.dataset_size))
+        else:
+            train_size = int(sample_config["train"])
         train_idx = idx_array[:train_size]
-        val_size = int(np.ceil(sample_config["val"] * self.dataset_size))
+        if sample_config["val"] < 1.:
+            val_size = int(np.floor(sample_config["val"] * self.dataset_size))
+        else:
+            val_size = int(sample_config["val"])
         val_idx = idx_array[train_size : train_size + val_size]
         test_idx = idx_array[train_size + val_size :]
 
diff --git a/src/utils.py b/src/utils.py
index 2e73f2f..5b2c6b2 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -5,6 +5,9 @@
 
 import numpy as np
 import torch
+from torch.utils.data import SubsetRandomSampler
+from torch_geometric.loader import DataLoader
+
 from featurizers import (
     OneHotEncoder,
     list_of_edge_featurizers,
@@ -133,6 +136,38 @@ def featurize_atoms(
         "edge_indices": edge_indices,
     }
 
+def create_dataloaders(proc_data, sample_idx, batch_size, num_proc=0):
+    """Create training, validation, and/or test dataloaders.
+
+    Parameters
+    ----------
+    proc_data: AtomsDataset or AtomsDatapoints
+        Processed dataset object
+    sample_idx: dict
+        A dictionary with "train", "val", and "test" indices returned by a Sampler
+        object.
+    batch_size: int
+        Batch size
+    num_proc: int (default = 0)
+        Number of cores to be used for parallelization. Defaults to serial.
+
+    Returns
+    -------
+    dataloader_dict: dict
+        Dictionary of "train", "val", and "test" dataloaders
+    """
+    # Create dataloader dict
+    dataloader_dict = {"train": [], "val": [], "test": []}
+
+    for key in dataloader_dict.keys():
+        if sample_idx[key].shape[0] > 0:
+            sampler = SubsetRandomSampler(sample_idx[key])
+            dataloader_dict[key] = DataLoader(dataset=proc_data,
+                                              batch_size=batch_size,
+                                              sampler=sampler,
+                                              num_workers=num_proc)
+
+    return dataloader_dict
 
 if __name__ == "__main__":
     from ase.io import read

From 8875133a7b6d26deacd0e0aab05c3e3be59e6cb2 Mon Sep 17 00:00:00 2001
From: Gaurav S Deshmukh
Date: Sun, 24 Sep 2023 00:34:43 -0400
Subject: [PATCH 2/3] Added train.py

---
 src/train.py | 468 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 468 insertions(+)
 create mode 100644 src/train.py

diff --git a/src/train.py b/src/train.py
new file mode 100644
index 0000000..32539f9
--- /dev/null
+++ b/src/train.py
@@ -0,0 +1,468 @@
+"""Train and test the model."""
+
+from copy import deepcopy
+from pathlib import Path
+
+import numpy as np
+import torch
+
+from sklearn.metrics import mean_absolute_error, mean_squared_error
+
+from models import MultiGCN
+
+class Standardizer:
+    def __init__(self, X):
+        """
+        Class to standardize outputs.
+
+        Parameters
+        ----------
+        X: torch.Tensor
+            Tensor of outputs
+        """
+        self.mean = torch.mean(X)
+        self.std = torch.std(X)
+
+    def standardize(self, X):
+        """
+        Convert a non-standardized output to a standardized output.
+
+        Parameters
+        ----------
+        X: torch.Tensor
+            Tensor of non-standardized outputs
+
+        Returns
+        -------
+        Z: torch.Tensor
+            Tensor of standardized outputs
+
+        """
+        Z = (X - self.mean) / (self.std)
+        return Z
+
+    def restore(self, Z):
+        """
+        Restore a standardized output to the non-standardized output.
+
+        Parameters
+        ----------
+        Z: torch.Tensor
+            Tensor of standardized outputs
+
+        Returns
+        -------
+        X: torch.Tensor
+            Tensor of non-standardized outputs
+
+        """
+        X = self.mean + Z * self.std
+        return X
+
+    def get_state(self):
+        """
+        Return dictionary of the state of the Standardizer.
+
+        Returns
+        -------
+        dict
+            Dictionary with the mean and std of the outputs
+
+        """
+        return {"mean" : self.mean, "std" : self.std}
+
+    def set_state(self, state):
+        """
+        Load a dictionary containing the state of the Standardizer.
+
+        Parameters
+        ----------
+        state : dict
+            Dictionary containing mean and std
+        """
+        self.mean = state["mean"]
+        self.std = state["std"]
+
+class Model:
+    """Wrapper class for a MultiGCN model that allows training and prediction."""
+    def __init__(self, global_config, partition_configs, model_path):
+        """Initialize a MultiGCN model.
+
+        Parameters
+        ----------
+        global_config: dict
+            Global configuration dictionary. Should contain the following keys:
+            gpu (whether to use GPU, bool), loss_function (str; only "mse"),
+            metric_function (str; "mae" or "mse"), optimizer (str; "adam" or
+            "sgd"), learning_rate (model learning rate, float), lr_milestones
+            (milestones when learning rate is to be decreased, list; optional)
+        partition_configs: List[Dict]
+            List of dictionaries containing parameters for the GNN for each
+            partition. The number of GNNs is inferred from the length of the
+            list. Each partition config should contain the following keys:
+            n_conv (number of convolutional layers, int), n_hidden (number of
+            hidden layers, int), conv_size (feature size before convolution, int),
+            hidden_size (number of nodes per hidden layer, int), dropout (dropout
+            probability for hidden layers, float), conv_type (type of convolution
+            layer, str; currently only "CGConv" is supported), pool_type
+            (type of pooling layer, str; currently "add" and "mean" are supported),
+            num_node_features (number of node features, int), num_edge_features
+            (number of edge features, int).
+        model_path: str
+            Path where the model is to be saved
+        """
+        # Create model
+        self.model = MultiGCN(partition_configs)
+
+        # Create model path
+        self.make_directory_structure(model_path)
+
+        # Set GPU status
+        self.use_gpu = global_config["gpu"]
+
+        # Set loss function
+        if global_config["loss_function"] == "mse":
+            self.loss_fn = torch.nn.MSELoss()
+        else:
+            raise ValueError(
+                "Incorrect loss function. Currently only 'mse' is supported"
+            )
+
+        # Set metric function
+        if global_config["metric_function"] == "mae":
+            self.metric_fn = mean_absolute_error
+        elif global_config["metric_function"] == "mse":
+            self.metric_fn = mean_squared_error
+
+        # Set optimizer
+        if global_config["optimizer"].lower().strip() == "adam":
+            self.optimizer = torch.optim.Adam(
+                self.model.parameters(),
+                lr=global_config["learning_rate"],
+            )
+        elif global_config["optimizer"].lower().strip() == "sgd":
+            self.optimizer = torch.optim.SGD(
+                self.model.parameters(),
+                lr=global_config["learning_rate"],
+            )
+
+        # Set scheduler
+        if "lr_milestones" in global_config.keys():
+            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
+                optimizer=self.optimizer,
+                milestones=global_config["lr_milestones"]
+            )
+        else:
+            self.scheduler = None
+
+    def make_directory_structure(self, model_path):
+        """Make directory structure to store models and results."""
+        self.model_path = Path(model_path)
+        self.model_save_path = self.model_path / "models"
+        self.model_results_path = self.model_path / "results"
+        self.model_save_path.mkdir(parents=True, exist_ok=True)
+        self.model_results_path.mkdir(parents=True, exist_ok=True)
+
+    def init_standardizer(self, targets):
+        """Initialize the Standardizer using training targets (typically).
+
+        Parameters
+        ----------
+        targets: np.ndarray or torch.Tensor
+            Array of training outputs
+        """
+        self.standardizer = Standardizer(targets)
+
+    def train_epoch(self, dataloader):
+        """Train the model for a single epoch.
+
+        Parameters
+        ----------
+        dataloader: torch_geometric.loader.DataLoader
+            Training dataloader
+        """
+        # Variables to store average stats
+        avg_loss = 0
+        avg_metric = 0
+        count = 0
+
+        # Enable train mode of model
+        self.model.train()
+
+        # Go over each batch in the dataloader
+        for data_objects in dataloader:
+            # Standardize output
+            y = data_objects[0].y
+            y_std = self.standardizer.standardize(y)
+
+            # Transfer to GPU (if True)
+            if self.use_gpu:
+                nn_output = y_std.cuda()
+                nn_input = [d.cuda() for d in data_objects]
+            else:
+                nn_output = y_std
+                nn_input = data_objects
+
+            # Compute prediction
+            pred_dict = self.model(nn_input)
+
+            # Calculate loss
+            loss = self.loss_fn(nn_output, pred_dict["output"])
+            avg_loss += loss.item()
+
+            # Calculate metric (detach before handing the tensor to sklearn)
+            y_pred = self.standardizer.restore(pred_dict["output"].detach().cpu())
+            metric = self.metric_fn(y, y_pred)
+            avg_metric += metric
+
+            # Set zero gradient for all the tensors
+            self.optimizer.zero_grad()
+
+            # Perform backward propagation
+            loss.backward()
+
+            # Update weights and biases
+            self.optimizer.step()
+
+            # Increase count
+            count += 1
+
+        # Update scheduler (once per epoch) if not None
+        if self.scheduler is not None:
+            self.scheduler.step()
+
+        # Calculate average loss and metric
+        avg_loss = avg_loss / count
+        avg_metric = avg_metric / count
+
+        return avg_loss, avg_metric
+
+    def validate(self, dataloader):
+        """Validate/test the model.
+
+        Parameters
+        ----------
+        dataloader: torch_geometric.loader.DataLoader
+            Validation/test dataloader
+        """
+        # Variables to store average stats
+        avg_loss = 0
+        avg_metric = 0
+        count = 0
+
+        # Enable eval mode of model
+        self.model.eval()
+
+        # Go over each batch in the dataloader
+        for data_objects in dataloader:
+            # Standardize output
+            y = data_objects[0].y
+            y_std = self.standardizer.standardize(y)
+
+            # Transfer to GPU (if True)
+            if self.use_gpu:
+                nn_output = y_std.cuda()
+                nn_input = [d.cuda() for d in data_objects]
+            else:
+                nn_output = y_std
+                nn_input = data_objects
+
+            # Compute prediction
+            pred_dict = self.model(nn_input)
+
+            # Calculate loss
+            loss = self.loss_fn(nn_output, pred_dict["output"])
+            avg_loss += loss.item()
+
+            # Calculate metric (detach before handing the tensor to sklearn)
+            y_pred = self.standardizer.restore(pred_dict["output"].detach().cpu())
+            metric = self.metric_fn(y, y_pred)
+            avg_metric += metric
+
+            # Increase count
+            count += 1
+
+        # Calculate average loss and metric
+        avg_loss = avg_loss / count
+        avg_metric = avg_metric / count
+
+        return avg_loss, avg_metric
+
+    def predict(self, dataset, indices, return_targets=False):
+        """Predict outputs from the model.
+
+        Parameters
+        ----------
+        dataset: AtomsDataset or AtomsDatapoints
+            Processed dataset object
+        indices: list or np.ndarray
+            List of indices for datapoints for which predictions are to be made
+        return_targets: bool (default = False)
+            If True, true targets are returned; if False, all targets remain 0.
+
+        Returns
+        -------
+        predictions_dict: dict
+            Dictionary containing "targets", "predictions", and "indices" (a copy
+            of indices).
+        """
+        # Create arrays
+        targets = np.zeros(len(indices))
+        predictions = np.zeros(len(indices))
+
+        # Enable eval mode of model
+        self.model.eval()
+
+        # Go over each datapoint
+        for i, idx in enumerate(indices):
+            # Get data objects
+            data_objects = dataset.get(idx)
+
+            # Record the target (if requested)
+            if return_targets:
+                targets[i] = data_objects[0].y.cpu()
+
+            # Transfer to GPU (if True)
+            if self.use_gpu:
+                nn_input = [d.cuda() for d in data_objects]
+            else:
+                nn_input = data_objects
+
+            # Compute prediction
+            pred_dict = self.model(nn_input)
+            predictions[i] = self.standardizer.restore(pred_dict["output"].cpu())
+
+        predictions_dict = {"targets": targets, "predictions": predictions,
+                            "indices": indices}
+
+        return predictions_dict
+
+    def save(self, epoch, best_status=None):
+        """Save the current state of the model as a dictionary.
+
+        The dictionary contains the epoch, model state dict, optimizer state dict,
+        and standardizer state dict.
+
+        Parameters
+        ----------
+        epoch: int
+            Current epoch
+        best_status: bool
+            If True, the model is also saved as "best.pt".
+        """
+        save_dict = {
+            "epoch": epoch,
+            "model_state_dict": self.model.state_dict(),
+            "optimizer_state_dict": self.optimizer.state_dict(),
+            "standardizer_state_dict": self.standardizer.get_state(),
+        }
+        save_path = self.model_save_path / f"model_{epoch}.pt"
+        torch.save(save_dict, save_path)
+        if best_status:
+            save_path = self.model_save_path / "best.pt"
+            torch.save(save_dict, save_path)
+
+    def load(self, epoch=None, best_status=None):
+        """Load a model saved at a particular epoch or the best model.
+
+        If best_status is True, epoch is ignored and the best model is loaded.
+
+        Parameters
+        ----------
+        epoch: int
+            Model at this epoch is loaded
+        best_status: bool
+            If this is True, the best model is loaded
+        """
+        # Load path
+        if best_status:
+            load_path = self.model_save_path / "best.pt"
+        else:
+            load_path = self.model_save_path / f"model_{epoch}.pt"
+
+        # Load the dictionary
+        load_dict = torch.load(load_path)
+
+        # Set state dicts
+        self.model.load_state_dict(load_dict["model_state_dict"])
+        self.standardizer.set_state(load_dict["standardizer_state_dict"])
+
+    def train(self, epochs, dataloader_dict, verbose=False):
+        """Train a model for the given number of epochs.
+
+        The training is performed with best-model checkpointing, i.e., the metric
+        function is evaluated at every epoch and the model with the best value for
+        this metric is loaded after training for testing.
+
+        Parameters
+        ----------
+        epochs: int
+            Total number of epochs
+        dataloader_dict: dict
+            Dictionary of train, val, and test dataloaders
+        verbose: bool
+            If True, progress is printed for every epoch.
+
+        Returns
+        -------
+        results_dict: Dict[Dict]
+            Dictionary of dictionaries. The outer dictionary contains the keys
+            "loss" and "metric" and the inner dictionaries contain the keys
+            "train", "val", and "test".
+        """
+        # Create empty lists
+        train_losses = []
+        train_metrics = []
+        val_losses = []
+        val_metrics = []
+
+        # Initialize best validation metric
+        prev_val_metric = 1e9
+        best_status = False
+
+        # Train and validate model
+        for i in range(epochs):
+            # Train
+            train_loss, train_metric = self.train_epoch(dataloader_dict["train"])
+
+            # Validate
+            val_loss, val_metric = self.validate(dataloader_dict["val"])
+
+            # Check if model is best
+            if val_metric < prev_val_metric:
+                best_status = True
+                prev_val_metric = deepcopy(val_metric)
+            else:
+                best_status = False
+
+            # Save model
+            self.save(i, best_status)
+
+            # Save losses and metrics
+            train_losses.append(train_loss)
+            val_losses.append(val_loss)
+            train_metrics.append(train_metric)
+            val_metrics.append(val_metric)
+
+            # Print progress
+            if verbose:
+                print(f"Epoch {i}: train metric = {train_metric:.4f}, "
+                      f"val metric = {val_metric:.4f}")
+
+        # Load the best model
+        self.load(best_status=True)
+
+        # Test the model
+        test_loss, test_metric = self.validate(dataloader_dict["test"])
+
+        loss_dict = {
+            "train": train_losses, "val": val_losses, "test": test_loss
+        }
+        metric_dict = {
+            "train": train_metrics, "val": val_metrics, "test": test_metric
+        }
+
+        results_dict = {"loss": loss_dict, "metric": metric_dict}
+
+        return results_dict
+    
\ No newline at end of file

From b212fae58383d79dcbd6ce5838cbc6f7a44a85cd Mon Sep 17 00:00:00 2001
From: Gaurav S Deshmukh
Date: Sun, 24 Sep 2023 00:37:03 -0400
Subject: [PATCH 3/3] Fix codestyle

---
 src/train.py | 72 ++++++++++++++++++++++++++--------------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/src/train.py b/src/train.py
index 32539f9..2e9ab02 100644
--- a/src/train.py
+++ b/src/train.py
@@ -5,31 +5,32 @@
 
 import numpy as np
 import torch
-
 from sklearn.metrics import mean_absolute_error, mean_squared_error
 
 from models import MultiGCN
 
+
 class Standardizer:
+    """Class to standardize targets."""
     def __init__(self, X):
         """
         Class to standardize outputs.
-        
+
         Parameters
         ----------
-        X: torch.Tensor 
+        X: torch.Tensor
             Tensor of outputs
         """
         self.mean = torch.mean(X)
         self.std = torch.std(X)
-    
+
     def standardize(self, X):
         """
         Convert a non-standardized output to a standardized output.
 
         Parameters
         ----------
-        X: torch.Tensor 
+        X: torch.Tensor
             Tensor of non-standardized outputs
 
         Returns
@@ -40,7 +41,7 @@ def standardize(self, X):
         """
         Z = (X - self.mean) / (self.std)
         return Z
-    
+
     def restore(self, Z):
         """
         Restore a standardized output to the non-standardized output.
@@ -52,13 +53,13 @@ def restore(self, Z):
 
         Returns
         -------
-        X: torch.Tensor 
+        X: torch.Tensor
             Tensor of non-standardized outputs
 
         """
         X = self.mean + Z * self.std
         return X
-    
+
     def get_state(self):
         """
         Return dictionary of the state of the Standardizer.
@@ -69,8 +70,8 @@ def get_state(self):
 
         """
-        return {"mean" : self.mean, "std" : self.std}
-    
+        return {"mean": self.mean, "std": self.std}
+
     def set_state(self, state):
         """
         Load a dictionary containing the state of the Standardizer.
 
@@ -78,13 +79,15 @@ def set_state(self, state):
         Parameters
         ----------
         state : dict
-            Dictionary containing mean and std 
+            Dictionary containing mean and std
         """
         self.mean = state["mean"]
         self.std = state["std"]
 
+
 class Model:
     """Wrapper class for a MultiGCN model that allows training and prediction."""
+
     def __init__(self, global_config, partition_configs, model_path):
         """Initialize a MultiGCN model.
@@ -116,7 +119,7 @@ def __init__(self, global_config, partition_configs, model_path):
 
         # Create model path
         self.make_directory_structure(model_path)
-    
+
         # Set GPU status
         self.use_gpu = global_config["gpu"]
 
@@ -127,7 +130,7 @@ def __init__(self, global_config, partition_configs, model_path):
             raise ValueError(
                 "Incorrect loss function. Currently only 'mse' is supported"
             )
-    
+
         # Set metric function
         if global_config["metric_function"] == "mae":
             self.metric_fn = mean_absolute_error
@@ -149,8 +152,7 @@ def __init__(self, global_config, partition_configs, model_path):
         # Set scheduler
         if "lr_milestones" in global_config.keys():
             self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
-                optimizer=self.optimizer,
-                milestones=global_config["lr_milestones"]
+                optimizer=self.optimizer, milestones=global_config["lr_milestones"]
             )
         else:
             self.scheduler = None
@@ -236,7 +238,7 @@ def train_epoch(self, dataloader):
         avg_loss = avg_loss / count
         avg_metric = avg_metric / count
 
         return avg_loss, avg_metric
-    
+
     def validate(self, dataloader):
         """Validate/test the model.
@@ -332,11 +334,14 @@ def predict(self, dataset, indices, return_targets=False):
             pred_dict = self.model(nn_input)
             predictions[i] = self.standardizer.restore(pred_dict["output"].cpu())
 
-        predictions_dict = {"targets": targets, "predictions": predictions, 
-                            "indices": indices}
-        
+        predictions_dict = {
+            "targets": targets,
+            "predictions": predictions,
+            "indices": indices,
+        }
+
         return predictions_dict
-    
+
     def save(self, epoch, best_status=None):
@@ -364,7 +369,7 @@ def save(self, epoch, best_status=None):
 
     def load(self, epoch=None, best_status=None):
         """Load a model saved at a particular epoch or the best model.
-        
+
         If best_status is True, epoch is ignored and the best model is loaded.
 
         Parameters
@@ -379,7 +384,7 @@ def load(self, epoch=None, best_status=None):
         if best_status:
             load_path = self.model_save_path / "best.pt"
         else:
             load_path = self.model_save_path / f"model_{epoch}.pt"
-        
+
         # Load the dictionary
         load_dict = torch.load(load_path)
@@ -392,7 +397,7 @@ def train(self, epochs, dataloader_dict, verbose=False):
 
         The training is performed with best-model checkpointing, i.e., the metric
         function is evaluated at every epoch and the model with the best value for
-        this metric is loaded after training for testing. 
+        this metric is loaded after training for testing.
 
         Parameters
         ----------
@@ -422,22 +427,22 @@ def train(self, epochs, dataloader_dict, verbose=False):
 
         # Train and validate model
         for i in range(epochs):
-            # Train 
+            # Train
             train_loss, train_metric = self.train_epoch(dataloader_dict["train"])
-            
+
             # Validate
             val_loss, val_metric = self.validate(dataloader_dict["val"])
-            
+
             # Check if model is best
             if val_metric < prev_val_metric:
                 best_status = True
                 prev_val_metric = deepcopy(val_metric)
             else:
                 best_status = False
-            
+
             # Save model
             self.save(i, best_status)
-            
+
             # Save losses and metrics
             train_losses.append(train_loss)
             val_losses.append(val_loss)
@@ -455,14 +460,9 @@ def train(self, epochs, dataloader_dict, verbose=False):
         # Test the model
         test_loss, test_metric = self.validate(dataloader_dict["test"])
 
-        loss_dict = {
-            "train": train_losses, "val": val_losses, "test": test_loss
-        }
-        metric_dict = {
-            "train": train_metrics, "val": val_metrics, "test": test_metric
-        }
+        loss_dict = {"train": train_losses, "val": val_losses, "test": test_loss}
+        metric_dict = {"train": train_metrics, "val": val_metrics, "test": test_metric}
 
         results_dict = {"loss": loss_dict, "metric": metric_dict}
-
         return results_dict
-    
\ No newline at end of file
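
For a quick end-to-end picture of how the pieces in this series fit together, here is
a minimal usage sketch tying patch 1's create_dataloaders to patch 2's Model wrapper.
The AtomsDataset and Sampler construction, the shape of the value returned by
create_samplers, and the concrete config values are assumptions inferred from the
docstrings above, not verified repo API:

    # Minimal usage sketch. AtomsDataset, Sampler, and the exact return shape
    # of create_samplers are assumptions inferred from the docstrings above.
    from data import AtomsDataset        # assumed import path
    from samplers import Sampler         # assumed class name
    from train import Model
    from utils import create_dataloaders

    dataset = AtomsDataset(...)          # processed dataset object (assumed ctor)

    # Values < 1 are fractions of the dataset; values >= 1 are absolute counts
    # (matching the guard in samplers.py). Assumed to return a dict of index
    # arrays: {"train": ..., "val": ..., "test": ...}
    sampler = Sampler(dataset)
    sample_idx = sampler.create_samplers({"train": 0.8, "val": 0.1})

    dataloader_dict = create_dataloaders(dataset, sample_idx, batch_size=32)

    global_config = {
        "gpu": False,
        "loss_function": "mse",          # only "mse" is supported
        "metric_function": "mae",        # "mae" or "mse"
        "optimizer": "adam",             # "adam" or "sgd"
        "learning_rate": 1e-3,
        "lr_milestones": [50, 75],       # optional; epochs at which LR drops
    }
    partition_configs = [
        {
            "n_conv": 3,
            "n_hidden": 2,
            "conv_size": 64,
            "hidden_size": 128,
            "dropout": 0.2,
            "conv_type": "CGConv",       # only "CGConv" is supported
            "pool_type": "add",          # "add" or "mean"
            "num_node_features": 16,
            "num_edge_features": 4,
        }
    ]

    model = Model(global_config, partition_configs, model_path="runs/example")

    # Fit the standardizer on training targets only, so validation/test
    # standardization does not leak statistics from held-out data.
    train_targets = ...                  # torch.Tensor of training y's (repo-specific)
    model.init_standardizer(train_targets)

    results_dict = model.train(epochs=100, dataloader_dict=dataloader_dict,
                               verbose=True)
    predictions_dict = model.predict(dataset, sample_idx["test"],
                                     return_targets=True)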