diff --git a/.gitignore b/.gitignore index 513abe4..1acfb60 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ data/* !data/dband_centers.csv __pycache__ *.cif +trained_models +*.pt +*__init__.py \ No newline at end of file diff --git a/src/data.py b/src/data.py index aa1a23d..093b964 100644 --- a/src/data.py +++ b/src/data.py @@ -10,9 +10,9 @@ from ase.io import read from torch_geometric.data import Data, Dataset -from constants import REPO_PATH -from featurizers import OneHotEncoder -from utils import featurize_atoms, partition_structure +from .constants import REPO_PATH +from .featurizers import OneHotEncoder +from .utils import featurize_atoms, partition_structure_by_layers class AtomsDataset(Dataset): @@ -60,7 +60,7 @@ def __init__(self, root, prop_csv): def process_data( self, - z_cutoffs, + layer_cutoffs, node_features, edge_features, max_atoms=None, @@ -75,8 +75,8 @@ def process_data( Parameters ---------- - z_cutoffs: list or np.ndarray - List of z-coordinates based on which atomic structures are + layer_cutoffs: list or np.ndarray + List of layer cutoffs based on which atomic structures are partitioned. The number of partitions is equal to one more than the length of z_cutoffs. node_features: list[list] @@ -115,7 +115,7 @@ def process_data( atoms = read(str(file_path)) # Partition structure - part_atoms = partition_structure(atoms, z_cutoffs) + part_atoms = partition_structure_by_layers(atoms, layer_cutoffs) # Featurize partitions data_objects = [] @@ -191,7 +191,7 @@ def __init__(self, atoms): def process_data( self, - z_cutoffs, + layer_cutoffs, node_features, edge_features, max_atoms=None, @@ -206,8 +206,8 @@ def process_data( Parameters ---------- - z_cutoffs: list or np.ndarray - List of z-coordinates based on which atomic structures are + layer_cutoffs: list or np.ndarray + List of layer cutoffs based on which atomic structures are partitioned. The number of partitions is equal to one more than the length of z_cutoffs. 
node_features: list[list] @@ -229,7 +229,7 @@ def process_data( # Iterate over files and process them for atoms_obj in self.atoms: # Partition structure - part_atoms = partition_structure(atoms_obj, z_cutoffs) + part_atoms = partition_structure_by_layers(atoms_obj, layer_cutoffs) # Featurize partitions data_objects = [] @@ -337,28 +337,11 @@ def load_datapoints(atoms, process_dict): data_root_path = Path(REPO_PATH) / "data" / "S_calcs" prop_csv_path = data_root_path / "name_prop.csv" - # Create dataset - dataset = AtomsDataset(data_root_path, prop_csv_path) - # dataset.process_data(z_cutoffs=[13., 20.], - # node_features=[ - # ["atomic_number", "dband_center"], - # ["atomic_number", "reactivity"], - # ["atomic_number", "reactivity"], - # ], - # edge_features=[ - # ["bulk_bond_distance"], - # ["surface_bond_distance"], - # ["adsorbate_bond_distance"], - # ]) - print(dataset[0][-1].x) - print(dataset.df_name_idx.head()) - print(dataset[0][-1].name) - # Create datapoint atoms = read(data_root_path / "Pt_3_Rh_9_-7-7-S.cif") datapoint = AtomsDatapoints(atoms) datapoint.process_data( - z_cutoffs=[13.0, 20.0], + layer_cutoffs=[3, 6], node_features=[ ["atomic_number", "dband_center"], ["atomic_number", "reactivity"], diff --git a/src/featurizers.py b/src/featurizers.py index 4ca0109..bd749f5 100644 --- a/src/featurizers.py +++ b/src/featurizers.py @@ -9,8 +9,8 @@ from mendeleev import element from torch.nn.functional import one_hot -from constants import DBAND_FILE_PATH -from graphs import AtomsGraph +from .constants import DBAND_FILE_PATH +from .graphs import AtomsGraph class OneHotEncoder: diff --git a/src/models.py b/src/models.py index 6f2d727..97f6cbc 100644 --- a/src/models.py +++ b/src/models.py @@ -45,7 +45,7 @@ def __init__(self, partition_configs): # Initialize layers # Initial transform - self.init_transform = [] + self.init_transform = nn.ModuleList() for i in range(self.n_partitions): self.init_transform.append( nn.Sequential( @@ -58,12 +58,12 @@ def __init__(self, partition_configs): self.init_conv_layers() # Pooling layers - self.pool_layers = [] + self.pool_layers = nn.ModuleList() for i in range(self.n_partitions): - self.pool_layers.append(gnn.pool.global_add_pool) + self.pool_layers.append(gnn.aggr.SumAggregation()) # Pool transform - self.pool_transform = [] + self.pool_transform = nn.ModuleList() for i in range(self.n_partitions): self.pool_transform.append( nn.Sequential( @@ -73,7 +73,7 @@ def __init__(self, partition_configs): ) # Hidden layers - self.hidden_layers = [] + self.hidden_layers = nn.ModuleList() for i in range(self.n_partitions): self.hidden_layers.append( nn.Sequential( @@ -98,10 +98,12 @@ def __init__(self, partition_configs): self.final_lin_transform = nn.Linear(self.n_partitions, 1, bias=False) with torch.no_grad(): self.final_lin_transform.weight.copy_(torch.ones(self.n_partitions)) + for p in self.final_lin_transform.parameters(): + p.requires_grad = False def init_conv_layers(self): """Initialize convolutional layers.""" - self.conv_layers = [] + self.conv_layers = nn.ModuleList() for i in range(self.n_partitions): part_conv_layers = [] for j in range(self.n_conv[i]): @@ -110,7 +112,7 @@ def init_conv_layers(self): gnn.CGConv( channels=self.conv_size[i], dim=self.num_edge_features[i], - batch_norm=False, + batch_norm=True, ), nn.LeakyReLU(inplace=True), ] @@ -151,7 +153,7 @@ def forward(self, data_objects): conv_data = layer(conv_data) # Apply pooling layer - pooled_data = self.pool_layers[i](x=conv_data, batch=None) + pooled_data = 
self.pool_layers[i](x=conv_data, index=data.batch) # Apply pool-to-hidden transform hidden_data = self.pool_transform[i](pooled_data) @@ -163,8 +165,8 @@ def forward(self, data_objects): contributions.append(hidden_data) # Apply final transformation - contributions = torch.cat(contributions, dim=-1) - output = self.final_lin_transform(contributions) + contributions = torch.cat(contributions) + output = self.final_lin_transform(contributions.view(-1, self.n_partitions)) return {"output": output, "contributions": contributions} diff --git a/src/train.py b/src/train.py index 2e9ab02..150ce5c 100644 --- a/src/train.py +++ b/src/train.py @@ -7,15 +7,24 @@ import torch from sklearn.metrics import mean_absolute_error, mean_squared_error -from models import MultiGCN +from .models import MultiGCN class Standardizer: """Class to standardize targets.""" - def __init__(self, X): + + def __init__(self): """ Class to standardize outputs. + Initialize with dummy values. + """ + self.mean = 0 + self.std = 0.1 + + def initialize(self, X): + """Initialize mean and std based on the given tensor. + Parameters ---------- X: torch.Tensor @@ -60,6 +69,24 @@ def restore(self, Z): X = self.mean + Z * self.std return X + def restore_cont(self, Z): + """ + Restore a standardized contribution to the non-standardized contribution. + + Parameters + ---------- + Z: torch.Tensor + Tensor of standardized contributions + + Returns + ------- + X: torch.Tensor + Tensor of non-standardized contributions + + """ + X = (self.mean / Z.shape[0]) + Z * self.std + return X + def get_state(self): """ Return dictionary of the state of the Standardizer. @@ -122,7 +149,8 @@ def __init__(self, global_config, partition_configs, model_path): # Set GPU status self.use_gpu = global_config["gpu"] - + if self.use_gpu: + self.model.cuda() # Set loss function if global_config["loss_function"] == "mse": self.loss_fn = torch.nn.MSELoss() @@ -145,18 +173,21 @@ def __init__(self, global_config, partition_configs, model_path): ) elif global_config["optimizer"].lower().strip() == "sgd": self.optimizer = torch.optim.SGD( - self.model_parameters(), + self.model.parameters(), lr=global_config["learning_rate"], ) # Set scheduler if "lr_milestones" in global_config.keys(): - self.scheduler = torch.optim.MultiStepLR( + self.scheduler = torch.optim.lr_scheduler.MultiStepLR( optimizer=self.optimizer, milestones=global_config["lr_milestones"] ) else: self.scheduler = None + # Set standardizer + self.standardizer = Standardizer() + def make_directory_structure(self, model_path): """Make directory structure to store models and results.""" self.model_path = Path(model_path) @@ -173,7 +204,7 @@ def init_standardizer(self, targets): targets: np.ndarray or torch.Tensor Array of training outputs """ - self.standardizer = Standardizer(targets) + self.standardizer.initialize(torch.Tensor(targets)) def train_epoch(self, dataloader): """Train the model for a single epoch.
@@ -209,11 +240,11 @@ def train_epoch(self, dataloader): pred_dict = self.model(nn_input) # Calculate loss - loss = self.loss_fn(nn_output, pred_dict["output"]) + loss = self.loss_fn(pred_dict["output"], nn_output.unsqueeze(1)) avg_loss += loss # Calculate metric - y_pred = self.standardizer.restore(pred_dict["output"].cpu()) + y_pred = self.standardizer.restore(pred_dict["output"].cpu().detach()) metric = self.metric_fn(y, y_pred) avg_metric += metric @@ -273,11 +304,11 @@ def validate(self, dataloader): pred_dict = self.model(nn_input) # Calculate loss - loss = self.loss_fn(nn_output, pred_dict["output"]) + loss = self.loss_fn(pred_dict["output"], nn_output.unsqueeze(1)) avg_loss += loss # Calculate metric - y_pred = self.standardizer.restore(pred_dict["output"].cpu()) + y_pred = self.standardizer.restore(pred_dict["output"].cpu().detach()) metric = self.metric_fn(y, y_pred) avg_metric += metric @@ -305,12 +336,14 @@ def predict(self, dataset, indices, return_targets=False): Returns ------- prediction_dict: dict - Dictionary containing "targets", "predictions", and "indices" (copy of - predict_idx). + Dictionary containing "targets", "predictions", "contributions" and + "indices" (copy of predict_idx). """ # Create arrays + n_partitions = len(dataset.get(indices[0])) targets = np.zeros(len(indices)) predictions = np.zeros(len(indices)) + contributions = np.zeros((len(indices), n_partitions)) # Enable eval mode of model self.model.eval() @@ -318,7 +351,7 @@ def predict(self, dataset, indices, return_targets=False): # Go over each batch in the dataloader for i, idx in enumerate(indices): # Get data objects - data_objects = dataset.get(i) + data_objects = dataset.get(idx) # Standardize output if return_targets: @@ -332,12 +365,19 @@ def predict(self, dataset, indices, return_targets=False): # Compute prediction pred_dict = self.model(nn_input) - predictions[i] = self.standardizer.restore(pred_dict["output"].cpu()) + predictions[i] = self.standardizer.restore( + pred_dict["output"].cpu().detach() + ) + conts_std = pred_dict["contributions"].cpu().detach() + contributions[i, :] = ( + self.standardizer.restore_cont(conts_std).numpy().flatten() + ) predictions_dict = { "targets": targets, "predictions": predictions, "indices": indices, + "contributions": contributions, } return predictions_dict @@ -444,11 +484,20 @@ def train(self, epochs, dataloader_dict, verbose=False): self.save(i, best_status) # Save losses and metrics - train_losses.append(train_loss) - val_losses.append(val_loss) + train_losses.append(train_loss.cpu().detach().numpy()) + val_losses.append(val_loss.cpu().detach().numpy()) train_metrics.append(train_metric) val_metrics.append(val_metric) + # Print, if verbose + if verbose: + print( + f"Epoch: [{i}] Training loss: [{train_loss:.3f}] " + + f"Training metric: [{train_metric:.3f}] " + + f"Validation loss: [{val_loss:.3f}] " + + f"Validation metric: [{val_metric:.3f}]" + ) + # Load the best model self.load(best_status=True) diff --git a/src/utils.py b/src/utils.py index 5b2c6b2..de98478 100644 --- a/src/utils.py +++ b/src/utils.py @@ -8,12 +8,12 @@ from torch.utils.data import SubsetRandomSampler from torch_geometric.loader import DataLoader -from featurizers import ( +from .featurizers import ( OneHotEncoder, list_of_edge_featurizers, list_of_node_featurizers, ) -from graphs import AtomsGraph +from .graphs import AtomsGraph def partition_structure(atoms, z_cutoffs): @@ -51,6 +51,47 @@ def partition_structure(atoms, z_cutoffs): return part_atoms +def 
partition_structure_by_layers(atoms, layer_cutoffs): + """Partition atomic structure into bulk, surface, and/or adsorbates by layers. + + Parameters + ---------- + atoms: ase.Atoms object + The structure to be partitioned + layer_cutoffs: list or np.ndarray + List of layer cutoffs. xy planes are placed above the specified layer + cutoffs to partition atoms above and below them. The length of layer_cutoffs + should be equal to one less than the number of partitions. + """ + # Set number of partitions equal to 1 more than the length of layer_cutoffs + n_partitions = int(len(layer_cutoffs) + 1) + + # Calculate interlayer distance + z_array = np.unique(np.sort(atoms.get_positions()[:, -1])) + z_min = z_array.min() + d_interlayer = abs(z_array[1] - z_array[0]) + + # Convert layer cutoffs to z cutoffs and bound them with 0 and infinity + z_cutoffs = z_min + (np.array(layer_cutoffs) - 1) * d_interlayer + 0.1 + z_cutoffs = np.insert(z_cutoffs, 0, 0) + z_cutoffs = np.insert(z_cutoffs, len(z_cutoffs), np.inf) + + # Get positions + pos = atoms.get_positions() + + # Iterate over number of partitions + part_atoms = [] + for i in range(n_partitions): + part_idx = ( + np.argwhere((pos[:, -1] >= z_cutoffs[i]) & (pos[:, -1] < z_cutoffs[i + 1])) + .flatten() + .tolist() + ) + part_atoms.append(part_idx) + + return part_atoms + + def featurize_atoms( atoms, select_idx, @@ -136,6 +177,7 @@ def featurize_atoms( "edge_indices": edge_indices, } + def create_dataloaders(proc_data, sample_idx, batch_size, num_proc=0): """Create training, validation, and/or test dataloaders. @@ -150,7 +192,7 @@ def create_dataloaders(proc_data, sample_idx, batch_size, num_proc=0): Batch size num_proc: int (default = 0) Number of cores to be used for parallelization. Defaults to serial. - + Returns ------- dataloader_dict: dict @@ -160,15 +202,18 @@ def create_dataloaders(proc_data, sample_idx, batch_size, num_proc=0): dataloader_dict = {"train": [], "val": [], "test": []} for key in dataloader_dict.keys(): - if sample_idx[key].shape[0] > 0.: + if sample_idx[key].shape[0] > 0.0: sampler = SubsetRandomSampler(sample_idx[key]) - dataloader_dict[key] = DataLoader(dataset=proc_data, - batch_size=batch_size, - sampler=sampler, - num_workers=num_proc) - + dataloader_dict[key] = DataLoader( + dataset=proc_data, + batch_size=batch_size, + sampler=sampler, + num_workers=num_proc, + ) + return dataloader_dict + if __name__ == "__main__": from ase.io import read diff --git a/workflows/basic_train_val_test.py b/workflows/basic_train_val_test.py new file mode 100644 index 0000000..50b2a65 --- /dev/null +++ b/workflows/basic_train_val_test.py @@ -0,0 +1,94 @@ +"""Basic workflow to train, validate, and test a model.""" + +import torch + +from ..src.constants import REPO_PATH +from ..src.data import AtomsDataset +from ..src.samplers import RandomSampler +from ..src.train import Model +from ..src.utils import create_dataloaders + +# Set seeds +seed = 0 +torch.manual_seed(seed) + +# Create dataset +dataset_path = REPO_PATH / "data" / "S_calcs" +prop_csv_path = dataset_path / "name_prop.csv" +dataset = AtomsDataset(root=dataset_path, prop_csv=prop_csv_path) + +# Process dataset +# dataset.process_data(layer_cutoffs=[3, 6], +# node_features=[ +# ["atomic_number", "dband_center", "coordination"], +# ["atomic_number", "reactivity", "coordination"], +# ["atomic_number", "reactivity", "coordination"], +# ], +# edge_features=[ +# ["bulk_bond_distance"], +# ["surface_bond_distance"], +# ["adsorbate_bond_distance"], +# ]) + +# Create sampler +sample_config = {"train": 0.8, "val": 0.1, "test": 0.1} +sampler = 
RandomSampler(seed, dataset.len()) +sample_idx = sampler.create_samplers(sample_config) + +# Create dataloaders +dataloader_dict = create_dataloaders(dataset, sample_idx, batch_size=32) + +# Create model +global_config = { + "gpu": True, + "loss_function": "mse", + "metric_function": "mae", + "learning_rate": 0.001, + "optimizer": "adam", + "lr_milestones": [75], +} +partition_configs = [ + { + "n_conv": 3, + "n_hidden": 1, + "hidden_size": 20, + "conv_size": 20, + "dropout": 0.1, + "num_node_features": dataset[0][0].num_node_features, + "num_edge_features": dataset[0][0].num_edge_features, + "conv_type": "CGConv", + }, + { + "n_conv": 5, + "n_hidden": 2, + "hidden_size": 50, + "conv_size": 50, + "dropout": 0.1, + "num_node_features": dataset[0][1].num_node_features, + "num_edge_features": dataset[0][1].num_edge_features, + "conv_type": "CGConv", + }, + { + "n_conv": 5, + "n_hidden": 2, + "hidden_size": 50, + "conv_size": 50, + "dropout": 0.1, + "num_node_features": dataset[0][2].num_node_features, + "num_edge_features": dataset[0][2].num_edge_features, + "conv_type": "CGConv", + }, +] + +model_path = REPO_PATH / "trained_models" / "S_binary_calcs" +model = Model(global_config, partition_configs, model_path) +model.init_standardizer([dataset[i][0].y for i in sample_idx["train"]]) +results_dict = model.train(100, dataloader_dict, verbose=True) +print(f"Test metric: {results_dict['metric']['test']}") + +# Load model +model.load(best_status=True) + +# Make predictions on a structure +pred_dict = model.predict(dataset, [0, 100, 200, 500], return_targets=True) +print(pred_dict)
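Note (outside the patch): a minimal sketch of how the new layer-based partitioning maps layer indices onto z cutoffs, assuming an ideal slab with evenly spaced layers. The slab and the layer_cutoffs values below are illustrative; the cutoff conversion simply reproduces what partition_structure_by_layers does, with layers counted from the bottom of the slab.

import numpy as np
from ase.build import fcc111

# Illustrative 9-layer Pt(111) slab; layer 1 is the bottom-most layer
atoms = fcc111("Pt", size=(2, 2, 9), vacuum=10.0)
layer_cutoffs = [3, 6]  # same values as in the example workflow

# Same conversion as partition_structure_by_layers: a plane is placed just
# above each cutoff layer, then the cutoffs are bounded by 0 and infinity
z = np.unique(np.sort(atoms.get_positions()[:, -1]))
z_min, d_interlayer = z.min(), abs(z[1] - z[0])
z_cutoffs = z_min + (np.array(layer_cutoffs) - 1) * d_interlayer + 0.1
z_cutoffs = np.concatenate(([0.0], z_cutoffs, [np.inf]))

pos = atoms.get_positions()
for i in range(len(z_cutoffs) - 1):
    idx = np.flatnonzero((pos[:, -1] >= z_cutoffs[i]) & (pos[:, -1] < z_cutoffs[i + 1]))
    print(f"partition {i}: {len(idx)} atoms")  # layers 1-3, 4-6, 7-9 -> 12 atoms each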
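Note (outside the patch): a quick check of the pooling change, assuming torch_geometric >= 2.1; the tensors below are made up. gnn.aggr.SumAggregation is an nn.Module (unlike the functional global_add_pool it replaces), so it can be registered in the new nn.ModuleList, and calling it with index=data.batch gives the same per-graph sums.

import torch
from torch_geometric.nn import aggr

pool = aggr.SumAggregation()           # module-based equivalent of global_add_pool
x = torch.ones(5, 4)                   # 5 nodes with 4 features each
batch = torch.tensor([0, 0, 0, 1, 1])  # node-to-graph assignment, as in data.batch
out = pool(x, index=batch)             # shape (2, 4): one summed row per graph
print(out)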
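Note (outside the patch): a short sketch of why Standardizer.restore_cont divides the mean by the number of contributions, with made-up numbers. Since final_lin_transform is frozen to all-ones weights, the standardized output is the sum of the standardized partition contributions, so adding mean / n to each restored contribution keeps the per-partition values summing to the restored total.

import torch

mean, std = -1.25, 0.40                     # illustrative Standardizer state
conts_std = torch.tensor([0.3, -0.1, 0.5])  # standardized contributions, one per partition
output_std = conts_std.sum()                # frozen all-ones final layer

total = mean + output_std * std                      # Standardizer.restore
conts = mean / conts_std.shape[0] + conts_std * std  # Standardizer.restore_cont

print(float(total))        # about -0.97
print(float(conts.sum()))  # matches the restored total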