Commit: Added SlabGCN

Gaurav S Deshmukh committed Sep 26, 2023
1 parent b86dcea commit a5314f2
Showing 3 changed files with 184 additions and 45 deletions.
167 changes: 159 additions & 8 deletions src/models.py
@@ -81,25 +81,26 @@ def __init__(self, partition_configs):
[
nn.Linear(self.hidden_size[i], self.hidden_size[i]),
nn.LeakyReLU(inplace=True),
nn.Dropout(p=self.dropout[i]),
]
* (self.n_hidden[i] - 1)
+ [
nn.Linear(self.hidden_size[i], 1),
nn.LeakyReLU(inplace=True),
nn.Dropout(p=self.dropout[i]),
]
)
)
)

# Final linear layer
# TODO: replace 1 with multiple outputs
self.final_lin_transform = nn.Linear(self.n_partitions, 1, bias=False)
with torch.no_grad():
self.final_lin_transform.weight.copy_(torch.ones(self.n_partitions))
for p in self.final_lin_transform.parameters():
p.requires_grad = False
self.fin1_transform = nn.Linear(self.n_partitions, 50)
self.fin1_act = nn.LeakyReLU()
self.final_lin_transform = nn.Linear(50, 1)
self.final_lin_act = nn.LeakyReLU()
#with torch.no_grad():
# self.final_lin_transform.weight.copy_(torch.ones(self.n_partitions))
#for p in self.final_lin_transform.parameters():
# p.requires_grad = False

def init_conv_layers(self):
"""Initialize convolutional layers."""
@@ -166,9 +167,159 @@ def forward(self, data_objects):

# Apply final transformation
contributions = torch.cat(contributions)
output = self.final_lin_transform(contributions.view(-1, 3))
output = self.fin1_transform(contributions.view(-1, 3))
output = self.fin1_act(output)
output = self.final_lin_transform(output)
output = self.final_lin_act(output)

return {"output": output, "contributions": contributions}

class SlabGCN(nn.Module):
"""Class to customize the graph neural network."""

def __init__(self, partition_configs, global_config):
"""Initialize the graph neural network.
Parameters
----------
partition_configs: List[Dict]
List of dictionaries containing parameters for the GNN for each
partition. The number of different GNNs are judged based on the
size of the list. Each partition config should contain the following
keys: n_conv (number of convolutional layers, int), n_hidden (number
of hidden layers, int), conv_size (feature size before convolution, int)
hidden_size (nodes per hidden layer node, int), dropout (dropout
probability for hidden layers, float), conv_type (type of convolution
layer, str; currently only "CGConv" is supported), pool_type
(type of pooling layer, str; currently "add" and "mean" are supported),
num_node_features (number of node features, int), num_edge_features
(number of edge features, int).
global_config: Dict
This should contain the following keys: n_hidden (Number of hidden
layers for the shared FFN, int), hidden_size (Number of nodes per
hidden layer, int), dropout (Probability of dropping a node, float).
"""
super().__init__()

# Store hyperparameters
self.n_conv = [config["n_conv"] for config in partition_configs]
self.n_hidden = global_config["n_hidden"]
self.hidden_size = global_config["hidden_size"]
self.conv_size = [config["conv_size"] for config in partition_configs]
self.conv_type = [config["conv_type"] for config in partition_configs]
self.dropout = global_config.get("dropout", 0)
self.num_node_features = [
config["num_node_features"] for config in partition_configs
]
self.num_edge_features = [
config["num_edge_features"] for config in partition_configs
]
self.n_partitions = len(partition_configs)

# Initialize layers
# Initial transform
self.init_transform = nn.ModuleList()
for i in range(self.n_partitions):
self.init_transform.append(
nn.Sequential(
nn.Linear(self.num_node_features[i], self.conv_size[i]),
nn.LeakyReLU(inplace=True),
)
)

# Convolutional layers
self.init_conv_layers()

# Pooling layers
self.pool_layers = nn.ModuleList()
for i in range(self.n_partitions):
self.pool_layers.append(gnn.aggr.SumAggregation())

# Pool transform
self.pool_transform = nn.Sequential(
nn.Linear(sum(self.conv_size), self.hidden_size),
nn.LeakyReLU(inplace=True),
)

# Hidden layers
self.hidden_layers = nn.Sequential(
*(
[
nn.Linear(self.hidden_size, self.hidden_size),
nn.LeakyReLU(inplace=True),
nn.Dropout(p=self.dropout),
]
* (self.n_hidden - 1)
+ [
nn.Linear(self.hidden_size, 1),
nn.LeakyReLU(inplace=True),
nn.Dropout(p=self.dropout),
]
)
)
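# Note: Python list multiplication repeats references to the *same* module
# objects, so for n_hidden > 2 the block above (like the analogous one in
# MultiGCN) would reuse a single Linear/Dropout instance across layers,
# sharing their weights. A sketch of a per-layer construction that avoids
# this, using the same names as above:
#
#     layers = []
#     for _ in range(self.n_hidden - 1):
#         layers += [nn.Linear(self.hidden_size, self.hidden_size),
#                    nn.LeakyReLU(inplace=True), nn.Dropout(p=self.dropout)]
#     layers += [nn.Linear(self.hidden_size, 1),
#                nn.LeakyReLU(inplace=True), nn.Dropout(p=self.dropout)]
#     self.hidden_layers = nn.Sequential(*layers)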

def init_conv_layers(self):
"""Initialize convolutional layers."""
self.conv_layers = nn.ModuleList()
for i in range(self.n_partitions):
part_conv_layers = []
for j in range(self.n_conv[i]):
# TODO Add possibility of changing convolutional layers
conv_layer = [
gnn.CGConv(
channels=self.conv_size[i],
dim=self.num_edge_features[i],
batch_norm=True,
),
nn.LeakyReLU(inplace=True),
]
part_conv_layers.extend(conv_layer)

self.conv_layers.append(nn.ModuleList(part_conv_layers))
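# The per-partition stacks are ModuleLists rather than nn.Sequential because
# the CGConv layers take (x, edge_index, edge_attr) while the activations
# take a single tensor; forward() dispatches on
# isinstance(layer, gnn.MessagePassing).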

def forward(self, data_objects):
"""Foward pass of the network(s).
Parameters
----------
data_objects: list
List of data objects, each corresponding to a graph of a partition
of an atomic structure.
Returns
------
dict
Dictionary containing "output" and "contributions".
"""
# For each data object
pools = []
for i, data in enumerate(data_objects):
# Apply initial transform
conv_data = self.init_transform[i](data.x)

# Apply convolutional layers
for layer in self.conv_layers[i]:
if isinstance(layer, gnn.MessagePassing):
conv_data = layer(
x=conv_data,
edge_index=data.edge_index,
edge_attr=data.edge_attr,
)
else:
conv_data = layer(conv_data)

# Apply pooling layer
pooled_data = self.pool_layers[i](x=conv_data, index=data.batch)
pools.append(pooled_data)

# Apply pool-to-hidden transform
pools = torch.cat(pools, dim=1)
hidden_data = self.pool_transform(pools)

# Apply hidden layers
output = self.hidden_layers(hidden_data)

return {"output": output}


if __name__ == "__main__":
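For orientation, a minimal usage sketch of the new SlabGCN; the three-partition split, feature sizes, dummy graphs, and import path below are illustrative assumptions, not part of the commit:

import torch
from torch_geometric.data import Data

from src.models import SlabGCN

partition_configs = [
    {
        "n_conv": 2,
        "conv_size": 30,
        "num_node_features": 3,
        "num_edge_features": 1,
        "conv_type": "CGConv",
    }
    for _ in range(3)
]
global_config = {"n_hidden": 2, "hidden_size": 30, "dropout": 0.1}
model = SlabGCN(partition_configs, global_config)
model.eval()  # disable dropout for a deterministic forward pass

# One dummy single-graph batch per partition: 5 nodes, 4 edges each.
data_objects = [
    Data(
        x=torch.randn(5, 3),
        edge_index=torch.tensor([[0, 1, 2, 3], [1, 2, 3, 4]]),
        edge_attr=torch.randn(4, 1),
        batch=torch.zeros(5, dtype=torch.long),
    )
    for _ in range(3)
]
out = model(data_objects)["output"]  # shape: (1, 1)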
12 changes: 3 additions & 9 deletions src/train.py
Expand Up @@ -7,7 +7,7 @@
import torch
from sklearn.metrics import mean_absolute_error, mean_squared_error

from .models import MultiGCN
from .models import MultiGCN, SlabGCN


class Standardizer:
@@ -142,7 +142,7 @@ def __init__(self, global_config, partition_configs, model_path):
Path where the model is to be saved
"""
# Create model
self.model = MultiGCN(partition_configs)
self.model = SlabGCN(partition_configs, global_config)

# Create model path
self.make_directory_structure(model_path)
@@ -336,14 +336,13 @@ def predict(self, dataset, indices, return_targets=False):
Returns
-------
prediction_dict: dict
Dictionary containing "targets", "predictions", "contributions" and
Dictionary containing "targets", "predictions", and
"indices" (copy of predict_idx).
"""
# Create arrays
n_partitions = len(dataset.get(indices[0]))
targets = np.zeros(len(indices))
predictions = np.zeros(len(indices))
contributions = np.zeros((len(indices), n_partitions))

# Enable eval mode of model
self.model.eval()
@@ -368,16 +367,11 @@
predictions[i] = self.standardizer.restore(
pred_dict["output"].cpu().detach()
)
conts_std = pred_dict["contributions"].cpu().detach()
contributions[i, :] = (
self.standardizer.restore_cont(conts_std).numpy().flatten()
)

predictions_dict = {
"targets": targets,
"predictions": predictions,
"indices": indices,
"contributions": contributions,
}

return predictions_dict
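A hypothetical call of the updated predict(); the trainer class name and variable names are assumptions, since the enclosing class is not shown in this diff:

trainer = Trainer(global_config, partition_configs, model_path="runs/slabgcn")
results = trainer.predict(dataset, test_idx, return_targets=True)
# The returned dict now carries only "targets", "predictions", and "indices";
# the per-partition "contributions" entry is gone because SlabGCN's forward
# pass does not expose contributions.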
50 changes: 22 additions & 28 deletions workflows/basic_train_val_test.py
@@ -18,17 +18,17 @@
dataset = AtomsDataset(root=dataset_path, prop_csv=prop_csv_path)

# Process dataset
# dataset.process_data(layer_cutoffs=[3, 6],
# node_features=[
# ["atomic_number", "dband_center", "coordination"],
# ["atomic_number", "reactivity", "coordination"],
# ["atomic_number", "reactivity", "coordination"],
# ],
# edge_features=[
# ["bulk_bond_distance"],
# ["surface_bond_distance"],
# ["adsorbate_bond_distance"],
# ])
dataset.process_data(layer_cutoffs=[3, 6],
node_features=[
["atomic_number", "dband_center", "coordination"],
["atomic_number", "reactivity", "coordination"],
["atomic_number", "reactivity", "coordination"],
],
edge_features=[
["bulk_bond_distance"],
["surface_bond_distance"],
["adsorbate_bond_distance"],
])

# Create sampler
sample_config = {"train": 0.8, "val": 0.1, "test": 0.1}
@@ -43,37 +43,31 @@
"gpu": True,
"loss_function": "mse",
"metric_function": "mae",
"learning_rate": 0.001,
"learning_rate": 0.01,
"optimizer": "adam",
"lr_milestones": [75],
"lr_milestones": [50],
"n_hidden": 2,
"hidden_size": 30,
"dropout": 0.1,
}
partition_configs = [
{
"n_conv": 3,
"n_hidden": 1,
"hidden_size": 20,
"conv_size": 20,
"dropout": 0.1,
"n_conv": 2,
"conv_size": 30,
"num_node_features": dataset[0][0].num_node_features,
"num_edge_features": dataset[0][0].num_edge_features,
"conv_type": "CGConv",
},
{
"n_conv": 5,
"n_hidden": 2,
"hidden_size": 50,
"conv_size": 50,
"dropout": 0.1,
"n_conv": 2,
"conv_size": 30,
"num_node_features": dataset[0][1].num_node_features,
"num_edge_features": dataset[0][1].num_edge_features,
"conv_type": "CGConv",
},
{
"n_conv": 5,
"n_hidden": 2,
"hidden_size": 50,
"conv_size": 50,
"dropout": 0.1,
"n_conv": 2,
"conv_size": 30,
"num_node_features": dataset[0][2].num_node_features,
"num_edge_features": dataset[0][2].num_edge_features,
"conv_type": "CGConv",
