From 39ab2ca686c9d33d9c2c0c130e16c223feffdb0d Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Fri, 22 Sep 2023 18:32:54 -0400 Subject: [PATCH 1/6] Added model --- src/models.py | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 src/models.py diff --git a/src/models.py b/src/models.py new file mode 100644 index 0000000..a7f2e33 --- /dev/null +++ b/src/models.py @@ -0,0 +1,153 @@ +"""Graph neural network models.""" + +import torch +import torch.nn as nn +import torch_geometric.nn as gnn + +class MultiGCN(gnn.MessagePassing): + """Class to customize the graph neural network.""" + def __init__(self, partition_configs): + """Initialize the graph neural network. + + Parameters + ---------- + partition_configs: List[Dict] + List of dictionaries containing parameters for the GNN for each + partition. The number of different GNNs are judged based on the + size of the list. Each partition config should contain the following + keys: n_conv (number of convolutional layers, int), n_hidden (number + of hidden layers, int), conv_size (feature size before convolution, int) + hidden_size (nodes per hidden layer node, int), dropout (dropout + probability for hidden layers, float), conv_type (type of convolution + layer, str; currently only "CGConv" is supported), pool_type + (type of pooling layer, str; currently "add" and "mean" are supported), + num_node_features (number of node features, int), num_edge_features + (number of edge features, int). + """ + # Store hyperparameters + self.n_conv = [config["n_conv"] for config in partition_configs] + self.n_hidden = [config["n_hidden"] for config in partition_configs] + self.hidden_size = [config["hidden_size"] for config in partition_configs] + self.conv_size = [config["conv_size"] for config in partition_configs] + self.conv_type = [config["conv_type"] for config in partition_configs] + self.dropout = [config["dropout"] for config in partition_configs] + self.num_node_features = [ + config["num_node_features"] for config in partition_configs + ] + self.num_edge_features = [ + config["num_node_features"] for config in partition_configs + ] + self.n_partitions = len(partition_configs) + + # Initialize layers + # Initial transform + self.init_transform = [] + for i in range(self.n_partitions): + self.init_transform.append( + nn.ModuleList( + nn.Linear(self.num_node_features[i], self.conv_size[i]), + nn.LeakyReLU(inplace=True), + ) + ) + + # Convolutional layers + self.init_conv_layers() + + # Pooling layers + self.pool_layers = [] + for i in range(self.n_partitions): + self.pool_layers.append(gnn.pool.global_addpool()) + + # Pool transform + self.pool_transform = [] + for i in range(self.n_partitions): + self.pool_transform.append( + nn.ModuleList( + nn.Linear(self.conv_size[i], self.hidden_size[i]), + nn.LeakyReLU(inplace=True), + ) + ) + + # Hidden layers + self.hidden_layers = [] + for i in range(self.n_partitions): + self.hidden_layers.append( + nn.ModuleList([ + nn.Linear(self.hidden_size[i], self.hidden_size[i]), + nn.LeakyReLU(inplace=True), + nn.Dropout(p=self.dropout), + ] * (self.hidden_layers - 1) + + [ + nn.Linear(self.hidden_size[i], 1), + nn.LeakyReLU(inplace=True), + nn.Dropout(p=self.dropout), + ] + ) + ) + + # Final linear layer + # TODO: replace 1 with multiple outputs + self.final_lin_transform = nn.Linear(self.n_partitions, 1) + + + def init_conv_layers(self): + """Initialize convolutional layers.""" + self.conv_layers = [] + for i in range(self.n_partitions): + part_conv_layers = [] + for j in range(self.n_conv): + conv_layer = [ + gnn.CGConv( + channels=self.num_node_features[i], + dim=self.num_edge_features[i], + batch_norm=True + ), + nn.LeakyReLU(inplace=True) + ] + part_conv_layers.append(conv_layer) + + self.conv_layers.append(nn.ModuleList(part_conv_layers)) + + def forward(self, data_objects): + """Foward pass of the network(s). + + Parameters + ---------- + data_objects: list + List of data objects, each corresponding to a graph of a partition + of an atomic structure. + + Returns + ------ + dict + Dictionary containing "output" and "contributions". + """ + # Initialize empty list for contributions + contributions = [] + # For each data object + for i, data in enumerate(data_objects): + # Apply initial transform + conv_data = self.init_transform[i](data) + + # Apply convolutional layers + for layer in self.conv_layers[i]: + conv_data = layer(conv_data) + + # Apply pooling layer + pooled_data = self.pool_layers[i](conv_data) + + # Apply pool-to-hidden transform + hidden_data = self.pool_transform[i](pooled_data) + + # Apply hidden layers + for layer in self.hidden_layers[i]: + hidden_data = layer(hidden_data) + + # Save contribution + contributions.append(hidden_data) + + # Apply final transformation + output = self.final_lin_transform(*contributions) + + return {"output": output, "contributions": contributions} + \ No newline at end of file From 57196b1da0148936c58a0ca1213710b56b31fe4a Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Fri, 22 Sep 2023 21:46:42 -0400 Subject: [PATCH 2/6] It works --- src/models.py | 108 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 87 insertions(+), 21 deletions(-) diff --git a/src/models.py b/src/models.py index a7f2e33..56321d2 100644 --- a/src/models.py +++ b/src/models.py @@ -4,7 +4,7 @@ import torch.nn as nn import torch_geometric.nn as gnn -class MultiGCN(gnn.MessagePassing): +class MultiGCN(nn.Module): """Class to customize the graph neural network.""" def __init__(self, partition_configs): """Initialize the graph neural network. @@ -24,6 +24,8 @@ def __init__(self, partition_configs): num_node_features (number of node features, int), num_edge_features (number of edge features, int). """ + super().__init__() + # Store hyperparameters self.n_conv = [config["n_conv"] for config in partition_configs] self.n_hidden = [config["n_hidden"] for config in partition_configs] @@ -35,7 +37,7 @@ def __init__(self, partition_configs): config["num_node_features"] for config in partition_configs ] self.num_edge_features = [ - config["num_node_features"] for config in partition_configs + config["num_edge_features"] for config in partition_configs ] self.n_partitions = len(partition_configs) @@ -44,7 +46,7 @@ def __init__(self, partition_configs): self.init_transform = [] for i in range(self.n_partitions): self.init_transform.append( - nn.ModuleList( + nn.Sequential( nn.Linear(self.num_node_features[i], self.conv_size[i]), nn.LeakyReLU(inplace=True), ) @@ -56,13 +58,13 @@ def __init__(self, partition_configs): # Pooling layers self.pool_layers = [] for i in range(self.n_partitions): - self.pool_layers.append(gnn.pool.global_addpool()) + self.pool_layers.append(gnn.pool.global_add_pool) # Pool transform self.pool_transform = [] for i in range(self.n_partitions): self.pool_transform.append( - nn.ModuleList( + nn.Sequential( nn.Linear(self.conv_size[i], self.hidden_size[i]), nn.LeakyReLU(inplace=True), ) @@ -72,16 +74,16 @@ def __init__(self, partition_configs): self.hidden_layers = [] for i in range(self.n_partitions): self.hidden_layers.append( - nn.ModuleList([ + nn.Sequential(*([ nn.Linear(self.hidden_size[i], self.hidden_size[i]), nn.LeakyReLU(inplace=True), - nn.Dropout(p=self.dropout), - ] * (self.hidden_layers - 1) + + nn.Dropout(p=self.dropout[i]), + ] * (self.n_hidden[i] - 1) + [ nn.Linear(self.hidden_size[i], 1), nn.LeakyReLU(inplace=True), - nn.Dropout(p=self.dropout), - ] + nn.Dropout(p=self.dropout[i]), + ]) ) ) @@ -95,16 +97,16 @@ def init_conv_layers(self): self.conv_layers = [] for i in range(self.n_partitions): part_conv_layers = [] - for j in range(self.n_conv): + for j in range(self.n_conv[i]): conv_layer = [ gnn.CGConv( - channels=self.num_node_features[i], + channels=self.conv_size[i], dim=self.num_edge_features[i], - batch_norm=True + batch_norm=True, ), nn.LeakyReLU(inplace=True) ] - part_conv_layers.append(conv_layer) + part_conv_layers.extend(conv_layer) self.conv_layers.append(nn.ModuleList(part_conv_layers)) @@ -127,27 +129,91 @@ def forward(self, data_objects): # For each data object for i, data in enumerate(data_objects): # Apply initial transform - conv_data = self.init_transform[i](data) + conv_data = self.init_transform[i](data.x.to(torch.float32)) # Apply convolutional layers for layer in self.conv_layers[i]: - conv_data = layer(conv_data) + if isinstance(layer, gnn.MessagePassing): + conv_data = layer(x=conv_data, edge_index=data.edge_index, + edge_attr=data.edge_attr) + else: + conv_data = layer(conv_data) # Apply pooling layer - pooled_data = self.pool_layers[i](conv_data) + pooled_data = self.pool_layers[i](x=conv_data, batch=None) # Apply pool-to-hidden transform hidden_data = self.pool_transform[i](pooled_data) # Apply hidden layers - for layer in self.hidden_layers[i]: - hidden_data = layer(hidden_data) + hidden_data = self.hidden_layers[i](hidden_data) # Save contribution contributions.append(hidden_data) # Apply final transformation - output = self.final_lin_transform(*contributions) + contributions = torch.cat(contributions, dim=-1) + output = self.final_lin_transform(contributions) return {"output": output, "contributions": contributions} - \ No newline at end of file + +if __name__ == "__main__": + from ase.io import read + from data import AtomsDatapoints + from constants import REPO_PATH + from pathlib import Path + # Test for one tensor + # Create datapoins + data_root_path = Path(REPO_PATH) / "data" / "S_calcs" + atoms = read(data_root_path / "Pt_3_Rh_9_-7-7-S.cif") + datapoint = AtomsDatapoints(atoms) + datapoint.process_data( + z_cutoffs=[13.0, 20.0], + node_features=[ + ["atomic_number", "dband_center"], + ["atomic_number", "reactivity"], + ["atomic_number", "reactivity"], + ], + edge_features=[ + ["bulk_bond_distance"], + ["surface_bond_distance"], + ["adsorbate_bond_distance"], + ], + ) + data_objects = datapoint.get(0) + + # Get result + partition_configs = [ + { + "n_conv": 3, + "n_hidden": 3, + "hidden_size": 30, + "conv_size": 40, + "dropout": 0.1, + "num_node_features": data_objects[0].num_node_features, + "num_edge_features": data_objects[0].num_edge_features, + "conv_type": "CGConv", + }, + { + "n_conv": 3, + "n_hidden": 3, + "hidden_size": 30, + "conv_size": 40, + "dropout": 0.1, + "num_node_features": data_objects[1].num_node_features, + "num_edge_features": data_objects[1].num_edge_features, + "conv_type": "CGConv", + }, + { + "n_conv": 3, + "n_hidden": 3, + "hidden_size": 30, + "conv_size": 40, + "dropout": 0.1, + "num_node_features": data_objects[2].num_node_features, + "num_edge_features": data_objects[2].num_edge_features, + "conv_type": "CGConv", + } + ] + net = MultiGCN(partition_configs) + result_dict = net(data_objects) \ No newline at end of file From a9d3d7afe769445bad84acdf42563f5c7774ab98 Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Fri, 22 Sep 2023 21:47:47 -0400 Subject: [PATCH 3/6] Removed float32 --- src/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/models.py b/src/models.py index 56321d2..015d29e 100644 --- a/src/models.py +++ b/src/models.py @@ -129,7 +129,7 @@ def forward(self, data_objects): # For each data object for i, data in enumerate(data_objects): # Apply initial transform - conv_data = self.init_transform[i](data.x.to(torch.float32)) + conv_data = self.init_transform[i](data.x) # Apply convolutional layers for layer in self.conv_layers[i]: From 26718967fd1f84270aa7dfd2017097c10f2ebeb4 Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Fri, 22 Sep 2023 21:53:28 -0400 Subject: [PATCH 4/6] Added TODO --- src/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/models.py b/src/models.py index 015d29e..000d01b 100644 --- a/src/models.py +++ b/src/models.py @@ -98,6 +98,7 @@ def init_conv_layers(self): for i in range(self.n_partitions): part_conv_layers = [] for j in range(self.n_conv[i]): + # TODO Add possibility of changing convolutional layers conv_layer = [ gnn.CGConv( channels=self.conv_size[i], From 79a9704b53031e631af4740cb0dd2361793b5577 Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Fri, 22 Sep 2023 21:56:51 -0400 Subject: [PATCH 5/6] Fixed codestyle --- src/models.py | 120 +++++++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 54 deletions(-) diff --git a/src/models.py b/src/models.py index 000d01b..615ddc8 100644 --- a/src/models.py +++ b/src/models.py @@ -4,8 +4,10 @@ import torch.nn as nn import torch_geometric.nn as gnn + class MultiGCN(nn.Module): """Class to customize the graph neural network.""" + def __init__(self, partition_configs): """Initialize the graph neural network. @@ -15,17 +17,17 @@ def __init__(self, partition_configs): List of dictionaries containing parameters for the GNN for each partition. The number of different GNNs are judged based on the size of the list. Each partition config should contain the following - keys: n_conv (number of convolutional layers, int), n_hidden (number + keys: n_conv (number of convolutional layers, int), n_hidden (number of hidden layers, int), conv_size (feature size before convolution, int) - hidden_size (nodes per hidden layer node, int), dropout (dropout - probability for hidden layers, float), conv_type (type of convolution + hidden_size (nodes per hidden layer node, int), dropout (dropout + probability for hidden layers, float), conv_type (type of convolution layer, str; currently only "CGConv" is supported), pool_type (type of pooling layer, str; currently "add" and "mean" are supported), num_node_features (number of node features, int), num_edge_features (number of edge features, int). """ super().__init__() - + # Store hyperparameters self.n_conv = [config["n_conv"] for config in partition_configs] self.n_hidden = [config["n_hidden"] for config in partition_configs] @@ -74,16 +76,20 @@ def __init__(self, partition_configs): self.hidden_layers = [] for i in range(self.n_partitions): self.hidden_layers.append( - nn.Sequential(*([ - nn.Linear(self.hidden_size[i], self.hidden_size[i]), - nn.LeakyReLU(inplace=True), - nn.Dropout(p=self.dropout[i]), - ] * (self.n_hidden[i] - 1) + - [ - nn.Linear(self.hidden_size[i], 1), - nn.LeakyReLU(inplace=True), - nn.Dropout(p=self.dropout[i]), - ]) + nn.Sequential( + *( + [ + nn.Linear(self.hidden_size[i], self.hidden_size[i]), + nn.LeakyReLU(inplace=True), + nn.Dropout(p=self.dropout[i]), + ] + * (self.n_hidden[i] - 1) + + [ + nn.Linear(self.hidden_size[i], 1), + nn.LeakyReLU(inplace=True), + nn.Dropout(p=self.dropout[i]), + ] + ) ) ) @@ -91,7 +97,6 @@ def __init__(self, partition_configs): # TODO: replace 1 with multiple outputs self.final_lin_transform = nn.Linear(self.n_partitions, 1) - def init_conv_layers(self): """Initialize convolutional layers.""" self.conv_layers = [] @@ -105,10 +110,10 @@ def init_conv_layers(self): dim=self.num_edge_features[i], batch_norm=True, ), - nn.LeakyReLU(inplace=True) + nn.LeakyReLU(inplace=True), ] part_conv_layers.extend(conv_layer) - + self.conv_layers.append(nn.ModuleList(part_conv_layers)) def forward(self, data_objects): @@ -130,13 +135,16 @@ def forward(self, data_objects): # For each data object for i, data in enumerate(data_objects): # Apply initial transform - conv_data = self.init_transform[i](data.x) + conv_data = self.init_transform[i](data.x) # Apply convolutional layers for layer in self.conv_layers[i]: if isinstance(layer, gnn.MessagePassing): - conv_data = layer(x=conv_data, edge_index=data.edge_index, - edge_attr=data.edge_attr) + conv_data = layer( + x=conv_data, + edge_index=data.edge_index, + edge_attr=data.edge_attr, + ) else: conv_data = layer(conv_data) @@ -157,12 +165,16 @@ def forward(self, data_objects): output = self.final_lin_transform(contributions) return {"output": output, "contributions": contributions} - + + if __name__ == "__main__": + from pathlib import Path + from ase.io import read - from data import AtomsDatapoints + from constants import REPO_PATH - from pathlib import Path + from data import AtomsDatapoints + # Test for one tensor # Create datapoins data_root_path = Path(REPO_PATH) / "data" / "S_calcs" @@ -185,36 +197,36 @@ def forward(self, data_objects): # Get result partition_configs = [ - { - "n_conv": 3, - "n_hidden": 3, - "hidden_size": 30, - "conv_size": 40, - "dropout": 0.1, - "num_node_features": data_objects[0].num_node_features, - "num_edge_features": data_objects[0].num_edge_features, - "conv_type": "CGConv", - }, - { - "n_conv": 3, - "n_hidden": 3, - "hidden_size": 30, - "conv_size": 40, - "dropout": 0.1, - "num_node_features": data_objects[1].num_node_features, - "num_edge_features": data_objects[1].num_edge_features, - "conv_type": "CGConv", - }, - { - "n_conv": 3, - "n_hidden": 3, - "hidden_size": 30, - "conv_size": 40, - "dropout": 0.1, - "num_node_features": data_objects[2].num_node_features, - "num_edge_features": data_objects[2].num_edge_features, - "conv_type": "CGConv", - } + { + "n_conv": 3, + "n_hidden": 3, + "hidden_size": 30, + "conv_size": 40, + "dropout": 0.1, + "num_node_features": data_objects[0].num_node_features, + "num_edge_features": data_objects[0].num_edge_features, + "conv_type": "CGConv", + }, + { + "n_conv": 3, + "n_hidden": 3, + "hidden_size": 30, + "conv_size": 40, + "dropout": 0.1, + "num_node_features": data_objects[1].num_node_features, + "num_edge_features": data_objects[1].num_edge_features, + "conv_type": "CGConv", + }, + { + "n_conv": 3, + "n_hidden": 3, + "hidden_size": 30, + "conv_size": 40, + "dropout": 0.1, + "num_node_features": data_objects[2].num_node_features, + "num_edge_features": data_objects[2].num_edge_features, + "conv_type": "CGConv", + }, ] net = MultiGCN(partition_configs) - result_dict = net(data_objects) \ No newline at end of file + result_dict = net(data_objects) From fd0ec97861bb721db34fde3eb3c11a2ac77d0853 Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Fri, 22 Sep 2023 22:30:02 -0400 Subject: [PATCH 6/6] Fixed final layer weights --- src/data.py | 4 ++-- src/models.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/data.py b/src/data.py index e9561af..01e92a1 100644 --- a/src/data.py +++ b/src/data.py @@ -131,7 +131,7 @@ def process_data( # Convert to Data object data_obj = Data( - x=feat_dict["node_tensor"], + x=feat_dict["node_tensor"].to(torch.float32), edge_index=feat_dict["edge_indices"], edge_attr=feat_dict["edge_tensor"], y=torch.Tensor([self.map_name_prop[name]]), @@ -245,7 +245,7 @@ def process_data( # Convert to Data object data_obj = Data( - x=feat_dict["node_tensor"], + x=feat_dict["node_tensor"].to(torch.float32), edge_index=feat_dict["edge_indices"], edge_attr=feat_dict["edge_tensor"], ) diff --git a/src/models.py b/src/models.py index 615ddc8..8ff3fd6 100644 --- a/src/models.py +++ b/src/models.py @@ -95,7 +95,9 @@ def __init__(self, partition_configs): # Final linear layer # TODO: replace 1 with multiple outputs - self.final_lin_transform = nn.Linear(self.n_partitions, 1) + self.final_lin_transform = nn.Linear(self.n_partitions, 1, bias=False) + with torch.no_grad(): + self.final_lin_transform.weight.copy_(torch.ones(self.n_partitions)) def init_conv_layers(self): """Initialize convolutional layers.""" @@ -230,3 +232,4 @@ def forward(self, data_objects): ] net = MultiGCN(partition_configs) result_dict = net(data_objects) + print(result_dict)