From 9bd23b1a64a9f0c1a4b71648ee9950b0a594caea Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Fri, 15 Sep 2023 00:04:53 -0400 Subject: [PATCH 1/4] Added util functions --- data/dband_centers.csv | 1 + src/featurizers.py | 132 +++++++++++++++++++++++++++++++++++-- src/utils.py | 145 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 274 insertions(+), 4 deletions(-) create mode 100644 src/utils.py diff --git a/data/dband_centers.csv b/data/dband_centers.csv index dc45243..874e959 100644 --- a/data/dband_centers.csv +++ b/data/dband_centers.csv @@ -1,3 +1,4 @@ +0, 0 22, 1.50 23, 1.06 24, 0.16 diff --git a/src/featurizers.py b/src/featurizers.py index b55d7a1..9a37b73 100644 --- a/src/featurizers.py +++ b/src/featurizers.py @@ -151,9 +151,11 @@ def featurize_graph(self, atoms_graph): # Get atomic numbers atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") atom_num_arr = np.array(list(atom_num_dict.values())) + zero_idx = np.argwhere(atom_num_arr == 0.) # Create node feature matrix self._feat_tensor = self.encoder.transform(atom_num_arr) + self._feat_tensor[zero_idx, :] = 0. @property def feat_tensor(self): @@ -217,12 +219,14 @@ def featurize_graph(self, atoms_graph): # Get atomic numbers atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") atom_num_arr = np.array(list(atom_num_dict.values())) + zero_idx = np.argwhere(atom_num_arr == 0.) # Map from atomic number to d-band center dband_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr) # Create node feature matrix self._feat_tensor = self.encoder.transform(dband_arr) + self._feat_tensor[zero_idx, :] = 0. @property def feat_tensor(self): @@ -284,9 +288,14 @@ def featurize_graph(self, atoms_graph): # Get atomic numbers atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") atom_num_arr = np.array(list(atom_num_dict.values())) + zero_idx = np.argwhere(atom_num_arr == 0.) + + # Get valence electrons for each atom + valence_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr) # Create node feature matrix - self._feat_tensor = self.encoder.transform(atom_num_arr) + self._feat_tensor = self.encoder.transform(valence_arr) + self._feat_tensor[zero_idx, :] = 0. @property def feat_tensor(self): @@ -305,6 +314,89 @@ def name(): """Return the name of the featurizer.""" return "valence" +class ReactivityFeaturizer(Featurizer): + """Featurize nodes based on close-packed d-band center and/or valence.""" + + def __init__(self, encoder, min=-5, max=3, n_intervals=10): + """Initialize featurizer with min = -5, max = 3, n_intervals = 10. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + min: int + Minimum value of d-band center + max: int + Maximum value of d-band center + n_intervals: int + Number of intervals + """ + # Initialize variables + self.min = min + self.max = max + self.n_intervals = n_intervals + + # Fit encoder + self.encoder_dband = encoder + self.encoder_dband.fit(self.min, self.max, self.n_intervals) + + # Fit valence encoder + self.encoder_val = encoder + self.encoder_val.fit(1, 12, self.n_intervals) + + # Get dband centers from csv + self.map_dict_dband = {} + with open(DBAND_FILE_PATH, "r") as f: + csv_reader = csv.reader(f) + for row in csv_reader: + self.map_dict_dband[int(row[0])] = float(row[1]) + + # Create a map between atomic number and number of valence electrons + self.map_dict_val = {0: 0, 1: 1, 2: 0} + for i in range(3, 21, 1): + self.map_dict_val[i] = min(element(i).ec.get_valence().ne(), 12) + + def featurize_graph(self, atoms_graph): + """Featurize an AtomsGraph. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + # Get atomic numbers + atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") + atom_num_arr = np.array(list(atom_num_dict.values())) + zero_idx = np.argwhere(atom_num_arr == 0.) + + # Map from atomic number to d-band center + react_arr = np.zeros_like(atom_num_arr) + for i, n in enumerate(atom_num_arr): + if n < 21: + react_arr[i] = self.map_dict_val[n] + else: + react_arr[i] = self.map_dict_dband[n] + + # Create node feature matrix + self._feat_tensor = self.encoder.transform(react_arr) + self._feat_tensor[zero_idx, :] = 0. + + @property + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + return self._feat_tensor + + @staticmethod + def name(): + """Return the name of the featurizer.""" + return "reactivity" class CoordinationFeaturizer(Featurizer): """Featurize nodes based on coordination number.""" @@ -341,11 +433,17 @@ def featurize_graph(self, atoms_graph): A graph of a collection of bulk, surface, or adsorbate atoms. """ # Get atomic numbers + atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") + atom_num_arr = np.array(list(atom_num_dict.values())) + zero_idx = np.argwhere(atom_num_arr == 0.) + + # Get coordination numbers cn_dict = nx.get_node_attributes(atoms_graph.graph, "coordination") cn_arr = np.array(list(cn_dict.values())) # Create node feature matrix self._feat_tensor = self.encoder.transform(cn_arr) + self._feat_tensor[zero_idx, :] = 0. @property def feat_tensor(self): @@ -407,7 +505,7 @@ def featurize_graph(self, atoms_graph): self._feat_tensor = self.encoder.transform(bond_dist_arr) # Create list of edge indices - self._edge_indices = torch.Tensor(list(atoms_graph.graph.edges())) + self._edge_indices = torch.Tensor(list(atoms_graph.graph.edges())).view(2, -1) @property def feat_tensor(self): @@ -435,7 +533,7 @@ def edge_indices(self): @staticmethod def name(): """Return the name of the featurizer.""" - return "valence" + return "bond_distance" class BulkBondDistanceFeaturizer(BondDistanceFeaturizer): @@ -449,6 +547,11 @@ class BulkBondDistanceFeaturizer(BondDistanceFeaturizer): def __init__(self, encoder, min=0, max=8, n_intervals=8): super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + @staticmethod + def name(): + """Return the name of the featurizer.""" + return "bulk_bond_distance" + class SurfaceBondDistanceFeaturizer(BondDistanceFeaturizer): """Featurize bulk bond distances. @@ -461,6 +564,10 @@ class SurfaceBondDistanceFeaturizer(BondDistanceFeaturizer): def __init__(self, encoder, min=0, max=5, n_intervals=10): super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + def name(): + """Return the name of the featurizer.""" + return "surface_bond_distance" + class AdsorbateBondDistanceFeaturizer(BondDistanceFeaturizer): """Featurize bulk bond distances. @@ -473,6 +580,23 @@ class AdsorbateBondDistanceFeaturizer(BondDistanceFeaturizer): def __init__(self, encoder, min=0, max=4, n_intervals=16): super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + def name(): + """Return the name of the featurizer.""" + return "adsorbate_distance" + + +list_of_node_featurizers = [ + AtomNumFeaturizer, + DBandFeaturizer, + ValenceFeaturizer, + CoordinationFeaturizer, +] + +list_of_edge_featurizers = [ + BulkBondDistanceFeaturizer, + SurfaceBondDistanceFeaturizer, + AdsorbateBondDistanceFeaturizer, +] if __name__ == "__main__": from ase.io import read @@ -486,5 +610,5 @@ def __init__(self, encoder, min=0, max=4, n_intervals=16): bdf = BulkBondDistanceFeaturizer(OneHotEncoder()) bdf.featurize_graph(g) - print(bdf.feat_tensor) + print(bdf.feat_tensor.shape) print(bdf.edge_indices) diff --git a/src/utils.py b/src/utils.py new file mode 100644 index 0000000..1f2fdce --- /dev/null +++ b/src/utils.py @@ -0,0 +1,145 @@ +"""Utility functions.""" + +from copy import deepcopy + +import numpy as np +import torch + +from featurizers import ( + list_of_node_featurizers, + list_of_edge_featurizers, + OneHotEncoder +) +from graphs import AtomsGraph + +def partition_structure(atoms, n_partitions, z_cutoffs): + """Partition atomic structue into bulk, surface, and/or adsorbates. + + Parameters + ---------- + atoms: ase.Atoms object + The structure to be partitioned + n_partitions: int + Number of partitions + z_cutoffs: list or np.ndarray + List of z-coordinate cutoffs. xy planes are placed at the specified + cutoffs to partition atoms above and below them. The length of z-cutoffs + should be equal to one less than the number of partitions. + """ + # Check if length of z_cutoffs is equal to n_paritions + if len(z_cutoffs) != n_partitions - 1: + raise ValueError("The length of z_cutoffs must be equal to\ + one less than the number of partitions") + + # Add 0 and infinity to cutoffs + z_cutoffs = np.insert(z_cutoffs, 0, 0) + z_cutoffs = np.insert(z_cutoffs, len(z_cutoffs), np.inf) + + # Get positions + pos = atoms.get_positions() + + # Iterate over number of partitions + part_atoms = [] + for i in range(n_partitions): + part_idx = np.argwhere( + (pos[:, -1] >= z_cutoffs[i]) & (pos[:, -1] < z_cutoffs[i+1]) + ).flatten().tolist() + part_atoms.append(part_idx) + + return part_atoms + +def featurize_atoms( + atoms, + select_idx, + node_features, + edge_features, + max_atoms=50, + encoder=OneHotEncoder() + ): + """Featurize atoms and bonds with the chosen featurizers. + + Parameters + ---------- + atoms: ase.Atoms objet + Atoms object containing the whole structure + select_idx: list + List of indices for atoms to featurize. Typically, this will be provided + by the partition_structure function + node_features: list or np.ndarray + Names of node featurizers to use (current options: atomic_number, dband + center, valence electrons, coordination, reactivity). The "reactivity" + featurizer uses valence electrons for adsorbates (atomic number < 21) and + d-band center for larger transition metal atoms (atomic number >= 21). + edge_features: list or np.ndarray + Names of edge featurizers to use (current options: bulk_bond_distance, + surface_bond_distance, adsorbate_bond_distance). All of these encode + bond distance using a one-hot encoder, but the bounds for each vary. + max_atoms: int (default = 50) + Maximum number of allowed atoms. If the number of atoms in the graph are + fewer than this number, the graph is padded with empty nodes. This is + required to make the sizes of the node feature tensors consistent across + structures. + encoder: encoder object from featurizers.py + Currently only the OneHotEncoder is supported + + Returns + ------- + dict + Dictionary with keys "node_tensor", "edge_tensor", and "edge_indices" and + corresponding tensors as values. + """ + # Create graph + atoms_graph = AtomsGraph(atoms=atoms, select_idx=select_idx, max_atoms=max_atoms) + + # Collect node featurizers + node_feats = [] + node_intervals = [] + for node_featurizer in list_of_node_featurizers: + if node_featurizer.name() in node_features: + nf = node_featurizer(deepcopy(encoder)) + node_intervals.append(nf.n_intervals) + node_feats.append(nf) + + # Collect edge featurizers + edge_feats = [] + edge_intervals = [] + for edge_featurizer in list_of_edge_featurizers: + if edge_featurizer.name() in edge_features: + ef = edge_featurizer(deepcopy(encoder)) + edge_intervals.append(ef.n_intervals) + edge_feats.append(ef) + + # Store node matrices from each feaurizer + node_mats = [] + for nf in node_feats: + nf.featurize_graph(atoms_graph) + node_mats.append(nf.feat_tensor) + # Stack node mats to create node tensor + node_tensor = torch.hstack(node_mats) + + # Store edge matrices from each featurizer + edge_mats = [] + for ef in edge_feats: + ef.featurize_graph(atoms_graph) + edge_mats.append(ef.feat_tensor) + # Stack edge mats to create edge tensor + edge_tensor = torch.hstack(edge_mats) + + # Store edge indices + edge_indices = ef.edge_indices + + return {"node_tensor": node_tensor, "edge_tensor": edge_tensor, + "edge_indices": edge_indices} + + +if __name__ == "__main__": + from ase.io import read + atoms = read("CONTCAR") + + part_atoms = partition_structure(atoms, 3, z_cutoffs=[15, 23.5]) + print(part_atoms) + + feat_dict = featurize_atoms(atoms, part_atoms[0], ["atomic_number", "dband_center"], + ["bulk_bond_distance"], max_atoms=34) + print(feat_dict) + \ No newline at end of file From 32466b7de6f2b2fbf7e6ada5d5c7b821cdfae1a0 Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Fri, 15 Sep 2023 00:34:22 -0400 Subject: [PATCH 2/4] Changed edge indices to LongTensor --- src/featurizers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/featurizers.py b/src/featurizers.py index 9a37b73..9597295 100644 --- a/src/featurizers.py +++ b/src/featurizers.py @@ -505,7 +505,7 @@ def featurize_graph(self, atoms_graph): self._feat_tensor = self.encoder.transform(bond_dist_arr) # Create list of edge indices - self._edge_indices = torch.Tensor(list(atoms_graph.graph.edges())).view(2, -1) + self._edge_indices = torch.LongTensor(list(atoms_graph.graph.edges())).view(2, -1) @property def feat_tensor(self): From 63b2ad4b18c3163aa7ce5063ec834a3ced406fea Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Fri, 15 Sep 2023 21:49:36 -0400 Subject: [PATCH 3/4] Fix codestyle --- src/featurizers.py | 26 ++++++++++++--------- src/graphs.py | 7 ++++-- src/utils.py | 58 ++++++++++++++++++++++++++++------------------ 3 files changed, 56 insertions(+), 35 deletions(-) diff --git a/src/featurizers.py b/src/featurizers.py index 9597295..145152f 100644 --- a/src/featurizers.py +++ b/src/featurizers.py @@ -151,11 +151,11 @@ def featurize_graph(self, atoms_graph): # Get atomic numbers atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") atom_num_arr = np.array(list(atom_num_dict.values())) - zero_idx = np.argwhere(atom_num_arr == 0.) + zero_idx = np.argwhere(atom_num_arr == 0.0) # Create node feature matrix self._feat_tensor = self.encoder.transform(atom_num_arr) - self._feat_tensor[zero_idx, :] = 0. + self._feat_tensor[zero_idx, :] = 0.0 @property def feat_tensor(self): @@ -219,14 +219,14 @@ def featurize_graph(self, atoms_graph): # Get atomic numbers atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") atom_num_arr = np.array(list(atom_num_dict.values())) - zero_idx = np.argwhere(atom_num_arr == 0.) + zero_idx = np.argwhere(atom_num_arr == 0.0) # Map from atomic number to d-band center dband_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr) # Create node feature matrix self._feat_tensor = self.encoder.transform(dband_arr) - self._feat_tensor[zero_idx, :] = 0. + self._feat_tensor[zero_idx, :] = 0.0 @property def feat_tensor(self): @@ -288,14 +288,14 @@ def featurize_graph(self, atoms_graph): # Get atomic numbers atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") atom_num_arr = np.array(list(atom_num_dict.values())) - zero_idx = np.argwhere(atom_num_arr == 0.) + zero_idx = np.argwhere(atom_num_arr == 0.0) # Get valence electrons for each atom valence_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr) # Create node feature matrix self._feat_tensor = self.encoder.transform(valence_arr) - self._feat_tensor[zero_idx, :] = 0. + self._feat_tensor[zero_idx, :] = 0.0 @property def feat_tensor(self): @@ -314,6 +314,7 @@ def name(): """Return the name of the featurizer.""" return "valence" + class ReactivityFeaturizer(Featurizer): """Featurize nodes based on close-packed d-band center and/or valence.""" @@ -367,7 +368,7 @@ def featurize_graph(self, atoms_graph): # Get atomic numbers atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") atom_num_arr = np.array(list(atom_num_dict.values())) - zero_idx = np.argwhere(atom_num_arr == 0.) + zero_idx = np.argwhere(atom_num_arr == 0.0) # Map from atomic number to d-band center react_arr = np.zeros_like(atom_num_arr) @@ -379,7 +380,7 @@ def featurize_graph(self, atoms_graph): # Create node feature matrix self._feat_tensor = self.encoder.transform(react_arr) - self._feat_tensor[zero_idx, :] = 0. + self._feat_tensor[zero_idx, :] = 0.0 @property def feat_tensor(self): @@ -398,6 +399,7 @@ def name(): """Return the name of the featurizer.""" return "reactivity" + class CoordinationFeaturizer(Featurizer): """Featurize nodes based on coordination number.""" @@ -435,7 +437,7 @@ def featurize_graph(self, atoms_graph): # Get atomic numbers atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") atom_num_arr = np.array(list(atom_num_dict.values())) - zero_idx = np.argwhere(atom_num_arr == 0.) + zero_idx = np.argwhere(atom_num_arr == 0.0) # Get coordination numbers cn_dict = nx.get_node_attributes(atoms_graph.graph, "coordination") @@ -443,7 +445,7 @@ def featurize_graph(self, atoms_graph): # Create node feature matrix self._feat_tensor = self.encoder.transform(cn_arr) - self._feat_tensor[zero_idx, :] = 0. + self._feat_tensor[zero_idx, :] = 0.0 @property def feat_tensor(self): @@ -505,7 +507,9 @@ def featurize_graph(self, atoms_graph): self._feat_tensor = self.encoder.transform(bond_dist_arr) # Create list of edge indices - self._edge_indices = torch.LongTensor(list(atoms_graph.graph.edges())).view(2, -1) + self._edge_indices = torch.LongTensor(list(atoms_graph.graph.edges())).view( + 2, -1 + ) @property def feat_tensor(self): diff --git a/src/graphs.py b/src/graphs.py index b744e8f..40a09b3 100644 --- a/src/graphs.py +++ b/src/graphs.py @@ -4,8 +4,11 @@ import networkx as nx import numpy as np -from ase.neighborlist import (NewPrimitiveNeighborList, build_neighbor_list, - natural_cutoffs) +from ase.neighborlist import ( + NewPrimitiveNeighborList, + build_neighbor_list, + natural_cutoffs, +) class AtomsGraph: diff --git a/src/utils.py b/src/utils.py index 1f2fdce..3ecf32b 100644 --- a/src/utils.py +++ b/src/utils.py @@ -6,15 +6,16 @@ import torch from featurizers import ( - list_of_node_featurizers, + OneHotEncoder, list_of_edge_featurizers, - OneHotEncoder + list_of_node_featurizers, ) from graphs import AtomsGraph + def partition_structure(atoms, n_partitions, z_cutoffs): """Partition atomic structue into bulk, surface, and/or adsorbates. - + Parameters ---------- atoms: ase.Atoms object @@ -24,13 +25,15 @@ def partition_structure(atoms, n_partitions, z_cutoffs): z_cutoffs: list or np.ndarray List of z-coordinate cutoffs. xy planes are placed at the specified cutoffs to partition atoms above and below them. The length of z-cutoffs - should be equal to one less than the number of partitions. + should be equal to one less than the number of partitions. """ # Check if length of z_cutoffs is equal to n_paritions if len(z_cutoffs) != n_partitions - 1: - raise ValueError("The length of z_cutoffs must be equal to\ - one less than the number of partitions") - + raise ValueError( + "The length of z_cutoffs must be equal to\ + one less than the number of partitions" + ) + # Add 0 and infinity to cutoffs z_cutoffs = np.insert(z_cutoffs, 0, 0) z_cutoffs = np.insert(z_cutoffs, len(z_cutoffs), np.inf) @@ -41,21 +44,24 @@ def partition_structure(atoms, n_partitions, z_cutoffs): # Iterate over number of partitions part_atoms = [] for i in range(n_partitions): - part_idx = np.argwhere( - (pos[:, -1] >= z_cutoffs[i]) & (pos[:, -1] < z_cutoffs[i+1]) - ).flatten().tolist() + part_idx = ( + np.argwhere((pos[:, -1] >= z_cutoffs[i]) & (pos[:, -1] < z_cutoffs[i + 1])) + .flatten() + .tolist() + ) part_atoms.append(part_idx) return part_atoms + def featurize_atoms( - atoms, - select_idx, - node_features, - edge_features, - max_atoms=50, - encoder=OneHotEncoder() - ): + atoms, + select_idx, + node_features, + edge_features, + max_atoms=50, + encoder=OneHotEncoder(), +): """Featurize atoms and bonds with the chosen featurizers. Parameters @@ -128,18 +134,26 @@ def featurize_atoms( # Store edge indices edge_indices = ef.edge_indices - return {"node_tensor": node_tensor, "edge_tensor": edge_tensor, - "edge_indices": edge_indices} + return { + "node_tensor": node_tensor, + "edge_tensor": edge_tensor, + "edge_indices": edge_indices, + } if __name__ == "__main__": from ase.io import read + atoms = read("CONTCAR") part_atoms = partition_structure(atoms, 3, z_cutoffs=[15, 23.5]) print(part_atoms) - feat_dict = featurize_atoms(atoms, part_atoms[0], ["atomic_number", "dband_center"], - ["bulk_bond_distance"], max_atoms=34) + feat_dict = featurize_atoms( + atoms, + part_atoms[0], + ["atomic_number", "dband_center"], + ["bulk_bond_distance"], + max_atoms=34, + ) print(feat_dict) - \ No newline at end of file From 16161e302e83f465f74ebb983582796d0c291bbc Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Tue, 19 Sep 2023 15:26:26 -0400 Subject: [PATCH 4/4] Added static method decorators --- src/featurizers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/featurizers.py b/src/featurizers.py index 145152f..b4c6d65 100644 --- a/src/featurizers.py +++ b/src/featurizers.py @@ -568,6 +568,7 @@ class SurfaceBondDistanceFeaturizer(BondDistanceFeaturizer): def __init__(self, encoder, min=0, max=5, n_intervals=10): super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + @staticmethod def name(): """Return the name of the featurizer.""" return "surface_bond_distance" @@ -584,6 +585,7 @@ class AdsorbateBondDistanceFeaturizer(BondDistanceFeaturizer): def __init__(self, encoder, min=0, max=4, n_intervals=16): super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + @staticmethod def name(): """Return the name of the featurizer.""" return "adsorbate_distance"