From d80ef8b497eb42c5a60836c0ddfac8f101eb50dc Mon Sep 17 00:00:00 2001
From: Gaurav S Deshmukh
Date: Tue, 12 Sep 2023 16:27:31 -0400
Subject: [PATCH] Added bond featurizers

featurizers.py:
- Make min, max, and n_intervals constructor arguments (with the previous
  values as defaults) for the atomic-number, d-band, and valence featurizers.
- Convert dict value views to lists before building NumPy arrays.
- Parse d-band CSV rows as int atomic number -> float d-band center.
- Clamp valence electron counts to 12. np.minimum is used because the `min`
  argument of __init__ shadows the builtin inside that method.
- Add CoordinationFeaturizer (node feature) and BondDistanceFeaturizer (edge
  feature) with Bulk/Surface/Adsorbate subclasses that only set defaults.

graphs.py:
- Pass natural_cutoffs(atoms) to build_neighbor_list.
- Skip a bond when its reverse (nn, n) has already been added.
- Read node positions by item access and store each node's degree as its
  "coordination" attribute.

---
 .gitignore         |   1 +
 src/featurizers.py | 208 ++++++++++++++++++++++++++++++++++++++++-----
 src/graphs.py      |  28 ++++--
 3 files changed, 209 insertions(+), 28 deletions(-)

diff --git a/.gitignore b/.gitignore
index dba4b43..94d6dd9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 *POSCAR*
+*CONTCAR*
 *.csv
 !data/dband_centers.csv
 __pycache__
diff --git a/src/featurizers.py b/src/featurizers.py
index 41cc262..436a8d6 100644
--- a/src/featurizers.py
+++ b/src/featurizers.py
@@ -11,6 +11,7 @@ from mendeleev import element
 
 from constants import DBAND_FILE_PATH
 
+from graphs import AtomsGraph
 
 class OneHotEncoder:
     """Featurize a property using a one-hot encoding scheme."""
@@ -114,18 +115,24 @@ def name(self):
 
 class AtomNumFeaturizer(Featurizer):
     """Featurize nodes based on atomic number."""
-    def __init__(self, encoder):
+    def __init__(self, encoder, min=1, max=80, n_intervals=10):
         """Initialize featurizer with min = 1, max = 80, n_intervals = 10.
 
         Parameters
         ----------
         encoder: OneHotEncoder
             Initialized object of class OneHotEncoder.
+        min: int
+            Minimum value of atomic number
+        max: int
+            Maximum value of atomic number
+        n_intervals: int
+            Number of intervals
         """
         # Initialize variables
-        self.min = 1
-        self.max = 80
-        self.n_intervals = 10
+        self.min = min
+        self.max = max
+        self.n_intervals = n_intervals
 
         # Fit encoder
         self.encoder = encoder
@@ -141,7 +148,7 @@ def featurize_graph(self, graph):
         """
         # Get atomic numbers
         atom_num_dict = nx.get_node_attributes(graph, "atomic_number")
-        atom_num_arr = np.array(atom_num_dict.values())
+        atom_num_arr = np.array(list(atom_num_dict.values()))
 
         # Create node feature matrix
         self._feat_tensor = self.encoder.transform(atom_num_arr)
@@ -165,18 +172,24 @@ def name():
 
 class DBandFeaturizer(Featurizer):
     """Featurize nodes based on close-packed d-band center."""
-    def __init__(self, encoder):
+    def __init__(self, encoder, min=-5, max=3, n_intervals=10):
         """Initialize featurizer with min = -5, max = 3, n_intervals = 10.
 
         Parameters
         ----------
         encoder: OneHotEncoder
             Initialized object of class OneHotEncoder.
+        min: int
+            Minimum value of d-band center
+        max: int
+            Maximum value of d-band center
+        n_intervals: int
+            Number of intervals
         """
         # Initialize variables
-        self.min = -5
-        self.max = 3
-        self.n_intervals = 10
+        self.min = min
+        self.max = max
+        self.n_intervals = n_intervals
 
         # Fit encoder
         self.encoder = encoder
@@ -187,7 +200,7 @@ def __init__(self, encoder):
         with open(DBAND_FILE_PATH, "r") as f:
             csv_reader = csv.reader(f)
             for row in csv_reader:
-                self.map_dict[row[0]] = row[1]
+                self.map_dict[int(row[0])] = float(row[1])
 
     def featurize_graph(self, graph):
         """Featurize an AtomsGraph.
@@ -199,7 +212,7 @@ def featurize_graph(self, graph):
         """
         # Get atomic numbers
         atom_num_dict = nx.get_node_attributes(graph, "atomic_number")
-        atom_num_arr = np.array(atom_num_dict.values())
+        atom_num_arr = np.array(list(atom_num_dict.values()))
 
         # Map from atomic number to d-band center
         dband_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr)
@@ -226,18 +239,24 @@ def name():
 
 class ValenceFeaturizer(Featurizer):
     """Featurize nodes based on number of valence electrons."""
-    def __init__(self, encoder):
+    def __init__(self, encoder, min=1, max=12, n_intervals=12):
         """Initialize featurizer with min = 1, max = 12, n_intervals = 12.
 
         Parameters
         ----------
         encoder: OneHotEncoder
             Initialized object of class OneHotEncoder.
+        min: int
+            Minimum value of valence electrons
+        max: int
+            Maximum value of valence electrons
+        n_intervals: int
+            Number of intervals
         """
         # Initialize variables
-        self.min = 1
-        self.max = 12
-        self.n_intervals = 12
+        self.min = min
+        self.max = max
+        self.n_intervals = n_intervals
 
         # Fit encoder
         self.encoder = encoder
@@ -246,7 +265,7 @@ def __init__(self, encoder):
         # Create a map between atomic number and number of valence electrons
         self.map_dict = {1: 1, 2:0}
         for i in range(3, 21, 1):
-            self.map_dict[i] = element(i).ec.get_valence().ne()
+            self.map_dict[i] = int(np.minimum(element(i).ec.get_valence().ne(), 12))
 
     def featurize_graph(self, graph):
         """Featurize an AtomsGraph.
@@ -258,7 +277,7 @@ def featurize_graph(self, graph):
         """
         # Get atomic numbers
         atom_num_dict = nx.get_node_attributes(graph, "atomic_number")
-        atom_num_arr = np.array(atom_num_dict.values())
+        atom_num_arr = np.array(list(atom_num_dict.values()))
 
         # Create node feature matrix
         self._feat_tensor = self.encoder.transform(atom_num_arr)
@@ -279,9 +298,156 @@ def feat_tensor(self):
     def name():
         """Return the name of the featurizer."""
         return "valence"
+
+class CoordinationFeaturizer(Featurizer):
+    """Featurize nodes based on coordination number."""
+    def __init__(self, encoder, min=1, max=15, n_intervals=15):
+        """Initialize featurizer with min = 1, max = 15, n_intervals = 15.
+
+        Parameters
+        ----------
+        encoder: OneHotEncoder
+            Initialized object of class OneHotEncoder.
+        min: int
+            Minimum value of coordination number
+        max: int
+            Maximum value of coordination number
+        n_intervals: int
+            Number of intervals
+        """
+        # Initialize variables
+        self.min = min
+        self.max = max
+        self.n_intervals = n_intervals
+
+        # Fit encoder
+        self.encoder = encoder
+        self.encoder.fit(self.min, self.max, self.n_intervals)
+
+    def featurize_graph(self, graph):
+        """Featurize an AtomsGraph.
+
+        Parameters
+        ----------
+        graph: AtomsGraph
+            A graph of a collection of bulk, surface, or adsorbate atoms.
+        """
+        # Get coordination numbers
+        cn_dict = nx.get_node_attributes(graph, "coordination")
+        cn_arr = np.array(list(cn_dict.values()))
+
+        # Create node feature matrix
+        self._feat_tensor = self.encoder.transform(cn_arr)
+
+    @property
+    def feat_tensor(self):
+        """Return the featurized node tensor.
+
+        Returns
+        -------
+        feat_tensor: torch.Tensor
+            Featurized tensor having shape (N, M) where N = number of atoms and
+            M = n_intervals provided to the encoder
+        """
+        return self._feat_tensor
+
+    @staticmethod
+    def name():
+        """Return the name of the featurizer."""
+        return "coordination"
+
+class BondDistanceFeaturizer(Featurizer):
+    """Featurize edges based on bond distance."""
+    def __init__(self, encoder, min, max, n_intervals):
+        """Initialize bond distance featurizer.
+
+        Parameters
+        ----------
+        encoder: OneHotEncoder
+            Initialized object of class OneHotEncoder.
+        min: int
+            Minimum value of bond distance
+        max: int
+            Maximum value of bond distance
+        n_intervals: int
+            Number of intervals
+        """
+        # Initialize variables
+        self.min = min
+        self.max = max
+        self.n_intervals = n_intervals
+
+        # Fit encoder
+        self.encoder = encoder
+        self.encoder.fit(self.min, self.max, self.n_intervals)
+
+    def featurize_graph(self, graph):
+        """Featurize an AtomsGraph.
+
+        Parameters
+        ----------
+        graph: AtomsGraph
+            A graph of a collection of bulk, surface, or adsorbate atoms.
+        """
+        # Get bond distances
+        bond_dist_dict = nx.get_edge_attributes(graph, "bond_distance")
+        bond_dist_arr = np.array(list(bond_dist_dict.values()))
+
+        # Create edge feature matrix
+        self._feat_tensor = self.encoder.transform(bond_dist_arr)
+
+    @property
+    def feat_tensor(self):
+        """Return the featurized edge tensor.
+
+        Returns
+        -------
+        feat_tensor: torch.Tensor
+            Featurized tensor having shape (N, M) where N = number of edges and
+            M = n_intervals provided to the encoder
+        """
+        return self._feat_tensor
+
+    @staticmethod
+    def name():
+        """Return the name of the featurizer."""
+        return "bond_distance"
+
+class BulkBondDistanceFeaturizer(BondDistanceFeaturizer):
+    """Featurize bulk bond distances.
+
+    Child class of BondDistanceFeaturizer with suitable min, max, and n_interval
+    values initialized for bulk atoms. The values are: min = 0, max = 8,
+    n_intervals = 8.
+    """
+    def __init__(self, encoder, min=0, max=8, n_intervals=8):
+        super().__init__(encoder, min=min, max=max, n_intervals=n_intervals)
+
+class SurfaceBondDistanceFeaturizer(BondDistanceFeaturizer):
+    """Featurize surface bond distances.
+
+    Child class of BondDistanceFeaturizer with suitable min, max, and n_interval
+    values initialized for surface atoms. The values are: min = 0, max = 5,
+    n_intervals = 10.
+    """
+    def __init__(self, encoder, min=0, max=5, n_intervals=10):
+        super().__init__(encoder, min=min, max=max, n_intervals=n_intervals)
+
+class AdsorbateBondDistanceFeaturizer(BondDistanceFeaturizer):
+    """Featurize adsorbate bond distances.
+
+    Child class of BondDistanceFeaturizer with suitable min, max, and n_interval
+    values initialized for adsorbate atoms. The values are: min = 0, max = 4,
+    n_intervals = 16.
+    """
+    def __init__(self, encoder, min=0, max=4, n_intervals=16):
+        super().__init__(encoder, min=min, max=max, n_intervals=n_intervals)
 
 if __name__ == "__main__":
-    prop = np.array([1.5, 2.5, 3.5, 4.5, 5.5])
-    ohf = OneHotEncoder()
-    ohf.fit(1, 6, 5)
-    print(ohf.transform(prop))
\ No newline at end of file
+    from ase.io import read
+
+    atoms = read("CONTCAR")
+    g = AtomsGraph(atoms, select_idx=[1, 10, 11, 12])
+    dbf = DBandFeaturizer(OneHotEncoder())
+    dbf.featurize_graph(g.graph)
+    print(dbf.feat_tensor)
\ No newline at end of file
diff --git a/src/graphs.py b/src/graphs.py
index f0061c8..bcfc5c2 100644
--- a/src/graphs.py
+++ b/src/graphs.py
@@ -1,10 +1,8 @@
 """Classes to create bulk, surface, and adsorbate graphs."""
 
-import abc
-
 import networkx as nx
 import numpy as np
-from ase.neighborlist import build_neighbor_list
+from ase.neighborlist import build_neighbor_list, natural_cutoffs
 
 
 class AtomsGraph:
@@ -37,7 +35,10 @@ def create_graph(self):
         """Create a graph from an atoms object and neighbor_list."""
         # Create neighbor list of atoms
         self.neighbor_list = build_neighbor_list(
-            self.atoms, bothways=True, self_interaction=False
+            self.atoms,
+            natural_cutoffs(self.atoms),
+            bothways=True,
+            self_interaction=False
         )
 
         # Create NetworkX Multigraph
@@ -54,11 +55,21 @@ def create_graph(self):
             )
 
         # Iterate over nodes, identify neighbors, and add edges between them
-        for n in graph.nodes():
+        node_list = list(graph.nodes())
+        bond_tuples = set()
+        for n in node_list:
             # Get neighbors from neighbor list
             neighbor_idx, _ = self.neighbor_list.get_neighbors(n)
             # Iterate over neighbors
             for nn in neighbor_idx:
+                # Save bond
+                bond = (n, nn)
+                rev_bond = tuple(reversed(bond))
+                # Check if bond has already been added
+                if rev_bond in bond_tuples:
+                    continue
+                else:
+                    bond_tuples.add(bond)
                 # If neighbor is not in graph, add it as a node
                 if not graph.has_node(nn):
                     graph.add_node(
@@ -69,10 +80,14 @@ def create_graph(self):
                     )
                 # Calculate bond distance
                 bond_dist = np.linalg.norm(
-                    graph.nodes[n].position - graph.nodes[nn].position
+                    graph.nodes[n]["position"] - graph.nodes[nn]["position"]
                 )
                 graph.add_edge(n, nn, bond_distance=bond_dist)
 
+        # Add coordination numbers
+        for n in graph.nodes():
+            graph.nodes[n]["coordination"] = graph.degree[n]
+
         # Assign graph object
         self.graph = graph
 
@@ -85,4 +100,3 @@ def plot(self, filename=None):
             If provided, the plot is saved with the given filename.
         """
         pass
-