From 916580b1f0004d2404f7379b0c50558614d2f589 Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Mon, 11 Sep 2023 19:12:17 -0400 Subject: [PATCH 1/4] Added featurizers --- .gitignore | 3 + data/dband_centers.csv | 23 ++++ src/constants.py | 7 + src/featurizers.py | 287 +++++++++++++++++++++++++++++++++++++++++ src/graphs.py | 31 ----- 5 files changed, 320 insertions(+), 31 deletions(-) create mode 100644 data/dband_centers.csv create mode 100644 src/constants.py create mode 100644 src/featurizers.py diff --git a/.gitignore b/.gitignore index 092dc51..dba4b43 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ *POSCAR* +*.csv +!data/dband_centers.csv +__pycache__ diff --git a/data/dband_centers.csv b/data/dband_centers.csv new file mode 100644 index 0000000..dc45243 --- /dev/null +++ b/data/dband_centers.csv @@ -0,0 +1,23 @@ +22, 1.50 +23, 1.06 +24, 0.16 +25, 0.07 +26, -0.92 +27, -1.17 +28, -1.29 +29, -2.67 +40, 1.95 +41, 1.41 +42, 0.35 +43, -0.60 +44, -1.41 +45, -1.73 +46, -1.83 +47, -4.30 +72, 2.47 +73, 2.00 +74, 0.77 +75, -0.51 +77, -2.11 +78, -2.25 +79, -3.56 \ No newline at end of file diff --git a/src/constants.py b/src/constants.py new file mode 100644 index 0000000..f0f47a5 --- /dev/null +++ b/src/constants.py @@ -0,0 +1,7 @@ +"""Constants are defined here.""" + +import pathlib + +REPO_PATH = pathlib.Path(__file__).parents[1] + +DBAND_FILE_PATH = REPO_PATH / "data" / "dband_centers.csv" \ No newline at end of file diff --git a/src/featurizers.py b/src/featurizers.py new file mode 100644 index 0000000..41cc262 --- /dev/null +++ b/src/featurizers.py @@ -0,0 +1,287 @@ +"""Node and bond featurizers.""" + +import abc +import csv + +import numpy as np +import networkx as nx +import torch + +from torch.nn.functional import one_hot +from mendeleev import element + +from constants import DBAND_FILE_PATH + +class OneHotEncoder: + """Featurize a property using a one-hot encoding scheme.""" + def __init__(self): + """Blank constructor.""" + pass + + def fit(self, min, max, n_intervals): + """Fit encoder based on min, max, and number of intervals parameters. + + Parameters + ---------- + min: int + Minimum possible value of the property. + max: int + Maximum possible value of the property. + n_intervals: int + Number of elements in the one-hot encoded array. + """ + self.min = min + self.max = max + self.n_intervals = n_intervals + + def transform(self, property): + """Transform a given property vector/matrix/tensor. + + Parameters + ---------- + property: list or np.ndarray or torch.Tensor + Tensor containing value(s) of the property to be transformed. The + tensor must have a shape of N where N is the number of atoms. + """ + # Transform property to tensor + property = torch.Tensor(property) + + # Scale between 0 and num_intervals + scaled_prop = ((property - self.min) + / (self.max - self.min)) * self.n_intervals + + # Apply floor function + floor_prop = torch.floor(scaled_prop) + + # Create onehot encoding + onehot_prop = one_hot(floor_prop.to(torch.int64), + num_classes=self.n_intervals) + + return onehot_prop + +class Featurizer(abc.ABC): + """Meta class for defining featurizers.""" + @abc.abstractmethod + def __init__(self, encoder): + """Initialize class variables and fit encoder. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + """ + pass + + @abc.abstractmethod + def featurize_graph(self, graph): + """Featurize an AtomsGraph. + + This class should create a feature tensor from the given graph. This + feature tensor should have a shape of (N, M) where N = number of atoms + and M = n_intervals provided to the encoder. The feature tensor should + be saved as self._feat_tensor. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + pass + + @abc.abstractproperty + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + pass + + @abc.abstractstaticmethod + def name(self): + """Return the name of the featurizer. + + Returns + ------- + _name = str + Name of the featurizer. + """ + return "abstract_featurizer" + + +class AtomNumFeaturizer(Featurizer): + """Featurize nodes based on atomic number.""" + def __init__(self, encoder): + """Initialize featurizer with min = 1, max = 80, n_intervals = 10. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + """ + # Initialize variables + self.min = 1 + self.max = 80 + self.n_intervals = 10 + + # Fit encoder + self.encoder = encoder + self.encoder.fit(self.min, self.max, self.n_intervals) + + def featurize_graph(self, graph): + """Featurize an AtomsGraph. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + # Get atomic numbers + atom_num_dict = nx.get_node_attributes(graph, "atomic_number") + atom_num_arr = np.array(atom_num_dict.values()) + + # Create node feature matrix + self._feat_tensor = self.encoder.transform(atom_num_arr) + + @property + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + return self._feat_tensor + + @staticmethod + def name(): + """Return the name of the featurizer.""" + return "atomic_number" + +class DBandFeaturizer(Featurizer): + """Featurize nodes based on close-packed d-band center.""" + def __init__(self, encoder): + """Initialize featurizer with min = -5, max = 3, n_intervals = 10. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + """ + # Initialize variables + self.min = -5 + self.max = 3 + self.n_intervals = 10 + + # Fit encoder + self.encoder = encoder + self.encoder.fit(self.min, self.max, self.n_intervals) + + # Get dband centers from csv + self.map_dict = {} + with open(DBAND_FILE_PATH, "r") as f: + csv_reader = csv.reader(f) + for row in csv_reader: + self.map_dict[row[0]] = row[1] + + def featurize_graph(self, graph): + """Featurize an AtomsGraph. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + # Get atomic numbers + atom_num_dict = nx.get_node_attributes(graph, "atomic_number") + atom_num_arr = np.array(atom_num_dict.values()) + + # Map from atomic number to d-band center + dband_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr) + + # Create node feature matrix + self._feat_tensor = self.encoder.transform(dband_arr) + + @property + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + return self._feat_tensor + + @staticmethod + def name(): + """Return the name of the featurizer.""" + return "dband_center" + +class ValenceFeaturizer(Featurizer): + """Featurize nodes based on number of valence electrons.""" + def __init__(self, encoder): + """Initialize featurizer with min = 1, max = 12, n_intervals = 12. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + """ + # Initialize variables + self.min = 1 + self.max = 12 + self.n_intervals = 12 + + # Fit encoder + self.encoder = encoder + self.encoder.fit(self.min, self.max, self.n_intervals) + + # Create a map between atomic number and number of valence electrons + self.map_dict = {1: 1, 2:0} + for i in range(3, 21, 1): + self.map_dict[i] = element(i).ec.get_valence().ne() + + def featurize_graph(self, graph): + """Featurize an AtomsGraph. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + # Get atomic numbers + atom_num_dict = nx.get_node_attributes(graph, "atomic_number") + atom_num_arr = np.array(atom_num_dict.values()) + + # Create node feature matrix + self._feat_tensor = self.encoder.transform(atom_num_arr) + + @property + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + return self._feat_tensor + + @staticmethod + def name(): + """Return the name of the featurizer.""" + return "valence" + +if __name__ == "__main__": + prop = np.array([1.5, 2.5, 3.5, 4.5, 5.5]) + ohf = OneHotEncoder() + ohf.fit(1, 6, 5) + print(ohf.transform(prop)) \ No newline at end of file diff --git a/src/graphs.py b/src/graphs.py index 3ab8d5e..f0061c8 100644 --- a/src/graphs.py +++ b/src/graphs.py @@ -76,18 +76,6 @@ def create_graph(self): # Assign graph object self.graph = graph - def featurize(self, node_featurizer, bond_featurizer): - """Featurize nodes and edges of the graph. - - Parameters - ---------- - node_featurizer: TODO - Object that featurizes atoms - bond_featurizer: TODO - Object that featurizes bonds - """ - pass - def plot(self, filename=None): """Plot the graph using NetworkX. @@ -98,22 +86,3 @@ def plot(self, filename=None): """ pass - def get_node_tensor(self): - """Get the node matrix of the graph as a PyTorch Tensor. - - Returns - ------- - node_matrix: torch.Tensor - Node matrix - """ - pass - - def get_edge_tensor(self): - """Get the edge matrix of the graph as a PyTorch Tensor. - - Returns - ------- - edge_matrix: torch.Tensor - Edge tensor - """ - pass From d80ef8b497eb42c5a60836c0ddfac8f101eb50dc Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Tue, 12 Sep 2023 16:27:31 -0400 Subject: [PATCH 2/4] Added bond featurizers --- .gitignore | 1 + src/featurizers.py | 208 ++++++++++++++++++++++++++++++++++++++++----- src/graphs.py | 28 ++++-- 3 files changed, 209 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index dba4b43..94d6dd9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *POSCAR* +*CONTCAR* *.csv !data/dband_centers.csv __pycache__ diff --git a/src/featurizers.py b/src/featurizers.py index 41cc262..436a8d6 100644 --- a/src/featurizers.py +++ b/src/featurizers.py @@ -11,6 +11,7 @@ from mendeleev import element from constants import DBAND_FILE_PATH +from graphs import AtomsGraph class OneHotEncoder: """Featurize a property using a one-hot encoding scheme.""" @@ -114,18 +115,24 @@ def name(self): class AtomNumFeaturizer(Featurizer): """Featurize nodes based on atomic number.""" - def __init__(self, encoder): + def __init__(self, encoder, min=1, max=80, n_intervals=10): """Initialize featurizer with min = 1, max = 80, n_intervals = 10. Parameters ---------- encoder: OneHotEncoder Initialized object of class OneHotEncoder. + min: int + Minimum value of atomic number + max: int + Maximum value of atomic number + n_intervals: int + Number of intervals """ # Initialize variables - self.min = 1 - self.max = 80 - self.n_intervals = 10 + self.min = min + self.max = max + self.n_intervals = n_intervals # Fit encoder self.encoder = encoder @@ -141,7 +148,7 @@ def featurize_graph(self, graph): """ # Get atomic numbers atom_num_dict = nx.get_node_attributes(graph, "atomic_number") - atom_num_arr = np.array(atom_num_dict.values()) + atom_num_arr = np.array(list(atom_num_dict.values())) # Create node feature matrix self._feat_tensor = self.encoder.transform(atom_num_arr) @@ -165,18 +172,24 @@ def name(): class DBandFeaturizer(Featurizer): """Featurize nodes based on close-packed d-band center.""" - def __init__(self, encoder): + def __init__(self, encoder, min=-5, max=3, n_intervals=10): """Initialize featurizer with min = -5, max = 3, n_intervals = 10. Parameters ---------- encoder: OneHotEncoder Initialized object of class OneHotEncoder. + min: int + Minimum value of d-band center + max: int + Maximum value of d-band center + n_intervals: int + Number of intervals """ # Initialize variables - self.min = -5 - self.max = 3 - self.n_intervals = 10 + self.min = min + self.max = max + self.n_intervals = n_intervals # Fit encoder self.encoder = encoder @@ -187,7 +200,7 @@ def __init__(self, encoder): with open(DBAND_FILE_PATH, "r") as f: csv_reader = csv.reader(f) for row in csv_reader: - self.map_dict[row[0]] = row[1] + self.map_dict[int(row[0])] = float(row[1]) def featurize_graph(self, graph): """Featurize an AtomsGraph. @@ -199,7 +212,7 @@ def featurize_graph(self, graph): """ # Get atomic numbers atom_num_dict = nx.get_node_attributes(graph, "atomic_number") - atom_num_arr = np.array(atom_num_dict.values()) + atom_num_arr = np.array(list(atom_num_dict.values())) # Map from atomic number to d-band center dband_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr) @@ -226,18 +239,24 @@ def name(): class ValenceFeaturizer(Featurizer): """Featurize nodes based on number of valence electrons.""" - def __init__(self, encoder): + def __init__(self, encoder, min=1, max=12, n_intervals=12): """Initialize featurizer with min = 1, max = 12, n_intervals = 12. Parameters ---------- encoder: OneHotEncoder Initialized object of class OneHotEncoder. + min: int + Minimum value of valence electrons + max: int + Maximum value of valence electrons + n_intervals: int + Number of intervals """ # Initialize variables - self.min = 1 - self.max = 12 - self.n_intervals = 12 + self.min = min + self.max = max + self.n_intervals = n_intervals # Fit encoder self.encoder = encoder @@ -246,7 +265,7 @@ def __init__(self, encoder): # Create a map between atomic number and number of valence electrons self.map_dict = {1: 1, 2:0} for i in range(3, 21, 1): - self.map_dict[i] = element(i).ec.get_valence().ne() + self.map_dict[i] = min(element(i).ec.get_valence().ne(), 12) def featurize_graph(self, graph): """Featurize an AtomsGraph. @@ -258,7 +277,7 @@ def featurize_graph(self, graph): """ # Get atomic numbers atom_num_dict = nx.get_node_attributes(graph, "atomic_number") - atom_num_arr = np.array(atom_num_dict.values()) + atom_num_arr = np.array(list(atom_num_dict.values())) # Create node feature matrix self._feat_tensor = self.encoder.transform(atom_num_arr) @@ -279,9 +298,156 @@ def feat_tensor(self): def name(): """Return the name of the featurizer.""" return "valence" + +class CoordinationFeaturizer(Featurizer): + """Featurize nodes based on coordination number.""" + def __init__(self, encoder, min=1, max=15, n_intervals=15): + """Initialize featurizer with min = 1, max = 15, n_intervals = 15. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + min: int + Minimum value of valence electrons + max: int + Maximum value of valence electrons + n_intervals: int + Number of intervals + """ + # Initialize variables + self.min = min + self.max = max + self.n_intervals = n_intervals + + # Fit encoder + self.encoder = encoder + self.encoder.fit(self.min, self.max, self.n_intervals) + + def featurize_graph(self, graph): + """Featurize an AtomsGraph. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + # Get atomic numbers + cn_dict = nx.get_node_attributes(graph, "coordination") + cn_arr = np.array(list(cn_dict.values())) + + # Create node feature matrix + self._feat_tensor = self.encoder.transform(cn_arr) + + @property + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + return self._feat_tensor + + @staticmethod + def name(): + """Return the name of the featurizer.""" + return "coordination" + +class BondDistanceFeaturizer(Featurizer): + """Featurize edges based on bond distance.""" + def __init__(self, encoder, min, max, n_intervals): + """Initialize bond distance featurizer. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + min: int + Minimum value of atomic number + max: int + Maximum value of atomic number + n_intervals: int + Number of intervals + """ + # Initialize variables + self.min = min + self.max = max + self.n_intervals = n_intervals + + # Fit encoder + self.encoder = encoder + self.encoder.fit(self.min, self.max, self.n_intervals) + + def featurize_graph(self, graph): + """Featurize an AtomsGraph. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + # Get atomic numbers + bond_dist_dict = nx.get_edge_attributes(graph, "bond_distance") + bond_dist_arr = np.array(list(bond_dist_dict.values())) + + # Create node feature matrix + self._feat_tensor = self.encoder.transform(bond_dist_arr) + + @property + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + return self._feat_tensor + + @staticmethod + def name(): + """Return the name of the featurizer.""" + return "valence" + +class BulkBondDistanceFeaturizer(BondDistanceFeaturizer): + """Featurize bulk bond distances. + + Child class of BondDistanceFeaturizer with suitable min, max, and n_interval + values initialized for bulk atoms. The values are: min = 0, max = 8, + n_intervals = 8. + """ + def __init__(self, encoder, min=0, max=8, n_intervals=8): + super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + +class SurfaceBondDistanceFeaturizer(BondDistanceFeaturizer): + """Featurize bulk bond distances. + + Child class of BondDistanceFeaturizer with suitable min, max, and n_interval + values initialized for surface atoms. The values are: min = 0, max = 5, + n_intervals = 10. + """ + def __init__(self, encoder, min=0, max=5, n_intervals=10): + super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + +class AdsorbateBondDistanceFeaturizer(BondDistanceFeaturizer): + """Featurize bulk bond distances. + + Child class of BondDistanceFeaturizer with suitable min, max, and n_interval + values initialized for adsorbate atoms. The values are: min = 0, max = 4, + n_intervals = 16. + """ + def __init__(self, encoder, min=0, max=4, n_intervals=16): + super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) if __name__ == "__main__": - prop = np.array([1.5, 2.5, 3.5, 4.5, 5.5]) - ohf = OneHotEncoder() - ohf.fit(1, 6, 5) - print(ohf.transform(prop)) \ No newline at end of file + from ase.io import read + + atoms = read("CONTCAR") + g = AtomsGraph(atoms, select_idx=[1, 10, 11, 12]) + dbf = DBandFeaturizer(OneHotEncoder()) + dbf.featurize_graph(g.graph) + print(dbf.feat_tensor) \ No newline at end of file diff --git a/src/graphs.py b/src/graphs.py index f0061c8..bcfc5c2 100644 --- a/src/graphs.py +++ b/src/graphs.py @@ -1,10 +1,8 @@ """Classes to create bulk, surface, and adsorbate graphs.""" -import abc - import networkx as nx import numpy as np -from ase.neighborlist import build_neighbor_list +from ase.neighborlist import build_neighbor_list, natural_cutoffs class AtomsGraph: @@ -37,7 +35,10 @@ def create_graph(self): """Create a graph from an atoms object and neighbor_list.""" # Create neighbor list of atoms self.neighbor_list = build_neighbor_list( - self.atoms, bothways=True, self_interaction=False + self.atoms, + natural_cutoffs(self.atoms), + bothways=True, + self_interaction=False ) # Create NetworkX Multigraph @@ -54,11 +55,21 @@ def create_graph(self): ) # Iterate over nodes, identify neighbors, and add edges between them - for n in graph.nodes(): + node_list = list(graph.nodes()) + bond_tuples = [] + for n in node_list: # Get neighbors from neighbor list neighbor_idx, _ = self.neighbor_list.get_neighbors(n) # Iterate over neighbors for nn in neighbor_idx: + # Save bond + bond = (n, nn) + rev_bond = tuple(reversed(bond)) + # Check if bond has already been added + if rev_bond in bond_tuples: + continue + else: + bond_tuples.append(bond) # If neighbor is not in graph, add it as a node if not graph.has_node(nn): graph.add_node( @@ -69,10 +80,14 @@ def create_graph(self): ) # Calculate bond distance bond_dist = np.linalg.norm( - graph.nodes[n].position - graph.nodes[nn].position + graph.nodes[n]["position"] - graph.nodes[nn]["position"] ) graph.add_edge(n, nn, bond_distance=bond_dist) + # Add coordination numbers + for n in graph.nodes(): + graph.nodes[n]["coordination"] = graph.degree[n] + # Assign graph object self.graph = graph @@ -85,4 +100,3 @@ def plot(self, filename=None): If provided, the plot is saved with the given filename. """ pass - From 2347f0359f264f1cb3dd120055266bd65eabcf0c Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Tue, 12 Sep 2023 18:07:10 -0400 Subject: [PATCH 3/4] Added padding and fixed bond distances --- src/featurizers.py | 54 +++++++++++++------ src/graphs.py | 127 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 143 insertions(+), 38 deletions(-) diff --git a/src/featurizers.py b/src/featurizers.py index 436a8d6..04b9ac7 100644 --- a/src/featurizers.py +++ b/src/featurizers.py @@ -74,7 +74,7 @@ def __init__(self, encoder): pass @abc.abstractmethod - def featurize_graph(self, graph): + def featurize_graph(self, atoms_graph): """Featurize an AtomsGraph. This class should create a feature tensor from the given graph. This @@ -115,8 +115,8 @@ def name(self): class AtomNumFeaturizer(Featurizer): """Featurize nodes based on atomic number.""" - def __init__(self, encoder, min=1, max=80, n_intervals=10): - """Initialize featurizer with min = 1, max = 80, n_intervals = 10. + def __init__(self, encoder, min=0, max=80, n_intervals=10): + """Initialize featurizer with min = 0, max = 80, n_intervals = 10. Parameters ---------- @@ -138,7 +138,7 @@ def __init__(self, encoder, min=1, max=80, n_intervals=10): self.encoder = encoder self.encoder.fit(self.min, self.max, self.n_intervals) - def featurize_graph(self, graph): + def featurize_graph(self, atoms_graph): """Featurize an AtomsGraph. Parameters @@ -147,7 +147,7 @@ def featurize_graph(self, graph): A graph of a collection of bulk, surface, or adsorbate atoms. """ # Get atomic numbers - atom_num_dict = nx.get_node_attributes(graph, "atomic_number") + atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") atom_num_arr = np.array(list(atom_num_dict.values())) # Create node feature matrix @@ -202,7 +202,7 @@ def __init__(self, encoder, min=-5, max=3, n_intervals=10): for row in csv_reader: self.map_dict[int(row[0])] = float(row[1]) - def featurize_graph(self, graph): + def featurize_graph(self, atoms_graph): """Featurize an AtomsGraph. Parameters @@ -211,7 +211,7 @@ def featurize_graph(self, graph): A graph of a collection of bulk, surface, or adsorbate atoms. """ # Get atomic numbers - atom_num_dict = nx.get_node_attributes(graph, "atomic_number") + atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") atom_num_arr = np.array(list(atom_num_dict.values())) # Map from atomic number to d-band center @@ -263,11 +263,11 @@ def __init__(self, encoder, min=1, max=12, n_intervals=12): self.encoder.fit(self.min, self.max, self.n_intervals) # Create a map between atomic number and number of valence electrons - self.map_dict = {1: 1, 2:0} + self.map_dict = {0: 0, 1: 1, 2:0} for i in range(3, 21, 1): self.map_dict[i] = min(element(i).ec.get_valence().ne(), 12) - def featurize_graph(self, graph): + def featurize_graph(self, atoms_graph): """Featurize an AtomsGraph. Parameters @@ -276,7 +276,7 @@ def featurize_graph(self, graph): A graph of a collection of bulk, surface, or adsorbate atoms. """ # Get atomic numbers - atom_num_dict = nx.get_node_attributes(graph, "atomic_number") + atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") atom_num_arr = np.array(list(atom_num_dict.values())) # Create node feature matrix @@ -324,7 +324,7 @@ def __init__(self, encoder, min=1, max=15, n_intervals=15): self.encoder = encoder self.encoder.fit(self.min, self.max, self.n_intervals) - def featurize_graph(self, graph): + def featurize_graph(self, atoms_graph): """Featurize an AtomsGraph. Parameters @@ -333,7 +333,7 @@ def featurize_graph(self, graph): A graph of a collection of bulk, surface, or adsorbate atoms. """ # Get atomic numbers - cn_dict = nx.get_node_attributes(graph, "coordination") + cn_dict = nx.get_node_attributes(atoms_graph.graph, "coordination") cn_arr = np.array(list(cn_dict.values())) # Create node feature matrix @@ -381,7 +381,7 @@ def __init__(self, encoder, min, max, n_intervals): self.encoder = encoder self.encoder.fit(self.min, self.max, self.n_intervals) - def featurize_graph(self, graph): + def featurize_graph(self, atoms_graph): """Featurize an AtomsGraph. Parameters @@ -390,11 +390,14 @@ def featurize_graph(self, graph): A graph of a collection of bulk, surface, or adsorbate atoms. """ # Get atomic numbers - bond_dist_dict = nx.get_edge_attributes(graph, "bond_distance") + bond_dist_dict = nx.get_edge_attributes(atoms_graph.graph, "bond_distance") bond_dist_arr = np.array(list(bond_dist_dict.values())) # Create node feature matrix self._feat_tensor = self.encoder.transform(bond_dist_arr) + + # Create list of edge indices + self._edge_indices = torch.Tensor(list(atoms_graph.graph.edges())) @property def feat_tensor(self): @@ -407,6 +410,17 @@ def feat_tensor(self): M = n_intervals provided to the encoder """ return self._feat_tensor + + @property + def edge_indices(self): + """Return list of edge indices. + + Returns + ------- + edge_indices: torch.Tensor + Tensor with edge indices + """ + return self._edge_indices @staticmethod def name(): @@ -448,6 +462,12 @@ def __init__(self, encoder, min=0, max=4, n_intervals=16): atoms = read("CONTCAR") g = AtomsGraph(atoms, select_idx=[1, 10, 11, 12]) - dbf = DBandFeaturizer(OneHotEncoder()) - dbf.featurize_graph(g.graph) - print(dbf.feat_tensor) \ No newline at end of file + + anf = AtomNumFeaturizer(OneHotEncoder()) + anf.featurize_graph(g) + print(anf.feat_tensor.shape) + + bdf = BulkBondDistanceFeaturizer(OneHotEncoder()) + bdf.featurize_graph(g) + print(bdf.feat_tensor) + print(bdf.edge_indices) \ No newline at end of file diff --git a/src/graphs.py b/src/graphs.py index bcfc5c2..99e96bc 100644 --- a/src/graphs.py +++ b/src/graphs.py @@ -1,8 +1,11 @@ """Classes to create bulk, surface, and adsorbate graphs.""" +from copy import deepcopy + import networkx as nx import numpy as np -from ase.neighborlist import build_neighbor_list, natural_cutoffs +from ase.neighborlist import (build_neighbor_list, natural_cutoffs, + NewPrimitiveNeighborList) class AtomsGraph: @@ -33,37 +36,48 @@ def __init__(self, atoms, select_idx, max_atoms=50): def create_graph(self): """Create a graph from an atoms object and neighbor_list.""" - # Create neighbor list of atoms - self.neighbor_list = build_neighbor_list( - self.atoms, - natural_cutoffs(self.atoms), - bothways=True, - self_interaction=False - ) - # Create NetworkX Multigraph graph = nx.MultiGraph() # Iterate over selected atoms and add them as nodes + self.node_count = 0 + self.map_idx_node = {} for atom in self.atoms: - if atom.index in self.select_idx and atom.index not in list(graph.nodes()): + if (atom.index in self.select_idx + and atom.index not in list(graph.nodes(data="index"))): graph.add_node( - atom.index, + self.node_count, + index=atom.index, atomic_number=atom.number, symbol=atom.symbol, - position=atom.position, ) + self.map_idx_node[atom.index] = self.node_count + self.node_count += 1 + + # Create neighbor list of atoms + self.neighbor_list = build_neighbor_list( + self.atoms, + natural_cutoffs(self.atoms), + bothways=True, + self_interaction=False, + primitive=NewPrimitiveNeighborList + ) # Iterate over nodes, identify neighbors, and add edges between them node_list = list(graph.nodes()) bond_tuples = [] for n in node_list: # Get neighbors from neighbor list - neighbor_idx, _ = self.neighbor_list.get_neighbors(n) + neighbor_idx, neighbor_offsets = self.neighbor_list.get_neighbors( + graph.nodes[n]["index"] + ) # Iterate over neighbors - for nn in neighbor_idx: + for nn, offset in zip(neighbor_idx, neighbor_offsets): + # Skip if self atom + if nn == graph.nodes[n]["index"]: + continue # Save bond - bond = (n, nn) + bond = (graph.nodes[n]["index"], nn) rev_bond = tuple(reversed(bond)) # Check if bond has already been added if rev_bond in bond_tuples: @@ -71,18 +85,28 @@ def create_graph(self): else: bond_tuples.append(bond) # If neighbor is not in graph, add it as a node - if not graph.has_node(nn): + node_indices = nx.get_node_attributes(graph, "index") + if nn not in list(node_indices.values()): graph.add_node( - nn, + self.node_count, + index=nn, atomic_number=self.atoms[nn].number, symbol=self.atoms[nn].symbol, - position=self.atoms[nn].position, ) + self.map_idx_node[nn] = self.node_count + self.node_count += 1 # Calculate bond distance - bond_dist = np.linalg.norm( - graph.nodes[n]["position"] - graph.nodes[nn]["position"] + bond_dist = self.calc_minimum_distance( + self.atoms[graph.nodes[n]["index"]].position, + self.atoms[nn].position, + offset, + ) + graph.add_edge( + n, self.map_idx_node[nn], bond_distance=bond_dist ) - graph.add_edge(n, nn, bond_distance=bond_dist) + + # Pad graph + graph = self.pad_graph(graph) # Add coordination numbers for n in graph.nodes(): @@ -91,6 +115,59 @@ def create_graph(self): # Assign graph object self.graph = graph + def pad_graph(self, graph): + """Pad graph with empty nodes. + + This can be used to make sure that the number of nodes in each graph is + equal to max_atoms + + Parameters + ---------- + graph: Networkx.Graph + A Networkx graph + + Returns + ------- + padded_graph: Networkx.Graph + Padded graph + """ + padded_graph = deepcopy(graph) + + for i in range(self.node_count, self.max_atoms, 1): + padded_graph.add_node( + i, + index=-1, + atomic_number=0, + symbol="", + position=np.zeros(3) + ) + + return padded_graph + + def calc_minimum_distance(self, pos_1, pos_2, offset): + """ + Calculate minimum distance between two atoms. + + Parameters + ---------- + pos_1: np.ndarray + Position of first atom in x, y, z coordinates + pos_2: np.ndarray + Position of second atom in x, y, z coordinates + offset: np.ndarray + Offset returned by the neighbor list in ASE + """ + # First, calculate the distance without offset + dist_1 = np.linalg.norm(pos_1 - pos_2) + + # Next calculate the distance by applying offset to second position + dist_2 = np.linalg.norm(pos_1 - (pos_2 + offset @ self.atoms.get_cell())) + + # Get minimum distance + min_dist = min(dist_1, dist_2) + + return min_dist + def plot(self, filename=None): """Plot the graph using NetworkX. @@ -100,3 +177,11 @@ def plot(self, filename=None): If provided, the plot is saved with the given filename. """ pass + +if __name__ == "__main__": + from ase.io import read + + atoms = read("CONTCAR") + g = AtomsGraph(atoms, select_idx=[1, 10, 11, 12]) + print(g.map_idx_node) + print(g.graph.edges(data="bond_distance")) \ No newline at end of file From 119e5c7d02154b2cc3480f59993569514c5fd9d9 Mon Sep 17 00:00:00 2001 From: Gaurav S Deshmukh Date: Wed, 13 Sep 2023 19:53:04 -0400 Subject: [PATCH 4/4] Fix codestyle --- src/constants.py | 2 +- src/featurizers.py | 85 +++++++++++++++++++++++++++------------------- src/graphs.py | 34 ++++++++----------- 3 files changed, 67 insertions(+), 54 deletions(-) diff --git a/src/constants.py b/src/constants.py index f0f47a5..1c0d2d2 100644 --- a/src/constants.py +++ b/src/constants.py @@ -4,4 +4,4 @@ REPO_PATH = pathlib.Path(__file__).parents[1] -DBAND_FILE_PATH = REPO_PATH / "data" / "dband_centers.csv" \ No newline at end of file +DBAND_FILE_PATH = REPO_PATH / "data" / "dband_centers.csv" diff --git a/src/featurizers.py b/src/featurizers.py index 04b9ac7..b55d7a1 100644 --- a/src/featurizers.py +++ b/src/featurizers.py @@ -3,18 +3,19 @@ import abc import csv -import numpy as np import networkx as nx +import numpy as np import torch - -from torch.nn.functional import one_hot from mendeleev import element +from torch.nn.functional import one_hot from constants import DBAND_FILE_PATH from graphs import AtomsGraph + class OneHotEncoder: """Featurize a property using a one-hot encoding scheme.""" + def __init__(self): """Blank constructor.""" pass @@ -37,7 +38,7 @@ def fit(self, min, max, n_intervals): def transform(self, property): """Transform a given property vector/matrix/tensor. - + Parameters ---------- property: list or np.ndarray or torch.Tensor @@ -48,24 +49,24 @@ def transform(self, property): property = torch.Tensor(property) # Scale between 0 and num_intervals - scaled_prop = ((property - self.min) - / (self.max - self.min)) * self.n_intervals - + scaled_prop = ((property - self.min) / (self.max - self.min)) * self.n_intervals + # Apply floor function floor_prop = torch.floor(scaled_prop) - + # Create onehot encoding - onehot_prop = one_hot(floor_prop.to(torch.int64), - num_classes=self.n_intervals) - + onehot_prop = one_hot(floor_prop.to(torch.int64), num_classes=self.n_intervals) + return onehot_prop + class Featurizer(abc.ABC): """Meta class for defining featurizers.""" + @abc.abstractmethod def __init__(self, encoder): """Initialize class variables and fit encoder. - + Parameters ---------- encoder: OneHotEncoder @@ -92,7 +93,7 @@ def featurize_graph(self, atoms_graph): @abc.abstractproperty def feat_tensor(self): """Return the featurized node tensor. - + Returns ------- feat_tensor: torch.Tensor @@ -100,11 +101,11 @@ def feat_tensor(self): M = n_intervals provided to the encoder """ pass - + @abc.abstractstaticmethod def name(self): """Return the name of the featurizer. - + Returns ------- _name = str @@ -115,6 +116,7 @@ def name(self): class AtomNumFeaturizer(Featurizer): """Featurize nodes based on atomic number.""" + def __init__(self, encoder, min=0, max=80, n_intervals=10): """Initialize featurizer with min = 0, max = 80, n_intervals = 10. @@ -156,7 +158,7 @@ def featurize_graph(self, atoms_graph): @property def feat_tensor(self): """Return the featurized node tensor. - + Returns ------- feat_tensor: torch.Tensor @@ -164,14 +166,16 @@ def feat_tensor(self): M = n_intervals provided to the encoder """ return self._feat_tensor - + @staticmethod def name(): """Return the name of the featurizer.""" return "atomic_number" + class DBandFeaturizer(Featurizer): """Featurize nodes based on close-packed d-band center.""" + def __init__(self, encoder, min=-5, max=3, n_intervals=10): """Initialize featurizer with min = -5, max = 3, n_intervals = 10. @@ -223,7 +227,7 @@ def featurize_graph(self, atoms_graph): @property def feat_tensor(self): """Return the featurized node tensor. - + Returns ------- feat_tensor: torch.Tensor @@ -236,9 +240,11 @@ def feat_tensor(self): def name(): """Return the name of the featurizer.""" return "dband_center" - + + class ValenceFeaturizer(Featurizer): """Featurize nodes based on number of valence electrons.""" + def __init__(self, encoder, min=1, max=12, n_intervals=12): """Initialize featurizer with min = 1, max = 12, n_intervals = 12. @@ -263,7 +269,7 @@ def __init__(self, encoder, min=1, max=12, n_intervals=12): self.encoder.fit(self.min, self.max, self.n_intervals) # Create a map between atomic number and number of valence electrons - self.map_dict = {0: 0, 1: 1, 2:0} + self.map_dict = {0: 0, 1: 1, 2: 0} for i in range(3, 21, 1): self.map_dict[i] = min(element(i).ec.get_valence().ne(), 12) @@ -285,7 +291,7 @@ def featurize_graph(self, atoms_graph): @property def feat_tensor(self): """Return the featurized node tensor. - + Returns ------- feat_tensor: torch.Tensor @@ -298,9 +304,11 @@ def feat_tensor(self): def name(): """Return the name of the featurizer.""" return "valence" - + + class CoordinationFeaturizer(Featurizer): """Featurize nodes based on coordination number.""" + def __init__(self, encoder, min=1, max=15, n_intervals=15): """Initialize featurizer with min = 1, max = 15, n_intervals = 15. @@ -342,7 +350,7 @@ def featurize_graph(self, atoms_graph): @property def feat_tensor(self): """Return the featurized node tensor. - + Returns ------- feat_tensor: torch.Tensor @@ -355,9 +363,11 @@ def feat_tensor(self): def name(): """Return the name of the featurizer.""" return "coordination" - + + class BondDistanceFeaturizer(Featurizer): """Featurize edges based on bond distance.""" + def __init__(self, encoder, min, max, n_intervals): """Initialize bond distance featurizer. @@ -395,14 +405,14 @@ def featurize_graph(self, atoms_graph): # Create node feature matrix self._feat_tensor = self.encoder.transform(bond_dist_arr) - + # Create list of edge indices self._edge_indices = torch.Tensor(list(atoms_graph.graph.edges())) @property def feat_tensor(self): """Return the featurized node tensor. - + Returns ------- feat_tensor: torch.Tensor @@ -410,7 +420,7 @@ def feat_tensor(self): M = n_intervals provided to the encoder """ return self._feat_tensor - + @property def edge_indices(self): """Return list of edge indices. @@ -426,48 +436,55 @@ def edge_indices(self): def name(): """Return the name of the featurizer.""" return "valence" - + + class BulkBondDistanceFeaturizer(BondDistanceFeaturizer): """Featurize bulk bond distances. - + Child class of BondDistanceFeaturizer with suitable min, max, and n_interval values initialized for bulk atoms. The values are: min = 0, max = 8, n_intervals = 8. """ + def __init__(self, encoder, min=0, max=8, n_intervals=8): super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + class SurfaceBondDistanceFeaturizer(BondDistanceFeaturizer): """Featurize bulk bond distances. - + Child class of BondDistanceFeaturizer with suitable min, max, and n_interval values initialized for surface atoms. The values are: min = 0, max = 5, n_intervals = 10. """ + def __init__(self, encoder, min=0, max=5, n_intervals=10): super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + class AdsorbateBondDistanceFeaturizer(BondDistanceFeaturizer): """Featurize bulk bond distances. - + Child class of BondDistanceFeaturizer with suitable min, max, and n_interval values initialized for adsorbate atoms. The values are: min = 0, max = 4, n_intervals = 16. """ + def __init__(self, encoder, min=0, max=4, n_intervals=16): super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + if __name__ == "__main__": from ase.io import read atoms = read("CONTCAR") g = AtomsGraph(atoms, select_idx=[1, 10, 11, 12]) - + anf = AtomNumFeaturizer(OneHotEncoder()) anf.featurize_graph(g) print(anf.feat_tensor.shape) - + bdf = BulkBondDistanceFeaturizer(OneHotEncoder()) bdf.featurize_graph(g) print(bdf.feat_tensor) - print(bdf.edge_indices) \ No newline at end of file + print(bdf.edge_indices) diff --git a/src/graphs.py b/src/graphs.py index 99e96bc..b744e8f 100644 --- a/src/graphs.py +++ b/src/graphs.py @@ -4,8 +4,8 @@ import networkx as nx import numpy as np -from ase.neighborlist import (build_neighbor_list, natural_cutoffs, - NewPrimitiveNeighborList) +from ase.neighborlist import (NewPrimitiveNeighborList, build_neighbor_list, + natural_cutoffs) class AtomsGraph: @@ -43,8 +43,9 @@ def create_graph(self): self.node_count = 0 self.map_idx_node = {} for atom in self.atoms: - if (atom.index in self.select_idx - and atom.index not in list(graph.nodes(data="index"))): + if atom.index in self.select_idx and atom.index not in list( + graph.nodes(data="index") + ): graph.add_node( self.node_count, index=atom.index, @@ -60,7 +61,7 @@ def create_graph(self): natural_cutoffs(self.atoms), bothways=True, self_interaction=False, - primitive=NewPrimitiveNeighborList + primitive=NewPrimitiveNeighborList, ) # Iterate over nodes, identify neighbors, and add edges between them @@ -101,9 +102,7 @@ def create_graph(self): self.atoms[nn].position, offset, ) - graph.add_edge( - n, self.map_idx_node[nn], bond_distance=bond_dist - ) + graph.add_edge(n, self.map_idx_node[nn], bond_distance=bond_dist) # Pad graph graph = self.pad_graph(graph) @@ -120,12 +119,12 @@ def pad_graph(self, graph): This can be used to make sure that the number of nodes in each graph is equal to max_atoms - + Parameters ---------- graph: Networkx.Graph A Networkx graph - + Returns ------- padded_graph: Networkx.Graph @@ -135,11 +134,7 @@ def pad_graph(self, graph): for i in range(self.node_count, self.max_atoms, 1): padded_graph.add_node( - i, - index=-1, - atomic_number=0, - symbol="", - position=np.zeros(3) + i, index=-1, atomic_number=0, symbol="", position=np.zeros(3) ) return padded_graph @@ -159,13 +154,13 @@ def calc_minimum_distance(self, pos_1, pos_2, offset): """ # First, calculate the distance without offset dist_1 = np.linalg.norm(pos_1 - pos_2) - + # Next calculate the distance by applying offset to second position dist_2 = np.linalg.norm(pos_1 - (pos_2 + offset @ self.atoms.get_cell())) # Get minimum distance min_dist = min(dist_1, dist_2) - + return min_dist def plot(self, filename=None): @@ -178,10 +173,11 @@ def plot(self, filename=None): """ pass + if __name__ == "__main__": from ase.io import read - + atoms = read("CONTCAR") g = AtomsGraph(atoms, select_idx=[1, 10, 11, 12]) print(g.map_idx_node) - print(g.graph.edges(data="bond_distance")) \ No newline at end of file + print(g.graph.edges(data="bond_distance"))