diff --git a/.gitignore b/.gitignore index 092dc51..94d6dd9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ *POSCAR* +*CONTCAR* +*.csv +!data/dband_centers.csv +__pycache__ diff --git a/data/dband_centers.csv b/data/dband_centers.csv new file mode 100644 index 0000000..dc45243 --- /dev/null +++ b/data/dband_centers.csv @@ -0,0 +1,23 @@ +22, 1.50 +23, 1.06 +24, 0.16 +25, 0.07 +26, -0.92 +27, -1.17 +28, -1.29 +29, -2.67 +40, 1.95 +41, 1.41 +42, 0.35 +43, -0.60 +44, -1.41 +45, -1.73 +46, -1.83 +47, -4.30 +72, 2.47 +73, 2.00 +74, 0.77 +75, -0.51 +77, -2.11 +78, -2.25 +79, -3.56 \ No newline at end of file diff --git a/src/constants.py b/src/constants.py new file mode 100644 index 0000000..1c0d2d2 --- /dev/null +++ b/src/constants.py @@ -0,0 +1,7 @@ +"""Constants are defined here.""" + +import pathlib + +REPO_PATH = pathlib.Path(__file__).parents[1] + +DBAND_FILE_PATH = REPO_PATH / "data" / "dband_centers.csv" diff --git a/src/featurizers.py b/src/featurizers.py new file mode 100644 index 0000000..b55d7a1 --- /dev/null +++ b/src/featurizers.py @@ -0,0 +1,490 @@ +"""Node and bond featurizers.""" + +import abc +import csv + +import networkx as nx +import numpy as np +import torch +from mendeleev import element +from torch.nn.functional import one_hot + +from constants import DBAND_FILE_PATH +from graphs import AtomsGraph + + +class OneHotEncoder: + """Featurize a property using a one-hot encoding scheme.""" + + def __init__(self): + """Blank constructor.""" + pass + + def fit(self, min, max, n_intervals): + """Fit encoder based on min, max, and number of intervals parameters. + + Parameters + ---------- + min: int + Minimum possible value of the property. + max: int + Maximum possible value of the property. + n_intervals: int + Number of elements in the one-hot encoded array. + """ + self.min = min + self.max = max + self.n_intervals = n_intervals + + def transform(self, property): + """Transform a given property vector/matrix/tensor. + + Parameters + ---------- + property: list or np.ndarray or torch.Tensor + Tensor containing value(s) of the property to be transformed. The + tensor must have a shape of N where N is the number of atoms. + """ + # Transform property to tensor + property = torch.Tensor(property) + + # Scale between 0 and num_intervals + scaled_prop = ((property - self.min) / (self.max - self.min)) * self.n_intervals + + # Apply floor function + floor_prop = torch.floor(scaled_prop) + + # Create onehot encoding + onehot_prop = one_hot(floor_prop.to(torch.int64), num_classes=self.n_intervals) + + return onehot_prop + + +class Featurizer(abc.ABC): + """Meta class for defining featurizers.""" + + @abc.abstractmethod + def __init__(self, encoder): + """Initialize class variables and fit encoder. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + """ + pass + + @abc.abstractmethod + def featurize_graph(self, atoms_graph): + """Featurize an AtomsGraph. + + This class should create a feature tensor from the given graph. This + feature tensor should have a shape of (N, M) where N = number of atoms + and M = n_intervals provided to the encoder. The feature tensor should + be saved as self._feat_tensor. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + pass + + @abc.abstractproperty + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + pass + + @abc.abstractstaticmethod + def name(self): + """Return the name of the featurizer. + + Returns + ------- + _name = str + Name of the featurizer. + """ + return "abstract_featurizer" + + +class AtomNumFeaturizer(Featurizer): + """Featurize nodes based on atomic number.""" + + def __init__(self, encoder, min=0, max=80, n_intervals=10): + """Initialize featurizer with min = 0, max = 80, n_intervals = 10. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + min: int + Minimum value of atomic number + max: int + Maximum value of atomic number + n_intervals: int + Number of intervals + """ + # Initialize variables + self.min = min + self.max = max + self.n_intervals = n_intervals + + # Fit encoder + self.encoder = encoder + self.encoder.fit(self.min, self.max, self.n_intervals) + + def featurize_graph(self, atoms_graph): + """Featurize an AtomsGraph. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + # Get atomic numbers + atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") + atom_num_arr = np.array(list(atom_num_dict.values())) + + # Create node feature matrix + self._feat_tensor = self.encoder.transform(atom_num_arr) + + @property + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + return self._feat_tensor + + @staticmethod + def name(): + """Return the name of the featurizer.""" + return "atomic_number" + + +class DBandFeaturizer(Featurizer): + """Featurize nodes based on close-packed d-band center.""" + + def __init__(self, encoder, min=-5, max=3, n_intervals=10): + """Initialize featurizer with min = -5, max = 3, n_intervals = 10. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + min: int + Minimum value of d-band center + max: int + Maximum value of d-band center + n_intervals: int + Number of intervals + """ + # Initialize variables + self.min = min + self.max = max + self.n_intervals = n_intervals + + # Fit encoder + self.encoder = encoder + self.encoder.fit(self.min, self.max, self.n_intervals) + + # Get dband centers from csv + self.map_dict = {} + with open(DBAND_FILE_PATH, "r") as f: + csv_reader = csv.reader(f) + for row in csv_reader: + self.map_dict[int(row[0])] = float(row[1]) + + def featurize_graph(self, atoms_graph): + """Featurize an AtomsGraph. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + # Get atomic numbers + atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") + atom_num_arr = np.array(list(atom_num_dict.values())) + + # Map from atomic number to d-band center + dband_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr) + + # Create node feature matrix + self._feat_tensor = self.encoder.transform(dband_arr) + + @property + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + return self._feat_tensor + + @staticmethod + def name(): + """Return the name of the featurizer.""" + return "dband_center" + + +class ValenceFeaturizer(Featurizer): + """Featurize nodes based on number of valence electrons.""" + + def __init__(self, encoder, min=1, max=12, n_intervals=12): + """Initialize featurizer with min = 1, max = 12, n_intervals = 12. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + min: int + Minimum value of valence electrons + max: int + Maximum value of valence electrons + n_intervals: int + Number of intervals + """ + # Initialize variables + self.min = min + self.max = max + self.n_intervals = n_intervals + + # Fit encoder + self.encoder = encoder + self.encoder.fit(self.min, self.max, self.n_intervals) + + # Create a map between atomic number and number of valence electrons + self.map_dict = {0: 0, 1: 1, 2: 0} + for i in range(3, 21, 1): + self.map_dict[i] = min(element(i).ec.get_valence().ne(), 12) + + def featurize_graph(self, atoms_graph): + """Featurize an AtomsGraph. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + # Get atomic numbers + atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number") + atom_num_arr = np.array(list(atom_num_dict.values())) + + # Create node feature matrix + self._feat_tensor = self.encoder.transform(atom_num_arr) + + @property + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + return self._feat_tensor + + @staticmethod + def name(): + """Return the name of the featurizer.""" + return "valence" + + +class CoordinationFeaturizer(Featurizer): + """Featurize nodes based on coordination number.""" + + def __init__(self, encoder, min=1, max=15, n_intervals=15): + """Initialize featurizer with min = 1, max = 15, n_intervals = 15. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + min: int + Minimum value of valence electrons + max: int + Maximum value of valence electrons + n_intervals: int + Number of intervals + """ + # Initialize variables + self.min = min + self.max = max + self.n_intervals = n_intervals + + # Fit encoder + self.encoder = encoder + self.encoder.fit(self.min, self.max, self.n_intervals) + + def featurize_graph(self, atoms_graph): + """Featurize an AtomsGraph. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + # Get atomic numbers + cn_dict = nx.get_node_attributes(atoms_graph.graph, "coordination") + cn_arr = np.array(list(cn_dict.values())) + + # Create node feature matrix + self._feat_tensor = self.encoder.transform(cn_arr) + + @property + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + return self._feat_tensor + + @staticmethod + def name(): + """Return the name of the featurizer.""" + return "coordination" + + +class BondDistanceFeaturizer(Featurizer): + """Featurize edges based on bond distance.""" + + def __init__(self, encoder, min, max, n_intervals): + """Initialize bond distance featurizer. + + Parameters + ---------- + encoder: OneHotEncoder + Initialized object of class OneHotEncoder. + min: int + Minimum value of atomic number + max: int + Maximum value of atomic number + n_intervals: int + Number of intervals + """ + # Initialize variables + self.min = min + self.max = max + self.n_intervals = n_intervals + + # Fit encoder + self.encoder = encoder + self.encoder.fit(self.min, self.max, self.n_intervals) + + def featurize_graph(self, atoms_graph): + """Featurize an AtomsGraph. + + Parameters + ---------- + graph: AtomsGraph + A graph of a collection of bulk, surface, or adsorbate atoms. + """ + # Get atomic numbers + bond_dist_dict = nx.get_edge_attributes(atoms_graph.graph, "bond_distance") + bond_dist_arr = np.array(list(bond_dist_dict.values())) + + # Create node feature matrix + self._feat_tensor = self.encoder.transform(bond_dist_arr) + + # Create list of edge indices + self._edge_indices = torch.Tensor(list(atoms_graph.graph.edges())) + + @property + def feat_tensor(self): + """Return the featurized node tensor. + + Returns + ------- + feat_tensor: torch.Tensor + Featurized tensor having shape (N, M) where N = number of atoms and + M = n_intervals provided to the encoder + """ + return self._feat_tensor + + @property + def edge_indices(self): + """Return list of edge indices. + + Returns + ------- + edge_indices: torch.Tensor + Tensor with edge indices + """ + return self._edge_indices + + @staticmethod + def name(): + """Return the name of the featurizer.""" + return "valence" + + +class BulkBondDistanceFeaturizer(BondDistanceFeaturizer): + """Featurize bulk bond distances. + + Child class of BondDistanceFeaturizer with suitable min, max, and n_interval + values initialized for bulk atoms. The values are: min = 0, max = 8, + n_intervals = 8. + """ + + def __init__(self, encoder, min=0, max=8, n_intervals=8): + super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + + +class SurfaceBondDistanceFeaturizer(BondDistanceFeaturizer): + """Featurize bulk bond distances. + + Child class of BondDistanceFeaturizer with suitable min, max, and n_interval + values initialized for surface atoms. The values are: min = 0, max = 5, + n_intervals = 10. + """ + + def __init__(self, encoder, min=0, max=5, n_intervals=10): + super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + + +class AdsorbateBondDistanceFeaturizer(BondDistanceFeaturizer): + """Featurize bulk bond distances. + + Child class of BondDistanceFeaturizer with suitable min, max, and n_interval + values initialized for adsorbate atoms. The values are: min = 0, max = 4, + n_intervals = 16. + """ + + def __init__(self, encoder, min=0, max=4, n_intervals=16): + super().__init__(encoder, min=min, max=max, n_intervals=n_intervals) + + +if __name__ == "__main__": + from ase.io import read + + atoms = read("CONTCAR") + g = AtomsGraph(atoms, select_idx=[1, 10, 11, 12]) + + anf = AtomNumFeaturizer(OneHotEncoder()) + anf.featurize_graph(g) + print(anf.feat_tensor.shape) + + bdf = BulkBondDistanceFeaturizer(OneHotEncoder()) + bdf.featurize_graph(g) + print(bdf.feat_tensor) + print(bdf.edge_indices) diff --git a/src/graphs.py b/src/graphs.py index 3ab8d5e..b744e8f 100644 --- a/src/graphs.py +++ b/src/graphs.py @@ -1,10 +1,11 @@ """Classes to create bulk, surface, and adsorbate graphs.""" -import abc +from copy import deepcopy import networkx as nx import numpy as np -from ase.neighborlist import build_neighbor_list +from ase.neighborlist import (NewPrimitiveNeighborList, build_neighbor_list, + natural_cutoffs) class AtomsGraph: @@ -35,58 +36,132 @@ def __init__(self, atoms, select_idx, max_atoms=50): def create_graph(self): """Create a graph from an atoms object and neighbor_list.""" - # Create neighbor list of atoms - self.neighbor_list = build_neighbor_list( - self.atoms, bothways=True, self_interaction=False - ) - # Create NetworkX Multigraph graph = nx.MultiGraph() # Iterate over selected atoms and add them as nodes + self.node_count = 0 + self.map_idx_node = {} for atom in self.atoms: - if atom.index in self.select_idx and atom.index not in list(graph.nodes()): + if atom.index in self.select_idx and atom.index not in list( + graph.nodes(data="index") + ): graph.add_node( - atom.index, + self.node_count, + index=atom.index, atomic_number=atom.number, symbol=atom.symbol, - position=atom.position, ) + self.map_idx_node[atom.index] = self.node_count + self.node_count += 1 + + # Create neighbor list of atoms + self.neighbor_list = build_neighbor_list( + self.atoms, + natural_cutoffs(self.atoms), + bothways=True, + self_interaction=False, + primitive=NewPrimitiveNeighborList, + ) # Iterate over nodes, identify neighbors, and add edges between them - for n in graph.nodes(): + node_list = list(graph.nodes()) + bond_tuples = [] + for n in node_list: # Get neighbors from neighbor list - neighbor_idx, _ = self.neighbor_list.get_neighbors(n) + neighbor_idx, neighbor_offsets = self.neighbor_list.get_neighbors( + graph.nodes[n]["index"] + ) # Iterate over neighbors - for nn in neighbor_idx: + for nn, offset in zip(neighbor_idx, neighbor_offsets): + # Skip if self atom + if nn == graph.nodes[n]["index"]: + continue + # Save bond + bond = (graph.nodes[n]["index"], nn) + rev_bond = tuple(reversed(bond)) + # Check if bond has already been added + if rev_bond in bond_tuples: + continue + else: + bond_tuples.append(bond) # If neighbor is not in graph, add it as a node - if not graph.has_node(nn): + node_indices = nx.get_node_attributes(graph, "index") + if nn not in list(node_indices.values()): graph.add_node( - nn, + self.node_count, + index=nn, atomic_number=self.atoms[nn].number, symbol=self.atoms[nn].symbol, - position=self.atoms[nn].position, ) + self.map_idx_node[nn] = self.node_count + self.node_count += 1 # Calculate bond distance - bond_dist = np.linalg.norm( - graph.nodes[n].position - graph.nodes[nn].position + bond_dist = self.calc_minimum_distance( + self.atoms[graph.nodes[n]["index"]].position, + self.atoms[nn].position, + offset, ) - graph.add_edge(n, nn, bond_distance=bond_dist) + graph.add_edge(n, self.map_idx_node[nn], bond_distance=bond_dist) + + # Pad graph + graph = self.pad_graph(graph) + + # Add coordination numbers + for n in graph.nodes(): + graph.nodes[n]["coordination"] = graph.degree[n] # Assign graph object self.graph = graph - def featurize(self, node_featurizer, bond_featurizer): - """Featurize nodes and edges of the graph. + def pad_graph(self, graph): + """Pad graph with empty nodes. + + This can be used to make sure that the number of nodes in each graph is + equal to max_atoms Parameters ---------- - node_featurizer: TODO - Object that featurizes atoms - bond_featurizer: TODO - Object that featurizes bonds + graph: Networkx.Graph + A Networkx graph + + Returns + ------- + padded_graph: Networkx.Graph + Padded graph """ - pass + padded_graph = deepcopy(graph) + + for i in range(self.node_count, self.max_atoms, 1): + padded_graph.add_node( + i, index=-1, atomic_number=0, symbol="", position=np.zeros(3) + ) + + return padded_graph + + def calc_minimum_distance(self, pos_1, pos_2, offset): + """ + Calculate minimum distance between two atoms. + + Parameters + ---------- + pos_1: np.ndarray + Position of first atom in x, y, z coordinates + pos_2: np.ndarray + Position of second atom in x, y, z coordinates + offset: np.ndarray + Offset returned by the neighbor list in ASE + """ + # First, calculate the distance without offset + dist_1 = np.linalg.norm(pos_1 - pos_2) + + # Next calculate the distance by applying offset to second position + dist_2 = np.linalg.norm(pos_1 - (pos_2 + offset @ self.atoms.get_cell())) + + # Get minimum distance + min_dist = min(dist_1, dist_2) + + return min_dist def plot(self, filename=None): """Plot the graph using NetworkX. @@ -98,22 +173,11 @@ def plot(self, filename=None): """ pass - def get_node_tensor(self): - """Get the node matrix of the graph as a PyTorch Tensor. - Returns - ------- - node_matrix: torch.Tensor - Node matrix - """ - pass - - def get_edge_tensor(self): - """Get the edge matrix of the graph as a PyTorch Tensor. +if __name__ == "__main__": + from ase.io import read - Returns - ------- - edge_matrix: torch.Tensor - Edge tensor - """ - pass + atoms = read("CONTCAR") + g = AtomsGraph(atoms, select_idx=[1, 10, 11, 12]) + print(g.map_idx_node) + print(g.graph.edges(data="bond_distance"))