Added util functions

GreeleyGroup · Sep 15, 2023 · 9bd23b1 · 9bd23b1
1 parent 97bc2a8
commit 9bd23b1
Show file tree

Hide file tree

Showing 3 changed files with 274 additions and 4 deletions.
diff --git a/data/dband_centers.csv b/data/dband_centers.csv
@@ -1,3 +1,4 @@
+0, 0
 22, 1.50
 23, 1.06
 24, 0.16

diff --git a/src/featurizers.py b/src/featurizers.py
@@ -151,9 +151,11 @@ def featurize_graph(self, atoms_graph):
         # Get atomic numbers
         atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number")
         atom_num_arr = np.array(list(atom_num_dict.values()))
+        zero_idx = np.argwhere(atom_num_arr == 0.)
 
         # Create node feature matrix
         self._feat_tensor = self.encoder.transform(atom_num_arr)
+        self._feat_tensor[zero_idx, :] = 0.
 
     @property
     def feat_tensor(self):
@@ -217,12 +219,14 @@ def featurize_graph(self, atoms_graph):
         # Get atomic numbers
         atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number")
         atom_num_arr = np.array(list(atom_num_dict.values()))
+        zero_idx = np.argwhere(atom_num_arr == 0.)
 
         # Map from atomic number to d-band center
         dband_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr)
 
         # Create node feature matrix
         self._feat_tensor = self.encoder.transform(dband_arr)
+        self._feat_tensor[zero_idx, :] = 0.
 
     @property
     def feat_tensor(self):
@@ -284,9 +288,14 @@ def featurize_graph(self, atoms_graph):
         # Get atomic numbers
         atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number")
         atom_num_arr = np.array(list(atom_num_dict.values()))
+        zero_idx = np.argwhere(atom_num_arr == 0.)
+
+        # Get valence electrons for each atom
+        valence_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr)
 
         # Create node feature matrix
-        self._feat_tensor = self.encoder.transform(atom_num_arr)
+        self._feat_tensor = self.encoder.transform(valence_arr)
+        self._feat_tensor[zero_idx, :] = 0.
 
     @property
     def feat_tensor(self):
@@ -305,6 +314,89 @@ def name():
         """Return the name of the featurizer."""
         return "valence"
 
+class ReactivityFeaturizer(Featurizer):
+    """Featurize nodes based on close-packed d-band center and/or valence."""
+
+    def __init__(self, encoder, min=-5, max=3, n_intervals=10):
+        """Initialize featurizer with min = -5, max = 3, n_intervals = 10.
+
+        Parameters
+        ----------
+        encoder: OneHotEncoder
+            Initialized object of class OneHotEncoder.
+        min: int
+            Minimum value of d-band center
+        max: int
+            Maximum value of d-band center
+        n_intervals: int
+            Number of intervals
+        """
+        # Initialize variables
+        self.min = min
+        self.max = max
+        self.n_intervals = n_intervals
+
+        # Fit encoder
+        self.encoder_dband = encoder
+        self.encoder_dband.fit(self.min, self.max, self.n_intervals)
+
+        # Fit valence encoder
+        self.encoder_val = encoder
+        self.encoder_val.fit(1, 12, self.n_intervals)
+
+        # Get dband centers from csv
+        self.map_dict_dband = {}
+        with open(DBAND_FILE_PATH, "r") as f:
+            csv_reader = csv.reader(f)
+            for row in csv_reader:
+                self.map_dict_dband[int(row[0])] = float(row[1])
+
+        # Create a map between atomic number and number of valence electrons
+        self.map_dict_val = {0: 0, 1: 1, 2: 0}
+        for i in range(3, 21, 1):
+            self.map_dict_val[i] = min(element(i).ec.get_valence().ne(), 12)
+
+    def featurize_graph(self, atoms_graph):
+        """Featurize an AtomsGraph.
+
+        Parameters
+        ----------
+        graph: AtomsGraph
+            A graph of a collection of bulk, surface, or adsorbate atoms.
+        """
+        # Get atomic numbers
+        atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number")
+        atom_num_arr = np.array(list(atom_num_dict.values()))
+        zero_idx = np.argwhere(atom_num_arr == 0.)
+
+        # Map from atomic number to d-band center
+        react_arr = np.zeros_like(atom_num_arr)
+        for i, n in enumerate(atom_num_arr):
+            if n < 21:
+                react_arr[i] = self.map_dict_val[n]
+            else:
+                react_arr[i] = self.map_dict_dband[n]
+
+        # Create node feature matrix
+        self._feat_tensor = self.encoder.transform(react_arr)
+        self._feat_tensor[zero_idx, :] = 0.
+
+    @property
+    def feat_tensor(self):
+        """Return the featurized node tensor.
+
+        Returns
+        -------
+        feat_tensor: torch.Tensor
+            Featurized tensor having shape (N, M) where N = number of atoms and
+            M = n_intervals provided to the encoder
+        """
+        return self._feat_tensor
+
+    @staticmethod
+    def name():
+        """Return the name of the featurizer."""
+        return "reactivity"
 
 class CoordinationFeaturizer(Featurizer):
     """Featurize nodes based on coordination number."""
@@ -341,11 +433,17 @@ def featurize_graph(self, atoms_graph):
             A graph of a collection of bulk, surface, or adsorbate atoms.
         """
         # Get atomic numbers
+        atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number")
+        atom_num_arr = np.array(list(atom_num_dict.values()))
+        zero_idx = np.argwhere(atom_num_arr == 0.)
+
+        # Get coordination numbers
         cn_dict = nx.get_node_attributes(atoms_graph.graph, "coordination")
         cn_arr = np.array(list(cn_dict.values()))
 
         # Create node feature matrix
         self._feat_tensor = self.encoder.transform(cn_arr)
+        self._feat_tensor[zero_idx, :] = 0.
 
     @property
     def feat_tensor(self):
@@ -407,7 +505,7 @@ def featurize_graph(self, atoms_graph):
         self._feat_tensor = self.encoder.transform(bond_dist_arr)
 
         # Create list of edge indices
-        self._edge_indices = torch.Tensor(list(atoms_graph.graph.edges()))
+        self._edge_indices = torch.Tensor(list(atoms_graph.graph.edges())).view(2, -1)
 
     @property
     def feat_tensor(self):
@@ -435,7 +533,7 @@ def edge_indices(self):
     @staticmethod
     def name():
         """Return the name of the featurizer."""
-        return "valence"
+        return "bond_distance"
 
 
 class BulkBondDistanceFeaturizer(BondDistanceFeaturizer):
@@ -449,6 +547,11 @@ class BulkBondDistanceFeaturizer(BondDistanceFeaturizer):
     def __init__(self, encoder, min=0, max=8, n_intervals=8):
         super().__init__(encoder, min=min, max=max, n_intervals=n_intervals)
 
+    @staticmethod
+    def name():
+        """Return the name of the featurizer."""
+        return "bulk_bond_distance"
+
 
 class SurfaceBondDistanceFeaturizer(BondDistanceFeaturizer):
     """Featurize bulk bond distances.
@@ -461,6 +564,10 @@ class SurfaceBondDistanceFeaturizer(BondDistanceFeaturizer):
     def __init__(self, encoder, min=0, max=5, n_intervals=10):
         super().__init__(encoder, min=min, max=max, n_intervals=n_intervals)
 
+    def name():
+        """Return the name of the featurizer."""
+        return "surface_bond_distance"
+
 
 class AdsorbateBondDistanceFeaturizer(BondDistanceFeaturizer):
     """Featurize bulk bond distances.
@@ -473,6 +580,23 @@ class AdsorbateBondDistanceFeaturizer(BondDistanceFeaturizer):
     def __init__(self, encoder, min=0, max=4, n_intervals=16):
         super().__init__(encoder, min=min, max=max, n_intervals=n_intervals)
 
+    def name():
+        """Return the name of the featurizer."""
+        return "adsorbate_distance"
+
+
+list_of_node_featurizers = [
+    AtomNumFeaturizer,
+    DBandFeaturizer,
+    ValenceFeaturizer,
+    CoordinationFeaturizer,
+]
+
+list_of_edge_featurizers = [
+    BulkBondDistanceFeaturizer,
+    SurfaceBondDistanceFeaturizer,
+    AdsorbateBondDistanceFeaturizer,
+]
 
 if __name__ == "__main__":
     from ase.io import read
@@ -486,5 +610,5 @@ def __init__(self, encoder, min=0, max=4, n_intervals=16):
 
     bdf = BulkBondDistanceFeaturizer(OneHotEncoder())
     bdf.featurize_graph(g)
-    print(bdf.feat_tensor)
+    print(bdf.feat_tensor.shape)
     print(bdf.edge_indices)
diff --git a/src/utils.py b/src/utils.py
@@ -0,0 +1,145 @@
+"""Utility functions."""
+
+from copy import deepcopy
+
+import numpy as np
+import torch
+
+from featurizers import (
+    list_of_node_featurizers,
+    list_of_edge_featurizers,
+    OneHotEncoder
+)
+from graphs import AtomsGraph
+
+def partition_structure(atoms, n_partitions, z_cutoffs):
+    """Partition atomic structue into bulk, surface, and/or adsorbates.
+    
+    Parameters
+    ----------
+    atoms: ase.Atoms object
+        The structure to be partitioned
+    n_partitions: int
+        Number of partitions
+    z_cutoffs: list or np.ndarray
+        List of z-coordinate cutoffs. xy planes are placed at the specified
+        cutoffs to partition atoms above and below them. The length of z-cutoffs
+        should be equal to one less than the number of partitions. 
+    """
+    # Check if length of z_cutoffs is equal to n_paritions
+    if len(z_cutoffs) != n_partitions - 1:
+        raise ValueError("The length of z_cutoffs must be equal to\
+                         one less than the number of partitions")
+
+    # Add 0 and infinity to cutoffs
+    z_cutoffs = np.insert(z_cutoffs, 0, 0)
+    z_cutoffs = np.insert(z_cutoffs, len(z_cutoffs), np.inf)
+
+    # Get positions
+    pos = atoms.get_positions()
+
+    # Iterate over number of partitions
+    part_atoms = []
+    for i in range(n_partitions):
+        part_idx = np.argwhere(
+            (pos[:, -1] >= z_cutoffs[i]) & (pos[:, -1] < z_cutoffs[i+1])
+        ).flatten().tolist()
+        part_atoms.append(part_idx)
+
+    return part_atoms
+
+def featurize_atoms(
+        atoms,
+        select_idx,
+        node_features,
+        edge_features,
+        max_atoms=50,
+        encoder=OneHotEncoder()
+    ):
+    """Featurize atoms and bonds with the chosen featurizers.
+
+    Parameters
+    ----------
+    atoms: ase.Atoms objet
+        Atoms object containing the whole structure
+    select_idx: list
+        List of indices for atoms to featurize. Typically, this will be provided
+        by the partition_structure function
+    node_features: list or np.ndarray
+        Names of node featurizers to use (current options: atomic_number, dband
+        center, valence electrons, coordination, reactivity). The "reactivity"
+        featurizer uses valence electrons for adsorbates (atomic number < 21) and
+        d-band center for larger transition metal atoms (atomic number >= 21).
+    edge_features: list or np.ndarray
+        Names of edge featurizers to use (current options: bulk_bond_distance,
+        surface_bond_distance, adsorbate_bond_distance). All of these encode
+        bond distance using a one-hot encoder, but the bounds for each vary.
+    max_atoms: int (default = 50)
+        Maximum number of allowed atoms. If the number of atoms in the graph are
+        fewer than this number, the graph is padded with empty nodes. This is
+        required to make the sizes of the node feature tensors consistent across
+        structures.
+    encoder: encoder object from featurizers.py
+        Currently only the OneHotEncoder is supported
+
+    Returns
+    -------
+    dict
+        Dictionary with keys "node_tensor", "edge_tensor", and "edge_indices" and
+        corresponding tensors as values.
+    """
+    # Create graph
+    atoms_graph = AtomsGraph(atoms=atoms, select_idx=select_idx, max_atoms=max_atoms)
+
+    # Collect node featurizers
+    node_feats = []
+    node_intervals = []
+    for node_featurizer in list_of_node_featurizers:
+        if node_featurizer.name() in node_features:
+            nf = node_featurizer(deepcopy(encoder))
+            node_intervals.append(nf.n_intervals)
+            node_feats.append(nf)
+
+    # Collect edge featurizers
+    edge_feats = []
+    edge_intervals = []
+    for edge_featurizer in list_of_edge_featurizers:
+        if edge_featurizer.name() in edge_features:
+            ef = edge_featurizer(deepcopy(encoder))
+            edge_intervals.append(ef.n_intervals)
+            edge_feats.append(ef)
+
+    # Store node matrices from each feaurizer
+    node_mats = []
+    for nf in node_feats:
+        nf.featurize_graph(atoms_graph)
+        node_mats.append(nf.feat_tensor)
+    # Stack node mats to create node tensor
+    node_tensor = torch.hstack(node_mats)
+
+    # Store edge matrices from each featurizer
+    edge_mats = []
+    for ef in edge_feats:
+        ef.featurize_graph(atoms_graph)
+        edge_mats.append(ef.feat_tensor)
+    # Stack edge mats to create edge tensor
+    edge_tensor = torch.hstack(edge_mats)
+
+    # Store edge indices
+    edge_indices = ef.edge_indices
+
+    return {"node_tensor": node_tensor, "edge_tensor": edge_tensor,
+            "edge_indices": edge_indices}
+
+
+if __name__ == "__main__":
+    from ase.io import read
+    atoms = read("CONTCAR")
+
+    part_atoms = partition_structure(atoms, 3, z_cutoffs=[15, 23.5])
+    print(part_atoms)
+
+    feat_dict = featurize_atoms(atoms, part_atoms[0], ["atomic_number", "dband_center"],
+                                ["bulk_bond_distance"], max_atoms=34)
+    print(feat_dict)
+
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
+, 0
 , 1.50
 , 1.06
 , 0.16
@@ Expand Down @@