Skip to content

Commit

Permalink
Added util functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Gaurav S Deshmukh committed Sep 15, 2023
1 parent 97bc2a8 commit 9bd23b1
Show file tree
Hide file tree
Showing 3 changed files with 274 additions and 4 deletions.
1 change: 1 addition & 0 deletions data/dband_centers.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
0, 0
22, 1.50
23, 1.06
24, 0.16
Expand Down
132 changes: 128 additions & 4 deletions src/featurizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,11 @@ def featurize_graph(self, atoms_graph):
# Get atomic numbers
atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number")
atom_num_arr = np.array(list(atom_num_dict.values()))
zero_idx = np.argwhere(atom_num_arr == 0.)

# Create node feature matrix
self._feat_tensor = self.encoder.transform(atom_num_arr)
self._feat_tensor[zero_idx, :] = 0.

@property
def feat_tensor(self):
Expand Down Expand Up @@ -217,12 +219,14 @@ def featurize_graph(self, atoms_graph):
# Get atomic numbers
atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number")
atom_num_arr = np.array(list(atom_num_dict.values()))
zero_idx = np.argwhere(atom_num_arr == 0.)

# Map from atomic number to d-band center
dband_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr)

# Create node feature matrix
self._feat_tensor = self.encoder.transform(dband_arr)
self._feat_tensor[zero_idx, :] = 0.

@property
def feat_tensor(self):
Expand Down Expand Up @@ -284,9 +288,14 @@ def featurize_graph(self, atoms_graph):
# Get atomic numbers
atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number")
atom_num_arr = np.array(list(atom_num_dict.values()))
zero_idx = np.argwhere(atom_num_arr == 0.)

# Get valence electrons for each atom
valence_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr)

# Create node feature matrix
self._feat_tensor = self.encoder.transform(atom_num_arr)
self._feat_tensor = self.encoder.transform(valence_arr)
self._feat_tensor[zero_idx, :] = 0.

@property
def feat_tensor(self):
Expand All @@ -305,6 +314,89 @@ def name():
"""Return the name of the featurizer."""
return "valence"

class ReactivityFeaturizer(Featurizer):
"""Featurize nodes based on close-packed d-band center and/or valence."""

def __init__(self, encoder, min=-5, max=3, n_intervals=10):
"""Initialize featurizer with min = -5, max = 3, n_intervals = 10.
Parameters
----------
encoder: OneHotEncoder
Initialized object of class OneHotEncoder.
min: int
Minimum value of d-band center
max: int
Maximum value of d-band center
n_intervals: int
Number of intervals
"""
# Initialize variables
self.min = min
self.max = max
self.n_intervals = n_intervals

# Fit encoder
self.encoder_dband = encoder
self.encoder_dband.fit(self.min, self.max, self.n_intervals)

# Fit valence encoder
self.encoder_val = encoder
self.encoder_val.fit(1, 12, self.n_intervals)

# Get dband centers from csv
self.map_dict_dband = {}
with open(DBAND_FILE_PATH, "r") as f:
csv_reader = csv.reader(f)
for row in csv_reader:
self.map_dict_dband[int(row[0])] = float(row[1])

# Create a map between atomic number and number of valence electrons
self.map_dict_val = {0: 0, 1: 1, 2: 0}
for i in range(3, 21, 1):
self.map_dict_val[i] = min(element(i).ec.get_valence().ne(), 12)

def featurize_graph(self, atoms_graph):
"""Featurize an AtomsGraph.
Parameters
----------
graph: AtomsGraph
A graph of a collection of bulk, surface, or adsorbate atoms.
"""
# Get atomic numbers
atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number")
atom_num_arr = np.array(list(atom_num_dict.values()))
zero_idx = np.argwhere(atom_num_arr == 0.)

# Map from atomic number to d-band center
react_arr = np.zeros_like(atom_num_arr)
for i, n in enumerate(atom_num_arr):
if n < 21:
react_arr[i] = self.map_dict_val[n]
else:
react_arr[i] = self.map_dict_dband[n]

# Create node feature matrix
self._feat_tensor = self.encoder.transform(react_arr)
self._feat_tensor[zero_idx, :] = 0.

@property
def feat_tensor(self):
"""Return the featurized node tensor.
Returns
-------
feat_tensor: torch.Tensor
Featurized tensor having shape (N, M) where N = number of atoms and
M = n_intervals provided to the encoder
"""
return self._feat_tensor

@staticmethod
def name():
"""Return the name of the featurizer."""
return "reactivity"

class CoordinationFeaturizer(Featurizer):
"""Featurize nodes based on coordination number."""
Expand Down Expand Up @@ -341,11 +433,17 @@ def featurize_graph(self, atoms_graph):
A graph of a collection of bulk, surface, or adsorbate atoms.
"""
# Get atomic numbers
atom_num_dict = nx.get_node_attributes(atoms_graph.graph, "atomic_number")
atom_num_arr = np.array(list(atom_num_dict.values()))
zero_idx = np.argwhere(atom_num_arr == 0.)

# Get coordination numbers
cn_dict = nx.get_node_attributes(atoms_graph.graph, "coordination")
cn_arr = np.array(list(cn_dict.values()))

# Create node feature matrix
self._feat_tensor = self.encoder.transform(cn_arr)
self._feat_tensor[zero_idx, :] = 0.

@property
def feat_tensor(self):
Expand Down Expand Up @@ -407,7 +505,7 @@ def featurize_graph(self, atoms_graph):
self._feat_tensor = self.encoder.transform(bond_dist_arr)

# Create list of edge indices
self._edge_indices = torch.Tensor(list(atoms_graph.graph.edges()))
self._edge_indices = torch.Tensor(list(atoms_graph.graph.edges())).view(2, -1)

@property
def feat_tensor(self):
Expand Down Expand Up @@ -435,7 +533,7 @@ def edge_indices(self):
@staticmethod
def name():
"""Return the name of the featurizer."""
return "valence"
return "bond_distance"


class BulkBondDistanceFeaturizer(BondDistanceFeaturizer):
Expand All @@ -449,6 +547,11 @@ class BulkBondDistanceFeaturizer(BondDistanceFeaturizer):
def __init__(self, encoder, min=0, max=8, n_intervals=8):
super().__init__(encoder, min=min, max=max, n_intervals=n_intervals)

@staticmethod
def name():
"""Return the name of the featurizer."""
return "bulk_bond_distance"


class SurfaceBondDistanceFeaturizer(BondDistanceFeaturizer):
"""Featurize bulk bond distances.
Expand All @@ -461,6 +564,10 @@ class SurfaceBondDistanceFeaturizer(BondDistanceFeaturizer):
def __init__(self, encoder, min=0, max=5, n_intervals=10):
super().__init__(encoder, min=min, max=max, n_intervals=n_intervals)

def name():
"""Return the name of the featurizer."""
return "surface_bond_distance"


class AdsorbateBondDistanceFeaturizer(BondDistanceFeaturizer):
"""Featurize bulk bond distances.
Expand All @@ -473,6 +580,23 @@ class AdsorbateBondDistanceFeaturizer(BondDistanceFeaturizer):
def __init__(self, encoder, min=0, max=4, n_intervals=16):
super().__init__(encoder, min=min, max=max, n_intervals=n_intervals)

def name():
"""Return the name of the featurizer."""
return "adsorbate_distance"


list_of_node_featurizers = [
AtomNumFeaturizer,
DBandFeaturizer,
ValenceFeaturizer,
CoordinationFeaturizer,
]

list_of_edge_featurizers = [
BulkBondDistanceFeaturizer,
SurfaceBondDistanceFeaturizer,
AdsorbateBondDistanceFeaturizer,
]

if __name__ == "__main__":
from ase.io import read
Expand All @@ -486,5 +610,5 @@ def __init__(self, encoder, min=0, max=4, n_intervals=16):

bdf = BulkBondDistanceFeaturizer(OneHotEncoder())
bdf.featurize_graph(g)
print(bdf.feat_tensor)
print(bdf.feat_tensor.shape)
print(bdf.edge_indices)
145 changes: 145 additions & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
"""Utility functions."""

from copy import deepcopy

import numpy as np
import torch

from featurizers import (
list_of_node_featurizers,
list_of_edge_featurizers,
OneHotEncoder
)
from graphs import AtomsGraph

def partition_structure(atoms, n_partitions, z_cutoffs):
"""Partition atomic structue into bulk, surface, and/or adsorbates.
Parameters
----------
atoms: ase.Atoms object
The structure to be partitioned
n_partitions: int
Number of partitions
z_cutoffs: list or np.ndarray
List of z-coordinate cutoffs. xy planes are placed at the specified
cutoffs to partition atoms above and below them. The length of z-cutoffs
should be equal to one less than the number of partitions.
"""
# Check if length of z_cutoffs is equal to n_paritions
if len(z_cutoffs) != n_partitions - 1:
raise ValueError("The length of z_cutoffs must be equal to\
one less than the number of partitions")

# Add 0 and infinity to cutoffs
z_cutoffs = np.insert(z_cutoffs, 0, 0)
z_cutoffs = np.insert(z_cutoffs, len(z_cutoffs), np.inf)

# Get positions
pos = atoms.get_positions()

# Iterate over number of partitions
part_atoms = []
for i in range(n_partitions):
part_idx = np.argwhere(
(pos[:, -1] >= z_cutoffs[i]) & (pos[:, -1] < z_cutoffs[i+1])
).flatten().tolist()
part_atoms.append(part_idx)

return part_atoms

def featurize_atoms(
atoms,
select_idx,
node_features,
edge_features,
max_atoms=50,
encoder=OneHotEncoder()
):
"""Featurize atoms and bonds with the chosen featurizers.
Parameters
----------
atoms: ase.Atoms objet
Atoms object containing the whole structure
select_idx: list
List of indices for atoms to featurize. Typically, this will be provided
by the partition_structure function
node_features: list or np.ndarray
Names of node featurizers to use (current options: atomic_number, dband
center, valence electrons, coordination, reactivity). The "reactivity"
featurizer uses valence electrons for adsorbates (atomic number < 21) and
d-band center for larger transition metal atoms (atomic number >= 21).
edge_features: list or np.ndarray
Names of edge featurizers to use (current options: bulk_bond_distance,
surface_bond_distance, adsorbate_bond_distance). All of these encode
bond distance using a one-hot encoder, but the bounds for each vary.
max_atoms: int (default = 50)
Maximum number of allowed atoms. If the number of atoms in the graph are
fewer than this number, the graph is padded with empty nodes. This is
required to make the sizes of the node feature tensors consistent across
structures.
encoder: encoder object from featurizers.py
Currently only the OneHotEncoder is supported
Returns
-------
dict
Dictionary with keys "node_tensor", "edge_tensor", and "edge_indices" and
corresponding tensors as values.
"""
# Create graph
atoms_graph = AtomsGraph(atoms=atoms, select_idx=select_idx, max_atoms=max_atoms)

# Collect node featurizers
node_feats = []
node_intervals = []
for node_featurizer in list_of_node_featurizers:
if node_featurizer.name() in node_features:
nf = node_featurizer(deepcopy(encoder))
node_intervals.append(nf.n_intervals)
node_feats.append(nf)

# Collect edge featurizers
edge_feats = []
edge_intervals = []
for edge_featurizer in list_of_edge_featurizers:
if edge_featurizer.name() in edge_features:
ef = edge_featurizer(deepcopy(encoder))
edge_intervals.append(ef.n_intervals)
edge_feats.append(ef)

# Store node matrices from each feaurizer
node_mats = []
for nf in node_feats:
nf.featurize_graph(atoms_graph)
node_mats.append(nf.feat_tensor)
# Stack node mats to create node tensor
node_tensor = torch.hstack(node_mats)

# Store edge matrices from each featurizer
edge_mats = []
for ef in edge_feats:
ef.featurize_graph(atoms_graph)
edge_mats.append(ef.feat_tensor)
# Stack edge mats to create edge tensor
edge_tensor = torch.hstack(edge_mats)

# Store edge indices
edge_indices = ef.edge_indices

return {"node_tensor": node_tensor, "edge_tensor": edge_tensor,
"edge_indices": edge_indices}


if __name__ == "__main__":
from ase.io import read
atoms = read("CONTCAR")

part_atoms = partition_structure(atoms, 3, z_cutoffs=[15, 23.5])
print(part_atoms)

feat_dict = featurize_atoms(atoms, part_atoms[0], ["atomic_number", "dband_center"],
["bulk_bond_distance"], max_atoms=34)
print(feat_dict)

0 comments on commit 9bd23b1

Please sign in to comment.