Skip to content

Commit

Permalink
Added bond featurizers
Browse files Browse the repository at this point in the history
  • Loading branch information
Gaurav S Deshmukh committed Sep 12, 2023
1 parent 916580b commit d80ef8b
Show file tree
Hide file tree
Showing 3 changed files with 209 additions and 28 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
*POSCAR*
*CONTCAR*
*.csv
!data/dband_centers.csv
__pycache__
208 changes: 187 additions & 21 deletions src/featurizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from mendeleev import element

from constants import DBAND_FILE_PATH
from graphs import AtomsGraph

class OneHotEncoder:
"""Featurize a property using a one-hot encoding scheme."""
Expand Down Expand Up @@ -114,18 +115,24 @@ def name(self):

class AtomNumFeaturizer(Featurizer):
"""Featurize nodes based on atomic number."""
def __init__(self, encoder):
def __init__(self, encoder, min=1, max=80, n_intervals=10):
"""Initialize featurizer with min = 1, max = 80, n_intervals = 10.
Parameters
----------
encoder: OneHotEncoder
Initialized object of class OneHotEncoder.
min: int
Minimum value of atomic number
max: int
Maximum value of atomic number
n_intervals: int
Number of intervals
"""
# Initialize variables
self.min = 1
self.max = 80
self.n_intervals = 10
self.min = min
self.max = max
self.n_intervals = n_intervals

# Fit encoder
self.encoder = encoder
Expand All @@ -141,7 +148,7 @@ def featurize_graph(self, graph):
"""
# Get atomic numbers
atom_num_dict = nx.get_node_attributes(graph, "atomic_number")
atom_num_arr = np.array(atom_num_dict.values())
atom_num_arr = np.array(list(atom_num_dict.values()))

# Create node feature matrix
self._feat_tensor = self.encoder.transform(atom_num_arr)
Expand All @@ -165,18 +172,24 @@ def name():

class DBandFeaturizer(Featurizer):
"""Featurize nodes based on close-packed d-band center."""
def __init__(self, encoder):
def __init__(self, encoder, min=-5, max=3, n_intervals=10):
"""Initialize featurizer with min = -5, max = 3, n_intervals = 10.
Parameters
----------
encoder: OneHotEncoder
Initialized object of class OneHotEncoder.
min: int
Minimum value of d-band center
max: int
Maximum value of d-band center
n_intervals: int
Number of intervals
"""
# Initialize variables
self.min = -5
self.max = 3
self.n_intervals = 10
self.min = min
self.max = max
self.n_intervals = n_intervals

# Fit encoder
self.encoder = encoder
Expand All @@ -187,7 +200,7 @@ def __init__(self, encoder):
with open(DBAND_FILE_PATH, "r") as f:
csv_reader = csv.reader(f)
for row in csv_reader:
self.map_dict[row[0]] = row[1]
self.map_dict[int(row[0])] = float(row[1])

def featurize_graph(self, graph):
"""Featurize an AtomsGraph.
Expand All @@ -199,7 +212,7 @@ def featurize_graph(self, graph):
"""
# Get atomic numbers
atom_num_dict = nx.get_node_attributes(graph, "atomic_number")
atom_num_arr = np.array(atom_num_dict.values())
atom_num_arr = np.array(list(atom_num_dict.values()))

# Map from atomic number to d-band center
dband_arr = np.vectorize(self.map_dict.__getitem__)(atom_num_arr)
Expand All @@ -226,18 +239,24 @@ def name():

class ValenceFeaturizer(Featurizer):
"""Featurize nodes based on number of valence electrons."""
def __init__(self, encoder):
def __init__(self, encoder, min=1, max=12, n_intervals=12):
"""Initialize featurizer with min = 1, max = 12, n_intervals = 12.
Parameters
----------
encoder: OneHotEncoder
Initialized object of class OneHotEncoder.
min: int
Minimum value of valence electrons
max: int
Maximum value of valence electrons
n_intervals: int
Number of intervals
"""
# Initialize variables
self.min = 1
self.max = 12
self.n_intervals = 12
self.min = min
self.max = max
self.n_intervals = n_intervals

# Fit encoder
self.encoder = encoder
Expand All @@ -246,7 +265,7 @@ def __init__(self, encoder):
# Create a map between atomic number and number of valence electrons
self.map_dict = {1: 1, 2:0}
for i in range(3, 21, 1):
self.map_dict[i] = element(i).ec.get_valence().ne()
self.map_dict[i] = min(element(i).ec.get_valence().ne(), 12)

def featurize_graph(self, graph):
"""Featurize an AtomsGraph.
Expand All @@ -258,7 +277,7 @@ def featurize_graph(self, graph):
"""
# Get atomic numbers
atom_num_dict = nx.get_node_attributes(graph, "atomic_number")
atom_num_arr = np.array(atom_num_dict.values())
atom_num_arr = np.array(list(atom_num_dict.values()))

# Create node feature matrix
self._feat_tensor = self.encoder.transform(atom_num_arr)
Expand All @@ -279,9 +298,156 @@ def feat_tensor(self):
def name():
"""Return the name of the featurizer."""
return "valence"

class CoordinationFeaturizer(Featurizer):
    """One-hot featurizer for the coordination number of each node."""

    def __init__(self, encoder, min=1, max=15, n_intervals=15):
        """Store the binning range and fit the encoder to it.

        Parameters
        ----------
        encoder: OneHotEncoder
            Initialized object of class OneHotEncoder.
        min: int
            Minimum value of coordination number (default 1)
        max: int
            Maximum value of coordination number (default 15)
        n_intervals: int
            Number of intervals (default 15)
        """
        # Binning settings are kept on the instance so they can be inspected
        self.min, self.max, self.n_intervals = min, max, n_intervals

        # The encoder is fitted once, here, with the range given above
        self.encoder = encoder
        self.encoder.fit(self.min, self.max, self.n_intervals)

    def featurize_graph(self, graph):
        """Encode the "coordination" attribute of every node in a graph.

        The result is stored on the instance and exposed via `feat_tensor`.

        Parameters
        ----------
        graph: networkx graph
            Graph whose nodes carry a "coordination" attribute (the script at
            the end of this file passes the `graph` attribute of an
            AtomsGraph).
        """
        # Collect coordination numbers, one per node that has the attribute
        coordination_by_node = nx.get_node_attributes(graph, "coordination")
        coordination_numbers = np.array(list(coordination_by_node.values()))

        # Create node feature matrix
        self._feat_tensor = self.encoder.transform(coordination_numbers)

    @property
    def feat_tensor(self):
        """Return the features produced by the last `featurize_graph` call.

        Returns
        -------
        feat_tensor:
            Output of `encoder.transform`; presumably a tensor of shape
            (N, M) with N = number of nodes and M = n_intervals -- confirm
            against OneHotEncoder.
        """
        return self._feat_tensor

    @staticmethod
    def name():
        """Return the name of the featurizer."""
        return "coordination"

class BondDistanceFeaturizer(Featurizer):
    """Featurize edges based on bond distance."""

    def __init__(self, encoder, min, max, n_intervals):
        """Initialize bond distance featurizer and fit the encoder.

        Parameters
        ----------
        encoder: OneHotEncoder
            Initialized object of class OneHotEncoder.
        min: float
            Minimum value of bond distance
        max: float
            Maximum value of bond distance
        n_intervals: int
            Number of intervals
        """
        # Initialize variables
        self.min = min
        self.max = max
        self.n_intervals = n_intervals

        # Fit encoder
        self.encoder = encoder
        self.encoder.fit(self.min, self.max, self.n_intervals)

    def featurize_graph(self, graph):
        """Encode the "bond_distance" attribute of every edge in a graph.

        The result is stored on the instance and exposed via `feat_tensor`.

        Parameters
        ----------
        graph: networkx graph
            Graph whose edges carry a "bond_distance" attribute (for example
            the `graph` attribute of an AtomsGraph).
        """
        # Get bond distances, one per edge that has the attribute
        bond_dist_dict = nx.get_edge_attributes(graph, "bond_distance")
        bond_dist_arr = np.array(list(bond_dist_dict.values()))

        # Create edge feature matrix
        self._feat_tensor = self.encoder.transform(bond_dist_arr)

    @property
    def feat_tensor(self):
        """Return the features produced by the last `featurize_graph` call.

        Returns
        -------
        feat_tensor:
            Output of `encoder.transform`; presumably a tensor of shape
            (E, M) with E = number of edges and M = n_intervals -- confirm
            against OneHotEncoder.
        """
        return self._feat_tensor

    @staticmethod
    def name():
        """Return the name of the featurizer."""
        # Previously returned "valence" (copy-paste slip), which collided
        # with the name reported by ValenceFeaturizer.
        return "bond_distance"

class BulkBondDistanceFeaturizer(BondDistanceFeaturizer):
    """Bond distance featurizer with defaults chosen for bulk atoms.

    Identical to BondDistanceFeaturizer except that the range has default
    values: min = 0, max = 8, n_intervals = 8.
    """

    def __init__(self, encoder, min=0, max=8, n_intervals=8):
        """Forward the encoder and binning range to BondDistanceFeaturizer."""
        super().__init__(encoder, min, max, n_intervals)

class SurfaceBondDistanceFeaturizer(BondDistanceFeaturizer):
    """Bond distance featurizer with defaults chosen for surface atoms.

    Identical to BondDistanceFeaturizer except that the range has default
    values: min = 0, max = 5, n_intervals = 10.
    """

    def __init__(self, encoder, min=0, max=5, n_intervals=10):
        """Forward the encoder and binning range to BondDistanceFeaturizer."""
        super().__init__(encoder, min, max, n_intervals)

class AdsorbateBondDistanceFeaturizer(BondDistanceFeaturizer):
    """Bond distance featurizer with defaults chosen for adsorbate atoms.

    Identical to BondDistanceFeaturizer except that the range has default
    values: min = 0, max = 4, n_intervals = 16.
    """

    def __init__(self, encoder, min=0, max=4, n_intervals=16):
        """Forward the encoder and binning range to BondDistanceFeaturizer."""
        super().__init__(encoder, min, max, n_intervals)

# Ad-hoc smoke test, executed only when this module is run as a script.
# NOTE(review): this span comes from a rendered diff, so removed and added
# lines may be interleaved and indentation is lost -- confirm against the
# actual file before relying on it.
if __name__ == "__main__":
# One-hot encode five sample values with an encoder fitted to min = 1,
# max = 6, n_intervals = 5, and print the result.
prop = np.array([1.5, 2.5, 3.5, 4.5, 5.5])
ohf = OneHotEncoder()
ohf.fit(1, 6, 5)
print(ohf.transform(prop))
# Local import: ase is only needed for this script-style check.
from ase.io import read

# Read a structure from a file named "CONTCAR" in the working directory
# and build a graph for the selected atom indices.
atoms = read("CONTCAR")
g = AtomsGraph(atoms, select_idx=[1, 10, 11, 12])
# Featurize the underlying graph (g.graph) by d-band center and print
# the resulting feature tensor.
dbf = DBandFeaturizer(OneHotEncoder())
dbf.featurize_graph(g.graph)
print(dbf.feat_tensor)
28 changes: 21 additions & 7 deletions src/graphs.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
"""Classes to create bulk, surface, and adsorbate graphs."""

import abc

import networkx as nx
import numpy as np
from ase.neighborlist import build_neighbor_list
from ase.neighborlist import build_neighbor_list, natural_cutoffs


class AtomsGraph:
Expand Down Expand Up @@ -37,7 +35,10 @@ def create_graph(self):
"""Create a graph from an atoms object and neighbor_list."""
# Create neighbor list of atoms
self.neighbor_list = build_neighbor_list(
self.atoms, bothways=True, self_interaction=False
self.atoms,
natural_cutoffs(self.atoms),
bothways=True,
self_interaction=False
)

# Create NetworkX Multigraph
Expand All @@ -54,11 +55,21 @@ def create_graph(self):
)

# Iterate over nodes, identify neighbors, and add edges between them
for n in graph.nodes():
node_list = list(graph.nodes())
bond_tuples = []
for n in node_list:
# Get neighbors from neighbor list
neighbor_idx, _ = self.neighbor_list.get_neighbors(n)
# Iterate over neighbors
for nn in neighbor_idx:
# Save bond
bond = (n, nn)
rev_bond = tuple(reversed(bond))
# Check if bond has already been added
if rev_bond in bond_tuples:
continue
else:
bond_tuples.append(bond)
# If neighbor is not in graph, add it as a node
if not graph.has_node(nn):
graph.add_node(
Expand All @@ -69,10 +80,14 @@ def create_graph(self):
)
# Calculate bond distance
bond_dist = np.linalg.norm(
graph.nodes[n].position - graph.nodes[nn].position
graph.nodes[n]["position"] - graph.nodes[nn]["position"]
)
graph.add_edge(n, nn, bond_distance=bond_dist)

# Add coordination numbers
for n in graph.nodes():
graph.nodes[n]["coordination"] = graph.degree[n]

# Assign graph object
self.graph = graph

Expand All @@ -85,4 +100,3 @@ def plot(self, filename=None):
If provided, the plot is saved with the given filename.
"""
pass

0 comments on commit d80ef8b

Please sign in to comment.