Source code for mofdscribe.featurizers.pore.voxelgrid

# -*- coding: utf-8 -*-
"""Featurizer that computes 3D voxelgrids."""
from typing import List, Tuple, Union

import numpy as np
from element_coder import encode
from pymatgen.core import Element, IStructure, Structure

from mofdscribe.featurizers.base import MOFBaseFeaturizer

from ._voxelgrid import VoxelGrid as VGBase


[docs]def make_supercell( coords: np.ndarray, elements: List[int], lattice: Tuple[float, float, float], size: float, min_size: float = -5, ) -> np.ndarray: """Generate cubic supercell of a given size. Args: coords (np.ndarray): matrix of xyz coordinates of the system elements (List[int]): atomic numbers of every site lattice (Tuple[float, float, float]): lattice constants of the system size (float): dimension size of cubic cell, e.g., 10x10x10 min_size (float): minimum axes size to keep negative xyz coordinates from the original cell. Defaults to -5. Returns: new_cell: supercell array """ a, b, c = lattice elements = np.array(elements).reshape(-1, 1) xyz_periodic_copies = [coords] element_periodic_copies = [elements] min_range = -3 # we aren't going in the minimum direction too much, so can make this small max_range = 20 # make this large enough, but can modify if wanting an even larger cell for x in range(-min_range, max_range): for y in range(0, max_range): for z in range(0, max_range): if x == y == z == 0: continue add_vector = x * a + y * b + z * c xyz_periodic_copies.append(coords + add_vector) element_periodic_copies.append(elements) # Combine into one array xyz_periodic_total = np.vstack(xyz_periodic_copies) elements_all = np.vstack(element_periodic_copies) # Filter out all atoms outside of the cubic box filter_mask_a = np.max(xyz_periodic_total[:, :3], axis=1) < size new_cell = xyz_periodic_total[filter_mask_a] filter_mask_b = np.min(new_cell[:, :3], axis=1) > min_size new_cell = new_cell[filter_mask_b] elements_all = elements_all[filter_mask_a] elements_all = elements_all[filter_mask_b] return new_cell, elements_all.flatten()
def compute_properties(numbers: np.array, properties: Tuple[str]) -> np.array: property_lookup = {} unique_numbers = np.unique(numbers) for number in unique_numbers: element = Element.from_Z(number) prop_vec = [] for prop in properties: prop_vec.append(encode(element.symbol, prop)) property_lookup[number] = prop_vec property_array = [] for number in numbers: property_array.append(property_lookup[number]) return np.array(property_array) # ToDo: Potentially, we could also do the substruture-based approach for chemistry. # Not sure though how useful it would be as it would lead to quite large feature vectors.
[docs]class VoxelGrid(MOFBaseFeaturizer): """ Describe the structure using a voxel grid. For this, we first compute a supercell, the "voxelize" the point cloud. For setting the value of the voxels, different options are available: Geometry Aggregations: - `binary`: 1 if the voxel is occupied, 0 otherwise - `density`: the number of atoms in the voxel / total number of atoms - `TDF`: truncated distance function. Value between 0 and 1 indicating the distance between the voxel's center and the closest point. 1 on the surface, 0 on voxels further than 2 * voxel side. Properties: Alternatively/additionally one can use the average of any available properties of pymatgen Element objects. """ def __init__( self, min_size: float = 30, n_x: int = 25, n_y: int = 25, n_z: int = 25, geometry_aggregations: Tuple[str] = ("binary",), properties: Tuple[str, int] = ("X", "electron_affinity"), flatten: bool = True, regular_bounding_box: bool = True, primitive: bool = False, ): """Initialize a VoxelGrid featurizer. Args: min_size (float): Minimum supercell size in Angstrom. Defaults to 30. n_x (int): Number of bins in x direction (Hung et al used 30 and 60 at a cell size of 60). Defaults to 25. n_y (int): Number of bins in x direction (Hung et al used 30 and 60 at a cell size of 60). Defaults to 25. n_z (int): Number of bins in x direction (Hung et al used 30 and 60 at a cell size of 60). Defaults to 25. geometry_aggregations (Union[Tuple["density" | "binary" | "TDF"], None]): Mode for encoding the occupation of voxels. * binary: 0 for empty voxels, 1 for occupied. * density: number of points inside voxel / total number of points. * TDF: Truncated Distance Function. Value between 0 and 1 indicating the distance, between the voxel's center and the closest point. 1 on the surface, 0 on voxels further than 2 * voxel side. Defaults to ("binary",). properties (Union[Tuple[str, int], None]): Properties used for calculation of the AP-RDF. All properties of `pymatgen.core.Species` are available. Defaults to ("X", "electron_affinity"). flatten (bool): It true, flatten the 3D voxelgrid to 1D array. Defaults to True. regular_bounding_box (bool): If True, the bounding box of the point cloud will be adjusted in order to have all the dimensions of equal length. Defaults to True. primitive (bool): If True, the structure is reduced to its primitive form before the descriptor is computed. Defaults to True. """ self.min_size = min_size self.n_x = n_x self.n_y = n_y self.n_z = n_z self._num_voxels = n_x * n_y * n_z self.properties = properties self.geometry_aggregations = geometry_aggregations self.flatten = flatten self.regular_bounding_box = regular_bounding_box super().__init__(primitive=primitive) def _featurize(self, structure: Union[IStructure, Structure]) -> np.ndarray: coords, numbers = make_supercell( structure.cart_coords, structure.atomic_numbers, structure.lattice.matrix, self.min_size ) if self.properties is not None: properties = compute_properties(numbers, self.properties) else: properties = None vg = VGBase( coords, properties, self.n_x, self.n_y, self.n_z, regular_bounding_box=self.regular_bounding_box, ) vg.compute() features = [] if self.geometry_aggregations is not None: for agg in self.geometry_aggregations: features.append( vg.get_feature_vector( agg, flatten=self.flatten, ) ) if self.properties is not None: for i, _ in enumerate(self.properties): features.append( vg.get_feature_vector( i, flatten=self.flatten, ) ) if self.flatten: return np.stack(features).flatten() return features def _get_feature_labels(self): feature_labels = [] for geometry_aggregation in self.geometry_aggregations: for voxel in range(self._num_voxels): feature_labels.append(f"voxelgrid_{geometry_aggregation}_{voxel}") for prop in self.properties: for voxel in range(self._num_voxels): feature_labels.append(f"voxelgrid_{prop}_{voxel}") return feature_labels
[docs] def feature_labels(self) -> List[str]: return self._get_feature_labels()
[docs] def citations(self) -> List[str]: return [ "@article{Hung2022," "doi = {10.1021/acs.jpcc.1c09649}," "url = {https://doi.org/10.1021/acs.jpcc.1c09649}," "year = {2022}," "month = jan," "publisher = {American Chemical Society ({ACS})}," "volume = {126}," "number = {5}," "pages = {2813--2822}," "author = {Ting-Hsiang Hung and Zhi-Xun Xu and Dun-Yen Kang " "and Li-Chiang Lin}," "title = {Chemistry-Encoded Convolutional Neural Networks for " "Predicting Gaseous Adsorption in Porous Materials}," "journal = {The Journal of Physical Chemistry C}" "}", "@article{Cho2021," "doi = {10.1021/acs.jpclett.1c00293}," "url = {https://doi.org/10.1021/acs.jpclett.1c00293}," "year = {2021}," "month = mar," "publisher = {American Chemical Society ({ACS})}," "volume = {12}," "number = {9}," "pages = {2279--2285}," "author = {Eun Hyun Cho and Li-Chiang Lin}," "title = {Nanoporous Material Recognition via 3D Convolutional " "Neural Networks: Prediction of Adsorption Properties}," "journal = {The Journal of Physical Chemistry Letters}" "}", ]
[docs] def implementors(self) -> List[str]: return ["Kevin Maik Jablonka"]