Source code for mofdscribe.featurizers.chemistry.aprdf

# -*- coding: utf-8 -*-
"""Atomic-property weighted autocorrelation function.

See alternative implementation https://github.com/tomdburns/AP-RDF (likely
faster as it also has a lower-level implementation)
"""
from functools import cached_property
from typing import List, Tuple, Union

import numpy as np
from element_coder import encode
from pymatgen.core import IStructure, Structure

from mofdscribe.featurizers.base import MOFBaseFeaturizer

from ..utils.aggregators import AGGREGATORS
from ..utils.extend import operates_on_istructure, operates_on_structure

__all__ = ["APRDF"]


[docs]@operates_on_structure @operates_on_istructure class APRDF(MOFBaseFeaturizer): r"""Generalization of descriptor described by `Fernandez et al. <https://pubs.acs.org/doi/10.1021/jp404287t>`_. In the article they describe the product of atomic properties as weighting of a "conventional" radiual distribution function "RDF". .. math:: \operatorname{RDF}^{p}(R)=f \sum_{i, j}^{\text {all atom puirs }} P_{i} P_{j} \mathrm{e}^{-B\left(r_{ij}-R\right)^{2}} # noqa: E501 Here, we allow for a wider choice of option for aggregation of properties :math:`P_{i}` and :math:`P_{j}` (not only the product). Examples: >>> from mofdscribe.chemistry.aprdf import APRDF >>> from pymatgen.core.structure import Structure >>> s = Structure.from_file("tests/test_files/LiTiO3.cif") >>> aprdf = APRDF() >>> features = aprdf.featurize(s) """ def __init__( self, cutoff: float = 20.0, lower_lim: float = 2.0, bin_size: float = 0.25, b_smear: Union[float, None] = 10, properties: Tuple[str, int] = ("X", "electron_affinity"), aggregations: Tuple[str] = ("avg", "product", "diff"), normalize: bool = False, primitive: bool = True, ): """Set up an atomic property (AP) weighted radial distribution function. Args: cutoff (float): Consider neighbors up to this value (in Angstrom). Defaults to 20.0. lower_lim (float): Lowest distance (in Angstrom) to consider. Defaults to 2.0. bin_size (float): Bin size for binning. Defaults to 0.25. b_smear (Union[float, None]): Band width for Gaussian smearing. If None, the unsmeared histogram is used. Defaults to 10. properties (Tuple[str, int]): Properties used for calculation of the AP-RDF. All properties of `pymatgen.core.Species` are available in addition to the integer `1` that will set P_i=P_j=1. Defaults to ("X", "electron_affinity"). aggregations (Tuple[str]): Methods used to combine the properties. See `mofdscribe.featurizers.utils.aggregators.AGGREGATORS` for available options. Defaults to ("avg", "product", "diff"). normalize (bool): If True, the histogram is normalized by dividing by the number of atoms. Defaults to False. primitive (bool): If True, the structure is reduced to its primitive form before the descriptor is computed. Defaults to True. """ self.lower_lim = lower_lim self.cutoff = cutoff self.bin_size = bin_size self.properties = properties self.normalize = normalize self.b_smear = b_smear self.aggregations = aggregations super().__init__(primitive=primitive)
[docs] def precheck(self): pass
@cached_property def _bins(self): num_bins = int((self.cutoff - self.lower_lim) // self.bin_size) bins = np.linspace(self.lower_lim, self.cutoff, num_bins) return bins def _get_feature_labels(self): aprdfs = np.empty( (len(self.properties), len(self.aggregations), len(self._bins)), dtype=object ) for pi, prop in enumerate(self.properties): for ai, aggregation in enumerate(self.aggregations): for bin_index, _ in enumerate(self._bins): aprdfs[pi][ai][bin_index] = f"aprdf_{prop}_{aggregation}_{bin_index}" return list(aprdfs.flatten()) def _featurize(self, s: Union[Structure, IStructure]) -> np.array: bins = self._bins aprdfs = np.zeros((len(self.properties), len(self.aggregations), len(bins))) # todo: use numba to speed up for i, item in enumerate(s): for j in range(i + 1, len(s)): dist = s.get_distance(i, j) if dist < self.cutoff and dist > self.lower_lim: bin_idx = int((dist - self.lower_lim) // self.bin_size) for pi, prop in enumerate(self.properties): for ai, agg in enumerate(self.aggregations): p0 = encode(item.specie, prop) p1 = encode(s[j].specie, prop) agg_func = AGGREGATORS[agg] p = agg_func([p0, p1]) aprdfs[pi][ai][bin_idx] += p * np.exp( -self.b_smear * (dist - bins[bin_idx]) ** 2 ) if self.normalize: aprdfs /= len(s) return aprdfs.flatten()
[docs] def feature_labels(self) -> List[str]: return self._get_feature_labels()
[docs] def citations(self) -> List[str]: return [ "@article{Fernandez2013," "doi = {10.1021/jp404287t}," "url = {https://doi.org/10.1021/jp404287t}," "year = {2013}," "month = jul," "publisher = {American Chemical Society ({ACS})}," "volume = {117}," "number = {27}," "pages = {14095--14105}," "author = {Michael Fernandez and Nicholas R. Trefiak and Tom K. Woo}," "title = {Atomic Property Weighted Radial Distribution Functions " "Descriptors of Metal{\textendash}Organic Frameworks for the Prediction " "of Gas Uptake Capacity}," "journal = {The Journal of Physical Chemistry C}" "}" ]
[docs] def implementors(self): return ["Kevin Maik Jablonka"]