Source code for mofdscribe.featurizers.chemistry.aprdf

# -*- coding: utf-8 -*-
"""Atomic-property weighted autocorrelation function.

See alternative implementation https://github.com/tomdburns/AP-RDF (likely
faster as it also has a lower-level implementation)
"""
from functools import cached_property
from typing import List, Tuple, Union

import numpy as np
from element_coder import encode
from pymatgen.core import IStructure, Structure

from mofdscribe.featurizers.base import MOFBaseFeaturizer

from ..utils.aggregators import AGGREGATORS
from ..utils.extend import operates_on_istructure, operates_on_structure

__all__ = ["APRDF"]


[docs]@operates_on_structure
@operates_on_istructure
class APRDF(MOFBaseFeaturizer):
    r"""Generalization of descriptor described by `Fernandez et al. <https://pubs.acs.org/doi/10.1021/jp404287t>`_.

    In the article they describe the product of atomic properties as weighting
    of a "conventional" radiual distribution function "RDF".

    .. math::
        \operatorname{RDF}^{p}(R)=f \sum_{i, j}^{\text {all atom puirs }} P_{i} P_{j} \mathrm{e}^{-B\left(r_{ij}-R\right)^{2}} # noqa: E501

    Here, we allow for a wider choice of option for aggregation of properties
    :math:`P_{i}` and :math:`P_{j}` (not only the product).

    Examples:
        >>> from mofdscribe.chemistry.aprdf import APRDF
        >>> from pymatgen.core.structure import Structure
        >>> s = Structure.from_file("tests/test_files/LiTiO3.cif")
        >>> aprdf = APRDF()
        >>> features = aprdf.featurize(s)
    """

    def __init__(
        self,
        cutoff: float = 20.0,
        lower_lim: float = 2.0,
        bin_size: float = 0.25,
        b_smear: Union[float, None] = 10,
        properties: Tuple[str, int] = ("X", "electron_affinity"),
        aggregations: Tuple[str] = ("avg", "product", "diff"),
        normalize: bool = False,
        primitive: bool = True,
    ):
        """Set up an atomic property (AP) weighted radial distribution function.

        Args:
            cutoff (float): Consider neighbors up to this value (in
                Angstrom). Defaults to 20.0.
            lower_lim (float): Lowest distance (in Angstrom) to consider.
                Defaults to 2.0.
            bin_size (float): Bin size for binning.
                Defaults to 0.25.
            b_smear (Union[float, None]): Band width for Gaussian smearing.
                If None, the unsmeared histogram is used. Defaults to 10.
            properties (Tuple[str, int]): Properties used for calculation of the AP-RDF.
                All properties of `pymatgen.core.Species` are available in
                addition to the integer `1` that will set P_i=P_j=1. Defaults to
                ("X", "electron_affinity").
            aggregations (Tuple[str]): Methods used to combine the
                properties.
                See `mofdscribe.featurizers.utils.aggregators.AGGREGATORS` for available
                options. Defaults to ("avg", "product", "diff").
            normalize (bool): If True, the histogram is normalized by dividing
                by the number of atoms. Defaults to False.
            primitive (bool): If True, the structure is reduced to its primitive
                form before the descriptor is computed. Defaults to True.
        """
        self.lower_lim = lower_lim
        self.cutoff = cutoff
        self.bin_size = bin_size
        self.properties = properties
        self.normalize = normalize

        self.b_smear = b_smear
        self.aggregations = aggregations
        super().__init__(primitive=primitive)

[docs]    def precheck(self):
        pass

    @cached_property
    def _bins(self):
        num_bins = int((self.cutoff - self.lower_lim) // self.bin_size)
        bins = np.linspace(self.lower_lim, self.cutoff, num_bins)
        return bins

    def _get_feature_labels(self):
        aprdfs = np.empty(
            (len(self.properties), len(self.aggregations), len(self._bins)), dtype=object
        )
        for pi, prop in enumerate(self.properties):
            for ai, aggregation in enumerate(self.aggregations):
                for bin_index, _ in enumerate(self._bins):
                    aprdfs[pi][ai][bin_index] = f"aprdf_{prop}_{aggregation}_{bin_index}"

        return list(aprdfs.flatten())

    def _featurize(self, s: Union[Structure, IStructure]) -> np.array:
        bins = self._bins
        aprdfs = np.zeros((len(self.properties), len(self.aggregations), len(bins)))

        # todo: use numba to speed up
        for i, item in enumerate(s):
            for j in range(i + 1, len(s)):
                dist = s.get_distance(i, j)
                if dist < self.cutoff and dist > self.lower_lim:
                    bin_idx = int((dist - self.lower_lim) // self.bin_size)
                    for pi, prop in enumerate(self.properties):
                        for ai, agg in enumerate(self.aggregations):
                            p0 = encode(item.specie, prop)
                            p1 = encode(s[j].specie, prop)

                            agg_func = AGGREGATORS[agg]
                            p = agg_func([p0, p1])
                            aprdfs[pi][ai][bin_idx] += p * np.exp(
                                -self.b_smear * (dist - bins[bin_idx]) ** 2
                            )

        if self.normalize:
            aprdfs /= len(s)

        return aprdfs.flatten()

[docs]    def feature_labels(self) -> List[str]:
        return self._get_feature_labels()

[docs]    def citations(self) -> List[str]:
        return [
            "@article{Fernandez2013,"
            "doi = {10.1021/jp404287t},"
            "url = {https://doi.org/10.1021/jp404287t},"
            "year = {2013},"
            "month = jul,"
            "publisher = {American Chemical Society ({ACS})},"
            "volume = {117},"
            "number = {27},"
            "pages = {14095--14105},"
            "author = {Michael Fernandez and Nicholas R. Trefiak and Tom K. Woo},"
            "title = {Atomic Property Weighted Radial Distribution Functions "
            "Descriptors of Metal{\textendash}Organic Frameworks for the Prediction "
            "of Gas Uptake Capacity},"
            "journal = {The Journal of Physical Chemistry C}"
            "}"
        ]

[docs]    def implementors(self):
        return ["Kevin Maik Jablonka"]