Source code for mofdscribe.featurizers.chemistry.energygrid

# -*- coding: utf-8 -*-
"""Featurizer that runs RASPA to calculate energy grids."""
import os
from glob import glob
from typing import List, Tuple, Union

import numpy as np
import pandas as pd
from pymatgen.core import IStructure, Structure

from mofdscribe.featurizers.base import MOFBaseFeaturizer
from mofdscribe.featurizers.utils.histogram import get_rdf
from mofdscribe.featurizers.utils.raspa.resize_uc import resize_unit_cell
from mofdscribe.featurizers.utils.raspa.run_raspa import detect_raspa_dir, run_raspa

from ..utils.extend import operates_on_istructure, operates_on_structure

__all__ = ["EnergyGridHistogram"]
GRID_INPUT_TEMPLATE = """SimulationType  MakeASCIGrid

Forcefield      Local

Framework 0
FrameworkName input
UnitCells {unit_cells}

CutOff                        {cutoff}

NumberOfGrids {num_grids}
GridTypes {grid_types}
SpacingVDWGrid {vdw_spacing}
"""


def parse_energy_grids(directory: Union[str, os.PathLike]) -> dict:
    grids = glob(os.path.join(directory, "ASCI_Grids", "*.grid"))
    energies = {}
    for grid in grids:
        name = os.path.basename(grid).split(".")[0].replace("asci_grid_", "")
        energies[name] = read_ascii_grid(grid)
    return energies


def read_ascii_grid(filename: Union[str, os.PathLike]) -> pd.DataFrame:
    """Read an ASCII grid file into a pandas DataFrame.

    Args:
        filename (Union[str, os.PathLike]): The path to the file to read.

    Returns:
        A pandas DataFrame containing the grid data.
    """
    df = pd.read_csv(
        filename,
        delim_whitespace=True,
        header=None,
        names=["x", "y", "z", "energy", "deriv_x", "deriv_y", "deriv_z"],
        na_values="?",
    )
    df = df.astype(np.float)
    return df


[docs]@operates_on_istructure @operates_on_structure class EnergyGridHistogram(MOFBaseFeaturizer): """Computes the energy grid histograms as originally proposed by Bucior et al. [Bucior2019]_. Conventionally, energy grids can be used to speed up molecular simulations. The idea is that the interactions between the guest and host are pre-computed on a fine grid and then only need to be looked up (instead of re-computed all the time). Bucior et al. proposed (effectively) a dimensionality reduction of the energy grid by making a histogram of the energies. For H2 they used bins of a width of 1 kJ mol−1 ranging from −10 kJ mol−1 (attractive) to 0 kJ mol−1. For methane, they used bins in 2 kJ mol−1 increments between −26 and 0 kJ mol−1, again with a repulsion bin. This approach has also been used, for example, `Li et al. (2021) <https://aip.scitation.org/doi/10.1063/5.0050823>`_ and [Bucior2021]_. References: .. [Bucior2021] Li, Z.; Bucior, B. J.; Chen, H.; Haranczyk, M.; Siepmann, J. I.; Snurr, R. Q. Machine Learning Using Host/Guest Energy Histograms to Predict Adsorption in Metal–Organic Frameworks: Application to Short Alkanes and Xe/Kr Mixtures. J. Chem. Phys. 2021, 155 (1), 014701. https://doi.org/10.1063/5.0050823. """ def __init__( self, raspa_dir: Union[str, os.PathLike, None] = None, grid_spacing: float = 1.0, bin_size_vdw: float = 1, min_energy_vdw: float = -40, max_energy_vdw: float = 0, cutoff: float = 12, mof_ff: str = "UFF", mol_ff: str = "TraPPE", mol_name: str = "CO2", sites: Tuple[str] = ("C_co2",), tail_corrections: bool = True, mixing_rule: str = "Lorentz-Berthelot", shifted: bool = False, separate_interactions: bool = True, run_eqeq: bool = True, primitive: bool = False, ): """Construct the EnergyGridHistogram class. Args: raspa_dir (Union[str, Path, None]): Path to the raspa directory (with lib, bin, share) subdirectories. If `None` we will look for the `RASPA_DIR` environment variable. Defaults to None. grid_spacing (float): Spacing for the energy grids. Bucior et al. (2018) used 1.0 A. Defaults to 1.0. bin_size_vdw (float): Size of bins for the energy histogram. Defaults to 1. min_energy_vdw (float): Minimum energy for the energy histogram (defining start of first bin). Defaults to -10. max_energy_vdw (float): Maximum energy for energy histogram (defining last bin). Defaults to 0. cutoff (float): Cutoff for the Van-der-Waals interaction. Defaults to 12. mof_ff (str): Name of the forcefield used for the framework. Defaults to "UFF". mol_ff (str): Name of the forcefield used for the guest molecule. Defaults to "TraPPE". mol_name (str): Name of the guest molecule. Defaults to "CO2". sites (Tuple[str]): Name of the Van-der-Waals sites for which the energy diagrams are computed. Defaults to ("C_co2",). tail_corrections (bool): If true, use analytical tail-correction for the contribution of the interaction potential after the cutoff. Defaults to True. mixing_rule (str): Mixing rule for framework and guest molecule force field. Available options are `Jorgenson` and `Lorentz-Berthelot`. Defaults to "Lorentz-Berthelot". shifted (bool): If true, shifts the potential to equal to zero at the cutoff. Defaults to False. separate_interactions (bool): If True use framework's force field for framework-molecule interactions. Defaults to True. run_eqeq (bool): If true, runs EqEq to compute charges. Defaults to True. primitive (bool): If True, the structure is reduced to its primitive form before the descriptor is computed. Defaults to True. Raises: ValueError: If the `raspa_dir` is not a valid directory. """ self.raspa_dir = raspa_dir if raspa_dir else os.environ.get("RASPA_DIR") if self.raspa_dir is None: try: self.raspa_dir = detect_raspa_dir() except ValueError: raise ValueError( "Please set the RASPA_DIR environment variable or provide the path for the class initialization." ) self.grid_spacing = grid_spacing self.bin_size_vdw = bin_size_vdw self.min_energy_vdw = min_energy_vdw self.max_energy_vdw = max_energy_vdw self.cutoff = cutoff self.mof_ff = mof_ff self.mol_ff = mol_ff self.mol_name = mol_name self.sites = sites self.tail_corrections = tail_corrections self.mixing_rule = mixing_rule self.shifted = shifted self.separate_interactions = separate_interactions self.run_eqeq = run_eqeq super().__init__(primitive=primitive)
[docs] def fit_transform(self, structures: List[Union[Structure, IStructure]]): ...
[docs] def fit(self, structure: Union[Structure, IStructure]): ...
def _get_grid(self): return np.arange(self.min_energy_vdw, self.max_energy_vdw, self.bin_size_vdw)
[docs] def feature_labels(self) -> List[str]: grid = self._get_grid() labels = [] for site in self.sites: for grid_point in grid: labels.append(f"energygridhist_{self.mol_name}_{site}_{grid_point}") return labels
def _featurize(self, s: Union[Structure, IStructure]) -> np.array: ff_molecules = {self.mol_name: self.mol_ff} parameters = { "ff_framework": self.mof_ff, "ff_molecules": ff_molecules, "shifted": self.shifted, "tail_corrections": self.tail_corrections, "mixing_rule": self.mixing_rule, "separate_interactions": self.separate_interactions, } replicas = resize_unit_cell(s, self.cutoff) ucells = f"{replicas[0]} {replicas[1]} {replicas[2]}" simulation_script = GRID_INPUT_TEMPLATE.format( unit_cells=ucells, cutoff=self.cutoff, num_grids=len(self.sites), grid_types=" ".join(self.sites), vdw_spacing=self.grid_spacing, ) res = run_raspa( s, self.raspa_dir, simulation_script, parameters, parse_energy_grids, self.run_eqeq ) output = [] for _, v in res.items(): output.append( get_rdf( v["energy"].values, self.min_energy_vdw, self.max_energy_vdw, self.bin_size_vdw, None, None, normalized=False, ) ) return np.concatenate(output)
[docs] def citations(self) -> List[str]: return [ "@article{Bucior2019," "doi = {10.1039/c8me00050f}," "url = {https://doi.org/10.1039/c8me00050f}," "year = {2019}," "publisher = {Royal Society of Chemistry ({RSC})}," "volume = {4}," "number = {1}," "pages = {162--174}," "author = {Benjamin J. Bucior and N. Scott Bobbitt and Timur Islamoglu " "and Subhadip Goswami and Arun Gopalan and Taner Yildirim and " "Omar K. Farha and Neda Bagheri and Randall Q. Snurr}," "title = {Energy-based descriptors to rapidly predict hydrogen storage " "in metal{\textendash}organic frameworks}," r"journal = {Molecular Systems Design {\&}amp$\mathsemicolon$ Engineering}" "}", "@article{Dubbeldam2015," "doi = {10.1080/08927022.2015.1010082}," "url = {https://doi.org/10.1080/08927022.2015.1010082}," "year = {2015}," "month = feb," "publisher = {Informa {UK} Limited}," "volume = {42}," "number = {2}," "pages = {81--101}," r"author = {David Dubbeldam and Sof{'{\i}}a Calero and Donald E. Ellis " "and Randall Q. Snurr}," "title = {{RASPA}: molecular simulation software for adsorption and " "diffusion in flexible nanoporous materials}," "journal = {Molecular Simulation}" "}", ]
[docs] def implementors(self): return ["Kevin Maik Jablonka", "David Dubbeldam and RASPA authors"]