Source code for mofdscribe.featurizers.bu.compositionstats_featurizer

# -*- coding: utf-8 -*-
"""Describe the chemical composition of structures."""
from collections import defaultdict
from typing import List, Tuple, Union

import numpy as np
from element_coder import encode
from matminer.featurizers.base import BaseFeaturizer
from pymatgen.core import IMolecule, IStructure, Molecule, Structure

from mofdscribe.featurizers.utils.aggregators import ARRAY_AGGREGATORS
from mofdscribe.featurizers.utils.extend import (
    operates_on_imolecule,
    operates_on_istructure,
    operates_on_molecule,
    operates_on_structure,
)


@operates_on_molecule
@operates_on_imolecule
@operates_on_istructure
@operates_on_structure
class CompositionStats(BaseFeaturizer):
    """Describe the composition of molecules via statistics of element encodings.

    The featurizer encodes the element on every site of the input using
    user-defined encodings and then aggregates the per-site values of each
    encoding using user-defined aggregations (e.g. mean, max, min).
    """

    def __init__(
        self,
        encodings: Tuple[str, ...] = ("mod_pettifor", "X"),
        aggregations: Tuple[str, ...] = ("mean", "std", "max", "min"),
    ) -> None:
        """Initialize a CompositionStats featurizer.

        Args:
            encodings (Tuple[str, ...]): Encodings used for the elements.
                Can be one of
                :py:obj:`element_coder.data.coding_data._PROPERTY_KEYS`.
                Defaults to ("mod_pettifor", "X").
            aggregations (Tuple[str, ...]): Statistics to compute over the
                element encodings. Can be one of
                :py:obj:`mofdscribe.featurizers.utils.aggregators.ARRAY_AGGREGATORS`.
                Defaults to ("mean", "std", "max", "min").
        """
        self.aggregations = aggregations
        self.encodings = encodings

    def feature_labels(self) -> List[str]:
        """Return one label per (encoding, aggregation) pair.

        The order is encoding-major and aggregation-minor, which is the same
        order in which :meth:`featurize` emits its values.

        Returns:
            List[str]: Labels of the form
                ``composition_stats_{encoding}_{aggregation}``.
        """
        return [
            f"composition_stats_{encoding}_{agg}"
            for encoding in self.encodings
            for agg in self.aggregations
        ]

    def featurize(self, molecule: Union[Molecule, IMolecule, Structure, IStructure]) -> np.ndarray:
        """Compute the aggregated element encodings over all sites.

        Args:
            molecule (Union[Molecule, IMolecule, Structure, IStructure]):
                Object whose ``sites`` are encoded.

        Returns:
            np.ndarray: One value per (encoding, aggregation) pair, ordered
                like the output of :meth:`feature_labels`.
        """
        features = []
        for encoding in self.encodings:
            # Collect the encoded value of *every* site. Assigning the result
            # of ``encode`` to a single slot would keep only the last site and
            # make all statistics degenerate (e.g. std always 0).
            # NOTE(review): an ndarray is passed on the assumption that
            # ARRAY_AGGREGATORS expects array-like input -- confirm.
            site_values = np.array([encode(site.specie, encoding) for site in molecule.sites])
            features.extend(ARRAY_AGGREGATORS[agg](site_values) for agg in self.aggregations)
        return np.array(features)

    def implementors(self) -> List[str]:
        """Return the names of the implementors of this featurizer."""
        return ["Kevin Maik Jablonka"]

    def citations(self) -> List[str]:
        """Return the citations for this featurizer (none are listed)."""
        return []