Source code for mofdscribe.bench.dummy_models

# -*- coding: utf-8 -*-
"""Simple baseline models."""
from typing import Dict, Optional

import numpy as np
from pymatgen.core import Structure
from sklearn.dummy import DummyRegressor as SklearnDummyRegressor
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

# Public API of this module: every baseline model class defined below.
__all__ = ["DensityRegressor", "DummyRegressor"]


class DensityRegressor:
    """Baseline model that predicts the target from the structure density only.

    The single density feature goes through a ``StandardScaler``, is expanded
    with ``PolynomialFeatures(degree=3)`` and is then fitted with a
    ``LinearRegression``.
    """

    def __init__(self, lr_kwargs: Optional[Dict] = None):
        """Assemble the scaler -> polynomial features -> linear regression pipeline.

        Args:
            lr_kwargs (Optional[Dict], optional): Keyword arguments forwarded
                to ``LinearRegression``. ``None`` or an empty dict means no
                extra arguments are passed. Defaults to None.
        """
        regressor_options = lr_kwargs if lr_kwargs else {}
        steps = [
            ("scaler", StandardScaler()),
            ("poly", PolynomialFeatures(degree=3)),
            ("lr", LinearRegression(**regressor_options)),
        ]
        self.model = Pipeline(steps)

    def featurize(self, s: Structure) -> float:
        """Compute the only feature this baseline uses: the density.

        Looking the value up in a dataframe, or using mofdscribe featurizers,
        may be preferable to computing it from the structure each time.

        Args:
            s (Structure): Structure to featurize.

        Returns:
            float: Density of the structure.
        """
        return s.density

    def _density_column(self, structures):
        """Featurize every structure and return the values as one column."""
        densities = [self.featurize(structure) for structure in structures]
        # The pipeline expects a 2D (n_samples, n_features) array.
        return np.array(densities).reshape(-1, 1)

    def fit(self, idx, structures, y):
        """Fit the pipeline on the densities of ``structures``.

        Args:
            idx: Not used by this model.
            structures: Iterable of structures; each one is passed to
                :meth:`featurize`.
            y: Targets, handed to the pipeline's ``fit`` unchanged.
        """
        features = self._density_column(structures)
        self.model.fit(features, y)

    def predict(self, idx, structures):
        """Predict targets from the densities of ``structures``.

        Args:
            idx: Not used by this model.
            structures: Iterable of structures; each one is passed to
                :meth:`featurize`.

        Returns:
            Whatever the fitted pipeline's ``predict`` returns for the
            density column.
        """
        features = self._density_column(structures)
        return self.model.predict(features)
class DummyRegressor:
    """Baseline wrapping scikit-learn's ``DummyRegressor``.

    The wrapped estimator receives ``idx`` as its feature argument; the
    structures are never looked at.
    """

    def __init__(self, strategy: str = "mean"):
        """Create the wrapped scikit-learn dummy regressor.

        Args:
            strategy (str): Strategy to use for prediction; forwarded to the
                scikit-learn estimator. Defaults to "mean".
        """
        self.model = SklearnDummyRegressor(strategy=strategy)

    def fit(self, idx, structures, y):
        """Fit the wrapped estimator on ``idx`` and ``y``.

        Args:
            idx: Passed to the estimator as its feature argument.
            structures: Not used by this model.
            y: Targets, handed to the estimator's ``fit`` unchanged.
        """
        estimator = self.model
        estimator.fit(idx, y)

    def predict(self, idx, structures):
        """Return the wrapped estimator's prediction for ``idx``.

        Args:
            idx: Passed to the estimator as its feature argument.
            structures: Not used by this model.

        Returns:
            Whatever the wrapped estimator's ``predict`` returns.
        """
        predictions = self.model.predict(idx)
        return predictions