Source code for openadmet.models.architecture.rf

"""Random Forest model implementations."""

from typing import ClassVar

import numpy as np
from loguru import logger
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from openadmet.models.architecture.model_base import PickleableModelBase, models


[docs]class RFModelBase(PickleableModelBase): """Base class for Sklearn Random Forest models.""" # Meta parameters for this class type: ClassVar[str] mod_class: ClassVar[type]
[docs] def build(self): """Prepare the model.""" if not self.estimator: self.estimator = self.mod_class(**self.model_dump()) else: logger.warning("Model already exists, skipping build")
[docs] def train(self, X: np.ndarray, y: np.ndarray): """ Train the model. Parameters ---------- X: np.ndarray Training data features y: np.ndarray Training data labels """ self.build() self.estimator = self.estimator.fit(X, y, verbose=True)
[docs] def predict(self, X: np.ndarray, **kwargs) -> np.ndarray: """ Predict using the model. Parameters ---------- X: np.ndarray Data to predict on **kwargs Additional keyword arguments for the predict method. Returns ------- np.ndarray Predictions from the model """ if not self.estimator: raise ValueError("Model not trained") return np.expand_dims(self.estimator.predict(X), axis=1)
[docs]@models.register("RFRegressorModel") class RFRegressorModel(RFModelBase): """Random Forest regression model.""" # Meta parameters for this class type: ClassVar[str] = "RFRegressorModel" mod_class: ClassVar[type] = RandomForestRegressor # RF parameters n_estimators: int = 100 criterion: str = "squared_error" max_depth: int | None = None min_samples_split: int = 2 min_samples_leaf: int = 1 min_weight_fraction_leaf: float = 0.0 max_features: float = 1.0 max_leaf_nodes: int | None = None min_impurity_decrease: float = 0.0 bootstrap: bool = True oob_score: bool = False n_jobs: int | None = None random_state: int | None = None verbose: int = 0 warm_start: bool = False ccp_alpha: float = 0.0 max_samples: float | None = None monotonic_cst: float | None = None
[docs]@models.register("RFClassifierModel") class RFClassifierModel(RFModelBase): """RF classifier model.""" # Meta parameters for this class type: ClassVar[str] = "RFClassifierModel" mod_class: ClassVar[type] = RandomForestClassifier # RF parameters n_estimators: int = 100 criterion: str = "gini" max_depth: int | None = None min_samples_split: int = 2 min_samples_leaf: int = 1 min_weight_fraction_leaf: float = 0.0 max_features: float | str = "sqrt" max_leaf_nodes: int | None = None min_impurity_decrease: float = 0.0 bootstrap: bool = True oob_score: bool = False n_jobs: int | None = None random_state: int | None = None verbose: int = 0 warm_start: bool = False class_weight: dict | None = None ccp_alpha: float = 0.0 max_samples: float | None = None monotonic_cst: float | None = None
[docs] def predict_proba(self, X: np.ndarray) -> np.ndarray: """ Predict using the model, returning probabilities for each class. Parameters ---------- X: np.ndarray Data to predict on Returns ------- np.ndarray Probabilities for each class from the model """ if not self.estimator: raise ValueError("Model not trained") return self.estimator.predict_proba(X)