Source code for openadmet.models.eval.binary

"""Posthoc binary metrics evaluation."""

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    confusion_matrix,
    precision_score,
    recall_score,
)

from openadmet.models.eval.eval_base import EvalBase, evaluators


@evaluators.register("PosthocBinaryMetrics")
class PosthocBinaryMetrics(EvalBase):
    """
    Posthoc binary metrics.

    Intended to be used for regression-based models to calculate
    precision and recall metrics for user-input cutoffs.

    Not intended for binary models.
    """

    def evaluate(
        self,
        y_true: list = None,
        y_pred: list = None,
        cutoff: float = None,
        report: bool = False,
        output_dir: str = None,
    ):
        """
        Evaluate the precision and recall metrics for the model with user-input cutoffs.

        Draws the confusion matrix and the post-hoc classification scatter
        plot, computes precision and recall at ``cutoff`` and forwards them
        to :meth:`report`.

        Parameters
        ----------
        y_true : array-like
            True (continuous) values.
        y_pred : array-like
            Predicted (continuous) values.
        cutoff : float
            Cutoff value used to binarize both ``y_true`` and ``y_pred``.
            Defaults to None in the signature, but a value is required.
        report : bool, optional
            Whether to save a JSON file of the resulting precision/recall
            metrics. Default is False.
        output_dir : str, optional
            Directory to save the output plots and report. Default is None,
            in which case the plots are drawn but not saved.

        Raises
        ------
        ValueError
            If `y_true` or `y_pred` is not provided, if `cutoff` is not
            provided, or if `report` is True while `output_dir` is None.

        """
        if y_true is None or y_pred is None:
            raise ValueError("Must provide y_true and y_pred")
        if cutoff is None:
            raise ValueError("Must provide cutoff")
        # Fail before any figure is created rather than attempting to write
        # the report into a directory literally named "None".
        if report and output_dir is None:
            raise ValueError("Must provide output_dir when report is True")

        self.plot_confusion_matrix(y_true, y_pred, cutoff, output_dir)
        self.plot_posthoc_classification(y_true, y_pred, cutoff, output_dir)
        # NOTE: get_precision_recall takes (y_pred, y_true), the reverse of
        # the argument order used by the plotting methods.
        precision, recall = self.get_precision_recall(y_pred, y_true, cutoff)
        self.report(report, output_dir, precision=precision, recall=recall)

    def _binarize(self, values, cutoff):
        """Return a list of booleans, True where a value is strictly greater than ``cutoff``."""
        return [value > cutoff for value in values]

    def get_precision_recall(self, y_pred: list, y_true: list, cutoff: float):
        """
        Calculate precision and recall metrics for a given cutoff.

        Both inputs are binarized with ``value > cutoff`` before scoring.

        Parameters
        ----------
        y_pred : array-like
            Predicted values.
        y_true : array-like
            True values.
        cutoff : float
            Cutoff value to calculate precision and recall.

        Returns
        -------
        tuple
            A tuple containing:
            - precision : float
                Precision value.
            - recall : float
                Recall value.

        """
        pred_class = self._binarize(y_pred, cutoff)
        true_class = self._binarize(y_true, cutoff)
        precision = precision_score(true_class, pred_class)
        recall = recall_score(true_class, pred_class)
        return (precision, recall)

    def plot_confusion_matrix(
        self, y_true: list, y_pred: list, cutoff: float, output_dir: str = None
    ):
        """
        Plot the confusion matrix for a given cutoff.

        Parameters
        ----------
        y_true : list or array-like
            True values.
        y_pred : list or array-like
            Predicted values.
        cutoff : float
            Cutoff value to binarize predictions and true values.
        output_dir : str, optional
            Directory to save the confusion matrix plot as
            ``confusion_matrix.png``. If None, the plot is not saved.

        """
        pred_class = self._binarize(y_pred, cutoff)
        true_class = self._binarize(y_true, cutoff)
        # Pin both classes so the matrix stays 2x2 even when every value
        # falls on the same side of the cutoff.
        cm = confusion_matrix(true_class, pred_class, labels=[False, True])
        disp = ConfusionMatrixDisplay(cm)
        disp.plot()
        if output_dir is not None:
            # Save through the display's own figure instead of relying on
            # pyplot's implicit "current figure".
            disp.figure_.savefig(f"{output_dir}/confusion_matrix.png", dpi=300)

    def plot_posthoc_classification(
        self, y_true: list, y_pred: list, cutoff: float, output_dir: str = None
    ):
        """
        Plot the post-hoc classification scatter plot with cutoff lines.

        Parameters
        ----------
        y_true : list or array-like
            True values, drawn on the x axis.
        y_pred : list or array-like
            Predicted values, drawn on the y axis.
        cutoff : float
            Cutoff value to draw the vertical and horizontal threshold lines.
        output_dir : str, optional
            Directory to save the plot as ``classification.png``.
            If None, the plot is not saved.

        """
        fig, ax = plt.subplots()
        ax.scatter(y_true, y_pred)
        ax.axvline(cutoff, color="r", linestyle="--")
        ax.axhline(cutoff, color="r", linestyle="--")
        ax.set_xlabel("True Value")
        ax.set_ylabel("Predicted Value")
        ax.set_title(f"Post-hoc classification with cutoff: {cutoff} ")
        if output_dir is not None:
            fig.savefig(f"{output_dir}/classification.png", dpi=300)

    def stats_to_json(self, data_df, output_dir):
        """
        Save the precision-recall DataFrame to a JSON file.

        Parameters
        ----------
        data_df : pandas.DataFrame
            DataFrame containing precision and recall metrics.
        output_dir : str
            Directory in which ``posthoc_binary_eval.json`` is written.

        """
        data_df.to_json(f"{output_dir}/posthoc_binary_eval.json")

    def report(self, write=False, output_dir=None, precision=None, recall=None):
        """
        Report the evaluation results, optionally saving them to JSON.

        Parameters
        ----------
        write : bool, optional
            Whether to write the results to a JSON file. Default is False.
        output_dir : str, optional
            Directory to save the JSON file. Required if write is True.
        precision : float, optional
            Precision value to report; stored in a single-row DataFrame.
        recall : float, optional
            Recall value to report; stored in a single-row DataFrame.

        Raises
        ------
        ValueError
            If `write` is True while `output_dir` is None.

        """
        if write and output_dir is None:
            raise ValueError("Must provide output_dir when write is True")

        stats_df = pd.DataFrame({"precision": precision, "recall": recall}, index=[0])
        if write:
            self.stats_to_json(stats_df, output_dir)