# Source code for openadmet.models.active_learning.acquisition

"""Active learning acquisition functions."""

import numpy as np
from scipy.stats import norm


def max_uncertainty_reduction(mean, std, **kwargs):
    r"""
    Maximum uncertainty reduction acquisition function.

    Scores each candidate by its predictive uncertainty alone. Refines an
    already well-performing model.

    .. math::

        x_{\text{next}} = \arg\max_x \sigma(x)

    Where:
        - :math:`\sigma(x)`: Predictive standard deviation at :math:`x`

    Parameters
    ----------
    mean : np.ndarray
        Predicted mean values, unused.
    std : np.ndarray
        Predicted standard deviation values.
    kwargs : keyword arguments
        Additional keyword arguments; accepted so that all acquisition
        functions share one call signature, and ignored here.

    Returns
    -------
    np.ndarray
        Uncertainty score for each candidate. This is `std` itself, returned
        unchanged (no copy is made).

    References
    ----------
    .. [1] Cohn, D., Ghahramani, Z., & Jordan, M. I. (1996). Active Learning with Statistical Models.
       Journal of Artificial Intelligence Research, 4, 129–145.

    """
    return std


def exploitation(mean, std, **kwargs):
    r"""
    Pure exploitation acquisition function.

    Scores each candidate by its predicted value, so that ranking by this
    score favours the candidates with the highest predictions.

    Parameters
    ----------
    mean : np.ndarray
        Predicted mean values.
    std : np.ndarray
        Predicted standard deviation values, unused.
    kwargs : keyword arguments
        Additional keyword arguments; accepted so that all acquisition
        functions share one call signature, and ignored here.

    Returns
    -------
    np.ndarray
        Predicted value for each candidate. This is `mean` itself, returned
        unchanged (no copy is made).

    """
    return mean


def probability_improvement(mean, std, best_y=0, xi=0.01, **kwargs):
    r"""
    Probability Improvement (PI) acquisition function. Balances exploration and exploitation.

    .. math::

        PI(x) = \Phi\left(\frac{\mu(x) - f^* - \xi}{\sigma(x)}\right)

    Where:
        - :math:`\mu(x)`: Predictive mean at :math:`x`
        - :math:`\sigma(x)`: Predictive standard deviation at :math:`x`
        - :math:`f^*`: Best observed value so far
        - :math:`\xi`: Small positive number to encourage exploration
        - :math:`\Phi(Z)`: CDF of standard normal distribution

    Parameters
    ----------
    mean : array_like
        Predicted mean values. Scalars, sequences and arrays are accepted.
    std : array_like
        Predicted standard deviation values. Values below ``1e-9`` are raised
        to ``1e-9`` before use.
    best_y : float
        Best observed value so far.
    xi : float
        Exploration-exploitation tradeoff parameter.
    kwargs : keyword arguments
        Additional keyword arguments; accepted so that all acquisition
        functions share one call signature, and ignored here.

    Returns
    -------
    np.ndarray
        Probability improvement value for each candidate.

    References
    ----------
    .. [1] Kushner, H. J. (1964). A new method of locating the maximum point of an arbitrary multipeak curve in the
       presence of noise. Journal of Basic Engineering, 86(1), 97–106.

    """
    # Coerce up front so plain lists and Python scalars work, not only objects
    # that expose an ndarray-style ``.clip`` method.
    mean = np.asarray(mean)
    # Floor the spread to avoid division by zero.
    std = np.maximum(np.asarray(std), 1e-9)

    return norm.cdf((mean - best_y - xi) / std)


def expected_improvement(mean, std, best_y=0, xi=0.01, **kwargs):
    r"""
    Expected Improvement (EI) acquisition function. Balances exploration and exploitation.

    .. math::

        EI(x) = (\mu(x) - f^* - \xi) \cdot \Phi(Z) + \sigma(x) \cdot \phi(Z)

        Z = \frac{\mu(x) - f^* - \xi}{\sigma(x)}

    Where:
        - :math:`\mu(x)`: Predictive mean at :math:`x`
        - :math:`\sigma(x)`: Predictive standard deviation at :math:`x`
        - :math:`f^*`: Best observed value so far
        - :math:`\xi`: Small positive number to encourage exploration
        - :math:`\Phi(Z)`: CDF of standard normal distribution
        - :math:`\phi(Z)`: PDF of standard normal distribution

    Parameters
    ----------
    mean : array_like
        Predicted mean values. Scalars, sequences and arrays are accepted.
    std : array_like
        Predicted standard deviation values. Values below ``1e-9`` are raised
        to ``1e-9`` before use.
    best_y : float
        Best observed value so far.
    xi : float
        Exploration-exploitation tradeoff parameter.
    kwargs : keyword arguments
        Additional keyword arguments; accepted so that all acquisition
        functions share one call signature, and ignored here.

    Returns
    -------
    np.ndarray
        Expected improvement value for each candidate.

    References
    ----------
    .. [1] Jones, D. R., Schonlau, M., & Welch, W. J. (1998). Efficient global optimization of expensive black-box
       functions. Journal of Global Optimization, 13(4), 455–492.

    """
    # Coerce up front so plain lists and Python scalars work, not only objects
    # that expose an ndarray-style ``.clip`` method.
    mean = np.asarray(mean)
    # Floor the spread to avoid division by zero when computing Z.
    std = np.maximum(np.asarray(std), 1e-9)

    improvement = mean - best_y - xi
    z = improvement / std

    return improvement * norm.cdf(z) + std * norm.pdf(z)


def upper_confidence_bound(mean, std, beta=2.0, **kwargs):
    r"""
    Upper Confidence Bound (UCB) acquisition function. Ensures exploration while still considering high predictions.

    .. math::

        UCB(x) = \mu(x) + \beta \cdot \sigma(x)

    Where:
        - :math:`\mu(x)`: Predictive mean at :math:`x`
        - :math:`\sigma(x)`: Predictive standard deviation at :math:`x`
        - :math:`\beta`: Trade-off parameter (higher :math:`\beta` favors exploration)

    Parameters
    ----------
    mean : np.ndarray
        Predicted mean values.
    std : np.ndarray
        Predicted standard deviation values, scaled by `beta` and added to
        `mean`.
    beta : float
        Tradeoff parameter (higher = more exploration).
    kwargs : keyword arguments
        Additional keyword arguments; accepted so that all acquisition
        functions share one call signature, and ignored here.

    Returns
    -------
    np.ndarray
        Upper confidence bound value for each candidate.

    References
    ----------
    .. [1] Srinivas, N., Krause, A., Kakade, S. M., & Seeger, M. (2010). Gaussian Process Optimization in the Bandit
       Setting: No Regret and Experimental Design. ICML.

    """
    return mean + beta * std


# Registry mapping each acquisition-function name to its implementation.
# Every function is reachable under a descriptive name and a short alias.
_ACQUISITION_FUNCTIONS = dict(
    (
        # Descriptive names.
        ("max-uncertainty-reduction", max_uncertainty_reduction),
        ("exploitation", exploitation),
        ("upper-confidence-bound", upper_confidence_bound),
        ("expected-improvement", expected_improvement),
        ("probability-improvement", probability_improvement),
        # Short aliases.
        ("ur", max_uncertainty_reduction),
        ("exp", exploitation),
        ("ucb", upper_confidence_bound),
        ("ei", expected_improvement),
        ("pi", probability_improvement),
    )
)