# Source code for calibre.calibrators.spline

"""
I-Spline calibration with cross-validation.

This module provides monotonic I-spline calibration, which uses spline basis
functions with non-negative coefficients to ensure monotonicity while providing
smooth calibration curves.
"""

from __future__ import annotations

import logging

import numpy as np
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold
from sklearn.preprocessing import SplineTransformer

from ..base import BaseCalibrator
from ..utils import check_arrays

logger = logging.getLogger(__name__)



# [docs]  (link marker left over from the rendered Sphinx source listing)
class SplineCalibrator(BaseCalibrator):
    """Spline calibration with non-negative coefficients and cross-validation.

    A spline basis expansion of the input is combined with a Ridge regression
    whose coefficients are constrained to be non-negative. K-fold
    cross-validation fits one candidate per fold and the candidate with the
    highest validation score is kept. If no candidate can be selected, an
    isotonic regression fitted on all data is used instead.

    Parameters
    ----------
    n_splines
        Number of knots handed to ``SplineTransformer`` (its ``n_knots``
        argument). Values below 3 are raised to 3 (with a warning) at fit time.
    degree
        Polynomial degree of the spline basis functions. Values below 1 are
        raised to 1 (with a warning) at fit time.
    cv
        Number of cross-validation folds.
    enable_diagnostics
        Whether to enable plateau diagnostics analysis (forwarded to the
        base class).

    Attributes
    ----------
    spline_
        Fitted ``SplineTransformer`` of the selected fold, or ``None`` when the
        isotonic fallback is in use.
    model_
        Fitted ``Ridge`` model of the selected fold, or ``None`` when the
        isotonic fallback is in use.
    fallback_
        Fitted ``IsotonicRegression`` used when cross-validation selected no
        model, otherwise ``None``.

    Examples
    --------
    >>> import numpy as np
    >>> from calibre import SplineCalibrator
    >>>
    >>> X = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
    >>> y = np.array([0.12, 0.18, 0.35, 0.25, 0.55])
    >>>
    >>> cal = SplineCalibrator(n_splines=5)
    >>> cal.fit(X, y)
    >>> X_calibrated = cal.transform(np.array([0.15, 0.35, 0.55]))

    Notes
    -----
    I-splines are integrated versions of M-splines (monotone splines) that are
    guaranteed to be monotonically increasing when coefficients are non-negative.
    This calibrator fits ``Ridge(alpha=0.01, positive=True)`` on the basis
    produced by scikit-learn's ``SplineTransformer``.

    NOTE(review): ``SplineTransformer`` generates a B-spline basis rather than
    an integrated (I-spline) basis, and non-negative coefficients on a B-spline
    basis do not by themselves guarantee a monotone fit. Confirm whether strict
    monotonicity is required by callers.

    The selected model is the one trained on a single fold's training split;
    it is not refitted on the full data set.

    See Also
    --------
    IsotonicCalibrator : Non-parametric monotonic calibration
    SmoothedIsotonicCalibrator : Isotonic with local smoothing
    """

    def __init__(
        self,
        n_splines: int = 10,
        degree: int = 3,
        cv: int = 5,
        enable_diagnostics: bool = False,
    ):
        """Store the hyper-parameters; no fitting happens here."""
        # Call base class for diagnostic support
        super().__init__(enable_diagnostics=enable_diagnostics)

        self.n_splines = n_splines
        self.degree = degree
        self.cv = cv

    def _fit_impl(self, X: np.ndarray, y: np.ndarray) -> None:
        """Implement the spline calibration fitting logic.

        Parameters
        ----------
        X
            The training input samples.
        y
            The target values.

        Notes
        -----
        This method implements the actual fitting logic. Data storage,
        diagnostics, and return value are handled by the base class fit() method.

        On return exactly one of the following holds: ``spline_`` and
        ``model_`` are set and ``fallback_`` is ``None``, or ``fallback_`` is
        set and ``spline_``/``model_`` are ``None``.
        """
        X, y = check_arrays(X, y)

        # Validate parameters. Out-of-range values are clamped in place on the
        # instance, so the adjusted values remain visible after fitting.
        if self.n_splines < 3:
            logger.warning("n_splines should be at least 3. Setting to 3.")
            self.n_splines = 3

        if self.degree < 1:
            logger.warning("degree should be at least 1. Setting to 1.")
            self.degree = 1

        # scikit-learn transformers expect a 2D (n_samples, 1) input.
        X_2d = np.array(X).reshape(-1, 1)

        # Fixed seed keeps the fold assignment (and thus the selected model)
        # reproducible between runs.
        kf = KFold(n_splits=self.cv, shuffle=True, random_state=42)
        best_score = -np.inf
        best_model = None

        for train_idx, val_idx in kf.split(X_2d):
            X_train, y_train = X_2d[train_idx], y[train_idx]
            X_val, y_val = X_2d[val_idx], y[val_idx]

            # A fresh transformer per fold: the stored (spline, model) pair
            # must share the knots the model was trained on. Re-using a single
            # transformer would leave the kept model paired with the knots of
            # whichever fold happened to be fitted last.
            spline = SplineTransformer(
                n_knots=self.n_splines,
                degree=self.degree,
                extrapolation="constant",
                include_bias=True,
            )
            X_train_spline = spline.fit_transform(X_train)

            # Linear model restricted to non-negative basis coefficients.
            model = Ridge(alpha=0.01, positive=True, fit_intercept=True)
            model.fit(X_train_spline, y_train)

            # Evaluate on the held-out part of this fold.
            X_val_spline = spline.transform(X_val)
            score = model.score(X_val_spline, y_val)

            # A NaN score never compares greater, so such folds are skipped.
            if score > best_score:
                best_score = score
                best_model = (spline, model)

        # If no fold produced a usable score, use simple isotonic regression.
        if best_model is None:
            logger.warning(
                "Cross-validation failed to find a good model. Using fallback isotonic regression."
            )
            self.fallback_ = IsotonicRegression(out_of_bounds="clip")
            self.fallback_.fit(X, y)
            self.spline_ = None
            self.model_ = None
        else:
            self.spline_, self.model_ = best_model
            self.fallback_ = None

    def transform(self, X: np.ndarray) -> np.ndarray:
        """Apply the fitted calibration to new data.

        Parameters
        ----------
        X
            The values to be calibrated.

        Returns
        -------
        X_calibrated : array-like of shape (n_samples,)
            Calibrated values. Predictions of the spline model are clipped to
            ``[0, 1]``; predictions of the isotonic fallback are returned as
            produced by the fallback.

        Raises
        ------
        ValueError
            If model has not been fitted before transform.
        """
        X = np.asarray(X).ravel()

        # Read fitted state defensively so that calling transform() before
        # fit() raises the documented ValueError instead of AttributeError.
        fallback = getattr(self, "fallback_", None)
        if fallback is not None:
            return np.asarray(fallback.transform(X))

        spline = getattr(self, "spline_", None)
        model = getattr(self, "model_", None)
        if spline is None or model is None:
            raise ValueError("Model must be fitted before transform")

        X_spline = spline.transform(X.reshape(-1, 1))
        predictions = model.predict(X_spline)

        # Ensure predictions are within [0, 1] bounds
        return np.asarray(np.clip(predictions, 0, 1))