Source code for rank_preserving_calibration.ovr_isotonic

# rank_preserving_calibration/ovr_isotonic.py
# This file will contain the implementation of the One-vs-Rest Isotonic Regression calibrator.
from __future__ import annotations

from typing import Any

import numpy as np

from .calibration import _isotonic_regression


def calibrate_ovr_isotonic(
    y: np.ndarray,
    probs: np.ndarray,
) -> dict[str, Any]:
    """
    Calibrates multiclass probabilities using One-vs-Rest Isotonic Regression.

    A separate isotonic regression is fit for every class on the binary
    problem "this class vs. the rest"; the per-class calibrated scores are
    then renormalized so each row sums to 1. This mirrors the common OvR
    calibration recipe used by libraries like scikit-learn.

    Args:
        y: True class labels as integers of shape (N,).
        probs: Original probability matrix of shape (N, J).

    Returns:
        A dictionary containing the calibrated probabilities 'Q'.
    """
    labels = np.asarray(y, dtype=np.int64)
    scores = np.asarray(probs, dtype=np.float64)
    _, n_classes = scores.shape
    q = np.zeros_like(scores)

    for cls in range(n_classes):
        # Binary one-vs-rest targets and the raw scores for this class.
        targets = (labels == cls).astype(int)
        col = scores[:, cls]

        # A stable sort keeps tied scores in their original relative order,
        # which the tie handling of the isotonic fit relies on.
        perm = np.argsort(col, kind="mergesort")
        col_sorted = col[perm]

        # Fit a non-decreasing sequence to the sorted binary targets; this
        # sequence gives the calibrated probability at each sorted score.
        fit_sorted = _isotonic_regression(targets[perm], ties="stable")

        # The (sorted score, fitted value) pairs define a step-like mapping.
        # Collapse duplicate scores to their first occurrence so the pairs
        # form a valid interpolation table, then push every original
        # (unsorted) score through it.
        knots, first_idx = np.unique(col_sorted, return_index=True)
        q[:, cls] = np.interp(col, knots, fit_sorted[first_idx])

    # The per-class fits are independent, so rows need not sum to 1 —
    # renormalize them here.
    totals = q.sum(axis=1)
    degenerate = totals == 0
    if np.any(degenerate):
        # Rows that summed to 0 would divide by zero; give them the
        # uniform distribution instead (which already sums to 1).
        q[degenerate, :] = 1.0 / n_classes
        totals[degenerate] = 1.0
    q = q / totals[:, np.newaxis]

    return {
        "Q": q,
    }