Source code for optimal_cutoffs.multilabel

"""Multi-label classification threshold optimization.

This module implements threshold optimization for multi-label classification
where we have K independent binary labels, each with its own threshold τ_j.

Key insight: a multi-label problem decomposes into K binary subproblems, and the
averaging strategy determines how strongly their thresholds couple:
- Macro averaging: optimize each label independently → O(K·n log n)
- Micro averaging: thresholds are coupled through global TP/FP/FN → coordinate ascent

All functions assume calibrated probabilities: E[y|p] = p
"""

from __future__ import annotations

import numpy as np
from numpy.typing import ArrayLike

from .core import OptimizationResult, Task
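
# Illustration (not part of the module's code path): the two averaging modes
# combine per-label confusion counts differently. A minimal sketch, assuming
# per-label counts ``tp``, ``fp``, ``fn`` are 1-D arrays of length K:
#
#     macro_f1 = np.mean(2 * tp / (2 * tp + fp + fn))                  # mean of per-label F1
#     micro_f1 = 2 * tp.sum() / (2 * tp.sum() + fp.sum() + fn.sum())   # F1 of pooled counts
#
# Macro treats every label equally, so each τ_j can be tuned in isolation;
# micro pools the counts, which is why the thresholds become coupled.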


def optimize_macro_multilabel(
    true_labels: ArrayLike,
    pred_proba: ArrayLike,
    *,
    metric: str = "f1",
    method: str = "auto",
    sample_weight: ArrayLike | None = None,
    comparison: str = ">",
    tolerance: float = 1e-10,
) -> OptimizationResult:
    """Optimize macro-averaged metrics for multi-label classification.

    For macro averaging, each label is optimized independently:

        Macro-F1 = (1/K) Σ_j F1_j(τ_j)

    Since each F1_j depends only on τ_j, we can optimize each threshold
    independently using binary optimization. This is exact and efficient.

    Parameters
    ----------
    true_labels : array-like of shape (n_samples, n_labels)
        True multi-label binary matrix
    pred_proba : array-like of shape (n_samples, n_labels)
        Predicted probabilities for each label
    metric : str, default="f1"
        Metric to optimize per label ("f1", "precision", "recall")
    method : str, default="auto"
        Binary optimization method for each label
    sample_weight : array-like of shape (n_samples,), optional
        Sample weights
    comparison : str, default=">"
        Comparison operator
    tolerance : float, default=1e-10
        Numerical tolerance

    Returns
    -------
    OptimizationResult
        Result with per-label thresholds and macro-averaged score

    Examples
    --------
    >>> # 3 independent labels
    >>> y_true = [[1, 0, 1], [0, 1, 0], [1, 1, 1]]
    >>> y_prob = [[0.8, 0.2, 0.9], [0.1, 0.7, 0.3], [0.9, 0.8, 0.7]]
    >>> result = optimize_macro_multilabel(y_true, y_prob, metric="f1")
    >>> len(result.thresholds)  # One per label
    3
    """
    from .binary import optimize_metric_binary

    # Validate inputs for multilabel
    true_labels = np.asarray(true_labels, dtype=np.int8)
    pred_proba = np.asarray(pred_proba, dtype=np.float64)

    if true_labels.ndim != 2:
        raise ValueError(
            f"Multilabel true_labels must be 2D, got shape {true_labels.shape}"
        )
    if pred_proba.ndim != 2:
        raise ValueError(
            f"Multilabel pred_proba must be 2D, got shape {pred_proba.shape}"
        )
    if true_labels.shape != pred_proba.shape:
        raise ValueError(
            f"Shape mismatch: labels {true_labels.shape} vs probs {pred_proba.shape}"
        )

    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight, dtype=np.float64)
        if len(sample_weight) != true_labels.shape[0]:
            raise ValueError("Sample weights must match number of samples")

    n_samples, n_labels = true_labels.shape

    # Optimize each label independently
    optimal_thresholds = np.zeros(n_labels, dtype=np.float64)
    optimal_scores = np.zeros(n_labels, dtype=np.float64)

    for j in range(n_labels):
        # Extract binary problem for label j
        y_true_j = (
            true_labels[:, j]
            if true_labels.ndim == 2
            else (true_labels == j).astype(int)
        )
        y_prob_j = pred_proba[:, j]

        # Optimize threshold for this label
        result_j = optimize_metric_binary(
            y_true_j,
            y_prob_j,
            metric=metric,
            method=method,
            sample_weight=sample_weight,
            comparison=comparison,
            tolerance=tolerance,
        )

        optimal_thresholds[j] = result_j.thresholds[0]
        optimal_scores[j] = result_j.scores[0]

    # Macro average score
    macro_score = np.mean(optimal_scores)

    def predict_multilabel(probs: ArrayLike) -> np.ndarray:
        """Predict using per-label thresholds (independent decisions)."""
        p = np.asarray(probs, dtype=np.float64)
        if p.ndim != 2 or p.shape[1] != n_labels:
            raise ValueError(f"Expected probabilities shape (n_samples, {n_labels})")

        if comparison == ">=":
            predictions = (p >= optimal_thresholds[None, :]).astype(np.int32)
        else:
            predictions = (p > optimal_thresholds[None, :]).astype(np.int32)
        return predictions

    return OptimizationResult(
        thresholds=optimal_thresholds,
        scores=np.array([macro_score]),
        predict=predict_multilabel,
        task=Task.MULTILABEL,
        metric=f"macro_{metric}",
        n_classes=n_labels,
    )
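
# Usage sketch (hypothetical ``y_true``/``y_prob`` arrays of shape
# (n_samples, n_labels)): the returned ``predict`` callable applies the
# per-label thresholds independently to new probability matrices.
#
#     result = optimize_macro_multilabel(y_true, y_prob, metric="f1")
#     y_pred = result.predict(y_prob)   # (n_samples, n_labels) matrix of 0/1 decisions
#     taus = result.thresholds          # K independently optimized thresholds
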

def optimize_micro_multilabel(
    true_labels: ArrayLike,
    pred_proba: ArrayLike,
    *,
    metric: str = "f1",
    max_iter: int = 30,
    sample_weight: ArrayLike | None = None,
    comparison: str = ">",
    tolerance: float = 1e-12,
) -> OptimizationResult:
    """Optimize micro-averaged metrics for multi-label classification.

    For micro averaging, thresholds are coupled through global TP/FP/FN:

        Micro-F1 = 2·TP_total / (2·TP_total + FP_total + FN_total)

    where TP_total = Σ_j TP_j(τ_j). Changing any τ_j affects the global
    metric, so we use coordinate ascent to optimize the coupled problem.

    Parameters
    ----------
    true_labels : array-like of shape (n_samples, n_labels)
        True multi-label binary matrix
    pred_proba : array-like of shape (n_samples, n_labels)
        Predicted probabilities for each label
    metric : str, default="f1"
        Metric to optimize ("f1", "precision", "recall")
    max_iter : int, default=30
        Maximum coordinate ascent iterations
    sample_weight : array-like of shape (n_samples,), optional
        Sample weights
    comparison : str, default=">"
        Comparison operator
    tolerance : float, default=1e-12
        Convergence tolerance

    Returns
    -------
    OptimizationResult
        Result with per-label thresholds optimized for micro averaging

    Examples
    --------
    >>> result = optimize_micro_multilabel(y_true, y_prob, metric="f1")
    >>> # Thresholds are coupled - changing one affects global metric
    """
    from .metrics_core import get_metric_function

    # Validate inputs for multilabel
    true_labels = np.asarray(true_labels, dtype=np.int8)
    pred_proba = np.asarray(pred_proba, dtype=np.float64)

    if true_labels.ndim != 2:
        raise ValueError(
            f"Multilabel true_labels must be 2D, got shape {true_labels.shape}"
        )
    if pred_proba.ndim != 2:
        raise ValueError(
            f"Multilabel pred_proba must be 2D, got shape {pred_proba.shape}"
        )
    if true_labels.shape != pred_proba.shape:
        raise ValueError(
            f"Shape mismatch: labels {true_labels.shape} vs probs {pred_proba.shape}"
        )

    n_samples, n_labels = true_labels.shape

    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight, dtype=np.float64)
        if len(sample_weight) != n_samples:
            raise ValueError("sample_weight must have same length as n_samples")
    else:
        sample_weight = np.ones(n_samples, dtype=np.float64)

    # Initialize thresholds
    thresholds = np.zeros(n_labels, dtype=np.float64)
    metric_fn = get_metric_function(metric)

    def compute_global_metric(tau: np.ndarray) -> float:
        """Compute micro-averaged metric for given thresholds."""
        total_tp = total_fp = total_fn = 0.0

        for j in range(n_labels):
            # Binary predictions for label j
            if comparison == ">=":
                pred_j = (pred_proba[:, j] >= tau[j]).astype(int)
            else:
                pred_j = (pred_proba[:, j] > tau[j]).astype(int)

            true_j = (
                true_labels[:, j]
                if true_labels.ndim == 2
                else (true_labels == j).astype(int)
            )

            # Confusion matrix for label j
            tp_j = np.sum(sample_weight * (true_j == 1) * (pred_j == 1))
            fp_j = np.sum(sample_weight * (true_j == 0) * (pred_j == 1))
            fn_j = np.sum(sample_weight * (true_j == 1) * (pred_j == 0))

            total_tp += tp_j
            total_fp += fp_j
            total_fn += fn_j

        # Micro metric (TN not meaningful for micro averaging)
        return float(metric_fn(total_tp, 0.0, total_fp, total_fn))

    best_score = compute_global_metric(thresholds)

    # Coordinate ascent
    for _iteration in range(max_iter):
        improved = False

        for j in range(n_labels):
            # Fix all other thresholds, optimize τ_j
            candidates = np.unique(pred_proba[:, j])
            best_tau_j = thresholds[j]
            best_score_j = best_score

            for tau_j in candidates:
                thresholds[j] = tau_j
                score = compute_global_metric(thresholds)
                if score > best_score_j + tolerance:
                    best_score_j = score
                    best_tau_j = tau_j
                    improved = True

            thresholds[j] = best_tau_j
            best_score = best_score_j

        if not improved:
            break

    def predict_multilabel(probs: ArrayLike) -> np.ndarray:
        """Predict using micro-optimized thresholds."""
        p = np.asarray(probs, dtype=np.float64)
        if p.ndim != 2 or p.shape[1] != n_labels:
            raise ValueError(f"Expected probabilities shape (n_samples, {n_labels})")

        if comparison == ">=":
            predictions = (p >= thresholds[None, :]).astype(np.int32)
        else:
            predictions = (p > thresholds[None, :]).astype(np.int32)
        return predictions

    return OptimizationResult(
        thresholds=thresholds,
        scores=np.array([best_score]),
        predict=predict_multilabel,
        task=Task.MULTILABEL,
        metric=f"micro_{metric}",
        n_classes=n_labels,
    )
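
# Sanity-check sketch (assumes scikit-learn is installed; it is not required by
# this module): the returned score should agree with the micro-averaged F1 of
# the thresholded predictions.
#
#     from sklearn.metrics import f1_score
#     result = optimize_micro_multilabel(y_true, y_prob, metric="f1")
#     y_pred = result.predict(y_prob)
#     assert np.isclose(result.scores[0], f1_score(y_true, y_pred, average="micro"))
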

def optimize_multilabel(
    true_labels: ArrayLike,
    pred_proba: ArrayLike,
    *,
    metric: str = "f1",
    average: str = "macro",
    method: str = "auto",
    sample_weight: ArrayLike | None = None,
    comparison: str = ">",
    tolerance: float = 1e-10,
) -> OptimizationResult:
    """General multi-label threshold optimization with automatic method selection.

    Routes to the appropriate algorithm based on averaging strategy:

    - Macro: Independent optimization per label (exact, O(K·n log n))
    - Micro: Coordinate ascent for coupled thresholds (local optimum)

    Parameters
    ----------
    true_labels : array-like of shape (n_samples, n_labels)
        True multi-label binary matrix
    pred_proba : array-like of shape (n_samples, n_labels)
        Predicted probabilities for each label
    metric : str, default="f1"
        Metric to optimize
    average : {"macro", "micro"}, default="macro"
        Averaging strategy
    method : str, default="auto"
        Optimization method (passed to binary optimizer for macro)
    sample_weight : array-like of shape (n_samples,), optional
        Sample weights
    comparison : str, default=">"
        Comparison operator
    tolerance : float, default=1e-10
        Numerical tolerance

    Returns
    -------
    OptimizationResult
        Result with optimal thresholds and metric score

    Examples
    --------
    >>> # Independent per-label optimization
    >>> result = optimize_multilabel(y_true, y_prob, average="macro")
    >>>
    >>> # Coupled optimization for global metric
    >>> result = optimize_multilabel(y_true, y_prob, average="micro")
    """
    match average:
        case "macro":
            return optimize_macro_multilabel(
                true_labels,
                pred_proba,
                metric=metric,
                method=method,
                sample_weight=sample_weight,
                comparison=comparison,
                tolerance=tolerance,
            )
        case "micro":
            return optimize_micro_multilabel(
                true_labels,
                pred_proba,
                metric=metric,
                max_iter=30,
                sample_weight=sample_weight,
                comparison=comparison,
                tolerance=tolerance,
            )
        case _:
            raise ValueError(f"Unknown average: {average}. Use 'macro' or 'micro'")
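
# Comparison sketch (hypothetical data): with imbalanced labels the two
# averaging modes generally select different thresholds, because micro
# averaging weights every positive instance equally while macro averaging
# weights every label equally.
#
#     macro = optimize_multilabel(y_true, y_prob, average="macro")
#     micro = optimize_multilabel(y_true, y_prob, average="micro")
#     print(macro.thresholds, micro.thresholds)   # typically not identical
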
__all__ = [ "optimize_macro_multilabel", "optimize_micro_multilabel", "optimize_multilabel", ]