Basic Usage Examples¶
This section provides step-by-step examples for common calibration tasks.
Complete Workflow Example¶
Here’s a complete example showing how to train a model, calibrate its predictions, and evaluate the results:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.calibration import calibration_curve
from calibre import (
    NearlyIsotonicRegression,
    mean_calibration_error,
    expected_calibration_error,
    unique_value_counts
)
# Generate synthetic dataset
X, y = make_classification(
    n_samples=2000,
    n_features=20,
    n_redundant=2,
    n_informative=18,
    random_state=42
)
# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)
# Train a Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
# Get uncalibrated predictions
y_pred_uncal = model.predict_proba(X_test)[:, 1]
# Apply calibration
calibrator = NearlyIsotonicRegression(lam=1.0, method='path')
calibrator.fit(y_pred_uncal, y_test)
y_pred_cal = calibrator.transform(y_pred_uncal)
# Evaluate calibration quality
mce_before = mean_calibration_error(y_test, y_pred_uncal)
mce_after = mean_calibration_error(y_test, y_pred_cal)
ece_before = expected_calibration_error(y_test, y_pred_uncal, n_bins=10)
ece_after = expected_calibration_error(y_test, y_pred_cal, n_bins=10)
# Check granularity preservation
counts_before = unique_value_counts(y_pred_uncal)
counts_after = unique_value_counts(y_pred_cal, y_orig=y_pred_uncal)
print("Calibration Results:")
print(f"Mean Calibration Error: {mce_before:.4f} → {mce_after:.4f}")
print(f"Expected Calibration Error: {ece_before:.4f} → {ece_after:.4f}")
print(f"Unique values: {counts_before['n_unique_y_pred']} → {counts_after['n_unique_y_pred']}")
print(f"Preservation ratio: {counts_after['unique_value_ratio']:.3f}")
Comparing Calibration Methods¶
This example compares different calibration methods on the same dataset:
from calibre import (
    NearlyIsotonicRegression,
    ISplineCalibrator,
    RelaxedPAVA,
    RegularizedIsotonicRegression,
    SmoothedIsotonicRegression
)
# Define calibrators to compare
calibrators = {
    'Nearly Isotonic (strict)': NearlyIsotonicRegression(lam=10.0, method='path'),
    'Nearly Isotonic (relaxed)': NearlyIsotonicRegression(lam=0.1, method='path'),
    'I-Spline': ISplineCalibrator(n_splines=10, degree=3, cv=3),
    'Relaxed PAVA': RelaxedPAVA(percentile=10, adaptive=True),
    'Regularized Isotonic': RegularizedIsotonicRegression(alpha=0.1),
    'Smoothed Isotonic': SmoothedIsotonicRegression(window_length=7, poly_order=3)
}
# Compare calibrators
results = {}
for name, calibrator in calibrators.items():
    # Fit calibrator
    calibrator.fit(y_pred_uncal, y_test)
    y_cal = calibrator.transform(y_pred_uncal)

    # Calculate metrics
    mce = mean_calibration_error(y_test, y_cal)
    ece = expected_calibration_error(y_test, y_cal, n_bins=10)
    counts = unique_value_counts(y_cal, y_orig=y_pred_uncal)

    results[name] = {
        'mce': mce,
        'ece': ece,
        'unique_values': counts['n_unique_y_pred'],
        'preservation_ratio': counts['unique_value_ratio']
    }
# Display results
print("\\nComparison of Calibration Methods:")
print(f"{'Method':<25} {'MCE':<8} {'ECE':<8} {'Unique':<8} {'Preserve':<8}")
print("-" * 65)
for name, metrics in results.items():
print(f"{name:<25} {metrics['mce']:<8.4f} {metrics['ece']:<8.4f} "
f"{metrics['unique_values']:<8} {metrics['preservation_ratio']:<8.3f}")
Handling Different Data Types¶
Working with Imbalanced Data¶
from sklearn.datasets import make_classification
# Create imbalanced dataset
X_imbal, y_imbal = make_classification(
    n_samples=2000,
    n_features=20,
    weights=[0.9, 0.1],  # 90% class 0, 10% class 1
    random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(
    X_imbal, y_imbal, test_size=0.5, stratify=y_imbal, random_state=42
)
# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict_proba(X_test)[:, 1]
# Calibrate with a method suited to imbalanced data
calibrator = RelaxedPAVA(percentile=5, adaptive=True)  # Lower percentile for imbalanced data
calibrator.fit(y_pred, y_test)
y_cal = calibrator.transform(y_pred)
print(f"Class distribution: {np.bincount(y_test)}")
print(f"MCE before: {mean_calibration_error(y_test, y_pred):.4f}")
print(f"MCE after: {mean_calibration_error(y_test, y_cal):.4f}")
Working with Small Datasets¶
# Simulate small dataset
np.random.seed(42)
n_small = 200
X_small = np.random.uniform(0, 1, n_small)
y_small = np.random.binomial(1, X_small, n_small)
# Use methods that work well with small datasets
calibrators_small = {
    'I-Spline (small)': ISplineCalibrator(n_splines=5, degree=2, cv=3),
    'Relaxed PAVA': RelaxedPAVA(percentile=20, adaptive=False),
    'Regularized': RegularizedIsotonicRegression(alpha=1.0)  # Higher regularization
}
for name, cal in calibrators_small.items():
    try:
        cal.fit(X_small, y_small)
        y_cal = cal.transform(X_small)
        mce = mean_calibration_error(y_small, y_cal)
        print(f"{name}: MCE = {mce:.4f}")
    except Exception as e:
        print(f"{name}: Failed - {e}")
Visualization Examples¶
Plotting Calibration Curves¶
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
def plot_calibration_curve(y_true, y_prob_list, names, n_bins=10):
    """Plot calibration curves for multiple methods."""
    fig, ax = plt.subplots(figsize=(10, 8))

    # Perfect calibration line
    ax.plot([0, 1], [0, 1], 'k--', label='Perfect calibration')

    # Plot each method
    for y_prob, name in zip(y_prob_list, names):
        fraction_pos, mean_pred = calibration_curve(
            y_true, y_prob, n_bins=n_bins
        )
        ax.plot(mean_pred, fraction_pos, 'o-', label=name)

    ax.set_xlabel('Mean Predicted Probability')
    ax.set_ylabel('Fraction of Positives')
    ax.set_title('Calibration Plot (Reliability Diagram)')
    ax.legend()
    ax.grid(True, alpha=0.3)
    return fig, ax
# Plot comparison
y_prob_list = [y_pred_uncal, y_pred_cal]
names = ['Uncalibrated', 'Nearly Isotonic']
fig, ax = plot_calibration_curve(y_test, y_prob_list, names)
plt.show()
Distribution Plots¶
def plot_prediction_distributions(y_prob_list, names):
    """Plot prediction distributions."""
    fig, axes = plt.subplots(1, len(y_prob_list), figsize=(15, 5))
    if len(y_prob_list) == 1:
        axes = [axes]

    for i, (y_prob, name) in enumerate(zip(y_prob_list, names)):
        axes[i].hist(y_prob, bins=50, alpha=0.7, density=True)
        axes[i].set_title(f'{name}\nUnique values: {len(np.unique(y_prob))}')
        axes[i].set_xlabel('Predicted Probability')
        axes[i].set_ylabel('Density')
        axes[i].grid(True, alpha=0.3)

    plt.tight_layout()
    return fig, axes
# Plot distributions
fig, axes = plot_prediction_distributions(y_prob_list, names)
plt.show()
Cross-Validation for Calibration¶
from sklearn.model_selection import cross_val_predict
from sklearn.base import clone
def cross_validated_calibration(model, calibrator, X, y, cv=5):
    """Perform cross-validated calibration."""
    # Get cross-validated predictions
    y_pred_cv = cross_val_predict(
        model, X, y, cv=cv, method='predict_proba'
    )[:, 1]

    # Split for calibration training and testing
    X_cal_train, X_cal_test, y_cal_train, y_cal_test = train_test_split(
        y_pred_cv.reshape(-1, 1), y, test_size=0.5, random_state=42
    )

    # Fit calibrator
    cal_clone = clone(calibrator)
    cal_clone.fit(X_cal_train.ravel(), y_cal_train)

    # Get calibrated predictions
    y_cal_pred = cal_clone.transform(X_cal_test.ravel())

    return y_cal_test, X_cal_test.ravel(), y_cal_pred
# Perform cross-validated calibration
y_true_cv, y_pred_uncal_cv, y_pred_cal_cv = cross_validated_calibration(
    model, NearlyIsotonicRegression(lam=1.0), X, y
)
print("Cross-validated results:")
print(f"MCE uncalibrated: {mean_calibration_error(y_true_cv, y_pred_uncal_cv):.4f}")
print(f"MCE calibrated: {mean_calibration_error(y_true_cv, y_pred_cal_cv):.4f}")
Common Pitfalls and Solutions¶
Avoiding Overfitting in Calibration¶
# DON'T: Use the same data for training and calibration
model.fit(X_train, y_train)
y_pred = model.predict_proba(X_train)[:, 1] # Same data!
calibrator.fit(y_pred, y_train) # This will overfit
# DO: Use separate data or cross-validation
model.fit(X_train, y_train)
y_pred = model.predict_proba(X_test)[:, 1] # Different data
calibrator.fit(y_pred, y_test) # Better approach
Handling Edge Cases¶
# Check for problematic predictions
def validate_predictions(y_pred):
    """Validate prediction array."""
    if np.any(y_pred < 0) or np.any(y_pred > 1):
        print("Warning: Predictions outside [0,1] range")
    if len(np.unique(y_pred)) < 10:
        print("Warning: Very few unique prediction values")
    if np.any(np.isnan(y_pred)):
        print("Warning: NaN values in predictions")
validate_predictions(y_pred_uncal)
# Handle constant predictions
if len(np.unique(y_pred_uncal)) == 1:
    print("Constant predictions detected - calibration may not be meaningful")
else:
    calibrator.fit(y_pred_uncal, y_test)
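If validation flags NaN or out-of-range values, one pragmatic clean-up before fitting is to drop the NaNs and clip to [0, 1] (a sketch; whether clipping is appropriate depends on how the predictions were produced):
# Drop NaN entries and clip out-of-range values before calibrating
mask = ~np.isnan(y_pred_uncal)
y_pred_clean = np.clip(y_pred_uncal[mask], 0.0, 1.0)
y_true_clean = np.asarray(y_test)[mask]

calibrator.fit(y_pred_clean, y_true_clean)
y_cal_clean = calibrator.transform(y_pred_clean)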