Source code for hessband.cv

"""
Cross-validation utilities for kernel regression and density estimation.

This module defines a ``CVScorer`` class that evaluates candidate
bandwidths by leave-one-out cross-validation (LOOCV) or K-fold
cross-validation for kernel regression or density estimation.
"""

import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

__all__ = ["CVScorer"]


class CVScorer:
    """
    Cross-validation scorer for kernel regression.

    Parameters
    ----------
    X : array-like, shape (n_samples,)
        Input values.
    y : array-like, shape (n_samples,)
        Target values.
    folds : int, optional (default=5)
        Number of folds for K-fold cross-validation.
    kernel : str, optional (default='gaussian')
        Kernel type ('gaussian' or 'epanechnikov').
    """

    def __init__(self, X, y, folds=5, kernel="gaussian"):
        self.X = np.asarray(X).ravel()
        self.y = np.asarray(y).ravel()
        if not (2 <= folds <= len(self.X)):
            raise ValueError(
                f"`folds` must be between 2 and {len(self.X)}, got {folds}"
            )
        self.kf = KFold(n_splits=folds, shuffle=True, random_state=0)
        self.kernel = kernel
        self.evals = 0
    def score(self, predict_fn, h):
        """
        Compute the cross-validation MSE for a given bandwidth.

        Parameters
        ----------
        predict_fn : callable
            Function that takes ``(X_train, y_train, X_test, h, kernel)``
            and returns predictions.
        h : float
            Bandwidth value.

        Returns
        -------
        float
            Cross-validation mean squared error.
        """
        mses = []
        for train_idx, test_idx in self.kf.split(self.X):
            Xtr, Xte = self.X[train_idx], self.X[test_idx]
            ytr, yte = self.y[train_idx], self.y[test_idx]
            ypred = predict_fn(Xtr, ytr, Xte, h, kernel=self.kernel)
            mses.append(mean_squared_error(yte, ypred))
        self.evals += 1
        return np.mean(mses)
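

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the published module). The
# ``nadaraya_watson_example`` helper below is a hypothetical ``predict_fn``
# with the ``(X_train, y_train, X_test, h, kernel)`` signature that
# ``CVScorer.score`` expects; any callable matching that signature can be
# scored the same way.

def nadaraya_watson_example(X_train, y_train, X_test, h, kernel="gaussian"):
    # Local-constant (Nadaraya-Watson) regression estimate at each test point.
    u = (X_test[:, None] - X_train[None, :]) / h
    if kernel == "gaussian":
        w = np.exp(-0.5 * u**2)
    else:  # 'epanechnikov'
        w = np.maximum(1.0 - u**2, 0.0)
    return (w @ y_train) / w.sum(axis=1)


if __name__ == "__main__":
    # Synthetic regression data for demonstration purposes only.
    rng = np.random.default_rng(0)
    X = rng.uniform(0.0, 1.0, size=200)
    y = np.sin(2 * np.pi * X) + 0.3 * rng.standard_normal(200)

    scorer = CVScorer(X, y, folds=5, kernel="gaussian")
    # Compare the 5-fold CV MSE over a small bandwidth grid.
    for h in (0.05, 0.1, 0.2):
        print(f"h = {h:.2f}: CV MSE = {scorer.score(nadaraya_watson_example, h):.4f}")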