Utility Functions
This module provides utility functions for data processing and validation.
Data Validation
Array Checking
Data Processing
Sorting Utilities
Binning Operations
Usage Examples
Input Validation
from calibre.utils import check_arrays
import numpy as np
# Valid input: two arrays of five values each.
X = np.array([0.1, 0.3, 0.5, 0.7, 0.9])
y = np.array([0, 0, 1, 1, 1])

# This example expects check_arrays to report bad input by raising
# ValueError. Only the call itself sits inside ``try`` so that nothing else
# can be mistaken for a validation failure; the success message is printed
# from ``else``, which runs only when no exception was raised.
try:
    X_checked, y_checked = check_arrays(X, y)
except ValueError as e:
    print(f"Validation error: {e}")
else:
    print("Arrays are valid")
Sorting Operations
from calibre.utils import sort_by_x
# Input whose X values are deliberately out of order.
X = np.array([0.7, 0.1, 0.9, 0.3, 0.5])
y = np.array([1, 0, 1, 0, 1])

# sort_by_x hands back three values; they are unpacked here as the ordering
# indices followed by the reordered X and y.
sorted_result = sort_by_x(X, y)
sort_indices, X_sorted, y_sorted = sorted_result

# Show the input next to each returned value.
print(f"Original X: {X}")
print(f"Sorted X: {X_sorted}")
print(f"Sorted y: {y_sorted}")
print(f"Sort indices: {sort_indices}")
Creating Bins
from calibre.utils import create_bins, bin_data
import numpy as np
# 1000 samples drawn uniformly from [0, 1) to have something to bin.
X = np.random.uniform(0, 1, 1000)

# Both strategies are asked for the same number of bins.
n_bins = 10

# Bins built with the 'uniform' strategy.
bins_uniform = create_bins(X, n_bins=n_bins, strategy='uniform')
print(f"Uniform bins: {bins_uniform}")

# Bins built with the 'quantile' strategy.
bins_quantile = create_bins(X, n_bins=n_bins, strategy='quantile')
print(f"Quantile bins: {bins_quantile}")

# Assign the samples to the uniform bins; bin_data returns a pair, unpacked
# here as per-sample bin indices and per-bin counts.
binned = bin_data(X, bins_uniform)
bin_indices, bin_counts = binned
print(f"Bin counts: {bin_counts}")
Advanced Binning
# Custom bin range. This snippet reuses X, create_bins and bin_data from the
# "Creating Bins" example above.
bins_custom = create_bins(
    X,
    n_bins=5,
    strategy='uniform',
    x_min=0.2,  # Custom range
    x_max=0.8,
)

# Bin with custom bins
bin_indices, bin_counts = bin_data(X, bins_custom)

# Analyze bin distribution: entries i and i + 1 of bins_custom are printed
# as the start and end of bin i, next to that bin's sample count.
for i, count in enumerate(bin_counts):
    bin_start = bins_custom[i]
    bin_end = bins_custom[i + 1]
    print(f"Bin [{bin_start:.2f}, {bin_end:.2f}): {count} samples")