Source code for rmcp.tools.statistical_tests

"""
Statistical hypothesis testing tools for RMCP.
Comprehensive statistical testing capabilities.
"""

from typing import Any

from ..core.schemas import table_schema
from ..r_assets.loader import get_r_script
from ..r_integration import execute_r_script_async
from ..registries.tools import tool


@tool(
    name="t_test",
    input_schema={
        "type": "object",
        "properties": {
            "data": table_schema(),
            "variable": {"type": "string"},
            "group": {
                "type": "string",
                "description": (
                    "Required for two-sample t-test. Column name for group variable. "
                    "Omit for one-sample t-test."
                ),
            },
            "mu": {"type": "number", "default": 0},
            "alternative": {
                "type": "string",
                "enum": ["two.sided", "less", "greater"],
                "default": "two.sided",
            },
            "paired": {"type": "boolean", "default": False},
            "var_equal": {"type": "boolean", "default": False},
        },
        "required": ["data", "variable"],
    },
    output_schema={
        "type": "object",
        "properties": {
            "test_type": {
                "type": "string",
                "description": "Type of t-test performed",
                "enum": [
                    "One-sample t-test",
                    "Paired t-test",
                    "Two-sample t-test (equal variances)",
                    "Welch's t-test",
                ],
            },
            "statistic": {"type": "number", "description": "t-statistic value"},
            "df": {"type": "number", "description": "Degrees of freedom", "minimum": 0},
            "p_value": {
                "type": "number",
                "description": "P-value of the test",
                "minimum": 0,
                "maximum": 1,
            },
            "confidence_interval": {
                "type": "object",
                "properties": {
                    "lower": {"type": "number"},
                    "upper": {"type": "number"},
                    "level": {"type": "number", "minimum": 0, "maximum": 1},
                },
                "description": "Confidence interval for the mean difference",
            },
            "alternative": {
                "type": "string",
                "enum": ["two.sided", "less", "greater"],
                "description": "Alternative hypothesis",
            },
            "mean": {"type": "number", "description": "Sample mean (one-sample test)"},
            "null_value": {
                "type": "number",
                "description": "Null hypothesis value (one-sample test)",
            },
            "n_obs": {
                "type": "integer",
                "description": "Number of observations (one-sample test)",
                "minimum": 1,
            },
            "mean_x": {
                "type": "number",
                "description": "Mean of group x (two-sample test)",
            },
            "mean_y": {
                "type": "number",
                "description": "Mean of group y (two-sample test)",
            },
            "mean_difference": {
                "type": "number",
                "description": "Difference between group means (two-sample test)",
            },
            "groups": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Group levels (two-sample test)",
            },
            "paired": {
                "type": "boolean",
                "description": "Whether test was paired (two-sample test)",
            },
            "var_equal": {
                "type": "boolean",
                "description": "Whether equal variances assumed (two-sample test)",
            },
            "n_obs_x": {
                "type": "integer",
                "description": "Number of observations in group x (two-sample test)",
                "minimum": 1,
            },
            "n_obs_y": {
                "type": "integer",
                "description": "Number of observations in group y (two-sample test)",
                "minimum": 1,
            },
        },
        "required": [
            "test_type",
            "statistic",
            "df",
            "p_value",
            "confidence_interval",
            "alternative",
        ],
        "additionalProperties": False,
    },
    description="Performs Student's t-tests to compare means: one-sample (test if mean equals hypothesized value), two-sample (compare means between groups), or paired (compare before/after measurements). Returns t-statistic, degrees of freedom, p-value, confidence intervals, and effect size. Use for hypothesis testing about population means, comparing group differences, or analyzing experimental results. Handles equal/unequal variances and provides Cohen's d effect size.",
)
async def t_test(context, params) -> dict[str, Any]:
    """Run a t-test by delegating computation to the bundled R script.

    The heavy lifting happens in R; this coroutine only loads the script
    asset, forwards ``params``, and reports progress/failure via ``context``.
    """
    await context.info("Performing t-test")

    # The R implementation is kept as a separate asset, not inlined here.
    script = get_r_script("statistical_tests", "t_test")

    try:
        outcome = await execute_r_script_async(script, params)
        await context.info("T-test completed successfully")
        return outcome
    except Exception as exc:
        # Surface the failure to the client before propagating it.
        await context.error("T-test failed", error=str(exc))
        raise
@tool(
    name="anova",
    input_schema={
        "type": "object",
        "properties": {
            "data": table_schema(),
            "formula": {"type": "string"},
            "type": {"type": "string", "enum": ["I", "II", "III"], "default": "I"},
        },
        "required": ["data", "formula"],
    },
    output_schema={
        "type": "object",
        "properties": {
            "anova_table": {
                "type": "object",
                "properties": {
                    "terms": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Model terms/factors",
                    },
                    "df": {
                        "type": "array",
                        "items": {"type": "integer"},
                        "description": "Degrees of freedom",
                    },
                    "sum_sq": {
                        "type": "array",
                        "items": {"type": "number"},
                        "description": "Sum of squares",
                    },
                    "mean_sq": {
                        "type": "array",
                        "items": {"type": "number"},
                        "description": "Mean squares",
                    },
                    "f_value": {
                        "type": "array",
                        "items": {"type": "number"},
                        "description": "F-statistics",
                    },
                    "p_value": {
                        "type": "array",
                        "items": {"type": "number"},
                        "description": "P-values",
                    },
                },
                "description": "ANOVA table with test statistics",
            },
            "model_summary": {
                "type": "object",
                "properties": {
                    "r_squared": {
                        "type": "number",
                        "minimum": 0,
                        "maximum": 1,
                        "description": "R-squared value",
                    },
                    "adj_r_squared": {
                        "type": "number",
                        "maximum": 1,
                        "description": "Adjusted R-squared value",
                    },
                    "residual_se": {
                        "type": "number",
                        "minimum": 0,
                        "description": "Residual standard error",
                    },
                    "df_residual": {
                        "type": "integer",
                        "minimum": 0,
                        "description": "Residual degrees of freedom",
                    },
                    "n_obs": {
                        "type": "integer",
                        "minimum": 1,
                        "description": "Number of observations",
                    },
                },
                "description": "Overall model fit statistics",
            },
            "formula": {"type": "string", "description": "Model formula used"},
            "anova_type": {
                "type": "string",
                "description": "Type of ANOVA performed",
                "enum": ["Type I", "Type II", "Type III"],
            },
        },
        "required": ["anova_table", "model_summary", "formula", "anova_type"],
        "additionalProperties": False,
    },
    description="Performs Analysis of Variance (ANOVA) to test for significant differences between group means. Supports one-way ANOVA (single factor), two-way ANOVA (two factors with interaction), and repeated measures designs. Returns F-statistics, p-values, effect sizes (eta-squared), and post-hoc comparisons when significant. Use when comparing means across 3+ groups, testing factorial designs, or analyzing experimental data with multiple conditions.",
)
async def anova(context, params) -> dict[str, Any]:
    """Run an ANOVA by delegating computation to the bundled R script.

    Loads the R asset for this tool, forwards ``params`` unchanged, and
    reports progress/failure through the ``context`` logger.
    """
    await context.info("Performing ANOVA")

    # The R implementation lives in a separate asset file.
    script = get_r_script("statistical_tests", "anova")

    try:
        outcome = await execute_r_script_async(script, params)
        await context.info("ANOVA completed successfully")
        return outcome
    except Exception as exc:
        # Surface the failure to the client before propagating it.
        await context.error("ANOVA failed", error=str(exc))
        raise
@tool(
    name="chi_square_test",
    input_schema={
        "type": "object",
        "properties": {
            "data": table_schema(),
            "test_type": {
                "type": "string",
                "enum": ["independence", "goodness_of_fit"],
                "description": "Type of chi-square test",
            },
            "x": {
                "type": "string",
                "description": "First variable (or only variable for goodness of fit)",
            },
            "y": {
                "type": "string",
                "description": "Second variable (required for independence test)",
            },
            "expected": {
                "type": "array",
                "items": {"type": "number", "minimum": 0},
                "minItems": 1,
                "description": "Expected frequencies (for goodness of fit test)",
            },
        },
        "required": ["data", "test_type", "x"],
        "additionalProperties": False,
    },
    output_schema={
        "type": "object",
        "properties": {
            "test_type": {
                "type": "string",
                "description": "Type of chi-square test performed",
                "enum": [
                    "Chi-square test of independence",
                    "Chi-square goodness of fit test",
                ],
            },
            "statistic": {
                "type": "number",
                "description": "Chi-square test statistic",
                "minimum": 0,
            },
            "df": {"type": "number", "description": "Degrees of freedom", "minimum": 0},
            "p_value": {
                "type": "number",
                "description": "P-value of the test",
                "minimum": 0,
                "maximum": 1,
            },
            "expected_frequencies": {
                "type": "array",
                "description": "Expected frequencies under null hypothesis",
                "items": {"type": "array", "items": {"type": "number"}},
            },
            "residuals": {
                "type": "array",
                "description": "Standardized residuals",
                "items": {"type": "array", "items": {"type": "number"}},
            },
            "contingency_table": {
                "type": "array",
                "description": "Observed contingency table (independence test)",
                "items": {"type": "array", "items": {"type": "number"}},
            },
            "x_variable": {
                "type": "string",
                "description": "X variable name (independence test)",
            },
            "y_variable": {
                "type": "string",
                "description": "Y variable name (independence test)",
            },
            "cramers_v": {
                "type": "number",
                "description": "Cramer's V effect size (independence test)",
                "minimum": 0,
                "maximum": 1,
            },
            "observed_frequencies": {
                "type": "array",
                "items": {"type": "number"},
                "description": "Observed frequencies (goodness of fit test)",
            },
            "categories": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Category names (goodness of fit test)",
            },
        },
        "required": [
            "test_type",
            "statistic",
            "df",
            "p_value",
            "expected_frequencies",
            "residuals",
        ],
        "additionalProperties": False,
    },
    description="Performs chi-square tests for categorical data analysis: test of independence (relationship between two categorical variables) and goodness-of-fit (whether data follows expected distribution). Returns chi-square statistic, degrees of freedom, p-value, expected frequencies, and standardized residuals. Use for analyzing contingency tables, testing associations between categorical variables, or validating theoretical distributions against observed data.",
)
async def chi_square_test(context, params) -> dict[str, Any]:
    """Run a chi-square test by delegating computation to the bundled R script.

    Loads the R asset for this tool, forwards ``params`` unchanged, and
    reports progress/failure through the ``context`` logger.
    """
    await context.info("Performing chi-square test")

    # The R implementation lives in a separate asset file.
    script = get_r_script("statistical_tests", "chi_square_test")

    try:
        outcome = await execute_r_script_async(script, params)
        await context.info("Chi-square test completed successfully")
        return outcome
    except Exception as exc:
        # Surface the failure to the client before propagating it.
        await context.error("Chi-square test failed", error=str(exc))
        raise
@tool(
    name="normality_test",
    input_schema={
        "type": "object",
        "properties": {
            "data": table_schema(),
            "variable": {"type": "string"},
            "test": {
                "type": "string",
                "enum": ["shapiro", "jarque_bera", "anderson"],
                "default": "shapiro",
            },
        },
        "required": ["data", "variable"],
    },
    output_schema={
        "type": "object",
        "properties": {
            "test_name": {
                "type": "string",
                "description": "Name of the normality test performed",
                "enum": [
                    "Shapiro-Wilk normality test",
                    "Jarque-Bera normality test",
                    "Anderson-Darling normality test",
                ],
            },
            "statistic": {"type": "number", "description": "Test statistic value"},
            "df": {
                "type": "number",
                "description": "Degrees of freedom (Jarque-Bera only)",
                "minimum": 0,
            },
            "p_value": {
                "type": "number",
                "description": "P-value of the test",
                "minimum": 0,
                "maximum": 1,
            },
            "is_normal": {
                "type": "boolean",
                "description": "Whether data appears normal (p > 0.05)",
            },
            "variable": {
                "type": "string",
                "description": "Variable tested for normality",
            },
            "n_obs": {
                "type": "integer",
                "description": "Number of valid observations",
                "minimum": 1,
            },
            "mean": {"type": "number", "description": "Sample mean"},
            "sd": {
                "type": "number",
                "description": "Sample standard deviation",
                "minimum": 0,
            },
            "skewness": {"type": "number", "description": "Sample skewness"},
            "excess_kurtosis": {
                "type": "number",
                "description": "Excess kurtosis (normal distribution = 0)",
            },
        },
        "required": [
            "test_name",
            "statistic",
            "p_value",
            "is_normal",
            "variable",
            "n_obs",
            "mean",
            "sd",
            "skewness",
            "excess_kurtosis",
        ],
        "additionalProperties": False,
    },
    # FIX: the description previously advertised a Kolmogorov-Smirnov test
    # that the input-schema enum ("shapiro", "jarque_bera", "anderson") does
    # not support, and contained the typo "analyzes" for "analyses". The
    # description now matches the tests actually offered.
    description="Tests variables for normal distribution using multiple methods: Shapiro-Wilk (most powerful for small samples), Anderson-Darling, or Jarque-Bera tests. Returns test statistics, p-values, and clear interpretation for each test. Use before parametric statistical analyses, to validate model assumptions, or to choose appropriate statistical methods. Critical for regression diagnostics and assumption checking.",
)
async def normality_test(context, params) -> dict[str, Any]:
    """Test a variable for normality by delegating to the bundled R script.

    Loads the R asset for this tool, forwards ``params`` unchanged, and
    reports progress/failure through the ``context`` logger.
    """
    await context.info("Testing for normality")

    # Load R script from separated file
    r_script = get_r_script("statistical_tests", "normality_test")

    try:
        result = await execute_r_script_async(r_script, params)
        await context.info("Normality test completed successfully")
        return result
    except Exception as e:
        # Surface the failure to the client before propagating it.
        await context.error("Normality test failed", error=str(e))
        raise