# Source code for plotnine_extra.stats.stat_anova_test

from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np
import pandas as pd
from plotnine.doctools import document

from ._base_stat_test import _base_stat_test

if TYPE_CHECKING:
    from plotnine_extra.stats._stat_test import StatTestResult



# [docs]  (presumably a link marker left over from the rendered documentation page)
@document
class stat_anova_test(_base_stat_test):
    """
    Add ANOVA test p-values to a plot

    Performs one-way ANOVA and displays the result as a
    text annotation including F-statistic, degrees of
    freedom, and p-value.

    {usage}

    Parameters
    ----------
    {common_parameters}
    method : str, default="one_way"
        ANOVA method. Currently supports ``"one_way"``.
    effect_size : str, default="ges"
        Type of effect size. One of ``"ges"``
        (generalized eta-squared) or ``"pes"``
        (partial eta-squared). For a one-way design both
        coincide with eta-squared (between-group sum of
        squares over total sum of squares), which is the
        value reported.
    label_x_npc : float or str, default="center"
        Normalized x position for the label.
    label_y_npc : float or str, default="top"
        Normalized y position for the label.
    p_digits : int, default=3
        Number of digits for p-value formatting.

    See Also
    --------
    plotnine.geom_text : The default `geom` for this `stat`.
    """

    _aesthetics_doc = """
    {aesthetics_table}

    **Options for computed aesthetics**

    ```python
    "label"        # Formatted test result label
    "p"            # P-value
    "p_signif"     # Significance symbol
    "f"            # F-statistic
    "df"           # Numerator degrees of freedom
    "df_residual"  # Denominator degrees of freedom
    "effect_size"  # Effect size (eta-squared)
    "method"       # Name of the test
    ```

    """
    DEFAULT_PARAMS = {
        "geom": "text",
        "position": "identity",
        "na_rm": False,
        "method": "one_way",
        "effect_size": "ges",
        "label_x_npc": "center",
        "label_y_npc": "top",
        "p_digits": 3,
    }
    # Columns produced by ``_build_result`` (besides ``x`` and ``y``).
    CREATES = {
        "label",
        "p",
        "p_signif",
        "f",
        "df",
        "df_residual",
        "effect_size",
        "method",
    }

    _test_method = "anova"
    _min_groups = 2

    def _build_result(
        self,
        result: StatTestResult,
        p_str: str,
        p_signif: str,
        x_pos: float,
        y_pos: float,
        data: pd.DataFrame,
        groups: list[np.ndarray],
    ) -> pd.DataFrame:
        """
        Assemble the one-row annotation frame for the ANOVA result

        Parameters
        ----------
        result : StatTestResult
            Test outcome. The attributes ``statistic``,
            ``p_value``, ``df``, ``df2`` and ``method`` are read.
        p_str : str
            Already formatted p-value text; inserted into the
            label unchanged.
        p_signif : str
            Significance symbol; copied to the output unchanged.
        x_pos, y_pos : float
            Position of the annotation; copied to the ``x`` and
            ``y`` columns.
        data : pandas.DataFrame
            Not used by this implementation.
        groups : list of numpy.ndarray
            Observations of each group. They are used to compute
            eta-squared.

        Returns
        -------
        pandas.DataFrame
            A single row with the columns ``x``, ``y``, ``label``,
            ``p``, ``p_signif``, ``f``, ``df``, ``df_residual``,
            ``effect_size`` and ``method``.
        """
        # Effect size: eta-squared = SS_between / SS_total. The
        # "effect_size" parameter is not read here.
        all_data = np.concatenate(groups)
        grand_mean = np.mean(all_data)
        ss_between = sum(
            len(g) * (np.mean(g) - grand_mean) ** 2
            for g in groups
        )
        ss_total = np.sum((all_data - grand_mean) ** 2)

        if np.isnan(ss_total):
            # ``nan > 0`` is False, so without this branch missing
            # values would be reported as an effect size of 0.
            eta_sq = float("nan")
        elif ss_total > 0:
            eta_sq = float(ss_between / ss_total)
        else:
            # No variation at all: the ratio is 0/0. Report 0.0,
            # as a float so that the column dtype does not depend
            # on the data.
            eta_sq = 0.0

        # Missing degrees of freedom are shown as "nan" in the label
        df1 = result.df if result.df is not None else np.nan
        df2 = (
            result.df2 if result.df2 is not None else np.nan
        )

        label = (
            f"F({df1:.0f}, {df2:.0f})"
            f" = {result.statistic:.2f}, {p_str},"
            f" η² = {eta_sq:.2f}"
        )

        return pd.DataFrame(
            {
                "x": [x_pos],
                "y": [y_pos],
                "label": [label],
                "p": [result.p_value],
                "p_signif": [p_signif],
                "f": [result.statistic],
                "df": [df1],
                "df_residual": [df2],
                "effect_size": [eta_sq],
                "method": [result.method],
            }
        )