Source code for plotnine_extra.stats.stat_anova_test
from __future__ import annotations
from typing import TYPE_CHECKING
import numpy as np
import pandas as pd
from plotnine.doctools import document
from ._base_stat_test import _base_stat_test
if TYPE_CHECKING:
from plotnine_extra.stats._stat_test import StatTestResult
[docs]
@document
class stat_anova_test(_base_stat_test):
    """
    Add ANOVA test p-values to a plot

    Performs one-way ANOVA and displays the result as a
    text annotation including F-statistic, degrees of
    freedom, and p-value.

    {usage}

    Parameters
    ----------
    {common_parameters}
    method : str, default="one_way"
        ANOVA method. Currently supports ``"one_way"``.
    effect_size : str, default="ges"
        Type of effect size. One of ``"ges"``
        (generalized eta-squared) or ``"pes"``
        (partial eta-squared).
    label_x_npc : float or str, default="center"
        Normalized x position for the label.
    label_y_npc : float or str, default="top"
        Normalized y position for the label.
    p_digits : int, default=3
        Number of digits for p-value formatting.

    See Also
    --------
    plotnine.geom_text : The default `geom` for this `stat`.
    """

    _aesthetics_doc = """
    {aesthetics_table}

    **Options for computed aesthetics**

    ```python
    "label" # Formatted test result label
    "p" # P-value
    "p_signif" # Significance symbol
    "f" # F-statistic
    "df" # Numerator degrees of freedom
    "df_residual" # Denominator degrees of freedom
    "effect_size" # Effect size (eta-squared)
    "method" # Name of the test
    ```
    """

    DEFAULT_PARAMS = {
        "geom": "text",
        "position": "identity",
        "na_rm": False,
        "method": "one_way",
        "effect_size": "ges",
        "label_x_npc": "center",
        "label_y_npc": "top",
        "p_digits": 3,
    }

    CREATES = {
        "label",
        "p",
        "p_signif",
        "f",
        "df",
        "df_residual",
        "effect_size",
        "method",
    }

    _test_method = "anova"
    _min_groups = 2

    @staticmethod
    def _eta_squared(groups: list[np.ndarray]) -> float:
        """
        Return eta-squared (SS_between / SS_total) for the groups

        Parameters
        ----------
        groups : list of array-like
            One array of observations per group.

        Returns
        -------
        float
            Between-group sum of squares divided by the total
            sum of squares. ``0.0`` is returned whenever the
            total sum of squares is not greater than zero
            (e.g. every observation is identical), so the
            result is always a float.
        """
        pooled = np.concatenate(groups)
        grand_mean = np.mean(pooled)
        ss_between = sum(
            len(g) * (np.mean(g) - grand_mean) ** 2
            for g in groups
        )
        ss_total = np.sum((pooled - grand_mean) ** 2)
        if ss_total > 0:
            return float(ss_between / ss_total)
        # Float fallback (not the int ``0``) keeps the type of the
        # "effect_size" value consistent across results.
        return 0.0

    def _build_result(
        self,
        result: StatTestResult,
        p_str: str,
        p_signif: str,
        x_pos: float,
        y_pos: float,
        data: pd.DataFrame,
        groups: list[np.ndarray],
    ) -> pd.DataFrame:
        """
        Assemble the one-row dataframe describing the ANOVA result

        Parameters
        ----------
        result : StatTestResult
            Test outcome; its ``statistic``, ``p_value``, ``df``,
            ``df2`` and ``method`` attributes are read.
        p_str : str
            Already formatted p-value text, inserted verbatim
            into the label.
        p_signif : str
            Significance symbol stored in the ``p_signif`` column.
        x_pos, y_pos : float
            Coordinates stored in the ``x`` and ``y`` columns.
        data : pandas.DataFrame
            Not used by this method; presumably part of the
            signature expected by the base class (verify there).
        groups : list of array-like
            One array of observations per group, used to compute
            the effect size.

        Returns
        -------
        pandas.DataFrame
            A single row with the columns ``x``, ``y``, ``label``,
            ``p``, ``p_signif``, ``f``, ``df``, ``df_residual``,
            ``effect_size`` and ``method``.
        """
        # NOTE(review): the ``effect_size`` parameter is not read
        # here. For a one-way between-subjects design generalized
        # and partial eta-squared both reduce to
        # SS_between / SS_total -- confirm that is the intent.
        eta_sq = self._eta_squared(groups)

        # Missing degrees of freedom become NaN, which the ".0f"
        # format below renders as "nan" instead of raising.
        df1 = result.df if result.df is not None else np.nan
        df2 = result.df2 if result.df2 is not None else np.nan

        label = (
            f"F({df1:.0f}, {df2:.0f})"
            f" = {result.statistic:.2f}, {p_str},"
            f" η² = {eta_sq:.2f}"
        )

        return pd.DataFrame(
            {
                "x": [x_pos],
                "y": [y_pos],
                "label": [label],
                "p": [result.p_value],
                "p_signif": [p_signif],
                "f": [result.statistic],
                "df": [df1],
                "df_residual": [df2],
                "effect_size": [eta_sq],
                "method": [result.method],
            }
        )