Source code for plotnine_extra.stats.stat_pwc

"""
Pairwise comparisons stat layer.

Performs pairwise statistical tests between groups and
displays the results as bracket annotations with p-values.
This is a Python port of ggpubr's geom_pwc/stat_pwc.
"""

from __future__ import annotations

from itertools import combinations

import numpy as np
import pandas as pd
from plotnine.doctools import document
from plotnine.mapping.evaluation import after_stat
from plotnine.stats.stat import stat

from ._common import preserve_panel_columns
from ._p_format import format_p_value, p_to_signif
from ._stat_test import run_stat_test


[docs] @document class stat_pwc(stat): """ Add pairwise comparison p-values to a plot Performs pairwise statistical tests between groups and displays the results as bracket annotations. Supports t-test and Wilcoxon (Mann-Whitney U) tests. The default geom is ``geom_bracket``. {usage} Parameters ---------- {common_parameters} method : str, default="wilcox.test" Statistical test method for pairwise comparisons. One of ``"t.test"`` or ``"wilcox.test"``. paired : bool, default=False Whether to perform a paired test. ref_group : str or int, default=None Reference group for pairwise comparisons. Each group is compared against this reference. If ``None``, all pairwise combinations are tested. comparisons : list of tuple, default=None Explicit list of group pairs to compare, e.g. ``[("A", "B"), ("A", "C")]``. Overrides ``ref_group`` when specified. p_adjust_method : str, default="holm" Method for adjusting p-values for multiple comparisons. One of ``"bonferroni"``, ``"holm"``, ``"hochberg"``, ``"hommel"``, ``"BH"``, ``"BY"``, ``"fdr"``, ``"none"``. label : str, default="p.format" Label format. One of ``"p.format"``, ``"p.signif"``, ``"p.adj.format"``, ``"p.adj.signif"``, ``"p.format.signif"``, ``"p.adj.format.signif"``. hide_ns : bool, default=False If ``True``, hide non-significant comparisons. p_digits : int, default=3 Number of digits for p-value formatting. step_increase : float, default=0.12 Fraction of y-range to step between comparison brackets to minimize overlap. bracket_nudge_y : float, default=0.05 Vertical offset for brackets as a fraction of the y-range. tip_length : float, default=0.03 Length of bracket tips as a fraction of y-range, passed to ``geom_bracket``. bracket_shorten : float, default=0 Amount to shorten brackets from each end. remove_bracket : bool, default=False If ``True``, remove brackets and only show labels (uses ``geom_text`` instead of ``geom_bracket``). See Also -------- plotnine_extra.geom_bracket : The default `geom` for this `stat`. plotnine_extra.stat_compare_means : Mean comparison with support for global tests. """ _aesthetics_doc = """ {aesthetics_table} **Options for computed aesthetics** ```python "label" # Formatted test result label "xmin" # Left x-coordinate of bracket "xmax" # Right x-coordinate of bracket "y" # Y-coordinate of bracket "p" # Raw p-value "p_adj" # Adjusted p-value "p_signif" # Significance symbol (raw p) "p_adj_signif" # Significance symbol (adjusted p) "method" # Name of the test "group1" # First group in comparison "group2" # Second group in comparison ``` """ REQUIRED_AES = {"x", "y"} DEFAULT_AES = {"label": after_stat("label")} DEFAULT_PARAMS = { "geom": "bracket", "position": "identity", "na_rm": False, "method": "wilcox.test", "paired": False, "ref_group": None, "comparisons": None, "p_adjust_method": "holm", "label": "p.format", "hide_ns": False, "p_digits": 3, "step_increase": 0.12, "bracket_nudge_y": 0.05, "tip_length": 0.03, "bracket_shorten": 0, "remove_bracket": False, } CREATES = { "label", "xmin", "xmax", "y", "p", "p_adj", "p_signif", "p_adj_signif", "method", "group1", "group2", } def __init__(self, mapping=None, data=None, **kwargs): super().__init__(mapping, data, **kwargs) # Remove 'label' from _kwargs so it is not forwarded # to the geom as a static aesthetic value. The 'label' # kwarg is a stat parameter controlling format (e.g. # "p.signif"), not a literal label string. self._kwargs.pop("label", None)
[docs] def compute_panel(self, data, scales): method = self.params["method"] paired = self.params["paired"] ref_group = self.params["ref_group"] comparisons_param = self.params["comparisons"] p_adjust_method = self.params["p_adjust_method"] label_type = self.params["label"] hide_ns = self.params["hide_ns"] p_digits = self.params["p_digits"] step_increase = self.params["step_increase"] bracket_nudge_y = self.params["bracket_nudge_y"] bracket_shorten = self.params["bracket_shorten"] # Group data by x categories grouped = dict(list(data.groupby("x"))) group_names = sorted(grouped.keys()) if len(group_names) < 2: return pd.DataFrame() # Build mapping from original labels to numeric # x values if using a discrete scale label_to_num = {} if hasattr(scales, "x"): try: # For discrete scales, get the original # labels via final_limits or get_labels labels = None if hasattr(scales.x, "final_limits"): fl = scales.x.final_limits if fl and isinstance(fl[0], str): labels = fl if labels is None and hasattr(scales.x, "get_labels"): gl = scales.x.get_labels() if gl and isinstance(gl[0], str): labels = gl if labels: for i, lbl in enumerate(labels): num_val = float(i + 1) if num_val in grouped: label_to_num[lbl] = num_val except Exception: pass if not label_to_num: for name in group_names: label_to_num[name] = name # Build reverse mapping for display labels num_to_label = {v: k for k, v in label_to_num.items()} # Determine what comparisons to make if comparisons_param is not None: pairs = [] for g1, g2 in comparisons_param: k1 = label_to_num.get(g1, g1) k2 = label_to_num.get(g2, g2) pairs.append((k1, k2)) elif ref_group is not None: ref_key = label_to_num.get(ref_group, ref_group) pairs = [(ref_key, g) for g in group_names if g != ref_key] else: # All pairwise combinations pairs = list(combinations(group_names, 2)) if not pairs: return pd.DataFrame() # Run pairwise tests y_max = data["y"].max() y_range = data["y"].max() - data["y"].min() if y_range == 0: y_range = abs(y_max) if y_max != 0 else 1.0 raw_results = [] for g1, g2 in pairs: if g1 not in grouped or g2 not in grouped: continue group1_vals = grouped[g1]["y"].to_numpy(dtype=float) group2_vals = grouped[g2]["y"].to_numpy(dtype=float) result = run_stat_test( [group1_vals, group2_vals], method=method, paired=paired, ) raw_results.append( { "g1": g1, "g2": g2, "p": result.p_value, "method_name": result.method, } ) if not raw_results: return pd.DataFrame() # Apply p-value adjustment raw_p_values = np.array([r["p"] for r in raw_results]) adjusted_p_values = _adjust_pvalues(raw_p_values, p_adjust_method) # Build result rows results = [] for i, raw in enumerate(raw_results): g1 = raw["g1"] g2 = raw["g2"] p_val = raw["p"] p_adj = adjusted_p_values[i] p_signif = p_to_signif(p_val) p_adj_signif = p_to_signif(p_adj) # Filter non-significant if requested if hide_ns: check_signif = ( p_adj_signif if "adj" in label_type else p_signif ) if check_signif == "ns": continue label = _make_pwc_label( label_type, p_val, p_adj, p_signif, p_adj_signif, p_digits, ) # Position brackets x1 = float(g1) x2 = float(g2) xmin = min(x1, x2) + bracket_shorten xmax = max(x1, x2) - bracket_shorten y_pos = ( y_max + y_range * bracket_nudge_y + y_range * step_increase * i ) # Display labels for group1/group2 g1_label = num_to_label.get(g1, str(g1)) g2_label = num_to_label.get(g2, str(g2)) results.append( { "xmin": xmin, "xmax": xmax, "x": (xmin + xmax) / 2, "y": y_pos, "label": label, "p": p_val, "p_adj": p_adj, "p_signif": p_signif, "p_adj_signif": p_adj_signif, "method": raw["method_name"], "group1": g1_label, "group2": g2_label, } ) if not results: return pd.DataFrame() return preserve_panel_columns(pd.DataFrame(results), data)
def _make_pwc_label( label_type: str, p_val: float, p_adj: float, p_signif: str, p_adj_signif: str, p_digits: int, ) -> str: """Create label based on label type.""" if label_type == "p.signif": return p_signif elif label_type == "p.adj.signif": return p_adj_signif elif label_type == "p.adj.format": return format_p_value(p_adj, digits=p_digits) elif label_type == "p.format.signif": p_str = format_p_value(p_val, digits=p_digits) return f"{p_str} ({p_signif})" elif label_type == "p.adj.format.signif": p_str = format_p_value(p_adj, digits=p_digits) return f"{p_str} ({p_adj_signif})" else: # p.format (default) return format_p_value(p_val, digits=p_digits) def _adjust_pvalues( p_values: np.ndarray, method: str, ) -> np.ndarray: """ Adjust p-values for multiple comparisons. Parameters ---------- p_values : array-like Raw p-values. method : str Adjustment method. Returns ------- np.ndarray Adjusted p-values. """ n = len(p_values) if n <= 1 or method == "none": return p_values.copy() p = p_values.copy() if method == "bonferroni": return np.minimum(p * n, 1.0) elif method == "holm": order = np.argsort(p) sorted_p = p[order] adjusted = np.empty(n) for i in range(n): adjusted[i] = sorted_p[i] * (n - i) # Enforce monotonicity for i in range(1, n): adjusted[i] = max(adjusted[i], adjusted[i - 1]) adjusted = np.minimum(adjusted, 1.0) result = np.empty(n) result[order] = adjusted return result elif method == "hochberg": order = np.argsort(p)[::-1] sorted_p = p[order] adjusted = np.empty(n) for i in range(n): adjusted[i] = sorted_p[i] * (i + 1) # Enforce monotonicity (decreasing) for i in range(1, n): adjusted[i] = min(adjusted[i], adjusted[i - 1]) adjusted = np.minimum(adjusted, 1.0) result = np.empty(n) result[order] = adjusted return result elif method in ("BH", "fdr"): order = np.argsort(p)[::-1] sorted_p = p[order] adjusted = np.empty(n) for i in range(n): rank = n - i # rank from largest to smallest adjusted[i] = sorted_p[i] * n / rank # Enforce monotonicity (non-increasing after sort) for i in range(1, n): adjusted[i] = min(adjusted[i], adjusted[i - 1]) adjusted = np.minimum(adjusted, 1.0) result = np.empty(n) result[order] = adjusted return result elif method == "BY": order = np.argsort(p)[::-1] sorted_p = p[order] q = sum(1.0 / i for i in range(1, n + 1)) adjusted = np.empty(n) for i in range(n): rank = n - i adjusted[i] = sorted_p[i] * n * q / rank for i in range(1, n): adjusted[i] = min(adjusted[i], adjusted[i - 1]) adjusted = np.minimum(adjusted, 1.0) result = np.empty(n) result[order] = adjusted return result elif method == "hommel": # Hommel's step-down procedure order = np.argsort(p) sorted_p = p[order] adjusted = sorted_p.copy() * n for i in range(n - 1, 0, -1): lower = np.arange(1, n - i + 1) q_values = sorted_p[i:n] * i / lower q_min = np.min(q_values) adjusted[:i] = np.minimum(adjusted[:i], q_min) adjusted[i] = max(adjusted[i], q_min) # Enforce monotonicity for i in range(1, n): adjusted[i] = max(adjusted[i], adjusted[i - 1]) adjusted = np.minimum(adjusted, 1.0) result = np.empty(n) result[order] = adjusted return result else: # Unknown method, return unadjusted return p.copy()