"""
Manually add p-value annotations with brackets.
Unlike other stat_ layers which are ggproto stat objects,
this is a function that returns a list of plotnine layers
for adding pre-computed p-value annotations to a plot.
"""
from __future__ import annotations
from typing import Any
import numpy as np
import pandas as pd
from plotnine import aes, geom_segment, geom_text
from ._p_format import p_to_signif
[docs]
def stat_pvalue_manual(
    data: pd.DataFrame,
    label: str | None = None,
    y_position: str | float | None = None,
    xmin: str | None = "group1",
    xmax: str | None = "group2",
    tip_length: float = 0.02,
    step_increase: float = 0.05,
    step_group_by: str | None = None,
    hide_ns: bool = False,
    remove_bracket: bool = False,
    bracket_nudge_y: float = 0,
    label_size: float = 8,
    vjust: float = -0.5,
    color: str = "black",
    **kwargs: Any,
) -> list:
    """
    Add manually specified p-values to a plot with brackets.

    Parameters
    ----------
    data : DataFrame
        One row per comparison. Must contain the ``xmin`` and
        ``xmax`` columns; a p-value column (``p``, ``p.adj``,
        ``p_adj``, ``pvalue`` or ``p_value``) is required when
        ``label`` is ``None`` or ``"p.signif"`` or when
        ``hide_ns`` is ``True``. The frame is copied, never
        modified.
    label : str, optional
        ``"p.signif"`` converts the p-value column with
        ``p_to_signif`` (this takes precedence over a data
        column of the same name). Any other string that names a
        column uses that column converted to ``str``. A string
        that is not a column is used verbatim as the label of
        every comparison. If ``None``, the p-value column is
        formatted as ``"p = <value>"`` with three significant
        digits.
    y_position : str or float, optional
        Column name for y-coordinates of brackets, or a single
        number (Python or NumPy scalar) applied to every row.
        If ``None``, or a string that is not a column, the
        ``"y.position"`` or ``"y_position"`` column is used.
    xmin : str, default="group1"
        Column name for the left x-coordinate of brackets.
    xmax : str, default="group2"
        Column name for the right x-coordinate of brackets.
    tip_length : float, default=0.02
        Length of bracket tips as a fraction of the spread of
        the final bracket heights (or of the largest height
        when all brackets share one height). If that reference
        value is not positive, a fixed tip of 0.1 is used.
    step_increase : float, default=0.05
        Vertical step between successive brackets, as a
        fraction of the largest resolved y position.
    step_group_by : str, optional
        Column to group comparisons for stacking; the step
        counter restarts within each group.
    hide_ns : bool, default=False
        If ``True``, keep only rows whose p-value is at most
        0.05.
    remove_bracket : bool, default=False
        If ``True``, show only labels without brackets.
    bracket_nudge_y : float, default=0
        Constant added to every bracket height.
    label_size : float, default=8
        Font size for labels.
    vjust : float, default=-0.5
        Forwarded to ``geom_text`` as ``nudge_y``.
    color : str, default="black"
        Color for brackets and labels.
    **kwargs
        Extra keyword arguments forwarded to each
        ``geom_segment`` layer (not to the text layer).

    Returns
    -------
    list
        Plotnine layers that can be added to a ggplot: three
        ``geom_segment`` layers (bar, left tips, right tips)
        unless ``remove_bracket`` is set, followed by one
        ``geom_text`` layer. Empty when there are no rows to
        annotate.

    Raises
    ------
    ValueError
        If no y position can be resolved, if the ``xmin`` or
        ``xmax`` column is missing, or if a p-value column is
        needed but absent.
    """
    df = data.copy()

    # Resolve the label text. "p.signif" is checked first, so it is
    # always derived from the p-value column.
    if label == "p.signif":
        df["_label"] = df[_find_p_column(df)].apply(p_to_signif)
    elif label is not None and label in df.columns:
        df["_label"] = df[label].astype(str)
    elif label is not None:
        # Not a column: treat the string as a literal label.
        df["_label"] = label
    else:
        df["_label"] = df[_find_p_column(df)].apply(
            lambda p: f"p = {p:.3g}"
        )

    # Resolve bracket heights. NumPy scalars are accepted explicitly
    # because np.int64 / np.float32 are not int / float subclasses,
    # yet are what e.g. ``df["y"].max()`` commonly returns.
    if isinstance(
        y_position, (int, float, np.integer, np.floating)
    ):
        df["_y_pos"] = float(y_position)
    elif isinstance(y_position, str) and y_position in df.columns:
        df["_y_pos"] = df[y_position].astype(float)
    else:
        # Fall back to the conventional column names.
        for col in ("y.position", "y_position"):
            if col in df.columns:
                df["_y_pos"] = df[col].astype(float)
                break
        else:
            raise ValueError(
                "y_position must be specified or data must "
                "contain a 'y.position' or 'y_position' "
                "column"
            )

    # Resolve bracket end points.
    if xmin and xmin in df.columns:
        df["_xmin"] = df[xmin]
    else:
        raise ValueError(f"Column '{xmin}' not found in data")
    if xmax and xmax in df.columns:
        df["_xmax"] = df[xmax]
    else:
        raise ValueError(f"Column '{xmax}' not found in data")

    # Drop non-significant comparisons if requested.
    if hide_ns:
        p_col = _find_p_column(df)
        df = df[df[p_col] <= 0.05]

    # Nothing to annotate (empty input, or everything filtered out).
    if df.empty:
        return []

    # Stack brackets: the i-th bracket (overall, or within its group)
    # is raised by i * step_increase * y_max.
    df = df.reset_index(drop=True)
    y_max = df["_y_pos"].max()
    if step_group_by and step_group_by in df.columns:
        for _, group_df in df.groupby(step_group_by):
            offsets = (
                np.arange(len(group_df)) * step_increase * y_max
            )
            df.loc[group_df.index, "_y_pos"] += offsets
    else:
        df["_y_pos"] += np.arange(len(df)) * step_increase * y_max
    df["_y_pos"] += bracket_nudge_y

    def _segment_layer(x, xend, y, yend):
        """Build one geom_segment layer from per-row end points."""
        segments = pd.DataFrame(
            {"x": x, "xend": xend, "y": y, "yend": yend}
        )
        return geom_segment(
            data=segments,
            mapping=aes(x="x", xend="xend", y="y", yend="yend"),
            inherit_aes=False,
            color=color,
            **kwargs,
        )

    left, right, height = df["_xmin"], df["_xmax"], df["_y_pos"]
    layers = []

    if not remove_bracket:
        # Tip length is scaled by the spread of bracket heights,
        # falling back to the top height when all are equal.
        y_range = height.max() - height.min()
        if y_range == 0:
            y_range = height.max()
        tip = tip_length * y_range if y_range > 0 else 0.1

        layers.extend(
            [
                # Horizontal bar
                _segment_layer(left, right, height, height),
                # Left tip
                _segment_layer(left, left, height, height - tip),
                # Right tip
                _segment_layer(right, right, height, height - tip),
            ]
        )

    # Labels are centred between the bracket ends.
    # NOTE(review): the midpoint arithmetic needs numeric xmin/xmax
    # values; columns holding group-name strings would fail here.
    label_data = pd.DataFrame(
        {
            "x": (left + right) / 2,
            "y": height,
            "label": df["_label"],
        }
    )
    layers.append(
        geom_text(
            data=label_data,
            mapping=aes(x="x", y="y", label="label"),
            inherit_aes=False,
            size=label_size,
            va="bottom",
            nudge_y=vjust,
            color=color,
        )
    )
    return layers
def _find_p_column(df: pd.DataFrame) -> str:
"""Find the p-value column in a DataFrame."""
for col in ("p", "p.adj", "p_adj", "pvalue", "p_value"):
if col in df.columns:
return col
raise ValueError(
"No p-value column found. Expected one of: "
"'p', 'p.adj', 'p_adj', 'pvalue', 'p_value'"
)