Source code for plotnine_extra.stats.stat_central_tendency
from __future__ import annotations
import numpy as np
import pandas as pd
from plotnine.doctools import document
from plotnine.stats.stat import stat
[docs]
@document
class stat_central_tendency(stat):
    """
    Add central tendency measure to a density plot

    Computes the mean, median, or mode of the data and
    returns coordinates for a vertical line at that position.

    {usage}

    Parameters
    ----------
    {common_parameters}
    type : str, default="mean"
        Type of central tendency measure. One of
        ``"mean"``, ``"median"``, or ``"mode"``.

    See Also
    --------
    plotnine.geom_line : The default `geom` for this `stat`.
    """

    # NOTE(review): the ``{...}`` placeholders in the class docstring and
    # in ``_aesthetics_doc`` are presumably filled in by ``@document``;
    # keep them verbatim -- confirm against plotnine.doctools.
    _aesthetics_doc = """
    {aesthetics_table}
    """

    # Only the ``x`` aesthetic is read by ``compute_group``.
    REQUIRED_AES = {"x"}
    DEFAULT_PARAMS = dict(
        geom="line",
        position="identity",
        na_rm=False,
        type="mean",
    )

    def compute_group(self, data, scales) -> pd.DataFrame:
        """
        Locate the requested central value of ``x`` for one group.

        Parameters
        ----------
        data : dataframe
            Group data; only its ``x`` column is read.
        scales
            Not used by this computation.

        Returns
        -------
        pandas.DataFrame
            Two rows that share the computed ``x`` value, with ``y``
            set to ``-inf`` and ``+inf`` respectively.

        Raises
        ------
        ValueError
            If the ``type`` parameter is not ``"mean"``,
            ``"median"`` or ``"mode"``.
        """
        measure = self.params["type"]
        values = data["x"]

        if measure == "mean":
            location = values.mean()
        elif measure == "median":
            location = values.median()
        elif measure == "mode":
            location = _get_mode(values)
        else:
            raise ValueError(
                f"type must be 'mean', 'median', or 'mode', got '{measure}'"
            )

        # The same x twice, paired with infinite y endpoints, gives the
        # two points of a vertical segment at the computed location.
        return pd.DataFrame(
            {"x": [location] * 2, "y": [-np.inf, np.inf]}
        )
def _get_mode(series):
    """
    Return the most frequent value of a pandas Series.

    The mode is taken from exact-value frequencies as reported by
    ``Series.value_counts``; no binning or kernel density estimation
    is performed. Missing values are not counted (``value_counts``
    drops them by default). When several values share the highest
    count, the one that ``value_counts`` lists first is returned.

    Parameters
    ----------
    series : pandas.Series
        Values to summarise.

    Returns
    -------
    scalar
        The most frequent non-missing value, or ``nan`` when the
        series contains no non-missing values.
    """
    counts = series.value_counts()
    # ``idxmax`` raises ValueError on an empty result. Return NaN instead
    # so an empty or all-missing group yields NaN, as ``Series.mean`` and
    # ``Series.median`` do.
    if counts.empty:
        return np.nan
    return counts.idxmax()