Source code for proteopy.pl.stat_tests

from __future__ import annotations

from pathlib import Path
import warnings

import numpy as np
import pandas as pd
from scipy import sparse
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
import seaborn as sns
from adjustText import adjust_text
import anndata as ad

from proteopy.utils.anndata import check_proteodata
from proteopy.utils.parsers import parse_stat_test_varm_slot
from proteopy.utils.matplotlib import _resolve_color_scheme


def _stat_test_title_from_varm_slot(
    adata: ad.AnnData,
    varm_slot: str,
) -> str:
    """
    Generate a human-readable plot title from a stat test varm slot.

    Parses the varm slot name to extract test type, group_by, design
    (group comparison), and optional layer information, then formats them
    into a concise title string. If parsing fails, returns the original
    slot name with a runtime warning.

    Parameters
    ----------
    adata : ad.AnnData
        AnnData object containing the layers used for slot name
        parsing.
    varm_slot : str
        Stat test result slot name in ``adata.varm``. Expected
        format: ``<test_type>;<group_by>;<design>`` or
        ``<test_type>;<group_by>;<design>;<layer>`` (e.g.,
        ``welch;condition;treatment_vs_control``).

    Returns
    -------
    str
        Formatted title string. Format:
        ``<Test Label> | <group_by>: <Design>`` or
        ``<Test Label> | <group_by>: <Design> | layer: <layer>``
        if a layer was specified. Returns the original ``varm_slot``
        unchanged if parsing fails.
    """
    try:
        parsed = parse_stat_test_varm_slot(varm_slot, adata=adata)
    except ValueError as exc:
        warnings.warn(
            f"Could not parse varm slot '{varm_slot}': {exc}",
            RuntimeWarning,
        )
        return varm_slot

    title = (
        f"{parsed['test_type_label']} | "
        f"{parsed['group_by']}: {parsed['design_label']}"
    )
    if parsed["layer"]:
        title = f"{title} | layer: {parsed['layer']}"
    return title


def _normalize_alt_color(
    alt_color: pd.Series | list[bool] | np.ndarray,
    adata: ad.AnnData,
    plot_index: pd.Index,
) -> pd.Series:
    """
    Validate and align alternative color boolean mask to plot data.

    Converts the user-provided ``alt_color`` input into a boolean
    Series indexed by ``adata.var_names``, then reindexes to match
    the plotting data (which may be a subset after filtering for
    finite values and positive p-values). This ensures the mask
    correctly aligns with proteins that will appear in the volcano
    plot.

    Parameters
    ----------
    alt_color : pd.Series | list[bool] | np.ndarray
        Boolean mask of length ``n_vars`` indicating which proteins
        should receive alternative coloring. Accepts a pandas
        Series (optionally indexed), a list, or a numpy array.
        Series indices are ignored and values are aligned to
        ``adata.var_names`` positionally.
    adata : ad.AnnData
        AnnData object used to determine expected length
        (``n_vars``) and indexing (``var_names``).
    plot_index : pd.Index
        Index of proteins remaining after filtering (subset of
        ``adata.var_names``). Used to align ``alt_color`` to the
        plotting subset.

    Returns
    -------
    pd.Series
        Boolean Series indexed by ``plot_index``, indicating which
        proteins should be colored with the alternative scheme.

    Raises
    ------
    ValueError
        If ``alt_color`` length does not match ``adata.n_vars``, is
        not boolean dtype, is not 1D, or contains missing values
        after reindexing.
    TypeError
        If ``alt_color`` is not a pandas Series, list, or numpy
        array.
    """
    if isinstance(alt_color, pd.Series):
        if len(alt_color) != adata.n_vars:
            raise ValueError(
                "alt_color must have length matching adata.n_vars."
            )
        if not pd.api.types.is_bool_dtype(alt_color):
            raise ValueError("alt_color must be boolean.")
        if alt_color.index.equals(adata.var_names):
            series = alt_color.copy()
        else:
            series = pd.Series(
                alt_color.to_numpy(),
                index=adata.var_names,
            )
    elif isinstance(alt_color, (list, np.ndarray)):
        values = np.asarray(alt_color)
        if values.ndim != 1:
            raise ValueError("alt_color must be a 1D boolean sequence.")
        if len(values) != adata.n_vars:
            raise ValueError(
                "alt_color must have length matching adata.n_vars."
            )
        if not np.issubdtype(values.dtype, np.bool_):
            raise ValueError("alt_color must be boolean.")
        series = pd.Series(values, index=adata.var_names)
    else:
        raise TypeError(
            "alt_color must be a pandas Series, list, or numpy array."
        )

    # Align to plot subset (may introduce NaNs if plot_index contains
    # proteins not in adata.var_names, which indicates a bug)
    series = series.reindex(plot_index)
    if series.isna().any():
        raise ValueError(
            "alt_color contains missing values after aligning to "
            "varm data."
        )
    return series



[docs]
def volcano_plot(
    adata: ad.AnnData,
    varm_slot: str,
    fc_col: str = "logfc",
    pval_col: str = "pval_adj",
    fc_thresh: float = 1.0,
    pval_thresh: float = 0.05,
    top_labels: int | None = None,
    label_col: str | None = None,
    figsize: tuple[float, float] = (6.0, 5.0),
    xlabel: str | None = None,
    alt_color: pd.Series | list[bool] | np.ndarray | None = None,
    ylabel_logscale: bool = True,
    title: str | None = None,
    show: bool = True,
    save: str | Path | None = None,
    ax: bool | None = None,
) -> Axes | None:
    """
    Visualize differential abundance results as a volcano plot.

    Creates a scatter plot of log fold change (x-axis) versus p-value
    (y-axis) for proteins from a statistical test stored in
    ``adata.varm``. Points are colored by significance (exceeding both
    fold change and p-value thresholds), with options for custom
    coloring and automatic labeling of top hits.

    Parameters
    ----------
    adata : ad.AnnData
        :class:`~anndata.AnnData` containing differential abundance
        test results in ``.varm``.
    varm_slot : str
        Key in ``adata.varm`` containing the differential abundance
        test results as a DataFrame. Expected format produced by
        ``copro.tl.differential_abundance``.
    fc_col : str, optional
        Column name in the varm DataFrame containing log fold change
        values. Log base depends on the test method used.
    pval_col : str, optional
        Column name in the varm DataFrame containing adjusted p-values.
        If this column is not found, the function falls back to
        ``"pval"`` (unadjusted p-values).
    fc_thresh : float, optional
        Absolute log fold change threshold for significance. Proteins
        with ``|logfc| >= fc_thresh`` and ``pval <= pval_thresh`` are
        highlighted as significant.
    pval_thresh : float, optional
        P-value threshold for significance. Used in conjunction with
        ``fc_thresh`` to identify significant proteins.
    top_labels : int | None, optional
        Number of top proteins to label on each side of the volcano
        plot (up to 2N labels total). For each direction (positive
        and negative fold change), selects the top N proteins that
        meet BOTH significance thresholds (``pval <= pval_thresh``
        AND ``|logfc| >= fc_thresh``). Proteins are ranked first by
        smallest p-value, then by largest absolute fold change.
        ``None`` disables automatic labeling.
    label_col : str | None, optional
        Column in ``adata.var`` to use for labeling proteins.
        Defaults to ``adata.var_names`` if ``None``.
    figsize : tuple[float, float], optional
        Figure dimensions (width, height) in inches.
    xlabel : str | None, optional
        Label for the x-axis. Defaults to the value of ``fc_col`` if
        ``None``.
    alt_color : pd.Series | list[bool] | np.ndarray | None, optional
        Boolean mask (length ``n_vars``) for alternative coloring
        scheme. When provided, this COMPLETELY OVERRIDES the default
        significance-based coloring: proteins with ``True`` are
        colored light purple (#8E54E5), proteins with ``False`` are
        colored gray (#808080). Significance thresholds (``fc_thresh``
        and ``pval_thresh``) are still visualized as dashed lines
        but do not influence point colors. ``None`` uses the default
        significance-based coloring (gray, blue, red).
    ylabel_logscale : bool, optional
        Controls y-axis representation of p-values. When ``True``,
        plot raw p-values on a log10-scaled y-axis (inverted so
        smaller p-values appear higher); y-axis label shows
        ``pval_col`` name. When ``False``, apply ``-log10``
        transform to p-values and plot on a linear y-axis; y-axis
        label shows ``-log10(pval_col)``. Both representations
        produce visually similar plots with the same interpretation.
    title : str | None, optional
        Plot title. If ``None``, generates a title from the
        ``varm_slot`` name using the stat test metadata (test type,
        group comparison, layer).
    show : bool, optional
        Call ``matplotlib.pyplot.show()`` to display the plot.
    save : str | Path | None, optional
        File path to save the figure. Saved at 300 DPI with tight
        bounding box. ``None`` skips saving.
    ax : bool | None, optional
        Return the :class:`matplotlib.axes.Axes` object. When
        ``None`` or ``False``, returns ``None``. When ``True``,
        returns the Axes object for further customization.

    Returns
    -------
    Axes | None
        The Matplotlib Axes object if ``ax=True``, otherwise ``None``.

    Raises
    ------
    KeyError
        If ``varm_slot`` is not in ``adata.varm``, ``fc_col`` or
        p-value columns are not in the varm DataFrame, or
        ``label_col`` is not in ``adata.var``.
    TypeError
        If ``adata.varm[varm_slot]`` is not a pandas DataFrame.
    ValueError
        If no valid (finite, positive p-value) results remain after
        filtering, if ``top_labels`` is not a positive integer, or if
        all of ``show``, ``save``, and ``ax`` are ``False``.

    Notes
    -----
    **Data Filtering**:
    Proteins are filtered before plotting to remove:
    - Missing values in fold change or p-value columns
    - Non-finite values (inf, -inf, nan)
    - Non-positive p-values (cannot be log-transformed)

    **Color Schemes**:
    Default coloring (when ``alt_color=None``):
    - Gray: non-significant proteins (fail one or both thresholds)
    - Blue (#1f77b4): significantly downregulated
      (``logfc <= -fc_thresh`` AND ``pval <= pval_thresh``)
    - Red (#d62728): significantly upregulated
      (``logfc >= fc_thresh`` AND ``pval <= pval_thresh``)

    Alternative coloring (when ``alt_color`` is provided):
    - Gray (#808080): proteins with ``alt_color=False``
    - Light purple (#8E54E5): proteins with ``alt_color=True``
    - Significance thresholds do NOT affect colors, only threshold
      lines are drawn

    **Label Selection Algorithm** (when ``top_labels`` is set):
    1. Filter proteins to those meeting BOTH significance thresholds
    2. Separate into positive (``logfc > 0``) and negative
       (``logfc < 0``) groups
    3. Within each group, rank by: (1) smallest p-value, then
       (2) largest absolute fold change
    4. Select top N from each group (up to 2N total labels)
    5. Use ``adjustText`` library to prevent label overlap

    Examples
    --------
    Plot differential abundance results with default settings:

    >>> pp.pl.volcano_plot(adata, varm_slot="welch;condition;treatment_vs_ctrl")

    Label top 10 proteins per side and save to file:

    >>> pp.pl.volcano_plot(
    ...     adata,
    ...     varm_slot="welch;condition;treatment_vs_ctrl",
    ...     top_labels=10,
    ...     save="volcano.png",
    ... )

    Use custom coloring to highlight proteins of interest:

    >>> proteins_of_interest = adata.var["protein_id"].isin(
    ...     ["P12345", "Q67890"]
    ... )
    >>> pp.pl.volcano_plot(
    ...     adata,
    ...     varm_slot="welch;condition;treatment_vs_ctrl",
    ...     alt_color=proteins_of_interest,
    ... )
    """
    check_proteodata(adata)

    # Validate varm slot exists and contains a DataFrame
    if varm_slot not in adata.varm:
        raise KeyError(
            f"varm_slot '{varm_slot}' not found in adata.varm."
        )

    results = adata.varm[varm_slot]
    if not isinstance(results, pd.DataFrame):
        raise TypeError(
            "Expected adata.varm[varm_slot] to be a pandas DataFrame."
        )

    # Validate required columns exist
    if fc_col not in results.columns:
        raise KeyError(
            f"Column '{fc_col}' not found in varm slot '{varm_slot}'."
        )

    # Prioritize adjusted p-values, fall back to unadjusted if needed.
    # This allows the function to work with test results that may not
    # include multiple testing correction.
    if pval_col in results.columns:
        pval_col_used = pval_col
    elif "pval" in results.columns:
        pval_col_used = "pval"
    else:
        raise KeyError(
            f"Columns '{pval_col}' or 'pval' not found in '{varm_slot}'."
        )

    if label_col is not None and label_col not in adata.var.columns:
        raise KeyError(
            f"Column '{label_col}' not found in adata.var."
        )

    # Extract only needed columns and filter invalid values
    plot_df = results.loc[:, [fc_col, pval_col_used]].copy()
    plot_df = plot_df.dropna(subset=[fc_col, pval_col_used])

    # Remove infinite values (inf, -inf) that cannot be plotted
    finite_mask = np.isfinite(plot_df[fc_col]) & np.isfinite(
        plot_df[pval_col_used]
    )
    if not finite_mask.all():
        warnings.warn(
            "Dropping entries with non-finite fold changes or p-values.",
            RuntimeWarning,
        )
        plot_df = plot_df.loc[finite_mask]

    # Remove non-positive p-values that cannot be log-transformed
    if (plot_df[pval_col_used] <= 0).any():
        warnings.warn(
            "Dropping non-positive p-values before log transform.",
            RuntimeWarning,
        )
        plot_df = plot_df.loc[plot_df[pval_col_used] > 0]

    if plot_df.empty:
        raise ValueError("No valid results available for plotting.")

    # Prepare plotting arrays
    fc_vals = plot_df[fc_col].to_numpy()
    pvals = plot_df[pval_col_used].to_numpy()
    neg_log_p = -np.log10(pvals)
    # Choose y-axis representation based on ylabel_logscale parameter.
    # Both approaches produce visually similar plots: log-scale shows
    # raw p-values with inverted axis, linear shows -log10 transform.
    y_vals = pvals if ylabel_logscale else neg_log_p
    # Keep y_map for label positioning (needed later if labels added).
    # This ensures labels use the same y-coordinates as the scatter
    # points regardless of the scale chosen.
    y_map = pd.Series(y_vals, index=plot_df.index)

    # Create masks for significance-based coloring.
    # Proteins must satisfy BOTH p-value AND fold change thresholds
    # to be considered significantly upregulated or downregulated.
    sig_mask = pvals <= pval_thresh
    up_mask = sig_mask & (fc_vals >= fc_thresh)
    down_mask = sig_mask & (fc_vals <= -fc_thresh)
    other_mask = ~(up_mask | down_mask)  # All non-significant proteins

    # Process alternative color scheme if provided.
    # When alt_color is set, it completely overrides significance-based
    # coloring. The masks are still used for threshold line drawing.
    alt_mask = None
    if alt_color is not None:
        alt_series = _normalize_alt_color(alt_color, adata, plot_df.index)
        alt_mask = alt_series.to_numpy()

    # Create scatter plot with significance-based or alternative coloring
    fig, _ax = plt.subplots(figsize=figsize)
    if alt_mask is None:
        # Default coloring: gray (non-sig), blue (down), red (up)
        _ax.scatter(
            fc_vals[other_mask],
            y_vals[other_mask],
            color="grey",
            alpha=0.5,
            s=12,
        )
        _ax.scatter(
            fc_vals[down_mask],
            y_vals[down_mask],
            color="#1f77b4",  # Blue for downregulated
            alpha=0.8,
            s=14,
        )
        _ax.scatter(
            fc_vals[up_mask],
            y_vals[up_mask],
            color="#d62728",  # Red for upregulated
            alpha=0.8,
            s=14,
        )
    else:
        # Alternative coloring: gray (False) vs purple (True)
        # This completely overrides significance-based coloring
        _ax.scatter(
            fc_vals[~alt_mask],
            y_vals[~alt_mask],
            color="grey",
            alpha=0.5,
            s=12,
        )
        _ax.scatter(
            fc_vals[alt_mask],
            y_vals[alt_mask],
            color="#8E54E5",  # Light purple for highlighted proteins
            alpha=0.8,
            s=14,
        )

    # Add threshold lines (vertical for fold change, horizontal for
    # p-value). These dashed lines visualize the significance cutoffs.
    _ax.axvline(fc_thresh, color="black", linestyle="--", linewidth=1)
    _ax.axvline(-fc_thresh, color="black", linestyle="--", linewidth=1)
    if ylabel_logscale:
        # Log-scale y-axis: plot raw p-values, invert so smaller
        # p-values appear higher (more significant = higher on plot)
        _ax.axhline(
            pval_thresh, color="black", linestyle="--", linewidth=1
        )
        _ax.set_yscale("log", base=10)
        _ax.invert_yaxis()
    else:
        # Linear y-axis: plot -log10(p-value), where larger values
        # represent more significant (smaller) p-values
        _ax.axhline(
            -np.log10(pval_thresh),
            color="black",
            linestyle="--",
            linewidth=1,
        )

    # Set axis labels and title
    _ax.set_xlabel(xlabel or fc_col)
    if ylabel_logscale:
        _ax.set_ylabel(pval_col_used)
    else:
        _ax.set_ylabel(f"-log10({pval_col_used})")
    if title is None:
        title = _stat_test_title_from_varm_slot(adata, varm_slot)
    _ax.set_title(title)

    # Add automatic labeling for top hits if requested.
    # This implements the top_labels selection algorithm described in
    # the docstring: N proteins per side, ranked by p-value then |FC|.
    if top_labels is not None:
        if not isinstance(top_labels, int) or top_labels <= 0:
            raise ValueError("top_labels must be a positive integer.")

        # Get label source (var_names or specified column in var)
        if label_col is None:
            labels = adata.var_names.to_series()
        else:
            labels = adata.var[label_col]

        # Align labels to plotting data (subset after filtering) and
        # prepare a working DataFrame with labels and metrics
        label_series = labels.reindex(plot_df.index)
        label_df = plot_df.copy()
        label_df["label"] = label_series
        label_df = label_df.dropna(subset=["label"])
        label_df["abs_fc"] = label_df[fc_col].abs()
        # Only label proteins that meet BOTH significance thresholds.
        # This ensures we only highlight genuinely significant hits.
        label_df = label_df[
            (label_df[pval_col_used] <= pval_thresh)
            & (label_df["abs_fc"] >= fc_thresh)
        ]

        # Select top N proteins per side (positive/negative FC).
        # Ranking criteria (in order of priority):
        # 1. Smallest p-value (most statistically significant)
        # 2. Largest absolute fold change (largest effect size)
        pos_df = label_df[label_df[fc_col] > 0].sort_values(
            by=[pval_col_used, "abs_fc"],
            ascending=[True, False],
        ).head(top_labels)
        neg_df = label_df[label_df[fc_col] < 0].sort_values(
            by=[pval_col_used, "abs_fc"],
            ascending=[True, False],
        ).head(top_labels)
        label_df = pd.concat([pos_df, neg_df])

        # Add text labels at protein positions and adjust to avoid
        # overlap using the adjustText library
        texts = []
        for _, row in label_df.iterrows():
            texts.append(
                _ax.text(
                    row[fc_col],
                    y_map.loc[row.name],
                    str(row["label"]),
                    fontsize=8,
                )
            )
        if texts:
            # Automatically adjust label positions with arrows pointing
            # to original data points
            adjust_text(
                texts,
                ax=_ax,
                arrowprops=dict(arrowstyle="->", color="0.4", lw=0.7),
            )

    if save:
        fig.savefig(save, dpi=300, bbox_inches="tight")
    if show:
        plt.show()
    if ax:
        return _ax
    if not save and not show and not ax:
        raise ValueError(
            "Args show, ax and save all set to False, function does nothing.",
        )
    return None




[docs]
def differential_abundance_box(
    adata: ad.AnnData,
    varm_slot: str,
    order: list[str] | None = None,
    top_n: int | None = None,
    layer: str | None = None,
    verbose: bool = False,
    figsize: tuple[float, float] = (10.0, 6.0),
    title: str | None = None,
    xlabel_rotation: float = 45,
    color_scheme: str | dict | list | None = None,
    show_pval: bool = True,
    pval_fontsize: float | int = 9,
    show: bool = True,
    save: str | Path | None = None,
    ax: bool | None = None,
) -> Axes | None:
    """
    Display boxplots of intensities for top differentially abundant variables.

    For each of the top N differentially abundant variables (sorted by
    p-value), shows side-by-side boxplots comparing intensities across
    groups defined by the statistical test and annotates the per-variable
    p-values.

    Parameters
    ----------
    adata : ad.AnnData
        :class:`~anndata.AnnData` containing differential abundance
        test results in ``.varm`` and expression data in ``.X`` or
        a specified layer.
    varm_slot : str
        Key in ``adata.varm`` containing the differential abundance
        test results. Expected format produced by
        ``proteopy.tl.differential_abundance``.
    order : list[str] | None, optional
        Order and/or subset groups within each variable's boxplots. When
        ``None``, uses all groups as they appear in the data. When
        provided, only groups listed in ``order`` are shown, in the
        given sequence. All values must exist in the ``group_by`` column.
    top_n : int, optional
        Number of top differentially abundant variables to display.
        Variables are ranked by ascending p-value (most significant
        first). Defaults to 10.
    layer : str | None, optional
        Key in ``adata.layers`` providing the intensity matrix. When
        ``None``, auto-detects the layer from the statistical test
        metadata.
    verbose : bool, optional
        If ``True``, print which layer or ``.X`` matrix is used for
        intensity data.
    figsize : tuple[float, float], optional
        Figure dimensions (width, height) in inches.
    title : str | None, optional
        Plot title. If ``None``, generates a title from the
        ``varm_slot`` name.
    xlabel_rotation : float, optional
        Rotation angle (degrees) for x-axis variable labels.
    color_scheme : str | dict | list | None, optional
        Color mapping for groups. Accepts a named Matplotlib colormap,
        a dict mapping group names to colors, or a list of colors.
        If ``None``, uses the default color cycle.
    show_pval : bool, optional
        If ``True``, annotate the per-variable p-values above the
        boxplots. If ``False``, omit the annotations.
    pval_fontsize : float | int, optional
        Font size for the p-value annotations when ``show_pval`` is
        ``True``.
    show : bool, optional
        Call ``matplotlib.pyplot.show()`` to display the plot.
    save : str | Path | None, optional
        File path to save the figure. Saved at 300 DPI with tight
        bounding box. ``None`` skips saving.
    ax : bool | None, optional
        Return the :class:`matplotlib.axes.Axes` object. When
        ``None`` or ``False``, returns ``None``. When ``True``,
        returns the Axes object for further customization.

    Returns
    -------
    Axes | None
        The Matplotlib Axes object if ``ax=True``, otherwise ``None``.

    Raises
    ------
    KeyError
        If ``varm_slot`` is not in ``adata.varm``, if ``layer`` is not
        in ``adata.layers``, or if the parsed ``group_by`` column is
        not in ``adata.obs``.
    TypeError
        If ``adata.varm[varm_slot]`` is not a pandas DataFrame.
    ValueError
        If ``top_n`` is not a positive integer, if no valid results
        remain after filtering, if ``order`` contains values not
        present in the ``group_by`` column, or if ``pval_fontsize``
        is not a positive number when ``show_pval`` is ``True``.

    Examples
    --------
    Plot top 10 differentially abundant proteins:

    >>> pp.pl.differential_abundance_box(
    ...     adata,
    ...     varm_slot="welch;condition;treated_vs_control",
    ... )

    Plot top 5 proteins for specific groups in a given order:

    >>> pp.pl.differential_abundance_box(
    ...     adata,
    ...     varm_slot="welch;condition;treated_vs_control",
    ...     order=["control", "treated"],
    ...     top_n=5,
    ... )
    """
    check_proteodata(adata)

    # Validate varm slot exists and contains a DataFrame
    if varm_slot not in adata.varm:
        raise KeyError(
            f"varm_slot '{varm_slot}' not found in adata.varm."
        )

    results = adata.varm[varm_slot]
    if not isinstance(results, pd.DataFrame):
        raise TypeError(
            "Expected adata.varm[varm_slot] to be a pandas DataFrame."
        )

    # Validate top_n
    if top_n is None:
        top_n = 10
    if not isinstance(top_n, int) or top_n <= 0:
        raise ValueError("top_n must be a positive integer.")

    # Parse varm slot to get group_by column
    parsed = parse_stat_test_varm_slot(varm_slot, adata=adata)
    group_by = parsed["group_by"]

    if group_by not in adata.obs.columns:
        raise KeyError(
            f"Column '{group_by}' not found in adata.obs."
        )

    # Auto-detect layer from test metadata when not specified
    if layer is None:
        layer = parsed["layer"]

    # Validate layer if specified
    if layer is not None and layer not in adata.layers:
        raise KeyError(
            f"Layer '{layer}' not found in adata.layers."
        )

    if verbose:
        if layer is not None:
            print(f"Using layer: '{layer}'")
        else:
            print("Using .X matrix")

    # Determine p-value column for sorting
    if "pval_adj" in results.columns:
        pval_col = "pval_adj"
    elif "pval" in results.columns:
        pval_col = "pval"
    else:
        raise KeyError(
            f"Neither 'pval_adj' nor 'pval' found in varm slot '{varm_slot}'."
        )

    # Sort by p-value and select top N variables
    results_sorted = results.sort_values(by=pval_col, ascending=True)
    top_vars = results_sorted.head(top_n).index.tolist()

    if not top_vars:
        raise ValueError("No valid variables found after filtering.")

    if show_pval:
        if not isinstance(pval_fontsize, (int, float)) or pval_fontsize <= 0:
            raise ValueError("pval_fontsize must be a positive number.")

    pvals_to_plot = None
    if show_pval:
        pvals_to_plot = results.loc[top_vars, pval_col].reindex(top_vars)

    # Get intensity data
    if layer is not None:
        X = adata[:, top_vars].layers[layer]
    else:
        X = adata[:, top_vars].X

    if sparse.issparse(X):
        X = X.toarray()
    X = np.asarray(X, dtype=float)

    # Build long-format DataFrame for plotting
    df = pd.DataFrame(
        X,
        index=adata.obs_names,
        columns=top_vars,
    )
    df[group_by] = adata.obs[group_by].values
    df_long = df.melt(
        id_vars=[group_by],
        var_name="variable",
        value_name="intensity",
    )

    # Determine group order and optionally filter groups
    if order is not None:
        available_groups = set(df_long[group_by].unique())
        missing_in_data = set(order) - available_groups
        if missing_in_data:
            raise ValueError(
                f"Groups in 'order' not found in data: "
                f"{sorted(missing_in_data)}. "
                f"Available groups: {sorted(available_groups)}"
            )
        df_long = df_long[df_long[group_by].isin(order)]
        group_order = list(order)
    else:
        group_order = df_long[group_by].dropna().unique().tolist()

    if df_long.empty:
        raise ValueError("No data remaining after filtering.")

    # Preserve variable order (by significance)
    df_long["variable"] = pd.Categorical(
        df_long["variable"],
        categories=top_vars,
        ordered=True,
    )

    # Resolve color scheme
    palette = None
    if color_scheme is not None:
        colors = _resolve_color_scheme(color_scheme, group_order)
        if colors:
            palette = dict(zip(group_order, colors))

    # Create plot
    fig, _ax = plt.subplots(figsize=figsize)

    sns.boxplot(
        data=df_long,
        x="variable",
        y="intensity",
        hue=group_by,
        hue_order=group_order,
        palette=palette,
        gap=0.1,
        flierprops={'marker':'.', 'markersize':1,},
        ax=_ax,
    )

    if show_pval and pvals_to_plot is not None:
        # Annotate p-values at a uniform height near the top of the plot
        all_intensity = df_long["intensity"].to_numpy()
        finite_intensity = all_intensity[np.isfinite(all_intensity)]
        data_max = (
            float(finite_intensity.max()) if finite_intensity.size else 0.0
        )
        y_min, y_max = _ax.get_ylim()
        span = y_max - y_min if y_max != y_min else 1.0
        label_y = data_max + 0.05 * span
        needed_y_max = label_y + 2.5 * (0.05 * span)
        if needed_y_max > y_max:
            _ax.set_ylim(y_min, needed_y_max)
            y_max = needed_y_max
            span = y_max - y_min if y_max != y_min else 1.0
        for x_pos, var in zip(_ax.get_xticks(), top_vars):
            pval = pvals_to_plot.get(var)
            if pd.isna(pval):
                continue
            _ax.text(
                x_pos,
                label_y,
                f"p={pval:.2e}",
                ha="center",
                va="bottom",
                fontsize=pval_fontsize,
            )

    # Set axis labels and title
    _ax.set_xlabel("")
    _ax.set_ylabel("Intensity")
    plt.setp(_ax.get_xticklabels(), rotation=xlabel_rotation, ha="right")

    if title is None:
        title = _stat_test_title_from_varm_slot(adata, varm_slot)
    _ax.set_title(title)

    # Adjust legend
    _ax.legend(title=group_by, bbox_to_anchor=(1.02, 1), loc="upper left")

    plt.tight_layout()

    if save:
        fig.savefig(save, dpi=300, bbox_inches="tight")
    if show:
        plt.show()
    if ax:
        return _ax
    if not save and not show and not ax:
        raise ValueError(
            "Args show, ax and save all set to False, function does nothing.",
        )
    return None