Source code for proteopy.pl.copf

from __future__ import annotations

from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import anndata as ad
from matplotlib.axes import Axes

from proteopy.utils.anndata import check_proteodata

def proteoform_scores(
    adata: ad.AnnData,
    *,
    adj: bool = True,
    pval_threshold: float | int | None = None,
    score_threshold: float | int | None = None,
    log_scores: bool = False,
    show: bool = True,
    save: str | Path | None = None,
    ax: bool = False,
) -> Axes | None:
    """Scatter plot of COPF proteoform scores vs. p-values.

    The proteoform score is drawn on the x-axis and the (optionally
    ``-log10``-transformed) p-value on the y-axis. Rows of ``adata.var``
    sharing the same (score, p-value) pair are plotted once. Points that
    satisfy every supplied threshold are highlighted; when no threshold
    is supplied, no point is highlighted.

    Parameters
    ----------
    adata : AnnData
        :class:`~anndata.AnnData` with COPF score annotations in ``.var``
        (``proteoform_score`` plus the selected p-value column).
    adj : bool
        Use ``proteoform_score_pval_adj`` when ``True``; otherwise use
        ``proteoform_score_pval``.
    pval_threshold : float | int | None
        Maximum p-value used to highlight points; must lie in ``(0, 1]``.
        ``None`` disables filtering by p-value.
    score_threshold : float | int | None
        Minimum proteoform score used to highlight points; must be
        ``>= 0``. ``None`` disables score-based filtering.
    log_scores : bool
        Plot ``-log10(p-value)`` on the y-axis when ``True``; otherwise
        plot the raw p-values. The axis itself stays linear.
    show : bool
        Call :func:`matplotlib.pyplot.show` when ``True``.
    save : str | Path | None
        File path to save the figure. ``None`` skips saving.
    ax : bool
        Return the created :class:`matplotlib.axes.Axes` instead of
        ``None``.

    Returns
    -------
    matplotlib.axes.Axes | None
        The axes holding the scatter plot when ``ax`` is ``True``,
        otherwise ``None``.

    Raises
    ------
    TypeError
        If ``save`` is neither ``None``, a ``str`` nor a ``Path``.
    ValueError
        If none of ``show``, ``save`` or ``ax`` is set, if a threshold is
        not a finite real number within its allowed range, if required
        columns are missing from ``adata.var``, or if no valid rows
        remain after filtering.

    Warns
    -----
    RuntimeWarning
        When rows are dropped because their p-value is non-finite,
        negative, or (with ``log_scores=True``) non-positive.
    """
    # ---- Argument validation -------------------------------------------
    # Everything that depends only on the arguments is checked up front so
    # an invalid call fails before any data is processed or any figure is
    # created (which would otherwise be left open).
    if save is not None and not isinstance(save, (str, Path)):
        raise TypeError("`save` must be a path-like object or None.")

    if not (show or save or ax):
        raise ValueError(
            "Function does nothing: set one of `show`, `save`, or `ax`."
        )

    def _validate_threshold(
        value: float | int | None,
        *,
        name: str,
        allow_zero: bool = False,
        upper_bound: float | None = None,
    ) -> float | int | None:
        """Return ``value`` unchanged if it is ``None`` or a valid number."""
        if value is None:
            return None
        # bool is a subclass of int, so it has to be rejected explicitly.
        if isinstance(value, bool):
            raise ValueError(f"{name} must be a number, not bool.")
        if not isinstance(value, (int, float, np.integer, np.floating)):
            raise ValueError(f"{name} must be a real number.")
        if not np.isfinite(value):
            raise ValueError(f"{name} must be a finite number.")
        if not allow_zero and value <= 0:
            raise ValueError(f"{name} must be greater than 0.")
        if upper_bound is not None and value > upper_bound:
            raise ValueError(
                f"{name} must be less than or equal to {upper_bound}."
            )
        return value

    pval_threshold = _validate_threshold(
        pval_threshold,
        name="pval_threshold",
        allow_zero=False,
        upper_bound=1.0,
    )
    score_threshold = _validate_threshold(
        score_threshold,
        name="score_threshold",
        allow_zero=True,
    )

    # ---- Data extraction -----------------------------------------------
    check_proteodata(adata)

    pval_col = "proteoform_score_pval_adj" if adj else "proteoform_score_pval"

    required_cols = {"proteoform_score", pval_col}
    missing = required_cols.difference(adata.var.columns)
    if missing:
        missing_str = ", ".join(sorted(missing))
        raise ValueError(
            "Missing required columns in `adata.var`: "
            f"{missing_str}"
        )

    var = adata.var.loc[:, ["proteoform_score", pval_col]].copy()
    var = var.drop_duplicates()
    var = var.dropna(subset=["proteoform_score", pval_col])

    # Filter out invalid p-values before plotting.
    finite_mask = np.isfinite(var[pval_col])
    if not finite_mask.all():
        warnings.warn(
            "Dropping entries with non-finite p-values.",
            RuntimeWarning,
        )
        var = var.loc[finite_mask]

    if log_scores:
        # log10 is undefined for p <= 0, so those rows cannot be drawn.
        positive_mask = var[pval_col] > 0
        if not positive_mask.all():
            warnings.warn(
                "Dropping non-positive p-values before log-transforming.",
                RuntimeWarning,
            )
            var = var.loc[positive_mask]
        plot_pvals = -np.log10(var[pval_col])
        ylabel = "-log10(adj. p-value)" if adj else "-log10(p-value)"
    else:
        non_negative = var[pval_col] >= 0
        if not non_negative.all():
            warnings.warn(
                "Dropping negative p-values before plotting.",
                RuntimeWarning,
            )
            var = var.loc[non_negative]
        plot_pvals = var[pval_col]
        ylabel = "adj. p-value" if adj else "p-value"

    if var.empty:
        raise ValueError("No valid proteoform scores available for plotting.")

    # ---- Highlight mask --------------------------------------------------
    # Start from all-True only when at least one threshold is active; with
    # no threshold at all, nothing is highlighted.
    has_condition = score_threshold is not None or pval_threshold is not None
    mask = pd.Series(has_condition, index=var.index)
    if score_threshold is not None:
        mask &= var["proteoform_score"] >= score_threshold
    if pval_threshold is not None:
        # Thresholding is always done on the untransformed p-value.
        mask &= var[pval_col] <= pval_threshold

    # The threshold line must live in the same units as the y-axis.
    if pval_threshold is None:
        pval_threshold_line = None
    elif log_scores:
        pval_threshold_line = -np.log10(pval_threshold)
    else:
        pval_threshold_line = pval_threshold

    # Build a fresh frame for plotting instead of mutating the filtered one.
    plot_df = var.assign(is_above_threshold=mask, plot_pval=plot_pvals)

    # ---- Plotting --------------------------------------------------------
    _fig, _ax = plt.subplots()
    sns.scatterplot(
        data=plot_df,
        x="proteoform_score",
        y="plot_pval",
        hue="is_above_threshold",
        palette={True: "#008A1D", False: "#BDBDBD"},
        alpha=0.5,
        s=30,
        edgecolor=None,
        legend=False,
        ax=_ax,
    )

    if score_threshold is not None:
        _ax.axvline(
            score_threshold,
            color="#A2A2A2",
            linestyle="--",
        )
    if pval_threshold_line is not None:
        _ax.axhline(
            pval_threshold_line,
            color="#A2A2A2",
            linestyle="--",
        )

    _ax.set_xlabel("Proteoform Score")
    _ax.set_ylabel(ylabel)
    _fig.tight_layout()

    if save is not None:
        _fig.savefig(save, dpi=300, bbox_inches="tight")
    if show:
        plt.show()
    if ax:
        return _ax
    return None