# Source code for proteopy.ann.proteins

from pathlib import Path

import pandas as pd
from anndata import AnnData

from proteopy.ann import base_anndata as ann_base



# [docs]
def proteins_from_csv(
    adata: AnnData,
    file_path: str | Path,
    *,
    sep: str = ",",
    file_protein_col: str = "protein_id",
    suffix: str = "_annotated",
    inplace: bool = True,
) -> AnnData | None:
    """Load a delimited annotation table and attach it to ``adata.var``.

    The file is read with :func:`pandas.read_csv` and the resulting table is
    handed to :func:`proteopy.ann.base_anndata.var`, matching the
    ``protein_id`` column of ``adata.var`` against ``file_protein_col`` of
    the file.

    Parameters
    ----------
    adata : AnnData
        Proteomics AnnData whose ``adata.var`` holds a ``protein_id`` column.
    file_path : str | Path
        Location of the CSV file with the extra protein annotations.
    sep : str, optional
        Field delimiter forwarded to :func:`pandas.read_csv`. Defaults to
        ``","``.
    file_protein_col : str, optional
        Name of the column in the file that carries the protein IDs used
        for matching. Defaults to ``"protein_id"``.
    suffix : str, optional
        Suffix for colliding column names; forwarded unchanged to
        :func:`proteopy.ann.base_anndata.var`. Defaults to ``"_annotated"``.
    inplace : bool, optional
        Forwarded unchanged to :func:`proteopy.ann.base_anndata.var`. When
        ``True`` (the default), ``adata`` is modified in place and ``None``
        is returned.

    Returns
    -------
    AnnData or None
        The updated AnnData if ``inplace`` is ``False``, else ``None``.
    """
    # Parse the annotation file first; everything else is delegated.
    csv_table = pd.read_csv(file_path, sep=sep)

    # The AnnData-side key is always "protein_id"; only the file-side key
    # is configurable by the caller.
    merge_options = {
        "var_on": "protein_id",
        "df_on": file_protein_col,
        "suffix": suffix,
        "inplace": inplace,
    }
    result = ann_base.var(adata, csv_table, **merge_options)
    return result