Source code for SpaRCL._regulons

# -*- coding: utf-8 -*-

from typing import Optional, Sequence
from anndata import AnnData

import numpy as np
import pandas as pd
import os
from yaml import load, dump
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper

from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase
from pyscenic.utils import modules_from_adjacencies
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell as pyscenic_aucell


[docs]def regulons( adata: AnnData, tf_names: Optional[Sequence[str]] = None, motif_annotations_fname: Optional[str] = None, db_fnames: Optional[Sequence[str]] = None, thresholds=(0.75, 0.90), top_n_targets=(50,), top_n_regulators=(5,10,50), min_genes=20, relation_key: Optional[str] = None, key_added: Optional[str] = None, copy: bool = False, ) -> Optional[AnnData]: ''' Regulon inference for spatial transcriptomics [Aibar17]_. Parameters ---------- adata Annotated data matrix. tf_names List of transcription factors (TFs). motif_annotations_fname Path of motif annotation file. db_fnames List of path of ranking database files. thresholds The first method to create the TF-modules based on the best targets for each transcription factor. top_n_targets The second method is to select the top targets for a given TF. top_n_regulators The alternative way to create the TF-modules is to select the best regulators for each gene. min_genes The required minimum number of genes in a resulting module. relation_key If not specified, it looks `.uns['relation']` for relational contrastive learning settings (default storage place for :func:`~SpaRCL.run_RCL`). If specified, it looks `.uns[relation_key]` for relational contrastive learning settings. key_added If not specified, the regulon inference data is stored in `adata.uns['regulon']`. If specified, the regulon inference data is added to `adata.uns[key_added]`. copy Return a copy instead of writing to ``adata``. Returns ------- Depending on ``copy``, returns or updates ``adata`` with the following fields. See ``key_added`` parameter description for the storage path of the regulon inference. .uns['regulon'] The inferred regulons. ''' adata = adata.copy() if copy else adata if relation_key is None: relation_key = 'relation' if relation_key not in adata.uns: raise ValueError(f'Did not find .uns["{relation_key}"].') adata_use = adata[:, adata.uns[relation_key]['gene_names']] neighbors = pd.DataFrame( adata.uns[relation_key]['gene_relation'].toarray(), index=adata_use.var_names, columns=adata_use.var_names ) if tf_names is None: neighbors = pd.melt(neighbors, ignore_index=False) neighbors.columns = ['target', 'importance'] neighbors = neighbors.reset_index() else: tf_idx = np.flatnonzero(np.isin(adata_use.var_names, tf_names)) if tf_idx.shape[0] == 0: raise ValueError("Could not find any tf in var_names") neighbors = pd.melt(neighbors.iloc[tf_idx, :], ignore_index=False) neighbors.columns = ['target', 'importance'] neighbors = neighbors.reset_index() neighbors.columns = ['TF', 'target', 'importance'] neighbors = neighbors[neighbors['importance'] > 0] neighbors = neighbors.sort_values(by='importance', ascending=False) neighbors = neighbors.reset_index(drop=True) regulons = modules_from_adjacencies( adjacencies=neighbors, ex_mtx=adata_use.to_df(), thresholds=thresholds, top_n_targets=top_n_targets, top_n_regulators=top_n_regulators, min_genes=min_genes, rho_dichotomize=False, ) for idx in range(len(regulons)): gene2weight = dict(regulons[idx].gene2weight) gene2weight[regulons[idx].transcription_factor] = regulons[idx].weights[1] regulons[idx] = regulons[idx].copy(gene2weight=gene2weight) if motif_annotations_fname is not None and db_fnames is not None: def name(fname): return os.path.splitext(os.path.basename(fname))[0] dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames] df = prune2df(dbs, regulons, motif_annotations_fname) regulons = df2regulons(df) if key_added is None: key_added = 'regulons' adata.uns[key_added] = {} regulons_dict = adata.uns[key_added] regulons_dict['regulons'] = dump(regulons, default_flow_style=False, Dumper=Dumper) regulons_dict['params'] = {} regulons_dict['params']['tf_names'] = tf_names regulons_dict['params']['thresholds'] = str(thresholds) regulons_dict['params']['top_n_targets'] = str(top_n_targets) regulons_dict['params']['top_n_regulators'] = str(top_n_regulators) regulons_dict['params']['min_genes'] = min_genes return adata if copy else None
[docs]def aucell( adata: AnnData, auc_threshold: float = 0.05, noweights: bool = False, normalize: bool = False, random_state: int = 0, num_workers: int = 1, regulon_key: Optional[str] = None, key_added: Optional[str] = None, copy: bool = False, ) -> Optional[AnnData]: ''' Compute the regulon activity AUCell matrix [Aibar17]_. Parameters ---------- adata Annotated data matrix. auc_threshold The fraction of the ranked genome to take into account for the calculation of the Area Under the recovery Curve. noweights Should the weights of the genes part of a signature be used in calculation of enrichment? normalize Normalize the AUC values to a maximum of 1.0 per regulon. random_state Change to use different initial states for the optimization. num_workers The number of cores to use. regulon_key The key of regulon inference. key_added If not specified, the AUCell data is stored in `adata.uns['aucell']` and the AUCell matrix is stored in `adata.obsm['aucell']`. If specified, the AUCell data is added to `adata.uns[key_added]` and the AUCell matrix is stored in `adata.obsp[key_added]`. copy Return a copy instead of writing to ``adata``. Returns ------- Depending on ``copy``, returns or updates ``adata`` with the following fields. See ``key_added`` parameter description for the storage path of the AUCell matrix. .obsm['aucell'] : :class:`~pandas.DataFrame` The regulon activity AUCell matrix. ''' adata = adata.copy() if copy else adata if regulon_key is None: regulon_key = 'regulons' if regulon_key not in adata.uns: raise ValueError(f'Did not find .uns["{regulon_key}"].') regulons = load(adata.uns[regulon_key]['regulons'], Loader=Loader) auc_mtx = pyscenic_aucell( exp_mtx=adata.to_df(), signatures=regulons, auc_threshold=auc_threshold, noweights=noweights, normalize=normalize, seed=random_state, num_workers=num_workers, ) if key_added is None: key_added = 'aucell' adata.uns[key_added] = {} aucell_dict = adata.uns[key_added] aucell_dict['params'] = {} aucell_dict['params']['auc_threshold'] = auc_threshold aucell_dict['params']['noweights'] = noweights aucell_dict['params']['normalize'] = normalize aucell_dict['params']['random_state'] = random_state adata.obsm[key_added] = auc_mtx.loc[adata.obs_names,:] return adata if copy else None