# Source code for pypath.inputs.cytosig

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from typing import Union
import collections
import itertools

import pandas as pd

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.session as session
import pypath.utils.mapping as mapping


# Module level logging callable: the `_log` method of a session logger
# created under the name `cytosig_input`.
_log = session.Logger(name = 'cytosig_input')._log


# Fallback translations, consulted only when the regular gene symbol to
# UniProt mapping returns nothing for a label. Keys are the labels as they
# are looked up; of each value tuple only the first element is used in this
# module, as a UniProt ID. The second element appears to be the
# corresponding gene symbol -- it is not read here.
CUSTOM_MAPPINGS = {
    'Activin A': ('P08476', 'INHBA'),
    'IL12': ('P29459', 'IL12A'),
    'IL36': ('Q9UHA7', 'IL36A'),
    'MCSF': ('P09603', 'CSF1'),
    'TWEAK': ('O43508', 'TNFSF12'),
}


def cytosig_df(long: bool = False) -> Union[pd.DataFrame, pd.Series]:
    """
    Download the CytoSig core data and load it by pandas.

    The core data is a matrix of cytokines vs. targets; it is retrieved
    from the URL registered under the `cytosig` key and parsed as a tab
    separated table.

    Args
        long: Stack the matrix into long format instead of returning it
            in its original wide layout.

    Returns
        By default the matrix as a data frame (rows are signature genes,
        columns are cytokines); if `long` is `True`, the result of
        calling `stack` on that data frame, i.e. a series with multi
        index.
    """

    download = curl.Curl(
        urls.urls['cytosig']['url'],
        large = True,
        silent = False,
    )

    # No `index_col` is passed: the row labels are left for pandas to
    # work out from the layout of the file.
    matrix = pd.read_csv(download.fileobj, sep = '\t')

    return matrix.stack() if long else matrix
def cytosig_annotations() -> dict:
    """
    CytoSig is a compendium of expression signatures from cytokine
    perturbation experiments.

    Each cell of the long format CytoSig matrix (target, cytokine, score)
    is turned into annotation records: both the target and the cytokine
    labels are translated to UniProt IDs, and one record is created for
    every combination of the resulting IDs. Cells where either label has
    no translation yield no record. Cytokine labels without translation
    are reported in a single log message at the end.

    Returns
        Dict of sets of annotations. Keys are the UniProt IDs of the
        targets, values are sets of `CytosigAnnotation` named tuples with
        the fields `cytokine` (UniProt ID of the cytokine), `score`,
        `cytokine_genesymbol` and `target_genesymbol` (the latter two are
        the original labels from the matrix).
    """

    # Translations already done, by label. Every label occurs in many
    # cells of the matrix, hence each is translated only once instead of
    # once per cell.
    translated = {}

    def map_to_uniprot(genesymbol):

        if genesymbol not in translated:

            uniprots = mapping.map_name(genesymbol, 'genesymbol', 'uniprot')

            # some labels are not valid gene symbols: for these we fall
            # back to the hard coded translations
            if not uniprots and genesymbol in CUSTOM_MAPPINGS:

                uniprots = {CUSTOM_MAPPINGS[genesymbol][0]}

            translated[genesymbol] = uniprots

        return translated[genesymbol]


    cytosig = cytosig_df(long = True)

    record = collections.namedtuple(
        'CytosigAnnotation',
        ('cytokine', 'score', 'cytokine_genesymbol', 'target_genesymbol'),
    )

    result = collections.defaultdict(set)
    unmapped = set()

    # the multi index of the long format series is iterated here as
    # (target, cytokine) pairs
    for (target, cytokine), score in cytosig.items():

        u_target = map_to_uniprot(target)
        u_cytokine = map_to_uniprot(cytokine)

        if not u_cytokine:

            unmapped.add(cytokine)

        # empty if any of the two translations failed
        for u_t, u_c in itertools.product(u_target, u_cytokine):

            result[u_t].add(
                record(
                    cytokine = u_c,
                    score = score,
                    cytokine_genesymbol = cytokine,
                    target_genesymbol = target,
                )
            )

    if unmapped:

        _log(
            'Could not translate to UniProt IDs the following cytokines: ' +
            ', '.join(sorted(unmapped))
        )

    return dict(result)