Source code for pypath.inputs.cellcall

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import csv
import itertools
import collections

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.mapping as mapping
import pypath.utils.taxonomy as taxonomy
import pypath.share.session as session

_logger = session.Logger(name = 'cellcall_input')
_log = _logger._log


def cellcall_download(extended = False, mouse = False):
    """
    Retrieves one ligand-receptor-TF pathway table from CellCall
    (https://github.com/ShellyCoder/cellcall).

    Only a single dataset is fetched by this function; use
    ``cellcall_download_all`` to collect several of them in one call.
    The pathway identifiers in the data refer to KEGG pathways.

    Args
        extended (bool): CellCall provides core and extended datasets;
            if True, the extended one is retrieved.
        mouse (bool): CellCall provides human and homology inferred mouse
            datasets; if True, the homology inferred one (with mouse
            identifiers) is retrieved.

    Returns
        A list of dicts, one for each record of the tab separated table
        provided by CellCall.
    """

    # the optional suffixes are interpolated into the URL template,
    # first the species related one, then the core vs. extended one
    suffix = ''.join((
        '_homology' if mouse else '',
        '_extended' if extended else '',
    ))

    download = curl.Curl(
        urls.urls['cellcall']['url'] % suffix,
        large = True,
        silent = False,
    )
    reader = csv.DictReader(download.result, delimiter = '\t')

    return list(reader)
def cellcall_download_all(extended = True, human = True, mouse = True):
    """
    Downloads ligand-receptor-TF pathway data from CellCall
    (https://github.com/ShellyCoder/cellcall).

    CellCall has core (high confidence) and extended datasets, human and
    homology inferred mouse datasets, 4 datasets in total. By default all
    of these are downloaded and concatenated; the parameters make it
    possible to leave out the extended part and to select the organism.
    The pathway identifiers refer to KEGG pathways.

    Args
        extended (bool): Use also the extended datasets.
        human (bool): Include human interactions.
        mouse (bool): Include mouse interactions.

    Returns
        A list of dicts, each is a record as provided by the CellCall
        database, with two keys added: ``extended`` (bool, whether the
        record comes from an extended dataset) and ``organism`` (int,
        9606 for the human, 10090 for the homology inferred mouse
        datasets).
    """

    result = []

    # iteration order: extended datasets before the core ones, and
    # within both the homology inferred (mouse) before the human one
    for ext, homo in itertools.product((True, False), repeat = 2):

        # extended datasets only if requested, core ones always
        if ext and not extended:

            continue

        # the homology inferred datasets are the mouse ones
        if not (mouse if homo else human):

            continue

        organism = 10090 if homo else 9606

        for rec in cellcall_download(extended = ext, mouse = homo):

            # label each record with its origin, so the datasets
            # remain distinguishable after concatenation
            rec.update(extended = ext, organism = organism)
            result.append(rec)

    return result
def cellcall_interactions(extended = False, organism = 9606):
    """
    Ligand-receptor pairs from the CellCall database
    (https://github.com/ShellyCoder/cellcall), with UniProt IDs.

    Args
        extended (bool): Besides the core set, include also the extended
            set of interactions.
        organism (int,str): The organism to use; human (9606) and mouse
            (10090) are supported.

    Returns
        List of unique ``CellcallInteraction`` named tuples with the
        fields ``ligand_uniprot``, ``receptor_uniprot`` and ``core``.

    Raises
        ValueError: If the organism is neither human nor mouse.
    """

    supported = {9606, 10090}
    taxon = taxonomy.ensure_ncbi_tax_id(organism)

    if taxon not in supported:

        msg = 'Unknown organism: `%s`.' % str(organism)
        _log(msg)
        raise ValueError(msg)

    Interaction = collections.namedtuple(
        'CellcallInteraction',
        (
            'ligand_uniprot',
            'receptor_uniprot',
            'core',
        ),
    )

    def to_uniprot(entrez):
        # the identifier columns are translated as Entrez Gene IDs
        return mapping.map_name(
            entrez,
            'entrez',
            'uniprot',
            ncbi_tax_id = taxon,
        )

    records = cellcall_download_all(
        extended = extended,
        human = taxon == 9606,
        mouse = taxon == 10090,
    )

    interactions = set()
    no_uniprot = set()

    for rec in records:

        ligands = to_uniprot(rec['Ligand_ID'])
        receptors = to_uniprot(rec['Receptor_ID'])

        # remember the identifiers that could not be translated
        no_uniprot.update(
            rec[key]
            for key, uniprots in (
                ('Ligand_ID', ligands),
                ('Receptor_ID', receptors),
            )
            if not uniprots
        )

        # one record for each combination of the ligand and receptor
        # UniProt IDs; if either side is empty, nothing is added
        interactions.update(
            Interaction(
                ligand_uniprot = ligand,
                receptor_uniprot = receptor,
                core = not rec['extended'],
            )
            for ligand, receptor in itertools.product(ligands, receptors)
        )

    _log(
        'Could not find UniProt IDs for %u '
        'CellCall proteins.' % len(no_uniprot)
    )

    return list(interactions)
def cellcall_annotations(extended = False, organism = 9606):
    """
    Ligand and receptor roles of proteins, derived from the interactions
    of the CellCall database (https://github.com/ShellyCoder/cellcall).

    Args
        extended (bool): Besides the core set, use also the extended set
            of interactions.
        organism (int,str): The organism to use; human (9606) and mouse
            (10090) are supported.

    Returns
        Dict of annotations: keys are UniProt IDs, values are sets of
        ``CellcallAnnotation`` named tuples with a single field ``role``,
        which is either "ligand" or "receptor".
    """

    Annotation = collections.namedtuple(
        'CellcallAnnotation',
        (
            'role',
        ),
    )

    # only two distinct annotations exist, these are shared by all proteins
    as_ligand = Annotation(role = 'ligand')
    as_receptor = Annotation(role = 'receptor')

    annotations = {}

    for ia in cellcall_interactions(extended = extended, organism = organism):

        annotations.setdefault(ia.ligand_uniprot, set()).add(as_ligand)
        annotations.setdefault(ia.receptor_uniprot, set()).add(as_receptor)

    return annotations