#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#importcsvimportitertoolsimportcollectionsimportpypath.resources.urlsasurlsimportpypath.share.curlascurlimportpypath.utils.mappingasmappingimportpypath.utils.taxonomyastaxonomyimportpypath.share.sessionassession_logger=session.Logger(name='cellcall_input')_log=_logger._log
def cellcall_download(extended=False, mouse=False):
    """
    Retrieves one ligand-receptor-TF pathway table from CellCall
    (https://github.com/ShellyCoder/cellcall).

    Only a single dataset is fetched by this function; use
    ``cellcall_download_all`` to collect several of them at once. The
    pathway identifiers refer to KEGG pathways.

    Args
        extended (bool): CellCall provides a core and an extended dataset;
            if True, the extended one is retrieved.
        mouse (bool): CellCall provides a human and a homology inferred
            mouse dataset; if True, the homology inferred one, carrying
            mouse identifiers, is retrieved.

    Returns
        A list of dicts, one for each row of the tab separated table,
        keyed by the column names taken from its header row.
    """

    # The URL template takes one suffix that selects the dataset: the
    # organism part comes first, followed by the core/extended part.
    organism_part = '_homology' if mouse else ''
    confidence_part = '_extended' if extended else ''
    url = urls.urls['cellcall']['url'] % (organism_part + confidence_part)

    download = curl.Curl(url, large=True, silent=False)
    reader = csv.DictReader(download.result, delimiter='\t')

    return list(reader)
def cellcall_download_all(extended=True, human=True, mouse=True):
    """
    Downloads ligand-receptor-TF pathway data from CellCall
    (https://github.com/ShellyCoder/cellcall).

    CellCall has core (high confidence) and extended datasets, human and
    homology inferred mouse datasets, 4 datasets in total. By default all
    of them are downloaded and concatenated; the parameters make it
    possible to leave out the extended part and to select the organism.
    The pathway identifiers refer to KEGG pathways.

    Args
        extended (bool): Use also the extended datasets. The core datasets
            are always included.
        human (bool): Include human interactions.
        mouse (bool): Include mouse interactions.

    Returns
        A list of dicts, each is a record as provided by
        ``cellcall_download``, with two keys added: ``extended`` (bool,
        whether the record comes from an extended dataset) and
        ``organism`` (int, 9606 for human or 10090 for mouse).
    """

    result = []

    # All four (extended, homology) combinations, extended datasets first.
    for ext, homo in itertools.product((True, False), repeat=2):

        # Skip the extended datasets unless requested, and skip the
        # datasets of an organism that has not been selected.
        if ext and not extended:
            continue

        if not (mouse if homo else human):
            continue

        organism = 10090 if homo else 9606
        dataset = cellcall_download(extended=ext, mouse=homo)

        # Label each record with its origin; a plain loop is used because
        # this is done only for its side effect on the records.
        for rec in dataset:
            rec.update(extended=ext, organism=organism)

        result.extend(dataset)

    return result
def cellcall_interactions(extended=False, organism=9606):
    """
    Builds ligand-receptor interactions from the CellCall database
    (https://github.com/ShellyCoder/cellcall).

    The Entrez Gene IDs in the ``Ligand_ID`` and ``Receptor_ID`` columns
    are translated to UniProt IDs; one interaction is created for every
    combination of the ligand and receptor UniProt IDs of a record. The
    number of Entrez IDs without any UniProt ID is written to the log.

    Args
        extended (bool): Include not only the core but also the extended
            set of interactions.
        organism (int,str): The organism to use, human (9606) and mouse
            (10090) are supported.

    Returns
        List of unique named tuples (``CellcallInteraction``) with the
        fields ``ligand_uniprot``, ``receptor_uniprot`` and ``core``; the
        latter is False for records from an extended dataset. The order
        of the list is not defined, as it is created from a set.

    Raises
        ValueError: If the organism is neither human nor mouse.
    """

    CellcallInteraction = collections.namedtuple(
        'CellcallInteraction',
        (
            'ligand_uniprot',
            'receptor_uniprot',
            'core',
        ),
    )

    ncbi_tax_id = taxonomy.ensure_ncbi_tax_id(organism)

    if ncbi_tax_id not in {9606, 10090}:

        msg = 'Unknown organism: `%s`.' % str(organism)
        _log(msg)
        raise ValueError(msg)

    records = cellcall_download_all(
        extended=extended,
        human=ncbi_tax_id == 9606,
        mouse=ncbi_tax_id == 10090,
    )

    interactions = set()
    no_uniprot = set()

    def to_uniprots(entrez_id):
        # Translate one Entrez ID, keeping track of the ones for which
        # the mapping gives nothing.
        uniprots = mapping.map_name(
            entrez_id,
            'entrez',
            'uniprot',
            ncbi_tax_id=ncbi_tax_id,
        )

        if not uniprots:
            no_uniprot.add(entrez_id)

        return uniprots

    for row in records:

        ligand_uniprots = to_uniprots(row['Ligand_ID'])
        receptor_uniprots = to_uniprots(row['Receptor_ID'])

        # If either side could not be mapped, the product is empty and
        # the record contributes no interaction.
        interactions.update(
            CellcallInteraction(
                ligand_uniprot=ligand,
                receptor_uniprot=receptor,
                core=not row['extended'],
            )
            for ligand, receptor in itertools.product(
                ligand_uniprots,
                receptor_uniprots,
            )
        )

    _log(
        'Could not find UniProt IDs for %u '
        'CellCall proteins.' % len(no_uniprot)
    )

    return list(interactions)
def cellcall_annotations(extended=False, organism=9606):
    """
    Derives ligand and receptor annotations from the CellCall database
    (https://github.com/ShellyCoder/cellcall).

    Each protein that takes part in an interaction returned by
    ``cellcall_interactions`` is annotated with the role or roles it
    plays in those interactions.

    Args
        extended (bool): Include not only the core but also the extended
            set of interactions.
        organism (int,str): The organism to use, human (9606) and mouse
            (10090) are supported.

    Returns
        Dict of annotations, keys are UniProt IDs, values are sets of
        named tuples (``CellcallAnnotation``) with a single field
        ``role``, its value is either "ligand" or "receptor".
    """

    CellcallAnnotation = collections.namedtuple(
        'CellcallAnnotation',
        (
            'role',
        ),
    )

    # Only two distinct annotations exist, so they are created once.
    as_ligand = CellcallAnnotation(role='ligand')
    as_receptor = CellcallAnnotation(role='receptor')

    annotations = collections.defaultdict(set)

    for interaction in cellcall_interactions(
        extended=extended,
        organism=organism,
    ):

        annotations[interaction.ligand_uniprot].add(as_ligand)
        annotations[interaction.receptor_uniprot].add(as_receptor)

    return dict(annotations)