#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re
import csv
import collections
import itertools

import pypath.utils.mapping as mapping
import pypath.utils.taxonomy as taxonomy
import pypath.internals.intera as intera
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.share.common as common
import pypath_common._constants as _const
import pypath.share.session as session
import pypath.core.entity as entity

_logger = session.Logger(name='scconnect_input')
_log = _logger._log


def scconnect_annotations(organism=9606):
    """
    Ligand and receptor annotations from scConnect
    (https://github.com/JonETJakobsson/scConnect).

    Args
        organism (int,str): Name or identifier of the organism. Human,
            mouse, fruitfly, zebrafish, chicken, C. elegans, Xenopus
            tropicalis, yeast and Anolis carolinensis are available.

    Returns
        (dict): Keys are the identifiers returned by the gene symbol to
            UniProt translation for single proteins, or `intera.Complex`
            objects where a gene symbol entry consists of several
            components separated by `|`. Values are sets of
            `ScconnectAnnotation` named tuples with the fields `role`,
            `family`, `type` and `inferred_from`.

    Raises
        ValueError: If no Ensembl organism name can be derived from
            `organism`.
    """

    _organism = taxonomy.ensure_ensembl_name(organism)
    ncbi_tax_id = taxonomy.ensure_ncbi_tax_id(organism)

    if not _organism:

        msg = 'Could not recognize organism: `%s`.' % str(organism)
        _log(msg)
        raise ValueError(msg)

    record = collections.namedtuple(
        'ScconnectAnnotation',
        (
            'role',
            'family',
            'type',
            'inferred_from',
        ),
    )

    result = collections.defaultdict(set)
    # raw string: `\w` is not a valid escape sequence in a plain literal
    reinf = re.compile(r'inferred from (\w+)')
    # residue left on individual symbols once a stringified list such as
    # "['A', 'B']" has been split at the commas
    symbol_junk = ' \t\r\n\'"[]'

    for role in ('ligand', 'receptor'):

        url = urls.urls['scconnect']['annot'] % (_organism, role)
        c = curl.Curl(url, silent=False, large=True)
        tab = csv.DictReader(c.result)

        for rec in tab:

            typ = rec.get('type')
            family = rec.get('family')
            inferred_from = rec.get('comment')

            if inferred_from:

                # `match` anchors at the beginning of the comment; comments
                # not starting with the phrase result in `None`
                inferred = reinf.match(inferred_from)
                inferred_from = inferred.group(1).lower() if inferred else None

            annot = record(
                role=role,
                family=family,
                type=typ,
                inferred_from=inferred_from,
            )

            # the symbols are read from the `gene` column, falling back to
            # `preprogene`; a missing or empty field (e.g. a short row)
            # simply contributes no symbols instead of raising
            # AttributeError
            raw_symbols = rec.get('gene') or rec.get('preprogene') or ''

            for gs in raw_symbols.strip('[\']').split(','):

                components = [
                    gs_comp.strip(symbol_junk)
                    for gs_comp in gs.split('|')
                ]

                if not all(components):

                    # nothing to translate for an empty symbol
                    continue

                uniprots = [
                    mapping.map_name(
                        gs_comp,
                        'genesymbol',
                        'uniprot',
                        ncbi_tax_id=ncbi_tax_id,
                    )
                    for gs_comp in components
                ]

                # one combination for each choice of translated identifier
                # per component; no combination if any component could not
                # be translated
                for ups in itertools.product(*uniprots):

                    if len(ups) > 1:

                        cplex = intera.Complex(
                            components=ups,
                            sources='scConnect',
                        )
                        result[cplex].add(annot)

                    else:

                        result[ups[0]].add(annot)

    return dict(result)
def scconnect_complexes(organism=9606):
    """
    Protein complexes from scConnect
    (https://github.com/JonETJakobsson/scConnect).

    Args
        organism (int,str): Name or identifier of the organism. Human,
            mouse, fruitfly, zebrafish, chicken, C. elegans, Xenopus
            tropicalis, yeast and Anolis carolinensis are available.

    Returns
        (set): The keys of the `scconnect_annotations` result that
            `entity.Entity._is_complex` accepts as complexes.
    """

    annotations = scconnect_annotations(organism=organism)
    complexes = set()

    # the annotation dict is keyed by both single proteins and complexes;
    # only the latter are collected here
    for key in annotations:

        if entity.Entity._is_complex(key):

            complexes.add(key)

    return complexes
def scconnect_interactions():
    """
    Ligand-receptor interactions from scConnect
    (https://github.com/JonETJakobsson/scConnect).

    Returns
        (list): List of interactions, each represented as a named tuple.
            Proteins and protein complexes are translated to UniProt IDs,
            small molecule IDs are left intact.
    """

    # species labels which do not name an organism
    no_species = {'', 'None', 'Unknown'}

    def process_partner(rec, partner):
        """
        Yields `(identifier, entity_type, organism)` tuples for one side
        (`partner` is either 'target' or 'ligand') of the record `rec`.
        """

        organisms = []

        for label in rec['%s_species' % partner].split('|'):

            if label in no_species:

                organisms.append(_const.NOT_ORGANISM_SPECIFIC)

            else:

                organisms.append(taxonomy.ensure_ncbi_tax_id(label))

        if partner == 'target':

            id_field, id_type = 'target_uniprot', 'uniprot'

        else:

            id_field, id_type = 'ligand_gene_symbol', 'genesymbol'

        for organism in set(organisms):

            # identifiers are paired with organisms by position
            selected = []

            for raw_id, raw_org in zip(rec[id_field].split('|'), organisms):

                if organism is None or raw_org == organism:

                    selected.append(raw_id)

            if organism:

                candidates = [
                    mapping.map_name(
                        raw_id,
                        id_type,
                        'uniprot',
                        ncbi_tax_id=organism,
                    )
                    for raw_id in selected
                ]

            else:

                # NOTE(review): a falsy organism is treated as a small
                # molecule, and the partner's own column is used as its
                # identifier; whether `_const.NOT_ORGANISM_SPECIFIC` is
                # falsy is not visible from this file -- confirm
                candidates = [(rec[partner],)]

            for combination in itertools.product(*candidates):

                if len(combination) > 1:

                    cplex = intera.Complex(
                        components=combination,
                        sources='scConnect',
                    )

                    yield cplex, 'complex', organism

                elif organism:

                    yield combination[0], 'protein', organism

                else:

                    yield combination[0], 'small_molecule', organism

    record = collections.namedtuple(
        'ScconnectInteraction',
        (
            'ligand_id',
            'target_id',
            'ligand_organism',
            'target_organism',
            'ligand_type',
            'target_type',
            'effect',
            'references',
        ),
    )

    url = urls.urls['scconnect']['intera']
    c = curl.Curl(url, silent=False, large=True)
    tab = csv.DictReader(c.result)

    result = []

    for rec in tab:

        # all combinations of the ligand and target entities of the row
        pairs = itertools.product(
            process_partner(rec, 'ligand'),
            process_partner(rec, 'target'),
        )

        for (
            (ligand, ligand_type, ligand_organism),
            (target, target_type, target_organism),
        ) in pairs:

            result.append(
                record(
                    ligand_id=ligand,
                    target_id=target,
                    ligand_organism=ligand_organism,
                    target_organism=target_organism,
                    ligand_type=ligand_type,
                    target_type=target_type,
                    effect=rec['action'],
                    references=rec['pubmed_id'],
                )
            )

    return result