#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#frompast.builtinsimportxrange,rangeimportcollectionsfromlxmlimportetreeimportpypath.share.curlascurlimportpypath.resources.urlsasurlsimportpypath.inputs.uniprot_dbasuniprot_dbimportpypath.utils.taxonomyastaxonomyimportpypath.utils.mappingasmapping
def locate_localizations(
        organism=9606,
        literature=True,
        external=True,
        predictions=False,
    ):
    """
    Collects subcellular localization annotations from the LOCATE XML dump.

    The file is retrieved by ``curl.Curl`` from the URL template stored at
    ``urls.urls['locate']['url_rescued']``, filled in with the organism
    label looked up in ``taxonomy.taxids``. The XML is processed
    incrementally, one ``LOCATE_protein`` element at a time.

    Args:
        organism: Key of ``taxonomy.taxids``; it is also passed to the ID
            translation as ``ncbi_tax_id``. Only proteins that translate to
            SwissProt IDs of this organism are kept.
        literature (bool): Include records from the ``literature`` section.
        external (bool): Include records from the ``externalannot``
            section.
        predictions (bool): Include records from the ``scl_prediction``
            section; predictions with a zero score or with the location
            "no prediction" are ignored.

    Returns:
        dict: UniProt IDs as keys, sets of ``LocateAnnotation`` named
        tuples as values. The fields of the tuples are ``source``,
        ``location``, ``cls``, ``pmid`` and ``score``.
    """

    record = collections.namedtuple(
        'LocateAnnotation',
        ('source', 'location', 'cls', 'pmid', 'score'),
    )
    # the defaults belong to the last three fields: cls, pmid and score
    record.__new__.__defaults__ = (None, None, None)

    organism_uniprots = set(
        uniprot_db.all_uniprots(organism=organism, swissprot=True)
    )

    organism_str = taxonomy.taxids[organism]
    url = urls.urls['locate']['url_rescued'] % organism_str
    # last component of the URL without its final 4 characters
    # (presumably the extension of the archive -- confirm against the URL)
    fname = url.split('/')[-1][:-4]

    c = curl.Curl(
        url,
        large=True,
        default_mode='rb',
        silent=False,
        files_needed=[fname],
    )

    result = collections.defaultdict(set)
    # FIFO buffer of the elements already seen, to be cleared later
    used_elements = collections.deque()

    try:

        parser = etree.iterparse(c.result[fname], events=('start', 'end'))

        # consuming the very first event, its element is never buffered
        next(parser)

        for ev, elem in parser:

            if ev == 'end' and elem.tag == 'LOCATE_protein':

                tag_protein = elem.find('protein')
                this_uniprot = None
                this_uniprots = None
                this_entrez = None
                this_class = (
                    tag_protein.find('class').text
                    if tag_protein is not None else
                    None
                )

                xrefs = elem.find('xrefs')

                if xrefs is None:

                    continue

                for xref in xrefs.findall('xref'):

                    src = xref.find('source')
                    src_name = src.find('source_name').text

                    if src_name == 'UniProtKB-SwissProt':

                        this_uniprot = src.find('accn').text

                    if src_name == 'Entrez Gene':

                        this_entrez = src.find('accn').text

                    # TrEMBL is used only if no SwissProt ID has been
                    # seen so far among the cross-references
                    if (
                        src_name == 'UniProt/SPTrEMBL' and
                        this_uniprot is None
                    ):

                        this_uniprot = src.find('accn').text

                # if we don't know what it is, does not make sense to proceed
                if this_uniprot is None and this_entrez is None:

                    continue

                if this_uniprot:

                    this_uniprots = mapping.map_name(
                        this_uniprot,
                        'uniprot',
                        'uniprot',
                        ncbi_tax_id=organism,
                    )

                # falling back to the Entrez Gene ID
                if not this_uniprots and this_entrez:

                    this_uniprots = mapping.map_name(
                        this_entrez,
                        'entrez',
                        'uniprot',
                        ncbi_tax_id=organism,
                    )

                # `or ()` guards against no translation attempted at all
                this_uniprots = (
                    set(this_uniprots or ()) & organism_uniprots
                )

                # if we don't know what it is, does not make sense to proceed
                if not this_uniprots:

                    continue

                if external:

                    # External database annotations
                    extannot = elem.find('externalannot')

                    if extannot is not None:

                        for extannotref in extannot.findall('reference'):

                            sources = []

                            for src in extannotref.findall('source'):

                                src_name = src.find('source_name')

                                if src_name is not None:

                                    sources.append(src_name.text)

                            sources = ';'.join(sources) if sources else None

                            locations = extannotref.find('locations')

                            if locations is None:

                                continue

                            for location in locations.findall('location'):

                                for loc in location.iterchildren():

                                    if loc.tag[:4] != 'tier':

                                        continue

                                    # one tier may list several
                                    # comma separated locations
                                    this_loc = loc.text.lower().split(',')

                                    for uniprot in this_uniprots:

                                        for _loc in this_loc:

                                            result[uniprot].add(
                                                record(
                                                    source=sources,
                                                    location=_loc.strip(),
                                                    cls=this_class,
                                                    score=None,
                                                )
                                            )

                if predictions:

                    # Predictions
                    sclpred = elem.find('scl_prediction')

                    if sclpred is not None:

                        for sclpred_src in sclpred.findall('source'):

                            score = float(
                                sclpred_src.find('evaluation').text
                            )

                            if score == 0.0:

                                continue

                            this_src = sclpred_src.find('method').text
                            this_loc = (
                                sclpred_src.find('location').text.lower()
                            )

                            if this_loc == 'no prediction':

                                continue

                            for uniprot in this_uniprots:

                                result[uniprot].add(
                                    record(
                                        source=this_src,
                                        location=this_loc,
                                        cls=this_class,
                                        score=score,
                                    )
                                )

                if literature:

                    # Literature curation
                    lit = elem.find('literature')

                    if lit is not None:

                        for litref in lit.findall('reference'):

                            locs = set()
                            lit_locations = litref.find('locations')

                            # a reference without locations yields no
                            # records, same as in the external section
                            if lit_locations is not None:

                                for lloc in (
                                    lit_locations.findall('location')
                                ):

                                    for loc in lloc.iterchildren():

                                        if loc.tag[:4] == 'tier':

                                            locs.add(loc.text.lower())

                            pmid = litref.find('source')
                            pmid = (
                                None
                                if pmid is None else
                                pmid.find('accn').text
                            )

                            for loc in locs:

                                for uniprot in this_uniprots:

                                    result[uniprot].add(
                                        record(
                                            source='literature',
                                            location=loc,
                                            pmid=pmid,
                                            cls=this_class,
                                            score=None,
                                        )
                                    )

            # reached at every event except the records skipped above
            used_elements.append(elem)

            # removing used elements to keep memory low
            # NOTE(review): elements are cleared only by their age (at
            # least 500 buffered events old), this assumes that a single
            # LOCATE_protein record never spans that many events -- confirm
            if len(used_elements) > 1000:

                for _ in xrange(500):

                    used_elements.popleft().clear()

    finally:

        # closing the XML, also if the processing failed
        fileobj = getattr(c, 'fileobj', None)

        if fileobj is not None:

            fileobj.close()

        del c

    return dict(result)