#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#frompast.builtinsimportxrange,rangeimportcollectionsimportitertoolsfromlxmlimportetreeimportpypath.share.curlascurlimportpypath.resources.urlsasurlsimportpypath.utils.mappingasmappingimportpypath.utils.taxonomyastaxonomy
def topdb_annotations(ncbi_tax_id=9606):
    """
    Collect membrane topology annotations from the TOPDB XML dump.

    The XML is retrieved from ``urls.urls['topdb']['url']`` and parsed
    incrementally; every ``TOPDB`` element is processed as one entry.
    Entries are skipped if they have no ``Organism`` element, if
    ``taxonomy.ensure_ncbi_tax_id`` returns a false value for the organism,
    or if no UniProt accession could be obtained for them.

    Args:
        ncbi_tax_id (int): NCBI Taxonomy identifier, passed on to
            ``mapping.map_name0`` when translating the UniProt accessions
            listed under ``CrossRef/UniProt/AC``.

    Returns:
        dict: UniProt IDs as keys and sets of ``TopdbAnnotation`` named
        tuples as values. The fields are ``membrane``, ``topology``,
        ``score`` and ``tmregions``. ``membrane`` and ``topology`` are
        ``None`` if the entry provides none; ``score`` and ``tmregions``
        are ``0`` if the entry has no ``Topology`` element.
    """

    TopdbAnnotation = collections.namedtuple(
        'TopdbAnnotation',
        [
            'membrane',
            'topology',
            'score',
            'tmregions',
        ],
    )

    # Once more than `max_kept` processed entries are held, the oldest
    # `n_release` of them are cleared to keep the memory usage low.
    max_kept = 2000
    n_release = 1000

    result = collections.defaultdict(set)
    url = urls.urls['topdb']['url']

    c = curl.Curl(
        url,
        large=True,
        default_mode='rb',
        silent=False,
    )

    try:

        parser = etree.iterparse(c.fileobj, events=('start', 'end'))
        # consume the very first event before entering the main loop
        next(parser)
        # deque: releasing the oldest elements is O(1) from the left end
        processed = collections.deque()

        for ev, elem in parser:

            if ev != 'end' or elem.tag != 'TOPDB':

                continue

            processed.append(elem)

            # Only the oldest elements are cleared here, so the element
            # just appended stays intact for the processing below.
            if len(processed) > max_kept:

                for _ in range(n_release):

                    processed.popleft().clear()

            # NOTE(review): the organism of the entry is only checked for
            # being resolvable, it is never compared to `ncbi_tax_id` --
            # confirm whether a filter by organism is intended here.
            tag_organism = elem.find('Organism')

            if (
                tag_organism is None or
                not taxonomy.ensure_ncbi_tax_id(tag_organism.text)
            ):

                continue

            tag_uniprots = elem.find('./CrossRef/UniProt')

            if tag_uniprots is None:

                continue

            uniprots = {
                mapping.map_name0(
                    tag_ac.text,
                    'uniprot',
                    'uniprot',
                    ncbi_tax_id=ncbi_tax_id,
                )
                for tag_ac in tag_uniprots.findall('AC')
            }
            # accessions which could not be translated yield no records
            uniprots.discard(None)

            if not uniprots:

                continue

            # a `Membrane` element without text has nothing to split
            membranes = {
                mem
                for tag_mem in elem.findall('Membrane')
                if tag_mem.text is not None
                for mem in tag_mem.text.split(';')
            } or (None,)

            ntm = 0
            score = 0
            topologies = ()
            tag_topo = elem.find('Topology')

            if tag_topo is not None:

                ntm = int(tag_topo.find('Numtm').attrib['Count'])
                score = int(tag_topo.find('Reliability').text)
                topologies = {
                    tag_reg.attrib['Loc']
                    for tag_reg in tag_topo.findall('./Regions/Region')
                }

            topologies = topologies or (None,)

            # one record for each combination of topology and membrane
            for topology, membrane, uniprot in itertools.product(
                topologies,
                membranes,
                uniprots,
            ):

                result[uniprot].add(
                    TopdbAnnotation(
                        membrane=membrane,
                        topology=topology,
                        tmregions=ntm,
                        score=score,
                    )
                )

    finally:

        # closing the XML, also if the parsing failed half way
        if c.fileobj is not None:

            c.fileobj.close()

        del c

    return dict(result)