#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#

import itertools
import collections

import pypath.share.curl as curl
import pypath.share.session as session
import pypath.share.settings as settings
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.utils.taxonomy as taxonomy

# Module-level logger; `_log` is the shortcut used by the functions below
# to write messages under the `comppi_input` name.
_logger = session.Logger(name='comppi_input')
_log = _logger._log
def comppi_interaction_locations(organism=9606):
    """
    Download and preprocess interaction localization data from ComPPI.

    Retrieves the compartment-level protein-protein interaction table
    from the ComPPI database and yields one record for every pair of
    UniProt IDs the two interactors translate to, together with the
    compartments and scores listed for each interactor.

    Args:
        organism: Organism identifier; anything accepted by
            ``taxonomy.ensure_ncbi_tax_id``. Supported organisms are
            human (9606), Drosophila (7227), C. elegans (6239) and
            S. cerevisiae (4932).

    Yields:
        ``ComppiInteraction`` named tuples with the fields ``id_a``,
        ``id_b``, ``loc_a`` and ``loc_b``; the latter two are tuples of
        ``ComppiLocation(location, score)`` named tuples.

    Raises:
        ValueError: If the organism is not among the supported ones.
    """

    ComppiLocation = collections.namedtuple(
        'ComppiLocation',
        [
            'location',
            'score',
        ],
    )

    ComppiInteraction = collections.namedtuple(
        'ComppiInteraction',
        [
            'id_a',
            'id_b',
            'loc_a',
            'loc_b',
        ],
    )

    def process_locations(loc):
        # `loc` is a `|` separated list of `location:score` entries;
        # empty entries (e.g. an empty field) are ignored.
        return tuple(
            ComppiLocation(
                location=name_score[0],
                score=float(name_score[1]),
            )
            for name_score in (
                entry.split(':')
                for entry in loc.split('|')
                if entry
            )
        )

    # NOTE(review): the values look like the species indices of the ComPPI
    # download form, but only the keys are used here and the NCBI Taxonomy
    # ID itself is posted as `fDlSpec` below -- confirm which one the form
    # expects.
    organisms = {
        9606: 0,
        7227: 1,
        6239: 2,
        4932: 3,
    }

    ncbi_tax_id = taxonomy.ensure_ncbi_tax_id(organism)

    if ncbi_tax_id not in organisms:

        raise ValueError(
            'Can not recognize organism: `%s`. '
            'Available organisms are human (9606), Drosophila (7227), '
            'C. elegans (6239) and S. cerevisiae (4932).' % str(organism)
        )

    url = urls.urls['comppi']['url']
    headers = [settings.get('user_agent')]

    # obtaining cookie
    c = curl.Curl(url, cache=False, req_headers=headers)

    cookie = ';'.join([
        # split only at the first colon, so a colon within the cookie
        # value does not truncate it
        h.decode().split(':', 1)[1].split(';')[0].strip()
        for h in c.resp_headers
        # header names are case insensitive
        if h.lower().startswith(b'set-cookie')
    ])
    cookie_hdr = ['Cookie: %s' % cookie]

    _log('Cookie from ComPPI: %s' % cookie)

    # performing the download
    post = {
        'fDlSet': 'comp',
        'fDlSpec': '%u' % ncbi_tax_id,
        'fDlMLoc': 'all',
        'fDlSubmit': 'Download'
    }

    c = curl.Curl(
        url,
        req_headers=headers + cookie_hdr,
        post=post,
        large=True,
        silent=False,
        compr='gz',
    )

    # Skip the header row. The default prevents `StopIteration` from
    # escaping this generator (a `RuntimeError` since PEP 479) when the
    # download is empty.
    next(c.result, None)

    for line in c.result:

        fields = line.strip('\r\n').split('\t')

        # blank or truncated rows do not have the columns used below
        if len(fields) < 16:

            continue

        organism_a = int(fields[7])
        organism_b = int(fields[15])

        # Compare against the resolved taxonomy ID: `organism` itself may
        # have been given in another form (e.g. as a name), in which case
        # no row would ever match it.
        if organism_a != ncbi_tax_id or organism_b != ncbi_tax_id:

            continue

        uniprots_a = mapping.map_name(fields[0], 'uniprot', 'uniprot')
        uniprots_b = mapping.map_name(fields[8], 'uniprot', 'uniprot')

        if not uniprots_a or not uniprots_b:

            continue

        # the locations are the same for every ID pair of this row
        loc_a = process_locations(fields[2])
        loc_b = process_locations(fields[10])

        for uniprot1, uniprot2 in itertools.product(uniprots_a, uniprots_b):

            yield ComppiInteraction(
                id_a=uniprot1,
                id_b=uniprot2,
                loc_a=loc_a,
                loc_b=loc_b,
            )