#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import os
import re
import collections
import itertools
import shutil

# Prefer the C implementation where it exists (Python 2); only a failed
# import should trigger the fallback, any other error must surface.
try:
    import cPickle as pickle
except ImportError:
    import pickle

import bs4

import pypath.share.curl as curl
import pypath.share.common as common
import pypath.share.progress as progress
import pypath.share.cache as cache
import pypath.share.settings as settings
import pypath.share.session as session_mod
import pypath.resources.urls as urls
import pypath.internals.intera as intera
import pypath.utils.mapping as mapping

# Module level logger; `_log` is the shortcut used by the functions below.
_logger = session_mod.Logger(name='hpmr_input')
_log = _logger._log


# One record per receptor-partner pair extracted from HPMR.
HpmrInteraction = collections.namedtuple(
    'HpmrInteraction',
    (
        'receptor',
        'partner_role',
        'partner',
        'references',
        'unambiguous',
    ),
)
def get_hpmr(use_cache=None):
    """
    Downloads ligand-receptor and receptor-receptor interactions from the
    Human Plasma Membrane Receptome database.

    Args
        use_cache (bool): Use the intermediate cache (pickle file of
            processed data). Any non-bool value (e.g. ``None``) falls back
            to the ``use_intermediate_cache`` setting.

    Returns
        (dict): Two elements: "interactions" (a list of ``HpmrInteraction``
            tuples) and "families" (a dict with the mapped receptor
            identifiers as keys and ``(family, subfamily, subsubfamily)``
            name tuples as values).
    """

    def get_partner(interactors, typ, recname=None, references=None):
        """
        Selects the interactors of one role and merges them if necessary.

        typ : str
            `Receptor` or `Ligand`.

        Returns the only component if there is exactly one, an
        ``intera.Complex`` if there are more, and ``None`` if there is
        none.
        """

        components = [i[1] for i in interactors if i[0] == typ]

        # the receptor whose page is being processed is part of the
        # receptor side too
        if typ == 'Receptor' and recname:

            components.append(recname)

        if len(components) == 1:

            return components[0]

        elif len(components) > 1:

            return intera.Complex(
                components=components,
                sources='HPMR',
                references=references,
            )

    cachefile = cache.cache_item('hpmr_preprocessed')
    use_cache = (
        use_cache
        if isinstance(use_cache, bool) else
        settings.get('use_intermediate_cache')
    )

    if os.path.exists(cachefile) and use_cache:

        _log('Reading HPMR data from cache file `%s`.' % cachefile)

        # NOTE: unpickling is acceptable here only because the cache file
        # is written by this function itself (see the end of the body).
        with open(cachefile, 'rb') as fp:

            return pickle.load(fp)

    # NOTE(review): the `A-z` range also covers the punctuation between
    # `Z` and `a` ([ \ ] ^ _ `); the patterns are kept unchanged so the set
    # of accepted names stays the same.
    rerecname = re.compile(r'Receptor ([A-z0-9]+) interacts with:')
    reint = re.compile(r'(Receptor|Ligand) ([A-z0-9]+) -')
    rerefid = re.compile(r'list_uids=([- \.:,0-9A-z]+)')
    refamid = re.compile(r'.*FamId=([0-9\.]+)')

    a_family_title = 'Open Family Page'
    a_receptor_title = 'Open Receptor Page'
    a_titles = {a_family_title, a_receptor_title}

    interactions = []
    families = {}
    # collected below, but not included in the returned dict
    complexes = set()
    recpages = []

    c = curl.Curl(urls.urls['hpmri']['browse_rescued'])
    soup = bs4.BeautifulSoup(c.result, 'html.parser')

    # (family id, family name) of the most recently seen family link
    # at each level of the hierarchy
    this_family = ('0', None)
    this_subfamily = ('0', None)
    this_subsubfamily = ('0', None)

    for a in soup.find_all('a'):

        a_title = a.attrs.get('title')

        if a_title not in a_titles:

            continue

        if a_title == a_family_title:

            family_id = refamid.match(a.attrs['href']).groups()[0]

            # an id starting with the current subfamily id is taken as a
            # sub-subfamily, one starting with the current family id as a
            # subfamily, anything else as a new top level family
            if family_id.startswith(this_subfamily[0]):

                this_subsubfamily = (family_id, a.text)

            elif family_id.startswith(this_family[0]):

                this_subfamily = (family_id, a.text)
                this_subsubfamily = ('0', None)

            else:

                this_family = (family_id, a.text)
                this_subfamily = ('0', None)
                this_subsubfamily = ('0', None)

        elif a_title == a_receptor_title:

            recpages.append((
                a.attrs['href'],
                this_family[1],
                this_subfamily[1],
                this_subsubfamily[1],
            ))

    prg = progress.Progress(len(recpages), 'Downloading HPMR data', 1)

    regene = re.compile(r'Param=([^&]+)&ProtId=(\d+)&ProtType=(\w+)')

    genes_curl = curl.Curl(
        urls.urls['hpmri']['genes_rescued'],
        silent=False,
        large=True,
    )

    for url, family, subfamily, subsubfamily in recpages:

        protein, prot_id, prot_type = regene.search(url).groups()
        fname = 'gene_%s-%s-%s.html' % (protein, prot_id, prot_type)

        prg.step(status='Processing `%s`' % fname)

        _log(
            'Accessing `%s` from `%s` (%s).' % (
                fname,
                genes_curl.cache_file_name,
                genes_curl.url,
            )
        )

        if fname not in genes_curl.result:

            _log('File `%s` not found in the archive.' % fname)
            continue

        soup = bs4.BeautifulSoup(
            genes_curl.result[fname].read(),
            'html.parser',
        )
        gene_ints = soup.find('div', {'id': 'GeneInts'})

        if not gene_ints:

            _log('No interactions: `%s`' % url)
            continue

        # a missing title element is handled the same way as a title
        # which does not contain the receptor name
        title_span = gene_ints.find_previous_sibling('span')
        recname = (
            rerecname.search(title_span.text)
            if title_span is not None else
            None
        )
        recname = recname.groups()[0] if recname else 'Unknown'

        if recname == 'Unknown':

            _log('Could not find receptor name: `%s`' % url)
            continue

        recname_u = mapping.map_name0(recname, 'genesymbol', 'uniprot')

        if not recname_u:

            continue

        families[recname_u] = (
            family,
            subfamily,
            subsubfamily,
        )

        for td in gene_ints.find_all('td'):

            interactors = []

            for span in td.find_all('span', {'class': 'IntRow'}):

                int_match = reint.search(span.text)

                if int_match:

                    interactors.append(int_match.groups())

            references = []

            for ref in td.find_all(
                'a',
                {'title': 'click to open reference in new window'},
            ):

                ref_match = rerefid.search(ref.attrs['href'])

                # a link without recognizable reference ids is skipped
                # instead of aborting the whole download
                if ref_match:

                    references.append(ref_match.groups()[0].strip())

            interactors_u = []

            for role, genesymbol in interactors:

                uniprot = mapping.map_name0(
                    genesymbol,
                    'genesymbol',
                    'uniprot',
                )

                if uniprot:

                    interactors_u.append((role, uniprot))

            # without any ligand among the interactors the record is a
            # receptor-receptor interaction
            partner_role = (
                'receptor'
                if all(i[0] == 'Receptor' for i in interactors_u) else
                'ligand'
            )

            receptors = (
                recname_u
                if partner_role == 'receptor' else
                get_partner(
                    interactors_u,
                    'Receptor',
                    recname=recname_u,
                    references=references,
                )
            )

            partners = (
                {u[1] for u in interactors_u} - {recname_u}
                if partner_role == 'receptor' else
                get_partner(
                    interactors_u,
                    'Ligand',
                    references=references,
                )
            )

            receptors = common.to_list(receptors)
            partners = common.to_list(partners)

            unambiguous = (
                partner_role == 'ligand' or
                (len(receptors) == 1 and len(partners) == 1)
            )

            for receptor, partner in itertools.product(receptors, partners):

                interactions.append(
                    HpmrInteraction(
                        receptor=receptor,
                        partner=partner,
                        partner_role=partner_role,
                        references=';'.join(references),
                        unambiguous=unambiguous,
                    )
                )

            for entity in itertools.chain(receptors, partners):

                if hasattr(entity, 'components'):

                    complexes.add(entity)

    prg.terminate()

    result = {
        'interactions': interactions,
        'families': families,
    }

    _log('Saving HPMR data to cache file `%s`.' % cachefile)

    # the context manager makes sure the cache is flushed and closed
    with open(cachefile, 'wb') as fp:

        pickle.dump(result, fp)

    return result
def hpmr_complexes(use_cache=None):
    """
    HPMR does not contain unambiguous protein complex data, and considering
    the resource is unmaintained, probably it never will. Hence this
    function always returns an empty dict.

    Technically, it reads the "complexes" element of the ``get_hpmr``
    output (falling back to an empty tuple if the key is absent) and
    indexes each complex by its string representation.

    Args
        use_cache (bool): Passed to ``get_hpmr``.

    Returns
        (dict): String representations of complexes as keys, the complex
            objects as values.
    """

    by_name = {}

    for cplex in get_hpmr(use_cache=use_cache).get('complexes', ()):

        by_name[cplex.__str__()] = cplex

    return by_name
def hpmr_annotations(use_cache=None):
    """
    Compiles role and receptor family annotations from the HPMR data.

    Args
        use_cache (bool): Passed to ``get_hpmr``.

    Returns
        (dict): Keys are the participants of the interactions and the keys
            of the family table from ``get_hpmr``; values are sets of
            ``HPMRAnnotation`` tuples with the fields `role`, `mainclass`,
            `subclass` and `subsubclass`.
    """

    HPMRAnnotation = collections.namedtuple(
        'HPMRAnnotation',
        ('role', 'mainclass', 'subclass', 'subsubclass'),
    )
    # used for entities which are not in the family table
    no_classes = (None, None, None)

    hpmr_data = get_hpmr(use_cache=use_cache)
    families = hpmr_data['families']
    result = collections.defaultdict(set)

    for rec in hpmr_data['interactions']:

        receptor, partner_role, partner = rec[0], rec[1], rec[2]

        # first partner is always a receptor
        # (because ligand pages simply don't work on HPMR webpage)
        receptor_args = ('Receptor',) + families.get(receptor, no_classes)

        # the second is either a ligand or another receptor
        # NOTE(review): the role here is the `partner_role` value as it is,
        # which `get_hpmr` sets in lower case, while the label above is
        # capitalized -- confirm that the mixed case is intended.
        partner_args = (partner_role,) + families.get(partner, no_classes)

        result[receptor].add(HPMRAnnotation(*receptor_args))
        result[partner].add(HPMRAnnotation(*partner_args))

    # every entity of the family table is annotated as receptor, no matter
    # if it has any interaction
    for uniprot, classes in iteritems(families):

        result[uniprot].add(HPMRAnnotation(*(('Receptor',) + classes)))

    return dict(result)