#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#fromfuture.utilsimportiteritems# Py 2/3try:input=raw_inputexceptNameError:passtry:importcPickleaspickleexcept:importpickleimportsysimportosimportitertoolsimportgzipimportbs4fromlxmlimportetreeimportcopyimportstructimportpypath.share.curlascurlimportpypath.resources.urlsasurlsimportpypath.share.progressasprogressimportpypath.share.commonascommonimportpypath.share.cacheascache_modimportpypath.inputs.acsnasacsn_inputfrompypath.resourcesimportdata_formats
def reactome_sbml():
    """
    Downloads Reactome human reactions in SBML format.

    Returns a dict of file objects.
    """

    sbml_url = urls.urls['reactome']['sbml']
    download = curl.Curl(sbml_url, silent=False, large=True, slow=True)

    return download.result
def reactome_biopax(organism=9606, cache=True):
    """
    Downloads the Reactome BioPAX level 3 export of one organism.

    The `.owl` file is copied out of the download into the cache directory
    as `reactome_biopax_<organism name>.owl`; later calls reuse that copy.

    :param int organism:
        Key into the local organism table; only 9606 (`Homo_sapiens`) is
        mapped, any other value raises `KeyError`.
    :param bool cache:
        If False, download and copy again even if the extracted file
        already exists.

    :return:
        The extracted file, opened for reading in text mode. The caller
        is responsible for closing it.
    """

    organisms = {9606: 'Homo_sapiens'}
    fname = '%s.owl' % organisms[organism]
    unzipped = os.path.join(
        cache_mod.get_cachedir(),
        'reactome_biopax_%s' % fname,
    )

    if not os.path.exists(unzipped) or not cache:

        url = urls.urls['reactome']['biopax_l3']
        c = curl.Curl(url, silent=False, large=True, files_needed=[fname])
        fileobj = c.result[fname]

        try:
            # NOTE(review): text mode is kept from the original; if
            # `fileobj.read` yields bytes this needs to be 'wb' -- confirm
            # against `curl.Curl`.
            with open(unzipped, 'w') as target:
                while True:
                    chunk = fileobj.read(4096)
                    if not chunk:
                        break
                    target.write(chunk)
        except BaseException:
            # A truncated file would pass the `os.path.exists` check above
            # on the next call and be served as a valid cache, so drop it.
            if os.path.exists(unzipped):
                os.remove(unzipped)
            raise
        finally:
            fileobj.close()

    return open(unzipped, 'r')
def reactome_bs():
    """
    Reactome pathways in SBML format.

    Yields tuples of pathway IDs (string) and the SBML representation
    of the pathway as a `bs4.BeautifulSoup` object.
    """

    for fname, fileobj in reactome_sbml().items():

        # The pathway ID is the key without its last five characters
        # (presumably the `.sbml` extension).
        pathway_id = fname[:-5]

        yield pathway_id, bs4.BeautifulSoup(fileobj.read(), 'html.parser')
def process_complex(
        depth,
        cref,
        entity_uniprot,
        complexes,
        complexvariations,
        cplex,
        stoichiometries,
    ):
    """
    Recursively expands one complex into all of its member combinations.

    Each member of the complex is looked up in `stoichiometries`; members
    which are themselves not yet processed complexes or complex groups are
    processed first (recursively). The variants of the members are then
    combined (Cartesian product) and the result is appended to
    `entity_uniprot[cref]`. Progress is appended to the file
    `reactome.log` in the current working directory.

    :param int depth:
        Recursion depth; only used to indent the log messages.
    :param str cref:
        Key of the complex being processed.
    :param dict entity_uniprot:
        Maps entity keys to lists of variants; each variant is a dict with
        `members` (list) and `ptms` (dict of sets). Modified in place.
    :param dict complexes:
        Maps complex keys to their lists of stoichiometry keys.
    :param dict complexvariations:
        Maps complex group keys to lists of member complex keys.
    :param list cplex:
        Stoichiometry keys of the members of `cref`.
    :param dict stoichiometries:
        Maps stoichiometry keys to `(entity key, coefficient)` pairs.

    :return:
        None; the result is stored in `entity_uniprot`.
    """

    tabs = '\t' * (depth + 1)
    this_cplex = [{'members': [], 'ptms': {}}]

    def process_nested(nested_ref):
        # The nested call appends to the same file through its own handle;
        # flushing first keeps the messages in chronological order.
        log.flush()
        process_complex(
            depth + 1,
            nested_ref,
            entity_uniprot,
            complexes,
            complexvariations,
            complexes[nested_ref],
            stoichiometries,
        )

    # The context manager guarantees that the handle is closed, also when
    # a nested call raises.
    with open('reactome.log', 'a') as log:

        log.write(
            '%sStarting processing %s, depth = %u\n' % (
                tabs[1:], cref, depth
            )
        )
        log.write(
            '%sComplex %s have %u member entities\n' % (
                tabs, cref, len(cplex)
            )
        )

        for stoi in cplex:

            if stoi not in stoichiometries:
                continue

            ref, num = stoichiometries[stoi]
            log.write(
                '%sNew member entity: %s, stoichiometric coeff: %u\n' % (
                    tabs, ref, num
                )
            )

            if ref.startswith('Complex') and ref not in entity_uniprot:

                if ref in complexes:
                    log.write(
                        '%s%s is a complex with %u subentities, '
                        'and hasn\'t been processed yet\n' % (
                            tabs, ref, len(complexes[ref])
                        )
                    )
                    process_nested(ref)

                if ref in complexvariations:
                    log.write(
                        '%s%s is a complex group with %u variations, '
                        'and hasn\'t been processed yet\n' % (
                            tabs, ref, len(complexvariations[ref])
                        )
                    )
                    entity_uniprot[ref] = []

                    for mref in complexvariations[ref]:

                        if mref not in entity_uniprot and mref in complexes:
                            log.write(
                                '%s%s is a complex with %u subentities, '
                                'and hasn\'t been processed yet\n' % (
                                    tabs, mref, len(complexes[mref])
                                )
                            )
                            process_nested(mref)

                        if mref in entity_uniprot:
                            log.write(
                                '%s%s is now processed, '
                                'adding it as an instance of %s\n' % (
                                    tabs, mref, ref
                                )
                            )
                            entity_uniprot[ref].extend(entity_uniprot[mref])

            if ref in entity_uniprot:

                log.write(
                    '%s%s is an already processed entity, '
                    'with %u variants and %u members\n' % (
                        tabs,
                        ref,
                        len(entity_uniprot[ref]),
                        len(entity_uniprot[ref][0]['members'])
                        if len(entity_uniprot[ref]) > 0 else 0
                    )
                )
                log.write(
                    '%sNumber of variants after processing %s: '
                    '%u x %u = %u\n' % (
                        tabs,
                        ref,
                        len(this_cplex),
                        len(entity_uniprot[ref]),
                        len(this_cplex) * len(entity_uniprot[ref])
                    )
                )

                # Every existing variant is combined with every variant
                # of the new member.
                this_cplex_new = []

                for var in this_cplex:

                    for new_member in entity_uniprot[ref]:

                        var_new = copy.deepcopy(var)
                        var_new['members'].extend(new_member['members'] * num)

                        for u, member_ptms in new_member['ptms'].items():
                            var_new['ptms'][u] = (
                                var_new['ptms'].get(u, set()) | member_ptms
                            )

                        this_cplex_new.append(var_new)

                this_cplex = this_cplex_new

                log.write(
                    '%sNumber of variants after processing %s: %u\n' % (
                        tabs, ref, len(this_cplex)
                    )
                )
                log.write(
                    '%sNumber of members in %s: %u\n' % (
                        tabs,
                        cref,
                        len(this_cplex[0]['members'])
                        if len(this_cplex) > 0 else 0
                    )
                )

            else:
                log.write('%sPermanently missing: %s\n' % (tabs, ref))

        log.write(
            '%sFinished processing %s, '
            'found %u variants with %u members\n' % (
                tabs[1:],
                cref,
                len(this_cplex),
                len(this_cplex[0]['members']) if len(this_cplex) > 0 else 0
            )
        )

    if cref not in entity_uniprot:
        entity_uniprot[cref] = []

    entity_uniprot[cref].extend(this_cplex)
def reactome_interactions(cacheFile=None, ask=True, **kwargs):
    """
    Downloads and processes Reactome BioPAX. Extracts binary interactions.
    The applied criteria are very stringent, yields very few interactions.
    Requires large free memory, approx. 2G.

    :param str cacheFile:
        Path of the pickle file with the processed interactions. Defaults
        to `reactome.interactions.pickle` in the cache directory.
    :param bool ask:
        If no cache file exists, ask on the standard input before starting
        the processing. If False, the processing starts without asking.
    :param kwargs:
        Passed to `get_interactions`.

    :return:
        The content of the cache file if it exists; otherwise the result
        of `get_interactions('reactome', **kwargs)`, or an empty list if
        the user answered `n`.
    """

    if cacheFile is None:
        cacheFile = os.path.join(
            cache_mod.get_cachedir(),
            'reactome.interactions.pickle',
        )

    if os.path.exists(cacheFile):
        # Previously the loaded object was dropped and the function went
        # on to test an unbound `answer`; the cached result is returned.
        # NOTE: unpickling is only safe for cache files written by this
        # tool itself, never point `cacheFile` to untrusted data.
        with open(cacheFile, 'rb') as fp:
            return pickle.load(fp)

    answer = 'y'

    if ask:

        while True:

            sys.stdout.write(
                '\n\tProcessing Reactome requires huge memory.\n'
                '\tPlease hit `y` if you have at least 2G free memory,\n'
                '\tor `n` to omit Reactome.\n'
                '\tAfter processing once, it will be saved in \n'
                '\t%s, so next time can be loaded quickly.\n\n'
                '\tProcess Reactome now? [y/n]\n' % cacheFile
            )
            sys.stdout.flush()
            answer = input().lower()

            if answer in {'y', 'n'}:
                break

    if answer == 'y':
        # NOTE(review): `get_interactions` is not defined in the visible
        # part of this module; whether it writes `cacheFile`, as the
        # prompt above promises, should be confirmed.
        return get_interactions('reactome', **kwargs)

    return []
def process_controls(controls, mandatory_refs=True):
    """
    Extracts binary interactions from control records.

    For each control with exactly one unique controller member, every
    combination of the left and right side instances of the controlled
    reaction is examined, and an interaction is recorded if a single
    affected protein can be identified:

    * the reaction involves only one unique member (`directed`);
    * the modifications differ on one protein only (`directed`);
    * the member sets of the two sides differ by one member
      (`undirected`).

    For controls without controller, reactions with exactly two unique
    members are recorded as interactions between those two.

    :param dict controls:
        Its values are dicts with the keys `refs`, `controller`,
        `controlled` and `type`. Controllers are dicts with `members`;
        `controlled` is None or a dict with `left`, `right` and `refs`,
        where `left` and `right` are iterables of alternatives, each
        alternative a dict with `members` and `ptms`.
    :param bool mandatory_refs:
        Skip the controls with empty `refs`.

    :return:
        Tuple of three lists: interactions, and two lists which are always
        empty here. Interactions are tuples of controller, controlled,
        type, references joined by `;` and direction; the tuples from
        controls without controller have no direction element.
    """

    def unique_members(left_inst, right_inst):
        # unique members across both sides of one reaction instance
        return common.unique_list(
            common.flat_list(
                [l['members'] for l in left_inst] +
                [r['members'] for r in right_inst]
            )
        )

    def ptm_pairs(instances):
        # (protein, modification) pairs; the loop variables are named so
        # that they do not shadow the returned `ptms` list
        return set(
            (protein, ptm)
            for inst in instances
            for protein, protein_ptms in inst['ptms'].items()
            for ptm in protein_ptms
        )

    def joined_refs(control, controlled):
        # references of the control, falling back to the ones of the
        # controlled reaction
        own = control['refs']
        return ';'.join(own if len(own) > 0 else controlled['refs'])

    interactions = set()
    ptms = []
    regulations = []
    prg = progress.Progress(len(controls), 'Processing interactions', 11)

    for c in controls.values():

        prg.step()

        if len(c['refs']) == 0 and mandatory_refs:
            continue

        if c['controller'] is not None and len(c['controller']) > 0:

            for ctr in c['controller']:

                if len(common.unique_list(ctr['members'])) != 1:
                    continue

                # isoform suffix (after the dash) is removed
                this_ctr = ctr['members'][0].split('-')[0]
                ctd = c['controlled']

                if ctd is None:
                    continue

                for leftInst in itertools.product(*ctd['left']):

                    for rightInst in itertools.product(*ctd['right']):

                        lr = unique_members(leftInst, rightInst)

                        if len(lr) == 1:
                            this_ctd = lr[0].split('-')[0]
                            interactions.add((
                                this_ctr,
                                this_ctd,
                                c['type'],
                                joined_refs(c, ctd),
                                'directed',
                            ))
                            continue

                        ptmsDiff = ptm_pairs(leftInst) ^ ptm_pairs(rightInst)
                        diffUniProts = common.unique_list(
                            [pair[0] for pair in ptmsDiff]
                        )

                        if len(diffUniProts) == 1:
                            this_ctd = diffUniProts[0].split('-')[0]
                            interactions.add((
                                this_ctr,
                                this_ctd,
                                c['type'],
                                joined_refs(c, ctd),
                                'directed',
                            ))
                            continue

                        lefts = [set(l['members']) for l in leftInst]
                        rights = [set(r['members']) for r in rightInst]
                        onlyLefts = [l for l in lefts if l not in rights]
                        onlyRights = [r for r in rights if r not in lefts]
                        diffs = []

                        for l in onlyLefts:
                            for r in onlyRights:
                                diff = l ^ r
                                if len(diff) == 1:
                                    diffs.append(list(diff))

                        diffs = common.unique_list(common.flat_list(diffs))

                        if len(diffs) == 1:
                            this_ctd = diffs[0].split('-')[0]
                            interactions.add((
                                this_ctr,
                                this_ctd,
                                c['type'],
                                joined_refs(c, ctd),
                                'undirected',
                            ))

        # if the controller is unknown
        # and the reaction has only 2 proteins
        # these most probably bind each other
        # to form a complex
        else:

            ctd = c['controlled']

            if ctd is not None:

                for leftInst in itertools.product(*ctd['left']):

                    for rightInst in itertools.product(*ctd['right']):

                        lr = unique_members(leftInst, rightInst)

                        if len(lr) == 2:
                            # NOTE(review): unlike above, these tuples
                            # carry no direction element
                            interactions.add((
                                lr[0].split('-')[0],
                                lr[1].split('-')[0],
                                c['type'],
                                ';'.join(ctd['refs']),
                            ))

    prg.terminate()

    return list(interactions), ptms, regulations
# Process Reactome SBML

# XML namespace prefixes in the `{uri}` form used by ElementTree/lxml
_REACTOME_SBML_PFX = '{http://www.sbml.org/sbml/level2/version4}'
_REACTOME_BQBIOL_PFX = '{http://biomodels.net/biology-qualifiers/}'
_REACTOME_RDF_PFX = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}'


def _reactome_id(obj, attr):
    """
    Numeric ID from the attribute `attr` of a `bs4` tag.
    """

    return _reactome_extract_id(obj.attrs[attr])


def _reactome_extract_id(value):
    """
    Integer between the first and second underscores of `value`,
    e.g. `species_123` gives 123.
    """

    return int(value.split('_')[1])


def _reactome_res(obj):
    """
    Resource ID from the `rdf:resource` attribute of a `bs4` tag.
    """

    return _reactome_extract_res(obj.attrs['rdf:resource'])


def _reactome_extract_res(value):
    """
    The part of `value` after its last colon.
    """

    return value.split(':')[-1]


def _reactome_reactions():
    """
    Collects compartments, species and reactions from all Reactome SBML
    pathways, using the `bs4` based parser.

    :return:
        Tuple of three dicts keyed by numeric IDs: compartment names;
        species as dicts with `name`, `comp` and `ids`; reactions as dicts
        with `refs`, `reas`, `prds` and `note`.
    """

    species = {}
    compartments = {}
    reactions = {}

    for pw, soup in reactome_bs():

        m = soup.find('model')

        for cp in m.find('listofcompartments').find_all('compartment'):

            compartments[_reactome_id(cp, 'id')] = cp.attrs['name']

        for sp in m.find('listofspecies').find_all('species'):

            ids = [
                _reactome_res(li)
                for li in sp.find('bqbiol:haspart').find_all('rdf:li')
            ]

            species[_reactome_id(sp, 'id')] = {
                'name': sp.attrs['name'],
                'comp': _reactome_id(sp, 'compartment'),
                'ids': sorted(common.unique_list(ids)),
            }

        for rea in m.find('listofreactions').find_all('reaction'):

            refs = [
                _reactome_res(li)
                for li in
                rea.find('bqbiol:isdescribedby').find_all('rdf:li')
            ]
            reas = [
                _reactome_id(sr, 'species')
                for sr in
                rea.find('listofreactants').find_all('speciesreference')
            ]
            prds = [
                _reactome_id(sr, 'species')
                for sr in
                rea.find('listofproducts').find_all('speciesreference')
            ]

            reactions[_reactome_id(rea, 'id')] = {
                'refs': sorted(common.unique_list(refs)),
                'reas': sorted(common.unique_list(reas)),
                'prds': sorted(common.unique_list(prds)),
                'note': rea.find('notes').text,
            }

    return compartments, species, reactions


def _reactome_reactions_et():
    """
    Collects compartments, species and reactions from all Reactome SBML
    pathways, using the incremental `lxml.etree` parser.

    :return:
        Tuple of three dicts keyed by numeric IDs: compartments, species
        and reactions.
    """

    compartments = {}
    species = {}
    reactions = {}

    # tag -> (function processing the element, dict collecting the result)
    handlers = {
        '%scompartment' % _REACTOME_SBML_PFX: (
            _reactome_compartment,
            compartments,
        ),
        '%sreaction' % _REACTOME_SBML_PFX: (_reactome_reaction, reactions),
        '%sspecies' % _REACTOME_SBML_PFX: (_reactome_species, species),
    }

    sbml = reactome_sbml()

    for pw_sbml in sbml.values():

        ctx = etree.iterparse(pw_sbml, events=('end',))

        for ev, elem in ctx:

            if elem.tag not in handlers:
                # Child elements must stay intact until the end event of
                # their parent: clearing them here would wipe the
                # attributes the handlers below read.
                continue

            handler, target = handlers[elem.tag]
            k, v = handler(elem)
            target[k] = v

            # free the memory of the processed element and of its
            # preceding siblings
            elem.clear()

            while elem.getprevious() is not None:

                del elem.getparent()[0]

    return compartments, species, reactions


def _reactome_compartment(elem):
    """
    Numeric ID and name of a compartment element.
    """

    return _reactome_extract_id(elem.get('id')), elem.get('name')


def _reactome_species(elem):
    """
    Numeric ID and record (`name`, `comp`, `ids`) of a species element.
    """

    hasPartStr = '%shasPart' % _REACTOME_BQBIOL_PFX

    si = _reactome_extract_id(elem.get('id'))
    ids = _reactome_collect_resources(elem, hasPartStr)

    return si, {
        'name': elem.get('name'),
        'comp': _reactome_extract_id(elem.get('compartment')),
        'ids': sorted(common.unique_list(ids)),
    }


def _reactome_reaction(elem):
    """
    Numeric ID and record (`refs`, `reas`, `prds`, `note`) of a reaction
    element.

    The lists are de-duplicated and sorted, the same way as in
    `_reactome_reactions`. `note` is the text content of the `notes`
    child, or None if the reaction has no such child.
    """

    isDescStr = '%sisDescribedBy' % _REACTOME_BQBIOL_PFX
    lofReaStr = '%slistOfReactants' % _REACTOME_SBML_PFX
    lofPrdStr = '%slistOfProducts' % _REACTOME_SBML_PFX
    notesStr = '%snotes' % _REACTOME_SBML_PFX

    ri = _reactome_extract_id(elem.get('id'))
    refs = _reactome_collect_resources(elem, isDescStr)
    reas = _reactome_collect_species(elem, lofReaStr)
    prds = _reactome_collect_species(elem, lofPrdStr)

    # The lookup needs the namespace prefix, and the element is `notes`
    # (as read by the bs4 based parser), not `note`.
    notes = elem.find(notesStr)
    note = None if notes is None else ''.join(notes.itertext())

    return ri, {
        'refs': sorted(common.unique_list(refs)),
        'reas': sorted(common.unique_list(reas)),
        'prds': sorted(common.unique_list(prds)),
        'note': note,
    }


def _reactome_collect_resources(elem, tag):
    """
    Resource IDs of the `rdf:li` elements below the first descendant of
    `elem` with the tag `tag`; empty list if there is no such descendant.
    """

    resStr = '%sresource' % _REACTOME_RDF_PFX
    liStr = '%sli' % _REACTOME_RDF_PFX

    container = elem.find('.//%s' % tag)

    if container is None:

        return []

    return [
        _reactome_extract_res(li.get(resStr))
        for li in container.iterfind('.//%s' % liStr)
    ]


def _reactome_collect_species(elem, tag):
    """
    Numeric species IDs of the `speciesReference` elements below the first
    descendant of `elem` with the tag `tag`; empty list if there is no
    such descendant.
    """

    spRefStr = '%sspeciesReference' % _REACTOME_SBML_PFX

    container = elem.find('.//%s' % tag)

    if container is None:

        return []

    return [
        _reactome_extract_id(sp.get('species'))
        for sp in container.iterfind('.//%s' % spRefStr)
    ]
def get_acsn_effects():
    """
    Processes ACSN data, returns list of effects.

    Each effect is a list of the first two fields of a four field ACSN
    record and a sign: `-` if any of its effect labels is negative,
    otherwise `+` if any is positive, otherwise `*` if any is among the
    directed ones. Records matching none of these are omitted.
    """

    negatives = {
        'NEGATIVE_INFLUENCE',
        'UNKNOWN_NEGATIVE_INFLUENCE',
    }
    positives = {
        'TRIGGER',
        'POSITIVE_INFLUENCE',
        'UNKNOWN_POSITIVE_INFLUENCE',
    }
    directed = {
        'UNKNOWN_TRANSITION',
        'INTERACTION_TYPE',
        'KNOWN_TRANSITION_OMITTED',
        'INHIBITION',
        'UNKNOWN_POSITIVE_INFLUENCE',
        'PROTEIN_INTERACTION',
        'UNKNOWN_CATALYSIS',
        'POSITIVE_INFLUENCE',
        'STATE_TRANSITION',
        'TRANSLATION',
        'UNKNOWN_NEGATIVE_INFLUENCE',
        'NEGATIVE_INFLUENCE',
        'MODULATION',
        'TRANSCRIPTION',
        'COMPLEX_EXPANSION',
        'TRIGGER',
        'CATALYSIS',
        'PHYSICAL_STIMULATION',
        'UNKNOWN_INHIBITION',
        'TRANSPORT',
    }

    effects = []

    for record in acsn_input.acsn_interactions():

        if len(record) != 4:
            continue

        labels = set(record[2].split(';'))

        if labels & negatives:
            sign = '-'
        elif labels & positives:
            sign = '+'
        elif labels & directed:
            sign = '*'
        else:
            continue

        effects.append([record[0], record[1], sign])

    return effects