#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#frompast.builtinsimportxrange,rangeimportreimportitertoolsimportcollectionsimportpypath.resources.urlsasurlsimportpypath.share.curlascurlimportpypath.share.sessionassessionimportpypath.share.progressasprogressimportpypath.inputs.ontologyasontologyimportpypath.internals.interaasinteraimportpypath.internals.resourceasresource_internalsimportpypath.core.evidenceasevidence_logger=session.Logger(name='domino_input')_log=_logger._log
def _domino_group(pattern, text, group=1, default=None):
    """
    Returns capture group `group` of `pattern` matched at the beginning
    of `text`, or `default` if the pattern does not match.
    """

    match = pattern.match(text)

    return match.group(group) if match else default


def _domino_join(pattern, field, sep):
    """
    Splits `field` by `sep`, extracts the first capture group of `pattern`
    from each item (empty string for items which do not match) and joins
    the extracted values by `;`.
    """

    return ';'.join(
        _domino_group(pattern, item, default='')
        for item in field.split(sep)
    )


def get_domino(none_values=False, outfile=None):
    """
    Downloads the DOMINO table from the `rescued` URL registered for
    `domino` in `pypath.resources.urls` and parses it into records.

    The first line of the download is skipped and lines with fewer than
    39 tab separated columns are ignored.

    Args
        none_values (bool): Keep `None` for the fields which could not
            be extracted (identifiers, experimental method, references,
            taxons and roles). By default these are replaced by empty
            strings.
        outfile (str): Optional path; if provided, the records are also
            written into this file as a tab separated table, with the
            field names in the first line.

    Returns
        A list of `DominoRecord` named tuples with the following fields
        (the index of the last field of each line is in the comment):

        header = ['uniprot_A', 'uniprot_B', 'isoform_A', 'isoform_B', #3
        'exp_method', 'references', 'taxon_A', 'taxon_B', #7
        'role_A', 'role_B', 'binding_site_range_A', 'binding_site_range_B', #11
        'domains_A', 'domains_B', 'ptm_residue_A', 'ptm_residue_B', #15
        'ptm_type_mi_A', 'ptm_type_mi_B', 'ptm_type_A', 'ptm_type_B', #19
        'ptm_res_name_A', 'ptm_res_name_B', 'mutations_A', 'mutations_B', #23
        'mutation_effects_A', 'mutation_effects_B', 'domains_interpro_A', #26
        'domains_interpro_B', 'negative'] #28

        Fields built from multiple items of one column hold the values
        joined by `;`, with an empty string for each item which could not
        be parsed. `negative` is '0' if the last column is `-`, else '1'.
    """

    DominoRecord = collections.namedtuple(
        'DominoRecord',
        (
            'uniprot_A',
            'uniprot_B',
            'isoform_A',
            'isoform_B',
            'exp_method',
            'references',
            'taxon_A',
            'taxon_B',
            'role_A',
            'role_B',
            'binding_site_range_A',
            'binding_site_range_B',
            'domains_A',
            'domains_B',
            'ptm_residue_A',
            'ptm_residue_B',
            'ptm_type_mi_A',
            'ptm_type_mi_B',
            'ptm_type_A',
            'ptm_type_B',
            'ptm_res_name_A',
            'ptm_res_name_B',
            'mutations_A',
            'mutations_B',
            'mutation_effects_A',
            'mutation_effects_B',
            'domains_interpro_A',
            'domains_interpro_B',
            'negative',
        ),
    )

    result = []

    taxid = re.compile(r'taxid:(.*)\([a-zA-Z ]*\)')
    miont = re.compile(r'MI:[0-9]{4}\((.*)\)')
    binds = re.compile(r'([-0-9]*);.*')
    domai = re.compile(r'.*;.*;.*\((.*)\)')
    dipro = re.compile(r'.*;.*;.+:(IPR[0-9]*).*')
    ptmrs = re.compile(r'([-0-9]*);.*')
    ptmmi = re.compile(r'[0-9]*;(MI:[0-9]*)\(.*\);.*;.*')
    ptmrn = re.compile(
        r'.*sequence:[\s]*[0-9]+-[0-9]+[\s]*:[\s]*([A-Z]{10,}).*'
    )
    ptmty = re.compile(r'[0-9]*;MI:[0-9]*\((.*)\);.*;.*')
    refrs = re.compile(r'(pubmed|doi):["]*([-0-9a-zA-Z\.\(\)/]*)["]*')

    url = urls.urls['domino']['rescued']
    c = curl.Curl(url, silent=False, large=True)
    data = c.result

    # the first line is not data; the default avoids StopIteration
    # in case the download is empty
    next(data, None)

    for line in data:

        r = line.strip().split('\t')

        if len(r) < 39:

            continue

        this_row = [
            # identifiers look like `db:ACCESSION-isoform`
            None if ':' not in r[0] else r[0].split(':')[1].split('-')[0],
            None if ':' not in r[1] else r[1].split(':')[1].split('-')[0],
            '1' if '-' not in r[0] else r[0].split('-')[1],
            '1' if '-' not in r[1] else r[1].split('-')[1],
            _domino_group(miont, r[6]),
            # the second group is the identifier, the first is its type
            _domino_group(refrs, r[8], group=2),
            _domino_group(taxid, r[9]),
            _domino_group(taxid, r[10]),
            # NOTE(review): role_A is read from column 11 while role_B
            # from column 17; the original tested column 16 before
            # reading column 17, which failed with AttributeError when
            # only column 16 matched. Here the guard and the value use
            # the same column. Confirm the intended columns against the
            # layout of the DOMINO file.
            _domino_group(miont, r[11]),
            _domino_group(miont, r[17]),
            _domino_join(binds, r[32], ','),
            _domino_join(binds, r[33], ','),
            _domino_join(domai, r[32], ','),
            _domino_join(domai, r[33], ','),
            _domino_join(ptmrs, r[34], '|'),
            _domino_join(ptmrs, r[35], '|'),
            _domino_join(ptmmi, r[34], '|'),
            _domino_join(ptmmi, r[35], '|'),
            _domino_join(ptmty, r[34], '|'),
            _domino_join(ptmty, r[35], '|'),
            _domino_join(ptmrn, r[34], '|'),
            _domino_join(ptmrn, r[35], '|'),
            _domino_join(ptmrs, r[36], '|'),
            _domino_join(ptmrs, r[37], '|'),
            _domino_join(ptmty, r[36], '|'),
            _domino_join(ptmty, r[37], '|'),
            _domino_group(dipro, r[32], default=''),
            _domino_group(dipro, r[33], default=''),
            '0' if r[38].strip() == '-' else '1',
        ]

        if not none_values:

            this_row = ['' if x is None else x for x in this_row]

        result.append(DominoRecord(*this_row))

    if outfile:

        _log('Saving data into `%s`.' % outfile)

        with open(outfile, 'w') as outf:

            # the field names of the record serve as column header
            outf.write('\t'.join(DominoRecord._fields) + '\n')

            for rec in result:

                outf.write(
                    '\t'.join('' if x is None else x for x in rec) + '\n'
                )

    return result
def _domino_residue_letter(stretch, resnum):
    """
    Picks the letter of the modified residue from the sequence stretch
    reported together with a PTM.

    The letter is read from index 10 of the stretch if the residue number
    is above 10, otherwise from index `resnum - 1`. Returns `None` if the
    stretch is missing or does not have such a position.
    """

    if not stretch:

        return None

    idx = 10 if resnum > 10 else resnum - 1

    return stretch[idx] if 0 <= idx < len(stretch) else None


def _domino_site_ranges(field):
    """
    Converts a `;` separated list of `start-end` ranges into a list of
    `(start, end)` tuples of integers. Empty items and `0` are ignored.

    Raises `ValueError` or `IndexError` for items of any other format.
    """

    ranges = []

    for item in field.split(';'):

        if item in ('', '0'):

            continue

        bounds = item.split('-')
        ranges.append((int(bounds[0]), int(bounds[1])))

    return ranges


def _domino_binding_sites(protein, ranges, interpro, isoform, resource):
    """
    Creates one binding site object for each `(start, end)` range: an
    `intera.Domain` if an InterPro ID is available, otherwise an
    `intera.Ptm` wrapping an `intera.Motif` of the same range.
    """

    sites = []

    for start, end in ranges:

        if interpro:

            sites.append(
                intera.Domain(
                    protein=protein,
                    domain=interpro,
                    start=start,
                    end=end,
                    domain_id_type='interpro',
                    isoform=isoform,
                )
            )

        else:

            motif = intera.Motif(
                protein=protein,
                start=start,
                end=end,
                isoform=isoform,
            )
            sites.append(
                intera.Ptm(
                    protein=protein,
                    motif=motif,
                    evidences=evidence.Evidence(resource=resource),
                    isoform=isoform,
                )
            )

    return sites


def domino_enzsub():
    """
    Builds domain-domain and domain-motif interactions from the records
    provided by `get_domino`.

    Only records with both UniProt IDs and either a PTM residue or binding
    site ranges for both partners are used. Records whose binding site
    ranges can not be parsed are reported in the log and contribute no
    binding site based interactions.

    Returns
        A dict of two elements: `ddi` contains domain-domain, while `dmi`
        domain-motif interactions. The latter includes protein-PTM
        interactions.
    """

    domino_resource = resource_internals.EnzymeSubstrateResource(
        name='DOMINO',
        input_method='domino.domino_enzsub',
    )

    domino = get_domino()

    try:

        miont = ontology.ontology('MI')

    except Exception:

        # best effort: without the ontology no PTM type can be resolved
        # and all of them end up as `unknown`
        _log('Could not load the MI ontology for DOMINO PTM types.')
        miont = {}

    dmi = []
    ddi = []

    prg = progress.Progress(len(domino), 'Processing DOMINO', 11)

    ptm_types = {
        "o4'-phospho-tyrosine": 'phosphorylation',
        'phosphorylated residue': 'phosphorylation',
        'o-phospho-threonine': 'phosphorylation',
        'o-phospho-serine': 'phosphorylation',
        'n6-methyl-lysine': 'methylation',
        'n6,n6,n6-trimethyl-lysine': 'trimethylation',
        'n6,n6-dimethyl-lysine': 'dimethylation',
        'acetylated residue': 'acetylation',
    }

    for rec in domino:

        prg.step()

        has_ptm = (
            rec.ptm_residue_A.strip() != '' or
            rec.ptm_residue_B.strip() != ''
        )
        has_sites = (
            rec.binding_site_range_A != '' and
            rec.binding_site_range_B != ''
        )

        if not (
            (has_ptm or has_sites) and
            len(rec.uniprot_A) > 0 and
            len(rec.uniprot_B) > 0
        ):

            continue

        uniprot1 = rec.uniprot_A
        uniprot2 = rec.uniprot_B
        references = rec.references.split(';')

        # ptms
        # NOTE(review): as in the original code, the residues listed in
        # `ptm_residue_A` are placed on `uniprot_B`, with `uniprot_A` as
        # the domain side. The PTM fields of partner B were parsed by the
        # original but never used, hence they are not processed here.
        # Confirm whether the other direction should also be created.
        if '-' not in rec.ptm_residue_A and '-' not in rec.ptm_residue_B:

            resnums = (
                [int(x) for x in rec.ptm_residue_A.split(';')]
                if rec.ptm_residue_A else
                []
            )
            # the three fields derive from the same column in
            # `get_domino`, hence their items are aligned by position
            mi_ids = (
                rec.ptm_type_mi_A.split(';')
                if rec.ptm_type_mi_A else
                [None] * len(resnums)
            )
            stretches = (
                rec.ptm_res_name_A.split(';')
                if rec.ptm_res_name_A else
                [None] * len(resnums)
            )

            for resnum, mi_id, stretch in zip(resnums, mi_ids, stretches):

                # PTM types missing from `ptm_types` become `unknown`
                ptm_type = (
                    ptm_types.get(miont[mi_id])
                    if mi_id in miont else
                    None
                )

                res = intera.Residue(
                    resnum,
                    _domino_residue_letter(stretch, resnum),
                    uniprot2,
                )
                ptm = intera.Ptm(
                    uniprot2,
                    typ=ptm_type or 'unknown',
                    residue=res,
                    evidences=evidence.Evidence(
                        resource=domino_resource,
                    ),
                )
                dom = intera.Domain(uniprot1)
                dm = intera.DomainMotif(
                    domain=dom,
                    ptm=ptm,
                    evidences=evidence.Evidence(
                        resource=domino_resource,
                        references=references,
                    ),
                )
                dmi.append(dm)

        # binding sites
        if not (rec.binding_site_range_A and rec.binding_site_range_B):

            continue

        try:

            ranges1 = _domino_site_ranges(rec.binding_site_range_A)
            ranges2 = _domino_site_ranges(rec.binding_site_range_B)

        except (ValueError, IndexError):

            _log(
                'Error processing DOMINO record, '
                'binding sites skipped: %s' % repr(rec)
            )
            continue

        bs1 = _domino_binding_sites(
            uniprot1,
            ranges1,
            rec.domains_interpro_A,
            rec.isoform_A,
            domino_resource,
        )
        bs2 = _domino_binding_sites(
            uniprot2,
            ranges2,
            rec.domains_interpro_B,
            rec.isoform_B,
            domino_resource,
        )

        # all sites of one partner are of the same kind: domains if an
        # InterPro ID is available for that partner, otherwise motifs
        one_is_domain = bool(rec.domains_interpro_A)
        two_is_domain = bool(rec.domains_interpro_B)

        for one, two in itertools.product(bs1, bs2):

            if one_is_domain and two_is_domain:

                ddi.append(
                    intera.DomainDomain(
                        one,
                        two,
                        sources='DOMINO',
                    )
                )

            elif one_is_domain or two_is_domain:

                domain, motif = (one, two) if one_is_domain else (two, one)

                dmi.append(
                    intera.DomainMotif(
                        domain=domain,
                        ptm=motif,
                        evidences=evidence.Evidence(
                            resource=domino_resource,
                            references=references,
                        ),
                    )
                )

    prg.terminate()

    return {'ddi': ddi, 'dmi': dmi}