#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from past.builtins import xrange, range
import re
import itertools
import collections
import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.session as session
import pypath.share.progress as progress
import pypath.inputs.ontology as ontology
import pypath.internals.intera as intera
import pypath.internals.resource as resource_internals
import pypath.core.evidence as evidence
# Module level logger created from `pypath.share.session`; `_log` is a
# shortcut to its logging method, used by the functions in this module.
_logger = session.Logger(name = 'domino_input')
_log = _logger._log
def get_domino(none_values = False, outfile = None):
    """
    Downloads the DOMINO table from the URL registered under
    ``urls.urls['domino']['rescued']`` and extracts the relevant fields.

    Args:
        none_values (bool): If ``False`` (default), fields which would
            be ``None`` are replaced by empty strings.
        outfile (str): Optional path. If provided, the records are also
            written to this file as a tab separated table, starting with
            a line of field names.

    Returns:
        A list of ``DominoRecord`` named tuples with the following
        fields (index in parentheses): uniprot_A (0), uniprot_B (1),
        isoform_A (2), isoform_B (3), exp_method (4), references (5),
        taxon_A (6), taxon_B (7), role_A (8), role_B (9),
        binding_site_range_A (10), binding_site_range_B (11),
        domains_A (12), domains_B (13), ptm_residue_A (14),
        ptm_residue_B (15), ptm_type_mi_A (16), ptm_type_mi_B (17),
        ptm_type_A (18), ptm_type_B (19), ptm_res_name_A (20),
        ptm_res_name_B (21), mutations_A (22), mutations_B (23),
        mutation_effects_A (24), mutation_effects_B (25),
        domains_interpro_A (26), domains_interpro_B (27), negative (28).
        Fields built from multiple values are joined by semicolons.
    """

    DominoRecord = collections.namedtuple(
        'DominoRecord',
        (
            'uniprot_A',
            'uniprot_B',
            'isoform_A',
            'isoform_B',
            'exp_method',
            'references',
            'taxon_A',
            'taxon_B',
            'role_A',
            'role_B',
            'binding_site_range_A',
            'binding_site_range_B',
            'domains_A',
            'domains_B',
            'ptm_residue_A',
            'ptm_residue_B',
            'ptm_type_mi_A',
            'ptm_type_mi_B',
            'ptm_type_A',
            'ptm_type_B',
            'ptm_res_name_A',
            'ptm_res_name_B',
            'mutations_A',
            'mutations_B',
            'mutation_effects_A',
            'mutation_effects_B',
            'domains_interpro_A',
            'domains_interpro_B',
            'negative',
        ),
    )

    taxid = re.compile(r'taxid:(.*)\([a-zA-Z ]*\)')
    miont = re.compile(r'MI:[0-9]{4}\((.*)\)')
    binds = re.compile(r'([-0-9]*);.*')
    domai = re.compile(r'.*;.*;.*\((.*)\)')
    dipro = re.compile(r'.*;.*;.+:(IPR[0-9]*).*')
    ptmrs = re.compile(r'([-0-9]*);.*')
    ptmmi = re.compile(r'[0-9]*;(MI:[0-9]*)\(.*\);.*;.*')
    ptmrn = re.compile(
        r'.*sequence:[\s]*[0-9]+-[0-9]+[\s]*:[\s]*([A-Z]{10,}).*')
    ptmty = re.compile(r'[0-9]*;MI:[0-9]*\((.*)\);.*;.*')
    refrs = re.compile(r'(pubmed|doi):["]*([-0-9a-zA-Z\.\(\)/]*)["]*')

    def extract(rex, value, group = 1, default = None):
        # One regex match per field: the captured group, or `default`
        # if the pattern does not match at the beginning of `value`.
        match = rex.match(value)

        return match.group(group) if match else default

    def extract_all(rex, field, sep):
        # Applies `extract` on each `sep` separated item of `field`;
        # items not matching are kept as empty strings, so the parallel
        # fields derived from the same column remain aligned.
        return ';'.join(
            extract(rex, item, default = '')
            for item in field.split(sep)
        )

    result = []

    url = urls.urls['domino']['rescued']
    c = curl.Curl(url, silent = False, large = True)
    data = c.result

    # the first line is the header; the default avoids `StopIteration`
    # if the download is empty
    _ = next(data, None)

    for r in data:

        r = r.strip().split('\t')

        # the last column used below is r[38]
        if len(r) < 39:

            continue

        this_row = [
            # identifiers look like `db:accession-isoform`
            None if ':' not in r[0] else r[0].split(':')[1].split('-')[0],
            None if ':' not in r[1] else r[1].split(':')[1].split('-')[0],
            '1' if '-' not in r[0] else r[0].split('-')[1],
            '1' if '-' not in r[1] else r[1].split('-')[1],
            extract(miont, r[6]),
            # the second group is the identifier after `pubmed:` or `doi:`
            extract(refrs, r[8], group = 2),
            extract(taxid, r[9]),
            extract(taxid, r[10]),
            extract(miont, r[11]),
            # NOTE(review): this field used to be guarded by a match on
            # r[16] while the value was read from r[17], raising
            # AttributeError when only r[16] matched. Now the guard and
            # the value use the same column; confirm that r[17] is the
            # intended column for `role_B`.
            extract(miont, r[17]),
            extract_all(binds, r[32], ','),
            extract_all(binds, r[33], ','),
            extract_all(domai, r[32], ','),
            extract_all(domai, r[33], ','),
            extract_all(ptmrs, r[34], '|'),
            extract_all(ptmrs, r[35], '|'),
            extract_all(ptmmi, r[34], '|'),
            extract_all(ptmmi, r[35], '|'),
            extract_all(ptmty, r[34], '|'),
            extract_all(ptmty, r[35], '|'),
            extract_all(ptmrn, r[34], '|'),
            extract_all(ptmrn, r[35], '|'),
            # the mutation columns are parsed by the same patterns
            # as the PTM columns
            extract_all(ptmrs, r[36], '|'),
            extract_all(ptmrs, r[37], '|'),
            extract_all(ptmty, r[36], '|'),
            extract_all(ptmty, r[37], '|'),
            extract(dipro, r[32], default = ''),
            extract(dipro, r[33], default = ''),
            # a dash in the last column means the record is not negative
            '0' if r[38].strip() == '-' else '1',
        ]

        if not none_values:

            this_row = ['' if x is None else x for x in this_row]

        result.append(DominoRecord(*this_row))

    if outfile:

        _log('Saving data into `%s`.' % outfile)

        with open(outfile, 'w') as outf:

            # the field names of the record serve as header
            outf.write('\t'.join(DominoRecord._fields) + '\n')

            for rec in result:

                outf.write(
                    '\t'.join('' if x is None else x for x in rec) + '\n'
                )

    return result
def domino_interactions():
    """
    Selects the DOMINO records suitable as interactions.

    A record from ``get_domino`` is kept if both partners have an
    identifier (fields 0 and 1), it has a reference (field 5), at least
    one of the binding site range, PTM or mutation effect fields
    (10-11, 14-21, 24-25) is not empty, and it is not labeled as
    negative (field 28 is not '1').

    Returns:
        A list of the records (named tuples) passing the filter.
    """

    # Built only once; `itertools.chain` works no matter if `range`
    # gives a list (`past.builtins`) or a lazy range object (builtin).
    detail_fields = tuple(
        itertools.chain(range(10, 12), range(14, 22), range(24, 26))
    )

    return [
        rec
        for rec in get_domino()
        if (
            rec[0] and
            rec[1] and
            rec[5] and
            any(rec[i] for i in detail_fields) and
            rec[28] != '1'
        )
    ]
def domino_ddi():
    """
    Domain-domain interactions from DOMINO.

    Returns:
        The value under the `ddi` key of the dict returned by
        ``domino_enzsub``.
    """

    return domino_enzsub()['ddi']
def _domino_padded(field, length):
    """
    Splits a semicolon separated field into exactly `length` items;
    missing items are filled with `None`.
    """

    items = field.split(';') if field else []

    return (items + [None] * length)[:length]


def _domino_residue_name(window, resnum):
    """
    Picks the letter of the residue number `resnum` from the sequence
    `window`, or returns `None` if it is not available.

    The letter is taken from index 10 of the window if the residue
    number is above 10, otherwise from index `resnum - 1`.
    """

    if not window:

        return None

    idx = 10 if resnum > 10 else resnum - 1

    # an index outside of the window means no residue name, instead of
    # an IndexError or a silent wrap-around to the end of the window
    if idx < 0 or idx >= len(window):

        return None

    return window[idx]


def _domino_ranges(field):
    """
    Converts a semicolon separated list of `start-end` ranges to a list
    of `(start, end)` tuples of integers; empty and `0` items are
    ignored. Raises ValueError or IndexError for malformed items.
    """

    ranges = []

    for item in field.split(';'):

        if item in ('', '0'):

            continue

        bounds = item.split('-')
        ranges.append((int(bounds[0]), int(bounds[1])))

    return ranges


def _domino_binding_sites(protein, isoform, interpro, ranges, resource):
    """
    Creates one object for each binding site range of a protein:
    a `Domain` if an InterPro ID is available, otherwise a `Ptm`
    carrying a `Motif`.
    """

    if interpro:

        return [
            intera.Domain(
                protein = protein,
                domain = interpro,
                start = start,
                end = end,
                domain_id_type = 'interpro',
                isoform = isoform,
            )
            for start, end in ranges
        ]

    return [
        intera.Ptm(
            protein = protein,
            motif = intera.Motif(
                protein = protein,
                start = start,
                end = end,
                isoform = isoform,
            ),
            evidences = evidence.Evidence(resource = resource),
            # the isoform used to be passed only for the first partner
            isoform = isoform,
        )
        for start, end in ranges
    ]


def domino_enzsub():
    """
    Builds domain-domain and domain-motif interactions from DOMINO.

    Only the records from ``get_domino`` having identifiers for both
    partners and either PTM residues or binding site ranges for both
    partners are processed. Records with malformed binding site ranges
    are logged and skipped.

    Returns:
        A dict of two elements: `ddi` contains domain-domain, while
        `dmi` domain-motif interactions. The latter includes
        protein-PTM interactions.
    """

    domino_resource = resource_internals.EnzymeSubstrateResource(
        name = 'DOMINO',
        input_method = 'domino.domino_enzsub',
    )

    domino = get_domino()

    try:

        miont = ontology.ontology('MI')

    except Exception:

        # best effort: without the ontology all PTM types are `unknown`
        miont = {}

    dmi = []
    ddi = []

    prg = progress.Progress(len(domino), 'Processing DOMINO', 11)

    ptm_types = {
        "o4'-phospho-tyrosine": 'phosphorylation',
        'phosphorylated residue': 'phosphorylation',
        'o-phospho-threonine': 'phosphorylation',
        'o-phospho-serine': 'phosphorylation',
        'n6-methyl-lysine': 'methylation',
        'n6,n6,n6-trimethyl-lysine': 'trimethylation',
        'n6,n6-dimethyl-lysine': 'dimethylation',
        'acetylated residue': 'acetylation',
    }

    for l in domino:

        prg.step()

        has_sites = (
            l[14].strip() != '' or
            l[15].strip() != '' or
            (l[10] != '' and l[11] != '')
        )

        if not (has_sites and l[0] and l[1]):

            continue

        uniprot1 = l[0]
        uniprot2 = l[1]

        # ptms
        # NOTE(review): the PTM fields of partner A (14, 16, 20) are
        # attached to `uniprot_B`, same as before; confirm this against
        # the meaning of the DOMINO columns. The corresponding fields of
        # partner B (15, 17, 21) used to be parsed but never used, hence
        # they are not processed here.
        if '-' not in l[14] and '-' not in l[15]:

            try:

                residues = (
                    [int(x) for x in l[14].split(';')]
                    if l[14] else
                    []
                )

            except ValueError:

                _log(
                    'Could not process PTM residue numbers '
                    'in DOMINO record: %s' % (l,)
                )
                residues = []

            mi_ids = _domino_padded(l[16], len(residues))
            windows = _domino_padded(l[20], len(residues))

            for resnum, mi_id, window in zip(residues, mi_ids, windows):

                # PTM names missing from `ptm_types` result `unknown`
                # type instead of a KeyError
                typ = ptm_types.get(miont[mi_id]) if mi_id in miont else None

                res = intera.Residue(
                    resnum,
                    _domino_residue_name(window, resnum),
                    uniprot2,
                )
                ptm = intera.Ptm(
                    uniprot2,
                    typ = typ or 'unknown',
                    residue = res,
                    evidences = evidence.Evidence(
                        resource = domino_resource,
                    ),
                )
                dom = intera.Domain(uniprot1)
                dm = intera.DomainMotif(
                    domain = dom,
                    ptm = ptm,
                    evidences = evidence.Evidence(
                        resource = domino_resource,
                        references = l[5].split(';'),
                    ),
                )
                dmi.append(dm)

        # binding sites
        if l[10] and l[11]:

            try:

                ranges1 = _domino_ranges(l[10])
                ranges2 = _domino_ranges(l[11])

            except (ValueError, IndexError):

                # one malformed record should not discard all the others
                _log(
                    'Error processing binding sites '
                    'in DOMINO record: %s' % (l,)
                )
                continue

            bs1 = _domino_binding_sites(
                uniprot1, l[2], l[26], ranges1, domino_resource
            )
            bs2 = _domino_binding_sites(
                uniprot2, l[3], l[27], ranges2, domino_resource
            )

            # all binding sites of a partner are domains if it has an
            # InterPro ID, otherwise all of them are motifs (Ptm objects)
            domains1 = bool(l[26])
            domains2 = bool(l[27])

            for one, two in itertools.product(bs1, bs2):

                if domains1 and domains2:

                    dd = intera.DomainDomain(
                        one,
                        two,
                        sources = 'DOMINO',
                    )
                    ddi.append(dd)

                elif domains1:

                    dm = intera.DomainMotif(
                        domain = one,
                        ptm = two,
                        evidences = evidence.Evidence(
                            resource = domino_resource,
                            # field 5 is the references (6 is a taxon)
                            references = l[5].split(';'),
                        ),
                    )
                    dmi.append(dm)

                elif domains2:

                    dm = intera.DomainMotif(
                        domain = two,
                        ptm = one,
                        evidences = evidence.Evidence(
                            resource = domino_resource,
                            references = l[5].split(';'),
                        ),
                    )
                    dmi.append(dm)

    prg.terminate()

    return {'ddi': ddi, 'dmi': dmi}