Source code for pypath.inputs.domino

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from past.builtins import xrange, range

import re
import itertools
import collections

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.session as session
import pypath.share.progress as progress
import pypath.inputs.ontology as ontology
import pypath.internals.intera as intera
import pypath.internals.resource as resource_internals
import pypath.core.evidence as evidence

_logger = session.Logger(name = 'domino_input')
_log = _logger._log


# Record type of the parsed DOMINO table; one instance per data row.
DominoRecord = collections.namedtuple(
    'DominoRecord',
    (
        'uniprot_A',
        'uniprot_B',
        'isoform_A',
        'isoform_B',
        'exp_method',
        'references',
        'taxon_A',
        'taxon_B',
        'role_A',
        'role_B',
        'binding_site_range_A',
        'binding_site_range_B',
        'domains_A',
        'domains_B',
        'ptm_residue_A',
        'ptm_residue_B',
        'ptm_type_mi_A',
        'ptm_type_mi_B',
        'ptm_type_A',
        'ptm_type_B',
        'ptm_res_name_A',
        'ptm_res_name_B',
        'mutations_A',
        'mutations_B',
        'mutation_effects_A',
        'mutation_effects_B',
        'domains_interpro_A',
        'domains_interpro_B',
        'negative',
    ),
)


def _domino_group(pattern, value, index = 1):
    """
    Returns capture group `index` of `pattern` matched at the beginning of
    `value`, or None if the pattern does not match.
    """

    match = pattern.match(value)

    return match.group(index) if match else None


def _domino_join(pattern, value, sep):
    """
    Splits `value` by `sep`, extracts the first capture group of `pattern`
    from each piece (an empty string where the pattern does not match) and
    joins the extracted values by semicolons.
    """

    return ';'.join(
        _domino_group(pattern, piece) or ''
        for piece in value.split(sep)
    )


def get_domino(none_values = False, outfile = None):
    """
    Downloads the DOMINO table and parses it into a list of records.

    Lines with fewer than 39 tab separated columns are ignored.

    Args:
        none_values: If False (default), fields which could not be
            extracted are represented by empty strings instead of None.
        outfile: Optional path; if provided, the records are also written
            into this file as a tab separated table with a header line.

    Returns
        A list of `DominoRecord` named tuples with the following fields:
        'uniprot_A', 'uniprot_B', 'isoform_A', 'isoform_B',  #3
        'exp_method', 'references', 'taxon_A', 'taxon_B',  #7
        'role_A', 'role_B', 'binding_site_range_A',
        'binding_site_range_B',  #11
        'domains_A', 'domains_B', 'ptm_residue_A', 'ptm_residue_B',  #15
        'ptm_type_mi_A', 'ptm_type_mi_B', 'ptm_type_A', 'ptm_type_B',  #19
        'ptm_res_name_A', 'ptm_res_name_B', 'mutations_A',
        'mutations_B',  #23
        'mutation_effects_A', 'mutation_effects_B',
        'domains_interpro_A',  #26
        'domains_interpro_B', 'negative'  #28
    """

    result = []

    taxid = re.compile(r'taxid:(.*)\([a-zA-Z ]*\)')
    miont = re.compile(r'MI:[0-9]{4}\((.*)\)')
    binds = re.compile(r'([-0-9]*);.*')
    domai = re.compile(r'.*;.*;.*\((.*)\)')
    dipro = re.compile(r'.*;.*;.+:(IPR[0-9]*).*')
    ptmrs = re.compile(r'([-0-9]*);.*')
    ptmmi = re.compile(r'[0-9]*;(MI:[0-9]*)\(.*\);.*;.*')
    ptmrn = re.compile(
        r'.*sequence:[\s]*[0-9]+-[0-9]+[\s]*:[\s]*([A-Z]{10,}).*'
    )
    ptmty = re.compile(r'[0-9]*;MI:[0-9]*\((.*)\);.*;.*')
    refrs = re.compile(r'(pubmed|doi):["]*([-0-9a-zA-Z\.\(\)/]*)["]*')

    url = urls.urls['domino']['rescued']
    c = curl.Curl(url, silent = False, large = True)
    data = c.result

    # the first line is discarded (presumably the column header);
    # the default keeps an empty stream from raising StopIteration
    next(data, None)

    for line in data:

        r = line.strip().split('\t')

        if len(r) < 39:

            continue

        this_row = [
            # UniProt IDs without the isoform suffix
            None if ':' not in r[0] else r[0].split(':')[1].split('-')[0],
            None if ':' not in r[1] else r[1].split(':')[1].split('-')[0],
            # isoform numbers, `1` if not specified
            '1' if '-' not in r[0] else r[0].split('-')[1],
            '1' if '-' not in r[1] else r[1].split('-')[1],
            _domino_group(miont, r[6]),
            # the second group is the identifier after `pubmed:` or `doi:`
            _domino_group(refrs, r[8], 2),
            _domino_group(taxid, r[9]),
            _domino_group(taxid, r[10]),
            # NOTE(review): role_A is read from column 11 while role_B
            # comes from column 16 -- confirm against the table layout
            _domino_group(miont, r[11]),
            # The original tested column 16 but extracted from column 17,
            # raising AttributeError whenever column 17 did not match.
            # Test and extraction now use the same column.
            # TODO confirm that 16 is the intended column
            _domino_group(miont, r[16]),
            _domino_join(binds, r[32], ','),
            _domino_join(binds, r[33], ','),
            _domino_join(domai, r[32], ','),
            _domino_join(domai, r[33], ','),
            _domino_join(ptmrs, r[34], '|'),
            _domino_join(ptmrs, r[35], '|'),
            _domino_join(ptmmi, r[34], '|'),
            _domino_join(ptmmi, r[35], '|'),
            _domino_join(ptmty, r[34], '|'),
            _domino_join(ptmty, r[35], '|'),
            _domino_join(ptmrn, r[34], '|'),
            _domino_join(ptmrn, r[35], '|'),
            _domino_join(ptmrs, r[36], '|'),
            _domino_join(ptmrs, r[37], '|'),
            _domino_join(ptmty, r[36], '|'),
            _domino_join(ptmty, r[37], '|'),
            _domino_group(dipro, r[32]) or '',
            _domino_group(dipro, r[33]) or '',
            '0' if r[38].strip() == '-' else '1',
        ]

        if not none_values:

            this_row = ['' if x is None else x for x in this_row]

        result.append(DominoRecord(*this_row))

    if outfile:

        _log('Saving data into `%s`.' % outfile)

        with open(outfile, 'w') as outf:

            # the field names of the record serve as the header; the
            # original referred to an undefined variable `header` here
            outf.write('\t'.join(DominoRecord._fields) + '\n')

            for rec in result:

                outf.write(
                    '\t'.join('' if x is None else x for x in rec) + '\n'
                )

    return result
def domino_interactions():
    """
    Selects the DOMINO records usable as interactions.

    A record from `get_domino` is kept if both UniProt IDs (fields 0, 1)
    and the references (field 5) are non-empty, at least one of the
    binding site, PTM or mutation effect fields (10-11, 14-21, 24-25) is
    non-empty, and the `negative` flag (field 28) is not `1`.

    Returns
        A list of records in the same format as returned by `get_domino`.
    """

    selected = []

    for rec in get_domino():

        # both partners must be identified
        if not (rec[0] and rec[1]):

            continue

        # at least one literature reference
        if not ''.join(rec[5]):

            continue

        # at least one structural detail: binding sites, PTMs or
        # mutation effects
        if not ''.join(rec[10:12] + rec[14:22] + rec[24:26]):

            continue

        # records flagged as negative are excluded
        if rec[28] == '1':

            continue

        selected.append(rec)

    return selected
def domino_ddi():
    """
    Returns the `ddi` element (domain-domain interactions) of the dict
    created by `domino_enzsub`.
    """

    return domino_enzsub()['ddi']
def _domino_residue_letter(sequence, resnum):
    """
    Picks the letter of the modified residue from the sequence window
    of a PTM.

    For residue numbers above 10 the letter at index 10 of the window is
    used, otherwise the letter at index `resnum - 1`. Returns None if the
    window is missing or too short for that index.
    """

    if not sequence:

        return None

    idx = 10 if resnum > 10 else resnum - 1

    return sequence[idx] if idx < len(sequence) else None


def _domino_ranges(field):
    """
    Converts a semicolon separated list of `start-end` items into a list
    of `(start, end)` tuples of integers. Empty and `0` items are ignored.

    Raises ValueError or IndexError if an item is not of the form
    `<integer>-<integer>`.
    """

    return [
        (int(item.split('-')[0]), int(item.split('-')[1]))
        for item in field.split(';')
        if item != '' and item != '0'
    ]


def domino_enzsub():
    """
    Builds domain-domain and domain-motif interactions from DOMINO.

    Records with malformed binding site ranges are reported in the log
    and their binding sites are skipped.

    Returns
        A dict of two elements: `ddi` contains domain-domain, while `dmi`
        domain-motif interactions. The latter includes protein-PTM
        interactions.
    """

    domino_resource = resource_internals.EnzymeSubstrateResource(
        name = 'DOMINO',
        input_method = 'domino.domino_enzsub',
    )

    domino = get_domino()

    try:

        miont = ontology.ontology('MI')

    except Exception:

        # best effort: with an empty ontology every PTM type
        # becomes `unknown` below
        _log('Failed to load the MI ontology for DOMINO.')
        miont = {}

    dmi = []
    ddi = []

    prg = progress.Progress(len(domino), 'Processing DOMINO', 11)

    ptm_types = {
        "o4'-phospho-tyrosine": 'phosphorylation',
        'phosphorylated residue': 'phosphorylation',
        'o-phospho-threonine': 'phosphorylation',
        'o-phospho-serine': 'phosphorylation',
        'n6-methyl-lysine': 'methylation',
        'n6,n6,n6-trimethyl-lysine': 'trimethylation',
        'n6,n6-dimethyl-lysine': 'dimethylation',
        'acetylated residue': 'acetylation',
    }

    for rec in domino:

        prg.step()

        has_ptm = (
            rec.ptm_residue_A.strip() != '' or
            rec.ptm_residue_B.strip() != ''
        )
        has_sites = (
            rec.binding_site_range_A != '' and
            rec.binding_site_range_B != ''
        )

        if not (
            (has_ptm or has_sites) and
            len(rec.uniprot_A) > 0 and
            len(rec.uniprot_B) > 0
        ):

            continue

        uniprot1 = rec.uniprot_A
        uniprot2 = rec.uniprot_B

        # ptms
        # NOTE(review): only the `*_A` PTM fields are converted to
        # interactions, and their residues are assigned to `uniprot_B`.
        # The original also parsed the `*_B` fields but never used the
        # results -- confirm whether the reverse direction is needed.
        if (
            '-' not in rec.ptm_residue_A and
            '-' not in rec.ptm_residue_B
        ):

            resnums = (
                [int(x) for x in rec.ptm_residue_A.split(';')]
                if rec.ptm_residue_A else
                []
            )

            type_ids = (
                rec.ptm_type_mi_A.split(';')
                if rec.ptm_type_mi_A else
                [None] * len(resnums)
            )
            # MI terms missing from `ptm_types` give None,
            # which becomes `unknown` below
            typs = [
                ptm_types.get(miont[x]) if x in miont else None
                for x in type_ids
            ]

            windows = (
                rec.ptm_res_name_A.split(';')
                if rec.ptm_res_name_A else
                [None] * len(resnums)
            )
            resnames = [
                _domino_residue_letter(seq, resnums[i]) if seq else None
                for i, seq in enumerate(windows)
            ]

            for i, resnum in enumerate(resnums):

                res = intera.Residue(resnum, resnames[i], uniprot2)
                ptm = intera.Ptm(
                    uniprot2,
                    typ = typs[i] or 'unknown',
                    residue = res,
                    evidences = evidence.Evidence(
                        resource = domino_resource,
                    ),
                )
                dom = intera.Domain(uniprot1)
                dm = intera.DomainMotif(
                    domain = dom,
                    ptm = ptm,
                    evidences = evidence.Evidence(
                        resource = domino_resource,
                        references = rec.references.split(';'),
                    ),
                )
                dmi.append(dm)

        # binding sites
        if not (rec.binding_site_range_A and rec.binding_site_range_B):

            continue

        try:

            ranges1 = _domino_ranges(rec.binding_site_range_A)
            ranges2 = _domino_ranges(rec.binding_site_range_B)

        except (ValueError, IndexError):

            # a malformed range affects only this record, so the
            # processing of the rest of the table continues
            _log(
                'Error processing DOMINO record, '
                'skipping its binding sites: %s' % (rec,)
            )
            continue

        bs1 = []
        bs2 = []

        if rec.domains_interpro_A:

            for start, end in ranges1:

                bs1.append(
                    intera.Domain(
                        protein = uniprot1,
                        domain = rec.domains_interpro_A,
                        start = start,
                        end = end,
                        domain_id_type = 'interpro',
                        isoform = rec.isoform_A,
                    )
                )

        else:

            for start, end in ranges1:

                mot = intera.Motif(
                    protein = uniprot1,
                    start = start,
                    end = end,
                    isoform = rec.isoform_A,
                )
                bs1.append(
                    intera.Ptm(
                        protein = uniprot1,
                        motif = mot,
                        evidences = evidence.Evidence(
                            resource = domino_resource,
                        ),
                        isoform = rec.isoform_A,
                    )
                )

        if rec.domains_interpro_B:

            for start, end in ranges2:

                bs2.append(
                    intera.Domain(
                        protein = uniprot2,
                        domain = rec.domains_interpro_B,
                        start = start,
                        end = end,
                        domain_id_type = 'interpro',
                        isoform = rec.isoform_B,
                    )
                )

        else:

            for start, end in ranges2:

                mot = intera.Motif(
                    protein = uniprot2,
                    start = start,
                    end = end,
                    isoform = rec.isoform_B,
                )
                # NOTE(review): unlike on the A side, no isoform is
                # passed to this Ptm -- confirm if this is intended
                bs2.append(
                    intera.Ptm(
                        protein = uniprot2,
                        motif = mot,
                        evidences = evidence.Evidence(
                            resource = domino_resource,
                        ),
                    )
                )

        for one, two in itertools.product(bs1, bs2):

            kind1 = one.__class__.__name__
            kind2 = two.__class__.__name__

            if kind1 == 'Domain' and kind2 == 'Domain':

                ddi.append(
                    intera.DomainDomain(
                        one,
                        two,
                        sources = 'DOMINO',
                    )
                )

            elif kind1 == 'Domain' and kind2 == 'Ptm':

                dmi.append(
                    intera.DomainMotif(
                        domain = one,
                        ptm = two,
                        evidences = evidence.Evidence(
                            resource = domino_resource,
                            # the original passed the `taxon_A` field
                            # (index 6) as references
                            references = rec.references.split(';'),
                        ),
                    )
                )

            elif kind2 == 'Domain' and kind1 == 'Ptm':

                dmi.append(
                    intera.DomainMotif(
                        domain = two,
                        ptm = one,
                        evidences = evidence.Evidence(
                            resource = domino_resource,
                            references = rec.references.split(';'),
                        ),
                    )
                )

    prg.terminate()

    return {'ddi': ddi, 'dmi': dmi}