Source code for pypath.inputs.dbptm
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from future.utils import iteritems
import re
import bs4
import pypath.utils.taxonomy as taxonomy
import pypath.share.curl as curl
import pypath.resources.urls as urls
[docs]
def dbptm_enzyme_substrate(organism = 9606):
    """
    Downloads enzyme-substrate interactions from dbPTM.

    Retrieves the tab separated table from the URL stored under
    ``urls.urls['dbptm']['old_table']`` and converts each row to a dict
    keyed by the column names found in the header (first) line.

    :param organism:
        A key of ``taxonomy.dbptm_taxids`` or ``None``. The value is only
        validated here: an unknown organism results in an empty list.
        The rows are not filtered by organism in this function.

    :return:
        List of dicts, one for each row after the header. Empty fields
        are ``None``; the ``references`` and ``kinase`` fields are lists
        (split at ``;``); values consisting only of digits are converted
        to ``int``; everything else is kept as string.
    """

    # `sys` is not among the module level imports visible in this file,
    # a function scope import keeps the error branch below working.
    import sys

    if organism is not None and organism not in taxonomy.dbptm_taxids:

        # `%s` instead of `%u`: the latter raises TypeError for
        # non-numeric organism identifiers.
        sys.stdout.write('\t:: Unknown organism: `%s`.\n' % organism)

        return []

    list_fields = {'references', 'kinase'}

    def _convert(key, val):
        """Converts one raw field to None, list, int or str."""

        val = val.strip()

        if val == '':

            return None

        if key in list_fields:

            return val.split(';')

        return int(val) if val.isdigit() else val

    url = urls.urls['dbptm']['old_table']
    c = curl.Curl(url, silent = False, large = True)

    # An empty table has no header line: return no records instead of
    # letting StopIteration escape.
    header = next(c.result, None)

    if header is None:

        return []

    # Only the line terminators are removed before splitting: stripping
    # the whole line would also remove leading and trailing tabs, i.e.
    # empty first or last fields, and misalign the values and the column
    # names. Whitespace is stripped from each field individually.
    hdr = [name.strip() for name in header.rstrip('\r\n').split('\t')]

    return [
        {
            key: _convert(key, val)
            for key, val in zip(hdr, line.rstrip('\r\n').split('\t'))
        }
        for line in c.result
    ]
[docs]
def dbptm_enzyme_substrate_old(organism = 9606):
    """
    Downloads enzyme-substrate interactions from dbPTM.

    Processes each URL listed under ``urls.urls['dbptm']['urls']``. Every
    download is expected to provide a dict of file contents; each content
    is split to lines and tab separated fields, and lines with fewer than
    9 fields are ignored.

    :param organism:
        A key of ``taxonomy.dbptm_taxids`` or ``None``. If not ``None``,
        only the rows where the part after the underscore in the first
        field equals ``taxonomy.dbptm_taxids[organism]`` are kept. An
        unknown organism results in an empty list.

    :return:
        List of dicts with the keys ``substrate``, ``typ``, ``resaa``,
        ``resnum``, ``instance``, ``references``, ``databases``,
        ``kinase``, ``start`` and ``end``. ``kinase`` is either a list of
        non-empty names or ``None``.
    """

    # `sys` is not among the module level imports visible in this file,
    # a function scope import keeps the error branch below working.
    import sys

    if organism is None:

        _organism = None

    elif organism in taxonomy.dbptm_taxids:

        _organism = taxonomy.dbptm_taxids[organism]

    else:

        # `%s` instead of `%u`: the latter raises TypeError for
        # non-numeric organism identifiers.
        sys.stdout.write('\t:: Unknown organism: `%s`.\n' % organism)

        return []

    result = []
    # captures the text following "by " in the 4th field
    byre = re.compile(r'.*by\s([A-Za-z0-9\s]+)\.*')
    # separators between multiple enzymes within the captured text
    andre = re.compile(r',|and')
    # everything except digits, dots and hyphens
    non_digit = re.compile(r'[^\d.-]+')

    for url in urls.urls['dbptm']['urls']:

        c = curl.Curl(url, silent = False)
        extra = c.result

        for _fname, content in iteritems(extra):

            for line in content.split('\n'):

                l = line.split('\t')

                if len(l) <= 8:

                    continue

                if _organism:

                    mnemonic = l[0].split('_')[1].strip()

                    if mnemonic != _organism:

                        continue

                resnum = int(non_digit.sub('', l[2]))
                substrate = l[1]

                # the pattern is matched only once for each row
                by_match = byre.match(l[3])
                kinases = None

                if by_match is not None:

                    kinases = [
                        name.strip()
                        for name in andre.split(by_match.group(1))
                    ]

                    if 'autocatalysis' in kinases:

                        # in case of autocatalysis the substrate itself
                        # is the enzyme
                        kinases.append(substrate)
                        kinases.remove('autocatalysis')

                    kinases = [
                        name.replace('host', '').strip()
                        for name in kinases
                    ]
                    # empty names are dropped; no names left means
                    # no enzyme
                    kinases = [name for name in kinases if name] or None

                result.append({
                    'substrate': substrate,
                    'typ': l[7].lower(),
                    # the 7th character of the 9th field
                    'resaa': l[8][6],
                    'resnum': resnum,
                    'instance': l[8].strip(),
                    'references': l[4].split(';'),
                    'databases': (l[5].split()[0],),
                    'kinase': kinases,
                    'start': resnum - 6,
                    'end': resnum + 6,
                })

    return result
[docs]
def dbptm_interactions():
    """
    Builds enzyme-substrate interaction records from dbPTM.

    Calls ``dbptm_enzyme_substrate`` with its default arguments and
    creates one record for each enzyme of each row that has any enzyme
    under the ``kinase`` key.

    :return:
        List of lists of three elements: the enzyme, the substrate and
        the references joined by ``;`` (``-`` placeholders omitted).
    """

    result = []

    for r in dbptm_enzyme_substrate():

        enzymes = r.get('kinase')

        if enzymes is None:

            continue

        # `references` is None for rows where the field is empty:
        # treat it as no references instead of iterating over None.
        refs = ';'.join(
            ref
            for ref in (r.get('references') or ())
            if ref != '-'
        )

        result.extend(
            [src, r['substrate'], refs]
            for src in enzymes
        )

    return result