Source code for pypath.inputs.mimp
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import re
import pypath.share.curl as curl
import pypath.resources.urls as urls
[docs]
def mimp_enzyme_substrate():
    """
    Retrieve and parse the MIMP kinase-substrate table.

    Downloads the tab separated table from ``urls.urls['mimp']['url']``,
    drops its first line and processes every row that has at least seven
    fields and a non-empty third field. The third field is a semicolon
    separated list of kinase names. Names ending with ``GROUP`` are
    replaced by the members of the first matching group, family or
    subfamily (looked up in this order) from ``get_kinase_class``; a
    ``GROUP`` name without any match contributes no kinase.

    Returns:
        list of dict: one record for each kinase of each row, with the
        keys ``instance``, ``kinase``, ``resaa``, ``resnum``, ``npmid``,
        ``substrate_refseq``, ``substrate``, ``start``, ``end`` and
        ``databases``. ``None`` if the download returned no data.

    Raises:
        IndexError: if a row with at least one kinase has fewer than
            eight characters in its fifth field.
        ValueError: if the fourth or the sixth field of such a row can not
            be converted to an integer after removing every character
            which is not a digit, a dot or a hyphen.
    """

    db_names = {
        'PhosphoSitePlus': 'PhosphoSite',
        'PhosphoELM': 'phosphoELM',
    }
    # tried in this order when resolving a `..._GROUP` kinase label
    class_levels = ('groups', 'families', 'subfamilies')

    result = []
    non_digit = re.compile(r'[^\d.-]+')
    motre = re.compile(r'(-*)([A-Za-z]+)(-*)')
    url = urls.urls['mimp']['url']
    c = curl.Curl(url, silent = False)
    data = c.result

    if data is None:

        return None

    # the classification is fetched only once we know there is a table
    # to parse, so a failed download does not trigger a second one
    kclass = get_kinase_class()

    rows = [line.split('\t') for line in data.split('\n')]

    # the first line is skipped (presumably the column header)
    for row in rows[1:]:

        if len(row) <= 6 or not row[2]:

            continue

        kinases_gnames = []

        for k in row[2].split(';'):

            if k.endswith('GROUP'):

                grp = k.split('_')[0]

                for level in class_levels:

                    if grp in kclass[level]:

                        kinases_gnames += kclass[level][grp]
                        break

            else:

                kinases_gnames.append(k)

        if not kinases_gnames:

            # nothing to emit; the fields below are left unparsed, hence
            # a malformed row without kinases can not raise
            continue

        # everything below depends only on the row, not on the kinase
        window = row[4]
        # index 7 is taken as the modified residue -- presumably the
        # centre of a 15 character sequence window; TODO confirm
        resaa = window[7]
        resnum = int(non_digit.sub('', row[3]))
        mot = motre.match(window)

        if mot:

            # leading and trailing hyphens shorten the covered range
            start = resnum - 7 + len(mot.group(1))
            end = resnum + 7 - len(mot.group(3))
            instance = window.replace('-', '').upper()

        else:

            start = None
            end = None
            instance = window

        databases = [db_names.get(db, db) for db in row[6].split(';')]
        npmid = int(non_digit.sub('', row[5]))

        for k in kinases_gnames:

            result.append({
                'instance': instance,
                'kinase': k.upper(),
                'resaa': resaa,
                'resnum': resnum,
                'npmid': npmid,
                'substrate_refseq': row[1],
                'substrate': row[0],
                'start': start,
                'end': end,
                # a separate list for each record, so that modifying one
                # record does not affect the others
                'databases': list(databases),
            })

    return result
[docs]
def get_kinase_class():
    """
    Retrieve and parse the kinase classification table.

    Downloads the table from ``urls.urls['kinclass']['rescued']``, removes
    every run of three or more tab characters, skips the first nine lines
    and processes each remaining line that has at least five tab separated
    fields. The first field is the kinase name, from which a trailing
    ``ps`` optionally followed by digits is removed; the third, fourth and
    fifth fields are the group, the family and the subfamily.

    Returns:
        dict: with the keys ``groups``, ``families`` and ``subfamilies``,
        each a dict mapping a name to the list of its kinases (in the
        order of the table, repeated entries are kept), and ``kinases``,
        a dict mapping each kinase to a dict with the keys ``group``,
        ``family`` and ``subfamily``. All four are empty if the download
        returned no data.
    """

    result = {'groups': {}, 'families': {}, 'subfamilies': {}, 'kinases': {}}
    tabs = re.compile(r'[\t]{3,}')
    reps = re.compile(r'ps[0-9]*$')
    url = urls.urls['kinclass']['rescued']
    c = curl.Curl(url, silent = False)
    data = c.result

    if data is None:

        # failed download: return the empty classification instead of
        # raising TypeError from the regex substitution below
        return result

    data = tabs.sub('', data)

    # the first nine lines are skipped (presumably a preamble)
    for line in data.split('\n')[9:]:

        fields = line.split('\t')

        if len(fields) <= 4:

            continue

        kinase = reps.sub('', fields[0])
        group, family, subfamily = fields[2:5]

        result['groups'].setdefault(group, []).append(kinase)
        result['families'].setdefault(family, []).append(kinase)
        result['subfamilies'].setdefault(subfamily, []).append(kinase)
        result['kinases'][kinase] = {
            'group': group,
            'family': family,
            'subfamily': subfamily
        }

    return result
[docs]
def mimp_interactions():
    """
    Kinase-substrate interactions from MIMP.

    Reduces the records of ``mimp_enzyme_substrate`` to their ``kinase``,
    ``substrate`` and ``databases`` values.

    Returns:
        list of list: one ``[kinase, substrate, databases]`` item for each
        enzyme-substrate record; an empty list if no records are
        available.
    """

    mimp = mimp_enzyme_substrate()

    if mimp is None:

        # `mimp_enzyme_substrate` returns None if its download yielded no
        # data; iterating over None would raise TypeError
        return []

    return [
        [m['kinase'], m['substrate'], m['databases']]
        for m in mimp
    ]