#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import re
import pypath.share.curl as curl
import pypath.resources.urls as urls
[docs]
def mimp_enzyme_substrate():
    """
    Download the MIMP table and return its enzyme-substrate records.

    Each usable row of the tab separated table (more than 6 columns and a
    non-empty kinase column) is expanded to one record per kinase. Kinase
    entries ending with ``GROUP`` are replaced by the members of the
    matching group, family or subfamily (first match, in this order) as
    provided by ``get_kinase_class``; group labels matching none of these
    are dropped.

    Returns:
        A list of dicts with the keys ``instance``, ``kinase``, ``resaa``,
        ``resnum``, ``npmid``, ``substrate_refseq``, ``substrate``,
        ``start``, ``end`` and ``databases``; or ``None`` if the download
        yielded no data.

    Raises:
        IndexError: If the sequence column of a processed row is shorter
            than 8 characters.
        ValueError: If the residue number or the reference count of a
            processed row can not be converted to an integer.
    """

    db_names = {
        'PhosphoSitePlus': 'PhosphoSite',
        'PhosphoELM': 'phosphoELM',
    }
    # Levels of the kinase classification, in lookup order.
    class_levels = ('groups', 'families', 'subfamilies')

    non_digit = re.compile(r'[^\d.-]+')
    motre = re.compile(r'(-*)([A-Za-z]+)(-*)')

    url = urls.urls['mimp']['url']
    c = curl.Curl(url, silent=False)
    data = c.result

    if data is None:
        return None

    # Fetched only once we know there is something to process.
    kclass = get_kinase_class()

    result = []

    # The first line is skipped (header).
    for line in data.split('\n')[1:]:
        fields = line.split('\t')

        if len(fields) <= 6 or not fields[2]:
            continue

        kinases_gnames = []

        for kinase in fields[2].split(';'):
            if kinase.endswith('GROUP'):
                grp = kinase.split('_')[0]

                for level in class_levels:
                    if grp in kclass[level]:
                        kinases_gnames += kclass[level][grp]
                        break
            else:
                kinases_gnames.append(kinase)

        # Nothing to emit for this row: skip before parsing its columns.
        if not kinases_gnames:
            continue

        # Everything below depends only on the row, not on the kinase,
        # hence it is computed once per row.
        seq = fields[4]
        mot = motre.match(seq)
        # Index 7 is the middle of the window implied by the +/-7
        # arithmetic below (presumably a 15 residue sequence; not
        # verified here).
        resaa = seq[7]
        resnum = int(non_digit.sub('', fields[3]))

        if mot:
            # Leading/trailing dashes shorten the window at that side.
            start = resnum - 7 + len(mot.group(1))
            end = resnum + 7 - len(mot.group(3))
            instance = seq.replace('-', '').upper()
        else:
            start = None
            end = None
            instance = seq

        npmid = int(non_digit.sub('', fields[5]))
        databases = [db_names.get(db, db) for db in fields[6].split(';')]

        for kinase in kinases_gnames:
            result.append({
                'instance': instance,
                'kinase': kinase.upper(),
                'resaa': resaa,
                'resnum': resnum,
                'npmid': npmid,
                'substrate_refseq': fields[1],
                'substrate': fields[0],
                'start': start,
                'end': end,
                # Own copy for each record, so they can be modified
                # independently.
                'databases': list(databases),
            })

    return result
[docs]
def get_kinase_class():
    """
    Download the kinase classification table and index it by level.

    The first 9 lines of the tab separated table are skipped; rows with
    fewer than 5 columns are ignored. The kinase name is the first column
    with any trailing ``ps`` + digits suffix removed; columns 3, 4 and 5
    are the group, family and subfamily.

    Returns:
        A dict with four keys: ``groups``, ``families`` and
        ``subfamilies`` are dicts mapping each label to the list of its
        kinases (in table order, duplicates kept); ``kinases`` maps each
        kinase to a dict with its ``group``, ``family`` and ``subfamily``.
        If the download yielded no data, all four dicts are empty.
    """

    result = {'groups': {}, 'families': {}, 'subfamilies': {}, 'kinases': {}}
    tabs = re.compile(r'[\t]{3,}')
    reps = re.compile(r'ps[0-9]*$')

    url = urls.urls['kinclass']['rescued']
    c = curl.Curl(url, silent=False)
    data = c.result

    # A failed download gives None, which the regex below can not process.
    if data is None:
        return result

    # Runs of 3 or more tabs are removed before splitting to columns.
    data = tabs.sub('', data)

    for line in data.split('\n')[9:]:
        fields = line.split('\t')

        if len(fields) <= 4:
            continue

        kinase = reps.sub('', fields[0])
        group, family, subfamily = fields[2:5]

        result['groups'].setdefault(group, []).append(kinase)
        result['families'].setdefault(family, []).append(kinase)
        result['subfamilies'].setdefault(subfamily, []).append(kinase)

        # A kinase seen again overwrites its earlier entry.
        result['kinases'][kinase] = {
            'group': group,
            'family': family,
            'subfamily': subfamily,
        }

    return result
[docs]
def mimp_interactions():
    """
    Return the MIMP enzyme-substrate records as interactions.

    Returns:
        A list with one ``[kinase, substrate, databases]`` list for each
        record from ``mimp_enzyme_substrate``; an empty list if that
        function returned ``None`` (no data downloaded).
    """

    mimp = mimp_enzyme_substrate()

    # `mimp_enzyme_substrate` gives None when the download failed;
    # iterating over that would raise TypeError.
    if mimp is None:
        return []

    return [
        [m['kinase'], m['substrate'], m['databases']]
        for m in mimp
    ]
Copy to clipboard