#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import re
import itertools
import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.mapping as mapping
import pypath.utils.taxonomy as taxonomy
import pypath.share.common as common
[docs]
def depod_interactions(organism=9606):
    """
    Download and parse interactions from the DEPOD tab-separated file.

    The resource at ``urls.urls['depod']['urls'][1]`` is read line by line;
    the first line is treated as a header and skipped. From each row the
    parser reads columns 0 and 1 (``|``-separated ``db:identifier`` lists
    of the two partners), 8 (``pubmed:`` reference), 9 and 10
    (``taxid:<id>(<name>)`` of the partners), 11 (interaction type, the
    text in parentheses) and 14 (``curator score:``).

    Args:
        organism: NCBI Taxonomy ID that both partners must have. ``None``
            disables the organism filter.

    Returns:
        list: One tuple per accepted row,
        ``(uniprot_a, uniprot_b, pubmed, score, interaction_type)``, all
        of them strings. Rows where either partner lacks a six-character
        ``uniprotkb`` accession are left out.

    Raises:
        IndexError, ValueError: If the taxonomy or the interaction type
            column of a complete row does not have the expected layout.
    """
    url = urls.urls['depod']['urls'][1]
    c = curl.Curl(url, silent=False, large=True, encoding='iso-8859-1')
    interactions = []

    # The first line of the file is the column header.
    for line in itertools.islice(c.result, 1, None):
        fields = line.replace('\n', '').replace('\r', '').split('\t')

        # Blank or truncated rows (e.g. a trailing empty line) lack the
        # columns read below; the highest index used is 14.
        if len(fields) < 15:
            continue

        taxon_a = int(fields[9].split(':')[1].split('(')[0])
        taxon_b = int(fields[10].split(':')[1].split('(')[0])

        if organism is not None and (
            taxon_a != organism or taxon_b != organism
        ):
            continue

        pubmed = fields[8].replace('pubmed:', '')
        score = fields[14].replace('curator score:', '')
        interaction_type = fields[11].split('(')[1].replace(')', '')

        uniprot_a = _depod_uniprot_ac(fields[0])
        uniprot_b = _depod_uniprot_ac(fields[1])

        # An empty string means no usable accession was found for a partner.
        if len(uniprot_a) > 1 and len(uniprot_b) > 1:
            interactions.append(
                (uniprot_a, uniprot_b, pubmed, score, interaction_type)
            )

    return interactions


def _depod_uniprot_ac(identifiers):
    """
    Pick the six-character ``uniprotkb`` accession from an identifier list.

    Args:
        identifiers: ``|``-separated ``db:identifier`` entries.

    Returns:
        str: The accession of the last qualifying entry, or an empty
        string if no entry qualifies.
    """
    uniprot = ''

    for identifier in identifiers.split('|'):
        parts = identifier.split(':')

        # Placeholders without a ``db:identifier`` form carry no accession.
        if len(parts) < 2:
            continue

        accession = parts[1].strip()

        # Only accessions of exactly six characters are accepted, so longer
        # forms of the identifier are ignored.
        if parts[0] == 'uniprotkb' and len(accession) == 6:
            uniprot = accession

    return uniprot
[docs]
def depod_enzyme_substrate(organism=9606):
    """
    Collect dephosphorylation site records from the two DEPOD files.

    Two resources are downloaded: the tab-separated table at
    ``urls.urls['depod']['urls'][0]`` and the MITAB file at
    ``urls.urls['depod']['urls'][1]``. The first line of each is dropped
    as a header and the remaining rows are paired by position: the table
    supplies the substrate type (column 2), organism (column 3), sites
    (column 4, e.g. ``Ser-15``) and references (column 6), while the MITAB
    row supplies the ``uniprotkb:`` accessions of the enzyme (column 0)
    and the substrate (column 1).

    Args:
        organism: NCBI Taxonomy ID the table row must resolve to (via
            ``taxonomy.ensure_ncbi_tax_id``).

    Returns:
        list: One dict per (enzyme, substrate, site) combination with the
        keys ``instance``, ``kinase`` (the enzyme), ``resaa``, ``resnum``,
        ``references``, ``substrate``, ``start``, ``end`` and ``typ``
        (always ``'dephosphorylation'``).
    """
    re_uniprot = re.compile(r'uniprotkb:([A-Z0-9]+)')
    re_site = re.compile(r'([A-Z][a-z]{2})-([0-9]+)')
    re_refsep = re.compile(r'[,|]\s?')

    url = urls.urls['depod']['urls'][0]
    c = curl.Curl(url, silent=False, encoding='ascii')
    # Drop the header line of the table.
    table = [row.split('\t') for row in c.result.split('\n')][1:]

    url_mitab = urls.urls['depod']['urls'][1]
    c_mitab = curl.Curl(url_mitab, silent=False, encoding='iso-8859-1')
    # Drop the header line of the MITAB file.
    mitab = [row.split('\t') for row in c_mitab.result.split('\n')][1:]

    result = []

    # Rows are matched by position; ``zip`` stops at the shorter file, so a
    # table row without a MITAB counterpart is ignored instead of failing.
    for row, mitab_row in zip(table, mitab):
        if (
            len(row) <= 6 or
            row[2] != 'protein substrate' or
            row[4].strip() == 'N/A'
        ):
            continue

        taxon = taxonomy.ensure_ncbi_tax_id(row[3].split('(')[0].strip())

        if taxon != organism:
            continue

        # A blank or truncated MITAB row cannot name both partners.
        if len(mitab_row) < 2:
            continue

        enzyme_match = re_uniprot.search(mitab_row[0])
        substrate_match = re_uniprot.search(mitab_row[1])

        # Skip rows where either partner has no UniProt accession.
        if enzyme_match is None or substrate_match is None:
            continue

        # Sites and references depend only on the table row, so they are
        # parsed once rather than for every enzyme-substrate pair.
        sites = [
            (
                (
                    common.aminoa_3_to_1_letter[resaa]
                    if resaa in common.aminoa_3_to_1_letter else
                    resaa
                ),
                int(resnum),
            )
            for resaa, resnum in re_site.findall(row[4])
        ]

        if not sites:
            continue

        references = re_refsep.split(row[6].strip())

        enzymes = mapping.map_name(
            enzyme_match.group(1),
            'uniprot',
            'uniprot',
        )
        substrates = mapping.map_name(
            substrate_match.group(1),
            'uniprot',
            'uniprot',
        )

        for enzyme_up, substrate_up in itertools.product(enzymes, substrates):
            for resaa, resnum in sites:
                result.append({
                    'instance': None,
                    'kinase': enzyme_up,
                    'resaa': resaa,
                    'resnum': resnum,
                    # Each record gets its own list, so changing one
                    # record's references does not affect the others.
                    'references': list(references),
                    'substrate': substrate_up,
                    'start': None,
                    'end': None,
                    'typ': 'dephosphorylation',
                })

    return result
Copy to clipboard