# Source code for pypath.inputs.depod
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import re
import itertools
import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.mapping as mapping
import pypath.utils.taxonomy as taxonomy
import pypath.share.common as common
[docs]
def depod_interactions(organism = 9606):
    """
    Extracts interactions from the DEPOD MITAB download.

    The file is fetched from the second URL registered under ``depod``.
    Its first line is treated as a column header and skipped. Blank or
    truncated rows (fewer than 15 tab separated fields) are skipped too.

    Args:
        organism: Taxonomy ID (int) that both interactors of a row must
            carry for the row to be included. With ``None`` rows are kept
            whatever their species fields say.

    Returns:
        List of 5-tuples of strings: UniProt ID of interactor A, UniProt
        ID of interactor B, field 8 with ``pubmed:`` removed, field 14
        with ``curator score:`` removed, and the parenthesised label of
        field 11 (field numbers are 0-based). Rows where no UniProt ID
        was found for either interactor are left out.

    Raises:
        ValueError, IndexError: If the species fields (9, 10) or the
            interaction type field (11) of a complete row do not have the
            ``prefix:value(label)`` layout the parsing relies on.
    """

    url = urls.urls['depod']['urls'][1]
    c = curl.Curl(url, silent = False, large = True, encoding = 'iso-8859-1')

    interactions = []

    for lnum, line in enumerate(c.result):

        # the first line holds the column names
        if lnum == 0:
            continue

        fields = line.replace('\n', '').replace('\r', '').split('\t')

        # the highest field index read below is 14; anything shorter
        # (e.g. an empty line at the end of the file) cannot be parsed
        if len(fields) < 15:
            continue

        # species fields look like ``prefix:<number>(label)``
        spec_a = int(fields[9].split(':')[1].split('(')[0])
        spec_b = int(fields[10].split(':')[1].split('(')[0])

        if organism is not None and (
            spec_a != organism or
            spec_b != organism
        ):
            continue

        pubmed = fields[8].replace('pubmed:', '')
        score = fields[14].replace('curator score:', '')
        itype = fields[11].split('(')[1].replace(')', '')

        uniprot_a = _depod_uniprot_from_ids(fields[0])
        uniprot_b = _depod_uniprot_from_ids(fields[1])

        # both partners must have been resolved to a UniProt ID
        if uniprot_a and uniprot_b:
            interactions.append(
                (uniprot_a, uniprot_b, pubmed, score, itype)
            )

    return interactions


def _depod_uniprot_from_ids(ids):
    """
    Picks the UniProt ID out of one MITAB identifier field.

    Args:
        ids: String of ``|`` separated ``database:identifier`` items.

    Returns:
        The identifier of the last ``uniprotkb`` item that is exactly 6
        characters long, or an empty string if there is none.
    """

    uniprot = ''

    for item in ids.split('|'):

        parts = item.split(':')

        # items without a colon (e.g. a bare ``-``) have no database
        # prefix, hence nothing to pick from them
        if len(parts) < 2:
            continue

        accession = parts[1].strip()

        # NOTE(review): only 6 character IDs are accepted here, so the
        # 10 character UniProt accessions are ignored -- confirm whether
        # that is intended
        if parts[0] == 'uniprotkb' and len(accession) == 6:
            uniprot = accession

    return uniprot
[docs]
def depod_enzyme_substrate(organism = 9606):
    """
    Compiles dephosphorylation site records from the two DEPOD downloads.

    The substrate table (first URL registered under ``depod``) supplies
    the substrate type, species, residues and references; the MITAB file
    (second URL) supplies the UniProt IDs. After dropping the header line
    of each, the two files are paired row by row, i.e. the n-th data row
    of one is taken to describe the same relation as the n-th data row of
    the other.

    A row of the substrate table is used only if it has more than 6
    fields, its field 2 is ``protein substrate``, the species in field 3
    resolves to ``organism`` and field 4 is not ``N/A`` (field numbers
    are 0-based). Rows whose MITAB counterpart is missing or contains no
    ``uniprotkb:`` item for either partner are skipped.

    Args:
        organism: Value the result of ``taxonomy.ensure_ncbi_tax_id`` on
            the species field is compared to -- presumably an NCBI
            Taxonomy ID; verify against that function.

    Returns:
        List of dicts, one for each combination of mapped enzyme ID,
        mapped substrate ID and residue found in field 4. Keys:
        ``instance``, ``start``, ``end`` (always ``None``), ``kinase``
        (the enzyme), ``substrate``, ``resaa``, ``resnum`` (int),
        ``references`` (list of str) and ``typ`` (always
        ``'dephosphorylation'``).
    """

    result = []

    reunip = re.compile(r'uniprotkb:([A-Z0-9]+)')
    reptm = re.compile(r'([A-Z][a-z]{2})-([0-9]+)')
    repmidsep = re.compile(r'[,|]\s?')

    url = urls.urls['depod']['urls'][0]
    c = curl.Curl(url, silent = False, encoding = 'ascii')
    # the first line is the header
    data = [x.split('\t') for x in c.result.split('\n')][1:]

    url_mitab = urls.urls['depod']['urls'][1]
    c_mitab = curl.Curl(url_mitab, silent = False, encoding = 'iso-8859-1')
    data_mitab = [x.split('\t') for x in c_mitab.result.split('\n')][1:]

    # `zip` stops at the shorter file, so a substrate row without a
    # MITAB counterpart is never looked up
    for row, row_mitab in zip(data, data_mitab):

        if not (
            len(row) > 6 and
            row[2] == 'protein substrate' and
            taxonomy.ensure_ncbi_tax_id(
                row[3].split('(')[0].strip()
            ) == organism and
            row[4].strip() != 'N/A'
        ):
            continue

        # both interactor columns are needed from the MITAB row
        if len(row_mitab) < 2:
            continue

        enzyme_match = reunip.search(row_mitab[0])
        substrate_match = reunip.search(row_mitab[1])

        # no UniProt ID for one of the partners: nothing to report
        if enzyme_match is None or substrate_match is None:
            continue

        # residues and references depend only on the current row, hence
        # they are worked out once, outside of the ID combinations
        sites = [
            (
                common.aminoa_3_to_1_letter[resaa]
                if resaa in common.aminoa_3_to_1_letter else
                resaa,
                int(resnum),
            )
            for resaa, resnum in reptm.findall(row[4])
        ]
        references = repmidsep.split(row[6].strip())

        # `map_name` may yield several IDs for one input, all their
        # combinations are reported
        for enzyme_up, substrate_up in itertools.product(
            mapping.map_name(
                enzyme_match.group(1),
                'uniprot',
                'uniprot'
            ),
            mapping.map_name(
                substrate_match.group(1),
                'uniprot',
                'uniprot'
            ),
        ):

            for resaa, resnum in sites:

                result.append({
                    'instance': None,
                    'kinase': enzyme_up,
                    'resaa': resaa,
                    'resnum': resnum,
                    # every record gets a list of its own
                    'references': list(references),
                    'substrate': substrate_up,
                    'start': None,
                    'end': None,
                    'typ': 'dephosphorylation',
                })

    return result