Source code for pypath.inputs.phosphoelm
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from future.utils import iteritems

import re
import sys

import bs4

import pypath.utils.taxonomy as taxonomy
import pypath.share.curl as curl
import pypath.resources.urls as urls
[docs]
def phosphoelm_enzyme_substrate(organism = 9606, ltp_only = True):
    """
    Downloads kinase-substrate interactions from phosphoELM.

    Returns list of dicts.

    :param int organism: NCBI Taxonomy ID; it is looked up in
        ``taxonomy.phosphoelm_taxids`` and the value found there is
        compared to the organism column of each row. ``None`` disables
        the organism filter.
    :param bool ltp_only: Include only low-throughput interactions,
        i.e. rows whose experiment column equals ``LTP``.

    :return: List of dicts with the keys ``instance``, ``isoform``,
        ``resaa``, ``resnum``, ``start``, ``end``, ``substrate``,
        ``kinase``, ``references``, ``experiment`` and ``organism``.
        An empty list if `organism` is neither ``None`` nor a key of
        ``taxonomy.phosphoelm_taxids``.
    """

    non_digit = re.compile(r'[^\d.-]+')

    if organism is None:
        _organism = None
    elif organism in taxonomy.phosphoelm_taxids:
        _organism = taxonomy.phosphoelm_taxids[organism]
    else:
        # `%s` with a one-element tuple formats any value; `%u` raised
        # TypeError when a non-numeric organism (e.g. a name) was given.
        sys.stdout.write('\t:: Unknown organism: `%s`.\n' % (organism,))
        return []

    url = urls.urls['p_elm']['url']
    c = curl.Curl(url, silent = False)
    psites_prefix = urls.urls['p_elm']['psites']

    # The download result is iterated as key-value pairs; only the entries
    # whose key starts with the `psites` prefix are kept, and the first of
    # them is used (presumably these are member files of an archive).
    contents = [
        content
        for fname, content in iteritems(c.result)
        if fname.startswith(psites_prefix)
    ]
    table = contents[0] if contents else ''
    rows = [line.split('\t') for line in table.split('\n')]
    kinases = phosphoelm_kinases()
    # The first line is discarded (presumably the column header).
    del rows[0]

    result = []

    for row in rows:

        # Only complete, 9-column rows are processed.
        if len(row) != 9:
            continue

        if _organism is not None and row[7] != _organism:
            continue

        if ltp_only and row[6] != 'LTP':
            continue

        # The first column is `<accession>` or `<accession>-<isoform>`;
        # the isoform defaults to 1 if there is no suffix.
        acc_parts = row[0].split('-')
        substrate = acc_parts[0]
        isoform = int(acc_parts[1]) if len(acc_parts) > 1 else 1

        # Rows without a kinase, or with a kinase missing from the
        # table returned by `phosphoelm_kinases`, are skipped.
        kinase_name = row[5]

        if not kinase_name or kinase_name not in kinases:
            continue

        result.append({
            'instance': None,
            'isoform': isoform,
            'resaa': row[3],
            # Everything except digits, dots and dashes is stripped
            # before the conversion to integer.
            'resnum': int(non_digit.sub('', row[2])),
            'start': None,
            'end': None,
            'substrate': substrate,
            'kinase': kinases[kinase_name],
            'references': row[4].split(';'),
            'experiment': row[6],
            'organism': row[7]
        })

    return result
[docs]
def phosphoelm_interactions(organism = 'Homo sapiens'):
    """
    Kinase-substrate interactions from phosphoELM as a list of lists.

    Only low-throughput records are retrieved
    (``phosphoelm_enzyme_substrate`` is called with ``ltp_only = True``).

    :param organism: Either a key of ``taxonomy.phosphoelm_taxids``
        (the NCBI Taxonomy ID expected by ``phosphoelm_enzyme_substrate``)
        or one of its values (presumably the organism name as it is
        written in phosphoELM, like the default). ``None`` disables the
        organism filter.

    :return: List of lists, each consisting of kinase, substrate,
        references joined by ``;`` and organism. An empty list if
        `organism` is neither a key nor a value of
        ``taxonomy.phosphoelm_taxids``.
    """

    taxids = taxonomy.phosphoelm_taxids

    if organism is None or organism in taxids:
        ncbi_tax_id = organism
    else:
        # The argument used to be ignored altogether; here an organism
        # name is translated to the ID the enzyme-substrate function
        # expects by inverting the ID-to-name mapping.
        name_to_id = dict(
            (name, tax_id)
            for tax_id, name in iteritems(taxids)
        )

        if organism not in name_to_id:
            return []

        ncbi_tax_id = name_to_id[organism]

    records = phosphoelm_enzyme_substrate(
        organism = ncbi_tax_id,
        ltp_only = True,
    )

    return [
        [
            rec['kinase'],
            rec['substrate'],
            ';'.join(rec['references']),
            rec['organism'],
        ]
        for rec in records
    ]
[docs]
def phosphoelm_kinases():
    """
    Reads the kinase table from the phosphoELM web page.

    Downloads the page at ``urls.urls['p_elm_kin']['url']``, parses it as
    HTML and processes the rows of the second ``<table>`` element.

    :return: Dict mapping the text of the first cell of each row to the
        stripped text of the third cell. Rows with fewer than three cells
        or with an empty third cell are omitted.
        ``phosphoelm_enzyme_substrate`` uses the kinase column of the
        phosphoELM table as key in this dict.
    """

    page = curl.Curl(urls.urls['p_elm_kin']['url'], silent = False).result
    tables = bs4.BeautifulSoup(page, 'html.parser').find_all('table')
    mapping = {}

    # The second table of the page is the one processed.
    for tr in tables[1].find_all('tr'):

        cells = [td.text for td in tr.find_all('td')]

        if len(cells) <= 2:
            continue

        value = cells[2].strip()

        if value:
            mapping[cells[0]] = value

    return mapping