Source code for pypath.inputs.phosphoelm

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import re
import bs4

import pypath.utils.taxonomy as taxonomy
import pypath.share.curl as curl
import pypath.resources.urls as urls



[docs]
def phosphoelm_enzyme_substrate(organism = 9606, ltp_only = True):
    """
    Downloads kinase-substrate interactions from phosphoELM.
    Returns list of dicts.

    Only rows of the phosphosite table that have exactly 9 tab separated
    fields, match the requested organism and (optionally) are labeled
    `LTP` are considered. Rows whose kinase field is empty or is not
    among the keys returned by ``phosphoelm_kinases`` are skipped.

    :param int organism: NCBI Taxonomy ID. If `None`, rows of all
        organisms are included. If the value is not a key of
        ``taxonomy.phosphoelm_taxids``, a message is written to stdout
        and an empty list is returned.
    :param bool ltp_only: Include only low-throughput interactions.

    :return: List of dicts with the keys `instance`, `isoform`, `resaa`,
        `resnum`, `start`, `end`, `substrate`, `kinase`, `references`,
        `experiment` and `organism`. The values of `instance`, `start`
        and `end` are always `None`.
    """

    # Imported locally: the unknown organism branch below writes to stdout
    # and `sys` is not among the top level imports of this module.
    import sys

    result = []
    non_digit = re.compile(r'[^\d.-]+')

    if organism is None:
        _organism = None
    elif organism in taxonomy.phosphoelm_taxids:
        _organism = taxonomy.phosphoelm_taxids[organism]
    else:
        # `%s` instead of `%u`: the latter raises TypeError if a
        # non-numeric value (e.g. a species name) is passed.
        sys.stdout.write('\t:: Unknown organism: `%s`.\n' % organism)
        return []

    url = urls.urls['p_elm']['url']
    c = curl.Curl(url, silent = False)
    data = c.result
    # keep only the archive member(s) whose name starts with the
    # phosphosite table prefix, then use the first one (if any)
    data = [
        n for d, n in iteritems(data)
        if d.startswith(urls.urls['p_elm']['psites'])
    ]
    data = data[0] if len(data) > 0 else ''
    data = [l.split('\t') for l in data.split('\n')]
    kinases = phosphoelm_kinases()
    # the first line is dropped (presumably the header -- TODO confirm)
    del data[0]

    for l in data:

        if len(l) != 9:
            continue

        if _organism is not None and l[7] != _organism:
            continue

        if ltp_only and l[6] != 'LTP':
            continue

        kinase_label = l[5]

        if not kinase_label or kinase_label not in kinases:
            continue

        # the first field may carry an isoform number after a dash;
        # without it, isoform 1 is assumed
        accession = l[0]
        isoform = 1 if '-' not in accession else int(accession.split('-')[1])

        result.append({
            'instance': None,
            'isoform': isoform,
            'resaa': l[3],
            # strip everything except digits, dots and dashes
            'resnum': int(non_digit.sub('', l[2])),
            'start': None,
            'end': None,
            'substrate': accession.split('-')[0],
            'kinase': kinases[kinase_label],
            'references': l[4].split(';'),
            'experiment': l[6],
            'organism': l[7]
        })

    return result




[docs]
def phosphoelm_interactions(organism = 'Homo sapiens'):
    """
    Kinase-substrate interactions from phosphoELM in a flat list format.

    Calls ``phosphoelm_enzyme_substrate`` with ``ltp_only = True`` and
    converts each record to a list.

    :param str organism: Not used in the body of this function; the
        underlying query runs with the default organism of
        ``phosphoelm_enzyme_substrate``.

    :return: List of lists, each with four elements: kinase, substrate,
        references joined by semicolon, and organism.
    """

    return [
        [
            record['kinase'],
            record['substrate'],
            ';'.join(record['references']),
            record['organism'],
        ]
        for record in phosphoelm_enzyme_substrate(ltp_only = True)
    ]




[docs]
def phosphoelm_kinases():
    """
    Downloads the kinase table of phosphoELM.

    Parses the HTML page at the `p_elm_kin` URL and reads the rows of
    the second `table` element on it.

    :return: Dict with the text of the first cell of each row as keys
        and the stripped text of the third cell as values. Rows with
        fewer than three cells or with an empty third cell are omitted.
    """

    kinase_map = {}
    page_url = urls.urls['p_elm_kin']['url']
    download = curl.Curl(page_url, silent = False)
    html = download.result
    tables = bs4.BeautifulSoup(html, 'html.parser').find_all('table')

    # the second table of the page is the one processed
    for tr in tables[1].find_all('tr'):

        cells = [td.text for td in tr.find_all('td')]

        if len(cells) <= 2:
            continue

        third = cells[2].strip()

        if third:
            kinase_map[cells[0]] = third

    return kinase_map