Source code for pypath.inputs.dbptm

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import re
import bs4

import pypath.utils.taxonomy as taxonomy
import pypath.share.curl as curl
import pypath.resources.urls as urls



[docs]
def dbptm_enzyme_substrate(organism = 9606):
    """
    Downloads enzyme-substrate interactions from dbPTM.
    Returns list of dicts.
    """

    if organism is None:
        _organism = None
    elif organism in taxonomy.dbptm_taxids:
        _organism = taxonomy.dbptm_taxids[organism]
    else:
        sys.stdout.write('\t:: Unknown organism: `%u`.\n' % organism)
        return []

    url = urls.urls['dbptm']['old_table']
    c = curl.Curl(url, silent = False, large = True)
    data = []

    hdr = next(c.result).strip().split('\t')

    for l in c.result:

        l = l.strip().split('\t')

        data.append(dict(
            (
                key,
                (
                    None
                        if val == '' else
                    val.split(';')
                        if key in {'references', 'kinase'} else
                    int(val)
                        if val.isdigit() else
                    val
                )
            )
            for key, val in zip(hdr, l)
        ))

    return data




[docs]
def dbptm_enzyme_substrate_old(organism = 9606):
    """
    Downloads enzyme-substrate interactions from dbPTM.
    Returns list of dicts.
    """

    if organism is None:
        _organism = None
    elif organism in taxonomy.dbptm_taxids:
        _organism = taxonomy.dbptm_taxids[organism]
    else:
        sys.stdout.write('\t:: Unknown organism: `%u`.\n' % organism)
        return []

    result = []
    byre = re.compile(r'.*by\s([A-Za-z0-9\s]+)\.*')
    andre = re.compile(r',|and')
    non_digit = re.compile(r'[^\d.-]+')

    for url in urls.urls['dbptm']['urls']:

        c = curl.Curl(url, silent = False)
        extra = c.result

        for k, data in iteritems(extra):

            data = [x.split('\t') for x in data.split('\n')]

            for l in data:

                if len(l) > 8:

                    if _organism:
                        mnemonic = l[0].split('_')[1].strip()
                        if mnemonic != _organism:
                            continue

                    resnum = int(non_digit.sub('', l[2]))

                    ptm = ({
                        'substrate': l[1],
                        'typ': l[7].lower(),
                        'resaa': l[8][6],
                        'resnum': resnum,
                        'instance': l[8].strip(),
                        'references': l[4].split(';'),
                        'databases': (l[5].split()[0],),
                        'kinase': None if byre.match(l[3]) is None else [
                            i.strip()
                            for i in andre.split(
                                byre.match(l[3]).groups(1)[0])
                        ],
                        'start': resnum - 6,
                        'end': resnum + 6,
                    })

                    if ptm['kinase'] is not None:

                        if 'autocatalysis' in ptm['kinase']:

                            ptm['kinase'].append(ptm['substrate'])
                            ptm['kinase'].remove('autocatalysis')

                        ptm['kinase'] = [
                            k.replace('host', '').strip()
                            for k in ptm['kinase']
                        ]

                        ptm['kinase'] = [
                            k for k in ptm['kinase'] if len(k) > 0
                        ]

                        if len(ptm['kinase']) == 0:
                            ptm['kinase'] = None

                    result.append(ptm)

    return result




[docs]
def dbptm_interactions():

    result = []
    data = dbptm_enzyme_substrate()
    for r in data:

        if r['kinase'] is not None:

            for src in r['kinase']:

                result.append([
                    src,
                    r['substrate'],
                    ';'.join(
                        i
                        for i in r['references']
                        if i != '-'
                    ),
                ])

    return result