Source code for pypath.inputs.elm

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from past.builtins import xrange, range

import re
import collections

import bs4

import pypath.share.curl as curl
import pypath.resources.urls as urls



[docs]
def elm_domains():
    """
    Collects the table found on the iELM domains page.

    Downloads the page at ``urls.urls['ielm_domains']['url']``, parses it
    as HTML, takes every ``td`` cell of the first ``table`` element and
    reads the cells in groups of four consecutive items.

    Returns:
        dict: Two level dict. The outer key is the text of the second
            cell of a group (stored as ``uniprot``), the inner key is the
            text of the first cell (stored as ``motif``). Each value is a
            list of tuples with the texts of the third and fourth cells.
    """

    url = urls.urls['ielm_domains']['url']
    download = curl.Curl(url, silent = False)
    soup = bs4.BeautifulSoup(download.result, 'html.parser')
    cells = soup.find('table').find_all('td')
    result = {}

    # the flat list of cells is consumed four at a time, one group per row
    for offset in range(0, len(cells), 4):

        row = cells[offset:offset + 4]
        uniprot = row[1].text
        motif = row[0].text

        by_motif = result.setdefault(uniprot, {})
        by_motif.setdefault(motif, []).append((row[2].text, row[3].text))

    return result




[docs]
def elm_classes():
    """
    Downloads the ELM class table.

    Retrieves the tab separated file at ``urls.urls['elm_class']['url']``,
    removes all double quote characters and skips the first six lines
    (presumably comment lines and the column header -- TODO confirm
    against the current ELM download). Each remaining line is split by
    tabs and converted into an ``ELMClass`` named tuple.

    Lines whose number of fields differs from the number of ``ELMClass``
    fields are skipped instead of raising ``TypeError``; this matches how
    ``elm_instances`` ignores rows of unexpected width. A trailing
    carriage return is removed from each line, so files with Windows line
    endings are handled too.

    Returns:
        dict: ``ELMClass`` named tuples keyed by their ``identifier``
            field. If an identifier occurs more than once, the last
            record is kept.
    """

    fields = (
        'accession',
        'identifier',
        'functional_name',
        'description',
        'regex',
        'probability',
        'n_instances',
        'n_pdb',
    )
    ELMClass = collections.namedtuple('ELMClass', fields)

    url = urls.urls['elm_class']['url']
    c = curl.Curl(url, silent = False)
    lines = c.result.replace('"', '').split('\n')[6:]

    result = {}

    for line in lines:

        # tolerate `\r\n` line endings: without this the last field would
        # carry a stray carriage return
        values = line.rstrip('\r').split('\t')

        # empty or malformed lines must not abort the whole download
        if len(values) != len(fields):

            continue

        record = ELMClass(*values)
        result[record.identifier] = record

    return result




[docs]
def elm_instances():
    """
    Downloads the ELM instance table.

    Retrieves the tab separated file at ``urls.urls['elm_inst']['url']``,
    removes all double quote characters, skips the first six lines and
    splits every remaining line by tabs.

    Returns:
        list: ``ELMInstance`` named tuples, one for each line that has
            exactly 13 fields; all other lines are ignored. Field values
            are kept as strings.
    """

    fields = (
        'accession',
        'type',
        'identifier',
        'uniprot_id',
        'uniprot',
        'synonyms',
        'start',
        'end',
        'references',
        'methods',
        'logic',
        'pdb',
        'organism',
    )
    ELMInstance = collections.namedtuple('ELMInstance', fields)

    url = urls.urls['elm_inst']['url']
    download = curl.Curl(url, silent = False, slow = True)
    lines = download.result.replace('"', '').split('\n')[6:]
    rows = (line.split('\t') for line in lines)

    # only rows matching the 13 fields of the record are kept
    return [ELMInstance(*row) for row in rows if len(row) == len(fields)]




[docs]
def elm_interactions():
    """
    Downloads manually curated interactions from ELM.
    This is the gold standard set of ELM.

    Retrieves the tab separated table at ``urls.urls['elm_int']['url']``,
    drops its first line (presumably the column header) and parses every
    remaining non-empty line into an ``ELMInteraction`` named tuple.

    Rows are skipped, rather than aborting the whole download, if they
    have fewer than 13 columns or if their UniProt ID or taxon columns
    do not match the expected patterns. Errors from converting the
    numeric columns still propagate as ``ValueError``.

    Returns:
        list: ``ELMInteraction`` named tuples. UniProt IDs are separated
            from their isoform number, which defaults to 1 if the ID
            carries no isoform suffix. Motif start and end are integers;
            domain start and end are integers or None; affinities are
            floats or None; ``pubmeds`` is a tuple of integers; taxons
            are integers.
    """

    def number_or_none(value, typ = int):
        """
        Converts ``value`` by ``typ`` unless it is the string 'None'.
        """

        return typ(value) if value != 'None' else None

    # UniProt ID with isoform e.g. O14754-1
    reupi = re.compile(r'([\w]{6,10})(?:-([0-9]{1,2}))?')
    # quoted numeric taxon ID followed by text in parentheses
    # (presumably the organism name)
    retax = re.compile(r'"([0-9]+)"\([-:/,\.\[\]\(\)\w\s]+\)')

    ELMInteraction = collections.namedtuple(
        'ELMInteraction',
        [
            'motif_elm',
            'domain_pfam',
            'uniprot_motif',
            'uniprot_domain',
            'isoform_motif',
            'isoform_domain',
            'start_motif',
            'end_motif',
            'start_domain',
            'end_domain',
            'affinity_min',
            'affinity_max',
            'pubmeds',
            'taxon_motif',
            'taxon_domain',
        ],
    )

    # the highest column index used below is 12
    n_columns = 13

    result = []
    url = urls.urls['elm_int']['url']
    c = curl.Curl(url, silent = False, slow = True)
    lines = c.result.split('\n')[1:]

    for line in lines:

        if not line:

            continue

        fields = tuple(x.strip() for x in line.split('\t'))

        # truncated or whitespace-only lines (e.g. a lone `\r`)
        if len(fields) < n_columns:

            continue

        m_uniprot_motif = reupi.match(fields[2])
        m_uniprot_domain = reupi.match(fields[3])
        m_taxon_motif = retax.match(fields[11])
        m_taxon_domain = retax.match(fields[12])

        # `match` returns None for values not following the pattern
        if not (
            m_uniprot_motif and
            m_uniprot_domain and
            m_taxon_motif and
            m_taxon_domain
        ):

            continue

        uniprot_motif, isoform_motif = m_uniprot_motif.groups()
        uniprot_domain, isoform_domain = m_uniprot_domain.groups()

        result.append(
            ELMInteraction(
                motif_elm = fields[0],
                domain_pfam = fields[1],
                uniprot_motif = uniprot_motif,
                uniprot_domain = uniprot_domain,
                isoform_motif = int(isoform_motif) if isoform_motif else 1,
                isoform_domain = int(isoform_domain) if isoform_domain else 1,
                start_motif = int(fields[4]),
                end_motif = int(fields[5]),
                start_domain = number_or_none(fields[6]),
                end_domain = number_or_none(fields[7]),
                affinity_min = number_or_none(fields[8], float),
                affinity_max = number_or_none(fields[9], float),
                # empty items (e.g. from a trailing comma) are ignored
                pubmeds = tuple(
                    int(pmid)
                    for pmid in fields[10].split(',')
                    if pmid.strip()
                ),
                taxon_motif = int(m_taxon_motif.group(1)),
                taxon_domain = int(m_taxon_domain.group(1)),
            )
        )

    return result