Source code for pypath.inputs.elm

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from past.builtins import xrange, range

import re
import collections

import bs4

import pypath.share.curl as curl
import pypath.resources.urls as urls


def elm_domains():
    """
    Retrieves the domain table from the `ielm_domains` URL.

    The page is parsed as HTML and the cells (`td`) of its first table
    are consumed four at a time: the text of the first cell is used as
    the motif key, the second as the UniProt key, and the texts of the
    third and fourth cells are stored together as one tuple.
    NOTE(review): the meaning of the third and fourth columns is not
    visible from this function -- confirm against the source table.

    Returns
        Dict of dicts of lists: the outer keys are the second column
        values, the inner keys are the first column values, and each
        list holds `(third, fourth)` tuples of strings.
    """

    page = curl.Curl(urls.urls['ielm_domains']['url'], silent = False).result
    cells = bs4.BeautifulSoup(page, 'html.parser').find('table').find_all('td')

    domains = {}

    # every four consecutive cells make up one table row
    for start in range(0, len(cells), 4):

        row = cells[start:start + 4]
        uniprot = row[1].text
        motif = row[0].text

        domains.setdefault(uniprot, {}).setdefault(motif, []).append(
            (row[2].text, row[3].text)
        )

    return domains
def elm_classes():
    """
    Retrieves the ELM class table from the `elm_class` URL.

    All double quotes are removed from the downloaded text, the first
    six lines are discarded (presumably comments and the column header
    -- TODO confirm), and every remaining non-empty line is split on
    tabs into an `ELMClass` record.

    Returns
        Dict with the `identifier` field of each record as key and the
        `ELMClass` named tuple as value. If an identifier occurs more
        than once, the last record wins.
    """

    ELMClass = collections.namedtuple(
        'ELMClass',
        [
            'accession',
            'identifier',
            'functional_name',
            'description',
            'regex',
            'probability',
            'n_instances',
            'n_pdb',
        ],
    )

    raw = curl.Curl(urls.urls['elm_class']['url'], silent = False).result
    lines = raw.replace('"', '').split('\n')[6:]

    classes = {}

    for line in lines:

        if not line:

            continue

        record = ELMClass(*line.split('\t'))
        classes[record.identifier] = record

    return classes
def elm_instances():
    """
    Retrieves the ELM instance table from the `elm_inst` URL.

    All double quotes are removed from the downloaded text, the first
    six lines are discarded (presumably comments and the column header
    -- TODO confirm), and the remaining lines are split on tabs. Lines
    which do not yield exactly as many fields as `ELMInstance` has
    (13) are silently ignored.

    Returns
        List of `ELMInstance` named tuples; all fields are strings.
    """

    ELMInstance = collections.namedtuple(
        'ELMInstance',
        [
            'accession',
            'type',
            'identifier',
            'uniprot_id',
            'uniprot',
            'synonyms',
            'start',
            'end',
            'references',
            'methods',
            'logic',
            'pdb',
            'organism',
        ],
    )

    # 13 columns are expected in each data line
    n_fields = len(ELMInstance._fields)

    raw = curl.Curl(
        urls.urls['elm_inst']['url'],
        silent = False,
        slow = True,
    ).result

    instances = []

    for line in raw.replace('"', '').split('\n')[6:]:

        fields = line.split('\t')

        if len(fields) == n_fields:

            instances.append(ELMInstance(*fields))

    return instances
def elm_interactions():
    """
    Downloads manually curated interactions from ELM.

    This is the gold standard set of ELM.

    The text retrieved from the `elm_int` URL is split into lines, the
    first line is dropped (presumably the column header -- TODO
    confirm) and empty lines are skipped. Each remaining line is split
    on tabs and its fields are stripped of surrounding whitespace and
    converted as follows:

    * columns 3-4: UniProt accession with an optional isoform suffix;
      the isoform number defaults to 1 if no suffix is present
    * columns 5-6: motif start and end, integers
    * columns 7-8: domain start and end, integers or `None`
    * columns 9-10: minimum and maximum affinity, floats or `None`
    * column 11: comma separated PubMed IDs, a tuple of integers
    * columns 12-13: taxon of the motif and the domain, the integer
      found between double quotes at the beginning of the field

    In the optional columns the literal string `None` is translated to
    `None`.

    Returns
        List of `ELMInteraction` named tuples.
    """

    def optional_number(value, typ = int):
        # the string 'None' stands for missing values in the table

        if value == 'None':

            return None

        return typ(value)


    # UniProt ID with isoform e.g. O14754-1
    reupi = re.compile(r'([\w]{6,10})(?:-([0-9]{1,2}))?')
    retax = re.compile(r'"([0-9]+)"\([-:/,\.\[\]\(\)\w\s]+\)')


    def accession_isoform(field):
        # separates the accession and the isoform number,
        # the latter is 1 if not provided

        accession, isoform = reupi.match(field).groups()

        return accession, (int(isoform) if isoform else 1)


    def taxon_id(field):
        # the numeric ID stands in quotes before the bracketed name

        return int(retax.match(field).group(1))


    ELMInteraction = collections.namedtuple(
        'ELMInteraction',
        [
            'motif_elm',
            'domain_pfam',
            'uniprot_motif',
            'uniprot_domain',
            'isoform_motif',
            'isoform_domain',
            'start_motif',
            'end_motif',
            'start_domain',
            'end_domain',
            'affinity_min',
            'affinity_max',
            'pubmeds',
            'taxon_motif',
            'taxon_domain',
        ],
    )

    raw = curl.Curl(
        urls.urls['elm_int']['url'],
        silent = False,
        slow = True,
    ).result

    interactions = []

    # the first line is not processed
    for line in raw.split('\n')[1:]:

        if not line:

            continue

        rec = tuple(field.strip() for field in line.split('\t'))

        uniprot_motif, isoform_motif = accession_isoform(rec[2])
        uniprot_domain, isoform_domain = accession_isoform(rec[3])

        interactions.append(
            ELMInteraction(
                motif_elm = rec[0],
                domain_pfam = rec[1],
                uniprot_motif = uniprot_motif,
                uniprot_domain = uniprot_domain,
                isoform_motif = isoform_motif,
                isoform_domain = isoform_domain,
                start_motif = int(rec[4]),
                end_motif = int(rec[5]),
                start_domain = optional_number(rec[6]),
                end_domain = optional_number(rec[7]),
                affinity_min = optional_number(rec[8], float),
                affinity_max = optional_number(rec[9], float),
                pubmeds = (
                    tuple(int(pmid) for pmid in rec[10].split(','))
                    if rec[10] else
                    ()
                ),
                taxon_motif = taxon_id(rec[11]),
                taxon_domain = taxon_id(rec[12]),
            )
        )

    return interactions