Source code for pypath.inputs.depod

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re
import itertools

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.mapping as mapping
import pypath.utils.taxonomy as taxonomy
import pypath.share.common as common


def _depod_taxid(field):
    """
    Extract the numeric taxon ID from a species column.

    The value is expected to look like ``prefix:9606(label)``. Returns
    ``None`` if the field does not have that shape (e.g. an empty-field
    placeholder), so the caller can skip the row instead of crashing.
    """

    try:

        return int(field.split(':')[1].split('(')[0])

    except (IndexError, ValueError):

        return None


def _depod_uniprot(field):
    """
    Pick the UniProt accession from a ``|`` separated identifier column.

    Only ``uniprotkb:`` entries with a 6 character accession are accepted.
    If several entries qualify the last one wins; if none does, an empty
    string is returned.
    """

    uniprot = ''

    for token in field.split('|'):

        parts = token.split(':')

        # a token without a colon carries no ``database:identifier`` pair
        if len(parts) < 2:

            continue

        accession = parts[1].strip()

        # NOTE(review): 10 character UniProt accessions are rejected here,
        # exactly as before -- confirm whether this is still intended
        if parts[0] == 'uniprotkb' and len(accession) == 6:

            uniprot = accession

    return uniprot


def depod_interactions(organism = 9606):
    """
    Read interactions from the second URL configured for DEPOD.

    The file is processed as tab separated lines and the first line is
    skipped as a header. Lines that are blank, truncated or do not have
    the expected field layout are ignored.

    Args:
        organism: NCBI Taxonomy ID as an integer; only rows where both
            species columns carry this ID are kept. If ``None``, rows
            are not filtered by species.

    Returns:
        A list of tuples of five strings: the UniProt accessions taken
        from the first and the second column, the publication column with
        the ``pubmed:`` prefix removed, the score column with the
        ``curator score:`` prefix removed, and the text found in
        parentheses in the twelfth column. Rows where either accession
        is missing are left out.
    """

    url = urls.urls['depod']['urls'][1]
    c = curl.Curl(url, silent = False, large = True, encoding = 'iso-8859-1')

    interactions = []

    lines = iter(c.result)
    # the first line is the header
    next(lines, None)

    for line in lines:

        fields = line.replace('\n', '').replace('\r', '').split('\t')

        # the highest column index read below is 14; anything shorter
        # (typically a trailing blank line) can not be a valid record
        if len(fields) < 15:

            continue

        if organism is not None and not (
            _depod_taxid(fields[9]) == organism and
            _depod_taxid(fields[10]) == organism
        ):

            continue

        pm = fields[8].replace('pubmed:', '')
        sc = fields[14].replace('curator score:', '')

        # the label is the part after the first opening parenthesis;
        # fall back to an empty string when there is none
        type_parts = fields[11].split('(')
        ty = type_parts[1].replace(')', '') if len(type_parts) > 1 else ''

        uniprot_a = _depod_uniprot(fields[0])
        uniprot_b = _depod_uniprot(fields[1])

        if len(uniprot_a) > 1 and len(uniprot_b) > 1:

            interactions.append((uniprot_a, uniprot_b, pm, sc, ty))

    return interactions
def depod_enzyme_substrate(organism = 9606):
    """
    Collect dephosphorylation sites from the two files configured for DEPOD.

    The first URL provides a tab separated table with the substrate type,
    organism, residues and references. The second URL provides a tab
    separated file whose first two columns hold the ``uniprotkb:``
    identifiers. After dropping the header of each, rows of the two files
    are paired by their position.

    Rows are skipped if they have no more than 6 columns, are not of type
    ``protein substrate``, belong to another organism, have ``N/A`` in
    the residue column, or have no usable counterpart in the second file
    (missing row, or no ``uniprotkb:`` identifier in either of its first
    two columns).

    Args:
        organism: The value the result of ``taxonomy.ensure_ncbi_tax_id``
            on the organism column is compared to; by default 9606.

    Returns:
        A list of dicts, one for each combination of enzyme UniProt ID,
        substrate UniProt ID and residue, with the keys ``instance``,
        ``kinase`` (the enzyme), ``resaa``, ``resnum``, ``references``,
        ``substrate``, ``start``, ``end`` and ``typ`` (always
        ``dephosphorylation``).
    """

    result = []

    reunip = re.compile(r'uniprotkb:([A-Z0-9]+)')
    reptm = re.compile(r'([A-Z][a-z]{2})-([0-9]+)')
    repmidsep = re.compile(r'[,|]\s?')

    url = urls.urls['depod']['urls'][0]
    c = curl.Curl(url, silent = False, encoding = 'ascii')
    data = [x.split('\t') for x in c.result.split('\n')]
    # drop the header row
    del data[0]

    url_mitab = urls.urls['depod']['urls'][1]
    c_mitab = curl.Curl(url_mitab, silent = False, encoding = 'iso-8859-1')
    data_mitab = [x.split('\t') for x in c_mitab.result.split('\n')]
    # drop the header row
    del data_mitab[0]

    for i, l in enumerate(data):

        if len(l) <= 6 or l[2] != 'protein substrate':

            continue

        # the organism name is the part before the first parenthesis
        if taxonomy.ensure_ncbi_tax_id(
            l[3].split('(')[0].strip()
        ) != organism:

            continue

        if l[4].strip() == 'N/A':

            continue

        # the identifiers come from the row at the same position in the
        # second file; without that row there is nothing to report
        if i >= len(data_mitab) or len(data_mitab[i]) < 2:

            continue

        enzyme_match = reunip.search(data_mitab[i][0])
        substrate_match = reunip.search(data_mitab[i][1])

        # `search` returns None when the column has no `uniprotkb:` entry
        if enzyme_match is None or substrate_match is None:

            continue

        enzyme_uniprot = enzyme_match.groups()[0]
        substrate_uniprot = substrate_match.groups()[0]

        for enzyme_up, substrate_up in itertools.product(
            mapping.map_name(enzyme_uniprot, 'uniprot', 'uniprot'),
            mapping.map_name(substrate_uniprot, 'uniprot', 'uniprot'),
        ):

            # residues are written as three letter code, dash, position
            for resaa, resnum in reptm.findall(l[4]):

                resnum = int(resnum)
                # translate to one letter code where a translation exists
                resaa = (
                    common.aminoa_3_to_1_letter[resaa]
                    if resaa in common.aminoa_3_to_1_letter else
                    resaa
                )

                result.append({
                    'instance': None,
                    'kinase': enzyme_up,
                    'resaa': resaa,
                    'resnum': resnum,
                    'references': repmidsep.split(l[6].strip()),
                    'substrate': substrate_up,
                    'start': None,
                    'end': None,
                    'typ': 'dephosphorylation',
                })

    return result