Source code for pypath.inputs.hprd

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re

import pypath.share.curl as curl
import pypath.resources.urls as urls


def get_hprd(in_vivo = True):
    """
    Downloads and preprocesses HPRD data.

    Retrieves the archive at the ``url_rescued`` address of the
    ``hprd_all`` resource and reads the file named by its ``ptm_file``
    entry as a tab separated table.

    :param bool in_vivo:
        If True, keep only the rows where ``'in vivo'`` is among the
        semicolon separated values of the field at index 9.

    :return:
        List of rows, each row is a list of strings. Empty list if the
        download yielded nothing.
    """

    url = urls.urls['hprd_all']['url_rescued']
    files = [urls.urls['hprd_all']['ptm_file']]
    c = curl.Curl(url, silent = False, files_needed = files)
    data = c.result

    # A falsy check instead of `len(data) == 0`: presumably `result` can
    # be None after a failed download (to be confirmed against `Curl`),
    # and `len(None)` would raise TypeError instead of returning [].
    if not data:

        return []

    # Blank lines are skipped instead of blindly dropping the last
    # element: this way the final record survives even if the file does
    # not end with a newline, and no `['']` rows reach the field
    # indexing below or in the callers.
    data = [
        line.split('\t')
        for line in data[files[0]].split('\n')
        if line
    ]

    if in_vivo:

        data = [row for row in data if 'in vivo' in row[9].split(';')]

    return data
def hprd_interactions(in_vivo = True):
    """
    Processes HPRD data and extracts interactions.

    Calls ``get_hprd`` and keeps only the rows where the field at
    index 6 is not the ``'-'`` placeholder.

    :param bool in_vivo:
        Passed to ``get_hprd`` unchanged.

    :return:
        List of interactions, each a list of strings.
    """

    records = get_hprd(in_vivo = in_vivo)

    return list(filter(lambda record: record[6] != '-', records))
def hprd_interactions_htp():
    """
    Downloads the HPRD archive and reads its interaction file.

    The file named by the ``int_file`` entry of the ``hprd_all``
    resource is read as a whole, decoded as ASCII and split into lines
    and tab separated fields.

    :return:
        List of rows, each a list of strings. No line is dropped: if the
        content ends with a newline, the last row is ``['']``.
    """

    url = urls.urls['hprd_all']['url_rescued']
    fname = urls.urls['hprd_all']['int_file']
    c = curl.Curl(url, silent = False, large = True, files_needed = [fname])

    # with `large = True` the result entry is read as a binary stream
    content = c.result[fname].read().decode('ascii')

    return [line.split('\t') for line in content.split('\n')]
def hprd_enzyme_substrate(in_vivo = True):
    """
    Processes HPRD data and extracts PTMs.

    Rows from ``get_hprd`` where the field at index 6 is ``'-'`` are
    ignored. For every other row, one record is created for each residue
    number found in the semicolon separated field at index 4.

    :param bool in_vivo:
        Passed to ``get_hprd`` unchanged.

    :return:
        List of kinase-substrate interactions, each a dict with the keys
        ``resaa``, ``resnum``, ``typ``, ``references``, ``kinase``,
        ``substrate_refseqp``, ``substrate``, ``start``, ``end`` and
        ``instance``.
    """

    not_a_digit = re.compile(r'[^\d]+')
    records = []

    for row in get_hprd(in_vivo = in_vivo):

        # rows without an enzyme carry the '-' placeholder
        if row[6] == '-':

            continue

        # only the digits of each site are kept; sites without any
        # digit are discarded
        digit_strings = [
            not_a_digit.sub('', site)
            for site in row[4].split(';')
        ]
        positions = [int(digits) for digits in digit_strings if digits]

        for position in positions:

            modification = row[8].lower().replace('proteolytic', '').strip()

            record = {
                'resaa': row[5],
                'resnum': position,
                'typ': modification,
                'references': row[10].split(','),
                'kinase': row[6],
                'substrate_refseqp': row[3],
                'substrate': row[1],
                # window of 7 residues to both sides of the site, the
                # start never goes below 1
                'start': max(position - 7, 1),
                'end': position + 7,
                'instance': None
            }
            records.append(record)

    return records