Source code for pypath.inputs.li2012

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.share.common as common
import pypath.inputs.common as inputs_common
import pypath.utils.mapping as mapping
import pypath.internals.intera as intera
import pypath.utils.seq as seq


def get_li2012():
    """
    Retrieves the supplementary data of Li 2012 and reads its `File S1`
    sheet.

    The file is obtained by ``curl.Curl`` from the URL registered under
    the ``li2012`` key in ``pypath.resources.urls``.

    Returns table (list of lists): the first 7 fields of each row,
    starting from the third row of the sheet, keeping only the rows
    where the 7th field is not empty.
    """

    url = urls.urls['li2012']['url']
    c = curl.Curl(url, silent = False, large = True, slow = True)

    # `read_xls` is called with a path, hence we only take the name of
    # the file and close the handle before reading it
    xls = c.fileobj
    xlsfile = xls.name
    xls.close()

    tbl = inputs_common.read_xls(xlsfile, sheet = 'File S1')

    # the first two rows are skipped (presumably headers -- TODO confirm);
    # a real list is returned instead of a lazy `filter` object, so the
    # result can be indexed, measured and iterated more than once
    return [
        row
        for row in (line[:7] for line in tbl[2:])
        if len(row[-1]) > 0
    ]
def li2012_interactions():
    """
    Builds a list of interactions from the table provided by
    ``pypath.inputs.li2012.get_li2012``.

    Each row of the table yields two records:
    ``[kinase, substrate, route, 'phosphorylation']`` and
    ``[substrate, reader, route, 'phosphomotif_binding']``.
    Duplicates are removed by ``common.unique_list``.

    Returns list of lists.
    """

    interactions = []

    for row in get_li2012():

        # the substrate field looks like `<protein>/<site>`
        substrate = row[1].split('/')[0]
        # only the first whitespace separated token is used as name
        kinase = row[2].split()[0]
        reader = row[3].split()[0]
        route = row[4]

        interactions.extend((
            (kinase, substrate, route, 'phosphorylation'),
            (substrate, reader, route, 'phosphomotif_binding'),
        ))

    return list(map(list, common.unique_list(interactions)))
def li2012_enzyme_substrate():
    """
    Builds a list of kinase-substrate records from the table provided by
    ``pypath.inputs.li2012.get_li2012``.

    Returns list of dicts with the keys `substrate`, `kinase`,
    `instance`, `start`, `end`, `resaa` and `resnum`. The `instance`,
    `start` and `end` values are always `None`, and `resaa` is
    always `Y`. Duplicates are removed by ``common.unique_list``.
    """

    fields = (
        'substrate',
        'kinase',
        'instance',
        'start',
        'end',
        'resaa',
        'resnum',
    )

    records = []

    for row in get_li2012():

        # the substrate field looks like `<protein>/<site>`
        site = row[1].split('/')
        substrate = site[0]
        kinase = row[2].split()[0]
        # only the digits of the site are kept for the residue number
        resnum = int(common.non_digit.sub('', site[1]))

        records.append(
            (substrate, kinase, None, None, None, 'Y', resnum)
        )

    return [
        dict(zip(fields, record))
        for record in common.unique_list(records)
    ]
def li2012_dmi():
    """
    Converts table read by ``pypath.inputs.li2012.get_li2012`` to
    list of ``pypath.internals.intera.DomainMotif`` objects.
    Translates GeneSymbols to UniProt IDs.

    Only the kinase-substrate (phosphorylation) relationships are
    processed here; the reader protein column of the table is not used.
    For each substrate UniProt ID, the first isoform which has a
    tyrosine at the given residue number is selected, and a motif of
    up to 7 residues on both sides of the site is created. Substrates
    missing from the sequence data or without tyrosine at the site in
    any of their isoforms are skipped.

    Returns list of ``DomainMotif`` objects, one for each
    substrate-kinase UniProt pair.
    """

    result = []
    se = seq.swissprot_seq(isoforms = True)

    for l in get_li2012():

        # the substrate field looks like `<protein>/<site>`
        site = l[1].split('/')
        subs_protein = site[0]
        tk_protein = l[2].split()[0]

        subs_uniprots = mapping.map_name(
            subs_protein,
            'genesymbol',
            'uniprot',
        )
        tk_uniprots = mapping.map_name(tk_protein, 'genesymbol', 'uniprot')

        # only the digits of the site are kept for the residue number
        subs_resnum = int(common.non_digit.sub('', site[1]))

        for su in subs_uniprots:

            if su not in se:

                continue

            # looking for the first isoform with Y at the given position
            subs_iso = None

            for iso in se[su].isof:

                if se[su].get(subs_resnum, isoform = iso) == 'Y':

                    subs_iso = iso
                    break

            if not subs_iso:

                continue

            # the motif is the +/-7 residues window around the site,
            # clamped to the boundaries of the sequence: the start
            # can not be lower than 1 and the end can not exceed the
            # length of the sequence
            start = max(1, subs_resnum - 7)
            end = min(subs_resnum + 7, len(se[su].isof[subs_iso]))

            for ku in tk_uniprots:

                # new objects are created for each kinase, so the
                # resulting domain-motifs do not share their components
                res = intera.Residue(
                    subs_resnum,
                    'Y',
                    su,
                    isoform = subs_iso,
                )
                mot = intera.Motif(
                    su,
                    start,
                    end,
                    isoform = subs_iso,
                    instance = se[su].get(
                        start,
                        end,
                        isoform = subs_iso
                    ),
                )
                ptm = intera.Ptm(
                    su,
                    motif = mot,
                    residue = res,
                    isoform = subs_iso,
                    typ = 'phosphorylation',
                    evidences = 'Li2012'
                )
                dom = intera.Domain(ku)
                dommot = intera.DomainMotif(
                    domain = dom,
                    ptm = ptm,
                    evidences = 'Li2012',
                )
                result.append(dommot)

    return result