Source code for pypath.inputs.wang

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import collections

from typing import List, Literal, Union

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.settings as settings
import pypath.inputs.embopress as embo
import pypath.inputs.ca1 as ca1
import pypath.utils.mapping as mapping


# Normalization table for the function and location labels of network nodes.
# `_wang_process` looks each label up in a per-call copy of this mapping
# (the copy may be extended from the `Notes:` section of the input).
# Misspelled or variant labels map to one canonical spelling; the various
# "not available" markers map to `None`. Labels not listed here are kept
# unchanged by the lookup.
KEY = {
    'NA': None,
    'Neucleus': 'Nucleus',
    'Ribosomes': 'Ribosome',
    'Vesicles': 'Vesicle',
    'Endoplasmic reticulum': 'ER',
    'Not available': None,
    'Not Available': None,
    'Mitochondrial': 'Mitochondrion',
    'Mitochondria': 'Mitochondrion',
    'Anti-Apoptic': 'Anti-apoptotic',
}


# Makes the CA1 loader from `pypath.inputs.ca1` available in this module
# under the name `ca1_interactions`; `wang_annotations` relies on that name.
ca1_interactions = ca1.ca1_interactions


def hsn_interactions(
        source: Literal['rescued', 'researchgate'] = 'rescued',
    ) -> List[tuple]:
    """
    Downloads and processes HumanSignalingNetwork version 6
    (published 2014 Jan by Edwin Wang).

    Args
        source:
            The same file is available from two domains: the OmniPath
            rescued repository and Research Gate. These both are
            secondary sources, the dataset is not available any more
            from its original site of publication, which was the old
            webpage of the Wang Lab.

    Returns
        A list of `HsnInteraction` named tuples with the fields
        `genesymbol_source`, `genesymbol_target`, `entrez_source`,
        `entrez_target` and `effect`. The effect labels `Pos`, `Neg`
        and `Phy` are translated to `+`, `-` and `0`, respectively;
        any other label is kept as it is.

    Details:
        This dataset is identical to the one returned by
        `wang_interactions`, but it does not contain function and
        localization details.
    """

    effects = {
        'Pos': '+',
        'Neg': '-',
        'Phy': '0',
    }


    class HsnInteraction(
            collections.namedtuple(
                'HsnInteractionBase',
                (
                    'genesymbol_source',
                    'genesymbol_target',
                    'entrez_source',
                    'entrez_target',
                    'effect',
                ),
            ),
        ):


        def __new__(cls, *args):

            # accept both a single sequence of fields and unpacked fields
            args = args[0] if len(args) == 1 else args

            # the columns of the file come in a different order than the
            # fields of the record, hence they are assigned by name
            identifiers = dict(zip(
                (
                    'entrez_source',
                    'genesymbol_source',
                    'entrez_target',
                    'genesymbol_target',
                ),
                args[:-1]
            ))

            return super(HsnInteraction, cls).__new__(
                cls,
                **identifiers,
                # the last column is the effect; unknown labels pass through
                effect = effects.get(args[-1], args[-1]),
            )


    url = urls.urls['hsn'][source]

    c = curl.Curl(
        url,
        silent = False,
        large = True,
        req_headers = [settings.get('user_agent')],
    )

    # the first record is discarded (presumably the header row)
    _ = next(c.result)

    # Strip before filtering: a whitespace-only line (e.g. a bare newline)
    # is truthy, but would be split into a single empty field and the
    # record constructor would fail on it with `TypeError`.
    stripped = (line.strip() for line in c.result)

    return [
        HsnInteraction(*line.split(','))
        for line in stripped
        if line
    ]
def wang_interactions() -> List[tuple]:
    """
    Downloads and processes Wang Lab HumanSignalingNetwork.

    The file is retrieved from the rescued copy, split into lines and
    handed over to `_wang_process`.

    Returns
        The list of interaction records created by `_wang_process`.
    """

    download = curl.Curl(urls.urls['wang']['rescued'], silent = False)
    lines = download.result.split('\n')

    return _wang_process(lines)
def cui_interactions() -> List[tuple]:
    """
    Interactions from Supplementary Table 9 of Cui et al. 2007
    (https://www.embopress.org/doi/full/10.1038/msb4100200).

    The sheet is retrieved by `embopress_supplementary` and its rows are
    handed over to `_wang_process`.

    Returns
        The list of interaction records created by `_wang_process`.
    """

    wang_urls = urls.urls['wang']

    table = embo.embopress_supplementary(
        url = wang_urls['cui'],
        init_url = wang_urls['cui_init'],
        sheet = 'Supplementary Table 9',
    )

    return _wang_process(table)
def _wang_process(raw: List[Union[List, str]]) -> List[tuple]:
    """
    Extracts interactions from the rows of a Wang Lab style network table.

    The input consists of sections, each terminated by a blank row. The
    first field of a row decides which section is opened after that row:

    - `Notes:` opens a key section: each row adds a label translation
      (first field to second field) to a per-call copy of `KEY`.
    - `Node` opens the node table: the fields are node ID, gene symbol,
      Entrez ID, function and location.
    - `From` opens the edge table: the fields are source node ID, target
      node ID and effect.

    A header row is still processed as data of the section that was open
    before it, exactly as any other row; only the subsequent rows belong
    to the new section.

    Args
        raw:
            Rows of the table. A row is either a string, which is split
            by commas, or an already split sequence of fields. Blank
            rows (empty or `None` rows, rows with an empty first field,
            whitespace-only strings) close the current section and are
            otherwise ignored.

    Returns
        A list of `WangInteraction` named tuples, one for each row of the
        edge table, carrying the gene symbols, Entrez IDs, functions and
        locations of both partners and the effect.

    Raises
        KeyError: If an edge refers to a node ID absent from the node table.
        IndexError: If a row has fewer fields than its section requires.
    """

    Node = collections.namedtuple(
        'Node',
        (
            'genesymbol',
            'entrez',
            'function',
            'location',
        ),
    )

    WangInteraction = collections.namedtuple(
        'WangInteraction',
        (
            'genesymbol_source',
            'genesymbol_target',
            'entrez_source',
            'entrez_target',
            'effect',
            'function_source',
            'location_source',
            'function_target',
            'location_target',
        ),
    )

    # per-call copy: the `Notes:` section must not alter the module constant
    key = KEY.copy()
    result = []
    nodes = {}
    # the currently open section: None, 'key', 'nodes' or 'edges'
    section = None


    def _key(label):
        # Translated twice on purpose: a translation defined in the input
        # may lead to a label which is itself normalized by `KEY`.
        label = key.get(label, label)

        return key.get(label, label)


    for row in raw:

        blank = (
            not row or
            not row[0] or
            (hasattr(row, 'strip') and not row.strip())
        )

        if blank:

            # A blank row only closes the section. It must be skipped here,
            # otherwise the header check below would fail on empty or
            # `None` rows and on rows starting with an empty cell.
            section = None
            continue

        fields = row.split(',') if hasattr(row, 'split') else row

        if section == 'key':

            key[fields[0]] = fields[1]

        elif section == 'nodes':

            nodes[fields[0]] = Node(
                genesymbol = fields[1],
                # drops anything after a dot (e.g. `1234.0` to `1234`)
                entrez = fields[2].split('.')[0],
                function = _key(fields[3]),
                location = _key(fields[4]),
            )

        elif section == 'edges':

            src = nodes[fields[0]]
            tgt = nodes[fields[1]]

            result.append(
                WangInteraction(
                    genesymbol_source = src.genesymbol,
                    genesymbol_target = tgt.genesymbol,
                    entrez_source = src.entrez,
                    entrez_target = tgt.entrez,
                    effect = fields[2].replace('_', '-').split('.')[0],
                    function_source = src.function,
                    location_source = src.location,
                    function_target = tgt.function,
                    location_target = tgt.location,
                )
            )

        # Headers are checked after the row has been processed, hence a
        # new section takes effect only from the next row.
        head = fields[0]

        if head.startswith('Node'):

            section = 'nodes'

        if head.startswith('From'):

            section = 'edges'

        if head.startswith('Notes:'):

            section = 'key'

    return result
def wang_annotations():
    """
    Collects function and location annotations of proteins from the CA1,
    Cui and Wang interaction datasets.

    Each dataset is loaded by the function named `<dataset>_interactions`
    in this module. Both partners of each interaction are considered; a
    partner is skipped if both its function and location are missing. If
    the interaction record has no UniProt attribute for a partner, its
    Entrez ID is translated by `mapping.map_name`.

    Returns
        A dict with UniProt IDs as keys and sets of `WangAnnotation`
        named tuples (fields `function` and `location`) as values.
    """

    WangAnnotation = collections.namedtuple(
        'WangAnnotation',
        (
            'function',
            'location',
        ),
    )

    missing = {None, 'NA', 'Not available'}
    annotations = collections.defaultdict(set)

    for dataset in ('ca1', 'cui', 'wang'):

        loader = globals()['%s_interactions' % dataset]

        for interaction in loader():

            for side in ('source', 'target'):

                function = getattr(interaction, 'function_%s' % side)
                location = getattr(interaction, 'location_%s' % side)

                # nothing to annotate if both values are missing
                if function in missing and location in missing:

                    continue

                uniprot_attr = 'uniprot_%s' % side

                if hasattr(interaction, uniprot_attr):

                    uniprots = (getattr(interaction, uniprot_attr),)

                else:

                    uniprots = mapping.map_name(
                        getattr(interaction, 'entrez_%s' % side),
                        'entrez',
                        'uniprot',
                    )

                annotation = WangAnnotation(
                    function = function,
                    location = location,
                )

                for uniprot in uniprots:

                    annotations[uniprot].add(annotation)

    return dict(annotations)