# Source code for pypath.inputs.ramilowski2015

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re
import itertools
import collections

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.inputs.common as inputs_common
import pypath.utils.mapping as mapping



# [docs]
def ramilowski_interactions(putative = False):
    """
    Downloads and processes ligand-receptor interactions from
    Supplementary Table 2 of Ramilowski 2015.

    :param bool putative:
        Keep also the records whose evidence label (column 15 of the
        sheet) is exactly `putative`. Records whose label starts with
        `EXCLUDED` are always discarded.

    :return:
        List of `Ramilowski2015Interaction` named tuples with the fields
        `ligand`, `receptor`, `references` and `resources`. The
        references have all spaces removed; the resources are the
        non-empty values of columns 5-10 and the evidence label joined
        by semicolons.
    """

    c = curl.Curl(urls.urls['rami']['url'], silent = False, large = True)
    xlsname = c.fname
    del c

    # the first row of the sheet is dropped (presumably the header)
    raw = inputs_common.read_xls(xlsname, 'All.Pairs')[1:]

    Ramilowski2015Interaction = collections.namedtuple(
        'Ramilowski2015Interaction',
        (
            'ligand',
            'receptor',
            'references',
            'resources',
        ),
    )

    return [
        Ramilowski2015Interaction(
            ligand = r[1],
            receptor = r[3],
            references = r[13].replace(' ', ''),
            resources = ';'.join(
                # empty cells are left out from the resource list
                filter(len, itertools.chain(r[5:11], [r[15]]))
            ),
        )
        for r in raw
        if not r[15].startswith('EXCLUDED') and (
            putative or r[15] != 'putative'
        )
    ]




# [docs]
def ramilowski_locations(long_notes = False):
    """
    Subcellular location annotations from Ramilowski 2015.

    :param bool long_notes:
        Keep the free text found after `Note=` in the `long_note` field
        of the records. If `False`, this field is always `None`.

    :return:
        Dict with the identifiers returned by `mapping.map_name` as keys
        and sets of `RamilowskiLocation` named tuples as values. The
        named tuples have the fields `location`, `source`, `tmh`, `note`
        and `long_note`.
    """

    # location name, optionally followed by a note in parentheses
    reloc = re.compile(
        r'([^\(]+[^\s^\(])'
        r'\s?\('
        r'?(?:(.*[^\)])?)'
        r'\)?'
    )
    # separators between the locations listed in one cell
    resep = re.compile(r'[\.;,]')
    renote = re.compile(r'Note=([- \w\(\),\s\+\./%\'":;]*)')
    redigit = re.compile(r'\d')

    # column index and label of each source of location annotations
    sources = (
        (4, 'UniProt'),
        (5, 'HPRD'),
        (7, 'LocTree3'),
        (10, 'Consensus'),
        (11, 'Consensus6'),
    )

    RamilowskiLocation = collections.namedtuple(
        'RamilowskiLocation',
        [
            'location',
            'source',
            'tmh',
            'note',
            'long_note',
        ],
    )

    url = urls.urls['rami']['loc']
    c = curl.Curl(url, silent = False, large = True)

    # the first line is dropped (presumably the header)
    _ = next(c.result)

    result = collections.defaultdict(set)

    for line in c.result:

        fields = line.strip('\n\r').split('\t')

        # these depend only on the row, hence computed once per row
        # instead of once per location
        tmh = fields[9].strip()
        tmh = int(tmh) if tmh.isdigit() else None
        annotations = set()

        for idx, source in sources:

            locs = fields[idx]
            long_note = None
            mnote = renote.search(locs)

            if mnote:

                # the note is removed from the cell before splitting it
                # into locations
                long_note = mnote.group(1)
                locs = renote.sub('', locs)

            for loc in resep.split(locs):

                if ':' in loc and 'GO:' not in loc:

                    # only the part after the last colon is kept
                    loc = loc.split(':')[-1]

                loc = loc.strip().replace('- ', '-').lower()

                # empty, too long, numeric or irrelevant fragments are
                # not location names
                if (
                    not loc or
                    len(loc.split()) > 3 or
                    redigit.search(loc) or
                    loc == 'n/a' or
                    'tumor' in loc
                ):
                    continue

                m = reloc.match(loc)

                if not m:
                    continue

                location, note = m.groups()

                annotations.add(
                    RamilowskiLocation(
                        location = (
                            location.lower().replace('=', '').strip()
                        ),
                        source = source,
                        tmh = tmh,
                        note = note,
                        long_note = long_note if long_notes else None,
                    )
                )

        # the identifier is translated only for rows which yielded at
        # least one annotation, and only once for each row
        if not annotations:
            continue

        for uniprot in mapping.map_name(fields[3], 'uniprot', 'uniprot'):

            result[uniprot].update(annotations)

    return dict(result)