Source code for pypath.inputs.ramilowski2015

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import re
import itertools
import collections

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.inputs.common as inputs_common
import pypath.utils.mapping as mapping


def ramilowski_interactions(putative = False):
    """
    Downloads and processes ligand-receptor interactions from
    Supplementary Table 2 of Ramilowski 2015.

    The `All.Pairs` sheet of the downloaded workbook is read; its first
    row is skipped (presumably the header row). Records whose column 15
    starts with `EXCLUDED` are always dropped.

    :param bool putative:
        If `False` (default), records whose column 15 is exactly
        `putative` are dropped as well; if `True` they are kept.

    :return:
        A list of `Ramilowski2015Interaction` named tuples with the
        fields `ligand` (column 1), `receptor` (column 3), `references`
        (column 13 with spaces removed) and `resources` (the non-empty
        values of columns 5-10 and column 15 joined by `;`).
    """

    c = curl.Curl(urls.urls['rami']['url'], silent = False, large = True)
    xlsname = c.fname
    # only the path of the downloaded file is needed from here on
    del c

    raw = inputs_common.read_xls(xlsname, 'All.Pairs')[1:]

    Ramilowski2015Interaction = collections.namedtuple(
        'Ramilowski2015Interaction',
        (
            'ligand',
            'receptor',
            'references',
            'resources',
        ),
    )

    return [
        Ramilowski2015Interaction(
            ligand = r[1],
            receptor = r[3],
            references = r[13].replace(' ', ''),
            # `filter(len, ...)` discards the empty cells
            resources = ';'.join(
                filter(len, itertools.chain(r[5:11], [r[15]]))
            ),
        )
        for r in raw
        if not r[15].startswith('EXCLUDED') and (
            putative or r[15] != 'putative'
        )
    ]
def ramilowski_locations(long_notes = False):
    """
    Subcellular location annotations from Ramilowski 2015.

    Reads a tab separated table, skipping its first line, and collects
    the location terms found in a fixed set of columns, each column
    being labelled with a source name (see `sources` below).

    :param bool long_notes:
        If `True`, the text captured after `Note=` in a cell is stored
        in the `long_note` field of the records; otherwise that field
        is always `None`.

    :return:
        A `dict` whose keys are the identifiers yielded by
        `mapping.map_name` for column 3 of each row and whose values
        are sets of `RamilowskiLocation` named tuples with the fields
        `location`, `source`, `tmh`, `note` and `long_note`.
    """

    # a location term, optionally followed by a note in parentheses
    reloc = re.compile(
        r'([^\(]+[^\s^\(])'
        r'\s?\('
        r'?(?:(.*[^\)])?)'
        r'\)?'
    )
    # separators between the location terms within one cell
    resep = re.compile(r'[\.;,]')
    # free text note embedded in a cell
    renote = re.compile(r'Note=([- \w\(\),\s\+\./%\'":;]*)')
    # terms containing any digit are discarded
    redigit = re.compile(r'\d')
    # terms containing any of these words are discarded
    excluded_words = ('tumor',)

    # (column index, label of the source) pairs
    sources = (
        (4, 'UniProt'),
        (5, 'HPRD'),
        (7, 'LocTree3'),
        (10, 'Consensus'),
        (11, 'Consensus6'),
    )

    RamilowskiLocation = collections.namedtuple(
        'RamilowskiLocation',
        [
            'location',
            'source',
            'tmh',
            'note',
            'long_note',
        ],
    )

    url = urls.urls['rami']['loc']
    c = curl.Curl(url, silent = False, large = True)

    # discard the first line (presumably the header)
    _ = next(c.result)

    result = collections.defaultdict(set)

    for line in c.result:

        fields = line.strip('\n\r').split('\t')

        # The identifiers and the `tmh` value depend only on the row,
        # hence these are resolved at most once per row, and only if
        # the row yields at least one accepted location.
        uniprots = None
        tmh = None

        for idx, source in sources:

            locs = fields[idx]

            long_note = None
            mnote = renote.search(locs)

            if mnote:

                long_note = mnote.groups()[0]
                locs = renote.sub('', locs)

            for loc in resep.split(locs):

                # keep only the part after the last colon,
                # except if the term contains `GO:`
                if ':' in loc and 'GO:' not in loc:

                    loc = loc.split(':')[-1]

                loc = loc.strip().replace('- ', '-').lower()

                if (
                    not loc or
                    len(loc.split()) > 3 or
                    redigit.search(loc) or
                    loc == 'n/a' or
                    any(w in loc for w in excluded_words)
                ):

                    continue

                m = reloc.match(loc)

                if not m:

                    continue

                location, note = m.groups()

                if uniprots is None:

                    tmh_raw = fields[9].strip()
                    tmh = int(tmh_raw) if tmh_raw.isdigit() else None
                    # materialized, as it is iterated repeatedly
                    uniprots = tuple(
                        mapping.map_name(fields[3], 'uniprot', 'uniprot')
                    )

                record = RamilowskiLocation(
                    location = (
                        location.lower().replace('=', '').strip()
                    ),
                    source = source,
                    tmh = tmh,
                    note = note,
                    long_note = long_note if long_notes else None,
                )

                for uniprot in uniprots:

                    result[uniprot].add(record)

    return dict(result)