# Source code for pypath.inputs.lrdb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import csv
import collections

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.share.common as common


# Record type of the annotations collected by `lrdb_annotations`:
# `role` is either 'ligand' or 'receptor', `cell_type` is a cell type
# label or None, `sources` and `references` are sorted tuples.
LrdbAnnotation = collections.namedtuple(
    'LrdbAnnotation',
    ('role', 'cell_type', 'sources', 'references'),
)


def lrdb_interactions():
    """
    Retrieves the LRdb table and converts its rows to interaction records.

    The tab separated table is read from the URL registered under
    ``urls.urls['lrdb']['url']``. The comma separated `source`, `PMIDs`,
    `cells.L` and `cells.R` columns are split into lists with the empty
    strings dropped. From the sources also the 'literature' label is
    dropped, and a few source names are replaced by the labels listed in
    `source_labels` below.

    Returns:
        A list of `LrdbRecord` named tuples with the fields
        `ligand_genesymbol`, `receptor_genesymbol`, `sources`,
        `references`, `ligand_cells` and `receptor_cells`.
    """

    LrdbRecord = collections.namedtuple(
        'LrdbRecord',
        [
            'ligand_genesymbol',
            'receptor_genesymbol',
            'sources',
            'references',
            'ligand_cells',
            'receptor_cells',
        ]
    )

    # source names as used in the table -> labels used in the records
    source_labels = {
        'reactome': 'Reactome',
        'fantom5': 'Fantom5',
        'IUPHAR': 'Guide2Pharma',
        'uniprot': 'UniProt',
    }

    def split_and_drop(value, unwanted = ''):
        # splits a comma separated field and leaves out the unwanted items
        parts = value.split(',')
        unwanted = common.to_set(unwanted)

        return [part for part in parts if part not in unwanted]

    url = urls.urls['lrdb']['url']
    c = curl.Curl(url, silent = False, large = True)
    table = csv.DictReader(c.result, delimiter = '\t')

    return [
        LrdbRecord(
            ligand_genesymbol = row['ligand'],
            receptor_genesymbol = row['receptor'],
            sources = [
                source_labels.get(name, name)
                for name in split_and_drop(
                    row['source'],
                    {'literature', ''},
                )
            ],
            references = split_and_drop(row['PMIDs']),
            ligand_cells = split_and_drop(row['cells.L']),
            receptor_cells = split_and_drop(row['cells.R']),
        )
        for row in table
    ]


def lrdb_annotations():
    """
    Compiles ligand and receptor annotations from the LRdb interactions.

    For each record from `lrdb_interactions` and for both of its roles
    ('ligand' and 'receptor') the gene symbol is translated by
    `mapping.map_name` from 'genesymbol' to 'uniprot'. Each resulting
    identifier receives one `LrdbAnnotation` for each cell type listed
    for that role in the record; if no cell types are listed, it receives
    one annotation with `cell_type` set to None.

    Returns:
        A dict with the identifiers from `mapping.map_name` as keys and
        sets of `LrdbAnnotation` named tuples as values.
    """

    def tidy_cell_type(label):
        # 'tymphocyte' is replaced by 'T lymphocyte'; in any other label
        # 'cells' is replaced by 'cell'; missing labels result None
        if label == 'tymphocyte':
            return 'T lymphocyte'

        if not label:
            return None

        return label.replace('cells', 'cell')

    annotations = collections.defaultdict(set)

    for interaction in lrdb_interactions():

        for role in ('ligand', 'receptor'):

            genesymbol = getattr(interaction, '%s_genesymbol' % role)

            for uniprot in mapping.map_name(
                genesymbol,
                'genesymbol',
                'uniprot',
            ):

                # the (None,) placeholder ensures one annotation
                # even if the record has no cell types for this role
                labels = getattr(interaction, '%s_cells' % role) or (None,)

                annotations[uniprot].update(
                    LrdbAnnotation(
                        role = role,
                        cell_type = tidy_cell_type(label),
                        sources = tuple(sorted(interaction.sources)),
                        references = tuple(sorted(interaction.references)),
                    )
                    for label in labels
                )

    return dict(annotations)