#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import csv
import itertools
import collections
import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.taxonomy as taxonomy
import pypath.utils.mapping as mapping
import pypath.internals.intera as intera
import pypath.share.session as session
import pypath.core.entity as entity
# Logger of this input module, created under the name `cellinker_input`.
_logger = session.Logger(name = 'cellinker_input')
# Shorthand for the logger's `_log` method; the functions below call it to
# record error messages before raising.
_log = _logger._log
# Record type of one preprocessed Cellinker interaction: the two partners
# (`ligand`, `receptor`), their locations, the resources and PubMed IDs
# supporting the interaction, and the interaction type.
CellinkerInteraction = collections.namedtuple(
    'CellinkerInteraction',
    'ligand receptor '
    'ligand_location receptor_location '
    'resources pmids type',
)
[docs]
def cellinker_complexes_raw(organism = 9606):
    """
    Downloads protein complex data from the Cellinker database
    (http://www.rna-society.org/cellinker/).

    Args
        organism (int,str): Name or identifier of the organism. Only mouse
            and human are available.

    Returns
        (list): List of tuples each describing a protein complex with its
            role (ligand or receptor), components, localization and
            Cellinker ID.

    Raises
        ValueError: If the organism is neither human nor mouse.
    """

    CellinkerComplex = collections.namedtuple(
        'CellinkerComplex',
        (
            'role',
            'cellinker_id',
            'components',
            'location',
        ),
    )

    CellinkerComplexComponent = collections.namedtuple(
        'CellinkerComplexComponent',
        (
            'genesymbol',
            'entrez',
        ),
    )

    # Lowercase only a successful lookup: calling `.lower()` on a missing
    # name would raise AttributeError and mask the ValueError below.
    # NOTE(review): presumably `ensure_common_name` returns None for
    # unknown organisms -- confirm against `pypath.utils.taxonomy`.
    organism_common = taxonomy.ensure_common_name(organism)

    if organism_common:

        organism_common = organism_common.lower()

    if organism_common not in {'human', 'mouse'}:

        msg = (
            'Unknown organism: %s (%s). Only human and mouse '
            'are available.' % (str(organism_common), str(organism))
        )
        _log(msg)
        raise ValueError(msg)

    url = urls.urls['cellinker_rescued']['complex'] % organism_common
    c = curl.Curl(url, large = True, silent = False)

    result = []

    # the first line is the header, we discard it
    _ = next(c.result, None)

    for line in c.result:

        # drop the line terminator, otherwise it would stick to the
        # last field of the record
        line = line.rstrip('\r\n')

        # blank lines (e.g. at the end of the file) carry no record
        if not line:

            continue

        r = line.split(',')

        # columns 3-12 are up to five (gene symbol, Entrez ID) pairs;
        # slots with empty gene symbol are unused
        components = tuple(
            CellinkerComplexComponent(
                genesymbol = r[i],
                entrez = r[i + 1],
            )
            for i in range(3, 13, 2)
            if r[i]
        )

        result.append(
            CellinkerComplex(
                # a value in the first column means ligand; otherwise the
                # record is a receptor and its ID is in the second column
                role = 'ligand' if r[0] else 'receptor',
                cellinker_id = r[0] or r[1],
                components = components,
                location = r[13],
            )
        )

    return result
[docs]
def components_to_complex(components, organism = None):
    """
    Builds `pypath.internals.intera.Complex` objects from the components
    of one Cellinker complex.

    Each component is translated to a set of UniProt IDs; one complex is
    created for every combination that takes one ID from each component.

    Args
        components (tuple): Components of a complex, as returned by
            `cellinker_complexes_raw`.
        organism (int,str): Name or identifier of the organism. Only mouse
            and human are available. Optional: if not provided, human is
            assumed when all gene symbols are uppercase, mouse otherwise.

    Returns
        (set): A set of `pypath.internals.intera.Complex` objects.

    Raises
        ValueError: If the organism is neither human nor mouse.
    """

    if not organism:

        # guess from the gene symbols: all uppercase means human
        all_uppercase = all(
            comp.genesymbol == comp.genesymbol.upper()
            for comp in components
        )
        organism = 9606 if all_uppercase else 10090

    _organism = taxonomy.ensure_ncbi_tax_id(organism)

    if _organism not in {9606, 10090}:

        msg = (
            'Unknown organism: %s (%s). Only human and mouse '
            'are available.' % (str(_organism), str(organism))
        )
        _log(msg)
        raise ValueError(msg)

    # one collection of UniProt IDs for each component
    uniprots_by_component = [
        _cellinker_uniprots(comp.genesymbol, comp.entrez, _organism)
        for comp in components
    ]

    return {
        intera.Complex(
            components = combination,
            ncbi_tax_id = _organism,
            sources = 'Cellinker',
        )
        for combination in itertools.product(*uniprots_by_component)
    }
[docs]
def cellinker_complexes(organism = 9606):
    """
    Protein complexes from the Cellinker database
    (http://www.rna-society.org/cellinker/).

    Args
        organism (int,str): Name or identifier of the organism. Only mouse
            and human are available.

    Returns
        (dict): A dict of complexes, with string representations as keys and
            `pypath.internals.intera.Complex` objects as values.
    """

    raw_records = cellinker_complexes_raw(organism = organism)

    return {
        str(cplex): cplex
        for record in raw_records
        for cplex in components_to_complex(
            record.components,
            organism = organism,
        )
    }
[docs]
def cellinker_lr_interactions_raw(organism = 9606):
    """
    Raw ligand-receptor interaction records from the Cellinker database
    (http://www.rna-society.org/cellinker/).

    Args
        organism (int,str): Name or identifier of the organism. Only mouse
            and human are available.

    Returns
        (list): A list of dicts, each representing an interaction record
            as it is provided by the database.
    """

    # `lr` is the protein-protein ligand-receptor dataset
    records = _cellinker_interactions_raw(dataset = 'lr', organism = organism)

    return records
[docs]
def cellinker_smol_interactions_raw(organism = 9606):
    """
    Raw small molecule ligand-protein receptor interaction records from
    the Cellinker database (http://www.rna-society.org/cellinker/).

    Args
        organism (int,str): Name or identifier of the organism. Only mouse
            and human are available.

    Returns
        (list): A list of dicts, each representing an interaction record
            as it is provided by the database.
    """

    # `smol` is the small molecule ligand dataset
    records = _cellinker_interactions_raw(
        dataset = 'smol',
        organism = organism,
    )

    return records
def _cellinker_interactions_raw(dataset = 'lr', organism = 9606):
    """
    Downloads either the ligand-receptor or the small molecule
    ligand-receptor dataset from the Cellinker database.

    Args
        dataset (str): Either `lr` or `smol`, meaning protein-protein or
            small molecule-protein ligand-receptor interactions.
        organism (int,str): Name or identifier of the organism. Only mouse
            and human are available.

    Returns
        (list): A list of dicts, each representing an interaction record
            as it is provided by the database.

    Raises
        ValueError: If the dataset is unknown or the organism is neither
            human nor mouse.
    """

    if dataset not in {'lr', 'smol'}:

        msg = 'Unknown Cellinker interaction dataset: `%s`.' % str(dataset)
        _log(msg)
        raise ValueError(msg)

    # the URL of the `lr` dataset is filled in with the latin name of the
    # organism, the URL of the `smol` dataset with the common name
    use_latin_name = dataset == 'lr'

    if use_latin_name:

        _organism = taxonomy.ensure_latin_name(organism)
        organisms_allowed = {'Homo sapiens', 'Mus musculus'}

    else:

        _organism = taxonomy.ensure_common_name(organism)
        organisms_allowed = {'Human', 'Mouse'}

    if _organism not in organisms_allowed:

        msg = (
            'Unknown organism: %s (%s). Only human and mouse '
            'are available.' % (str(_organism), str(organism))
        )
        _log(msg)
        raise ValueError(msg)

    if not use_latin_name:

        # the common name goes into the URL in lowercase
        _organism = _organism.lower()

    url = urls.urls['cellinker_rescued'][dataset] % _organism
    c = curl.Curl(url, large = True, silent = False)

    # tab separated table, the first line provides the keys of the dicts
    reader = csv.DictReader(c.result, delimiter = '\t')

    return list(reader)
[docs]
def cellinker_lr_interactions(organism = 9606):
    """
    Preprocessed ligand-receptor interactions from the Cellinker database
    (http://www.rna-society.org/cellinker/).

    Args
        organism (int,str): Name or identifier of the organism. Only mouse
            and human are available.

    Returns
        (set): A set of `CellinkerInteraction` tuples. The proteins are
            represented by their UniProt IDs, while the protein complexes
            by `Complex` objects.
    """

    # resource labels to be replaced by another spelling or name
    db_names = {
        'IUPHAR': 'Guide2Pharma',
        'CellphoneDB': 'CellPhoneDB',
    }

    interactions = set()

    raw = cellinker_lr_interactions_raw(organism = organism)
    ncbi_tax_id = taxonomy.ensure_ncbi_tax_id(organism)

    # Cellinker complex IDs mapped to sets of `Complex` objects
    complexes = {
        cplex.cellinker_id: components_to_complex(
            cplex.components,
            organism = ncbi_tax_id,
        )
        for cplex in cellinker_complexes_raw(organism = organism)
    }

    for rec in raw:

        ligands = _cellinker_uniprots(
            rec['Ligand_symbol'],
            rec['Ligand_id'],
            ncbi_tax_id,
            complexes = complexes,
        )
        receptors = _cellinker_uniprots(
            rec['Receptor_symbol'],
            rec['Receptor_id'],
            ncbi_tax_id,
            complexes = complexes,
        )

        renamed = [
            db_names.get(db, db)
            for db in rec['Other.DB'].split(';')
        ]
        # empty string becomes None
        resources = ';'.join(renamed) or None

        # one record for each ligand-receptor combination
        interactions.update(
            CellinkerInteraction(
                ligand = ligand,
                receptor = receptor,
                ligand_location = rec['Ligand_location'],
                # the column labels are not consistent in the data
                receptor_location = rec['Receptor.location'],
                resources = resources,
                # the label is misspelled in the data
                pmids = rec['Pmubmed.ID'] or None,
                type = rec['Type'],
            )
            for ligand, receptor in itertools.product(ligands, receptors)
        )

    return interactions
[docs]
def cellinker_smol_interactions(organism = 9606):
    """
    Preprocessed small molecule ligand-protein receptor interactions from
    the Cellinker database (http://www.rna-society.org/cellinker/).

    Records without a PubChem CID for the ligand are skipped.

    Args
        organism (int,str): Name or identifier of the organism. Only mouse
            and human are available.

    Returns
        (set): A set of `CellinkerInteraction` tuples. The proteins are
            represented by their UniProt IDs, the small molecules by
            PubChem CIDs, while the protein complexes by `Complex` objects.
            The ligand location is always `None`.
    """

    # resource labels to be replaced by another spelling or name
    db_names = {
        'IUPHAR': 'Guide2Pharma',
        'CellphoneDB': 'CellPhoneDB',
    }

    interactions = set()

    raw = cellinker_smol_interactions_raw(organism = organism)
    ncbi_tax_id = taxonomy.ensure_ncbi_tax_id(organism)

    # Cellinker complex IDs mapped to sets of `Complex` objects
    complexes = {
        cplex.cellinker_id: components_to_complex(
            cplex.components,
            organism = ncbi_tax_id,
        )
        for cplex in cellinker_complexes_raw(organism = organism)
    }

    for rec in raw:

        pubchem_cid = rec['ligand_pubchem_cid']

        if not pubchem_cid:

            continue

        receptors = _cellinker_uniprots(
            rec['Receptor_symbol'],
            rec['Receptor_id'],
            ncbi_tax_id,
            complexes = complexes,
        )

        renamed = [
            db_names.get(db, db)
            for db in rec['Other.DB'].split(';')
        ]
        # empty string becomes None
        resources = ';'.join(renamed) or None

        # the single ligand is paired with each of the receptors
        for receptor in receptors:

            interactions.add(
                CellinkerInteraction(
                    ligand = pubchem_cid,
                    receptor = receptor,
                    ligand_location = None,
                    receptor_location = rec['Receptor_location'],
                    resources = resources,
                    pmids = rec['pubmed_id'] or None,
                    type = rec['Type'],
                )
            )

    return interactions
[docs]
def cellinker_annotations(organism = 9606, entity_type = None):
    """
    Ligand and receptor annotations derived from the ligand-receptor
    interactions of the Cellinker database
    (http://www.rna-society.org/cellinker/).

    Args
        organism (int,str): Name or identifier of the organism. Only mouse
            and human are available.
        entity_type (str): Either `protein` or `complex`. If `None`, both
            proteins and protein complexes will be included.

    Returns
        (dict): A dict of sets of tuples, keys are UniProt IDs for proteins
            and `Complex` objects for protein complexes. The tuples are
            annotations with ligand or receptor role, localization and
            type.
    """

    CellinkerAnnotation = collections.namedtuple(
        'CellinkerAnnotation',
        (
            'role',
            'location',
            'type',
        ),
    )

    annotations = collections.defaultdict(set)

    for interaction in cellinker_lr_interactions(organism = organism):

        # both partners of the interaction get an annotation
        for role in ('ligand', 'receptor'):

            partner = getattr(interaction, role)
            partner_type = entity.Entity._get_entity_type(partner)

            # entities of other type than the requested one are skipped
            if entity_type and entity_type != partner_type:

                continue

            annotations[partner].add(
                CellinkerAnnotation(
                    role = role,
                    location = getattr(interaction, '%s_location' % role),
                    type = interaction.type,
                )
            )

    return dict(annotations)
[docs]
def cellinker_protein_annotations(organism = 9606):
    """
    Ligand and receptor annotations of proteins from the Cellinker database
    (http://www.rna-society.org/cellinker/).

    Args
        organism (int,str): Name or identifier of the organism. Only mouse
            and human are available.

    Returns
        (dict): A dict of sets of tuples, keys are UniProt IDs. The tuples
            are annotations with ligand or receptor role, localization and
            type.
    """

    # same as `cellinker_annotations`, restricted to proteins
    annotations = cellinker_annotations(
        organism = organism,
        entity_type = 'protein',
    )

    return annotations
[docs]
def cellinker_complex_annotations(organism = 9606):
    """
    Ligand and receptor annotations of protein complexes from the Cellinker
    database (http://www.rna-society.org/cellinker/).

    Args
        organism (int,str): Name or identifier of the organism. Only mouse
            and human are available.

    Returns
        (dict): A dict of sets of tuples, keys are `Complex` objects.
            The tuples are annotations with ligand or receptor role,
            localization and type.
    """

    # same as `cellinker_annotations`, restricted to complexes
    annotations = cellinker_annotations(
        organism = organism,
        entity_type = 'complex',
    )

    return annotations
def _cellinker_uniprots(gsymbol, entrez, ncbi_tax_id, complexes = None):
    """
    Translates a Gene Symbol and an Entrez Gene ID to UniProt IDs, or
    looks up a complex by its identifier.

    Args
        gsymbol (str): A Gene Symbol.
        entrez (str): An Entrez Gene ID; it is also used as the key for
            the lookup in `complexes`.
        ncbi_tax_id (int): NCBI Taxonomy ID of the organism.
        complexes (dict): Optional, a dict of complexes. If `entrez` is a
            key in this dict, the corresponding value is returned and no
            ID translation happens.

    Returns
        (set): The union of the UniProt IDs mapped from the Gene Symbol
            and from the Entrez Gene ID; or the value from `complexes`.
    """

    if complexes and entrez in complexes:

        return complexes[entrez]

    uniprots_from_genesymbol = mapping.map_name(
        gsymbol,
        'genesymbol',
        'uniprot',
        ncbi_tax_id = ncbi_tax_id,
    )
    uniprots_from_entrez = mapping.map_name(
        entrez,
        'entrez',
        'uniprot',
        ncbi_tax_id = ncbi_tax_id,
    )

    return uniprots_from_genesymbol | uniprots_from_entrez