# Source code for pypath.inputs.cellphonedb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems
from past.builtins import xrange, range

import re
import csv
import collections
import itertools

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.internals.intera as intera
import pypath.share.common as common
import pypath.share.session as session
import pypath.inputs.uniprot_db as uniprot_db

# Module-level logger named `cellphonedb_input`; `_log` is a shortcut to its
# logging method, used by the functions below to report skipped entities.
_logger = session.Logger(name = 'cellphonedb_input')
_log = _logger._log

# One annotation record per protein or complex. The boolean fields are
# parsed from TRUE/FALSE columns of the CellPhoneDB tables, the `*_class`
# fields are tuples of lower-cased class labels.
CellPhoneDBAnnotation = collections.namedtuple(
    'CellPhoneDBAnnotation',
    [
        'receptor',        # flagged as receptor
        'receptor_class',  # receptor class labels
        'peripheral',      # flagged as peripheral
        'secreted',        # flagged as secreted
        'secreted_class',  # secreted class labels
        'transmembrane',   # flagged as transmembrane
        'integrin',        # flagged as integrin
    ],
)


def cellphonedb_ligands_receptors():
    """
    Collects the ligands and receptors annotated in CellPhoneDB.

    Both the protein and the complex annotations are scanned. An entity
    is a receptor if it is flagged as receptor. It is a ligand if it is
    secreted, or if it is transmembrane or peripheral without being a
    receptor. The two conditions are checked independently, so one entity
    may be a member of both sets.

    Returns:
        Tuple of two sets: the ligands first, the receptors second. The
        members are the keys of the annotation dicts.
    """

    protein_annot = cellphonedb_protein_annotations()
    complex_annot = cellphonedb_complex_annotations()

    ligands, receptors = set(), set()

    all_annotations = itertools.chain(
        protein_annot.items(),
        complex_annot.items(),
    )

    for entity, annot in all_annotations:

        if annot.receptor:

            receptors.add(entity)

        membrane_bound = annot.transmembrane or annot.peripheral

        if annot.secreted or (membrane_bound and not annot.receptor):

            ligands.add(entity)

    return ligands, receptors
def _cellphonedb_annotations(url, name_method):
    """
    Downloads a CellPhoneDB annotation table and converts its rows to
    `CellPhoneDBAnnotation` records.

    Args:
        url: Location of the CSV table, passed to `curl.Curl`.
        name_method: Callable receiving one row (a dict keyed by column
            name) and returning the key or keys for that row: either a
            single `str` or `intera.Complex`, or an iterable of keys.

    Returns:
        Dict mapping each key to its `CellPhoneDBAnnotation`. If several
        rows yield the same key, the last row wins.
    """

    # applied in this order: ' | ' has to become ',' before the remaining
    # spaces are turned into underscores
    substitutions = (
        ('_add', ''),
        (' | ', ','),
        (' ', '_'),
    )

    def is_true(row, column):
        # a missing column counts as False

        return column in row and row[column].upper() == 'TRUE'

    def class_labels(row, column):
        # the labels come from the `<column>_desc` field; if that is empty
        # but the flag is not FALSE, the column name itself is the label

        desc_column = '%s_desc' % column

        if column in row:

            switched_off = row[column].upper() == 'FALSE'

        else:

            switched_off = not row[desc_column]

        label = '' if switched_off else (row[desc_column] or column)

        for old, new in substitutions:

            label = label.replace(old, new)

        labels = label.lower().split(',') if label else None

        return tuple(sorted(common.to_set(labels)))

    def to_record(row):

        return CellPhoneDBAnnotation(
            receptor = is_true(row, 'receptor'),
            receptor_class = class_labels(row, 'receptor'),
            peripheral = is_true(row, 'peripheral'),
            secreted = is_true(row, 'secreted'),
            secreted_class = class_labels(row, 'secreted'),
            transmembrane = is_true(row, 'transmembrane'),
            integrin = is_true(row, 'integrin'),
        )

    result = {}

    download = curl.Curl(url, large = True, silent = False)
    rows = list(csv.DictReader(download.result))

    for row in rows:

        keys = name_method(row)

        # a lone key is wrapped so the loop below handles both cases
        if isinstance(keys, (str, intera.Complex)):

            keys = (keys,)

        for key in keys:

            result[key] = to_record(row)

    return result
def cellphonedb_protein_annotations(add_complex_annotations = False):
    """
    Protein annotations from CellPhoneDB.

    :arg bool add_complex_annotations:
        Deprecated and ignored by this function. It used to copy the
        annotations of complexes to each of their member proteins,
        which resulted in wrong annotations.

    :return:
        Dict of `CellPhoneDBAnnotation` records. The keys are the
        identifiers obtained by passing the `uniprot` column, after HLA
        name processing, through `mapping.map_names`.
    """

    def row_identifiers(rec):

        candidates = _cellphonedb_hla(rec['uniprot'])

        return mapping.map_names(candidates, 'uniprot', 'uniprot')

    return _cellphonedb_annotations(
        url = urls.urls['cellphonedb_git']['proteins'],
        name_method = row_identifiers,
    )
def _cellphonedb_hla(uniprot): """ Returns *set*. """ uniprots = None # for HLA genes in the uniprot column we have "HLA..." gene symbols # but not always in the standard format (with dash) if uniprot.startswith('HLA') and '-' not in uniprot: genesymbol = 'HLA-%s' % uniprot[3:] uniprots = mapping.map_name(genesymbol, 'genesymbol', 'uniprot') return uniprots or {uniprot}
def cellphonedb_complex_annotations():
    """
    Complex annotations from CellPhoneDB.

    For each row of the complexes table an `intera.Complex` is built.
    If the `stoichiometry` column is filled, its `;` separated gene
    symbols are translated one by one and used as components; otherwise
    the non-empty values of the `uniprot_1` ... `uniprot_4` columns are
    used.

    Returns:
        Dict with `intera.Complex` objects as keys and
        `CellPhoneDBAnnotation` records as values.
    """

    def member_columns(rec):

        values = (rec['uniprot_%u' % idx] for idx in range(1, 5))

        # drop the empty member columns
        return tuple(filter(None, values))

    def components(rec):

        stoichiometry = rec['stoichiometry']

        if stoichiometry:

            return tuple(
                mapping.map_name0(genesymbol, 'genesymbol', 'uniprot')
                for genesymbol in stoichiometry.split(';')
            )

        return member_columns(rec)

    def to_complex(rec):

        members = components(rec)

        return intera.Complex(
            name = rec['complex_name'],
            components = members,
            sources = 'CellPhoneDB',
            ids = rec['complex_name'],
        )

    return _cellphonedb_annotations(
        url = urls.urls['cellphonedb_git']['complexes'],
        name_method = to_complex,
    )
def _cellphonedb_get_entity(name, complexes): if name in complexes: return (complexes[name],) if '_by' in name: _log(f'Ignoring entity: `{name}`.') return () if ':' in name: name = name.split(':')[1] if '_' in name: name = mapping.map_name0(name, 'name-entry', 'name') if not uniprot_db.is_uniprot(name): uniprot = mapping.map_name0(name, 'genesymbol', 'uniprot') name = uniprot or name name = _cellphonedb_hla(name) return (name,) if isinstance(name, str) else name
def cellphonedb_interactions():
    """
    Interactions between ligands and receptors from CellPhoneDB.

    Each row of the interactions table is expanded to all combinations
    of the entities its two partners resolve to. If the first partner is
    a receptor and the second is a ligand, the two are swapped, so the
    ligand comes first.

    Yields:
        `CellphonedbInteraction` named tuples. `sources` is a `;`
        separated string that always contains `CellPhoneDB`;
        `references` holds the PubMed IDs found in the `source` column,
        joined by `;`; `type_a` and `type_b` are one of `ligand`,
        `receptor` or `unknown`.
    """

    def role_of(entity):
        # ligands are checked first, hence an entity present in both
        # sets is reported as ligand

        if entity in ligands:

            return 'ligand'

        if entity in receptors:

            return 'receptor'

        return 'unknown'

    def is_true(row, column):

        return column in row and row[column].upper() == 'TRUE'

    CellphonedbInteraction = collections.namedtuple(
        'CellphonedbInteraction',
        [
            'id_a',
            'id_b',
            'sources',
            'references',
            'interaction_type',
            'type_a',
            'type_b',
            'is_ppi',
        ]
    )

    pmid_pattern = re.compile(r'PMID: ([0-9]+)')
    separator_pattern = re.compile(r'[,;]')

    ligands, receptors = cellphonedb_ligands_receptors()

    # lookup of the complexes by each of their CellPhoneDB identifiers
    complexes = {
        cpdb_id: cplex
        for cplex in cellphonedb_complexes().values()
        for cpdb_id in cplex.ids['CellPhoneDB']
    }

    url = urls.urls['cellphonedb_git']['interactions']
    download = curl.Curl(url, silent = False, large = True)

    for row in csv.DictReader(download.result):

        entities_a = _cellphonedb_get_entity(
            row['partner_a'],
            complexes = complexes,
        )
        entities_b = _cellphonedb_get_entity(
            row['partner_b'],
            complexes = complexes,
        )
        ppi = is_true(row, 'is_ppi')

        for first, second in itertools.product(entities_a, entities_b):

            role_first = role_of(first)
            role_second = role_of(second)

            if role_second == 'ligand' and role_first == 'receptor':

                # reverse the direction: ligand -> receptor
                first, second = second, first
                role_first, role_second = role_second, role_first

            strategy = row['annotation_strategy']

            if strategy == 'curated':

                sources = 'CellPhoneDB'

            else:

                renamed = strategy.replace(
                    'guidetopharmacology.org',
                    'Guide2Pharma'
                )
                sources = '%s;CellPhoneDB' % ';'.join(
                    separator_pattern.split(renamed)
                )

            references = ';'.join(pmid_pattern.findall(row['source']))

            yield CellphonedbInteraction(
                id_a = first,
                id_b = second,
                sources = sources,
                references = references,
                interaction_type = '%s-%s' % (role_first, role_second),
                type_a = role_first,
                type_b = role_second,
                is_ppi = ppi,
            )
def cellphonedb_complexes():
    """
    Complexes from CellPhoneDB.

    Takes the complexes that are the keys of the complex annotations,
    and attaches the annotation found under the complex's string
    representation as the `CellPhoneDB` attribute of the complex.

    Returns:
        Dict of the complexes keyed by their string representation.
    """

    annotations = cellphonedb_complex_annotations()
    by_label = {}

    for cplex in annotations:

        label = str(cplex)

        # NOTE(review): looking up by the string presumably relies on the
        # complex objects hashing and comparing by their string
        # representation -- confirm in `intera.Complex`
        if label in annotations:

            cplex.add_attr('CellPhoneDB', annotations[label])

        by_label[label] = cplex

    return by_label