Source code for pypath.inputs.celltalkdb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import os
import datetime
import collections

import bs4

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.cache as cache
import pypath.utils.taxonomy as taxonomy
import pypath.utils.mapping as mapping



[docs]
def celltalkdb_download(filename = 'lr_pair', organism = 9606):
    """
    Downloads a file from CellTalkDB.

    If a file named ``celltalkdb_<organism>_<filename>`` already exists in
    the cache directory it is read from there. Otherwise the download page
    is requested first to collect a session cookie and the fields of the
    download form, and the form is then submitted by a POST request, with
    its response stored at the cache path.

    :param str filename:
        The file name of the dataset to download. Possible values:
        lr_pair, gene_info, gene2ensembl, uniprot.
    :param int,str organism:
        Human and mouse supported, in case of incomprehensible value will
        fall back to human.

    :return:
        Generator yielding dataset specific records as named tuples. The
        field names are taken from the tab separated header row of the
        file. Nothing is yielded if the file has no lines at all.

    :raises RuntimeError:
        If the download form can not be found on the download page.
    """

    taxid = taxonomy.ensure_common_name(organism)
    organism = taxid.lower() if taxid in {'Mouse', 'Human'} else 'human'

    cache_fname = 'celltalkdb_%s_%s' % (organism, filename)
    cache_path = os.path.join(cache.get_cachedir(), cache_fname)

    if os.path.exists(cache_path):

        result = curl.FileOpener(cache_path).result

    else:

        url = urls.urls['celltalkdb']['url']
        ref_url = urls.urls['celltalkdb']['ref_url']
        init_url = urls.urls['celltalkdb']['init_url']

        cookie = ''

        c_init = curl.Curl(
            init_url,
            silent = True,
            large = True,
            cache = False,
            follow = False,
            bypass_url_encoding = True,
            retries = 1,
            empty_attempt_again = False,
        )

        for h in c_init.resp_headers:

            if h.lower().startswith(b'set-cookie'):

                # Split only at the first colon, so a colon inside the
                # cookie value does not truncate it; the attributes after
                # the first semicolon (path, expiry, ...) are not sent back.
                # If several such headers are present, the last one wins.
                _, _, cookie_field = h.decode().partition(':')
                cookie = cookie_field.split(';')[0].strip()

        soup = bs4.BeautifulSoup(c_init.fileobj, 'html.parser')
        form = soup.find('form', {'action': 'handler/download.php'})

        if form is None:

            raise RuntimeError(
                'CellTalkDB: download form not found at `%s`.' % init_url
            )

        # Inputs without a name (e.g. plain submit buttons) are never part
        # of a submitted form; a missing value is sent as empty string.
        inputs = {
            field.attrs['name']: field.attrs.get('value', '')
            for field in form.find_all('input')
            if 'name' in field.attrs
        }

        inputs['ref'] = ref_url
        inputs['filename'] = '%s_%s.txt' % (organism, filename)

        c = curl.Curl(
            url = url,
            cache = cache_path,
            post = inputs,
            silent = False,
            large = True,
            req_headers = [
                'Cookie: %s' % cookie,
                'Referer: http://tcm.zju.edu.cn/celltalkdb/download.php',
            ],
        )

        result = c.result

    # A bare `next(result)` on an exhausted iterator would raise
    # StopIteration inside this generator, which Python turns into
    # a RuntimeError; an empty file simply yields no records instead.
    header_line = next(result, None)

    if header_line is None:

        return

    header = header_line.strip().split('\t')

    record = collections.namedtuple('CellTalkDbRecord', header)

    for values in result:

        if not values.strip():

            # blank lines carry no record
            continue

        # Only the line terminator is removed: stripping all whitespace
        # would also remove leading or trailing tabs, i.e. drop empty
        # first or last columns and break the field count.
        yield record(*values.rstrip('\r\n').split('\t'))




[docs]
def celltalkdb_interactions(organism = 9606):
    """
    Collects the ligand-receptor pairs of CellTalkDB
    http://tcm.zju.edu.cn/celltalkdb/index.php

    Each record of the ``lr_pair`` dataset (the default dataset of
    ``celltalkdb_download``) is reduced to the gene symbols of the two
    partners and the content of its ``evidence`` field.

    :param int,str organism:
        Human and mouse supported, in case of incomprehensible value will
        fall back to human.

    :return:
        List of interactions as named tuples with the fields
        ``ligand_genesymbol``, ``receptor_genesymbol`` and ``reference``.
    """

    fields = (
        'ligand_genesymbol',
        'receptor_genesymbol',
        'reference',
    )
    CellTalkDBInteraction = collections.namedtuple(
        'CellTalkDBInteraction',
        fields,
    )

    interactions = []

    for record in celltalkdb_download(organism = organism):

        interactions.append(
            CellTalkDBInteraction(
                record.ligand_gene_symbol,
                record.receptor_gene_symbol,
                record.evidence,
            )
        )

    return interactions




[docs]
def celltalkdb_annotations(organism = 9606):
    """
    Collects ligand and receptor role annotations of proteins from CellTalkDB
    http://tcm.zju.edu.cn/celltalkdb/index.php

    The ligand and receptor gene symbols of each ligand-receptor record are
    translated to UniProt IDs, and each resulting ID is annotated with the
    role and the content of the record's ``evidence`` field.

    :param int,str organism:
        Human and mouse supported, in case of incomprehensible value will
        fall back to human.

    :return:
        Dictionary of annotations with UniProt IDs as keys and sets of
        named tuples (fields ``role`` and ``pmid``) as values.
    """

    taxon = taxonomy.ensure_ncbi_tax_id(organism)

    if taxon not in {9606, 10090}:

        # anything else than human or mouse falls back to human
        taxon = 9606

    CellTalkDBAnnotation = collections.namedtuple(
        'CellTalkDBAnnotation',
        ('role', 'pmid'),
    )

    # each role paired with the record attribute holding its gene symbol
    symbol_attrs = tuple(
        (role, '%s_gene_symbol' % role)
        for role in ('ligand', 'receptor')
    )

    result = {}

    for record in celltalkdb_download(organism = taxon):

        for role, attr in symbol_attrs:

            for uniprot in mapping.map_name(
                getattr(record, attr),
                'genesymbol',
                'uniprot',
                ncbi_tax_id = taxon,
            ):

                result.setdefault(uniprot, set()).add(
                    CellTalkDBAnnotation(
                        role = role,
                        pmid = record.evidence,
                    )
                )

    return result