Source code for pypath.inputs.celltalkdb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#  This file is part of the `pypath` python module
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at https://www.gnu.org/licenses/gpl-3.0.html
#  Website:

import os
import datetime
import collections

import bs4

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.cache as cache
import pypath.utils.taxonomy as taxonomy
import pypath.utils.mapping as mapping

def _celltalkdb_session_cookie(resp_headers):
    """
    Extracts the session cookie from raw HTTP response header lines.

    :param list resp_headers:
        Response header lines as bytes. ``None`` is treated as no headers.

    :return:
        The ``name=value`` part of the last ``Set-Cookie`` header with
        surrounding whitespace removed, or an empty string if there is no
        such header.
    """

    cookie = ''

    for header in resp_headers or ():

        if header.lower().startswith(b'set-cookie'):

            # Split only at the first colon: the cookie value itself may
            # contain colons. Attributes after the first semicolon
            # (path, expiry, etc.) are not part of the cookie to send back.
            cookie = header.decode().partition(':')[2].split(';')[0].strip()

    return cookie


def celltalkdb_download(filename = 'lr_pair', organism = 9606):
    """
    Downloads a file from CellTalkDB.

    If the file is already present in the cache directory, it is read from
    there. Otherwise an initial request is made to obtain a session cookie
    and the fields of the download form, and the file is retrieved by
    posting that form.

    :param str filename:
        The file name of the dataset to download. Possible values:
        lr_pair, gene_info, gene2ensembl, uniprot.
    :param int,str organism:
        Human and mouse supported, in case of incomprehensible value will
        fall back to human.

    :return:
        Generator yielding dataset specific records as named tuples. The
        field names are taken from the header line of the file. Yields
        nothing if no data could be retrieved or the file is empty.

    :raises RuntimeError:
        If the download form can not be found in the initial page.
    """

    taxid = taxonomy.ensure_common_name(organism)
    organism = taxid.lower() if taxid in {'Mouse', 'Human'} else 'human'

    cache_fname = 'celltalkdb_%s_%s' % (organism, filename)
    cache_path = os.path.join(cache.get_cachedir(), cache_fname)

    if os.path.exists(cache_path):

        result = curl.FileOpener(cache_path).result

    else:

        url = urls.urls['celltalkdb']['url']
        ref_url = urls.urls['celltalkdb']['ref_url']
        init_url = urls.urls['celltalkdb']['init_url']

        # First request: only to obtain the session cookie and the form.
        c_init = curl.Curl(
            init_url,
            silent = True,
            large = True,
            cache = False,
            follow = False,
            bypass_url_encoding = True,
            retries = 1,
            empty_attempt_again = False,
        )

        cookie = _celltalkdb_session_cookie(c_init.resp_headers)

        soup = bs4.BeautifulSoup(c_init.fileobj, 'html.parser')
        form = soup.find('form', {'action': 'handler/download.php'})

        if form is None:

            # Fail before posting: without the form fields the server
            # response would not be the requested dataset, yet it would
            # be stored at `cache_path`.
            raise RuntimeError(
                'CellTalkDB: download form not found at `%s`.' % init_url
            )

        inputs = {}

        for field in form.find_all('input'):

            name = field.attrs.get('name')

            # Inputs without a name (e.g. plain buttons) are not submitted.
            if name:

                inputs[name] = field.attrs.get('value', '')

        inputs['ref'] = ref_url
        inputs['filename'] = '%s_%s.txt' % (organism, filename)

        c = curl.Curl(
            url = url,
            cache = cache_path,
            post = inputs,
            silent = False,
            large = True,
            req_headers = [
                'Cookie: %s' % cookie,
                # NOTE(review): the Referer header is sent with an empty
                # value; confirm this is intended.
                'Referer:',
            ],
        )

        result = c.result

    # NOTE(review): presumably `result` is None after a failed download;
    # confirm against `curl.Curl`.
    if result is None:

        return

    # A default for `next` is required here: a bare StopIteration raised
    # inside a generator is converted to RuntimeError (PEP 479).
    header_line = next(result, None)

    if header_line is None:

        return

    header = header_line.strip().split('\t')
    record = collections.namedtuple('CellTalkDbRecord', header)

    for values in result:

        values = values.strip()

        # Blank lines carry no record.
        if not values:

            continue

        yield record(*values.split('\t'))
def celltalkdb_interactions(organism = 9606):
    """
    Collects the ligand-receptor pairs from the CellTalkDB `lr_pair` dataset.

    :param int,str organism:
        Human and mouse supported, in case of incomprehensible value will
        fall back to human. Passed unchanged to ``celltalkdb_download``.

    :return:
        List of ``CellTalkDBInteraction`` named tuples, one for each record
        of the dataset, with the fields `ligand_genesymbol`,
        `receptor_genesymbol` and `reference`.
    """

    CellTalkDBInteraction = collections.namedtuple(
        'CellTalkDBInteraction',
        ['ligand_genesymbol', 'receptor_genesymbol', 'reference'],
    )

    interactions = []

    for row in celltalkdb_download(organism = organism):

        # Positional order follows the field order declared above;
        # the `evidence` column of the dataset becomes `reference`.
        interactions.append(
            CellTalkDBInteraction(
                row.ligand_gene_symbol,
                row.receptor_gene_symbol,
                row.evidence,
            )
        )

    return interactions
def celltalkdb_annotations(organism = 9606):
    """
    Builds ligand and receptor role annotations from the CellTalkDB
    `lr_pair` dataset.

    The gene symbols of each record are translated to UniProt IDs, and
    every resulting ID is annotated with the role it plays in the pair and
    the content of the `evidence` column of that record.

    :param int,str organism:
        Human and mouse supported, in case of incomprehensible value will
        fall back to human.

    :return:
        Dictionary with UniProt IDs as keys and sets of
        ``CellTalkDBAnnotation`` named tuples (`role`, `pmid`) as values.
    """

    CellTalkDBAnnotation = collections.namedtuple(
        'CellTalkDBAnnotation',
        ['role', 'pmid'],
    )

    taxon = taxonomy.ensure_ncbi_tax_id(organism)

    # Anything other than human or mouse falls back to human.
    if taxon not in {9606, 10090}:

        taxon = 9606

    by_uniprot = {}

    for row in celltalkdb_download(organism = taxon):

        for side in ('ligand', 'receptor'):

            symbol = getattr(row, '%s_gene_symbol' % side)
            uniprot_ids = mapping.map_name(
                symbol,
                'genesymbol',
                'uniprot',
                ncbi_tax_id = taxon,
            )

            for uniprot in uniprot_ids:

                by_uniprot.setdefault(uniprot, set()).add(
                    CellTalkDBAnnotation(role = side, pmid = row.evidence)
                )

    return by_uniprot