Source code for pypath.inputs.panglaodb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import csv
import collections

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.settings as settings
import pypath.share.common as common
import pypath.utils.mapping as mapping


def panglaodb_raw():
    """
    Raw table of cell type marker genes from PanglaoDB
    (https://panglaodb.se/index.html).

    The table is retrieved by ``curl.Curl`` from the URL registered under
    ``panglaodb`` in ``urls.urls``; the configured ``user_agent`` setting
    is passed along as request header.

    Returns:
        List of dicts, one for each record of the tab separated table,
        as yielded by ``csv.DictReader`` (keys are the column labels
        from the first line).
    """

    request_headers = [settings.get('user_agent')]

    download = curl.Curl(
        urls.urls['panglaodb']['url'],
        silent = False,
        large = True,
        req_headers = request_headers,
    )

    reader = csv.DictReader(download.result, delimiter = '\t')

    return list(reader)
def panglaodb_annotations():
    """
    Cell type marker genes from PanglaoDB (https://panglaodb.se/index.html).

    Each record of the table from ``panglaodb_raw`` is translated from
    gene symbol to UniProt IDs by ``mapping.map_name``. The attempts, in
    order, each done only if the previous ones gave no result:

    1. ``uniprot`` with the default organism of ``map_name``
       (presumably human, the records are labeled with taxon 9606).
    2. ``trembl`` with the default organism, only for human markers.
    3. ``uniprot`` and then ``trembl`` with mouse (taxon 10090), only for
       mouse markers; the gene symbol is converted to capitalized form.
    4. For records of gene type ``non-coding RNA`` the gene symbol itself
       is used as identifier, and the entity type is set to ``lncrna``.

    Returns:
        Dict with identifiers as keys and sets of ``PanglaodbAnnotation``
        named tuples as values. The ``human`` and ``mouse`` fields are
        booleans.
    """

    to_float = common.float_or_nan

    record = collections.namedtuple(
        'PanglaodbAnnotation',
        (
            'canonical_marker',
            'cell_type',
            'organ',
            'germ_layer',
            'entity_type',
            'human',
            'mouse',
            'human_sensitivity',
            'mouse_sensitivity',
            'human_specificity',
            'mouse_specificity',
            # the spelling of this field is part of the public interface
            'ubiquitiousness',
            'ncbi_tax_id',
        )
    )

    result = collections.defaultdict(set)

    raw = panglaodb_raw()

    for r in raw:

        # these must be plain booleans: with a trailing comma they would
        # be one element tuples, which are always true, hence the species
        # conditions below would never be effective
        human = 'Hs' in r['species']
        mouse = 'Mm' in r['species']
        entity_type = 'protein'

        uniprots = mapping.map_name(
            r['official gene symbol'],
            'genesymbol',
            'uniprot',
        )
        this_ncbi_tax_id = 9606

        if not uniprots and human:

            uniprots = mapping.map_name(
                r['official gene symbol'],
                'genesymbol',
                'trembl',
            )

        if not uniprots and mouse:

            # the table provides upper case symbols, here we convert
            # each dash separated part to capitalized form for the
            # lookup with mouse
            mouse_gs = '-'.join(
                x.capitalize()
                for x in r['official gene symbol'].split('-')
            )

            uniprots = mapping.map_name(
                mouse_gs,
                'genesymbol',
                'uniprot',
                ncbi_tax_id = 10090,
            )

            if not uniprots:

                uniprots = mapping.map_name(
                    mouse_gs,
                    'genesymbol',
                    'trembl',
                    ncbi_tax_id = 10090,
                )

            this_ncbi_tax_id = 10090

        if not uniprots and r['gene type'] == 'non-coding RNA':

            # no protein to map to: keep the gene symbol as identifier
            uniprots = (r['official gene symbol'],)
            entity_type = 'lncrna'

        for u in uniprots:

            result[u].add(
                record(
                    canonical_marker = r['canonical marker'] == '1',
                    cell_type = r['cell type'],
                    organ = r['organ'],
                    germ_layer = r['germ layer'],
                    entity_type = entity_type,
                    # TODO: handle all entity types
                    human = human,
                    mouse = mouse,
                    human_sensitivity = to_float(r['sensitivity_human']),
                    mouse_sensitivity = to_float(r['sensitivity_mouse']),
                    human_specificity = to_float(r['specificity_human']),
                    mouse_specificity = to_float(r['specificity_mouse']),
                    ubiquitiousness = to_float(r['ubiquitousness index']),
                    ncbi_tax_id = this_ncbi_tax_id,
                )
            )

    return result