Source code for pypath.inputs.celltypist
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import re
import collections
import pypath.inputs.common as inputs_common
import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.mapping as mapping
[docs]
def celltypist_annotations():
    """
    Immune cell type marker genes from Celltypist
    (https://github.com/Teichlab/celltypist_wiki).

    Retrieves the spreadsheet from the URL registered under the
    ``celltypist`` key of the URL collection, and translates the gene
    symbols found in its two marker columns to UniProt IDs.

    Returns:
        A dict with UniProt IDs as keys and sets of ``CelltypistAnnotation``
        named tuples as values. Each tuple carries the cell type, cell
        subtype, cell ontology, the marker type (``curated_marker`` or
        ``celltypist_model``, depending on the column the gene was listed
        in), and the sorted tuples of tissues and datasets of the row.
    """

    # `\s*` instead of `\s?`: any amount of whitespace around the comma
    # belongs to the separator, otherwise items separated by a comma and
    # more than one whitespace character keep stray padding and the gene
    # symbols among them can not be translated.
    recomma = re.compile(r'\s*,\s*')


    def multi_value_field(value):
        """
        Splits a comma separated cell into a sorted tuple of its items.
        """

        return tuple(sorted(recomma.split(value.strip())))


    record = collections.namedtuple(
        'CelltypistAnnotation',
        (
            'cell_type',
            'cell_subtype',
            'cell_ontology',
            'marker_type',
            'tissues',
            'datasets',
        )
    )

    result = collections.defaultdict(set)

    url = urls.urls['celltypist']['url']
    c = curl.Curl(url, silent = False, large = True)

    # The handle itself is not used: it is closed right away and the
    # table is read from the path of the downloaded file.
    xls = c.fileobj
    xlsfile = xls.name
    xls.close()

    # The first row is skipped (presumably the header -- to be confirmed
    # against the spreadsheet).
    tbl = inputs_common.read_xls(xlsfile)[1:]

    # (column index, marker type label) of the columns listing gene symbols
    marker_columns = ((6, 'curated_marker'), (7, 'celltypist_model'))

    for r in tbl:

        # These fields are identical for both marker columns of the row,
        # hence they are processed only once.
        row_fields = {
            'cell_type': r[0],
            'cell_subtype': r[1],
            'cell_ontology': r[3],
            'tissues': multi_value_field(r[4]),
            'datasets': multi_value_field(r[5]),
        }

        for col, marker_type in marker_columns:

            # Empty items (empty cell, trailing comma) are not gene symbols.
            genesymbols = [
                genesymbol
                for genesymbol in recomma.split(r[col].strip())
                if genesymbol
            ]

            if not genesymbols:

                continue

            uniprots = mapping.map_names(genesymbols, 'genesymbol', 'uniprot')

            annot = record(marker_type = marker_type, **row_fields)

            for u in uniprots:

                result[u].add(annot)

    return dict(result)