Source code for pypath.inputs.cspa

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#  This file is part of the `pypath` python module
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#  Website:

from future.utils import iteritems

import collections

import pypath.share.curl as curl
import pypath.share.common as common
import pypath.resources.urls as urls
import pypath.inputs.common as inputs_common
import pypath.utils.mapping as mapping
import pypath.utils.taxonomy as taxonomy

def cspa_annotations(organism = 9606):
    """
    Read per-protein annotations from the CSPA `url_s2` spreadsheet.

    The sheet is picked by organism ('Table A' for human, 'Table B' for
    mouse); its first row is dropped as a header. The identifier in
    column 1 of every remaining row is translated by
    ``mapping.map_name(..., 'uniprot', 'uniprot')`` and each resulting
    ID receives one ``CspaAnnotation`` record built from that row.

    Args:
        organism: Key into ``taxonomy.taxids``; its capitalized value
            must be 'Human' or 'Mouse', otherwise a ``KeyError`` is
            raised when the sheet name is looked up.

    Returns:
        dict: UniProt IDs as keys, sets of ``CspaAnnotation`` named
        tuples as values. Fields: ``high_confidence`` (bool, whether
        column 2 contains 'high confidence'), ``n_cell_types``, ``tm``,
        ``gpi`` (ints from columns 9, 11, 12) and
        ``uniprot_cell_surface`` (bool, whether column 13 is 'yes').
    """

    CspaAnnotation = collections.namedtuple(
        'CspaAnnotation',
        (
            'high_confidence',
            'n_cell_types',
            'tm',
            'gpi',
            'uniprot_cell_surface',
        ),
    )

    sheet_by_organism = {
        'Human': 'Table A',
        'Mouse': 'Table B',
    }

    organism_name = taxonomy.taxids[organism].capitalize()

    # Only the path of the downloaded file is needed; the Curl object
    # is discarded right away.
    download = curl.Curl(
        urls.urls['cspa']['url_s2'],
        large = True,
        silent = False,
    )
    xls_path = download.fname
    del download

    # The sheet name is looked up only after the download, as before.
    table = inputs_common.read_xls(xls_path, sheet_by_organism[organism_name])

    annotations = {}

    # Skip the header row.
    for record in table[1:]:

        for uniprot_id in mapping.map_name(record[1], 'uniprot', 'uniprot'):

            # The record is built per ID so that rows which map to
            # nothing are never parsed.
            annotation = CspaAnnotation(
                high_confidence = 'high confidence' in record[2],
                # numeric cells are converted via float first
                n_cell_types = int(float(record[9])),
                tm = int(float(record[11])),
                gpi = int(float(record[12])),
                uniprot_cell_surface = record[13] == 'yes',
            )
            annotations.setdefault(uniprot_id, set()).add(annotation)

    return annotations
def cspa_cell_types(organism = 9606):
    """
    Read the per-cell-type value matrix from the CSPA `url_s1` spreadsheet.

    The sheet is picked by organism ('Table_E' for human, 'Table_F' for
    mouse). The first row supplies the cell type labels (all cells after
    the first one); in every following row the first cell is an
    identifier translated by
    ``mapping.map_name(..., 'uniprot', 'uniprot')`` and the remaining
    cells are the values for the corresponding cell types.

    Args:
        organism: Key into ``taxonomy.taxids``; its capitalized value
            must be 'Human' or 'Mouse', otherwise a ``KeyError`` is
            raised when the sheet name is looked up.

    Returns:
        dict: Cell type labels as keys; values are dicts mapping
        UniProt IDs to a float, or to ``None`` where the cell is not
        recognized as a float by ``common.is_float``.
    """

    sheet_by_organism = {
        'Human': 'Table_E',
        'Mouse': 'Table_F',
    }

    organism_name = taxonomy.taxids[organism].capitalize()

    # Only the path of the downloaded file is needed; the Curl object
    # is discarded right away.
    download = curl.Curl(
        urls.urls['cspa']['url_s1'],
        large = True,
        silent = False,
    )
    xls_path = download.fname
    del download

    # The sheet name is looked up only after the download, as before.
    table = inputs_common.read_xls(xls_path, sheet_by_organism[organism_name])

    by_cell_type = {}

    # Header row: first cell labels the ID column, the rest are cell types.
    labels = table[0][1:]

    for record in table[1:]:

        for uniprot_id in mapping.map_name(record[0], 'uniprot', 'uniprot'):

            # Column 0 holds the ID, so data columns are numbered from 1.
            for position, label in enumerate(labels, start = 1):

                cell = record[position]

                if common.is_float(cell):
                    parsed = float(cell)
                else:
                    parsed = None

                by_cell_type.setdefault(label, {})[uniprot_id] = parsed

    return by_cell_type
def cspa_cell_type_annotations(organism = 9606):
    """
    Regroup the output of ``cspa_cell_types`` by protein.

    Args:
        organism: Passed on unchanged to ``cspa_cell_types``.

    Returns:
        dict: UniProt IDs as keys, sets of ``CspaCellType`` named
        tuples (fields ``cell_type`` and ``value``) as values. Only
        truthy values are kept, hence entries that are ``None`` or
        zero produce no record.
    """

    CspaCellType = collections.namedtuple(
        'CspaCellType',
        ('cell_type', 'value'),
    )

    per_cell_type = cspa_cell_types(organism = organism)

    by_protein = {}

    for label, measurements in per_cell_type.items():

        for uniprot_id, measured in measurements.items():

            # Leave out missing (None) and zero values.
            if not measured:
                continue

            by_protein.setdefault(uniprot_id, set()).add(
                CspaCellType(cell_type = label, value = measured)
            )

    return by_protein