# Source code for pypath.inputs.phosphatome

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import collections

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.mapping as mapping
import pypath.inputs.common as inputs_common
import pypath.inputs.science as science_input



# [docs]
def phosphatome_annotations():
    """
    Downloads the list of phosphatases from Chen et al, Science Signaling
    (2017) Table S1.

    The supplementary zip archive is retrieved by
    ``science_input.science_download`` from the URL registered under
    ``urls.urls['phosphatome']['url']``. The Excel workbook is extracted
    from the archive and read by ``inputs_common.read_xls``. The value in
    the first column of each record is translated from gene symbol to
    UniProt by ``mapping.map_name``; a record is added under every
    UniProt ID the mapping returns, and contributes nothing if the mapping
    returns none.

    Returns:
        A ``dict`` with UniProt IDs as keys and ``set`` objects of
        ``PhosphatomeAnnotation`` named tuples as values. The fields
        ``fold``, ``family`` and ``subfamily`` carry the cell values as
        they are read from the table; the three ``has_*`` fields are
        ``bool``, ``True`` only if the corresponding cell is "yes"
        (ignoring case and surrounding whitespace).
    """

    PhosphatomeAnnotation = collections.namedtuple(
        'PhosphatomeAnnotation',
        [
            'fold',
            'family',
            'subfamily',
            'has_protein_substrates',
            'has_non_protein_substrates',
            'has_catalytic_activity',
        ],
    )

    # name of the workbook inside the zip archive; used both for
    # selecting the file to extract and for looking it up in the result
    xlsx_name = 'aag1796_Tables S1 to S23.xlsx'

    def is_yes(value):
        """
        Tells if a table cell contains "yes", case insensitive, ignoring
        the leading and trailing whitespace.
        """

        return value.strip().lower() == 'yes'

    url = urls.urls['phosphatome']['url']
    path = science_input.science_download(url = url)
    c = curl.FileOpener(
        path,
        compr = 'zip',
        files_needed = [xlsx_name],
        large = True,
        default_mode = 'rb',
    )
    tbl = inputs_common.read_xls(c.result[xlsx_name])

    result = collections.defaultdict(set)

    # the first two rows are skipped -- presumably title and column
    # header rows of the sheet; TODO confirm against the workbook
    for rec in tbl[2:]:

        uniprots = mapping.map_name(rec[0], 'genesymbol', 'uniprot')

        for uniprot in uniprots:

            result[uniprot].add(
                PhosphatomeAnnotation(
                    fold = rec[2],
                    family = rec[3],
                    subfamily = rec[4],
                    has_protein_substrates = is_yes(rec[21]),
                    has_non_protein_substrates = is_yes(rec[22]),
                    has_catalytic_activity = is_yes(rec[23]),
                )
            )

    # plain dict, so lookups of missing keys by the caller raise KeyError
    # instead of silently creating empty sets
    return dict(result)