# Source code for pypath.inputs.phosphatome
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import collections
import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.mapping as mapping
import pypath.inputs.common as inputs_common
import pypath.inputs.science as science_input
# [docs]
def phosphatome_annotations():
    """
    Downloads the list of phosphatases from Chen et al, Science Signaling
    (2017) Table S1.

    The first cell of each data row is translated from gene symbol to
    UniProt IDs; every UniProt ID obtained this way receives one annotation
    record built from that row.

    Returns:
        dict: UniProt IDs as keys and sets of ``PhosphatomeAnnotation``
        named tuples as values. The three ``has_*`` fields are booleans,
        ``True`` only if the corresponding cell reads "yes" (ignoring case
        and surrounding whitespace).
    """

    PhosphatomeAnnotation = collections.namedtuple(
        'PhosphatomeAnnotation',
        [
            'fold',
            'family',
            'subfamily',
            'has_protein_substrates',
            'has_non_protein_substrates',
            'has_catalytic_activity',
        ],
    )

    # name of the spreadsheet inside the downloaded zip archive; used both
    # to select the file for extraction and to look it up afterwards
    xlsx_name = 'aag1796_Tables S1 to S23.xlsx'

    def is_yes(cell):
        # the yes/no columns are compared case and whitespace insensitively
        return cell.strip().lower() == 'yes'

    url = urls.urls['phosphatome']['url']
    path = science_input.science_download(url = url)

    c = curl.FileOpener(
        path,
        compr = 'zip',
        files_needed = [xlsx_name],
        large = True,
        default_mode = 'rb',
    )

    tbl = inputs_common.read_xls(c.result[xlsx_name])

    result = collections.defaultdict(set)

    # the first two rows are skipped -- presumably title/header rows of the
    # sheet (NOTE(review): confirm against the actual table layout)
    for rec in tbl[2:]:

        uniprots = mapping.map_name(rec[0], 'genesymbol', 'uniprot')

        if not uniprots:

            # nothing to annotate for gene symbols without UniProt mapping
            continue

        # the record depends only on the row, hence it is built once and
        # shared across all UniProt IDs of the gene symbol
        annot = PhosphatomeAnnotation(
            fold = rec[2],
            family = rec[3],
            subfamily = rec[4],
            has_protein_substrates = is_yes(rec[21]),
            has_non_protein_substrates = is_yes(rec[22]),
            has_catalytic_activity = is_yes(rec[23]),
        )

        for uniprot in uniprots:

            result[uniprot].add(annot)

    # a plain dict is returned so lookups of missing keys raise KeyError
    # instead of silently creating empty sets
    return dict(result)