# Source code for pypath.inputs.progeny
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
import collections
import pypath.share.session as session
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.taxonomy as taxonomy
import pypath.utils.mapping as mapping
import pypath.inputs.rdata as rdata
# Module-level logger named `progeny_input`; `_log` is a shortcut to its
# logging method, called by the functions of this module.
_logger = session.Logger(name = 'progeny_input')
_log = _logger._log
[docs]
def progeny_raw(organism = 9606):
    """
    Downloads the full PROGENy model of one organism.

    PROGENy (https://github.com/saezlab/progeny) provides signatures of
    pathway responsive genes, based on transcriptomics data.

    Args
        organism (int,str): Name or NCBI Taxonomy ID of the organism.
            Only human and mouse are supported.

    Returns
        (pandas.DataFrame): A data frame of genes, pathways, weights and
            p-values for each association; this is the
            `model_<organism>_full` object of the converted RData file.

    Raises
        ValueError: If the organism is neither human nor mouse.
    """

    supported = ('Human', 'Mouse')
    common_name = taxonomy.ensure_common_name(organism)

    # guard clause: log and refuse anything but the two supported organisms
    if common_name not in supported:

        error = (
            'Wrong organism: `%s`; '
            'only human and mouse are available.' % organism
        )
        _log(error)

        raise ValueError(error)

    # the lowercase common name is used both in the URL and in the name
    # of the object looked up in the converted RData
    org_label = common_name.lower()

    download = curl.Curl(
        urls.urls['progeny']['url'] % org_label,
        large = True,
        silent = False,
    )

    # only the path of the downloaded file is kept: the parser below
    # receives the path, not the open file handle
    local_path = download.fileobj.name
    download.fileobj.close()

    converted = rdata.rdata.conversion.convert(
        rdata.rdata.parser.parse_file(local_path)
    )

    return converted['model_%s_full' % org_label]
[docs]
def progeny_annotations(organism = 9606):
    """
    PROGENy pathway responsive gene signatures as UniProt annotations.

    Takes the table returned by `progeny_raw`
    (PROGENy: https://github.com/saezlab/progeny), translates the gene
    symbols to UniProt IDs and collects the pathway associations of each
    protein.

    Args
        organism (int,str): Name or NCBI Taxonomy ID of the organism.
            Only human and mouse are supported.

    Returns
        (dict): Dict of sets, keys are UniProt IDs, values are pathway
            association records (`ProgenyAnnotation` named tuples), each
            with a pathway, a weight and a p-value.
    """

    ProgenyAnnotation = collections.namedtuple(
        'ProgenyAnnotation',
        ('pathway', 'weight', 'p_value'),
    )

    table = progeny_raw(organism = organism)
    taxid = taxonomy.ensure_ncbi_tax_id(organism)
    annotations = {}

    for row in table.itertuples():

        targets = mapping.map_name(
            row.gene,
            'genesymbol',
            'uniprot',
            ncbi_tax_id = taxid,
        )

        # the p-value is read by position instead of by attribute name;
        # presumably the column name is not a valid Python identifier
        # -- TODO confirm against the PROGENy data frame
        association = ProgenyAnnotation(
            pathway = row.pathway,
            weight = row.weight,
            p_value = row[4],
        )

        # genes without a UniProt translation contribute nothing
        for uniprot_id in targets:

            annotations.setdefault(uniprot_id, set()).add(association)

    return annotations