Source code for pypath.inputs.progeny

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import collections

import pypath.share.session as session
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.taxonomy as taxonomy
import pypath.utils.mapping as mapping
import pypath.inputs.rdata as rdata

_logger = session.Logger(name = 'progeny_input')
_log = _logger._log


[docs] def progeny_raw(organism = 9606): """ Pathway responsive genes: signatures based on transcriptomics data from PROGENy (https://github.com/saezlab/progeny). Args organism (int,str): Name or NCBI Taxonomy ID of the organism. Human and mouse are supported. Returns (pandas.DataFrame): A data frame of genes, pathways, weights and p-values for each association. """ _organism = taxonomy.ensure_common_name(organism) if _organism not in ('Human', 'Mouse'): msg = ( 'Wrong organism: `%s`; ' 'only human and mouse are available.' % organism ) _log(msg) raise ValueError(msg) _organism = _organism.lower() url = urls.urls['progeny']['url'] % _organism c = curl.Curl(url, large = True, silent = False) rdata_path = c.fileobj.name c.fileobj.close() rdata_parsed = rdata.rdata.parser.parse_file(rdata_path) rdata_converted = rdata.rdata.conversion.convert(rdata_parsed) key = 'model_%s_full' % _organism return rdata_converted[key]
[docs] def progeny_annotations(organism = 9606): """ Pathway responsive genes: signatures based on transcriptomics data from PROGENy (https://github.com/saezlab/progeny). Args organism (int,str): Name or NCBI Taxonomy ID of the organism. Human and mouse are supported. Returns (dict): Dict of sets, keys are UniProt IDs, values are pathway association records, each with a weight and p-value. """ record = collections.namedtuple( 'ProgenyAnnotation', ( 'pathway', 'weight', 'p_value', ) ) raw = progeny_raw(organism = organism) result = collections.defaultdict(set) ncbi_tax_id = taxonomy.ensure_ncbi_tax_id(organism) for rec in raw.itertuples(): uniprots = mapping.map_name( rec.gene, 'genesymbol', 'uniprot', ncbi_tax_id = ncbi_tax_id, ) annot = record( pathway = rec.pathway, weight = rec.weight, p_value = rec[4], # omg, stupid pandas ) for uniprot in uniprots: result[uniprot].add(annot) return dict(result)