# Source code for pypath.inputs.hpo

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from typing import Union

import re
import collections

import pypath.utils.mapping as mapping
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.formats.obo as obo


def hpo_annotations() -> dict[str, set[tuple]]:
    """
    Gene-phenotype annotations from the Human Phenotype Ontology.

    Downloads the tab separated table registered under
    ``urls.urls['hpo']['gene']``, discards its first line (presumably a
    header) and translates the identifier in the first column from Entrez
    Gene ID to UniProt by ``mapping.map_name``.

    Returns
        Dict with UniProt IDs as keys and sets of ``HpoAnnotation`` named
        tuples (``entrez_gene_id``, ``entrez_gene_symbol``, ``hpo_id``) as
        values. Rows whose gene maps to no UniProt ID contribute nothing.
    """

    HpoAnnotation = collections.namedtuple(
        'HpoAnnotation',
        ('entrez_gene_id', 'entrez_gene_symbol', 'hpo_id'),
        defaults = ('', '', ''),
    )

    download = curl.Curl(
        urls.urls['hpo']['gene'],
        large = True,
        silent = False,
    )

    # drop the first line of the table
    next(download.result)

    annotations = collections.defaultdict(set)

    for line in download.result:

        columns = line.strip().split('\t')

        # one record under each UniProt ID the Entrez ID translates to;
        # the further columns are touched only if there is a mapping
        for uniprot in mapping.map_name(columns[0], 'entrez', 'uniprot'):

            annotations[uniprot].add(
                HpoAnnotation(
                    entrez_gene_id = columns[0],
                    entrez_gene_symbol = columns[1],
                    hpo_id = columns[2],
                )
            )

    return dict(annotations)
def hpo_terms() -> dict[str, str]:
    """
    Names of the Human Phenotype Ontology terms.

    Returns
        The "terms" dict from the output of ``hpo_ontology``: HPO term
        accessions as keys and term names as values.
    """

    ontology = hpo_ontology()

    return ontology['terms']
def hpo_diseases() -> dict[str, set[tuple]]:
    """
    Disease annotations of Human Phenotype Ontology terms.

    Reads the tab separated table registered under
    ``urls.urls['hpo']['disease']``; lines starting with "#" are ignored.

    Returns
        Dict with the values of the fourth column (the HPO term) as keys
        and sets of ``HpoDisease`` named tuples as values. In these records
        ``pmid`` is the fifth column without its "PMID:" prefix, or None
        if that column does not start with "PMID"; ``qualifier``,
        ``evidence``, ``onset``, ``frequency``, ``sex`` and ``modifier``
        are None when the corresponding column is empty.
    """

    HpoDisease = collections.namedtuple(
        'HpoDisease',
        (
            'omim',
            'name',
            'pmid',
            'qualifier',
            'evidence',
            'onset',
            'frequency',
            'sex',
            'modifier',
            'aspect',
        ),
    )

    download = curl.Curl(
        urls.urls['hpo']['disease'],
        large = True,
        silent = False,
    )

    diseases = collections.defaultdict(set)

    for line in download.result:

        # comment lines carry no records
        if line[0] == '#':

            continue

        col = line.split('\t')
        reference = col[4]

        # only PubMed references are kept, without their prefix
        if reference.startswith('PMID'):

            pmid = re.sub('^PMID:', '', reference)

        else:

            pmid = None

        record = HpoDisease(
            omim = col[0],
            name = col[1],
            pmid = pmid,
            qualifier = col[2] or None,
            evidence = col[5] or None,
            onset = col[6] or None,
            frequency = col[7] or None,
            sex = col[8] or None,
            modifier = col[9] or None,
            aspect = col[10],
        )

        diseases[col[3]].add(record)

    return dict(diseases)
def hpo_ontology() -> dict[str, dict[str, Union[str, set[str]]]]:
    """
    Ontology data from HPO.

    Reads the OBO file registered under ``urls.urls['hpo']['ontology']``
    and processes its "Term" stanzas. Stanzas without a name, with the
    name "obsolete", or with a truthy ``is_obsolete`` attribute are
    skipped.

    Returns
        A dict of five dictionaries with term names ("terms"), term
        definitions ("defs"), parents in the ontology tree ("parents"),
        term synonyms ("synonyms") and cross references to other
        databases ("xrefs"). The dicts "terms" and "defs" are one-to-one
        (a definition is None if the stanza has none), while "parents",
        "synonyms" and "xrefs" are one-to-many mappings with sets as
        values; the keys are always HPO terms. Each cross reference is a
        tuple obtained by splitting the value at the colons. A term is
        present in a one-to-many dict only if it has at least one value
        of that kind.
    """

    url = urls.urls['hpo']['ontology']
    reader = obo.Obo(url)

    result = {
        'terms': {},
        'defs': {},
        'parents': collections.defaultdict(set),
        'synonyms': collections.defaultdict(set),
        'xrefs': collections.defaultdict(set),
    }

    # key in the result -> attribute name in the OBO stanza
    multi_valued = (
        ('parents', 'is_a'),
        ('synonyms', 'synonym'),
        ('xrefs', 'xref'),
    )

    for r in reader:

        if r.stanza != 'Term':

            continue

        if (
            r.name is None or
            r.name.value == 'obsolete' or
            r.attrs.get('is_obsolete')
        ):

            continue

        term = r.id.value
        name = r.name.value

        # modifiers, if any, are appended to the name after a space
        if r.name.modifiers:

            name = ' '.join(n for n in (name, r.name.modifiers) if n)

        result['terms'][term] = name
        result['defs'][term] = r.definition.value if r.definition else None

        for key, obokey in multi_valued:

            # the set of a term is created only upon its first value,
            # hence we add the values one by one inside the loop
            for x in r.attrs.get(obokey, ()):

                value = (
                    tuple(x.value.split(':'))
                    if key == 'xrefs' else
                    x.value
                )
                result[key][term].add(value)

    return {k: dict(v) for k, v in result.items()}