Source code for pypath.inputs.hpo

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from typing import Union

import re
import collections

import pypath.utils.mapping as mapping
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.formats.obo as obo



[docs]
def hpo_annotations() -> dict[str, set[tuple]]:
    """
    Human Phenotype Ontology annotations.

    Returns
        Dict of proteins as keys and sets of HPO annotations as values.
    """

    url = urls.urls['hpo']['gene']
    c = curl.Curl(url, large = True, silent = False)
    _ = next(c.result)

    fields = ('entrez_gene_id','entrez_gene_symbol','hpo_id')

    HpoAnnotation = collections.namedtuple(
        'HpoAnnotation',
        fields,defaults = ("",) * len(fields)
    )

    result = collections.defaultdict(set)

    for r in c.result:

        r = r.strip().split('\t')

        uniprots = mapping.map_name(r[0], 'entrez', 'uniprot')

        for uniprot in uniprots:

            result[uniprot].add(
                HpoAnnotation(
                    entrez_gene_id = r[0],
                    entrez_gene_symbol = r[1],
                    hpo_id = r[2],
                )
            )

    return dict(result)




[docs]
def hpo_terms() -> dict[str, str]:
    """
    Human Phenotype Ontology accession to term mapping.
    """

    return hpo_ontology()['terms']




[docs]
def hpo_diseases() -> dict[str, set[tuple]]:
    """
    HPO term-disease relationships from Human Phenotype Ontology.

    Returns
        A set of disease records for each HPO term.
    """

    url = urls.urls['hpo']['disease']
    c = curl.Curl(url, large = True, silent = False)

    HpoDisease = collections.namedtuple(
        'HpoDisease',
        (
            'omim',
            'name',
            'pmid',
            'qualifier',
            'evidence',
            'onset',
            'frequency',
            'sex',
            'modifier',
            'aspect',
        ),
    )

    result = collections.defaultdict(set)

    for r in c.result:

        if r[0] == '#': continue

        r = r.split('\t')

        pmid = re.sub('^PMID:', '', r[4]) if r[4][:4] == 'PMID' else None

        result[r[3]].add(
            HpoDisease(
                omim = r[0],
                name = r[1],
                pmid = pmid,
                qualifier = r[2] or None,
                evidence = r[5] or None,
                onset = r[6] or None,
                frequency = r[7] or None,
                sex = r[8] or None,
                modifier = r[9] or None,
                aspect = r[10],
            )
        )

    return dict(result)




[docs]
def hpo_ontology() -> dict[str, dict[str, Union[str, set[str]]]]:
    """
    Ontology data from HPO.

    Returns
        Five dictionaries with term names, term definitions, parents in the
        ontology tree, term synonyms and cross references to other databases.
        The dicts "terms" and "defs" are one-to-one, while "parents",
        "synonyms" and "xrefs" are one-to-many mappings, the keys are always
        HPO terms.
    """

    url = urls.urls['hpo']['ontology']
    reader = obo.Obo(url)

    result = {
        'terms': {},
        'defs': {},
        'parents': collections.defaultdict(set),
        'synonyms': collections.defaultdict(set),
        'xrefs': collections.defaultdict(set),
    }

    for r in reader:

        if r.stanza != 'Term': continue

        if (
            r.name is None or
            r.name.value == 'obsolete' or
            r.attrs.get('is_obsolete')
        ):
            continue

        term = r.id.value

        name = (
            (r.name.value, r.name.modifiers)
                if r.name.modifiers else
            r.name.value
        )

        if isinstance(name, tuple): name = ' '.join(n for n in name if n)

        result['terms'][term] = name

        result['defs'][term] = r.definition.value if r.definition else None

        for key, obokey in (
            ('parents', 'is_a'),
            ('synonyms', 'synonym'),
            ('xrefs', 'xref'),
        ):

            proc = (
                lambda x: tuple(x.split(':'))
                    if key == 'xrefs' else
                lambda x: x
            )

            for x in r.attrs.get(obokey, ()):
                y = proc(x.value)
                result[key][term].update(
                    {
                        y(x.value)
                        if type(y) != tuple else
                        y
                    }
                )

    return {k: dict(v) for k, v in result.items()}