Source code for pypath.inputs.opentargets

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from __future__ import annotations

from typing import Generator, Literal

import json
import re
import collections

import pandas as pd

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.share.session as session

_log = session.Logger(name = 'opentargets_input')._log



[docs]
def opentargets_general(
        dataset: Literal[
            'assoc_direct',
            'assoc_indirect',
            'adr',
            'expr',
            'associationByOverallIndirect',
            'associationByOverallDirect',
            'fda/significantAdverseDrugReactions',
            'baselineExpression',
        ],
        return_df: bool = False,
        by: str | bool = False,
    ) -> Generator[dict] | dict[str, list[dict]] | pd.DataFrame:
    """
    Download data from the Open Targets database.

    Args:
        dataset:
            Name of a dataset, either as a shorthand synonym or as it is shown
            in the URL.
        return_df:
            Return a pandas data frame. Takes precedence over `by`: if both
            are set, no grouping is performed.
        by:
            Name of the variable to be used as top level key in the returned
            dictionary. If True, the default grouping variable for the given
            dataset will be used. If False, no grouping will be performed.

    Returns:
        A generator of records (dicts) by default; a pandas data frame if
        `return_df` is True; otherwise, if `by` is set, a dict with the
        values of the grouping variable as keys and lists of records as
        values. The grouping variable is removed from the grouped records.

    Raises:
        KeyError: If `by` is True and no default grouping variable is known
            for `dataset`; or if a record does not contain the grouping
            variable.
    """

    # Defaults are keyed both by the shorthand synonyms and by the full
    # dataset names, so `by = True` works whichever form the caller used.
    by_defaults = {
        'assoc_indirect': 'diseaseId',
        'assoc_direct': 'diseaseId',
        'adr': 'chembl_id',
        'expr': 'id',
        'associationByOverallIndirect': 'diseaseId',
        'associationByOverallDirect': 'diseaseId',
        'fda/significantAdverseDrugReactions': 'chembl_id',
        'baselineExpression': 'id',
    }

    if by is True:

        by = by_defaults[dataset]

    result = _opentargets_general(dataset)

    if return_df:

        return pd.DataFrame(result)

    if by:

        grouped = collections.defaultdict(list)

        for it in result:

            # the grouping variable becomes the top level key, hence it is
            # removed from the record itself
            grouped[it.pop(by)].append(it)

        return grouped

    return result



def _opentargets_general(
        dataset: Literal[
            'assoc_direct',
            'assoc_indirect',
            'adr',
            'expr',
            'associationByOverallIndirect',
            'associationByOverallDirect',
            'fda/significantAdverseDrugReactions',
            'baselineExpression',
        ],
    ) -> Generator[dict]:
    """
    Iterate over the records of one Open Targets dataset.

    The shorthand synonyms are translated to full dataset names; any other
    value is used as it is. The dataset name is inserted into the
    `opentargets` URL template, and the names of the `part*.json` files
    are extracted from the content retrieved from that URL. Each of these
    files is then downloaded and processed line by line.

    Args:
        dataset:
            Name of a dataset, either as a shorthand synonym or as it is shown
            in the URL.

    Yields:
        The object decoded from each non-empty line of the JSON part files.
        Lines that fail to parse as JSON are logged and skipped.
    """

    synonyms = {
        'assoc_indirect': 'associationByOverallIndirect',
        'assoc_direct': 'associationByOverallDirect',
        'adr': 'fda/significantAdverseDrugReactions',
        'expr': 'baselineExpression',
    }
    full_name = synonyms.get(dataset, dataset)

    # the dataset URL serves the listing where the part files are named
    index_url = urls.urls['opentargets']['url'] % full_name
    index = curl.Curl(index_url, silent = False, large = False)
    part_names = re.findall(r'"(part.*\.json)"', index.result)

    part_url = index_url + '/%s'

    for part_name in part_names:

        part = curl.Curl(part_url % part_name, silent = False, large = True)

        # presumably with `large = True` the result iterates over the lines
        # of the file -- confirm against `curl.Curl`
        for line in part.result:

            if line:

                try:

                    record = json.loads(line)

                except json.JSONDecodeError:

                    _log(
                        f'Failed to parse JSON from Open Targets data:\n{line}'
                    )

                else:

                    yield record



[docs]
def opentargets_indirect_score(
        return_df: bool = False,
        by: str | bool = False,
    ) -> Generator[dict] | dict | pd.DataFrame:
    """
    Indirect target-disease association scores from Open Targets.

    Args:
        return_df:
            Return a pandas data frame.
        by:
            Name of the variable to be used as top level key in the returned
            dictionary. If True, the default grouping variable for the given
            dataset will be used. If False, no grouping will be performed.

    Returns:
        Target-disease association records as a generator of dicts by
        default; or a pandas data frame if `return_df` is True; or a dict of
        list of dicts if `by` is not False and `return_df` is False.
    """

    # The full dataset name is passed instead of the `assoc_indirect`
    # shorthand, because the full name is always a valid key when
    # `opentargets_general` looks up the default grouping variable
    # (`by = True`); the shorthand resulted in a KeyError there.
    return opentargets_general(
        'associationByOverallIndirect',
        return_df = return_df,
        by = by,
    )




[docs]
def opentargets_direct_score(
        return_df: bool = False,
        by: str | bool = False,
    ) -> Generator[dict] | dict | pd.DataFrame:
    """
    Direct target-disease association scores from Open Targets.

    Args:
        return_df:
            Return a pandas data frame.
        by:
            Name of the variable to be used as top level key in the returned
            dictionary. If True, the default grouping variable for the given
            dataset will be used. If False, no grouping will be performed.

    Returns:
        Target-disease association records as a generator of dicts by
        default; or a pandas data frame if `return_df` is True; or a dict of
        list of dicts if `by` is not False and `return_df` is False.
    """

    # The full dataset name is passed instead of the `assoc_direct`
    # shorthand, because the full name is always a valid key when
    # `opentargets_general` looks up the default grouping variable
    # (`by = True`); the shorthand resulted in a KeyError there.
    return opentargets_general(
        'associationByOverallDirect',
        return_df = return_df,
        by = by,
    )




[docs]
def opentargets_adverse_reactions(
        return_df: bool = False,
        by: str | bool = False,
    ) -> Generator[dict] | dict | pd.DataFrame:
    """
    Drug adverse reactions from Open Targets.

    Args:
        return_df:
            Return a pandas data frame.
        by:
            Name of the variable to be used as top level key in the returned
            dictionary. If True, the default grouping variable for the given
            dataset will be used. If False, no grouping will be performed.

    Returns:
        Drug-adverse reaction records as a generator of dicts by default; or
        a pandas data frame if `return_df` is True; or a dict of list of
        dicts if `by` is not False and `return_df` is False.
    """

    # The full dataset name is passed instead of the `adr` shorthand,
    # because the full name is always a valid key when
    # `opentargets_general` looks up the default grouping variable
    # (`by = True`); the shorthand resulted in a KeyError there.
    return opentargets_general(
        'fda/significantAdverseDrugReactions',
        return_df = return_df,
        by = by,
    )




[docs]
def opentargets_baseline_expression(
        return_df: bool = False,
        by: str | bool = False,
    ) -> Generator[dict] | dict | pd.DataFrame:
    """
    Baseline expression from Open Targets.

    Args:
        return_df:
            Return a pandas data frame.
        by:
            Name of the variable to be used as top level key in the returned
            dictionary. If True, the default grouping variable for the given
            dataset will be used. If False, no grouping will be performed.

    Returns:
        Baseline expression records as a generator of dicts by default; or
        a pandas data frame if `return_df` is True; or a dict of list of
        dicts if `by` is not False and `return_df` is False.
    """

    # The full dataset name is passed instead of the `expr` shorthand,
    # because the full name is always a valid key when
    # `opentargets_general` looks up the default grouping variable
    # (`by = True`); the shorthand resulted in a KeyError there.
    return opentargets_general(
        'baselineExpression',
        return_df = return_df,
        by = by,
    )