Source code for pypath.inputs.pharos

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

"""
Retrieve data from the NIH Pharos database.
"""

from __future__ import annotations

import json

from pypath.share.curl import Curl
from pypath.resources.urls import urls
import pypath.share.session as session

# Logger of this module; `_log` is the shortcut the functions below use for
# writing their progress messages.
_logger = session.Logger(name = 'pharos_input')
_log = _logger._log


# Names of the optional record groups. Each name is a boolean keyword
# argument of `pharos_targets`, and `_create_query_functions` creates one
# `pharos_<name>` function for each of them.
QUERY_TYPES = (
    'expression',
    'gtex',
    'orthologs',
    'ligands',
    'xrefs',
    'diseases',
)


# GraphQL query for the paginated retrieval of Pharos targets.
# `$chunk_size` and `$step` are passed to the `top` and `skip` arguments of
# the target list. Each `$get...` boolean switches one optional group of
# fields on or off by an `@include` directive; `name`, `sym` and `uniprot`
# are requested for every target.
PHAROS_QUERY = """
    query targetDetails(
        $chunk_size: Int!,
        $step: Int!,
        $getExpressions: Boolean!,
        $getGtex: Boolean!,
        $getOrthologs: Boolean!,
        $getLigands: Boolean!,
        $getXrefs: Boolean!,
        $getDiseases: Boolean!,
    ) {

        targets {

            targets(top: $chunk_size skip: $step) {

                name
                sym
                uniprot

                expressions(top: 10000) @include(if: $getExpressions) {

                    expid
                    type
                    tissue
                    value

                    uberon {
                        name
                        uid
                    }

                    pub {
                        pmid
                    }
                }

                gtex @include(if: $getGtex) {

                    tissue
                    tpm
                    log2foldchange

                    uberon {
                        name
                        uid
                    }
                }

                orthologs(top: 10000) @include(if: $getOrthologs) {
                    name
                    species
                    orid
                    dbid
                    geneid
                    source
                }

                ligands(top: 10000 isdrug: true) @include(if: $getLigands) {

                    ligid
                    name

                    synonyms {
                        name
                        value
                    }

                    activities(all: true) {
                        actid
                        type
                        moa
                        value

                        pubs {
                            pmid
                            __typename
                            }
                    }
                }

                xrefs(source: "Ensembl") @include(if: $getXrefs) {
                    name
                }

                diseases(top:10000) @include(if: $getDiseases) {

                    name
                    mondoID

                    dids {
                        id
                        dataSources
                        doName
                    }
                }
            }
        }
    }
    """


[docs] def pharos_general( query: str, variables: dict[str, bool] | None = None, ) -> dict: """ Query the NIH Pharos database by GraphQL. Read about Pharos here: https://pharos.nih.gov/about Args: query: A GraphQL query. variables: Variables to retrieve. A dict of variable names and boolean values. Return: The JSON response parsed into a dict. """ url = urls['pharos_api']['url'] req_headers = { 'Accept-Encoding': 'gzip, deflate, br', 'Content-Type': 'application/json', 'Connection': 'keep-alive', 'DNT': '1', 'Origin': 'https://pharos-api.ncats.io', } query_param = {'query': query} if variables: _log( 'Querying Pharos, variables: ' f'{", ".join(k for k, v in variables.items() if v)}' ) query_param['variables'] = variables binary_data = json.dumps(query_param).encode('utf-8') c = Curl( url=url, req_headers=req_headers, binary_data=binary_data, compressed=True, compr='gz', ) result = json.loads(c.result) result = result['data'] return result
def pharos_targets(
        chunk_size: int = 100,
        expression: bool = False,
        gtex: bool = False,
        orthologs: bool = False,
        ligands: bool = False,
        xrefs: bool = False,
        diseases: bool = False,
    ) -> list:
    """
    Retrieve target records from the NIH Pharos database by GraphQL.

    The queried data is fetched in chunks, by default 100 records each.
    The complete data consists of thousands of chunks, the retrieval takes
    about half hour.

    Args:
        chunk_size:
            Records in one batch. Better stay 100 because higher numbers
            likely to cause timeout errors. Must be at least 1.
        expression:
            Include the `expressions` fields in the records.
        gtex:
            Include the `gtex` fields in the records.
        orthologs:
            Include the `orthologs` fields in the records.
        ligands:
            Include the `ligands` fields in the records.
        xrefs:
            Include the `xrefs` fields in the records.
        diseases:
            Include the `diseases` fields in the records.

    Return:
        Records as a list of dicts.

    Raises:
        ValueError: If `chunk_size` is smaller than 1.
    """

    # The paging offset grows by `chunk_size` after each request: with zero
    # it would never advance, with a negative value it would move backwards,
    # hence the loop below might never reach an empty page.
    if chunk_size < 1:

        raise ValueError(
            f'`chunk_size` must be a positive integer, got {chunk_size!r}.'
        )

    variables = {
        'chunk_size': chunk_size,
        'step': 0,
        'getExpressions': expression,
        'getGtex': gtex,
        'getOrthologs': orthologs,
        'getLigands': ligands,
        'getXrefs': xrefs,
        'getDiseases': diseases,
    }

    result = []

    while True:

        # The number in the message is the offset of the first record of
        # the chunk (the value of `step`), not a running chunk index.
        _log(f'Pharos query, chunk #{variables["step"]}')

        response = pharos_general(PHAROS_QUERY, variables)
        response = response['targets']['targets']

        # An empty page means the offset went past the last record.
        if not response:

            break

        result.extend(response)
        variables['step'] += chunk_size

    return result
def _create_query_functions():
    """
    Create one `pharos_<query type>` function for each of `QUERY_TYPES`.

    Each generated function calls `pharos_targets` with only its own query
    type switched on, and is added to the namespace of this module under
    its name.
    """

    def make_query_func(qtype: str):
        """
        Build the function that retrieves only the `qtype` records.
        """

        # `args` lives in the scope of this factory, so each generated
        # function owns its own value. Closures look up their variables
        # when they are called: with a single `args` rebound in the loop,
        # all generated functions would use the value from the last
        # iteration, i.e. all would query the last query type.
        args = {qtype: True}
        name = f'pharos_{qtype}'
        doc = f"""
            Retrieve `{qtype}` records from Pharos.

            Note: data retrieval might take about half an hour.

            Args:
                chunk_size:
                    Records in one batch. Better stay 100 because higher
                    numbers likely to cause timeout errors.

            Return:
                Records as a list of dicts.
            """

        def query_func(chunk_size: int = 100) -> list:

            return pharos_targets(chunk_size = chunk_size, **args)

        query_func.__name__ = name
        # Keep the qualified name in line with the public name, so the
        # repr of the function does not point to this factory.
        query_func.__qualname__ = name
        query_func.__doc__ = doc

        return query_func

    for qtype in QUERY_TYPES:

        func = make_query_func(qtype)
        globals()[func.__name__] = func


_create_query_functions()