Source code for pypath.inputs.adrecs

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from __future__ import annotations

from typing import Generator, NamedTuple

import collections

import pandas as pd

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.inputs.common as inputs_common


_notavail = lambda x: None if x == 'Not Available' else x
_synonyms = lambda x: (
    tuple(sorted(y.strip() for y in x.split('|'))) if x else ()
)



[docs]
class AdrecsAdr(NamedTuple):
    """
    An adverse drug reaction (ADR) term, referred by its two identifiers.
    """

    # AdReCS class of the term; `adrecs_hierarchy` fills it from the
    # `adrecs_class` field of the ontology terms (dot separated levels)
    adr_class: str
    # BADD identifier of the term; `adrecs_hierarchy` sets it to None for
    # a parent whose class is not among the ontology terms
    badd: str




[docs]
class AdrecsChildParent(NamedTuple):
    """
    A child-parent relationship between two AdReCS ontology terms.
    """

    # the more specific term
    child: AdrecsAdr
    # the term one level above the child in the class hierarchy
    parent: AdrecsAdr




[docs]
class AdrecsDrugAdr(NamedTuple):
    """
    A drug-ADR (adverse drug reaction) pair.

    The fields are filled positionally from the tab separated columns of
    the AdReCS drug-ADR table, hence the order of the fields must follow
    the order of the columns.
    """

    # presumably the BADD identifier of the drug -- TODO confirm
    drug_badd: str
    # presumably the name of the drug -- TODO confirm
    drug: str
    # presumably the BADD identifier of the ADR term -- TODO confirm
    adr_badd: str
    # presumably the name of the ADR term -- TODO confirm
    adr: str




[docs]
class AdrecsTerm(NamedTuple):
    """
    A term of the AdReCS adverse drug reaction (ADR) ontology.

    The fields follow the column order of the AdReCS terminology table.
    Cells with the value "Not Available" are stored as None by the loader.
    """

    # AdReCS class of the term; levels of the hierarchy separated by dots
    adrecs_class: str
    # BADD identifier of the term
    badd: str
    # name of the term
    name: str
    # sorted synonyms; any number of them, including none at all
    synonyms: tuple[str, ...]
    # presumably the MedDRA code of the term -- TODO confirm
    meddra: str




[docs]
class AdrecsDrug(NamedTuple):
    """
    Identifiers of a drug in the AdReCS database.

    The fields follow the column order of the AdReCS drug information
    table. Cells with the value "Not Available" are stored as None by the
    loader.
    """

    # BADD identifier of the drug
    badd: str
    # name of the drug
    drug: str
    # sorted synonyms; the loader converts the pipe separated cell to a
    # tuple, hence this is never a plain string
    synonyms: tuple[str, ...]
    # DrugBank identifier
    drugbank: str
    # PubChem compound identifier
    pubchem_cid: str
    # MeSH identifier
    mesh: str
    # KEGG identifier
    kegg: str
    # TDD identifier
    tdd: str




[docs]
def adrecs_drug_identifiers(
        return_df: bool = False,
    ) -> list[AdrecsDrug] | pd.DataFrame:
    """
    Drug identifiers from the AdReCS database.

    BADD ID, name, synonyms, DrugBank, PubChem CID, MeSH, KEGG and TDD IDs
    of drugs.
    http://www.bio-add.org/ADReCS/index.jsp

    Args:
        return_df:
            Return a pandas data frame.

    Returns:
        List of `AdrecsDrug` named tuples or data frame of drug
        identifiers.
    """

    return _adrecs_base(
        url_key = 'drug_information',
        record = AdrecsDrug,
        cell_range = 'A1:H2527',
        # the third column of the table contains the synonyms
        synonym_idx = [2],
        return_df = return_df,
    )




[docs]
def adrecs_adr_ontology(
        return_df: bool = False,
    ) -> list[AdrecsTerm] | pd.DataFrame:
    """
    Terms of the adverse drug reaction (ADR) ontology of AdReCS.

    Args:
        return_df:
            Return a pandas data frame instead of a list.

    Returns:
        List of `AdrecsTerm` named tuples or data frame of adverse drug
        reaction terms.
    """

    sheet_range = 'A1:E13856'
    # the fourth column of the table contains the synonyms
    synonym_columns = [3]

    return _adrecs_base(
        url_key = 'terminology',
        record = AdrecsTerm,
        cell_range = sheet_range,
        synonym_idx = synonym_columns,
        return_df = return_df,
    )



def _adrecs_base(
        url_key: str,
        record: str | type,
        cell_range: str,
        synonym_idx: list[int],
        fields: tuple[str, ...] | None = None,
        return_df: bool = False,
    ) -> list[tuple] | pd.DataFrame:
    """
    Downloads and processes one of the AdReCS Excel tables.

    Args:
        url_key:
            Key of the table in the `adrecs` section of the URL registry.
        record:
            The class to be created from each row, called with the cells
            of the row as positional arguments. Alternatively a string: in
            this case a named tuple class with the name `Adrecs<record>`
            will be created from `fields`.
        cell_range:
            Range of cells to read from the sheet, including the header
            row, e.g. "A1:E13856".
        synonym_idx:
            Indices of the columns which contain pipe separated synonyms;
            these will be converted to sorted tuples.
        fields:
            Field names for the record; required only if `record` is a
            string.
        return_df:
            Return a pandas data frame.

    Returns:
        List of records or a data frame, one record for each row below
        the header.

    Raises:
        ValueError: If `record` is a string and `fields` is not provided.
    """

    if isinstance(record, str):

        if fields is None:

            raise ValueError(
                'Field names must be provided in `fields` '
                'if `record` is a string.'
            )

        # the class name is derived from the string passed in `record`
        record = collections.namedtuple(f'Adrecs{record}', fields)

    url = urls.urls['adrecs'][url_key]
    path = curl.Curl(url, silent = False, large = True)
    contents = inputs_common.read_xls(path.outfile, cell_range = cell_range)
    result = []

    # the first row is the header
    for row in contents[1:]:

        row = [_notavail(cell) for cell in row]

        for isyn in synonym_idx:

            row[isyn] = _synonyms(row[isyn])

        result.append(record(*row))

    return pd.DataFrame(result) if return_df else result



[docs]
def adrecs_drug_adr(
        return_df: bool = False,
    ) -> Generator[AdrecsDrugAdr, None, None] | pd.DataFrame:
    """
    Drug-ADR pairs from the AdReCS database.

    Args:
        return_df:
            Return a pandas data frame.

    Returns:
        Generator of `AdrecsDrugAdr` named tuples or data frame of
        drug-ADR pairs. The generator is lazy: nothing is retrieved or
        processed until the first record is requested.
    """

    result = _adrecs_drug_adr()

    return pd.DataFrame(result) if return_df else result



def _adrecs_drug_adr():
    """
    Iterates the records of the AdReCS drug-ADR table.

    Yields:
        One `AdrecsDrugAdr` named tuple for each line after the first one,
        created from the tab separated fields of the line.
    """

    download = curl.Curl(
        urls.urls['adrecs']['adrecs_drugs'],
        large = True,
        silent = False,
    )

    # discard the first line (presumably the header)
    next(download.result)

    for row in download.result:

        columns = row.strip().split('\t')

        yield AdrecsDrugAdr(*columns)



[docs]
def adrecs_hierarchy() -> set[AdrecsChildParent]:
    """
    Child-parent relationships between AdReCS ontology terms.

    The parent of a term is identified by removing the last, dot separated
    level from the AdReCS class of the term. Terms with only one level in
    their class have no parent, hence these occur only as parents.

    Returns:
        Set of tuples representing child-parent relationship. Both the child
        and parent terms present with their numeric class and BADD
        identifiers. The BADD of the parent is None if its class is not
        among the ontology terms.
    """

    terms = adrecs_adr_ontology()

    # lookup table from the class of each term to its BADD identifier
    badd_by_class = {term.adrecs_class: term.badd for term in terms}

    def _relation(term):
        # the class of the parent is the class of the child
        # without its last level
        parent_class = term.adrecs_class.rsplit('.', 1)[0]

        return AdrecsChildParent(
            child = AdrecsAdr(
                adr_class = term.adrecs_class,
                badd = term.badd,
            ),
            parent = AdrecsAdr(
                adr_class = parent_class,
                badd = badd_by_class.get(parent_class),
            ),
        )

    return {
        _relation(term)
        for term in terms
        if '.' in term.adrecs_class
    }