Source code for pypath.inputs.adrecs
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from __future__ import annotations
from typing import Generator, NamedTuple
import collections
import pandas as pd
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.inputs.common as inputs_common
_notavail = lambda x: None if x == 'Not Available' else x
_synonyms = lambda x: (
tuple(sorted(y.strip() for y in x.split('|'))) if x else ()
)
[docs]
class AdrecsAdr(NamedTuple):
    """
    An adverse drug reaction (ADR) term, referenced by its AdReCS class
    and its BADD identifier.
    """

    # class of the term in the AdReCS ontology; levels are dot separated
    adr_class: str
    # BADD identifier of the term
    badd: str
[docs]
class AdrecsChildParent(NamedTuple):
    """
    A child-parent relationship between two ADR terms.
    """

    # the more specific term
    child: AdrecsAdr
    # the term directly above the child
    parent: AdrecsAdr
[docs]
class AdrecsDrugAdr(NamedTuple):
    """
    A drug paired with one of its adverse drug reactions (ADR).
    """

    # BADD identifier of the drug
    drug_badd: str
    # name of the drug
    drug: str
    # BADD identifier of the ADR
    adr_badd: str
    # name of the ADR
    adr: str
[docs]
class AdrecsTerm(NamedTuple):
    """
    One term of the AdReCS adverse drug reaction (ADR) ontology.
    """

    # class of the term in the ontology; levels are dot separated
    adrecs_class: str
    # BADD identifier of the term
    badd: str
    # name of the term
    name: str
    # sorted synonyms of the term; any number of them, possibly none
    synonyms: tuple[str, ...]
    # presumably the MedDRA code of the term -- TODO confirm
    meddra: str
[docs]
class AdrecsDrug(NamedTuple):
    """
    Identifiers of one drug in the AdReCS database.

    When created by `adrecs_drug_identifiers`, values that are
    'Not Available' in the source are replaced by None.
    """

    # BADD identifier of the drug
    badd: str
    # name of the drug
    drug: str
    # sorted synonyms of the drug: `adrecs_drug_identifiers` splits this
    # column into a tuple, hence it is not a plain string
    synonyms: tuple[str, ...]
    # DrugBank identifier
    drugbank: str
    # PubChem compound identifier
    pubchem_cid: str
    # MeSH identifier
    mesh: str
    # KEGG identifier
    kegg: str
    # TDD identifier
    tdd: str
[docs]
def adrecs_drug_identifiers(
    return_df: bool = False,
) -> list[tuple] | pd.DataFrame:
    """
    Drug identifiers from the AdReCS database.

    Names, synonyms and cross-references (DrugBank, PubChem, MeSH, KEGG
    and TDD) of drugs, one `AdrecsDrug` record for each drug.

    http://www.bio-add.org/ADReCS/index.jsp

    Args:
        return_df:
            Return a pandas data frame.

    Returns:
        List of tuples or data frame of drug identifiers.
    """

    drugs = _adrecs_base(
        url_key = 'drug_information',
        record = AdrecsDrug,
        cell_range = 'A1:H2527',
        # the third column contains the synonyms
        synonym_idx = [2],
        return_df = return_df,
    )

    return drugs
[docs]
def adrecs_adr_ontology(
    return_df: bool = False,
) -> list[AdrecsTerm] | pd.DataFrame:
    """
    Adverse drug reaction (ADR) ontology from the AdReCS database.

    One `AdrecsTerm` record is created for each term of the ontology.

    Args:
        return_df:
            Return a pandas data frame.

    Returns:
        List of tuples or data frame of adverse drug reaction terms.
    """

    terms = _adrecs_base(
        url_key = 'terminology',
        record = AdrecsTerm,
        cell_range = 'A1:E13856',
        # the fourth column contains the synonyms
        synonym_idx = [3],
        return_df = return_df,
    )

    return terms
def _adrecs_base(
    url_key: str,
    record: str | type,
    cell_range: str,
    synonym_idx: list[int],
    fields: tuple[str, ...] | None = None,
    return_df: bool = False,
) -> list[tuple] | pd.DataFrame:
    """
    Download one AdReCS spreadsheet and convert its rows to records.

    Args:
        url_key:
            Key of the spreadsheet under the 'adrecs' entry of the URL
            collection.
        record:
            Either a record class that accepts the cells of one row as
            positional arguments, or a string: in the latter case a named
            tuple called `Adrecs<record>` is created from `fields`.
        cell_range:
            Range of cells to read from the spreadsheet; passed to
            `read_xls`.
        synonym_idx:
            Indices of the columns with pipe separated synonyms; these
            are converted to sorted tuples.
        fields:
            Field names of the record; required only if `record` is a
            string.
        return_df:
            Return a pandas data frame.

    Returns:
        List of records or data frame with one row for each record.

    Raises:
        ValueError: If `record` is a string and `fields` is not provided.
    """

    if isinstance(record, str):

        if fields is None:

            raise ValueError(
                'Field names are required to create a record class '
                f'`Adrecs{record}`.'
            )

        record = collections.namedtuple(f'Adrecs{record}', fields)

    url = urls.urls['adrecs'][url_key]
    path = curl.Curl(url, silent = False, large = True)
    contents = inputs_common.read_xls(path.outfile, cell_range = cell_range)

    result = []

    # the first row is skipped: presumably it is the header, as the
    # cell ranges used in this module start at A1
    for line in contents[1:]:

        line = [_notavail(x) for x in line]

        for isyn in synonym_idx:

            line[isyn] = _synonyms(line[isyn])

        result.append(record(*line))

    return pd.DataFrame(result) if return_df else result
[docs]
def adrecs_drug_adr(
    return_df: bool = False,
) -> Generator[AdrecsDrugAdr] | pd.DataFrame:
    """
    Drug-ADR pairs from the AdReCS database.

    Args:
        return_df:
            Return a pandas data frame.

    Returns:
        Generator of `AdrecsDrugAdr` tuples, or a data frame of drug-ADR
        pairs if `return_df` is True.
    """

    pairs = _adrecs_drug_adr()

    if return_df:

        return pd.DataFrame(pairs)

    return pairs
def _adrecs_drug_adr():
    """
    Iterate the drug-ADR table of the AdReCS database.

    Yields:
        One `AdrecsDrugAdr` record for each line after the first, created
        from the tab separated fields of the line.
    """

    url = urls.urls['adrecs']['adrecs_drugs']
    c = curl.Curl(url, large = True, silent = False)

    # Discard the first line (presumably the column header). The default
    # value matters: a StopIteration raised here on empty input would be
    # turned into a RuntimeError, as we are inside a generator (PEP 479).
    next(c.result, None)

    for line in c.result:

        yield AdrecsDrugAdr(*line.strip().split('\t'))
[docs]
def adrecs_hierarchy() -> set[AdrecsChildParent]:
    """
    Child-parent relationships between AdReCS ontology terms.

    The parent of a term is the term whose class equals the class of the
    child with its last dot separated level removed. Terms without a dot
    in their class have no parent and are left out.

    Returns:
        Set of tuples representing child-parent relationships. Both the
        child and the parent terms are represented by their class and
        BADD identifier. The BADD of the parent is None if the parent
        class is not present in the ontology.
    """

    terms = adrecs_adr_ontology()

    # BADD identifiers of all terms by their classes
    badd_by_class = {term.adrecs_class: term.badd for term in terms}

    hierarchy = set()

    for term in terms:

        term_class = term.adrecs_class

        if '.' in term_class:

            parent_class = term_class.rsplit('.', 1)[0]
            child = AdrecsAdr(adr_class = term_class, badd = term.badd)
            parent = AdrecsAdr(
                adr_class = parent_class,
                badd = badd_by_class.get(parent_class),
            )

            hierarchy.add(AdrecsChildParent(child = child, parent = parent))

    return hierarchy