Source code for pypath.inputs.sider
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from __future__ import annotations
from typing import NamedTuple
import collections
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.share.common as common
[docs]
class SiderSideeffect(NamedTuple):
umls_concept_on_label: str
umls_concept_in_meddra: str
side_effect: str
[docs]
class SiderSideeffetFrequency(NamedTuple):
umls_concept_on_label: str
umls_concept_in_meddra: str
side_effect: str
frequency: float
[docs]
class SiderSideeffectMeddra(NamedTuple):
    """
    One record of the SIDER MedDRA table.

    Records of this type are created by `sider_meddra_side_effects`, which
    fills the fields from the first, third and fourth columns of each row.
    """

    # First column of the row. `sider_meddra_side_effects` documents it as
    # the drug PubChem CID.
    # NOTE(review): confirm against the SIDER README, the first column of
    # this table might be a UMLS concept ID instead.
    cid: str
    # Third column of the row, the MedDRA ID of the side effect.
    meddra_id: str
    # Fourth column of the row, the name of the side effect.
    side_effect_name: str
[docs]
class SiderDrug(NamedTuple):
name: str
atc: str
[docs]
def sider_drug_names() -> dict[str, set[tuple]]:
    """
    Retrieves drug information from the SIDER database.

    Reads the drug name and the drug ATC tables and combines them by the
    drug identifier found in their first column.

    Returns:
        A dict with drug PubChem CIDs as keys and sets of `SiderDrug`
        named tuples as values. Each drug has one tuple for each of its
        ATC codes; drugs without ATC code have one tuple with `atc` set
        to `None`. Only the first name of each drug is used, and it is
        `None` for drugs missing from the name table.
    """

    attrs = {}

    for attr in ('name', 'atc'):

        url = urls.urls['sider'][f'drug_{attr}s']
        c = curl.Curl(url, large = True, silent = False)
        by_cid = collections.defaultdict(list)

        for line in c.result:

            fields = line.rstrip('\r\n').split('\t')

            # blank or incomplete lines carry no (cid, value) pair
            if len(fields) < 2:
                continue

            cid, value = fields[:2]
            by_cid[cid].append(value)

        attrs[attr] = by_cid

    result = collections.defaultdict(set)

    # every drug that occurs in at least one of the two tables
    for cid in set().union(*attrs.values()):

        # `get` is used instead of indexing to avoid inserting new keys
        # into the defaultdicts
        name = attrs['name'].get(cid, (None,))[0]

        for atc in attrs['atc'].get(cid, (None,)):

            result[cid].add(SiderDrug(name = name, atc = atc))

    return dict(result)
[docs]
def sider_side_effects(freq: bool = False) -> dict[str, set[tuple]]:
    """
    Retrieves side effect information from the SIDER database.

    Args:
        freq:
            Retrieve the dataset with frequency information. This is
            an independent dataset with lower coverage.

    Returns:
        A dict with the values of the first column (drug PubChem CID) as
        keys and sets of named tuples as values. The tuples are
        `SiderSideeffetFrequency` if `freq` is True, otherwise
        `SiderSideeffect`: UMLS concept ids both for label and MedDra,
        side effect name and, only with `freq`, the frequency. Empty
        columns are represented by `None`.
    """

    record = SiderSideeffetFrequency if freq else SiderSideeffect
    url = urls.urls['sider']['meddra_%s' % ('freq' if freq else 'all')]
    c = curl.Curl(url, large = True, silent = False)

    # essential features' indices, in the order of the fields of `record`
    indices = (2, 8, 9, 4) if freq else (2, 4, 5)
    n_required = max(indices) + 1
    result = collections.defaultdict(set)

    for line in c.result:

        # Only the line terminator is removed: stripping all whitespace
        # would remove trailing tabs too, i.e. the empty columns at the
        # end of the line, and shift the line below the required length.
        fields = line.rstrip('\r\n').split('\t')

        # Blank or truncated lines can not be converted to a record.
        # Splitting never gives an empty list, so the length is checked.
        if len(fields) < n_required:
            continue

        result[fields[0]].add(
            record(*(fields[i] or None for i in indices))
        )

    return dict(result)
[docs]
def sider_side_effect_frequencies() -> dict[str, set[tuple]]:
    """
    Retrieves side effect information with frequencies from the SIDER
    database.

    Shortcut for `sider_side_effects(freq = True)`.

    Returns:
        The value returned by `sider_side_effects(freq = True)`: a dict
        with drug CIDs as keys and sets of named tuples as values, the
        tuples carrying UMLS concept ids both for label and MedDRA,
        side effect name and frequency information.

    Attention! -> `sider_side_effects` function returns about 20k more rows
    than this dataset, but without frequency information.
    """

    return sider_side_effects(freq = True)
[docs]
def sider_meddra_side_effects() -> list[tuple]:
    """
    Retrieves MedDRA side effect information from the SIDER database.

    Only the rows with `PT` in their second column are used (presumably
    the MedDRA preferred terms).

    Returns:
        A sorted list of unique `SiderSideeffectMeddra` named tuples
        containing the following fields:
            - cid: Drug PubChem CID (first column; NOTE(review): confirm
              against the SIDER README, this column might be a UMLS
              concept ID)
            - meddra_id: MedDRA ID for the side effect
            - side_effect_name: Name of the side effect
    """

    url_meddra_tsv = urls.urls['sider']['meddra_tsv']
    c = curl.Curl(
        url_meddra_tsv,
        large = True,
        silent = False,
    )

    result = set()

    for line in c.result:

        fields = line.strip().split('\t')

        # blank or truncated lines and terms other than `PT` are skipped
        if len(fields) < 4 or fields[1] != 'PT':
            continue

        result.add(
            SiderSideeffectMeddra(
                cid = fields[0],
                meddra_id = fields[2],
                side_effect_name = fields[3],
            )
        )

    # the set removes duplicates; sorting makes the order of the records
    # reproducible across runs
    return sorted(result)