#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#from__future__importannotationsfromtypingimportNamedTupleimportcollectionsimportpypath.share.curlascurlimportpypath.resources.urlsasurlsimportpypath.share.commonascommon
def sider_drug_names() -> dict[str, set[tuple]]:
    """
    Retrieves drug information from the SIDER database.

    Two tab separated tables are downloaded, one with drug names and
    one with ATC codes. Both are expected to have two columns: the drug
    identifier and the value.

    Returns:
        A dict with drug identifiers (PubChem CIDs) as keys and sets of
        `SiderDrug` named tuples (name and ATC code) as values. A drug
        gets one tuple for each of its ATC codes; drugs without ATC
        code get one tuple with `atc=None`; drugs without name have
        `name=None`.
    """

    attrs = {}

    for attr in ('name', 'atc'):

        url = urls.urls['sider'][f'drug_{attr}s']
        c = curl.Curl(url, large=True, silent=False)
        values = attrs[attr] = collections.defaultdict(list)

        for line in c.result:

            # Remove only the line terminator (LF or CRLF), the tab
            # separated fields are left untouched.
            line = line.rstrip('\r\n')

            # Blank lines (e.g. at the end of the file) can not be
            # unpacked into two fields.
            if not line.strip():

                continue

            cid, value = line.split('\t')
            values[cid].append(value)

    result = collections.defaultdict(set)

    # All drug identifiers that occur in either of the two tables.
    for cid in set.union(*map(set, attrs.values())):

        # Only the first name is used; `.get` is used instead of indexing
        # to avoid creating new keys in the defaultdict.
        name = attrs['name'].get(cid, (None,))[0]

        for atc in attrs['atc'].get(cid, (None,)):

            result[cid].add(SiderDrug(name=name, atc=atc))

    return dict(result)
def sider_side_effects(freq: bool = False) -> dict[str, set[tuple]]:
    """
    Retrieves side effect information from the SIDER database.

    Args:
        freq:
            Retrieve the dataset with frequency information. This is an
            independent dataset with lower coverage.

    Returns:
        A dict with the values of the first column (drug PubChem CID)
        as keys and sets of named tuples as values. The tuples carry
        the UMLS concept ids both for label and MedDRA and the side
        effect name; with `freq=True` also the frequency. Empty fields
        are represented by `None`.
    """

    record = SiderSideeffetFrequency if freq else SiderSideeffect
    result = collections.defaultdict(set)

    url = urls.urls['sider']['meddra_%s' % ('freq' if freq else 'all')]
    c = curl.Curl(url, large=True, silent=False)

    # essential features' indices: the column positions that populate
    # the fields of the record, in the order of `record._fields`
    indices = (2, 8, 9, 4) if freq else (2, 4, 5)

    for line in c.result:

        # Remove only the line terminator: stripping all whitespace
        # would also remove trailing tabs, i.e. trailing empty columns,
        # and shift the line out of range of the indices above.
        line = line.rstrip('\r\n')

        # The blank check has to happen before splitting, because
        # `''.split('\t')` gives `['']`, which is not empty.
        if not line.strip():

            continue

        fields = line.split('\t')

        result[fields[0]].add(
            record(
                **{
                    key: fields[i] or None
                    for key, i in zip(record._fields, indices)
                }
            )
        )

    return dict(result)
def sider_side_effect_frequencies() -> dict[str, set[tuple]]:
    """
    Retrieves side effect information with frequencies from the SIDER
    database.

    Shortcut for `sider_side_effects(freq=True)`.

    Returns:
        The return value of `sider_side_effects(freq=True)`: a dict
        with drug CIDs as keys and sets of named tuples as values; the
        tuples carry UMLS concept ids both for label and MedDRA,
        frequency information and side effect name.

    Attention! -> `sider_side_effects` function returns about 20k more
    rows than this dataset, but without frequency information.
    """

    return sider_side_effects(freq=True)
def sider_meddra_side_effects() -> list[tuple]:
    """
    Retrieves MedDRA side effect information from the SIDER database.

    Only the rows with `PT` in their second column are kept; blank
    lines are ignored.

    Returns:
        A list of unique named tuples containing the following fields:
            - cid: Drug PubChem CID
            - meddra_id: MedDRA ID for the side effect
            - side_effect_name: Name of the side effect
    """

    download = curl.Curl(
        urls.urls['sider']['meddra_tsv'],
        large=True,
        silent=False,
    )

    # Lazily split each non-blank line into its tab separated fields.
    rows = (
        raw.strip().split('\t')
        for raw in download.result
        if raw.strip()
    )

    # A set is used to drop duplicate records.
    unique = {
        SiderSideeffectMeddra(
            cid=row[0],
            meddra_id=row[2],
            side_effect_name=row[3],
        )
        for row in rows
        if row[1] == 'PT'
    }

    return list(unique)