#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

"""
Web scraper for the CancerDrugs_DB database.
"""

import csv
import collections
import itertools
import re

import pypath.share.curl as curl
import pypath.share.common as common
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.share.session as session

# Module-level logger; `_log` is the shortcut used by the functions below.
_logger = session.Logger(name='cancerdrugsdb_input')
_log = _logger._log
def cancerdrugsdb_download():
    """
    Downloads the CancerDrugs_DB table: a curated set of cancer drugs
    licensed in most parts of the world, with their gene targets where
    available. From https://www.anticancerfund.org/en/cancerdrugs-db.
    This function downloads a single dataset.

    Args
        None.

    Returns
        A list of dicts, one per row of the tab-separated table, each
        a record as it is provided by the database.
    """

    download = curl.Curl(
        urls.urls['cancerdrugs_db']['url'],
        large=True,
        silent=False,
    )

    # The table is tab-separated; DictReader keys each row by the header.
    reader = csv.DictReader(download.result, delimiter='\t')

    return list(reader)
def cancerdrugsdb_interactions():
    """
    Returns drug-gene interactions from Cancer Drugs Database
    (https://www.anticancerfund.org/en/cancerdrugs-db).

    Records without a ChEMBL ID or without targets are skipped; their
    number is reported in the log. One interaction is created for every
    combination of the PubChem IDs mapped from the ChEMBL ID and the
    UniProt IDs mapped from the target gene symbols.

    Args
        None.

    Returns
        List of named tuples, each describing a drug-gene interaction.
        Identifiers of type PubChem and UniProt.
    """

    # note: drug-target interactions are from GDIdb. if they're in OmniPath
    # already, do we skip the interactions altogether, or is it better to
    # keep them here for consistency? the dataset is very small anyways.
    # if we keep them, do we test for consistency between the interactions?

    # The identifier is the text of an HTML element, i.e. between `>` and `<`.
    reid = re.compile(r'>(\w+)<')

    def strip_id(field):
        # csv.DictReader yields None for fields missing from a short row,
        # and `rec.get` yields None for an absent column: treat both as
        # "no identifier" instead of passing None to the regex.
        match = reid.search(field) if field else None

        return match.group(1) if match else None

    def yes_no(field):

        return field == 'Y'

    CancerDrugsInteraction = collections.namedtuple(
        'CancerdrugsdbInteraction',
        (
            'drug_pubchem',
            'drug_chembl',
            'drug_drugbank',
            'drug_label',
            'target_uniprot',
            'ema_approved',
            'fda_approved',
            'european_national_approved',
            'who_approved',
            'generic',
            'approval_year',
            'indications',
        ),
    )

    result = []
    unmapped_drug = []
    no_targets = []

    data = cancerdrugsdb_download()

    for rec in data:

        chembl = strip_id(rec.get('ChEMBL'))
        drugbank = strip_id(rec.get('DrugBank ID'))

        if chembl is None:

            unmapped_drug.append(rec.get('Product'))
            continue

        pubchems = mapping.map_name(chembl, 'chembl', 'pubchem')

        targets = rec.get('Targets')

        if not targets:

            no_targets.append(rec.get('Product'))
            continue

        target_uniprots = mapping.map_names(
            (tar.strip() for tar in targets.split(';')),
            'genesymbol',
            'uniprot',
        )

        pairs = list(itertools.product(pubchems, target_uniprots))

        if not pairs:

            # Nothing could be mapped, hence no interaction for this record.
            continue

        year = rec.get('Year')

        # These fields are identical for every drug-target pair of the
        # record, so they are built only once.
        shared = dict(
            drug_chembl=chembl,
            drug_drugbank=drugbank,
            drug_label=rec.get('Product'),
            ema_approved=yes_no(rec.get('EMA')),
            fda_approved=yes_no(rec.get('FDA')),
            european_national_approved=yes_no(rec.get('EN')),
            who_approved=yes_no(rec.get('WHO')),
            generic=yes_no(rec.get('Generic')),
            approval_year=int(year) if year else None,
            # A missing field is handled the same way as an empty one.
            indications=tuple(
                i.strip()
                for i in (rec.get('Indications') or '').split(';')
            ),
        )

        result.extend(
            CancerDrugsInteraction(
                drug_pubchem=pubchem,
                target_uniprot=uniprot,
                **shared
            )
            for pubchem, uniprot in pairs
        )

    _log(
        'Could not find CHEMBL IDs for %u '
        'CancerDrugs_DB Products.' % len(unmapped_drug)
    )
    _log(
        '%u CancerDrugs_DB Products had no targets.' % len(no_targets)
    )

    return result
def cancerdrugsdb_annotations():
    """
    Returns drug annotations from CancerDrugs_DB.

    The annotations are derived from the interaction records by keeping
    only the drug level fields and grouping them by PubChem ID.

    Args
        None.

    Returns
        (dict): Keys are PubChem IDs, values are sets of annotations.
    """

    annotation_cls = collections.namedtuple(
        'CancerdrugsdbAnnotation',
        (
            'drug_label',
            'ema_approved',
            'fda_approved',
            'european_national_approved',
            'who_approved',
            'generic',
            'approval_year',
            'indications',
        ),
    )

    annotations = {}

    for interaction in cancerdrugsdb_interactions():

        # Keep only the fields which belong to the annotation record.
        fields = {
            key: value
            for key, value in interaction._asdict().items()
            if key in annotation_cls._fields
        }

        # Sets collapse the duplicates coming from the multiple targets
        # of the same drug.
        annotations.setdefault(
            interaction.drug_pubchem,
            set(),
        ).add(annotation_cls(**fields))

    return annotations