# Source code for pypath.inputs.cancerdrugsdb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#  This file is part of the `pypath` python module
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#  Website:

"""
Web scraper for the CancerDrugs_DB database.
"""

import csv
import collections
import itertools
import re

import pypath.share.curl as curl
import pypath.share.common as common
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.share.session as session

# Module-level logger of this input module, created under the name
# 'cancerdrugsdb_input'.
_logger = session.Logger(name = 'cancerdrugsdb_input')
# Shortcut to the logger's `_log` method; the functions of this module call
# it to report how many records were skipped.
_log = _logger._log

def cancerdrugsdb_download():
    """
    Retrieves the CancerDrugs_DB dataset.

    The database is a curated set of cancer drugs licensed in most parts
    of the world, together with their gene targets (where available).
    The single table is fetched from the URL stored under the
    ``cancerdrugs_db`` key of ``pypath.resources.urls`` and read as a
    tab-delimited file whose first row provides the field names.

    Args
        None.

    Returns
        A list of dicts, one per record, as provided by the database.
    """

    source = urls.urls['cancerdrugs_db']['url']
    download = curl.Curl(source, large = True, silent = False)

    # with `large = True` the result is consumed row by row by the reader
    reader = csv.DictReader(download.result, delimiter = '\t')

    return list(reader)
def cancerdrugsdb_interactions():
    """
    Drug-gene interactions from the Cancer Drugs Database (CancerDrugs_DB).

    Every record returned by ``cancerdrugsdb_download`` is expanded into
    one interaction for each (PubChem ID, UniProt ID) pair: the ChEMBL ID
    of the drug is translated to PubChem, and the gene symbols listed in
    the ``Targets`` field (separated by semicolons) are translated to
    UniProt. Records without a ChEMBL ID or without targets are skipped;
    their numbers are written to the log.

    Args
        None.

    Returns
        List of named tuples, each describing a drug-gene interaction.
        Identifiers of type PubChem and UniProt.
    """

    # note: drug-target interactions are from DGIdb. if they're in OmniPath
    # already, do we skip the interactions altogether, or is it better to
    # keep them here for consistency? the dataset is very small anyways.
    # if we keep them, do we test for consistency between the interactions?

    # the identifier is the word enclosed between `>` and `<`
    # (presumably the text of an HTML link -- TODO confirm against the data)
    reid = re.compile(r'>(\w+)<')


    def strip_id(field):
        """
        Extracts the bare identifier from an ID field, None if not found.
        """

        # a missing column or a short row gives None instead of a string
        if not field:

            return None

        # NOTE(review): the search call and the group index are a
        # reconstruction of a garbled statement -- confirm against upstream
        match = reid.search(field)

        return match.group(1) if match else None


    def yes_no(field):
        """
        Converts the database's `Y` flag to a boolean.
        """

        return field == 'Y'


    CancerDrugsInteraction = collections.namedtuple(
        'CancerdrugsdbInteraction',
        (
            'drug_pubchem',
            'drug_chembl',
            'drug_drugbank',
            'drug_label',
            'target_uniprot',
            'ema_approved',
            'fda_approved',
            'european_national_approved',
            'who_approved',
            'generic',
            'approval_year',
            'indications',
        ),
    )

    result = []
    unmapped_drug = []
    no_targets = []

    data = cancerdrugsdb_download()

    for rec in data:

        label = rec.get('Product')
        chembl = strip_id(rec.get('ChEMBL'))
        drugbank = strip_id(rec.get('DrugBank ID'))

        if chembl is None:

            unmapped_drug.append(label)
            continue

        pubchems = mapping.map_name(chembl, 'chembl', 'pubchem')

        targets = rec.get('Targets')

        if not targets:

            no_targets.append(label)
            continue

        target_uniprots = mapping.map_names(
            (tar.strip() for tar in targets.split(';')),
            'genesymbol',
            'uniprot',
        )

        pairs = tuple(itertools.product(pubchems, target_uniprots))

        # nothing to emit if either side could not be translated; the
        # record level fields are parsed only when they are actually needed
        if not pairs:

            continue

        year = rec.get('Year')

        # fields shared by all interactions of this record, built only once
        shared = dict(
            drug_chembl = chembl,
            drug_drugbank = drugbank,
            drug_label = label,
            ema_approved = yes_no(rec.get('EMA')),
            fda_approved = yes_no(rec.get('FDA')),
            european_national_approved = yes_no(rec.get('EN')),
            who_approved = yes_no(rec.get('WHO')),
            generic = yes_no(rec.get('Generic')),
            approval_year = int(year) if year else None,
            # a missing value is handled the same way as an empty string
            indications = tuple(
                i.strip()
                for i in (rec.get('Indications') or '').split(';')
            ),
        )

        result.extend(
            CancerDrugsInteraction(
                drug_pubchem = pubchem,
                target_uniprot = uniprot,
                **shared
            )
            for pubchem, uniprot in pairs
        )

    _log(
        'Could not find CHEMBL IDs for %u '
        'CancerDrugs_DB Products.' % len(unmapped_drug)
    )
    _log(
        '%u CancerDrugs_DB Products had no targets.' % len(no_targets)
    )

    return result
def cancerdrugsdb_annotations():
    """
    Drug annotations from CancerDrugs_DB.

    Reduces the drug-gene interactions from ``cancerdrugsdb_interactions``
    to their drug level fields (label, approvals, generic status, approval
    year and indications) and groups them by the PubChem ID of the drug.
    As the values are sets, identical annotations of a drug are kept once.

    Args
        None.

    Returns
        (dict): Keys are PubChem IDs, values are sets of annotations.
    """

    CancerdrugsdbAnnotation = collections.namedtuple(
        'CancerdrugsdbAnnotation',
        (
            'drug_label',
            'ema_approved',
            'fda_approved',
            'european_national_approved',
            'who_approved',
            'generic',
            'approval_year',
            'indications',
        ),
    )

    annotations = collections.defaultdict(set)

    for interaction in cancerdrugsdb_interactions():

        # keep only the fields that are part of the annotation record
        selected = {
            key: value
            for key, value in interaction._asdict().items()
            if key in CancerdrugsdbAnnotation._fields
        }

        annotations[interaction.drug_pubchem].add(
            CancerdrugsdbAnnotation(**selected)
        )

    return dict(annotations)