# Source code for pypath.inputs.cancerdrugsdb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

"""
Web scraper for the CancerDrugs_DB database.
"""

import csv
import collections
import itertools
import re

import pypath.share.curl as curl
import pypath.share.common as common
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping
import pypath.share.session as session

_logger = session.Logger(name = 'cancerdrugsdb_input')
_log = _logger._log



[docs]
def cancerdrugsdb_download():
    """
    Retrieves the CancerDrugs_DB table: a curated set of cancer drugs
    licensed in most parts of the world, together with their gene targets
    (where available).
    From https://www.anticancerfund.org/en/cancerdrugs-db.
    A single dataset is downloaded by this function.

    Args
        None.

    Returns
        A list of dicts, each is a record as provided by the database.
        The keys are the column names from the header of the
        tab-separated file.
    """

    download = curl.Curl(
        urls.urls['cancerdrugs_db']['url'],
        large = True,
        silent = False,
    )

    # the payload is tab-delimited, the first line is used as header
    reader = csv.DictReader(download.result, delimiter = '\t')

    return list(reader)




[docs]
def cancerdrugsdb_interactions():
    """
    Returns drug-gene interactions from Cancer Drugs Database
    (https://www.anticancerfund.org/en/cancerdrugs-db).

    Records without a parseable ChEMBL ID or without targets are skipped
    and only counted in the log. For every other record, one interaction
    is created for each combination of the PubChem IDs mapped from the
    ChEMBL ID and the UniProt IDs mapped from the target gene symbols.

    Args
        None.

    Returns
        List of named tuples, each describing a drug-gene interaction.
        Identifiers of type PubChem and UniProt. Values missing from a
        record are represented by `None` (identifiers, label, approval
        year), `False` (approval flags) or `('',)` (indications).
    """

    # note: according to the original author the drug-target interactions
    # are from DGIdb (spelled `GDIdb` in the earlier note -- to be
    # confirmed). if they're in OmniPath already, do we skip the
    # interactions altogether, or is it better to keep them here for
    # consistency? the dataset is very small anyways.
    # if we keep them, do we test for consistency between the interactions?

    reid = re.compile(r'>(\w+)<')

    def strip_id(field):
        """
        Extracts the identifier enclosed between `>` and `<`
        (presumably the text of an HTML link).
        """

        # a missing column or a short row results `None` instead of
        # a string; treat it the same way as a field without an ID
        match = reid.search(field or '')

        return match.group(1) if match else None


    def yes_no(field):
        """
        Converts the `Y` flags of the table to boolean.
        """

        return field == 'Y'


    def parse_year(field):
        """
        Converts the approval year to integer, `None` if it's missing or
        not a number.
        """

        if not field:

            return None

        try:

            return int(field)

        except ValueError:

            # one malformed value should not abort the whole dataset
            return None


    CancerDrugsInteraction = collections.namedtuple(
        'CancerdrugsdbInteraction',
        (
            'drug_pubchem',
            'drug_chembl',
            'drug_drugbank',
            'drug_label',
            'target_uniprot',
            'ema_approved',
            'fda_approved',
            'european_national_approved',
            'who_approved',
            'generic',
            'approval_year',
            'indications',
        ),
    )

    result = []
    unmapped_drug = []
    no_targets = []

    data = cancerdrugsdb_download()

    for rec in data:

        label = rec.get('Product')
        chembl = strip_id(rec.get('ChEMBL'))

        if chembl is None:

            unmapped_drug.append(label)
            continue

        targets = rec.get('Targets')

        if not targets:

            no_targets.append(label)
            continue

        # ID translation happens only for records that are not skipped
        pubchems = mapping.map_name(chembl, 'chembl', 'pubchem')

        target_uniprots = mapping.map_names(
            (tar.strip() for tar in targets.split(';')),
            'genesymbol',
            'uniprot',
        )

        # apart from the two mapped identifiers, all fields are the same
        # for every interaction of this record: process them only once
        drug_fields = dict(
            drug_chembl = chembl,
            drug_drugbank = strip_id(rec.get('DrugBank ID')),
            drug_label = label,
            ema_approved = yes_no(rec.get('EMA')),
            fda_approved = yes_no(rec.get('FDA')),
            european_national_approved = yes_no(rec.get('EN')),
            who_approved = yes_no(rec.get('WHO')),
            generic = yes_no(rec.get('Generic')),
            approval_year = parse_year(rec.get('Year')),
            indications = tuple(
                i.strip()
                for i in (rec.get('Indications') or '').split(';')
            ),
        )

        result.extend(
            CancerDrugsInteraction(
                drug_pubchem = pubchem,
                target_uniprot = uniprot,
                **drug_fields
            )
            for pubchem, uniprot in
            itertools.product(pubchems, target_uniprots)
        )


    _log(
        'Could not find CHEMBL IDs for %u '
        'CancerDrugs_DB Products.' % len(unmapped_drug)
    )

    _log(
        '%u CancerDrugs_DB Products had no targets.'
        % len(no_targets)
    )

    return result




[docs]
def cancerdrugsdb_annotations():
    """
    Returns drug annotations from CancerDrugs_DB.

    The annotations are derived from the interaction records: the
    target and the drug identifier fields are dropped, the remaining
    drug level fields are collected by PubChem ID.

    Args
        None.

    Returns
        (dict): Keys are PubChem IDs, values are sets of annotations.
    """

    CancerdrugsdbAnnotation = collections.namedtuple(
        'CancerdrugsdbAnnotation',
        (
            'drug_label',
            'ema_approved',
            'fda_approved',
            'european_national_approved',
            'who_approved',
            'generic',
            'approval_year',
            'indications',
        ),
    )
    annot_fields = CancerdrugsdbAnnotation._fields

    annotations = {}

    for interaction in cancerdrugsdb_interactions():

        # copy only the fields shared between the two record types
        annot = CancerdrugsdbAnnotation(
            *(getattr(interaction, field) for field in annot_fields)
        )

        # sets collapse the identical annotations created from
        # multiple targets of the same drug
        annotations.setdefault(interaction.drug_pubchem, set()).add(annot)

    return annotations