Source code for pypath.inputs.dgidb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from __future__ import annotations

import csv
import collections

import bs4

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.utils.mapping as mapping


def dgidb_interactions() -> list[tuple]:
    """
    Downloads the DGIdb drug-gene interaction table and converts it to
    records.

    The table is read as tab separated values with a header row. Each
    data row becomes one ``DgidbInteraction`` named tuple; empty fields
    are stored as ``None``. Identical records are collapsed, hence the
    order of the returned list is not defined.

    Returns:
        A list of ``DgidbInteraction`` named tuples with the fields
        ``genesymbol``, ``entrez``, ``resource``, ``type``,
        ``drug_name``, ``drug_chembl``, ``score`` and ``pmid``.
    """

    # (record field, column name in the downloaded table)
    field_to_column = (
        ('genesymbol', 'gene_name'),
        ('entrez', 'entrez_id'),
        ('resource', 'interaction_claim_source'),
        ('type', 'interaction_types'),
        ('drug_name', 'drug_claim_primary_name'),
        ('drug_chembl', 'drug_concept_id'),
        ('score', 'interaction_group_score'),
        ('pmid', 'PMIDs'),
    )

    DgidbInteraction = collections.namedtuple(
        'DgidbInteraction',
        [field for field, _ in field_to_column],
    )

    download = curl.Curl(
        url = urls.urls['dgidb']['interactions'],
        silent = False,
        large = True,
    )
    reader = csv.DictReader(download.result, delimiter = '\t')

    # a set is used to get rid of the duplicate records
    records = {
        DgidbInteraction(**{
            field: row[column] or None
            for field, column in field_to_column
        })
        for row in reader
    }

    return list(records)
def dgidb_annotations():
    """
    Downloads the DGIdb druggable gene categories and organizes them by
    UniProt ID.

    The table is read as tab separated values with a header row. The
    gene symbol of each row (column ``entrez_gene_symbol``) is translated
    by ``mapping.map_name`` from ``genesymbol`` to ``uniprot``, and the
    value of the ``category`` column is recorded for every identifier
    resulting from the translation.

    Returns:
        A dict with the translated identifiers as keys and sets of
        ``DgidbAnnotation`` named tuples (single field: ``category``)
        as values.
    """

    DgidbAnnotation = collections.namedtuple(
        'DgidbAnnotation',
        ['category'],
    )

    download = curl.Curl(
        url = urls.urls['dgidb']['categories'],
        silent = False,
        large = True,
    )
    rows = csv.DictReader(download.result, delimiter = '\t')

    annotations = {}

    for row in rows:

        for uniprot in mapping.map_name(
            row['entrez_gene_symbol'],
            'genesymbol',
            'uniprot',
        ):

            # the category is accessed only if the gene symbol
            # could be translated to at least one identifier
            annotations.setdefault(uniprot, set()).add(
                DgidbAnnotation(category = row['category'])
            )

    return annotations
def get_dgidb_old():
    """
    Deprecated. Will be removed soon.

    Collects gene symbols by scraping the DGIdb web pages. First the
    category names are read from the options of the ``gene_categories``
    select element of the main page. Then, for each category, the page
    of that category is downloaded and the stripped text of the first
    cell of each table body row is collected.

    Returns:
        The result of ``mapping.map_names`` called on the collected gene
        symbols, translating from ``genesymbol`` to ``uniprot``
        (presumably a collection of UniProt IDs; verify against
        ``pypath.utils.mapping``).
    """

    main_page = curl.Curl(urls.urls['dgidb']['main_url'], silent = False)
    main_soup = bs4.BeautifulSoup(main_page.result, 'html.parser')
    selector = main_soup.find('select', {'id': 'gene_categories'})
    categories = [
        option.attrs['value']
        for option in selector.find_all('option')
    ]

    symbols = []

    for category in categories:

        # the URL template is filled in with the category name
        category_page = curl.Curl(urls.urls['dgidb']['url'] % category)
        category_soup = bs4.BeautifulSoup(category_page.result, 'html.parser')

        for row in category_soup.find('tbody').find_all('tr'):

            symbols.append(row.find('td').text.strip())

    return mapping.map_names(symbols, 'genesymbol', 'uniprot')