# Source code for pypath.inputs.intogen
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from past.builtins import xrange, range
import csv
import collections
import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.common as common
import pypath.share.settings as settings
import pypath.utils.mapping as mapping
# [docs]
def intogen_annotations():
    """
    Cancer driver gene annotations from the IntOGen database.

    Retrieves the zip archive registered under the `db2014_2` IntOGen URL,
    reads the `Drivers_type_role.tsv` table from it and maps each gene
    symbol (column `geneHGNCsymbol`) to UniProt identifiers.

    Returns:
        dict: One entry per UniProt identifier returned by the mapping;
        each value is a set of `IntogenAnnotation` named tuples with the
        fields `type`, `role`, `curated` and `oncodrive_role_prob`.
    """

    IntogenAnnotation = collections.namedtuple(
        'IntogenAnnotation',
        ['type', 'role', 'curated', 'oncodrive_role_prob'],
    )

    table_name = 'Drivers_type_role.tsv'
    url = urls.urls['intogen']['db2014_2']

    # The longer connect timeout applies only while the download is set up.
    with settings.context(curl_connect_timeout = 100):

        download = curl.Curl(
            url,
            large = True,
            silent = False,
            files_needed = [table_name],
            compr = 'zip',
        )

    table = download.result[table_name]

    # Skip the seven lines in front of the column header row, so that
    # `DictReader` picks up the header as its field names.
    for _ in xrange(7):

        table.readline()

    annotations = collections.defaultdict(set)

    for row in csv.DictReader(table, delimiter = '\t'):

        symbol_uniprots = mapping.map_name(
            row['geneHGNCsymbol'],
            'genesymbol',
            'uniprot',
        )

        for uniprot in symbol_uniprots:

            raw_prob = row['OncodriveROLE_prob']

            if raw_prob == 'Manually curated':

                # Manually curated rows carry this label in place of a
                # number; they are recorded with probability 1.0.
                is_curated = True
                prob = 1.0

            else:

                is_curated = False
                prob = common.float_or_nan(raw_prob)

            annotations[uniprot].add(
                IntogenAnnotation(
                    type = row['Driver_type'],
                    role = row['Role'],
                    curated = is_curated,
                    oncodrive_role_prob = prob,
                )
            )

    # Return a plain dict so lookups of unknown keys do not create entries.
    return dict(annotations)