#!/usr/bin/env python
# -*- coding: utf-8 -*-
# This file is part of the `pypath` python module
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
# Website: https://pypath.omnipathdb.org/
import os
import csv
import collections
import base64
import json
import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.session as session_mod
import pypath.share.settings as settings
import pypath.utils.mapping as mapping
import pypath.inputs.credentials as credentials
# Module-level logger registered under the name `cosmic_input`.
_logger = session_mod.Logger(name = 'cosmic_input')
# Shortcut to the logger's `_log` method, used for messages in this module.
_log = _logger._log
def cancer_gene_census_annotations(
        user = None,
        passwd = None,
        credentials_fname = 'cosmic_credentials',
    ):
    """
    Retrieves a list of cancer driver genes (Cancer Gene Census) from
    the Sanger COSMIC (Catalogue of Somatic Mutations in Cancer) database.

    Args:
        user: COSMIC user name; forwarded to `credentials.credentials`.
        passwd: COSMIC password; forwarded to `credentials.credentials`.
        credentials_fname: Passed as `from_file` to
            `credentials.credentials`.

    Returns:
        dict of annotations: keys are the identifiers returned by
        `mapping.map_name` for each gene symbol, values are sets of
        `CancerGeneCensusAnnotation` named tuples. An empty dict if no
        credentials are available; None if the access URL could not be
        retrieved from the COSMIC reply.
    """

    try:

        cosmic_cred = credentials.credentials(
            user = user,
            passwd = passwd,
            resource = 'COSMIC',
            from_file = credentials_fname,
        )

    except RuntimeError:

        _log(
            'No credentials available for the COSMIC website. '
            'Either set the `cosmic_credentials` key in the `settings` '
            'module (e.g. `{\'user\': \'myuser\', '
            '\'passwd\': \'mypassword\'}`), or pass them directly to the '
            '`pypath.inputs.cosmic.cancer_gene_census_annotations` '
            'method.'
        )

        return {}

    # Field names mirror the keyword arguments used when building the
    # records in the loop at the end of this function.
    CancerGeneCensusAnnotation = collections.namedtuple(
        'CancerGeneCensusAnnotation',
        [
            'tier',
            'hallmark',
            'somatic',
            'germline',
            'tumour_types_somatic',
            'tumour_types_germline',
            'cancer_syndrome',
            'tissue_type',
            'genetics',
            'role',
            'mutation_type',
        ],
    )

    def multi_field(content):
        """
        Splits a comma separated field into a sorted tuple of stripped
        values; a blank field gives an empty tuple.
        """

        return (
            tuple(sorted(i.strip() for i in content.split(',')))
                if content.strip() else
            ()
        )

    def yes(value):
        """
        Tells if a field reads `yes`, ignoring case and outer whitespace.
        """

        return value.strip().lower() == 'yes'

    url = urls.urls['cgc']['url_new']

    # NOTE(review): the trailing newline is part of the encoded
    # credentials in the original code; presumably it mirrors the
    # `echo "user:passwd" | base64` recipe -- confirm before removing.
    auth_str = base64.b64encode(
        ('%s:%s\n' % (cosmic_cred['user'], cosmic_cred['passwd'])).encode()
    )

    req_hdrs = ['Authorization: Basic %s' % auth_str.decode()]

    # First request: authenticate and obtain a JSON reply which is
    # expected to carry the actual download address under the `url` key.
    c = curl.Curl(
        url,
        large = False,
        silent = False,
        req_headers = req_hdrs,
        cache = False,
    )

    try:

        access_url = json.loads(c.result)

    except (TypeError, ValueError):

        # Empty reply (`None`) or a body which is not JSON: handle it
        # on the same failure path as a reply without the `url` key.
        access_url = None

    if not isinstance(access_url, dict) or 'url' not in access_url:

        _log(
            'Could not retrieve COSMIC access URL. '
            'Most likely the authentication failed. '
            'The reply was: `%s`' % c.result
        )

        return None

    # Second request: download the table from the address received above.
    c = curl.Curl(
        access_url['url'],
        large = True,
        silent = False,
        bypass_url_encoding = True,
    )

    data = csv.DictReader(c.fileobj, delimiter = ',')
    result = collections.defaultdict(set)

    for rec in data:

        uniprots = mapping.map_name(
            rec['Gene Symbol'],
            'genesymbol',
            'uniprot',
        )

        if not uniprots:

            continue

        # The record depends only on the row, hence it is built once and
        # shared by all identifiers the gene symbol maps to.
        annot = CancerGeneCensusAnnotation(
            tier = int(rec['Tier']),
            hallmark = yes(rec['Hallmark']),
            somatic = yes(rec['Somatic']),
            germline = yes(rec['Germline']),
            tumour_types_somatic = (
                multi_field(rec['Tumour Types(Somatic)'])
            ),
            tumour_types_germline = (
                multi_field(rec['Tumour Types(Germline)'])
            ),
            cancer_syndrome = (
                multi_field(rec['Cancer Syndrome'])
            ),
            tissue_type = (
                multi_field(rec['Tissue Type'].replace(' ', ''))
            ),
            genetics = rec['Molecular Genetics'].strip() or None,
            role = (
                multi_field(rec['Role in Cancer'])
            ),
            mutation_type = (
                multi_field(rec['Mutation Types'])
            ),
        )

        for uniprot in uniprots:

            result[uniprot].add(annot)

    return dict(result)