#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

# Standard library
import base64
import collections
import csv
import json
import os

# pypath internals
import pypath.inputs.credentials as credentials
import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.session as session_mod
import pypath.share.settings as settings
import pypath.utils.mapping as mapping

# Module-level logger; `_log` is the shortcut used by the functions below.
_logger = session_mod.Logger(name='cosmic_input')
_log = _logger._log

def cancer_gene_census_annotations(
        user=None,
        passwd=None,
        credentials_fname='cosmic_credentials',
):
    """
    Retrieves a list of cancer driver genes (Cancer Gene Census) from
    the Sanger COSMIC (Catalogue of Somatic Mutations in Cancer) database.

    The download happens in two steps: an authenticated request to the
    URL stored under ``urls.urls['cgc']['url_new']`` is expected to return
    a JSON object with a ``url`` key; the CSV table is then read from
    that URL.

    Args:
        user: COSMIC user name, forwarded to
            ``credentials.credentials``.
        passwd: COSMIC password, forwarded to
            ``credentials.credentials``.
        credentials_fname: Forwarded to ``credentials.credentials`` as
            its ``from_file`` argument.

    Returns:
        A dict whose keys are the identifiers returned by
        ``mapping.map_name(<gene symbol>, 'genesymbol', 'uniprot')`` and
        whose values are sets of ``CancerGeneCensusAnnotation`` named
        tuples. An empty dict if no credentials are available. ``None``
        if the access URL or the data file could not be retrieved.
    """

    try:

        cosmic_cred = credentials.credentials(
            user=user,
            passwd=passwd,
            resource='COSMIC',
            from_file=credentials_fname,
        )

    except RuntimeError:

        _log(
            'No credentials available for the COSMIC website. '
            'Either set the `cosmic_credentials` key in the `settings` '
            'module (e.g. `{\'user\': \'myuser\', '
            '\'passwd\': \'mypassword\'}`), or pass them directly to the '
            '`pypath.inputs.cosmic.cancer_gene_census_annotations` '
            'method.'
        )

        return {}

    CancerGeneCensusAnnotation = collections.namedtuple(
        'CancerGeneCensusAnnotation',
        (
            'tier',
            'hallmark',
            'somatic',
            'germline',
            'tumour_types_somatic',
            'tumour_types_germline',
            'cancer_syndrome',
            'tissue_type',
            'genetics',
            'role',
            'mutation_type',
        ),
    )

    def multi_field(content):
        # Comma separated cell -> sorted tuple of stripped items;
        # a blank cell gives an empty tuple.
        if not content.strip():
            return ()
        return tuple(sorted(item.strip() for item in content.split(',')))

    def is_yes(content):
        # The yes/no columns are compared case-insensitively to 'yes'.
        return content.strip().lower() == 'yes'

    url = urls.urls['cgc']['url_new']

    # NOTE: the trailing newline is part of the encoded payload on
    # purpose; it is kept exactly as the original request built it.
    auth_str = base64.b64encode(
        ('%s:%s\n' % (cosmic_cred['user'], cosmic_cred['passwd'])).encode()
    )

    req_hdrs = ['Authorization: Basic %s' % auth_str.decode()]

    c = curl.Curl(
        url,
        large=False,
        silent=False,
        req_headers=req_hdrs,
        cache=False,
    )

    try:
        access_url = json.loads(c.result)
    except (TypeError, ValueError):
        # TypeError: nothing was downloaded (`c.result` is None);
        # ValueError (incl. JSONDecodeError): the reply is not JSON.
        access_url = None

    # Only a JSON object can carry the `url` key; a bare string or list
    # would pass a plain `in` test and then fail on subscripting.
    if not isinstance(access_url, dict) or 'url' not in access_url:

        _log(
            'Could not retrieve COSMIC access URL. '
            'Most likely the authentication failed. '
            'The reply was: `%s`' % (c.result,)
        )

        return None

    c = curl.Curl(
        access_url['url'],
        large=True,
        silent=False,
        bypass_url_encoding=True,
    )

    # presumably `fileobj` is missing or None when the download failed
    # (confirm against `curl.Curl`); `csv.DictReader` would raise a
    # TypeError on it, so fail the same way as for the access URL.
    fileobj = getattr(c, 'fileobj', None)

    if fileobj is None:

        _log('Could not download the COSMIC Cancer Gene Census data.')

        return None

    data = csv.DictReader(fileobj, delimiter=',')
    result = collections.defaultdict(set)

    for rec in data:

        uniprots = mapping.map_name(
            rec['Gene Symbol'],
            'genesymbol',
            'uniprot',
        )

        # Rows whose gene symbol maps to nothing are not parsed at all.
        if not uniprots:
            continue

        # The record is the same for every identifier of this gene,
        # so it is built only once per row.
        annotation = CancerGeneCensusAnnotation(
            tier=int(rec['Tier']),
            hallmark=is_yes(rec['Hallmark']),
            somatic=is_yes(rec['Somatic']),
            germline=is_yes(rec['Germline']),
            tumour_types_somatic=multi_field(rec['Tumour Types(Somatic)']),
            tumour_types_germline=multi_field(rec['Tumour Types(Germline)']),
            cancer_syndrome=multi_field(rec['Cancer Syndrome']),
            # all spaces are removed from the tissue type codes
            tissue_type=multi_field(rec['Tissue Type'].replace(' ', '')),
            genetics=rec['Molecular Genetics'].strip() or None,
            role=multi_field(rec['Role in Cancer']),
            mutation_type=multi_field(rec['Mutation Types']),
        )

        for uniprot in uniprots:
            result[uniprot].add(annotation)

    return dict(result)