#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#

from __future__ import annotations

import collections

from pypath.share import curl
from pypath.resources.urls import urls

# Dataset key -> file name that is substituted into the
# `urls['ctdbase']['url']` template by `_ctdbase_download`.
CTD_URLS = {
    'chemical_gene': 'CTD_chem_gene_ixns.tsv.gz',
    'chemical_disease': 'CTD_chemicals_diseases.tsv.gz',
    'disease_pathway': 'CTD_diseases_pathways.tsv.gz',
    'chemical_phenotype': 'CTD_pheno_term_ixns.tsv.gz',
    'gene_disease': 'CTD_genes_diseases.tsv.gz',
    'chemical_vocabulary': 'CTD_chemicals.tsv.gz',
    'gene_vocabulary': 'CTD_genes.tsv.gz',
    'disease_vocabulary': 'CTD_diseases.tsv.gz',
    'pathway_vocabulary': 'CTD_pathways.tsv.gz',
    'anatomy_vocabulary': 'CTD_anatomy.tsv.gz',
    'phenotype_vocabulary': 'CTD_phenotypes.tsv.gz',
}


def _split_field(value: str):
    """
    Splits one raw tab-separated field on its inner separators.

    A field containing "|" becomes a list of its "|"-separated parts; if
    the field also contains "^", every part is further split on "^", giving
    a list of lists. A field with "^" but no "|" becomes a one-element list
    holding its "^"-separated parts. Any other field is returned unchanged.
    """

    has_sublist = "^" in value

    if "|" in value:

        parts = value.split("|")

        return [part.split("^") for part in parts] if has_sublist else parts

    if has_sublist:

        return [value.split("^")]

    return value


def _strip_nested(value):
    """
    Strips surrounding whitespace from a string, or from every string
    inside an arbitrarily nested list (as produced by `_split_field`).
    """

    if isinstance(value, str):

        return value.strip()

    return [_strip_nested(item) for item in value]


def _ctdbase_download(_type: str) -> list[tuple]:
    """
    Retrieves a CTDbase file and returns entries as a list of tuples.

    Args:
        _type: A key of `CTD_URLS`. If it contains no underscore,
            "_vocabulary" is appended first (e.g. "gene" is looked up
            as "gene_vocabulary").

    Returns:
        A list of `CTDEntry` named tuples, one per parsed data line. The
        tuple fields are the column names read from the last header line
        (a line starting with "#" and containing more than one tab
        separated value). Empty fields are `None`; fields containing "|"
        and/or "^" are lists or lists of lists (see `_split_field`).
        Lines whose column count differs from the header are skipped. For
        "gene_disease", entries without a "DirectEvidence" value are
        skipped.

    Raises:
        KeyError: If the (expanded) `_type` is not a key of `CTD_URLS`.
    """

    if '_' not in _type:

        _type = f'{_type}_vocabulary'

    url = urls['ctdbase']['url'] % CTD_URLS[_type]

    c = curl.Curl(
        url,
        silent = False,
        large = True,
        encoding = "utf-8",
        default_mode = "r",
        compressed = True,
        compr = "gz",
    )

    # Loop invariants, evaluated once.
    is_chemical_phenotype = _type == 'chemical_phenotype'
    is_gene_disease = _type == 'gene_disease'

    entries = []
    fieldnames = None
    record = None

    for line in c.result:

        if line.startswith("#"):

            header = line.strip(" #\r\n").split("\t")

            # Only comment lines with several tab separated values are
            # column headers; the other comment lines are ignored.
            if len(header) > 1:

                fieldnames = [name for name in header if name != '']
                record = collections.namedtuple('CTDEntry', fieldnames)

            continue

        if fieldnames is None:

            # Data before any header line cannot be mapped to columns.
            continue

        # Drop the line terminator before splitting: otherwise an empty
        # last column arrives as "\n" and is not recognised as empty, and
        # nested values in the last column keep a trailing newline.
        data = line.rstrip("\r\n").split("\t")

        if len(data) != len(fieldnames):

            # Some lines have missing fields and cannot be parsed.
            continue

        entry = {}

        for fieldname, raw in zip(fieldnames, data):

            element = _strip_nested(_split_field(raw))
            # Emptiness is tested after stripping, so whitespace-only
            # fields are treated as missing too.
            entry[fieldname] = None if element == "" else element

        if is_chemical_phenotype:

            # NOTE(review): `_modify_dict` is not defined in the visible
            # part of this module -- confirm it exists elsewhere.
            entry = _modify_dict(
                entry,
                ('comentionedterms', ['name', 'id', 'source']),
                ('anatomyterms', ['sequenceorder', 'name', 'id']),
                ('inferencegenesymbols', ['name', 'id']),
                ('interactionactions', ['interaction', 'action']),
            )

        if is_gene_disease and entry['DirectEvidence'] is None:

            continue

        entries.append(record(**entry))

    return entries
def ctdbase_relations(relation_type: str) -> list[tuple]:
    """
    Downloads one CTDbase relation table.

    For the "gene_disease" relation type only curated relations are
    returned, i.e. those having a value in the "DirectEvidence" field,
    because the number of non-curated relations is too large.

    Args:
        relation_type: Which relation table to retrieve; one of
            'chemical_gene', 'chemical_disease', 'disease_pathway',
            'chemical_phenotype' or 'gene_disease'.

    Returns:
        The relations as a list of tuples.
    """

    relations = _ctdbase_download(relation_type)

    return relations
def ctdbase_vocabulary(vocabulary_type: str) -> list[tuple]:
    """
    Downloads one CTDbase vocabulary table.

    Args:
        vocabulary_type: Which vocabulary to retrieve; one of 'chemical',
            'gene', 'disease', 'pathway', 'anatomy' or 'phenotype'.

    Returns:
        The vocabulary as a list of tuples.
    """

    vocabulary = _ctdbase_download(vocabulary_type)

    return vocabulary