#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import re
import csv
import collections
import itertools
import functools

import pyreadr

import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.share.session as session
import pypath.utils.taxonomy as taxonomy

_logger = session.Logger(name='dorothea_input')


# One TF-target record. The functions in this module fill these fields
# by position, so the order of the fields matters.
DorotheaInteraction = collections.namedtuple(
    'DorotheaInteraction',
    [
        'tf',
        'target',
        'effect',
        'level',
        'curated',
        'chipseq',
        'predicted',
        'coexp',
        'curated_sources',
        'chipseq_sources',
        'predicted_sources',
        'coexp_sources',
        'all_sources',
        'pubmed',
        'kegg_pathways',
    ],
)


# Resource names which are simply converted to upper case.
_resources_upper = (
    'jaspar',
    'trred',
    'kegg',
    'trrust',
    'tred',
    'trrd',
    'hocomoco',
    'fantom4',
    'pazar',
)


# Resource names with an irregular spelling. These replacements are applied
# after the upper case conversion above, in the order listed here.
_resources_special_case = {
    'tfact': 'TFactS',
    'tf_act': 'TFactS',
    'htri_db': 'HTRIdb',
    'int_act': 'IntAct',
    'fantom_4': 'FANTOM4',
    'oreganno': 'ORegAnno',
    'reviews': 'DoRothEA-reviews',
    'HOCOMOCO_v11': 'HOCOMOCO-v11',
    'hocomoco_v11': 'HOCOMOCO-v11',
    'JASPAR_v2018': 'JASPAR-v2018',
    'remap': 'ReMap',
    'gtex': 'ARACNe-GTEx',
    'nfi_regulome_db': 'NFIRegulomeDB',
    'tf_e': 'TFe',
    'reg_network': 'RegNetwork',
}


def _process_resources(sources):
    """
    Normalizes the resource names in a string of data sources.

    :param str sources:
        Resource names separated by `,` or `_via_`.

    :return:
        The resource names in their normalized spelling, joined by commas.
        An empty string if ``sources`` is the string `none`.
    """

    if sources == 'none':

        return ''

    # plain substring replacements: first the upper case names...
    for lower_name in _resources_upper:

        sources = sources.replace(lower_name, lower_name.upper())

    # ...then the irregular ones
    for raw_name, nice_name in iteritems(_resources_special_case):

        sources = sources.replace(raw_name, nice_name)

    # both `,` and `_via_` act as separators in the input
    return ','.join(re.split(r',|_via_', sources))
def get_dorothea_old(levels={'A', 'B'}, only_curated=False):
    """
    Retrieves TF-target interactions from DoRothEA.

    :param set levels:
        Confidence levels to be used.
    :param bool only_curated:
        Retrieve only literature curated interactions.

    :return:
        A generator of lists, one for each retained line of the
        tab separated table: the first four fields as they are, the fifth
        to eighth fields converted to booleans (`True` if the value is
        `TRUE`), the last four fields as they are, and finally one string:
        the non-empty values of the last four fields joined by commas, or,
        if ``only_curated`` is `True`, the ninth field.

    Details
    -------
    A line is retained if its fourth field (the confidence level) is in
    ``levels`` and ``only_curated`` is `False`, or if its fifth field is
    `TRUE`.

    DoRothEA is a comprehensive resource of TF-target interactions
    combining multiple lines of evidences: literature curated databases,
    ChIP-Seq data, PWM based prediction using HOCOMOCO and JASPAR matrices
    and prediction from GTEx expression data by ARACNe.

    For details see https://github.com/saezlab/DoRothEA.
    """

    # the label in the URL names all the levels from `A` down to the
    # lowest one requested
    levels_label = (
        'all' if 'E' in levels else
        'ABCD' if 'D' in levels else
        'ABC' if 'C' in levels else
        'AB' if 'B' in levels else
        'A'
    )

    url = urls.urls['dorothea']['url'] % levels_label

    c = curl.Curl(url, silent=False, large=True)

    # the first line is discarded (presumably the header -- TODO confirm)
    _ = next(c.result)

    rows = (
        line.strip('\n\r').split('\t')
        for line in c.result
    )

    return (
        list(
            itertools.chain(
                ll[:4],
                (flag == 'TRUE' for flag in ll[4:8]),
                ll[-4:],
                # Both alternatives are wrapped in a list: `chain` iterates
                # its arguments, hence a bare string would be appended
                # character by character instead of as one value.
                [ll[8]]
                if only_curated else
                [','.join(s for s in ll[-4:] if s)],
            )
        )
        for ll in rows
        if (
            (ll[3] in levels and not only_curated) or
            ll[4] == 'TRUE'
        )
    )
def dorothea_old_csv(levels={'A', 'B'}, only_curated=False):
    """
    Retrieves TF-target interactions from DoRothEA.

    :param set levels:
        Confidence levels to be used.
    :param bool only_curated:
        Retrieve only literature curated interactions.

    :return:
        A generator of ``DorotheaInteraction`` named tuples, one for each
        retained row of `database.csv`.

    Details
    -------
    Note: this method processes DoRothEA from an old CSV file generated
    in 2018. For an up to date version of DoRothEA please use the
    ``dorothea_interactions`` method.

    The values `-` and `none` are replaced by empty strings in all columns.
    The ``all_sources`` field is the normalized, comma separated list of
    all non-empty `which_*` columns, or, if ``only_curated`` is `True`,
    the `which_curateddatabase` column as it is.

    DoRothEA is a comprehensive resource of TF-target interactions
    combining multiple lines of evidences: literature curated databases,
    ChIP-Seq data, PWM based prediction using HOCOMOCO and JASPAR matrices
    and prediction from GTEx expression data by ARACNe.

    For details see https://github.com/saezlab/DoRothEA.
    """

    # The values are paired with the fields of `DorotheaInteraction` by
    # position, hence the order here must follow the `curated`, `chipseq`,
    # `predicted`, `coexp` order of the named tuple fields; any other
    # order puts the evidence flags and the sources under wrong labels.
    evidence_types = (
        'curateddatabase',
        'chipSeq',
        'TFbindingMotif',
        'coexpression',
    )

    url = urls.urls['dorothea_git']['url']

    c = curl.Curl(
        url,
        silent=False,
        large=True,
        files_needed=['database.csv'],
    )

    reader = csv.DictReader(c.result['database.csv'])

    for rec in reader:

        # process only the ones of the requested levels or if curated
        # NOTE(review): with `only_curated` records of the requested levels
        # pass this filter even if they are not curated -- confirm whether
        # this is intended
        if (
            rec['score'] not in levels and
            not (
                only_curated and
                rec['is_evidence_curateddatabase'] == 'TRUE'
            )
        ):

            continue

        # placeholders of missing values become empty strings
        rec = dict(
            (k, v if v not in {'-', 'none'} else '')
            for k, v in iteritems(rec)
        )

        yield DorotheaInteraction(
            **dict(
                zip(
                    DorotheaInteraction._fields,
                    itertools.chain(
                        # TF, target, effect, score
                        (
                            rec[key]
                            for key in ('TF', 'target', 'effect', 'score')
                        ),
                        # boolean values for curated, chipseq, motif pred.
                        # and coexp
                        (
                            rec['is_evidence_%s' % key] == 'TRUE'
                            for key in evidence_types
                        ),
                        # databases & datasets
                        (
                            rec['which_%s' % key]
                            for key in evidence_types
                        ),
                        # all data sources (or only the curated ones)
                        (
                            _process_resources(
                                ','.join(
                                    rec[key]
                                    for key in (
                                        'which_%s' % evt
                                        for evt in evidence_types
                                    )
                                    if rec[key]
                                )
                            )
                            if not only_curated else
                            rec['which_curateddatabase'],
                        ),
                        # PubMed and KEGG pw
                        (
                            rec['pubmedID_from_curated_resources'],
                            rec['kegg_pathway'],
                        ),
                    ),
                )
            )
        )
def dorothea_rda_raw(organism=9606):
    """
    Downloads a DoRothEA Rdata file and reads its contents.

    :param int,str organism:
        Name or NCBI Taxonomy ID of the organism. Human and mouse are
        supported. If `None`, the human interactions will be returned
        with additional details included.

    :return:
        The object stored in the Rdata file under the name of the dataset,
        as read by ``pyreadr``; `None` if ``pyreadr`` failed to parse
        the file.

    :raises ValueError:
        If the organism is neither human, mouse nor `None`.
    """

    ncbi_tax_id = taxonomy.ensure_ncbi_tax_id(organism)

    if ncbi_tax_id not in (9606, 10090, None):

        msg = (
            'DoRothEA: invalid organism: `%s`. Only human and mouse '
            'are supported.' % str(organism)
        )
        _logger._log(msg)

        raise ValueError(msg)

    # the name of the dataset is part of the URL and also the key
    # of the data within the Rdata file
    if ncbi_tax_id is None:

        fname = 'entire_database'

    elif ncbi_tax_id == 9606:

        fname = 'dorothea_%s' % 'hs'

    else:

        fname = 'dorothea_%s' % 'mm'

    url = urls.urls['dorothea_git']['rda'] % fname

    download = curl.Curl(url, silent=False, large=True)

    # only the path is needed, `pyreadr` opens the file by itself
    rdata_path = download.fileobj.name
    download.fileobj.close()

    try:

        return pyreadr.read_r(rdata_path)[fname]

    except pyreadr.custom_errors.LibrdataError as e:

        _logger._log(
            'Could not parse DoRothEA data from Rdata file: '
            '`%s`. '
            'Make sure your `pyreadr` installation supports the xz '
            'compression.' % e.args[0]
        )

    return None
def dorothea_full_raw(organism=9606):
    """
    DoRothEA data as it is provided in the R package.

    Args
        organism (int,str): Name or NCBI Taxonomy ID of the organism.
            The complete DoRothEA database (with all the details about
            the original sources) is available only for human.

    Returns
        (pandas.DataFrame): A data frame of TF-target interactions from
        DoRothEA.

    Raises
        ValueError: If an organism other than human is requested.
    """

    ncbi_tax_id = taxonomy.ensure_ncbi_tax_id(organism)

    # human, or no organism at all: load the complete database
    if ncbi_tax_id == 9606 or not organism:

        return dorothea_rda_raw(organism=None)

    msg = (
        'DoRothEA: invalid organism: `%s`. The full database is '
        'available only for human. To have them for other organisms, '
        'you can load DoRothEA in a Network object and use the homology '
        'translation function.' % str(organism)
    )
    _logger._log(msg)

    raise ValueError(msg)
def dorothea_interactions(
        organism=9606,
        levels={'A', 'B', 'C', 'D'},
        only_curated=False,
        confidence_pairwise=True,
    ):
    """
    Retrieves TF-target interactions from TF regulons.

    :param int,str organism:
        Name or NCBI Taxonomy ID of the organism. Only human is available.
    :param set levels:
        Confidence levels to be used.
    :param bool only_curated:
        Retrieve only literature curated interactions.
    :param bool confidence_pairwise:
        Not used by this function.

    :return:
        A generator of ``DorotheaInteraction`` named tuples, one for each
        row of the data frame with a confidence level in ``levels`` (and
        with curated evidence, if ``only_curated`` is `True`).

    Details
    -------
    TF regulons is a comprehensive resource of TF-target interactions
    combining multiple lines of evidences: literature curated databases,
    ChIP-Seq data, PWM based prediction using HOCOMOCO and JASPAR matrices
    and prediction from GTEx expression data by ARACNe.

    The resource names are normalized in all the `*_sources` fields.
    The ``pubmed`` field is empty unless the `pubmed_id` value consists
    only of digits.

    As KEGG is no longer part of the public version of DoRothEA
    the `kegg_pathways` field is always empty.

    For details see https://github.com/saezlab/DoRothEA.
    """

    # same order as the corresponding fields of `DorotheaInteraction`
    evidence_types = (
        'curated',
        'chip_seq',
        'tfbs',  # inferred
        'inferred',  # coexp
    )
    source_columns = tuple('which_%s' % evt for evt in evidence_types)

    df = dorothea_full_raw(organism=organism)
    df = df[df.confidence.isin(levels)]

    if only_curated:

        df = df[df.is_evidence_curated]

    for rec in df.itertuples():

        # all data sources (or only the curated ones); the names are
        # normalized in both cases, so that this field agrees with the
        # per evidence type source fields of the same record
        if only_curated:

            all_sources = _process_resources(rec.which_curated)

        else:

            all_sources = _process_resources(
                ','.join(
                    getattr(rec, col)
                    for col in source_columns
                    if getattr(rec, col) != 'none'
                )
            )

        yield DorotheaInteraction(
            **dict(
                zip(
                    DorotheaInteraction._fields,
                    itertools.chain(
                        # TF, target, effect, score
                        (
                            rec.tf,
                            rec.target,
                            int(rec.mor),
                            rec.confidence,
                        ),
                        # boolean values for curated, chipseq, motif pred.
                        # and coexp
                        (
                            getattr(rec, 'is_evidence_%s' % evt)
                            for evt in evidence_types
                        ),
                        # databases & datasets
                        (
                            _process_resources(getattr(rec, col))
                            for col in source_columns
                        ),
                        (all_sources,),
                        # PubMed and KEGG pw
                        (
                            rec.pubmed_id if rec.pubmed_id.isdigit() else '',
                            '',
                        ),
                    ),
                )
            )
        )