#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

"""
CollecTRI is a comprehensive resource of TF-target interactions.
"""

# The module docstring has to precede the `__future__` import, otherwise
# it is only a discarded string expression and never becomes `__doc__`.
from __future__ import annotations

from typing import Literal

import re
import collections
import itertools

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.internals.intera as intera
import pypath.utils.mapping as mapping
import pypath.share.session as session

_log = session.Logger(name = 'collectri_input')._log


# Based on literature by Sophia
# https://www.sciencedirect.com/science/article/abs/pii/S0304419X19300526
# https://www.nature.com/articles/1209933
#
# Keys are the complex names as they are looked up among the TF names of
# the CollecTRI records; values are the dimer variants of the complex,
# each written as two gene symbols joined by a dash.
COMPLEXES = {
    'AP1': {
        'JUN-FOS',
        'JUNB-FOS',
        'JUND-FOS',
        'JUN-FOSB',
        'JUNB-FOSB',
        'JUND-FOSB',
        'JUN-FOSL1',
        'JUNB-FOSL1',
        'JUND-FOSL1',
        'JUN-FOSL2',
        'JUNB-FOSL2',
        'JUND-FOSL2',
        'JUN-JUN',
        'JUN-JUNB',
        'JUN-JUND',
        'JUNB-JUNB',
        'JUNB-JUND',
        'JUND-JUND',
    },
    'NFKB': {
        'RELA-RELA',
        'RELA-REL',
        'RELA-RELB',
        'RELA-NFKB1',
        'RELA-NFKB2',
        'REL-REL',
        'REL-RELB',
        'REL-NFKB1',
        'REL-NFKB2',
        'RELB-RELB',
        'RELB-NFKB1',
        'RELB-NFKB2',
        'NFKB1-NFKB1',
        'NFKB1-NFKB2',
        'NFKB2-NFKB2',
    },
}


def collectri_raw(
        protein_coding: bool = True,
        mirna: bool = False,
    ) -> list[tuple]:
    """
    TF-target interactions from the CollecTRI database.

    Downloads the table from the URL registered under `collectri` in
    `urls`, discards its first line and turns every following comma
    separated line into one `CollectriRecord`.

    Args:
        protein_coding:
            Include regulation of protein coding genes.
        mirna:
            Include regulation of miRNA coding genes.

    Returns:
        A list of `CollectriRecord` named tuples with the fields `tf`,
        `target`, `effect` (converted to `int`), `tf_category`,
        `resources`, `pubmed`, `sign_decision` and `target_type` (either
        `'mirna'` or `'protein'`).
    """

    # Raw string: `\d` in a plain string literal is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    remirna = re.compile(r'^MIR(\d+)$')

    CollectriRecord = collections.namedtuple(
        'CollectriRecord',
        (
            'tf',
            'target',
            'effect',
            'tf_category',
            'resources',
            'pubmed',
            'sign_decision',
            'target_type',
        ),
    )

    url = urls.urls['collectri']['url']
    c = curl.Curl(url, silent = False, large = True)

    # The first line is not data, drop it.
    _ = next(c.result)

    result = []

    for line in c.result:

        fields = line.strip().split(',')

        # Targets named `MIR<digits>` are considered miRNA genes,
        # everything else is treated as protein coding.
        mmirna = remirna.match(fields[1])

        if (
            (mmirna and not mirna) or
            (not mmirna and not protein_coding)
        ):

            continue

        # miRNA targets are renamed to the `hsa-miR-<number>` form, other
        # targets keep the name found in the file.
        target_id = (
            f'hsa-miR-{mmirna.group(1)}'
                if mmirna else
            fields[1]
        )

        result.append(
            CollectriRecord(
                tf = fields[0],
                target = target_id,
                effect = int(fields[2]),
                tf_category = fields[3],
                resources = fields[4].replace('DoRothEA_A', 'DoRothEA-A'),
                pubmed = fields[5],
                sign_decision = fields[6],
                target_type = 'mirna' if mmirna else 'protein',
            )
        )

    return result


def collectri_interactions(
        protein_coding: bool = True,
        mirna: bool = False,
    ) -> collections.abc.Iterator[tuple]:
    """
    TF-target interactions from the CollecTRI database.

    While `collectri_raw` returns the records in the same format as in the
    original data, here we translate identifiers to UniProt IDs, and use
    `Complex` objects to represent protein complexes.

    This is a generator: interactions are yielded one by one. A record is
    expanded to one interaction for each combination of the translated TF
    and target identifiers; records where either side can not be
    translated yield nothing.

    Args:
        protein_coding:
            Include regulation of protein coding genes.
        mirna:
            Include regulation of miRNA coding genes.

    Yields:
        `CollectriInteraction` named tuples with the fields `tf`,
        `target`, `effect`, `tf_category`, `resources`, `pubmed`,
        `sign_decision` and `target_type`. For TFs listed in `COMPLEXES`
        the `tf` field is an `intera.Complex` object; miRNA targets are
        passed through as they come from `collectri_raw`.
    """

    CollectriInteraction = collections.namedtuple(
        'CollectriInteraction',
        (
            'tf',
            'target',
            'effect',
            'tf_category',
            'resources',
            'pubmed',
            'sign_decision',
            'target_type',
        ),
    )

    # The expansion of a complex depends only on its name, while the same
    # few complexes occur in many records: translate each of them once.
    complex_cache = {}

    def process_complex(name):
        """
        All combinations of UniProt IDs for the variants of one complex.

        Returns a set of tuples, each tuple is one combination of the
        UniProt IDs of the components. Variants with any untranslatable
        component are skipped and reported in the log.
        """

        if name not in complex_cache:

            result = set()

            for var in COMPLEXES[name]:

                uniprots = [
                    mapping.map_name(comp, 'genesymbol', 'uniprot')
                    for comp in var.split('-')
                ]

                if all(uniprots):

                    # One component may map to several UniProt IDs, hence
                    # the Cartesian product across the components.
                    result.update(itertools.product(*uniprots))

                else:

                    _log(
                        'Failed to translate all components of '
                        f'complex `{name}` (components: {var}).'
                    )

            complex_cache[name] = result

        return complex_cache[name]


    for rec in collectri_raw(protein_coding = protein_coding, mirna = mirna):

        tf_uniprots = (
            process_complex(rec.tf)
                if rec.tf in COMPLEXES else
            mapping.map_name(rec.tf, 'genesymbol', 'uniprot')
        )

        # miRNA targets are not translated.
        target_uniprots = (
            (rec.target,)
                if rec.target_type == 'mirna' else
            mapping.map_name(rec.target, 'genesymbol', 'uniprot')
        )

        for tf_u, t_u in itertools.product(tf_uniprots, target_uniprots):

            # Tuples come only from `process_complex`; single proteins
            # are not wrapped.
            if isinstance(tf_u, tuple):

                tf_u = intera.Complex(
                    components = tf_u,
                    sources = 'CollecTRI',
                )

            # From the third field on, the raw record is carried over
            # unchanged.
            yield CollectriInteraction(tf_u, t_u, *rec[2:])