#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#importreimportcollectionsimportpypath.share.curlascurlimportpypath.resources.urlsasurlsimportpypath.utils.mappingasmapping
def tfcensus_annotations(only_classes=None):
    """
    Downloads and processes the list of all known transcription factors
    from TF census (Vaquerizas 2009). This resource is human only.

    Args:
        only_classes: Optional TF census class label (a single string) or
            an iterable of class labels. When given and non-empty, only
            rows whose class (first column) is among these labels are
            processed. ``None`` or an empty value means no filtering.

    Returns:
        Dict with UniProt IDs as keys and sets of ``TfcensusAnnotation``
        named tuples (fields: ``tfcensus_class``, ``tissue``) as values.
        ``tissue`` is ``None`` for rows without tissue information.
    """

    TfcensusAnnotation = collections.namedtuple(
        'TfcensusAnnotation',
        ['tfcensus_class', 'tissue'],
    )

    # Normalise the class filter to a set; a bare string is one label,
    # not an iterable of characters.
    if only_classes:
        only_classes = (
            {only_classes}
            if isinstance(only_classes, str) else
            set(only_classes)
        )
    else:
        only_classes = None

    result = collections.defaultdict(set)
    reensg = re.compile(r'ENSG[0-9]{11}')
    url = urls.urls['vaquerizas2009']['url']
    c = curl.Curl(url, silent=False, large=True)

    # Everything up to and including the row starting with `Class`
    # (the column header) is preamble and is skipped.
    header = True

    for line in c.result:

        fields = line.split('\t')

        if header:
            if fields[0] == 'Class':
                header = False
            continue

        # Blank or truncated rows lack the gene symbol column (index 5).
        if len(fields) < 6:
            continue

        tfcensus_class = fields[0]

        if only_classes is not None and tfcensus_class not in only_classes:
            continue

        # One cell may hold several Ensembl gene IDs.
        ensg = reensg.findall(fields[1])
        # Strip in case this is the last field and carries the newline.
        hgnc = fields[5].strip()

        # The tissue column is optional; multiple tissues are separated
        # by semicolons. Rows without tissue get a single `None`.
        tissues = fields[6].strip() if len(fields) > 6 else ''
        tissues = tissues.split(';') if tissues else [None]

        # Translate by both Ensembl gene ID and gene symbol.
        uniprots = mapping.map_names(ensg, 'ensembl', 'uniprot')

        if hgnc:
            uniprots.update(
                mapping.map_name(hgnc, 'genesymbol', 'uniprot')
            )

        for uniprot in uniprots:
            for tissue in tissues:
                result[uniprot].add(
                    TfcensusAnnotation(
                        tfcensus_class=tfcensus_class,
                        tissue=tissue,
                    )
                )

    return dict(result)