Source code for pypath.inputs.tcdb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import re
import collections
import bs4

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.utils.mapping as mapping
import pypath.utils.reflists as reflists


def tcdb_families():
    """
    Retrieve the TCDB family names keyed by their TC identifiers.

    The page addressed by the ``url_families`` entry of the ``tcdb`` URL
    settings is downloaded, and the text of its first ``<p>`` element is
    read as one tab separated record per line.

    Returns:
        dict: The first field of each line (TC identifier) mapped to the
        rest of the line (family name). Where the name matches the
        pattern "The ... Family" or "The ... Superfamily", only the part
        between these words is kept.

    Raises:
        ValueError: If a line contains no tab, hence can not be split
            into an identifier and a name.
    """

    tag_pattern = re.compile(r'<.*>')
    name_pattern = re.compile(r'^[tT]he (.*) (?:Super)?[Ff]amily')

    download = curl.Curl(
        urls.urls['tcdb']['url_families'],
        large = False,
        silent = False,
    )
    soup = bs4.BeautifulSoup(download.result, features = 'lxml')
    text = soup.find('p').text

    families = {}

    for raw in text.strip().split('\n'):

        # only the first tab separates the identifier from the name
        tcid, family = tag_pattern.sub('', raw.strip()).split(
            '\t',
            maxsplit = 1,
        )

        # any further tab within the name becomes a space
        families[tcid] = name_pattern.sub(
            r'\g<1>',
            family.replace('\t', ' '),
        )

    return families
def tcdb_classes():
    """
    Retrieve the TC identifier and the family of each accession in TCDB.

    Downloads the table addressed by the ``url_acc2tc`` entry of the
    ``tcdb`` URL settings and reads it line by line. Each non-empty line
    is expected to consist of an accession and a TC identifier, separated
    by one or more tabs. Empty lines are skipped.

    Returns:
        dict: Accessions mapped to tuples of two elements: the TC
        identifier as it is in the table, and the first substring of it
        that looks like a family identifier, i.e. digit, capital letter
        and number separated by dots (e.g. ``1.A.1``). If an accession
        occurs on more than one line, the last one is kept.

    Raises:
        ValueError: If a non-empty line does not split into exactly
            two fields.
        AttributeError: If no family identifier can be found in a
            TC identifier.
    """

    refam = re.compile(r'(\d\.[A-Z]\.\d+)')
    retab = re.compile(r'\t+')

    url = urls.urls['tcdb']['url_acc2tc']
    c = curl.Curl(url, large = True, silent = False)

    result = {}

    for line in c.result:

        # The line terminator has to be removed before testing for
        # emptiness: a blank line that still carries its newline is
        # truthy, and it would reach the unpacking below as one single
        # empty field, resulting a ValueError.
        line = line.rstrip() if line else line

        if not line:
            continue

        ac, tc = retab.split(line)
        family = refam.search(tc).group(1)

        result[ac] = (tc, family)

    return result
def tcdb_annotations(organism = 9606):
    """
    Compile the TCDB annotations for the UniProt IDs of one organism.

    Each accession from ``tcdb_classes`` is translated by
    ``mapping.map_name`` (UniProt to UniProt), and only the resulted IDs
    accepted by ``reflists.check`` for the organism are included.

    Args:
        organism (int): NCBI Taxonomy ID, passed both to the ID
            translation and to the reference list check. By default
            9606.

    Returns:
        dict: UniProt IDs mapped to sets of ``TcdbAnnotation`` named
        tuples with the fields ``family`` (family name as provided by
        ``tcdb_families``) and ``tcid`` (TC identifier).

    Raises:
        KeyError: If the family of an included record is missing from
            the dict of family names.
    """

    TcdbAnnotation = collections.namedtuple(
        'TcdbAnnotation',
        ['family', 'tcid'],
    )

    family_names = tcdb_families()
    tc_by_accession = tcdb_classes()

    annotations = collections.defaultdict(set)

    for accession, (tcid, family_id) in tc_by_accession.items():

        translated = mapping.map_name(
            accession,
            'uniprot',
            'uniprot',
            ncbi_tax_id = organism,
        )

        for uniprot in translated:

            # IDs not in the reference list of the organism are dropped
            if not reflists.check(
                uniprot,
                'uniprot',
                ncbi_tax_id = organism,
            ):
                continue

            annotations[uniprot].add(
                TcdbAnnotation(
                    family = family_names[family_id],
                    tcid = tcid,
                )
            )

    return dict(annotations)