Source code for pypath.inputs.uniprot_db

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from __future__ import annotations

import re
import time
import datetime

import timeloop

import pypath.share.session as session_mod
import pypath.share.settings as settings
import pypath.inputs.uniprot as uniprot

_logger = session_mod.Logger(name = 'uniprot_db')

db = {}
_cleanup_period = settings.get('mapper_cleanup_interval')
_lifetime = 300
_last_used = {}


[docs] def all_uniprots(organism = 9606, swissprot = None): return get_db(organism = organism, swissprot = swissprot)
[docs] def all_swissprots(organism = 9606): return get_db(organism = organism, swissprot = True)
[docs] def all_trembls(organism = 9606): return get_db(organism = organism, swissprot = False)
[docs] def init_db(organism = 9606, swissprot = None): _swissprot = uniprot._swissprot_param(swissprot) _logger._log( 'Loading list of all UniProt IDs for ' 'organism `%u` (only SwissProt: %s).' % ( organism, str(swissprot), ) ) key = (organism, _swissprot) globals()['db'][key] = uniprot._all_uniprots( organism = organism, swissprot = swissprot, ) globals()['_last_used'][key] = time.time()
[docs] def get_db(organism = 9606, swissprot = None): _swissprot = uniprot._swissprot_param(swissprot) key = (organism, _swissprot) if key not in globals()['db']: init_db(organism = organism, swissprot = swissprot) globals()['_last_used'][key] = time.time() return globals()['db'][key]
[docs] def is_uniprot(name, organism = 9606, swissprot = None): """ Tells if ``name`` is a UniProt ID of ``organism``. If ``swissprot`` is None then both SwissProt and TrEMBL IDs will be considered. """ return name in get_db(organism = organism, swissprot = swissprot)
[docs] def is_swissprot(name, organism = 9606): """ Tells if ``name`` is a SwissProt ID of ``organism``. For TrEMBL IDs returns False. """ return is_uniprot(name, organism = organism, swissprot = True)
[docs] def is_trembl(name, organism = 9606): """ Tells if ``name`` is a TrEMBL ID of ``organism``. For SwissProt IDs returns False. """ return is_uniprot(name, organism = organism, swissprot = False)
_cleanup_timeloop = timeloop.Timeloop() _cleanup_timeloop.logger.setLevel(9999) @_cleanup_timeloop.job( interval = datetime.timedelta( seconds = _cleanup_period ) ) def _cleanup(): keys = list(globals()['db'].keys()) for key in keys: if time.time() - globals()['_last_used'][key] > _lifetime: _remove(key) _cleanup_timeloop.start(block = False) def _remove(key): if key in globals()['db']: _logger._log( 'Removing UniProt ID list for ' 'organism `%u` (SwissProt: %s)' % ( key[0], str(key[1]), ) ) del globals()['db'][key] if key in globals()['_last_used']: del globals()['_last_used'][key]