#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems

import os
import json
import datetime
import time

try:
    import cPickle as pickle
except ImportError:
    import pickle

import timeloop

import pypath.inputs.uniprot_db as uniprot_input
import pypath.inputs.mirbase as mirbase_input
import pypath.share.common as common
import pypath.share.session as session_mod
import pypath.share.settings as settings
import pypath.share.cache as cache_mod


# method names for ID types
# Each value is the name of a function looked up in the `uniprot_input` or
# `mirbase_input` modules (or a path to a plain text file with one
# identifier per line) that provides the identifiers of the ID type.
inputs = {
    'uniprot': 'all_uniprots',
    'swissprot': 'all_swissprots',
    'trembl': 'all_trembls',
    'mirbase': 'mirbase_mature_all',
    'mir-pre': 'mirbase_precursor_all',
}

# Module-wide scheduler for the periodic cleanup job of the reference lists.
_reflists_cleanup_timeloop = timeloop.Timeloop()
# A level this high is above every standard logging level, so the
# scheduler's own logger emits nothing.
_reflists_cleanup_timeloop.logger.setLevel(9999)
def __init__(self, cleanup_period=10, lifetime=300):
    """
    Sets up the reference list manager and schedules its cleanup job.

    NOTE(review): this is an instance method; the ``class`` statement it
    belongs to is not visible in this part of the file.

    Args:
        cleanup_period (int): Number of seconds between two runs of the
            cleanup job.
        lifetime (int): Stored as ``self.lifetime``; presumably the number
            of seconds an unused list is kept in memory -- confirm against
            ``_remove_expired``.
    """

    session_mod.Logger.__init__(self, name='reflists')

    # Build the instance state before the cleanup job is registered and
    # started, so the job can never run against a half-initialised object.
    self.lifetime = lifetime
    self.lists = {}
    self.expiry = {}
    self.cachedir = cache_mod.get_cachedir()

    @_reflists_cleanup_timeloop.job(
        interval=datetime.timedelta(seconds=cleanup_period)
    )
    def _cleanup():

        self._remove_expired()

    # `block=False`: the call returns immediately instead of blocking here.
    _reflists_cleanup_timeloop.start(block=False)

    self._log('ReferenceListManager has been created.')
def which_list(self, id_type, ncbi_tax_id=None):
    """
    Returns the reference list for an ID type and organism, loading it
    first if it is not in memory yet.

    Args:
        id_type (str): An identifier type, a key of ``inputs``.
        ncbi_tax_id (int): NCBI Taxonomy ID of the organism; if not
            provided, the ``default_organism`` setting is used.

    Returns:
        The reference list stored under the ``(id_type, ncbi_tax_id)``
        key; ``None`` if the key is still missing after loading.
    """

    ncbi_tax_id = ncbi_tax_id or settings.get('default_organism')
    key = (id_type, ncbi_tax_id)

    # Record the time of the last access for this list.
    self.expiry[key] = time.time()

    if key not in self.lists:

        self.load(key)

    if key in self.lists:

        return self.lists[key]


def load(self, key):
    """
    Loads one reference list into ``self.lists``.

    The list is read from its pickle file in the cache directory if that
    file exists; otherwise it is built by ``_load`` and saved to the cache.

    Args:
        key (tuple): A tuple of ID type (str) and NCBI Taxonomy ID (int).
    """

    cachefile = 'reflist_%s_%u.pickle' % key
    cachefile = os.path.join(self.cachedir, cachefile)

    if os.path.exists(cachefile):

        # NOTE: unpickling can execute arbitrary code, the cache directory
        # must contain only trusted files.
        with open(cachefile, 'rb') as fp:

            self.lists[key] = pickle.load(fp)

        self._log(
            'Reference list for ID type `%s` for organism `%u` '
            'has been loaded from `%s`.' % (key + (cachefile,))
        )

    else:

        self.lists[key] = self._load(key)

        with open(cachefile, 'wb') as fp:

            pickle.dump(self.lists[key], fp)

        self._log(
            'Reference list for ID type `%s` for organism `%u` '
            'has been saved to `%s`.' % (key + (cachefile,))
        )


def _load(self, key):
    """
    Builds a reference list from its original source.

    The entry of ``inputs`` for the ID type is either a path to an
    existing text file with one identifier per line, or the name of a
    function in the ``uniprot_input`` or ``mirbase_input`` modules, which
    is called with the organism as ``organism``.

    Args:
        key (tuple): A tuple of ID type (str) and NCBI Taxonomy ID (int).

    Returns:
        Set of identifiers.

    Raises:
        KeyError: If the ID type is not in ``inputs``.
        AttributeError: If none of the input modules has the method.
    """

    data = set()
    input_method = inputs[key[0]]

    if os.path.exists(input_method):

        with open(input_method, 'r') as fp:

            data = {l.strip() for l in fp}

        self._log(
            'Reference list for ID type `%s` for organism `%u` has '
            'been loaded from `%s`.' % (key + (input_method,))
        )

    else:

        # The first module that has the method wins, UniProt goes first.
        for module in (uniprot_input, mirbase_input):

            if hasattr(module, input_method):

                input_func = getattr(module, input_method)
                break

        else:

            # Fail with a clear message instead of an `UnboundLocalError`
            # at the call below.
            raise AttributeError(
                'No input method `%s` found for ID type `%s`.' % (
                    input_method,
                    key[0],
                )
            )

        ncbi_tax_id = key[1]
        data = set(input_func(organism=ncbi_tax_id))

        self._log(
            'Reference list for ID type `%s` for organism `%u` has '
            'been loaded by method `%s`.' % (key + (str(input_method),))
        )

    return data
def check(self, name, id_type, ncbi_tax_id=None):
    """
    Checks if the identifier ``name`` is in the reference list with the
    provided ``id_type`` and organism.

    Args:
        name (str): The identifier to look up.
        id_type (str): The ID type of the reference list.
        ncbi_tax_id (int): NCBI Taxonomy ID of the organism; passed on to
            ``which_list`` unchanged.

    Returns:
        ``True`` if ``name`` is in the reference list, ``False`` otherwise.
    """

    lst = self.which_list(id_type=id_type, ncbi_tax_id=ncbi_tax_id)

    return name in lst
def select(self, names, id_type, ncbi_tax_id=None):
    """
    Selects the identifiers in ``names`` which are in the reference list
    with the provided ``id_type`` and organism.

    Args:
        names: Iterable of identifiers; it is converted to a set.
        id_type (str): The ID type of the reference list.
        ncbi_tax_id (int): NCBI Taxonomy ID of the organism; passed on to
            ``which_list`` unchanged.

    Returns:
        Set of the identifiers from ``names`` that are in the list.
    """

    names = set(names)

    lst = self.which_list(id_type=id_type, ncbi_tax_id=ncbi_tax_id)

    return names & lst
def is_not(self, names, id_type, ncbi_tax_id=None):
    """
    Returns the identifiers from ``names`` which are not instances of the
    provided ``id_type`` and from the given organism.

    Args:
        names: Iterable of identifiers; it is converted to a set.
        id_type (str): The ID type of the reference list.
        ncbi_tax_id (int): NCBI Taxonomy ID of the organism; passed on to
            ``which_list`` unchanged.

    Returns:
        Set of the identifiers from ``names`` that are not in the list.
    """

    names = set(names)

    lst = self.which_list(id_type=id_type, ncbi_tax_id=ncbi_tax_id)

    return names - lst
def check(name, id_type, ncbi_tax_id=None):
    """
    Checks if the identifier ``name`` is in the reference list with the
    provided ``id_type`` and organism.

    Module-level shortcut: forwards the call to the ``check`` method of
    the object returned by ``get_manager``.

    Args:
        name (str): The identifier to look up.
        id_type (str): The ID type of the reference list.
        ncbi_tax_id (int): NCBI Taxonomy ID of the organism.
    """

    manager = get_manager()

    return manager.check(
        name=name,
        id_type=id_type,
        ncbi_tax_id=ncbi_tax_id,
    )
def select(names, id_type, ncbi_tax_id=None):
    """
    Selects the identifiers in ``names`` which are in the reference list
    with the provided ``id_type`` and organism.

    Module-level shortcut: forwards the call to the ``select`` method of
    the object returned by ``get_manager``.

    Args:
        names: Iterable of identifiers.
        id_type (str): The ID type of the reference list.
        ncbi_tax_id (int): NCBI Taxonomy ID of the organism.
    """

    manager = get_manager()

    return manager.select(
        names=names,
        id_type=id_type,
        ncbi_tax_id=ncbi_tax_id,
    )
def is_not(names, id_type, ncbi_tax_id=None):
    """
    Returns the identifiers from ``names`` which are not instances of the
    provided ``id_type`` and from the given organism.

    Module-level shortcut: forwards the call to the ``is_not`` method of
    the object returned by ``get_manager``.

    Args:
        names: Iterable of identifiers.
        id_type (str): The ID type of the reference list.
        ncbi_tax_id (int): NCBI Taxonomy ID of the organism.
    """

    manager = get_manager()

    return manager.is_not(
        names=names,
        id_type=id_type,
        ncbi_tax_id=ncbi_tax_id,
    )