Source code for pypath.utils.go

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems
from past.builtins import xrange, range

import os
import sys
import importlib as imp
import re
from collections import Counter, OrderedDict
import numpy as np
import itertools

try:
    import cPickle as pickle
except:
    import pickle

import pypath.share.cache as cache
import pypath.inputs.go as go_input
import pypath.share.progress as progress
import pypath.share.common as common
from pypath.share.common import *
import pypath.share.session as session_mod
import pypath.share.settings as settings

# Tokenizers used for parsing Gene Ontology expressions.
#
# `_reexprterm` splits an expression built from GO accessions into
# lowercase operators, parentheses and accessions like `GO:0005576`.
_reexprterm = re.compile(r'and|or|not|\(|\)|GO:[0-9]{7}')
# `_reexprname` splits an expression built from GO term names into
# uppercase operators, parentheses and names (words separated by single
# spaces). A run of two or more whitespace characters ends a name; the
# quantifier must be spelled `{2,}` -- `{2:}` is not a repetition and
# would be matched as the literal text "{2:}".
_reexprname = re.compile(
    r'(?!\s)' # no space at the beginning
    r'(?:AND|OR|NOT|\(|\)|' # either AND, OR, NOT or parentheses
       r'(?:(?!OR|AND|NOT|\s{2,})(?:[-\w: ]))+)' # or something else
                                                 # (words with spaces)
    r'(?<!\s)' # no space at the end
)


# Accessions of the three ontology roots, keyed by the ontology name.
ROOT_NODES = dict(
    cellular_component = 'GO:0005575',
    biological_process = 'GO:0008150',
    molecular_function = 'GO:0003674',
)

# The same root accessions as a set, for quick membership tests.
ROOT_ACS = {root_ac for root_ac in ROOT_NODES.values()}


class GeneOntology(session_mod.Logger):
    """
    Gene Ontology terms and the relations between them.

    After loading, the instance carries these lookups:

    * ``ancestors`` and ``descendants``: keyed by term accession; each
      value is iterated as ``(related_term, relation)`` pairs;
    * ``aspect``: term accession to ontology aspect;
    * ``name``: term accession to term name;
    * ``term``: term name to term accession.
    """

    # relation types followed by default when walking the ontology graph
    all_relations = {
        'is_a',
        'part_of',
        'occurs_in',
        'regulates',
        'positively_regulates',
        'negatively_regulates',
    }


    def __init__(
            self,
            terms = None,
            ancestors = None,
            descendants = None,
            aspect = None,
            term = None,
            name = None,
        ):
        """
        Loads data about Gene Ontology terms and their relations.

        Each argument, if provided (and not empty), is used instead of
        the corresponding data obtained from ``pypath.inputs.go`` or
        derived from ``terms``.

        :param dict terms:
            Dict of dicts: aspects as top level keys, term accessions as
            second level keys and term names as values.
        :param dict ancestors:
            Ready lookup of ancestors, see the class description.
        :param dict descendants:
            Ready lookup of descendants, see the class description.
        :param dict aspect:
            Ready lookup of aspects by term accession.
        :param dict term:
            Ready lookup of term accessions by term name.
        :param dict name:
            Ready lookup of term names by term accession.
        """

        session_mod.Logger.__init__(self, name = 'go')

        self._terms_provided = terms
        self._ancestors_provided = ancestors
        self._descendants_provided = descendants
        self._aspect_provided = aspect
        self._term_provided = term
        self._name_provided = name

        self._load()


    def reload(self):
        """Reloads the object from the module level."""

        modname = self.__class__.__module__
        mod = __import__(modname, fromlist = [modname.split('.')[0]])
        imp.reload(mod)
        new = getattr(mod, self.__class__.__name__)
        setattr(self, '__class__', new)


    def _load(self):
        """
        Populates all lookups; the order matters as ``term`` is built
        from ``name`` and both ``aspect`` and ``name`` from ``_terms``.
        """

        self._log('Populating Gene Ontology: ontology.')

        self._load_terms()
        self._load_tree()
        self._set_aspect()
        self._set_name()
        self._set_term()

        # ``_terms`` is kept on purpose: ``GOAnnotation.save_to_pickle``
        # stores it in the pickle cache

        self._log('Gene Ontology: ontology populated.')


    def _load_terms(self):
        """Sets ``_terms``, the dict of terms by aspect."""

        self._terms = self._terms_provided or go_input.go_terms_quickgo()


    def _load_tree(self):
        """Sets the ``ancestors`` and ``descendants`` lookups."""

        self._log('Gene Ontology: building the ontology tree.')

        self.ancestors = (
            self._ancestors_provided or
            self._merge_aspects(
                go_input.go_ancestors_quickgo()
            )
        )
        self.descendants = (
            self._descendants_provided or
            self._merge_aspects(
                go_input.go_descendants_quickgo()
            )
        )


    def _set_aspect(self):
        """Sets ``aspect``, the lookup of aspects by term accession."""

        self.aspect = (
            self._aspect_provided or
            dict(
                (term, asp)
                for asp, terms in self._terms.items()
                for term in terms.keys()
            )
        )


    def _set_name(self):
        """Sets ``name``, the lookup of names by term accession."""

        self._log('Collecting short names of GO terms.')

        self.name = (
            self._name_provided or
            dict(
                item
                for terms in self._terms.values()
                for item in terms.items()
            )
        )


    def _set_term(self):
        """Sets ``term``, the lookup of term accessions by name."""

        self.term = (
            self._term_provided or
            dict(
                (name, term)
                for term, name in self.name.items()
            )
        )


    def is_term(self, term):
        """
        Tells if ``term`` is a GO accession number.
        """

        return term in self.name


    def is_name(self, name):
        """
        Tells if ``name`` is a GO term name.
        """

        return name in self.term


    def get_name(self, term):
        """
        For a GO accession number returns the name of the term.
        If ``term`` is already a GO term name returns it unchanged.
        Returns ``None`` if ``term`` is neither a name nor an accession.
        """

        return (
            term
                if self.is_name(term) else
            None
                if term not in self.name else
            self.name[term]
        )


    def get_term(self, name):
        """
        For a GO term name returns its GO accession number.
        If ``name`` is a GO accession returns it unchanged.
        Returns ``None`` and writes a log message if ``name`` is neither
        an accession nor a name.
        """

        result = (
            name
                if self.is_term(name) else
            None
                if name not in self.term else
            self.term[name]
        )

        if result is None:

            self._log('Could not find GO term name: `%s`.' % name)

        return result


    def terms_to_names(self, terms):
        """
        For a list of GO terms returns a list of tuples with the terms
        and their names.
        """

        return [(term, self.get_name(term)) for term in terms]


    def terms_to_names_aspects(self, terms):
        """
        For a list of GO terms returns a list of tuples with the terms,
        their names and the ontology aspect.
        """

        return [
            (term, self.get_name(term), self.get_aspect(term))
            for term in terms
        ]


    def names_to_terms(self, names):
        """
        For a list of GO names returns a list of tuples with the terms
        and their names.
        """

        return [(self.get_term(name), name) for name in names]


    def names_to_terms_aspects(self, names):
        """
        For a list of GO names returns a list of tuples with the terms,
        their names and ontology aspects.
        """

        return [
            (self.get_term(name), name, self.aspect_from_name(name))
            for name in names
        ]


    def aspect_from_name(self, name):
        """
        Tells about a Gene Ontology term name which aspect does it belong
        to. Returns ``None`` if the name can not be translated to a term.
        """

        term = self.get_term(name)

        if term:

            return self.get_aspect(term)


    @staticmethod
    def _merge_aspects(dct):
        """
        Merges the ``C`` and ``F`` sub-dicts into the ``P`` sub-dict of
        ``dct`` and returns the latter. Note: ``dct['P']`` is updated in
        place.
        """

        dct['P'].update(dct['C'])
        dct['P'].update(dct['F'])

        return dct['P']


    def subgraph_nodes(
            self,
            direction,
            terms,
            relations = None,
            include_seed = True,
        ):
        """
        Returns a set of all nodes either in the subgraph of ancestors
        or descendants of a single term or a set of terms.

        :param str direction:
            Possible values: `ancestors` or `descendants`.
        :param str,set terms:
            A single term or an iterable of terms to start from.
        :param set relations:
            Only these relation types are followed; by default all
            in ``all_relations``.
        :param bool include_seed:
            Include ``terms`` in the subgraph or only the related nodes.
            A seed term reachable from another seed is part of the
            result in any case.
        """

        relations = relations or self.all_relations

        if isinstance(terms, str):

            terms = {terms}

        graph = getattr(self, direction)
        seeds = set(terms)
        subgraph = set(seeds) if include_seed else set()

        # Iterative traversal with one shared set of already expanded
        # nodes: each node is visited once, no matter how many paths
        # lead to it, and the depth of the ontology can not exhaust
        # the recursion limit.
        expanded = set()
        to_expand = list(seeds)

        while to_expand:

            term = to_expand.pop()

            if term in expanded:

                continue

            expanded.add(term)

            if term not in graph:

                # the roots are expected to have no ancestors
                if term not in ROOT_ACS:

                    self._log(
                        'GO term without known %ss: `%s`.' % (
                            direction,
                            term
                        )
                    )

                continue

            for related, relation in graph[term]:

                if relation not in relations:

                    continue

                subgraph.add(related)

                if related not in expanded:

                    to_expand.append(related)

        return subgraph


    def get_all_ancestors(self, terms, relations = None, include_seed = True):
        """
        Returns a set of all ancestors of a single term or a set of terms.
        Names in ``terms`` are translated to accessions first.
        """

        terms = self.set_of_terms(terms)

        return self.subgraph_nodes(
            direction = 'ancestors',
            terms = terms,
            relations = relations,
            include_seed = include_seed,
        )


    def get_all_descendants(
            self,
            terms,
            relations = None,
            include_seed = True,
        ):
        """
        Returns a set of all descendants of a single term or a set of
        terms. Names in ``terms`` are translated to accessions first.
        """

        terms = self.set_of_terms(terms)

        return self.subgraph_nodes(
            direction = 'descendants',
            terms = terms,
            relations = relations,
            include_seed = include_seed,
        )


    def get_aspect(self, term):
        """
        For a GO term tells which aspect does it belong to.
        Returns `None` if the term is not in the ontology.
        """

        if term in self.aspect:

            return self.aspect[term]


    def all_from_aspect(self, aspect):
        """
        Returns the set of all GO terms of one aspect.
        """

        return set(
            term
            for term, asp in self.aspect.items()
            if asp == aspect
        )


    def is_root(self, term):
        """
        Tells if a term is the root of the graph i.e. it has no ancestors.
        Returns ``False`` for terms unknown to both lookups.
        """

        # mirrors `is_leaf`: the term is known from the other lookup
        # and it is either missing from `ancestors` or has none
        return (
            (
                term in self.descendants and
                term not in self.ancestors
            ) or
            (
                term in self.ancestors and
                not self.ancestors[term]
            )
        )


    def is_leaf(self, term):
        """
        Tells if a term is a leaf of the graph i.e. it has no descendants.
        Returns ``False`` for terms unknown to both lookups.
        """

        return (
            (
                term in self.ancestors and
                term not in self.descendants
            ) or
            (
                # checking membership first, an unknown term must not
                # raise `KeyError`
                term in self.descendants and
                not self.descendants[term]
            )
        )


    def lowest(self, terms, *args):
        """
        From a set of terms returns the lowest level ones, removing all
        which are parents of some others in the set.
        """

        return self.flatten(terms, *args)


    def highest(self, terms, *args):
        """
        From a set of terms returns the highest level ones, removing all
        which are descendants of some others in the set.
        """

        return self.flatten(terms, *args, lowest = False)


    def flatten(self, terms, *args, lowest = True):
        """
        Returns a set of terms by removing either all redundant ancestors
        or descendants from the provided set terms. By removing the
        ancestors you get the lowest level set of terms, by removing the
        descendants the result will be the highest level non-redundant
        terms.

        :param str,set terms:
            A term or name, or an iterable of these; further ones can be
            passed as additional positional arguments.
        :param bool lowest:
            If ``True`` the ancestors are removed (lowest level terms
            remain), otherwise the descendants.
        """

        terms = self.set_of_terms(terms, *args)

        method = getattr(
            self,
            'get_all_%s' % (
                'ancestors' if lowest else 'descendants'
            )
        )

        # `set().union(...)` instead of `set.union(...)`: the latter
        # raises `TypeError` if `terms` is empty
        redundant = set().union(
            *(
                method(term, include_seed = False)
                for term in terms
            )
        )

        return terms - redundant


    def set_of_terms(self, terms_names, *args):
        """
        Converts anything to a set of terms. ``terms_names`` can be either
        a single term or name or an iterable of terms and names.
        """

        return self.set_of(terms_names, *args)


    def set_of_names(self, terms_names, *args):
        """
        Converts anything to a set of names. ``terms_names`` can be either
        a single term or name or an iterable of terms and names.
        """

        return self.set_of(terms_names, *args, to_terms = False)


    def set_of(self, terms_names, *args, to_terms = True):
        """
        Converts anything to a set of terms or names. ``terms_names`` can
        be either a single term or name or an iterable of terms and names.
        Items which can not be translated appear as ``None`` in the
        result.

        :param bool to_terms:
            The target identifier type is `term`; if ``False`` the
            target will be `name`.
        """

        # always working on a new set, never updating the caller's set
        # with the extra positional arguments
        terms_names = (
            {terms_names}
                if isinstance(terms_names, str) else
            set(terms_names)
        )
        terms_names.update(args)

        method = getattr(
            self,
            'get_term' if to_terms else 'get_name'
        )

        return {
            method(term)
            for term in terms_names
        }
class GOAnnotation(session_mod.Logger):
    """
    Gene Ontology annotations of the proteins of one organism.

    The attributes ``c``, ``f`` and ``p`` hold the direct annotations by
    aspect, ``all`` the direct annotations from all aspects; the
    ``c_full``, ``f_full``, ``p_full`` and ``all_full`` attributes hold
    the annotations extended with all ancestors of the terms. All these
    are dicts of sets with UniProt IDs as keys.
    """

    aspects = ('C', 'F', 'P')


    def __init__(
            self,
            organism = 9606,
            ontology = None,
            pickle_file = None,
            use_pickle_cache = True,
        ):
        """
        For one organism loads Gene Ontology annotations, in addition it
        accepts or creates a ``GeneOntology`` object.

        :param int organism:
            Organism identifier passed to
            ``pypath.inputs.go.go_annotations_goa``.
        :param GeneOntology ontology:
            A ``GeneOntology`` instance; created if not provided.
            Ignored if the contents are loaded from a pickle file.
        :param str pickle_file:
            Path to the pickle cache; by default a path in the cache
            directory is used.
        :param bool use_pickle_cache:
            Attempt to load the contents from the pickle file.
        """

        session_mod.Logger.__init__(self, name = 'go')

        self.organism = organism
        self._pickle_file = pickle_file
        self._use_pickle_cache = use_pickle_cache

        # successfully loaded from pickle, nothing else to do
        if self._pickle_cache_load_hook():

            return

        self.ontology = ontology or GeneOntology()

        self._log(
            'Populating Gene Ontology: '
            'annotations for organism `%u`.' % organism
        )

        annot = go_input.go_annotations_goa(organism = organism)
        self.c = annot['C']
        self.f = annot['F']
        self.p = annot['P']

        self._ancestors_annotate()
        self._merge_annotations()

        self._pickle_cache_save_hook()


    def reload(self):
        """
        Reloads the object from the module level.
        """

        modname = self.__class__.__module__
        mod = __import__(modname, fromlist = [modname.split('.')[0]])
        imp.reload(mod)
        new = getattr(mod, self.__class__.__name__)
        setattr(self, '__class__', new)


    def _ancestors_annotate(self):
        """
        For each aspect creates the ``<aspect>_full`` dict where the
        annotations of each protein are extended with all ancestors.
        """

        self._log('Creating ancestors lookup dictionary.')

        for asp in self.aspects:

            setattr(
                self,
                '%s_full' % asp.lower(),
                dict(
                    (
                        uniprot,
                        self.ontology.get_all_ancestors(annot)
                    )
                    for uniprot, annot in
                    getattr(self, asp.lower()).items()
                )
            )


    def _merge_annotations(self):
        """
        Creates the ``all`` and ``all_full`` dicts, the unions of the
        three aspects.
        """

        self._log('Creating complete lookup dictionary.')

        uniprots = self.all_uniprots()

        self.all = dict(
            (
                uniprot,
                set.union(*(
                    self.get_annot(uniprot, asp)
                    for asp in self.aspects
                ))
            )
            for uniprot in uniprots
        )

        self.all_full = dict(
            (
                uniprot,
                set.union(*(
                    self.get_annot_ancestors(uniprot, asp)
                    for asp in self.aspects
                ))
            )
            for uniprot in uniprots
        )


    def get_name(self, term):
        """
        For a GO accession number returns the name of the term.
        """

        return self.ontology.get_name(term)


    def get_term(self, name):
        """
        For a GO term name returns its GO accession number.
        """

        return self.ontology.get_term(name)


    def get_annot(self, uniprot, aspect):
        """
        For a UniProt ID returns its direct annotations from one aspect
        of Gene Ontology.
        Returns set.
        """

        annot = getattr(self, aspect.lower())

        return annot[uniprot] if uniprot in annot else set()


    def get_annots(self, uniprot):
        """
        For a UniProt ID returns its direct annotations from all aspects
        of Gene Ontology.
        Returns set.
        """

        return self.all[uniprot] if uniprot in self.all else set()


    def get_annot_ancestors(self, uniprot, aspect):
        """
        For a UniProt ID returns its annotations including lowest level
        terms and their ancestors from one aspect of Gene Ontology.
        Returns set.
        """

        annot = getattr(self, '%s_full' % aspect.lower())

        return annot[uniprot] if uniprot in annot else set()


    def get_annots_ancestors(self, uniprot):
        """
        For a UniProt ID returns its annotations including lowest level
        terms and their ancestors from all aspects of Gene Ontology.
        Returns set.
        """

        return self.all_full[uniprot] if uniprot in self.all_full else set()


    def has_term(self, uniprot, term):
        """
        Tells if an UniProt ID is annotated with a GO term.
        """

        return uniprot in self.all_full and term in self.all_full[uniprot]


    def has_any_term(self, uniprot, terms):
        """
        Tells if an UniProt ID is annotated with any of a set of GO terms.

        :param set terms:
            Any iterable of GO terms.
        """

        return (
            uniprot in self.all_full and
            not self.all_full[uniprot].isdisjoint(terms)
        )


    def all_uniprots(self):
        """
        Returns all UniProt IDs having annotations.
        """

        return set.union(*(
            set(getattr(self, asp.lower()).keys())
            for asp in self.aspects
        ))


    def i_select_by_term(self, term, uniprots = None):
        """
        Accepts a list of UniProt IDs and one or more gene ontology terms
        and returns a set of indices of those UniProts which are annotated
        with any of the terms.
        If no UniProts given all annotated UniProts considered, in
        alphabetical order.

        :param str,set term:
            A single GO term or set of terms.
        """

        uniprots = uniprots or sorted(self.all_uniprots())

        method = (
            self.has_any_term
                if isinstance(term, (set, frozenset, list, tuple)) else
            self.has_term
        )

        return set(
            i
            for i, uniprot in enumerate(uniprots)
            if method(uniprot, term)
        )


    def select_by_name(self, name, uniprots = None, return_uniprots = False):
        """
        Accepts a list of UniProt IDs and one or more gene ontology names
        and returns the UniProts which are annotated with any of the
        names.
        If no UniProts given all annotated UniProts returned.

        :param str,set name:
            A single GO term name or set of names.
        :param bool return_uniprots:
            By default returns the set of indices; if ``True`` returns
            a set of the selected UniProt IDs.
        """

        if isinstance(name, str):

            term = self.ontology.get_term(name)

        else:

            term = set(i[0] for i in self.ontology.names_to_terms(name))

        return self.select(
            term,
            uniprots = uniprots,
            return_uniprots = return_uniprots,
        )


    def select_by_term(self, term, uniprots = None):
        """
        Accepts a list of UniProt IDs and one or more gene ontology terms
        and returns the UniProts which are annotated with any of the
        terms.
        If no UniProts given all annotated UniProts returned.

        :param str,set term:
            A single GO term or set of terms.
        """

        uniprots = uniprots or sorted(self.all_uniprots())

        return set(
            np.array(uniprots)[
                list(self.i_select_by_term(term, uniprots))
            ]
        )


    def expr_names_to_terms(self, expr):
        """
        Processes an expression built by names to expressions of terms.
        Returns a list of tokens: GO accessions, lowercase operators and
        parentheses; names failed to translate are represented by
        ``None``.

        :arg str expr:
            An expression using Gene Ontology names, parentheses and
            logical operators.
        """

        not_name = {'(', ')', 'AND', 'OR', 'NOT'}

        tokens_terms = []

        for token in _reexprname.findall(expr):

            token = token.strip()

            if not token:

                continue

            if token[:3] == 'GO:':

                # already an accession
                tokens_terms.append(token)

            elif token in not_name:

                # the term expression uses lowercase operators
                tokens_terms.append(token.lower())

            else:

                tokens_terms.append(self.get_term(token))

        return tokens_terms


    def select_by_expr(
            self,
            expr,
            uniprots = None,
            return_uniprots = False,
        ):
        """
        Selects UniProts based on an expression of Gene Ontology terms.
        Operator precedence not considered, please use parentheses.
        Return indices of the selected elements in the ``uniprots``
        list or the set of selected UniProt IDs.
        If any name in the expression can not be translated to a GO
        accession, an empty set is returned and a message is logged.

        :param str expr:
            An expression of Gene Ontology terms and names. E.g.
            ``'(GO:0005576 and not GO:0070062) or GO:0005887'``.
            Parentheses and operators ``and``, ``or`` and ``not`` can
            be used.
            Another example:
            ``hormone binding AND (cell surface OR GO:0009897)``.
        :param bool return_uniprots:
            By default returns the set of indices; if ``True`` (or if
            ``uniprots`` is not provided) returns a set of the selected
            UniProt IDs.
        """

        expr_terms = self.expr_names_to_terms(expr)

        # The failure shows up in the translated tokens, the result set
        # never contains `None`. Reporting here lets us log the
        # expression as the user typed it.
        if any(e is None for e in expr_terms):

            self._log(
                'Could not process Gene Ontology expression: `%s`. '
                'Please check if the expression consists only of '
                'GO terms and/or ACs, the operators `AND`, `OR` and '
                '`NOT`, and braces. Whitespaces and newlines are OK. '
                'If you think the expression is correct please open '
                'an issue for `pypath`.' % expr,
                -9,
            )

            return set()

        return self.select_by_expr_terms(
            expr = expr_terms,
            uniprots = uniprots,
            return_uniprots = return_uniprots,
        )


    def select_by_expr_terms(
            self,
            expr,
            uniprots = None,
            return_uniprots = False,
        ):
        """
        Selects UniProts based on an expression of Gene Ontology terms.
        Operator precedence not considered, please use parentheses;
        parentheses can be nested and ``not`` can precede either a term
        or a parenthesized sub-expression.
        Return indices of the selected elements in the ``uniprots``
        list or the set of selected UniProt IDs.

        :param str,list expr:
            An expression of Gene Ontology terms, or the list of its
            tokens. E.g.
            ``'(GO:0005576 and not GO:0070062) or GO:0005887'``.
            Parentheses and operators ``and``, ``or`` and ``not`` can
            be used.
        :param bool return_uniprots:
            By default returns the set of indices; if ``True`` (or if
            ``uniprots`` is not provided) returns a set of the selected
            UniProt IDs.
        """

        ops = {
            'and': 'intersection',
            'or':  'union',
        }

        # if no UniProts provided does not make sense to return indices
        return_uniprots = return_uniprots or uniprots is None

        uniprots = uniprots or sorted(self.all_uniprots())

        if isinstance(expr, str):

            # tokenizing expression if it is a string
            expr = _reexprterm.findall(expr)

        # `None` stands for a name failed to translate to GO AC
        if any(e is None for e in expr):

            self._log(
                'Could not process Gene Ontology expression: `%s`. '
                'Please check if the expression consists only of '
                'GO terms and/or ACs, the operators `AND`, `OR` and '
                '`NOT`, and braces. Whitespaces and newlines are OK. '
                'If you think the expression is correct please open '
                'an issue for `pypath`.' % expr,
                -9,
            )

            return set()

        all_indices = set(range(len(uniprots)))

        def evaluate(tokens):
            # Evaluates a list of tokens, left to right, to a set of
            # indices; calls itself for parenthesized sub-expressions.

            result = set()
            # tokens of the sub-expression being collected
            sub_tokens = []
            # number of the currently open parentheses
            depth = 0
            negate = False
            op = None

            for it in tokens:

                this_set = None

                if depth:

                    # we are in a sub-selection part
                    if it == '(':

                        depth += 1

                    elif it == ')':

                        depth -= 1

                    if depth:

                        # still inside, add to the sub-selection
                        sub_tokens.append(it)
                        continue

                    # the outermost parenthesis is closed:
                    # execute sub-selection
                    this_set = evaluate(sub_tokens)
                    sub_tokens = []

                elif it.lower() == 'not':

                    # turn on negation for the next set
                    negate = True
                    continue

                elif it == '(':

                    # start a new sub-selection
                    depth = 1
                    continue

                elif it[:3] == 'GO:':

                    # token is a GO term
                    this_set = self.i_select_by_term(
                        it,
                        uniprots = uniprots,
                    )

                elif it.lower() in ops:

                    # set the operator for use at the next operation
                    op = ops[it.lower()]
                    continue

                if this_set is None:

                    # anything else is ignored
                    continue

                if negate:

                    # take the inverse, either of a term
                    # or of a sub-selection
                    this_set = all_indices - this_set
                    negate = False

                # without operator (normally only at the first set)
                # the set becomes the result
                result = (
                    this_set
                        if op is None else
                    getattr(result, op)(this_set)
                )
                op = None

            return result

        return self._uniprot_return(evaluate(expr), uniprots, return_uniprots)


    def select(self, terms, uniprots = None, return_uniprots = False):
        """
        Retrieves the UniProt IDs annotated with any Gene Ontology terms or
        their descendants, or evaluates string expression
        (see ``select_by_expr``).
        Returns indices of the selected elements in the ``uniprots``
        list or the set of selected UniProt IDs.

        :param str,set terms:
            A single GO term, a set of GO terms or an expression with
            GO terms. A string starting with `GO` is considered to be
            a single term.
        :param bool return_uniprots:
            By default returns the set of indices; if ``True`` (or if
            ``uniprots`` is not provided) returns a set of the selected
            UniProt IDs.
        """

        return_uniprots = return_uniprots or uniprots is None

        uniprots = uniprots or sorted(self.all_uniprots())

        # this is not an individual term but an expression
        if (
            isinstance(terms, str) and
            not terms.startswith('GO')
        ):

            result = self.select_by_expr(terms, uniprots = uniprots)

        # either one term or a set of terms
        else:

            result = self.i_select_by_term(terms, uniprots = uniprots)

        return self._uniprot_return(result, uniprots, return_uniprots)


    def select_by_all(self, terms, uniprots = None, return_uniprots = False):
        """
        Selects the nodes annotated by all GO terms in ``terms``.

        Returns indices of the selected elements in the ``uniprots``
        list or the set of selected UniProt IDs.
        Without any term the result is an empty set.

        :param list terms:
            List, set or tuple of GO terms.
        :param bool return_uniprots:
            By default returns the set of indices; if ``True`` (or if
            ``uniprots`` is not provided) returns a set of the selected
            UniProt IDs.
        """

        return_uniprots = return_uniprots or uniprots is None

        uniprots = uniprots or sorted(self.all_uniprots())

        if isinstance(terms, str):

            terms = (terms,)

        # indices within `uniprots` are needed here, not UniProt IDs,
        # because `_uniprot_return` uses them for indexing
        selections = [
            self.i_select_by_term(term, uniprots = uniprots)
            for term in terms
        ]

        idx = set.intersection(*selections) if selections else set()

        return self._uniprot_return(idx, uniprots, return_uniprots)


    def _uniprot_return(self, idx, uniprots, return_uniprots):
        """
        Returns either the indices ``idx`` unchanged or the set of
        elements of ``uniprots`` at these indices.
        """

        if return_uniprots:

            return set(np.array(uniprots)[list(idx)])

        return idx


    def _pickle_cache_load_hook(self):
        """
        Attempts to load the contents from the pickle cache if its usage
        is enabled. Returns a true value only if the pickle file exists.
        """

        if not self._use_pickle_cache:

            return

        self._set_pickle_path()
        self.load_from_pickle()

        return os.path.exists(self._pickle_file)


    def _pickle_cache_save_hook(self):
        """Saves the contents to the pickle cache."""

        self._set_pickle_path()
        self.save_to_pickle()


    def _set_pickle_path(self):
        """
        Unless a path has been provided sets the path of the pickle
        file to the default one in the cache directory.
        """

        self._pickle_file = (
            self._pickle_file or
            os.path.join(
                cache.get_cachedir(),
                settings.get('go_pickle_cache_fname') % self.organism,
            )
        )


    def save_to_pickle(self, pickle_file = None):
        """
        Saves the annotations and the contents of the ontology to a
        pickle file.

        :param str pickle_file:
            Path to the pickle file; by default the path set at the
            instance level.
        """

        pickle_file = pickle_file or self._pickle_file

        if not isinstance(pickle_file, str):

            self._log(
                'Pickle file path must be a string: `%s`.' % str(pickle_file)
            )
            return

        self._log('Saving to pickle `%s`.' % pickle_file)

        with open(pickle_file, 'wb') as fp:

            # the order of the items must match `load_from_pickle`
            pickle.dump(
                obj = (
                    self.c_full,
                    self.p_full,
                    self.f_full,
                    self.all_full,
                    self.c,
                    self.p,
                    self.f,
                    self.all,
                    self.ontology._terms,
                    self.ontology.ancestors,
                    self.ontology.descendants,
                    self.ontology.term,
                    self.ontology.name,
                ),
                file = fp,
            )

        self._log('Saved to pickle `%s`.' % pickle_file)


    def load_from_pickle(self, pickle_file = None):
        """
        Loads the annotations and the ontology from a pickle file
        created by ``save_to_pickle``. Does nothing apart from logging
        if the file does not exist.

        :param str pickle_file:
            Path to the pickle file; by default the path set at the
            instance level.
        """

        pickle_file = pickle_file or self._pickle_file

        # `os.path.exists` raises `TypeError` if the path is `None`
        if (
            not isinstance(pickle_file, str) or
            not os.path.exists(pickle_file)
        ):

            self._log('Pickle file does not exist: `%s`.' % str(pickle_file))
            return

        self._log('Loading from pickle `%s`.' % pickle_file)

        with open(pickle_file, 'rb') as fp:

            # NOTE(security): unpickling may execute arbitrary code,
            # load only files from a trusted location, such as the
            # own cache directory
            (
                self.c_full,
                self.p_full,
                self.f_full,
                self.all_full,
                self.c,
                self.p,
                self.f,
                self.all,
                ontology_terms,
                ontology_ancestors,
                ontology_descendants,
                ontology_term,
                ontology_name,
            ) = pickle.load(fp)

        self.ontology = GeneOntology(
            terms = ontology_terms,
            ancestors = ontology_ancestors,
            descendants = ontology_descendants,
            term = ontology_term,
            name = ontology_name,
        )

        self._log('Loaded from pickle `%s`.' % pickle_file)
class GOCustomAnnotation(session_mod.Logger):
    """
    Annotations by custom categories defined by GO terms, names or
    expressions.
    """


    def __init__(
            self,
            categories,
            go_annot = None,
            ncbi_tax_id = 9606,
        ):
        """
        Provides annotations by a custom set of GO terms or expressions
        built from multiple terms.

        :arg dict categories:
            A dict with custom category labels as keys and single GO terms
            or names or complex expressions as values.
            Alternatively a set of GO terms, in this case the term names
            will be used as labels.
        :arg pypath.go.GOAnnotation go_annot:
            A :class:``pypath.go.GOAnnotation`` object. If not provided,
            the module level database of the organism ``ncbi_tax_id``
            will be used.
        :arg int ncbi_tax_id:
            Organism of the GO annotation database to be used if
            ``go_annot`` is not provided.
        """

        session_mod.Logger.__init__(self, name = 'go')

        self.go_annot = go_annot or get_db(organism = ncbi_tax_id)

        self._categories = categories
        self.process_categories()


    def reload(self):
        """
        Reloads the object from the module level.
        """

        modname = self.__class__.__module__
        mod = __import__(modname, fromlist = [modname.split('.')[0]])
        imp.reload(mod)
        new = getattr(mod, self.__class__.__name__)
        setattr(self, '__class__', new)


    def process_categories(self):
        """
        Translates GO term names listed in categories to GO terms ACs.
        The result is assigned to the ``categories`` attribute.
        """

        # if the categories are grouped by aspects
        # (an empty dict has nothing to merge, and looking at its
        # first value would raise `IndexError`)
        if (
            isinstance(self._categories, dict) and
            self._categories and
            not set(self._categories.keys()) - set(self.go_annot.aspects)
        ):

            first = next(iter(self._categories.values()))

            if isinstance(first, set):

                self._categories = set.union(*self._categories.values())

            elif isinstance(first, dict):

                self._categories = dict(
                    itertools.chain(
                        *(d.items() for d in self._categories.values())
                    )
                )

        # if a set provided we use names as keys
        # and accessions as values
        if isinstance(self._categories, set):

            self._categories = dict(
                (
                    self.go_annot.get_name(cat),
                    self.go_annot.get_term(cat)
                )
                for cat in self._categories
            )

        self.categories = self._categories


    def get_annotation(self, category, uniprots = None):
        """
        For a category name returns a set of UniProt IDs annotated with the
        corresponding Gene Ontology terms or expression.

        :arg str category:
            The category name, should be a key in the ``categories`` dict.
        :arg set uniprots:
            A set or list of UniProt IDs. If ``None``, annotations based on
            all UniProts in GO annotation will be returned.
        """

        return self.go_annot.select(
            self.categories[category],
            uniprots = uniprots,
            return_uniprots = True,
        )


    def get_annotations(self, uniprots = None):
        """
        Returns a dict with set of UniProt IDs for each category.

        :arg set uniprots:
            A set or list of UniProt IDs. If ``None``, annotations based on
            all UniProts in GO annotation will be returned.
        """

        return dict(
            (
                category,
                self.get_annotation(category, uniprots = uniprots)
            )
            for category in self.categories.keys()
        )
def annotate(graph, organism = 9606, aspects = ('C', 'F', 'P')):
    """
    Adds Gene Ontology annotations to the nodes of a graph.
    The annotations are assigned to the ``go`` vertex attribute: for each
    vertex a dict with the aspects `C`, `F` and `P` as keys and sets as
    values.

    :param igraph.Graph graph:
        Any ``igraph.Graph`` object with uniprot IDs
        in its ``name`` vertex attribute.
    :param int organism:
        Organism identifier passed to
        ``pypath.inputs.go.go_annotations_goa``.
    :param str,tuple aspects:
        One or more of the aspects `C`, `F` and `P`.
    """

    aspects = aspects if isinstance(aspects, (list, tuple)) else (aspects, )

    graph.vs['go'] = [
        {'C': set(), 'F': set(), 'P': set()}
        for _ in range(graph.vcount())
    ]

    # `GOAnnotation.__init__` uses the value returned by this function
    # as a dict keyed by aspect; unpacking such a dict into two names
    # raises `ValueError`.
    annot = go_input.go_annotations_goa(organism = organism)

    # NOTE(review): the former code unpacked `terms, annot` here; in case
    # a tuple is returned we presumably need its last element -- confirm
    if isinstance(annot, tuple):

        annot = annot[-1]

    prg = progress.Progress(graph.vcount(), 'Loading GO annotations', 9)

    for v in graph.vs:

        prg.step()

        for asp in aspects:

            if v['name'] in annot[asp]:

                v['go'][asp] = annot[asp][v['name']]

    prg.terminate()
# old name as synonym load_go = annotate
def init_db(organism = 9606, pickle_file = None, use_pickle_cache = True):
    """
    Creates a new GO annotation database for one organism, replacing
    the previous one if it exists.

    The databases are kept in a dict assigned to the ``db`` attribute of
    this module, with the organisms as keys.
    """

    module_globals = globals()

    # the registry is created before the database is built
    if 'db' not in module_globals:

        module_globals['db'] = {}

    registry = module_globals['db']

    registry[organism] = GOAnnotation(
        organism,
        pickle_file = pickle_file,
        use_pickle_cache = use_pickle_cache,
    )
def get_db(organism = 9606, pickle_file = None, use_pickle_cache = True):
    """
    Returns the GO annotation database of one organism from the module
    level ``db`` dict; the database is created by ``init_db`` first if
    it does not exist yet.
    """

    # TODO: consider organism
    # TODO: delete the DB if not used in order to free memory
    # TODO: introduce pickle cache to make it load quicker

    module_globals = globals()

    if organism not in module_globals.get('db', {}):

        init_db(
            organism,
            pickle_file = pickle_file,
            use_pickle_cache = use_pickle_cache,
        )

    return module_globals['db'][organism]