Source code for pypath.core.enz_sub

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems
from past.builtins import xrange, range

import sys
import importlib as imp
import itertools
import collections
import pickle
import traceback

import pandas as pd

import pypath.share.common as common
import pypath_common._constants as _const
import pypath.utils.mapping as mapping
import pypath.utils.orthology as orthology
import pypath.inputs.uniprot as uniprot_input
import pypath.internals.intera as intera
import pypath.share.progress as progress
import pypath.share.session as session_mod
import pypath.utils.taxonomy as taxonomy
import pypath.inputs as inputs
import pypath.core.evidence as evidence
import pypath.core.entity as entity
import pypath.resources as resources



[docs]
class EnzymeSubstrateProcessor(
        orthology.Proteomes,
        orthology.SequenceContainer
    ):



[docs]
    def __init__(
            self,
            input_param = None,
            input_method = None,
            ncbi_tax_id = None,
            trace = False,
            id_type_enzyme = None,
            id_type_substrate = None,
            name = None,
            allow_mixed_organisms = None,
            organisms_supported = False,
            **kwargs
        ):
        """
        Processes enzyme-substrate interaction data from various databases.
        Provides generators to iterate over these interactions.
        The data is loaded already at instantiation: the constructor
        resolves the settings, loads the sequences and calls the input
        method.

        :param input_param:
            An object describing the resource. For every setting which
            is not provided (or is falsy) as an argument here, the
            attribute of the same name of this object is used, if it
            has one (see ``_get_param``).
        :param str input_method:
            Either a method name in the ``inputs`` module or a database
            name e.g. `PhosphoSite` or a callable which returns data in
            list of dicts format.
        :param int ncbi_tax_id:
            NCBI Taxonomy ID used at the database lookups. Falls back
            to 9606 if not provided by any other means.
        :param bool trace:
            Keep data about ambiguous ID mappings and PTM data
            in mismatch with UniProt sequences (stored in the
            ``sub_ambig``, ``kin_ambig`` and ``nomatch`` attributes).
        :param str id_type_enzyme:
            The ID type of the enzyme in the database. Falls back to
            `genesymbol`.
        :param str id_type_substrate:
            The ID type of the substrate in the database. Falls back to
            `genesymbol`. Converted to a list in ``setup``; an element
            may also be a pair of an ID type and the key of the record
            holding the identifier.
        :param str name:
            Name of the resource. If not available, the name of the
            input method is used (see ``set_method``).
        :param bool allow_mixed_organisms:
            If true, sequences are loaded also for the taxa in
            ``mammal_taxa`` and the per-interaction taxon check in
            ``_process`` is skipped.
        :param bool organisms_supported:
            If true, the taxon is passed to the input method in the
            ``organism`` argument (see ``_organism_setup``).
        :param **kwargs: Args to be forwarded to the input method.

        """

        # subclasses may have initialised the logger already
        if not hasattr(self, '_logger'):

            session_mod.Logger.__init__(self, name = 'enz_sub')

        self.mammal_taxa = {9606, 10090, 10116}
        # containers filled by `_process` only if `trace` is enabled
        self.nomatch = []
        self.kin_ambig = {}
        self.sub_ambig = {}

        self.input_param = input_param
        self.name = name
        self.id_type_enzyme = id_type_enzyme
        self.id_type_substrate = id_type_substrate
        self.allow_mixed_organisms = allow_mixed_organisms
        self.input_method = input_method
        self.trace = trace
        self.ncbi_tax_id = ncbi_tax_id
        self.organisms_supported = organisms_supported

        # resolves the settings above against `input_param` and defaults,
        # hence it must run after the attributes have been assigned
        self.setup()

        orthology.SequenceContainer.__init__(self)
        self.load_seq(self.ncbi_tax_id)

        if self.allow_mixed_organisms:

            for taxon in self.mammal_taxa:

                self.load_seq(taxon = taxon)

        orthology.Proteomes.__init__(self)

        # `inputargs` must exist before `load_enz_sub`, because the setup
        # methods called from there modify it
        self.set_inputargs(**kwargs)
        self.load_enz_sub()



    def setup(self):
        """
        Resolves the effective settings of the processor.

        Each setting is looked up by ``_get_param``: the value already
        stored on the instance is used if it is truthy, otherwise the
        attribute of ``input_param``, otherwise the default listed below.
        Finally the substrate ID type is converted to a list and the
        input method is selected.
        """

        defaults = (
            ('name', None),
            ('id_type_enzyme', 'genesymbol'),
            ('id_type_substrate', 'genesymbol'),
            ('ncbi_tax_id', 9606),
            ('organisms_supported', False),
            ('allow_mixed_organisms', False),
            ('input_method', None),
        )

        for label, default in defaults:

            setattr(self, label, self._get_param(label, default))

        self.id_type_substrate = common.to_list(self.id_type_substrate)

        self.set_method()


    def _get_param(self, label, default = None):
        """
        Looks up the value of a setting.

        :param str label: Name of the setting (attribute name).
        :param default: Value to return if the setting is not available.

        :return:
            The attribute of this instance if it is truthy; otherwise the
            attribute of ``input_param`` if it has one with this name;
            otherwise ``default``.
        """

        own_value = getattr(self, label)

        if own_value:

            return own_value

        if hasattr(self.input_param, label):

            return getattr(self.input_param, label)

        return default


    def load_enz_sub(self):
        """
        Runs the organism and resource specific setup (``_setup``),
        then loads the data by the input method (``load_data``).
        """

        self._setup()

        self.load_data()


    def reload(self):
        """
        Reloads the module where the class of this object is defined
        and rebinds the instance to the class of the same name in the
        reloaded module.
        """

        cls = self.__class__
        module_name = cls.__module__
        top_level = module_name.split('.')[0]

        module = __import__(module_name, fromlist = [top_level])
        imp.reload(module)

        self.__class__ = getattr(module, cls.__name__)


    def reset_ptmprocessor(self, seq = None, ncbi_tax_id = None):
        """
        Switches the processor to a taxon and reloads the sequences
        and the data.

        :param seq: Not used by this method.
        :param int ncbi_tax_id:
            NCBI Taxonomy ID; if falsy, the current taxon of the
            instance is used.
        """

        taxon = ncbi_tax_id if ncbi_tax_id else self.ncbi_tax_id

        self.set_taxon(taxon)
        self.load_seq(taxon)
        self.load_data()


    def set_taxon(self, ncbi_tax_id):
        """
        Sets the taxon of the processor and runs the organism
        specific setup.

        :param int ncbi_tax_id: NCBI Taxonomy ID.
        """

        self.ncbi_tax_id = ncbi_tax_id

        self._organism_setup()



[docs]
    def set_method(self):
        """
        Selects the input method.

        If ``input_method`` is not callable, it is looked up by
        ``inputs.get_method``; if that yields nothing, a placeholder
        returning an empty list is used. If the instance has no name
        yet, the name of the input method becomes its name.
        """

        def empty_input(*args, **kwargs):

            return []

        method = self.input_method

        if not hasattr(method, '__call__'):

            # attempting to look up the method in the inputs module
            method = inputs.get_method(method) or empty_input
            self.input_method = method

        self.name = self.name if self.name else method.__name__




[docs]
    def set_inputargs(self, **inputargs):
        """
        Stores the keyword arguments which will be passed to the
        input method by ``load_data``.

        :param **inputargs: Arguments for the input method.
        """

        self.inputargs = inputargs




[docs]
    def load_data(self):
        """
        Calls the input method with the stored arguments and keeps
        its result in the ``data`` attribute. The call and the number
        of records obtained are logged.
        """

        method_label = '%s.%s' % (
            self.input_method.__module__,
            self.input_method.__name__,
        )

        call_msg = 'Calling `%s` with arguments %s.' % (
            method_label,
            str(self.inputargs)
        )
        self._log(call_msg)

        self.data = self.input_method(**self.inputargs)

        n_records = len(self.data)
        done_msg = 'Loaded data by `%s`, resulted %u records.' % (
            method_label,
            n_records,
        )
        self._log(done_msg)



    def _phosphosite_setup(self):
        """
        Adjusts the input arguments for the PhosphoSite input method.

        Sets the ``strict`` argument to ``False`` unless it has been
        provided already. If an ``organism`` argument is present and it
        is a key in ``taxonomy.taxids``, it is replaced by the
        corresponding value from there.
        """

        self.inputargs.setdefault('strict', False)

        # `organism` is added by `_organism_setup` only if
        # `organisms_supported` is set, hence it might be missing;
        # a plain subscript here raised `KeyError` in that case
        organism = self.inputargs.get('organism')

        if organism is not None and organism in taxonomy.taxids:

            self.inputargs['organism'] = taxonomy.taxids[organism]


    def _phosphoelm_setup(self):
        """
        Adjusts the input arguments for the phospho.ELM input method:
        for taxa other than 9606 the ``ltp_only`` argument is set to
        ``False`` unless it has been provided already.
        """

        if self.ncbi_tax_id == 9606 or 'ltp_only' in self.inputargs:

            return

        self.inputargs['ltp_only'] = False


    def _setup(self):
        """
        Runs the organism specific setup, then the resource specific
        one if a method named ``_<lowercase resource name>_setup``
        exists on the instance.
        """

        hook_name = '_%s_setup' % self.name.lower()

        self._organism_setup()

        if not hasattr(self, hook_name):

            return

        getattr(self, hook_name)()


    def _organism_setup(self):
        """
        Organism specific setup.

        If the resource supports multiple organisms, the taxon is
        translated by ``taxonomy.taxa`` if it is a key there, and it is
        added to the input arguments as ``organism``. In any case the
        proteome of the taxon is loaded.
        """

        if self.organisms_supported:

            taxon = self.ncbi_tax_id

            if taxon in taxonomy.taxa:

                taxon = self.ncbi_tax_id = taxonomy.taxa[taxon]

            self.inputargs['organism'] = taxon

        self.load_proteome(self.ncbi_tax_id, False)


    def _process(self, p):
        """
        Processes one record of the input data and yields the
        enzyme-substrate interactions built from it.

        The enzyme and substrate identifiers are translated to UniProt,
        the modified residue is matched against the sequences of the
        substrate isoforms, and for each combination of matching
        substrate isoform and enzyme an `intera.DomainMotif` is created.

        :param dict p:
            One record from the input method. Keys read here: `kinase`,
            `substrate`, `resaa`, `resnum`, and optionally
            `substrate_isoform`, `instance`, `start`, `end`, `typ`,
            `databases`, `references`, `substrate_refseq` and the keys
            listed in `input_param.extra_attrs`. The record is modified
            in place: `kinase` is converted to a list, `typ` gets a
            default, and `start`, `end`, `instance` might be filled in.

        :return:
            Generator of `intera.DomainMotif` objects. Yields nothing
            if the record has no enzyme or if the substrate name starts
            with `HLA`.
        """

        # human leukocyte antigens result in an
        # extremely high number of combinations
        if (
            not p['kinase'] or (
                isinstance(p['substrate'], str) and
                p['substrate'].startswith('HLA')
            )
        ):

            return

        if not isinstance(p['kinase'], list):
            p['kinase'] = [p['kinase']]

        kinase_ups = mapping.map_names(
            p['kinase'],
            self.id_type_enzyme,
            'uniprot',
            ncbi_tax_id = self.ncbi_tax_id,
        )

        substrate_ups_all = set()

        for sub_id_type in self.id_type_substrate:

            # an ID type might be given as a pair of the ID type and the
            # key of the record where the identifier is stored
            if isinstance(sub_id_type, (list, tuple)):
                sub_id_type, sub_id_attr = sub_id_type
            else:
                sub_id_attr = 'substrate'

            substrate_ups_all.update(
                set(
                    mapping.map_name(
                        p[sub_id_attr],
                        sub_id_type,
                        'uniprot',
                        self.ncbi_tax_id,
                    )
                )
            )

        # looking up sequences in all isoforms;
        # `substrate_ups` collects (substrate ID, isoform) pairs
        substrate_ups = []

        for s in substrate_ups_all:

            if 'substrate_isoform' in p and p['substrate_isoform']:

                # the isoform is given by the resource: accepted
                # without matching against the sequence
                substrate_ups.append((s, p['substrate_isoform']))

            else:

                se = self.get_seq(s)

                if se is None:
                    continue

                for isof in se.isoforms():

                    if 'instance' in p and p['instance'] is not None:

                        # matching the whole motif instance
                        if se.match(
                            p['instance'],
                            p['start'],
                            p['end'],
                            isoform = isof,
                        ):

                            substrate_ups.append((s, isof))

                    else:

                        # matching only the residue
                        if se.match(
                            p['resaa'],
                            p['resnum'],
                            isoform = isof,
                        ):

                            substrate_ups.append((s, isof))

        if self.trace:

            # recording the outcome of the ID translation, only the
            # first time each identifier is encountered
            if p['substrate'] not in self.sub_ambig:

                self.sub_ambig[p['substrate']] = substrate_ups

            for k in p['kinase']:

                if k not in self.kin_ambig:

                    self.kin_ambig[k] = kinase_ups
            # generating report on non matching substrates
            if len(substrate_ups) == 0:

                # NOTE(review): `substrate_ups_all` is built above from
                # the results of `mapping.map_name`; if those are plain
                # ID strings, `s[0]` and `s[1]` below are their first two
                # characters rather than an (ID, isoform) pair -- confirm.
                # Also `p['instance']`, `p['start']` and `p['end']` are
                # accessed without checking that the keys exist.
                for s in substrate_ups_all:

                    se = self.get_seq(s[0])

                    if se is None:
                        continue

                    self.nomatch.append(
                        (
                            s[0],
                            s[1],
                            (
                                p['substrate_refseq']
                                    if 'substrate_refseq' in p else
                                '',
                                s,
                                p['instance'],
                                se.get(
                                    p['start'],
                                    p['end']
                                ),
                            ),
                        )
                    )

        # building objects representing the enzyme-substrate interaction(s)

        # the modification type falls back to phosphorylation
        if 'typ' not in p:
            p['typ'] = 'phosphorylation'

        # resources named in the record (as secondary resources if
        # `input_param` provides `get_via`), plus the resource itself
        _resources = tuple(
            (
                self.input_param.get_via(name)
                    if hasattr(self.input_param, 'get_via') else
                name
            )
            for name in (
                p['databases'] if 'databases' in p else ()
            )
        )
        _resources += (
            (self.name,)
                if isinstance(self.input_param, str) else
            (self.input_param,)
        )

        # collecting the evidences: one for each resource,
        # all with the references of the record
        evidences = evidence.Evidences(
            evidence.Evidence(
                resource = _res,
                references = p['references'] if 'references' in p else None
            )
            for _res in _resources
        )

        for s in substrate_ups:

            # building the objects representing the substrate;
            # here `s` is a pair of substrate ID and isoform
            se = self.get_seq(s[0])

            if se is None:
                continue

            res = intera.Residue(
                p['resnum'],
                p['resaa'],
                s[0],
                isoform = s[1],
                ncbi_tax_id = self.ncbi_tax_id,
            )

            # if the record has no motif instance, the region
            # around the residue is taken from the sequence
            # NOTE(review): the record `p` is updated in place, hence
            # a region found for one (substrate, isoform) pair is kept
            # for the subsequent pairs of this loop; and if no region
            # is found and the keys are missing from the record, the
            # subscripts below raise `KeyError` -- confirm if intended
            if 'instance' not in p or p['instance'] is None:

                reg = se.get_region(
                    p['resnum'],
                    p['start'] if 'start' in p else None,
                    p['end'] if 'end' in p else None,
                    isoform = s[1],
                )

                if reg is not None:

                    p['start'], p['end'], p['instance'] = reg

            mot = intera.Motif(
                    s[0],
                    p['start'],
                    p['end'],
                    instance = p['instance'],
                    isoform = s[1],
                    ncbi_tax_id = self.ncbi_tax_id,
                )

            ptm = intera.Ptm(
                s[0],
                motif = mot,
                residue = res,
                typ = p['typ'],
                evidences = evidences,
                isoform = s[1],
                ncbi_tax_id = self.ncbi_tax_id,
            )

            for k in kinase_ups:

                # unless mixed organisms are allowed, both partners
                # must belong to the taxon of the processor
                if (
                    not self.allow_mixed_organisms and (
                        self.get_taxon(k) != self.ncbi_tax_id or
                        self.get_taxon(s[0]) != self.ncbi_tax_id
                    )
                ):
                    continue

                # the enzyme (kinase)
                dom = intera.Domain(
                    protein = k,
                    ncbi_tax_id = self.ncbi_tax_id,
                )

                dommot = intera.DomainMotif(
                    domain = dom,
                    ptm = ptm,
                    evidences = evidences,
                )

                # copying the resource specific extra attributes
                # from the record to the interaction object
                if hasattr(self.input_param, 'extra_attrs'):

                    for attr, key in iteritems(self.input_param.extra_attrs):

                        if key in p:

                            setattr(dommot, attr, p[key])

                yield dommot


    def input_is(self, i, op = '__eq__'):
        """
        Compares `i` to the lowercase name of the resource.

        :param i: The object to compare, typically a string.
        :param str op:
            Name of the method of `i` to call with the lowercase
            resource name, by default the equality operator.

        :return:
            ``False`` if the type of the resource name is not among
            ``_const.CHAR_TYPES``, otherwise the result of the method.
        """

        if type(self.name) not in _const.CHAR_TYPES:

            return False

        compare = getattr(i, op)

        return compare(self.name.lower())


    def __iter__(self):
        """
        Iterates through the enzyme-substrate interactions: each
        record of the loaded data is processed by ``_process`` and the
        resulting interactions are yielded one by one.
        """

        for record in self.data:

            yield from self._process(record)


    def __len__(self):
        """
        Number of raw records loaded; zero if no data has been
        loaded yet.
        """

        if not hasattr(self, 'data'):

            return 0

        return len(self.data)


    def __repr__(self):
        """
        Short description including the number of raw records.
        """

        n_records = len(self)

        return '<Enzyme-substrate processor: %u records>' % n_records




[docs]
class EnzymeSubstrateOrthologyProcessor(
        orthology.PtmOrthology,
        EnzymeSubstrateProcessor,
        session_mod.Logger
    ):



[docs]
    def __init__(
            self,
            ncbi_tax_id,
            input_param = None,
            input_method = None,
            map_by_orthology_from = None,
            trace = False,
            id_type_enzyme = None,
            id_type_substrate = None,
            name = None,
            orthology_only_swissprot = True,
            ptm_orthology_strict = False,
            **kwargs
        ):
        """
        Combines an `EnzymeSubstrateProcessor` and a
        `pypath.utils.orthology.PtmOrthology` object: enzyme-substrate
        interactions are loaded from a database for one or more source
        organisms and translated by orthology to one target organism.
        For example, mouse interactions can be obtained by translation
        from human and from rat. The interactions originally available
        for the target organism are not included: for those use an
        `EnzymeSubstrateProcessor`. To have both the original and the
        translated sets, from multiple databases, merged into a single
        set, use the `EnzymeSubstrateAggregator`.

        No data is loaded at instantiation, loading and translation
        happen upon iteration.

        :param int ncbi_tax_id:
            The NCBI Taxonomy ID the interactions should be
            translated to.
        :param input_param:
            Resource definition, passed to `EnzymeSubstrateProcessor`.
        :param str input_method:
            Data source for `EnzymeSubstrateProcessor`.
        :param map_by_orthology_from:
            NCBI Taxonomy ID(s) of the source organisms; if falsy,
            9606, 10090 and 10116 are used. The target organism is
            always removed from this set.
        :param bool orthology_only_swissprot:
            Use only SwissProt (i.e. not Trembl) at orthology
            translation.
        :param bool ptm_orthology_strict:
            Use only those homologous PTM pairs which are in
            PhosphoSite data, i.e. do not look for residues with same
            offset in protein sequence.
        :param **kwargs:
            Further arguments, passed to `EnzymeSubstrateProcessor`.

        See further options at `EnzymeSubstrateProcessor`.

        """

        if not hasattr(self, '_logger'):

            session_mod.Logger.__init__(self, name = 'enz_sub_orthology')

        self.target_taxon = ncbi_tax_id

        source_taxa = common.to_set(
            map_by_orthology_from or {9606, 10090, 10116}
        )
        # no point to translate from the target organism to itself
        source_taxa.discard(self.target_taxon)
        self.map_by_orthology_from = source_taxa

        # settings kept for the `EnzymeSubstrateProcessor`
        # which is initialized for each source organism in `__iter__`
        self.input_param = input_param
        self.input_method = input_method
        self.trace = trace
        self.id_type_enzyme = id_type_enzyme
        self.id_type_substrate = id_type_substrate
        self.name = name
        self.ptmprocargs = kwargs

        orthology.PtmOrthology.__init__(
            self,
            target = ncbi_tax_id,
            only_swissprot = orthology_only_swissprot,
            strict = ptm_orthology_strict,
        )



    def __iter__(self):
        """
        Iterates through enzyme-substrate interactions
        translated to another organism by orthology.

        For each source organism the `EnzymeSubstrateProcessor` part
        of this object is initialized again (which loads the data for
        that organism), then each interaction is translated to the
        target organism and the results are yielded.
        """

        for source_taxon in self.map_by_orthology_from:

            start_msg = (
                'Translating enzyme-substrate interactions '
                'from organism %u to %u.' % (
                    source_taxon,
                    self.target_taxon,
                )
            )
            self._log(start_msg)

            self.set_default_source(source_taxon)

            processor_args = dict(
                input_param = self.input_param,
                input_method = self.input_method,
                ncbi_tax_id = source_taxon,
                trace = self.trace,
                id_type_enzyme = self.id_type_enzyme,
                id_type_substrate = self.id_type_substrate,
                name = self.name,
                allow_mixed_organisms = True,
            )

            EnzymeSubstrateProcessor.__init__(
                self,
                **processor_args,
                **self.ptmprocargs,
            )

            loaded_msg = (
                'Enzyme-substrate interactions loaded from resource `%s` '
                'for organism %s, %u raw records.' % (
                    self.name,
                    source_taxon,
                    len(self),
                )
            )
            self._log(loaded_msg)

            for source_es in EnzymeSubstrateProcessor.__iter__(self):

                yield from self.translate(source_es)


    def __repr__(self):
        """
        Short description with the target and the source taxa.
        """

        sources = ', '.join(map(str, self.map_by_orthology_from))

        template = (
            '<Enzyme-substrate orthology processor, '
            'target taxon: %u, source taxon(s): %s>'
        )

        return template % (self.target_taxon, sources)




[docs]
class EnzymeSubstrateAggregator(session_mod.Logger):



[docs]
    def __init__(self,
            input_param = None,
            exclude = None,
            ncbi_tax_id = 9606,
            map_by_orthology_from = None,
            trace = False,
            orthology_only_swissprot = True,
            ptm_orthology_strict = False,
            nonhuman_direct_lookup = True,
            inputargs = None,
            pickle_file = None,
        ):
        """
        Builds a database of enzyme-substrate interactions from
        multiple resources, or loads one from a pickle file. The
        database is built or loaded already at instantiation.

        :param input_param:
            Iterable of resource definitions, either dicts or objects
            with the `name` and `input_method` attributes. If falsy,
            the definitions are collected from the resource controller
            (see ``set_inputs``).
        :param exclude:
            Stored on the instance; not used by the methods of the
            class visible in this module section.
        :param int ncbi_tax_id:
            NCBI Taxonomy ID of the organism the database is built for.
        :param map_by_orthology_from:
            NCBI Taxonomy IDs of the organisms to translate
            interactions from by orthology. If `None`, 9606, 10090 and
            10116 are used for non-human targets and nothing for human
            (see ``build``).
        :param bool trace:
            Passed to the processors, see `EnzymeSubstrateProcessor`.
        :param bool orthology_only_swissprot:
            Passed to `EnzymeSubstrateOrthologyProcessor`.
        :param bool ptm_orthology_strict:
            Passed to `EnzymeSubstrateOrthologyProcessor`.
        :param bool nonhuman_direct_lookup:
            For non-human organisms, load the interactions directly
            from the resources which support multiple organisms.
        :param dict inputargs:
            Stored on the instance, replaced by an empty dict in
            ``build`` if falsy.
        :param str pickle_file:
            Path to a pickle file; if provided, the database is loaded
            from this file instead of building it.
        """

        session_mod.Logger.__init__(self, name = 'enz_sub')

        # explicit assignments instead of copying `locals()`: the latter
        # stored also the instance itself in a `self` attribute,
        # creating a reference cycle
        self.input_param = input_param
        self.exclude = exclude
        self.ncbi_tax_id = ncbi_tax_id
        self.map_by_orthology_from = map_by_orthology_from
        self.trace = trace
        self.orthology_only_swissprot = orthology_only_swissprot
        self.ptm_orthology_strict = ptm_orthology_strict
        self.nonhuman_direct_lookup = nonhuman_direct_lookup
        self.inputargs = inputargs
        self.pickle_file = pickle_file

        self.main()



    def reload(self):
        """
        Reloads the module where the class of this object is defined
        and rebinds the instance to the class of the same name in the
        reloaded module.
        """

        cls = self.__class__
        module_name = cls.__module__
        top_level = module_name.split('.')[0]

        module = __import__(module_name, fromlist = [top_level])
        imp.reload(module)

        self.__class__ = getattr(module, cls.__name__)


    def main(self):
        """
        Populates the database: loads it from the pickle file if one
        has been provided, otherwise builds it from the resources.
        """

        if not self.pickle_file:

            self.build()
            return

        self.load_from_pickle(pickle_file = self.pickle_file)


    def load_from_pickle(self, pickle_file = None):
        """
        Loads the database from a pickle file created by
        ``save_to_pickle`` and rebuilds the PTM lookup dicts.

        :param str pickle_file:
            Path to the pickle file; if not provided, the
            ``pickle_file`` attribute of the instance is used.
        """

        # the argument used to be ignored in favour of the attribute,
        # although the log message below reported the argument
        pickle_file = pickle_file or self.pickle_file

        self._log('Loading from file `%s`.' % pickle_file)

        # NOTE: unpickling can execute arbitrary code,
        # load only files from trusted sources
        with open(pickle_file, 'rb') as fp:

            self.enz_sub, self.references = pickle.load(fp)

        self.update_ptm_lookup_dict()


    def save_to_pickle(self, pickle_file):
        """
        Saves the database to a pickle file: a tuple of the
        enzyme-substrate interactions (``enz_sub``) and the
        references (``references``).

        :param str pickle_file: Path to the pickle file.
        """

        # the message contained a duplicated word (`file file`)
        self._log('Saving to file `%s`.' % pickle_file)

        with open(pickle_file, 'wb') as fp:

            pickle.dump(
                obj = (
                    self.enz_sub,
                    self.references,
                ),
                file = fp,
            )


    def build(self):
        """
        Builds the database from the resources.

        Sets up the defaults: ``inputargs`` becomes an empty dict if
        falsy; if no source organisms have been set for the orthology
        translation, 9606, 10090 and 10116 are used for non-human
        targets and none for human. The target organism is removed
        from the source organisms. Then the resource definitions are
        collected, the interactions are loaded and the redundant ones
        are merged.
        """

        self.inputargs = self.inputargs or {}

        source_taxa = self.map_by_orthology_from

        if source_taxa is None:

            if self.ncbi_tax_id != 9606:

                source_taxa = {9606, 10090, 10116}

            else:

                source_taxa = set()

        source_taxa = set(source_taxa)
        source_taxa.discard(self.ncbi_tax_id)
        self.map_by_orthology_from = source_taxa

        self.set_inputs()

        self.build_list()
        self.unique()


    def __iter__(self):
        """
        Iterates through all enzyme-substrate interactions
        in the database, pair by pair.
        """

        yield from itertools.chain(*self.enz_sub.values())


    def __len__(self):
        """
        Total number of enzyme-substrate interactions in the database.
        """

        return sum(map(len, self.enz_sub.values()))


    def __repr__(self):
        """
        Short description including the number of interactions.
        """

        n_relationships = len(self)

        return (
            '<Enzyme-substrate database: %s relationships>' % n_relationships
        )


    def __getitem__(self, *args):
        """
        Looks up the interactions of an enzyme-substrate pair. The
        pair can be given as one tuple (the usual subscript syntax) or
        as separate arguments; see ``get_enzyme_substrate``.
        """

        first = args[0]
        pair = first if isinstance(first, tuple) else args

        return self.get_enzyme_substrate(*pair)


    def get_enzyme_substrate(self, enzyme, substrate):
        """
        Looks up the interactions of an enzyme-substrate pair.

        :param enzyme: The enzyme, converted to `entity.Entity`.
        :param substrate: The substrate, converted to `entity.Entity`.

        :return:
            The list of interactions stored for the pair, or `None`
            if the pair is not in the database.
        """

        pair = (entity.Entity(enzyme), entity.Entity(substrate))

        return self.enz_sub.get(pair)


    def set_inputs(self):
        """
        Makes sure resource definitions are available: if none has
        been provided, the enzyme-substrate resources are collected
        from the resource controller.
        """

        definitions = self.input_param

        if not definitions:

            controller = resources.get_controller()
            definitions = controller.collect_enzyme_substrate()

        self.input_param = definitions



[docs]
    def build_list(self):
        """
        Builds a full list of enzyme-substrate interactions from
        all the requested sources. This list might contain redundant
        elements which later will be merged by `unique`.
        This 'full list' is organised into a dict by pairs of proteins
        in order to make it more efficient to compile a unique set
        for each pair.

        Each resource definition in ``input_param`` is either a dict
        (used as keyword arguments for the processors) or an object
        (passed to the processors as ``input_param``). A failure of
        one resource is logged and does not stop the others from
        loading. Besides ``enz_sub``, the ``references`` dict is
        populated: references by resource and interaction key.
        """

        def extend_lists(enz_sub):
            # adds the interactions to the dict of protein pairs
            # and their references to the dict of references

            for es in enz_sub:

                key = (es.domain.protein, es.ptm.protein)

                self.enz_sub.setdefault(key, []).append(es)

                for ev in es.evidences:

                    resource_key = (ev.resource.name, ev.resource.via)

                    self.references[resource_key][es.key()].update(
                        ev.references
                    )

        def from_param(input_param, key):
            # reads a mandatory setting from a resource definition

            return (
                input_param[key]
                    if isinstance(input_param, dict) else
                getattr(input_param, key)
            )

        def organisms_supported(input_param):
            # dict definitions used to raise `AttributeError` here,
            # making the resource fail for all non-human organisms

            if isinstance(input_param, dict):

                return input_param.get('organisms_supported', False)

            return input_param.organisms_supported

        self._log(
            'Starting to build enzyme-substrate '
            'database for organism `%u`.' % self.ncbi_tax_id
        )

        self.enz_sub = {}
        self.references = collections.defaultdict(
            lambda: collections.defaultdict(set)
        )

        for input_param in self.input_param:

            name = from_param(input_param, 'name')

            try:

                input_method = from_param(input_param, 'input_method')

                self._log(
                    'Loading enzyme-substrate interactions '
                    'from resource `%s` by method `%s`.' % (
                        name,
                        input_method,
                    )
                )

                args = (
                    input_param
                        if isinstance(input_param, dict) else
                    {'input_param': input_param}
                )

                # direct lookup: always for human, for other organisms
                # only if enabled and supported by the resource
                if (
                    self.ncbi_tax_id == 9606 or (
                        self.nonhuman_direct_lookup and
                        organisms_supported(input_param)
                    )
                ):

                    self._log(
                        'Loading enzyme-substrate interactions '
                        'for taxon `%u`.' % self.ncbi_tax_id
                    )

                    proc = EnzymeSubstrateProcessor(
                        ncbi_tax_id = self.ncbi_tax_id,
                        trace = self.trace,
                        **args,
                    )

                    extend_lists(proc)

                # translation by orthology from the source organisms
                if self.map_by_orthology_from:

                    source_taxons_str = ', '.join(
                        '%u' % tax for tax in self.map_by_orthology_from
                    )

                    self._log(
                        'Mapping `%s` by orthology from taxons %s to %u.' % (
                            input_method,
                            source_taxons_str,
                            self.ncbi_tax_id,
                        )
                    )

                    proc = EnzymeSubstrateOrthologyProcessor(
                        ncbi_tax_id = self.ncbi_tax_id,
                        map_by_orthology_from = self.map_by_orthology_from,
                        trace = self.trace,
                        orthology_only_swissprot = self.orthology_only_swissprot,
                        ptm_orthology_strict = self.ptm_orthology_strict,
                        **args
                    )

                    extend_lists(proc)

                    self._log(
                        'Finished translating `%s` by orthology '
                        'from %s to %u.' % (
                            input_method,
                            source_taxons_str,
                            self.ncbi_tax_id,
                        )
                    )

                self._log(
                    'Finished loading enzyme-substrate data '
                    'from resource `%s`.' % name
                )

            except Exception as e:

                # best effort: one failing resource
                # should not prevent loading the others
                self._log('Failed to load resource `%s`.' % name)
                self._log_traceback()

                try:

                    traceback.print_tb(
                        e.__traceback__,
                        file = sys.stdout,
                    )

                except Exception:

                    self._log('Failed handling exception.')
                    self._log_traceback()

        # plain dict: no keys created by lookups any more
        self.references = dict(self.references)
        self.update_ptm_lookup_dict()

        self._log(
            'Finished building enzyme-substrate database '
            'for organism `%u`, resulted %u relationships.' % (
                self.ncbi_tax_id,
                len(self),
            )
        )



    def update_ptm_lookup_dict(self):
        """
        Rebuilds the lookup dicts from the interactions in the
        database: ``ptm_to_enzyme`` contains the set of enzymes for
        each PTM, while ``ptms`` maps each PTM to itself.
        """

        enzymes_by_ptm = collections.defaultdict(set)
        ptms = {}

        for (enzyme, _substrate), interactions in self.enz_sub.items():

            for interaction in interactions:

                the_ptm = interaction.ptm
                enzymes_by_ptm[the_ptm].add(enzyme)
                ptms[the_ptm] = the_ptm

        self.ptms = ptms
        self.ptm_to_enzyme = dict(enzymes_by_ptm)



[docs]
    def unique(self):
        """
        Merges the redundant elements of the interaction list.
        Elements are redundant if they agree in all their attributes
        except the sources, references and isoforms.
        The merging is done separately for each pair of proteins
        by ``uniq_enz_sub``.
        """

        self.unique_list = set()

        # iterating a snapshot of the keys, values are replaced in place
        for pair in list(self.enz_sub):

            self.enz_sub[pair] = self.uniq_enz_sub(self.enz_sub[pair])



    @staticmethod
    def uniq_enz_sub(enz_sub):
        """
        Merges the redundant elements of a list of interactions.

        :param enz_sub:
            Iterable of interaction objects supporting equality
            comparison and having a ``merge`` method.

        :return:
            List of the unique interactions in order of first
            appearance; each further element which is equal to an
            already kept one is merged into that.
        """

        kept = []

        for candidate in enz_sub:

            found = False

            for existing in kept:

                if candidate == existing:

                    existing.merge(candidate)
                    found = True

            if not found:

                kept.append(candidate)

        return kept


    def make_df(self, tax_id = False, resources_only_primary = False):
        """
        Creates a data frame of the enzyme-substrate interactions
        and stores it in the ``df`` attribute.

        :param bool tax_id:
            Add an ``ncbi_tax_id`` column with the taxon of the
            database.
        :param bool resources_only_primary:
            Passed to the ``get_line`` method of the interactions.
        """

        self._log('Creating enzyme-substrate interaction data frame.')

        hdr = [
            'enzyme',
            'enzyme_genesymbol',
            'substrate',
            'substrate_genesymbol',
            'isoforms',
            'residue_type',
            'residue_offset',
            'modification',
            'sources',
            'references',
            'curation_effort',
        ]

        dtypes = {
            'enzyme': 'category',
            'substrate': 'category',
            'isoforms': 'category',
            'residue_type': 'category',
            'residue_offset': 'int32',
            'modification': 'category',
            'sources': 'category',
            'references': 'category',
            'curation_effort': 'int32',
        }

        records = [
            dm.get_line(resources_only_primary = resources_only_primary)
            for dm in self
        ]

        df = pd.DataFrame(records, columns = hdr).astype(dtypes)

        self.df = df.loc[:,hdr]

        if tax_id:

            self.df['ncbi_tax_id'] = [self.ncbi_tax_id] * self.df.shape[0]

        self._log(
            'Created enzyme-substrate interaction data frame. '
            'Memory usage: %s.' % common.df_memory_usage(self.df)
        )


    def export_table(self, fname):
        """
        Creates the data frame of the interactions and writes it
        into a tab separated file, without the index.

        :param str fname: Path of the output file.
        """

        self.make_df()

        csv_args = {'sep': '\t', 'index': False}
        self.df.to_csv(fname, **csv_args)



[docs]
    def assign_to_network(self, pa):
        """
        Assigns enzyme-substrate interactions to the edges of a
        network in a py:class:``pypath.legacy.main.PyPath`` instance.

        The interactions are appended to the ``ptm`` edge attribute,
        which is created if it does not exist yet. Interactions
        between pairs which are not connected in the network are
        ignored.

        :param pa: The network object; its ``graph`` attribute is
            used as an igraph graph.
        """

        pa.update_vname()

        if 'ptm' not in pa.graph.es.attributes():
            pa.graph.es['ptm'] = [[] for _ in pa.graph.es]

        for key, ptms in self.enz_sub.items():

            nodes = pa.get_node_pair(
                key[0],
                key[1],
                directed = pa.graph.is_directed(),
            )

            e = None

            if nodes:
                e = pa.graph.get_eid(nodes[0], nodes[1], error = False)

            # edge IDs start from zero; with `error = False`
            # a missing edge is signalled by -1
            if isinstance(e, int) and e >= 0:

                if pa.graph.es[e]['ptm'] is None:
                    pa.graph.es[e]['ptm'] = []

                pa.graph.es[e]['ptm'].extend(ptms)



    @property
    def resources(self):
        """
        The set of resources of all interactions in the database: the
        union of what ``get_resource_names_via(via = None)`` of the
        evidences returns for each interaction. An empty set for an
        empty database.
        """

        # `set.union` as an unbound method fails without arguments,
        # i.e. if the database is empty
        return set().union(*(
            es.evidences.get_resource_names_via(via = None)
            for es in self
        ))


    @property
    def resources_sorted(self):
        """
        The resources of the database as a sorted list. The elements
        are sorted as tuples; where the second element is `None`, an
        empty string takes its place in the sorting key.
        """

        def sort_key(res):

            if res[1] is None:

                return (res[0], '')

            return res

        return sorted(self.resources, key = sort_key)


    def update_summaries(self, collect_args = None):
        """
        Compiles summary statistics for each resource into
        ``self.summaries``, a dict keyed by the elements of
        ``self.resources_sorted``.

        For each resource the numbers of enzyme-substrate interactions
        (total, unique, shared), enzymes, substrates, references and
        curation effort items (total, unique, shared) are counted, and
        the modification types are listed with their frequencies.
        "Shared" and "unique" are relative to the other resources
        included in the summary.

        :param dict collect_args:
            Keyword arguments passed to the ``get_resource_names`` and
            ``count_resources`` methods of the evidences. Defaults to
            ``{'via': False}``. Resources with a non ``None`` second
            element are included only if ``via`` is missing from this
            dict or its value is not ``False``.
        """

        collect_args = collect_args or {'via': False}

        self.summaries = {}

        resources = [
            res for res in self.resources_sorted
            if (
                res[1] is None or
                'via' not in collect_args or
                collect_args['via'] != False
            )
        ]

        # ``set().union(...)`` is used throughout instead of
        # ``set.union(...)`` because the latter raises TypeError when
        # there is nothing to unite (e.g. only one resource)
        refs_by_resource = {
            resource: set().union(*self.references[resource].values())
            for resource in resources
        }
        # one curation effort item is an interaction key extended by
        # one of its references
        curation_effort_by_resource = {
            resource: {
                key + (ref,)
                for key, refs in self.references[resource].items()
                for ref in refs
            }
            for resource in resources
        }

        for resource in resources:

            # NOTE(review): membership is tested with the whole resource
            # key here, while the counts below use ``resource[0]`` --
            # confirm that ``get_resource_names`` yields the same kind
            # of keys as ``resources_sorted``
            n_total = sum(
                1
                for es in self
                if resource in es.evidences.get_resource_names(**collect_args)
            )

            # the interactions with evidence from this resource
            supported = [es for es in self if resource[0] in es.evidences]

            n_resources = [
                es.evidences.count_resources(**collect_args)
                for es in supported
            ]
            n_unique = sum(1 for n in n_resources if n == 1)
            n_shared = sum(1 for n in n_resources if n > 1)

            ce_this = curation_effort_by_resource[resource]
            ce_others = set().union(*(
                ce
                for res, ce in curation_effort_by_resource.items()
                if res != resource
            ))
            curation_effort = len(ce_this)
            curation_effort_shared = len(ce_this & ce_others)
            curation_effort_unique = len(ce_this - ce_others)

            refs_this = refs_by_resource[resource]
            refs_others = set().union(*(
                refs
                for res, refs in refs_by_resource.items()
                if res != resource
            ))
            references = len(refs_this)
            references_shared = len(refs_this & refs_others)
            references_unique = len(refs_this - refs_others)

            enzymes = len(set(es.domain.protein for es in supported))
            substrates = len(set(es.ptm.protein for es in supported))

            # most frequent first; empty type labels are left out
            type_counts = collections.Counter(
                es.ptm.typ for es in supported
            )
            modification_types = ', '.join(
                '%s (%u)' % (typ, cnt)
                for typ, cnt in type_counts.most_common()
                if typ
            )

            self.summaries[resource] = {
                'name': resource,
                'n_es_total': n_total,
                'n_es_unique': n_unique,
                'n_es_shared': n_shared,
                'n_enzymes': enzymes,
                'n_substrates': substrates,
                'references': references,
                'references_unique': references_unique,
                'references_shared': references_shared,
                'curation_effort': curation_effort,
                'curation_effort_unique': curation_effort_unique,
                'curation_effort_shared': curation_effort_shared,
                'modification_types': modification_types,
            }


    def summaries_tab(self, outfile = None, return_table = False):
        """
        Arranges the contents of ``self.summaries`` into a table.

        The first row is the header; each further row belongs to one key
        of ``self.summaries``, with all values converted to strings.
        Rows are ordered by key, a ``None`` at index 1 of the key
        counting as an empty string.

        :param str outfile:
            If provided, the table is written to this path with tab
            separated fields and one row per line.
        :param bool return_table:
            Return the table as a list of lists.

        :return:
            The table if ``return_table`` is true, otherwise ``None``.
        """

        fields = (
            ('name', 'Resource'),
            ('n_es_total', 'E-S interactions'),
            ('n_es_shared', 'Shared E-S interactions'),
            ('n_es_unique', 'Unique E-S interactions'),
            ('n_enzymes', 'Enzymes'),
            ('n_substrates', 'Substrates'),
            ('references', 'References'),
            ('references_shared', 'Shared references'),
            ('references_unique', 'Unique references'),
            ('curation_effort', 'Curation effort'),
            ('curation_effort_shared', 'Shared curation effort'),
            ('curation_effort_unique', 'Unique curation effort'),
            ('modification_types', 'Modification types'),
        )

        def _row_order(res):

            if res[1] is None:
                return (res[0], '')

            return res

        # header first, then one row for each summarised resource
        tab = [[label for _, label in fields]]

        for src in sorted(self.summaries, key = _row_order):

            record = self.summaries[src]
            tab.append([str(record[key]) for key, _ in fields])

        if outfile:

            lines = ['\t'.join(row) for row in tab]

            with open(outfile, 'w') as fp:

                fp.write('\n'.join(lines))

        return tab if return_table else None





[docs]
def init_db(**kwargs):
    """
    Creates a new ``EnzymeSubstrateAggregator`` and binds it to the
    module level name ``db``, replacing the previous one if it exists.

    :param kwargs:
        Passed to ``EnzymeSubstrateAggregator``.
    """

    global db

    db = EnzymeSubstrateAggregator(**kwargs)




[docs]
def get_db(**kwargs):
    """
    Returns the object bound to the module level name ``db``; if this
    name does not exist yet, first creates it by calling ``init_db``.

    :param kwargs:
        Passed to ``init_db``; not used if ``db`` already exists.
    """

    module_ns = globals()

    if 'db' in module_ns:

        return module_ns['db']

    init_db(**kwargs)

    return module_ns['db']