Source code for pypath.resources.controller

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from __future__ import annotations

"""
Highest level resource management API.
"""

from future.utils import iteritems

from typing import Iterable, Literal

import json
import os
import copy
import importlib as imp
import itertools
import functools

import pypath.share.session as session_mod
import pypath.share.common as common
import pypath.internals.resource as resource_base
import pypath.resources._network as netres
from . import licenses as licenses


[docs] class ResourceController(session_mod.Logger): """ Resource controller is aimed to be the central part of pypath communication with resources. 14.01.2020: the initial step for resource controller development: used for /info page generating for the server. 14.02.2020: storing and reading enzyme-substrate resource definitions from the JSON; class inherits from session.Logger """
[docs] def __init__( self, resource_info_path = ( session_mod.session().module_root, 'resources', 'data', 'resources.json', ), use_package_path = False, ): session_mod.Logger.__init__(self, name = 'resource_controller') self.data = None if use_package_path: resource_info_path = ( ( os.path.dirname(os.path.abspath(__file__)), ) + resource_info_path ) self.resource_info_path = os.path.join(*resource_info_path) self._log( 'Loading resource information from ' 'JSON file: %s' % self.resource_info_path ) self.update()
def reload(self): modname = self.__class__.__module__ mod = __import__(modname, fromlist = [modname.split('.')[0]]) imp.reload(mod) new = getattr(mod, self.__class__.__name__) setattr(self, '__class__', new)
[docs] def update(self, path = None, force = False, remove_old = False): """ Reads resource information from a JSON file. :arg str,NoneType path: Path to a JSON file with resource information. By default the path in py:attr:``resource_info_path`` used which points by default to the built in resource information file. :arg bool force: Read the file again even if no new path provided and it has been read already. :arg bool remove_old: Remove old data before reading. By default the data will be updated with the contents of the new file potentially overwriting identical keys in the old data. """ if self.data and not path and not force: return if not self.data or remove_old: self.data = {} path = path or self.resource_info_path try: with open(path, encoding = 'utf-8') as json_file: resources_data = json.load(json_file) self.data = resources_data self._log( 'Resource information has been read from `%s`.' % path ) self.update_licenses() except IOError: self._console( 'File %s with resources information cannot be accessed. ' 'Check the name of the file.' % path )
def update_licenses(self): self.license_db = licenses.Licenses() self.licenses = {} self.synonyms = {} self.secondary = {} self._log('Updating resource license information.') for res, res_data in iteritems(self.data): if 'license' in res_data: res_data['license'] = self.license_db[res_data] self.licenses[res] = res_data['license'] for synonym in res_data.get('synonyms', ()): self.licenses[synonym] = res_data['license'] self.synonyms[synonym] = res if 'components' in res_data: self.secondary[res] = set(res_data['components']) else: self._log(f'No license for resource `{res}`.') def __getitem__(self, key): return self.resource(key) def resource(self, name): return self._get(name, dct = self.data) def name(self, name): if name in self.synonyms: name = self.synonyms[name] return name
[docs] @functools.cache def secondary_resources(self, name, postfix = False): """ Args: name: Name of a composite resource. postfix: Append the name of the primary resource to the secondary, separated by an underscore, e.g. "TFactS_CollecTRI". """ name = self.name(name) secondary = self.secondary.get(name, set()) if postfix: secondary = {f'{sec}_{name}' for sec in secondary} return secondary
def _get(self, name, dct): if name in dct: return dct[name] elif name in self.synonyms: name = self.synonyms[name] return dct[name] elif '_' in name: name = name.split('_', maxsplit = 1)[0] return self._get(name, dct) else: self._log('Could not find resource `%s`.' % name) def license(self, name): return self._get(name, dct = self.licenses)
[docs] def license_filter( self, resources: list | dict, purpose: Literal[ 'academic', 'commercial', 'for-profit', 'non-profit', 'ignore', ] | None = None, sharing: Literal[ 'alike', 'free', 'noderiv', 'noshare', 'share', 'deriv', 'ignore', ] | None = None, attrib: Literal[ 'attrib', 'free', 'noattrib', 'composite', 'ignore', ] | None = None, ) -> list | dict: """ Filters a list of resources by their license. """ self.add_resource_attrs(resources) return common.compr( obj = resources, filter = lambda r: r.license.enables(purpose, sharing, attrib), )
[docs] def add_resource_attrs( self, resources: dict | Iterable[resource_base.AbstractResource], ) -> None: """ Adds resource attributes to a list of resources. It modifies the instances in-place, returns nothing. """ _ = common.compr( resources, lambda r: setattr(r, 'resource_attrs', self.resource(r.name)), )
def collect(self, data_type): resource_cls = getattr( resource_base, '%sResource' % ( ''.join(n.capitalize() for n in data_type.split('_')) ) ) result = [] for name, attrs in iteritems(self.data): if 'inputs' in attrs and data_type in attrs['inputs']: args = copy.deepcopy(attrs['inputs'][data_type]) args['resource_attrs'] = attrs if 'name' not in args: args['name'] = name result.append( resource_cls(**args) ) return result def collect_enzyme_substrate(self): return self.collect('enzyme_substrate')
[docs] def collect_network( self, datasets: Iterable[ Literal[ 'pathway', 'pathway_noref', 'pathway_all', 'activity_flow', 'mirna_target', 'dorothea', 'tfregulons', 'omnipath', 'reaction_pc', 'enzyme_substrate', 'extra_directions', 'small_molecule_protein', 'tf_mirna', 'pathwaycommons', 'pathwaycommons_transcription', 'interaction', 'interaction_htp', 'interaction_misc', 'ligand_receptor', 'lncrna_target', 'transcription_onebyone', 'transcription_dorothea', 'ptm', 'ptm_noref', 'ptm_all', 'reaction', 'reaction_misc', 'negative', ], ] | None = 'pathway', interaction_types: Iterable[ Literal[ 'post_translational', 'transcriptional', 'small_molecule_protein', 'post_transcriptional', ], ] | None = 'post_translational', data_models: Iterable[ Literal[ 'activity_flow', 'interaction', 'enzyme_substrate', 'process_description', 'ligand_receptor', 'drug_target', ], ] | None = 'activity_flow', license_purpose: Literal[ 'academic', 'commercial', 'for-profit', 'non-profit', 'ignore', ] = 'ignore', license_sharing: Literal[ 'alike', 'free', 'noderiv', 'noshare', 'share', 'deriv', 'ignore', ] = 'ignore', license_attrib: Literal[ 'attrib', 'free', 'noattrib', 'composite', 'ignore', ] = 'ignore', **kwargs ) -> dict: """ Collect network (interaction) resource definitions. Args: interaction_types: Include only these interaction types. data_models: Inclde only these data models. datasets: Process only these datasets. Note: there are many synonyms and overlaps among datasets. In addition, the overlaps might apply slightly different settings for the same resource, e.g. in `pathway`, interactions must have literature references, while in `pathway_noref` the same resources might allow interactions without references. The safest is to process only one dataset at a time and load them into the `Network` object sequentially. license_purpose: Do not include the resources that are not legally compatible with the defined purpose. license_sharing: Include only resources that allow the desired redistribution conditions. E.g. "deriv" means that the resources must allow the sharing of their derivative (altered) versions. license_attrib: Include only resources that allow the desired level of attribution. E.g. "noattrib" means that you can use the resource without even mentioning who created it. kwargs: Custom filters. Names should be attributes of the resource or the `NetworkInput` object. The special key `__resource__` can be used to refer to the whole `NetworkResource` object. For simple values, the test is equality, for arrays incidence, while custom callables can be provided for more flexible filters. """ interaction_types = common.to_set(interaction_types) data_models = common.to_set(data_models) datasets = common.to_set(datasets) kwargs = { k: v if callable(v) else lambda x: x in common.to_set(v) for k, v in kwargs.items() } resources = itertools.chain(*( getattr(netres, dset).items() for dset in datasets )) resources = { key: res for key, res in resources if ( not interaction_types or res.interaction_type in interaction_types ) and ( not datasets or res.data_model in data_models ) and all( fltr( res if key == '__resource__' else getattr(res, getattr(res.networkinput, key)) ) for key, fltr in kwargs.items() ) } resources = self.license_filter( resources, purpose = license_purpose, sharing = license_sharing, attrib = license_attrib, ) return resources
# synonym collect_interaction = collect_network