# Source code for pypath.internals.resource

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from __future__ import annotations

"""
Generic objects for representing resources.
"""

from future.utils import iteritems

from typing import Iterable, Mapping, TYPE_CHECKING

if TYPE_CHECKING:

    import pypath.internals.license as License

import os
import collections
import copy

try:
    import cPickle as pickle
except:
    import pickle


import pypath.inputs as inputs
import pypath.share.common as common
import pypath.share.session as session_mod


class AbstractResource(session_mod.Logger):
    """
    Generic class for downloading, processing and serving
    data from a resource.
    """

    def __init__(
            self,
            name,
            ncbi_tax_id = 9606,
            input_method = None,
            input_args = None,
            dump = None,
            data_attr_name = None,
            **kwargs
        ):
        """
        name : str
            Custom name for the resource.
        ncbi_tax_id : int
            Stored on the instance as ``ncbi_tax_id``; not used by this
            base class otherwise.
        input_method : callable
            Method providing the input data. Any other truthy value is
            resolved later by ``inputs.get_method`` (see ``set_method``).
        input_args : dict
            Keyword arguments passed to the input method.
        dump
            Either the path of a pickle file or the data object itself.
        data_attr_name : str
            Name of the attribute that holds the data restored from a
            dump and written by ``save_to_pickle``; defaults to ``data``.
        **kwargs
            Accepted and ignored by this base class.
        """

        if not hasattr(self, '_log_name'):

            session_mod.Logger.__init__(self, name = 'resource')

        self.dump = dump
        self.name = name
        self._data_attr_name = data_attr_name or 'data'
        self._input_method = input_method
        self.input_args = input_args or {}
        self.ncbi_tax_id = ncbi_tax_id


    def load(self):
        """
        Populates the resource either from the dump or, if no dump is
        available, by calling the input method and processing its output.
        The raw ``data`` attribute is removed afterwards.
        """

        self.set_method()
        from_dump = self.from_dump()

        if not from_dump:

            self.load_data()
            self.process()

        # When the dump has just been restored into the `data` attribute
        # itself, deleting it would throw away what we have loaded.
        restored_into_data = from_dump and self._data_attr_name == 'data'

        if hasattr(self, 'data') and not restored_into_data:

            delattr(self, 'data')


    def set_method(self):
        """
        Sets the data input method by looking up in ``inputs``
        module if necessary.
        """

        if callable(self._input_method):

            self.input_method = self._input_method

        elif self._input_method:

            self.input_method = inputs.get_method(self._input_method)


    def load_data(self):
        """
        Loads the data by calling ``input_method``.
        """

        self._log('Loading data from `%s`.' % self.name)

        self.set_method()

        if hasattr(self, 'input_method'):

            self.data = self.input_method(**self.input_args)


    def process(self):
        """
        Calls the ``_process_method``.
        """

        self._log('Processing data from `%s`.' % self.name)

        self._process_method()


    def _process_method(self):
        """
        Hook called by ``process``; does nothing in this base class.
        """

        pass


    def from_dump(self):
        """
        Restores the data from ``dump`` if one is available.

        Returns (bool): True if the data has been restored from the dump,
            False if there is no dump or the dump file does not exist.
        """

        # `_from_dump_callback` removes the `dump` attribute, hence a
        # repeated call must not assume it is still there.
        dump = getattr(self, 'dump', None)

        if dump is None:

            return False

        if isinstance(dump, str):

            if not os.path.exists(dump):

                # a path that points nowhere is not data: let the caller
                # fall back to the input method
                self._log('Dump file `%s` does not exist.' % dump)

                return False

            # NOTE(review): unpickling executes arbitrary code; the dump
            # file must come from a trusted source.
            with open(dump, 'rb') as fp:

                self._from_dump = pickle.load(fp)

        else:

            self._from_dump = dump

        self._from_dump_callback()

        return True


    def _from_dump_callback(self):
        """
        Moves the restored object to the data attribute and removes the
        temporary ``_from_dump`` and the ``dump`` attributes.
        """

        if hasattr(self, '_from_dump'):

            setattr(self, self._data_attr_name, self._from_dump)
            delattr(self, '_from_dump')
            delattr(self, 'dump')


    def save_to_pickle(self, pickle_file):
        """
        Pickles the data attribute into the file at ``pickle_file``.
        """

        with open(pickle_file, 'wb') as fp:

            pickle.dump(
                obj = getattr(self, self._data_attr_name),
                file = fp,
            )
class ResourceAttributes(object):
    """
    Container for the metadata of a resource: its name, data type,
    evidence types, dataset and any further attributes given as
    keyword arguments.
    """

    def __init__(
            self,
            name,
            data_type,
            evidence_types = None,
            dataset = None,
            **kwargs
        ):
        """
        name : str
            Name of the resource.
        data_type : str
            Label of the kind of data the resource provides.
        evidence_types : set
            Evidence types; an empty set if not provided.
        dataset
            Dataset label, assigned through the ``dataset`` property.
        **kwargs
            Each item is set as an attribute of the instance.
        """

        self.name = name
        self.data_type = data_type
        self.evidence_types = evidence_types or set()
        self.resource_attrs = {}

        for attr, value in kwargs.items():

            setattr(self, attr, value)

        # Must come after the loop above: the setter also updates
        # `networkinput` if it has been provided among the kwargs.
        self.dataset = dataset


    # Defining `__eq__` without `__hash__` leaves instances of this
    # class unhashable; the subclasses in this module define `__hash__`.
    def __eq__(self, other):
        """
        Instances of the same class are equal if both their names and
        data types are equal; anything else is compared to the name.
        """

        if isinstance(other, self.__class__):

            return (
                self.name == other.name and
                self.data_type == other.data_type
            )

        return self.name == other


    def __str__(self):

        return self.name


    @property
    def dataset(self):
        """
        The dataset label of the resource.
        """

        return self._dataset


    @dataset.setter
    def dataset(self, dataset):
        """
        Sets the dataset label; if the instance has a ``networkinput``,
        replaces it with a deep copy carrying the new label, so the
        object received from the caller is never modified.
        """

        self._dataset = dataset
        networkinput = getattr(self, 'networkinput', None)

        # `networkinput` might be present but None: nothing to update
        if networkinput is not None:

            netinput_new = copy.deepcopy(networkinput)
            netinput_new.dataset = dataset
            self.networkinput = netinput_new
class NetworkResourceKey(
        collections.namedtuple(
            'NetworkResourceKeyBase',
            ['name', 'data_type', 'interaction_type', 'data_model', 'via'],
        )
    ):
    """
    Tuple of the attributes that identify a network resource.
    """

    def __new__(cls, *args, **kwargs):

        return super().__new__(cls, *args, **kwargs)


    @property
    def label(self):
        """
        Returns (str): A label containing the resource name, and if it's
            a secondary resource, the name of the primary resource
            separated by an underscore.
        """

        if self.via:

            return '%s_%s' % (self.name, self.via)

        return self.name


    @property
    def last(self):
        """
        Returns (str): The name of the resource where the data directly
            came from ignoring the primary resource.
        """

        if self.via:

            return self.via

        return self.name
class NetworkResource(ResourceAttributes):
    """
    Attributes of a network resource. Identity (hashing and equality)
    is defined by ``key``: the name, data type, interaction type, data
    model and the ``via`` resource.
    """

    _key = NetworkResourceKey


    def __init__(
            self,
            name,
            interaction_type = 'PPI',
            data_model = None,
            evidence_types = None,
            via = None,
            dataset = None,
            **kwargs
        ):
        """
        name : str
            Name of the resource.
        interaction_type : str
            Interaction type label.
        data_model : str
            Data model label; underscores are shown as spaces in
            ``data_model_label``.
        evidence_types : set
            Evidence types.
        via : str
            Name of the resource the data has been obtained through;
            None means a primary resource.
        dataset
            Dataset label; if not provided, it is taken from the
            ``networkinput`` keyword argument if that has one.
        **kwargs
            Set as attributes of the instance.
        """

        if not dataset and 'networkinput' in kwargs:

            # `networkinput` might be None or have no dataset
            dataset = getattr(kwargs['networkinput'], 'dataset', dataset)

        ResourceAttributes.__init__(
            self,
            name = name,
            data_type = 'network',
            interaction_type = interaction_type,
            evidence_types = evidence_types,
            data_model = data_model,
            via = via,
            dataset = dataset,
            **kwargs
        )


    def __hash__(self):

        return hash(self.key)


    @property
    def key(self):
        """
        The tuple of attributes that identifies the resource.
        """

        return self._key(
            name = self.name,
            data_type = self.data_type,
            interaction_type = self.interaction_type,
            data_model = self.data_model,
            via = self.via,
        )


    def __eq__(self, other):
        """
        A string is compared to the name of the resource; any other
        object is compared by key: either its ``key`` attribute or,
        if it has none, the object itself.
        """

        if isinstance(other, str):

            return self.name == other

        # Comparing the keys instead of their hashes: no false equality
        # upon hash collision, no error if `other` is unhashable.
        return self.key == getattr(other, 'key', other)


    def __repr__(self):

        return '<NetworkResource: %s (%s, %s)>' % (
            self.name,
            self.interaction_type,
            self.data_model,
        )


    def is_primary(self):
        """
        Returns (bool): True if the data does not come through another
            resource, i.e. ``via`` is None.
        """

        return self.via is None


    @property
    def data_model_label(self):
        """
        Returns (str): The data model capitalized, with underscores
            replaced by spaces; `Unknown` if no data model is set.
        """

        return (
            self.data_model.capitalize().replace('_', ' ')
                if self.data_model else
            'Unknown'
        )


    @property
    def license(self) -> License.License | None:
        """
        The `license` item of ``resource_attrs``, None if missing.
        """

        return self.resource_attrs.get('license', None)
class NetworkDataset(collections.abc.MutableMapping):
    """
    A set of network resources.

    Formerly the network datasets were represented by dicts. This is
    only a thin wrapper around that solution to better organise
    metadata of the datasets and resources within.

    Note: iterating the dataset yields the resources (the values),
    not the keys.
    """

    def __init__(
            self,
            name: str,
            resources: dict | list | None = None,
        ):
        """
        name : str
            Name of the dataset; each resource added gets this as its
            dataset label.
        resources : dict | list | None
            Resources to start with, see ``add``.
        """

        self._name = name
        self._resources = {}
        self.add(resources)


    def __repr__(self):

        it = ', '.join(self.interaction_types)

        return f'<NetworkDataset: {self.name} ({len(self)} resources; {it})>'


    def __iter__(self):

        return (r for r in self._resources.values())


    def __len__(self):

        return len(self._resources)


    @property
    def interaction_types(self):
        """
        Sorted list of the interaction types of the resources.
        """

        return sorted({r.interaction_type for r in self})


    def __setitem__(self, key, value):

        self.add(value, key)


    def __getitem__(self, key):

        return self._resources[key]


    def __delitem__(self, key):

        del self._resources[key]


    def __contains__(self, key):
        """
        True if ``key`` is either a label or the name of a resource
        in the dataset.
        """

        return (
            key in self._resources or
            any(r.name == key for r in self._resources.values())
        )


    def __eq__(self, other):
        """
        Datasets are compared by name, either to a name or to the
        ``_name`` attribute of another object.
        """

        return (
            self._name == other or
            self._name == getattr(other, '_name', None)
        )


    def items(self):

        return self._resources.items()


    def values(self):

        return self._resources.values()


    def keys(self):

        return self._resources.keys()


    def add(self, value, key = None):
        """
        Adds one or more resources to the dataset.

        value
            A mapping of labels to resources, an iterable of resources
            or a single ``NetworkResource``. Anything else, including
            None and strings, is ignored.
        key
            Label for a single resource; defaults to its name.

        A deep copy of each resource is stored with its dataset set to
        the name of this dataset, the objects provided are not modified.
        """

        # A string is an iterable of strings: without this guard it
        # would recurse until RecursionError.
        if value is None or isinstance(value, (str, bytes)):

            return

        if isinstance(value, Mapping):

            for label, resource in value.items():

                self.add(resource, label)

        elif isinstance(value, Iterable):

            for resource in value:

                self.add(resource)

        elif isinstance(value, NetworkResource):

            resource = copy.deepcopy(value)
            resource.dataset = self.name
            self._resources[key or resource.name] = resource


    update = add


    @property
    def name(self):
        """
        Name of the dataset.
        """

        return self._name


    @name.setter
    def name(self, name):
        """
        Renames the dataset in place and updates the dataset label of
        every resource in it.
        """

        for resource in self.values():

            # Same assignment as in `add`: sets the dataset of the
            # resource itself, not only of its `networkinput`, and
            # works for resources without `networkinput`.
            resource.dataset = name

        self._name = name


    def __copy__(self):

        return self.__class__(
            name = self.name,
            resources = self._resources,
        )


    def rename(self, name: str):
        """
        Returns a new dataset called ``name`` with copies of the
        resources of this one.
        """

        new = self.__class__(name = name)
        new.add(self)

        return new


    def remove(self, remove: str | set | None):
        """
        Removes the resources whose label or name is in ``remove``.
        """

        remove = common.to_set(remove)

        self._resources = {
            k: v
            for k, v in self.items()
            if k not in remove and v.name not in remove
        }


    def without(self, exclude: str | set | None):
        """
        Returns a copy of the dataset without the resources whose
        label or name is in ``exclude``.
        """

        new = copy.copy(self)
        new.remove(exclude)

        return new
# Tuple of the attributes that identify an enzyme-substrate resource.
EnzymeSubstrateResourceKey = collections.namedtuple(
    'EnzymeSubstrateResourceKey',
    ('name', 'data_type', 'via'),
)
class EnzymeSubstrateResource(ResourceAttributes):
    """
    Attributes of an enzyme-substrate resource. Identity (hashing and
    equality) is defined by ``key``: the name, data type and the
    ``via`` resource.
    """

    _key = EnzymeSubstrateResourceKey


    def __init__(
            self,
            name,
            input_method,
            evidence_types = None,
            via = None,
            id_type_enzyme = 'uniprot',
            id_type_substrate = 'uniprot',
            organisms_supported = False,
            organisms = None,
            resource_attrs = None,
            extra_attrs = None,
            **kwargs
        ):
        """
        All arguments are stored as attributes of the instance under
        their own names; the data type is fixed to `enzyme_substrate`.
        ``resource_attrs`` and ``extra_attrs`` default to empty dicts.
        """

        ResourceAttributes.__init__(
            self,
            name = name,
            input_method = input_method,
            data_type = 'enzyme_substrate',
            evidence_types = evidence_types,
            via = via,
            id_type_enzyme = id_type_enzyme,
            id_type_substrate = id_type_substrate,
            organisms_supported = organisms_supported,
            organisms = organisms,
            resource_attrs = resource_attrs or {},
            extra_attrs = extra_attrs or {},
            **kwargs
        )


    def __hash__(self):

        return hash(self.key)


    @property
    def key(self):
        """
        The tuple of attributes that identifies the resource.
        """

        return self._key(
            name = self.name,
            data_type = self.data_type,
            via = self.via,
        )


    def __eq__(self, other):
        """
        A string is compared to the name of the resource; any other
        object is compared by key: either its ``key`` attribute or,
        if it has none, the object itself.
        """

        if isinstance(other, str):

            return self.name == other

        # Comparing the keys instead of their hashes: no false equality
        # upon hash collision, no error if `other` is unhashable.
        return self.key == getattr(other, 'key', other)


    def __repr__(self):

        return '<EnzymeSubstrateResource: %s>' % (
            self.name,
        )


    def is_primary(self):
        """
        Returns (bool): True if the data does not come through another
            resource, i.e. ``via`` is None.
        """

        return self.via is None


    def get_via(self, name):
        """
        Returns a copy of the same resource attributes but the ``name``
        set to ``name`` and the ``via`` set to the original name. This
        means the data comes from the resource ``name`` via the resource
        ``via``.

        The copy is shallow: the attribute values are passed on to the
        new instance as they are.
        """

        args = {}

        for attr in dir(self):

            if attr.startswith('__'):

                continue

            value = getattr(self, attr)

            # methods and the key class are not constructor arguments
            if not callable(value):

                args[attr] = value

        # `input_method` is a required argument but the filter above
        # drops it if it is a callable: restore it in any case
        args['input_method'] = self.input_method
        args['via'] = self.name
        args['name'] = name
        # set by the constructor (`data_type`) or derived (`key`)
        args.pop('data_type', None)
        args.pop('key', None)

        return EnzymeSubstrateResource(**args)