#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#from__future__importannotations"""Highest level resource management API."""fromfuture.utilsimportiteritemsfromtypingimportIterable,Literalimportjsonimportosimportcopyimportimportlibasimpimportitertoolsimportfunctoolsimportpypath.share.sessionassession_modimportpypath.share.commonascommonimportpypath.internals.resourceasresource_baseimportpypath.resources._networkasnetresfrom.importlicensesaslicenses
[docs]classResourceController(session_mod.Logger):""" Resource controller is aimed to be the central part of pypath communication with resources. 14.01.2020: the initial step for resource controller development: used for /info page generating for the server. 14.02.2020: storing and reading enzyme-substrate resource definitions from the JSON; class inherits from session.Logger """
def __init__(
        self,
        resource_info_path=(
            session_mod.session().module_root,
            'resources',
            'data',
            'resources.json',
        ),
        use_package_path=False,
    ):
    """
    Set up the logger and load the resource information.

    :arg tuple,list,str resource_info_path:
        Location of the JSON file with the resource information, either
        as a sequence of path components or as one single path. By
        default ``resources/data/resources.json`` below the module root
        reported by the session.
    :arg bool use_package_path:
        Prepend the directory of this module to ``resource_info_path``.
    """

    session_mod.Logger.__init__(self, name='resource_controller')

    self.data = None

    # A single path would otherwise be unpacked character by character by
    # `os.path.join(*...)` below, so wrap it into a one element tuple.
    if isinstance(resource_info_path, (str, os.PathLike)):
        resource_info_path = (resource_info_path,)

    # Normalise to a tuple, so lists can be concatenated below as well.
    resource_info_path = tuple(resource_info_path)

    if use_package_path:
        package_dir = os.path.dirname(os.path.abspath(__file__))
        resource_info_path = (package_dir,) + resource_info_path

    self.resource_info_path = os.path.join(*resource_info_path)

    self._log(
        'Loading resource information from '
        'JSON file: %s' % self.resource_info_path
    )

    self.update()
def update(self, path=None, force=False, remove_old=False):
    """
    Reads resource information from a JSON file.

    :arg str,NoneType path:
        Path to a JSON file with resource information. By default the
        path in :py:attr:`resource_info_path` is used.
    :arg bool force:
        Read the file again even if no new path is provided and data has
        been loaded already.
    :arg bool remove_old:
        Discard the old data before attempting to read. Note: after a
        successful read the loaded content replaces the old data in any
        case, hence this option makes a difference only if reading
        fails.
    """

    # Nothing to do: data is already loaded and no reload was requested.
    if self.data and not path and not force:
        return

    if not self.data or remove_old:
        self.data = {}

    path = path or self.resource_info_path

    try:
        with open(path, encoding='utf-8') as json_file:
            self.data = json.load(json_file)

        self._log('Resource information has been read from `%s`.' % path)
        self.update_licenses()

    # NOTE(review): this also catches an `IOError` raised within
    # `update_licenses`, and reports it as a problem with `path`.
    except IOError:
        self._console(
            'File %s with resources information cannot be accessed. '
            'Check the name of the file.' % path
        )

        # If nothing has ever been loaded, `update_licenses` has not
        # created the lookup tables; leave empty ones behind so later
        # lookups do not fail on missing attributes.
        for table in ('licenses', 'synonyms', 'secondary'):
            if not hasattr(self, table):
                setattr(self, table, {})
def update_licenses(self):
    """
    Rebuilds the license, synonym and composite resource lookup tables
    from the resource information in ``self.data``.

    The ``license`` entry of each resource record is replaced in place
    by the value retrieved from the license database. Resources without
    a ``license`` entry are only reported in the log.
    """

    self.license_db = licenses.Licenses()
    self.licenses = {}
    self.synonyms = {}
    self.secondary = {}

    self._log('Updating resource license information.')

    for res, res_data in self.data.items():

        if 'license' not in res_data:
            self._log(f'No license for resource `{res}`.')
            continue

        # NOTE(review): the whole resource record is used as key here,
        # not `res_data['license']` -- confirm that the license database
        # expects the record.
        res_license = self.license_db[res_data]
        res_data['license'] = res_license
        self.licenses[res] = res_license

        # Synonyms share the license of the resource and point back to
        # its primary name.
        for synonym in res_data.get('synonyms', ()):
            self.licenses[synonym] = res_license
            self.synonyms[synonym] = res

        if 'components' in res_data:
            self.secondary[res] = set(res_data['components'])


def __getitem__(self, key):
    """
    Shortcut for :py:meth:`resource`.
    """

    return self.resource(key)


def resource(self, name):
    """
    Looks up the information record of a resource in ``self.data``.

    :arg str name:
        Name of the resource; resolved by ``_get``.
    """

    return self._get(name, dct=self.data)


def name(self, name):
    """
    Translates a synonym to the primary name of the resource.

    :arg str name:
        A resource name or synonym.

    :return:
        The primary name if ``name`` is a known synonym, otherwise
        ``name`` unchanged.
    """

    return self.synonyms.get(name, name)
def secondary_resources(self, name, postfix=False):
    """
    Secondary resources (components) of a composite resource.

    The result is computed on each call: a cache on the method would
    keep serving old results after the lookup tables are rebuilt, and
    would hand out the same mutable set to every caller.

    Args:
        name: Name of a composite resource.
        postfix: Append the name of the primary resource to the secondary,
            separated by an underscore, e.g. "TFactS_CollecTRI".

    Returns:
        A new set of secondary resource names; empty if the resource has
        no components.
    """

    name = self.name(name)
    components = self.secondary.get(name, ())

    if postfix:
        return {f'{sec}_{name}' for sec in components}

    # Copy, so that callers can not alter the lookup table.
    return set(components)
def _get(self, name, dct):
    """
    Looks up a resource in a dictionary keyed by resource names.

    The lookup is attempted by the name as it is, then by the primary
    name if ``name`` is a known synonym, and finally by the part of the
    name before its first underscore.

    :arg str name:
        Resource name, synonym, or a name with an underscore separated
        postfix.
    :arg dict dct:
        The dictionary to search.

    :return:
        The value found in ``dct``; ``None`` if all attempts failed, in
        this case a message is written to the log.
    """

    if name in dct:
        return dct[name]

    # A synonym pointing to a resource missing from `dct` falls through
    # to the attempts below instead of raising a `KeyError`.
    if name in self.synonyms and self.synonyms[name] in dct:
        return dct[self.synonyms[name]]

    # The part before the first underscore contains no underscore, hence
    # the recursion is at most one level deep.
    if isinstance(name, str) and '_' in name:
        return self._get(name.split('_', maxsplit=1)[0], dct)

    self._log('Could not find resource `%s`.' % name)

    return None


def license(self, name):
    """
    Looks up the license of a resource in ``self.licenses``.

    :arg str name:
        Name of the resource; resolved by :py:meth:`_get`.
    """

    return self._get(name, dct=self.licenses)
def license_filter(
        self,
        resources: list | dict,
        purpose: Literal[
            'academic',
            'commercial',
            'for-profit',
            'non-profit',
            'ignore',
        ] | None = None,
        sharing: Literal[
            'alike',
            'free',
            'noderiv',
            'noshare',
            'share',
            'deriv',
            'ignore',
        ] | None = None,
        attrib: Literal[
            'attrib',
            'free',
            'noattrib',
            'composite',
            'ignore',
        ] | None = None,
    ) -> list | dict:
    """
    Filters a collection of resources by their license.

    The resource attributes are attached to the resources first (this
    modifies the resources in place), then each resource is kept or
    dropped depending on whether its license enables the requested
    conditions.

    Args:
        resources: Resource objects, in a list or dict.
        purpose: The purpose of use the license has to enable.
        sharing: The redistribution conditions the license has to
            enable.
        attrib: The attribution conditions the license has to enable.

    Returns:
        The resources passing the filter, as returned by
        `common.compr`.
    """

    self.add_resource_attrs(resources)

    def license_enables(res):
        return res.license.enables(purpose, sharing, attrib)

    return common.compr(obj=resources, filter=license_enables)
def add_resource_attrs(
        self,
        resources: dict | Iterable[resource_base.AbstractResource],
    ) -> None:
    """
    Attaches the resource information records to resource objects.

    For each resource the record is looked up by the `name` attribute
    of the resource and stored in its `resource_attrs` attribute. The
    instances are modified in place, nothing is returned.

    Args:
        resources: Resource objects, in a dict or any iterable.
    """

    def attach(res):
        res.resource_attrs = self.resource(res.name)

    # Only the side effect matters, the collection built is discarded.
    _ = common.compr(resources, attach)
def collect_network(
        self,
        datasets: Iterable[
            Literal[
                'pathway',
                'pathway_noref',
                'pathway_all',
                'activity_flow',
                'mirna_target',
                'dorothea',
                'tfregulons',
                'omnipath',
                'reaction_pc',
                'enzyme_substrate',
                'extra_directions',
                'small_molecule_protein',
                'tf_mirna',
                'pathwaycommons',
                'pathwaycommons_transcription',
                'interaction',
                'interaction_htp',
                'interaction_misc',
                'ligand_receptor',
                'lncrna_target',
                'transcription_onebyone',
                'transcription_dorothea',
                'ptm',
                'ptm_noref',
                'ptm_all',
                'reaction',
                'reaction_misc',
                'negative',
            ],
        ] | None = 'pathway',
        interaction_types: Iterable[
            Literal[
                'post_translational',
                'transcriptional',
                'small_molecule_protein',
                'post_transcriptional',
            ],
        ] | None = 'post_translational',
        data_models: Iterable[
            Literal[
                'activity_flow',
                'interaction',
                'enzyme_substrate',
                'process_description',
                'ligand_receptor',
                'drug_target',
            ],
        ] | None = 'activity_flow',
        license_purpose: Literal[
            'academic',
            'commercial',
            'for-profit',
            'non-profit',
            'ignore',
        ] = 'ignore',
        license_sharing: Literal[
            'alike',
            'free',
            'noderiv',
            'noshare',
            'share',
            'deriv',
            'ignore',
        ] = 'ignore',
        license_attrib: Literal[
            'attrib',
            'free',
            'noattrib',
            'composite',
            'ignore',
        ] = 'ignore',
        **kwargs
    ) -> dict:
    """
    Collect network (interaction) resource definitions.

    Args:
        datasets: Process only these datasets. Note: there are many
            synonyms and overlaps among datasets. In addition, the
            overlaps might apply slightly different settings for the
            same resource, e.g. in `pathway`, interactions must have
            literature references, while in `pathway_noref` the same
            resources might allow interactions without references. The
            safest is to process only one dataset at a time and load
            them into the `Network` object sequentially. If the same
            key occurs in more than one dataset, the definition
            processed last is kept.
        interaction_types: Include only these interaction types.
        data_models: Include only these data models.
        license_purpose: Do not include the resources that are not
            legally compatible with the defined purpose.
        license_sharing: Include only resources that allow the desired
            redistribution conditions. E.g. "deriv" means that the
            resources must allow the sharing of their derivative
            (altered) versions.
        license_attrib: Include only resources that allow the desired
            level of attribution. E.g. "noattrib" means that you can
            use the resource without even mentioning who created it.
        kwargs: Custom filters. Names should be attributes of the
            resource or the `NetworkInput` object. The special key
            `__resource__` can be used to refer to the whole
            `NetworkResource` object. For simple values, the test is
            equality, for arrays incidence, while custom callables can
            be provided for more flexible filters.

    Returns:
        A dict of the resource definitions passing all the filters,
        with the keys used in the datasets.
    """

    def as_filter(value):
        """Turns a plain value or an array into a membership test."""

        if callable(value):
            return value

        # The set is built once and bound to this very filter. A lambda
        # created directly in a comprehension would see the loop
        # variable, i.e. the value of the last keyword argument.
        allowed = common.to_set(value)

        return lambda x: x in allowed

    def filtered_value(res, key):
        """Picks the object a custom filter is applied to."""

        if key == '__resource__':
            return res

        # Attributes of the resource take precedence over the ones of
        # its network input definition.
        if hasattr(res, key):
            return getattr(res, key)

        return getattr(res.networkinput, key)

    # NOTE(review): presumably `common.to_set` turns `None` into an
    # empty set, which disables the corresponding filter below --
    # confirm.
    interaction_types = common.to_set(interaction_types)
    data_models = common.to_set(data_models)
    datasets = common.to_set(datasets)

    custom_filters = {
        key: as_filter(value)
        for key, value in kwargs.items()
    }

    candidates = itertools.chain.from_iterable(
        getattr(netres, dset).items()
        for dset in datasets
    )

    resources = {
        key: res
        for key, res in candidates
        if (
            not interaction_types or
            res.interaction_type in interaction_types
        ) and (
            not data_models or
            res.data_model in data_models
        ) and all(
            fltr(filtered_value(res, attr))
            for attr, fltr in custom_filters.items()
        )
    }

    return self.license_filter(
        resources,
        purpose=license_purpose,
        sharing=license_sharing,
        attrib=license_attrib,
    )