#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#from__future__importannotationsfromtypingimportGenerator,Literalimportjsonimportreimportcollectionsimportpandasaspdimportpypath.share.curlascurlimportpypath.resources.urlsasurlsimportpypath.share.sessionassession_log=session.Logger(name='opentargets_input')._log
def opentargets_general(
        dataset: Literal[
            'assoc_direct',
            'assoc_indirect',
            'adr',
            'expr',
            'associationByOverallIndirect',
            'associationByOverallDirect',
            'fda/significantAdverseDrugReactions',
            'baselineExpression',
        ],
        return_df: bool = False,
        by: str | bool = False,
    ) -> Generator[dict] | dict[str, list[dict]] | pd.DataFrame:
    """
    Download data from the Open Targets database.

    Args:
        dataset:
            Name of a dataset, either as a shorthand synonym or as it is
            shown in the URL.
        return_df:
            Return a pandas data frame. Takes precedence over `by`: if
            both are set, a data frame is returned and no grouping is
            performed.
        by:
            Name of the variable to be used as top level key in the
            returned dictionary. If True, the default grouping variable
            for the given dataset will be used. If False, no grouping
            will be performed.

    Returns:
        A generator of records (dicts) by default; a pandas data frame
        if `return_df` is True; otherwise, if `by` is set, a dict of
        lists of records keyed by the value of the grouping variable
        (the grouping variable itself is removed from each record).

    Raises:
        KeyError: If `by` is True and no default grouping variable is
            known for `dataset`, or if a record lacks the `by` key.
    """

    # Shorthand synonyms -> dataset names as they appear in the URL.
    synonyms = {
        'assoc_indirect': 'associationByOverallIndirect',
        'assoc_direct': 'associationByOverallDirect',
        'adr': 'fda/significantAdverseDrugReactions',
        'expr': 'baselineExpression',
    }

    by_defaults = {
        'associationByOverallIndirect': 'diseaseId',
        'associationByOverallDirect': 'diseaseId',
        'fda/significantAdverseDrugReactions': 'chembl_id',
        'baselineExpression': 'id',
    }

    if by is True:

        # `by_defaults` is keyed by the full dataset names only, hence the
        # shorthand has to be resolved before the lookup.
        by = by_defaults[synonyms.get(dataset, dataset)]

    result = _opentargets_general(dataset)

    if return_df:

        result = pd.DataFrame(result)

    elif by:

        grouped = collections.defaultdict(list)

        for it in result:

            # The grouping variable becomes the top level key, so it is
            # dropped from the record itself.
            key = it[by]
            del it[by]
            grouped[key].append(it)

        result = grouped

    return result
def _opentargets_general(
        dataset: Literal[
            'assoc_direct',
            'assoc_indirect',
            'adr',
            'expr',
            'associationByOverallIndirect',
            'associationByOverallDirect',
            'fda/significantAdverseDrugReactions',
            'baselineExpression',
        ],
    ) -> Generator[dict, None, None]:
    """
    Iterate over the records of one Open Targets dataset.

    First retrieves the dataset URL and collects every double-quoted
    `part*.json` name found in the response; then downloads each of these
    files and parses them item by item as JSON.

    Args:
        dataset:
            Name of a dataset, either as a shorthand synonym or as it is
            shown in the URL.

    Yields:
        One parsed JSON value per non-empty item of each part file
        (presumably one record dict per line -- depends on what
        `curl.Curl` yields with `large=True`). Items that are not valid
        JSON are logged and skipped.
    """

    # Shorthand synonyms -> dataset names as they appear in the URL.
    datasets = {
        'assoc_indirect': 'associationByOverallIndirect',
        'assoc_direct': 'associationByOverallDirect',
        'adr': 'fda/significantAdverseDrugReactions',
        'expr': 'baselineExpression',
    }

    dataset = datasets.get(dataset, dataset)

    url = urls.urls['opentargets']['url'] % dataset
    c = curl.Curl(url, silent=False, large=False)

    # `[^"]*` instead of `.*`: a greedy `.*` may run across several quoted
    # file names if they happen to be on the same line of the response.
    repart = re.compile(r'"(part[^"]*\.json)"')
    json_files = repart.findall(c.result)

    for json_name in json_files:

        # Plain concatenation: a second round of %-formatting would fail
        # if the already formatted URL contained a `%` character.
        c = curl.Curl(f'{url}/{json_name}', silent=False, large=True)

        for line in c.result:

            if not line:

                continue

            try:

                contents = json.loads(line)

            except json.JSONDecodeError:

                err = f'Failed to parse JSON from Open Targets data:\n{line}'
                _log(err)
                continue

            yield contents
def opentargets_indirect_score(
        return_df: bool = False,
        by: str | bool = False,
    ) -> Generator[dict] | dict | pd.DataFrame:
    """
    Indirect target-disease association scores from Open Targets.

    Shortcut for `opentargets_general` with the `assoc_indirect` dataset.

    Args:
        return_df:
            Return a pandas data frame.
        by:
            Name of the variable to be used as top level key in the
            returned dictionary. If True, the default grouping variable
            for the given dataset will be used. If False, no grouping
            will be performed.

    Returns:
        Whatever `opentargets_general` returns for this dataset: the
        target-disease association records, a pandas data frame if
        `return_df` is True, or a dict of lists of records if `by` is
        not False.
    """

    return opentargets_general(
        dataset='assoc_indirect',
        return_df=return_df,
        by=by,
    )
def opentargets_direct_score(
        return_df: bool = False,
        by: str | bool = False,
    ) -> Generator[dict] | dict | pd.DataFrame:
    """
    Direct target-disease association scores from Open Targets.

    Shortcut for `opentargets_general` with the `assoc_direct` dataset.

    Args:
        return_df:
            Return a pandas data frame.
        by:
            Name of the variable to be used as top level key in the
            returned dictionary. If True, the default grouping variable
            for the given dataset will be used. If False, no grouping
            will be performed.

    Returns:
        Target-disease association records as a generator of dicts by
        default; or a pandas data frame if `return_df` is True; or a
        dict of lists of dicts if `by` is not False.
    """

    # `by` is passed through unchanged, hence it accepts a variable name
    # (str) as well as a bool, just like in `opentargets_general`.
    return opentargets_general('assoc_direct', return_df, by)
def opentargets_adverse_reactions(
        return_df: bool = False,
        by: str | bool = False,
    ) -> Generator[dict] | dict | pd.DataFrame:
    """
    Drug adverse reactions from Open Targets.

    Shortcut for `opentargets_general` with the `adr` dataset.

    Args:
        return_df:
            Return a pandas data frame.
        by:
            Name of the variable to be used as top level key in the
            returned dictionary. If True, the default grouping variable
            for the given dataset will be used. If False, no grouping
            will be performed.

    Returns:
        Drug-adverse reaction records as a generator of dicts by
        default; or a pandas data frame if `return_df` is True; or a
        dict of lists of dicts if `by` is not False.
    """

    # `by` is passed through unchanged, hence it accepts a variable name
    # (str) as well as a bool, just like in `opentargets_general`.
    return opentargets_general('adr', return_df, by)
def opentargets_baseline_expression(
        return_df: bool = False,
        by: str | bool = False,
    ) -> Generator[dict] | dict | pd.DataFrame:
    """
    Baseline expression from Open Targets.

    Shortcut for `opentargets_general` with the `expr` dataset.

    Args:
        return_df:
            Return a pandas data frame.
        by:
            Name of the variable to be used as top level key in the
            returned dictionary. If True, the default grouping variable
            for the given dataset will be used. If False, no grouping
            will be performed.

    Returns:
        Baseline expression records as a generator of dicts by default;
        or a pandas data frame if `return_df` is True; or a dict of
        lists of dicts if `by` is not False.
    """

    # `by` is passed through unchanged, hence it accepts a variable name
    # (str) as well as a bool, just like in `opentargets_general`.
    return opentargets_general('expr', return_df, by)