#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#from__future__importannotationsfromtypingimportGenerator,Literalfromcollectionsimportnamedtupleimportpandasaspdimportpypath.share.curlascurlimportpypath.resources.urlsasurlsimportpypath.share.commonascommon_NUMERIC_FIELDS={'z_score','confidence','confidence','source_score',}
def diseases_general(
    data_origin: Literal['textmining', 'knowledge', 'experiments'],
    filtered: bool = False,
    return_df: bool = False,
) -> Generator[tuple] | pd.DataFrame:
    """
    Retrieve a dataset from the DISEASES database from Jensen Lab.

    Warning:
        The "textmining" datasets are enormous!

    Args:
        data_origin: The data collection method.
        filtered: Download the filtered dataset instead of the full.
        return_df: Return a pandas data frame.

    Returns:
        The records produced by `_diseases_general`: wrapped in a
        `pandas.DataFrame` if `return_df` is true, otherwise the
        generator itself.
    """

    records = _diseases_general(data_origin, filtered)

    if return_df:
        return pd.DataFrame(records)

    return records
def _diseases_general(
    data_origin: Literal['textmining', 'knowledge', 'experiments'],
    filtered: bool = False,
) -> Generator[tuple]:
    """
    Download one DISEASES dataset and parse it line by line.

    Args:
        data_origin: The data collection method.
        filtered: Download the filtered dataset instead of the full.

    Yields:
        `DiseasesInteraction` named tuples with the fields `gene_id`,
        `genesymbol`, `disease_id`, `disease`, followed by three fields
        specific to `data_origin`. Fields listed in `_NUMERIC_FIELDS` are
        converted to `int` or `float`; all other fields remain strings.
    """

    query_type = 'filtered' if filtered else 'full'
    url = urls.urls['diseases']['url'] % (data_origin, query_type)

    # The last three columns differ between the data collection methods.
    query_fields = {
        'textmining': ['z_score', 'confidence', 'url'],
        'knowledge': ['resource', 'evidence_type', 'confidence'],
        'experiments': ['resource', 'source_score', 'confidence'],
    }

    fields = [
        'gene_id',
        'genesymbol',
        'disease_id',
        'disease',
    ] + query_fields[data_origin]

    record = namedtuple('DiseasesInteraction', fields)

    c = curl.Curl(url, silent = False, large = True)

    def proc_field(value, key):
        """
        Convert one raw column value; returns a `(key, value)` pair.
        """

        if key == 'source_score':

            # This column has the form `<label>=<number>`;
            # only the part after the `=` is kept.
            value = value.split('=')[1]

        if key in _NUMERIC_FIELDS:

            if '.' in value:

                num_type = float

            else:

                num_type = int if common.is_int(value) else float

            value = num_type(value)

        return (key, value)

    for line in c.result:

        line = line.strip('\n ')

        if not line:

            # A blank line would yield a single empty field and make the
            # named tuple constructor fail on the missing fields.
            continue

        values = line.split('\t')

        yield record(
            **dict(
                proc_field(value, key)
                for key, value in zip(fields, values)
            )
        )
def textmining_full(
    return_df: bool = False,
) -> Generator[tuple] | pd.DataFrame:
    """
    Full textmining dataset of the DISEASES database from Jensen Lab.

    Delegates to `diseases_general` with the data origin "textmining"
    and the filtered download switched off.

    Warning:
        The "textmining" datasets are enormous!

    Args:
        return_df: Return a pandas data frame.
    """

    return diseases_general('textmining', False, return_df)
def textmining_filtered(
    return_df: bool = False,
) -> Generator[tuple] | pd.DataFrame:
    """
    Filtered textmining dataset of the DISEASES database from Jensen Lab.

    Delegates to `diseases_general` with the data origin "textmining"
    and the filtered download switched on.

    Warning:
        The "textmining" datasets are enormous!

    Args:
        return_df: Return a pandas data frame.
    """

    return diseases_general('textmining', True, return_df)
def knowledge_filtered(
    return_df: bool = False,
) -> Generator[tuple] | pd.DataFrame:
    """
    Filtered knowledge dataset of the DISEASES database from Jensen Lab.

    Delegates to `diseases_general` with the data origin "knowledge"
    and the filtered download switched on.

    Args:
        return_df: Return a pandas data frame.
    """

    return diseases_general('knowledge', True, return_df)
def knowledge_full(
    return_df: bool = False,
) -> Generator[tuple] | pd.DataFrame:
    """
    Full knowledge dataset of the DISEASES database from Jensen Lab.

    Delegates to `diseases_general` with the data origin "knowledge"
    and the filtered download switched off.

    Args:
        return_df: Return a pandas data frame.
    """

    return diseases_general('knowledge', False, return_df)
def experiments_filtered(
    return_df: bool = False,
) -> Generator[tuple] | pd.DataFrame:
    """
    Filtered experiments dataset of the DISEASES database from Jensen Lab.

    Delegates to `diseases_general` with the data origin "experiments"
    and the filtered download switched on.

    Args:
        return_df: Return a pandas data frame.
    """

    return diseases_general('experiments', True, return_df)
def experiments_full(
    return_df: bool = False,
) -> Generator[tuple] | pd.DataFrame:
    """
    Full experiments dataset of the DISEASES database from Jensen Lab.

    Delegates to `diseases_general` with the data origin "experiments"
    and the filtered download switched off.

    Args:
        return_df: Return a pandas data frame.
    """

    return diseases_general('experiments', False, return_df)