#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#

from __future__ import annotations

import collections
import itertools
import csv
import re
import asyncio
import inspect

from concurrent.futures.thread import ThreadPoolExecutor
from abc import ABC, abstractmethod
from typing import Iterable, Literal

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.session as session
import pypath.share.common as common

_logger = session.Logger(name='kegg_api')
_log = _logger._log

# URL template of the KEGG API; the operation name is interpolated with `%`.
_url = urls.urls['kegg_api']['url']


def _generate_relation_functions():
    """
    Create the module level ``<source>_to_<target>`` relation functions.

    One function is generated for every ordered pair of distinct entity
    types out of disease, drug, gene and pathway, and stored in the module
    globals under the name ``f'{source}_to_{target}'``. Each generated
    function delegates to ``_kegg_relations``.

    Functions whose pair involves ``gene`` take one ``organism`` argument;
    all other functions take no arguments.
    """

    _entity_types = ('disease', 'drug', 'gene', 'pathway')

    def _make_relation_function(source, target):
        # A factory gives every generated function its own `source` and
        # `target` bindings. Defining the function directly in the loop
        # would late-bind the loop variable, and re-assigning it inside
        # the function would turn it into an unbound local.
        if 'gene' in (source, target):

            def _relation_function(organism):

                return _kegg_relations(source, target, organism)

        else:

            def _relation_function():

                return _kegg_relations(source, target)

        return _relation_function

    for etypes in itertools.combinations(_entity_types, 2):

        # both directions of each pair
        for source, target in (etypes, etypes[::-1]):

            name = f'{source}_to_{target}'
            synopsis = f'{source.capitalize()}-{target} relations from KEGG.'

            function = _make_relation_function(source, target)
            function.__name__ = name
            function.__qualname__ = name
            function.__doc__ = synopsis

            if 'gene' in (source, target):

                function.__doc__ += (
                    '\n\nArgs\n    organism:\n        Name of the organism. '
                    'Gene relations are organism specific.\n'
                )

            globals()[name] = function
def drug_to_drug(
        drugs: list | tuple | None = None,
        join: bool = True,
        asynchronous: bool = False,
    ) -> dict[str, tuple]:
    """
    Downloads drug-drug interaction data from KEGG database.

    Args
        drugs:
            Drug IDs as a list or a tuple. If empty or None, all IDs of
            the KEGG drug database are queried, one request per ID, and
            the asynchronous code path is used.
        join:
            If True (and `drugs` is not empty), all IDs are sent to KEGG
            in one single combined query; otherwise each ID is queried
            separately. NOTE(review): according to the KEGG API
            documentation a combined query reports the interactions among
            the listed drugs, while a single ID query reports all known
            partners of that drug -- confirm.
        asynchronous:
            Yet to be implemented.

    Returns
        A dict with the IDs of the first interaction partner (drug or
        compound) as keys and `DrugToDrugInteraction` named tuples as
        values. Each of them carries the type and name of that partner
        and a tuple of `Interaction` records describing its partners.
    """

    DrugToDrugInteraction = collections.namedtuple(
        'DrugToDrugInteraction',
        (
            'type',
            'name',
            'interactions',
        ),
    )

    Interaction = collections.namedtuple(
        'Interaction',
        (
            'type',
            'id',
            'name',
            'contraindication',
            'precaution',
        ),
    )

    # the first letter of a KEGG entry decides which database it is from
    entry_types = {'d': 'drug', 'c': 'compound'}
    entry_dbs = {'drug': _Drug(), 'compound': _Compound()}

    def describe(raw):
        # Type, bare ID and name of one interaction partner. Entries with
        # an unknown prefix get `None` for type and name instead of
        # raising KeyError.
        type_ = entry_types.get(raw[:1].lower())
        id_ = raw.split(':')[-1]
        database = entry_dbs.get(type_)
        name = None if database is None else database.get(id_, None)

        return {'type': type_, 'id': id_, 'name': name}

    interactions = collections.defaultdict(
        lambda: {'type': None, 'name': None, 'interactions': []}
    )

    # `drugs` is None by default, hence truthiness instead of `len`
    join = join and bool(drugs)
    asynchronous = not drugs or asynchronous
    drugs = drugs or entry_dbs['drug'].data.keys()
    entries = _kegg_ddi(drugs, join=join, async_=asynchronous)

    for entry in entries:

        source = describe(entry[0])
        target = describe(entry[1])
        # third field: comma separated labels of the interaction
        labels = entry[2].split(',')

        interaction = Interaction(
            type=target['type'],
            id=target['id'],
            name=target['name'],
            contraindication='CI' in labels,
            precaution='P' in labels,
        )

        record = interactions[source['id']]
        record['interactions'].append(interaction)
        record['type'] = source['type']
        record['name'] = source['name']

    return {
        key: DrugToDrugInteraction(
            value['type'],
            value['name'],
            tuple(value['interactions']),
        )
        for key, value in interactions.items()
    }
def _generate_conv_functions():
    """
    Create the module level ID translation functions.

    For each KEGG entity (drug, gene) and each of its foreign ID types a
    pair of functions is generated, one per direction, and stored in the
    module globals. The names are built from the ID types, the KEGG side
    prefixed with ``kegg_`` and dashes replaced by underscores, e.g.
    ``kegg_gene_to_ncbi_geneid`` and ``ncbi_geneid_to_kegg_gene``. Each
    function delegates to ``_kegg_conv``.

    Gene functions take one ``organism`` argument, drug functions take
    no arguments.
    """

    _id_types = (
        ('drug', ('chebi',)),
        ('gene', ('ncbi-geneid', 'uniprot')),
    )
    labels = {
        'chebi': 'ChEBI',
        'ncbi-geneid': 'NCBI Gene',
        'uniprot': 'UniProt',
    }

    def _make_conv_function(args):
        # A factory gives every generated function its own `args`; it
        # avoids both the late binding of the loop variable and the
        # re-assignment of a closure variable inside the function.
        # The ID prefix is stripped on every side except KEGG genes.
        splits = tuple(a != 'gene' for a in args)

        if 'gene' in args:

            def _conv_function(organism):

                # KEGG addresses the gene database by the organism
                dbs = [a if s else organism for s, a in zip(splits, args)]

                return _kegg_conv(*dbs, *splits)

        else:

            def _conv_function():

                return _kegg_conv(*args, *splits)

        return _conv_function

    for entity, id_types in _id_types:

        for id_type in id_types:

            pair = (entity, id_type)

            # Tuples in both directions: a `reversed` iterator would be
            # exhausted after building the synopsis, leaving the name
            # empty.
            for args in (pair, pair[::-1]):

                synopsis = (
                    'Translation dict between ' +
                    ' and '.join(
                        f'{labels.get(a, f"KEGG {a}")} IDs'
                        for a in args
                    ) +
                    '.'
                )

                name = (
                    '_to_'.join(
                        f'kegg_{a}' if a == entity else a
                        for a in args
                    ).replace('-', '_')
                )

                function = _make_conv_function(args)
                function.__name__ = name
                function.__qualname__ = name
                function.__doc__ = synopsis

                if entity == 'gene':

                    function.__doc__ += (
                        '\n\nArgs\n    organism:\n        Name of the '
                        'organism. Gene relations are organism specific.\n'
                    )

                globals()[name] = function


def _kegg_general(
        operation: str,
        *arguments: str,
    ) -> list[list[str]]:
    """
    Run one KEGG API operation and parse its tab separated response.

    Args
        operation:
            Name of the operation, interpolated into the URL template.
        arguments:
            Further path elements of the URL; `None` values are dropped,
            everything else is converted to string.

    Returns
        One list of fields for each non-empty line of the response; an
        empty list if the download yielded no result.
    """

    arguments = [str(arg) for arg in arguments if arg is not None]
    url = '/'.join([_url % operation] + arguments)
    curl_args = {'url': url, 'silent': True, 'large': False}
    c = curl.Curl(**curl_args)
    # `result` might be missing or None, e.g. after a failed download
    result = getattr(c, 'result', None) or ''

    return [line.split('\t') for line in result.split('\n') if line]


async def _kegg_general_async(
        operation: str,
        *arguments: str,
    ) -> list[list[str]]:
    """
    Placeholder for an asynchronous variant of ``_kegg_general``.
    """

    # TODO: Yet to be implemented.
    # For now this only wraps the blocking call; it is kept so a real
    # asynchronous implementation can be added later without changing
    # the structure of the module.
    return _kegg_general(operation, *arguments)


def _kegg_list(
        database: str,
        option: str | None = None,
        organism: str | int | None = None,
    ) -> list[list[str]]:
    """
    Query the `list` operation of the KEGG API.

    Args
        database:
            Name of the database to list.
        option:
            Appended to the query only for the `brite` database.
        organism:
            Appended to the query only for the `pathway` database.
    """

    args = ['list', database]

    if database == 'brite' and option is not None:

        args += common.to_list(option)

    elif database == 'pathway' and organism is not None:

        args += common.to_list(organism)

    return _kegg_general(*args)


def _kegg_conv(
        source_db: str,
        target_db: str,
        source_split: bool = False,
        target_split: bool = False,
    ) -> dict[str, set[str]]:
    """
    Translation table between two ID types by the `conv` operation.

    Args
        source_db:
            Database of the IDs used as keys.
        target_db:
            Database of the IDs used as values.
        source_split:
            Strip the part up to the first colon from the source IDs.
        target_split:
            Strip the part up to the first colon from the target IDs.

    Returns
        A dict of sets, mapping each source ID to its target IDs.
    """

    result = _kegg_general('conv', target_db, source_db)
    conversion_table = collections.defaultdict(set)

    for source, target in result:

        source = source.split(':')[1] if source_split else source
        target = target.split(':')[1] if target_split else target
        conversion_table[source].add(target)

    return dict(conversion_table)


def _kegg_link(source_db: str, target_db: str) -> list[list[str]]:
    """
    Cross-references between two databases by the `link` operation.
    """

    return _kegg_general('link', target_db, source_db)


def _kegg_ddi(
        drug_ids: str | Iterable[str],
        join=True,
        async_: bool = False,
    ):
    """
    Query the `ddi` (drug-drug interaction) operation of the KEGG API.

    Args
        drug_ids:
            One drug ID or an iterable of IDs.
        join:
            Concatenate the IDs by `+` into one combined query instead
            of querying each ID on its own.
        async_:
            Use the (not yet really asynchronous) coroutine based path.

    Returns
        List of records, each a list of fields.
    """

    if join and not isinstance(drug_ids, str):

        drug_ids = '+'.join(common.to_list(drug_ids))

    if async_:

        # The coroutine runs in a worker thread with its own event loop;
        # the context manager makes sure the pool is shut down.
        with ThreadPoolExecutor() as pool:

            return pool.submit(
                asyncio.run,
                _kegg_ddi_async(drug_ids),
            ).result()

    return _kegg_ddi_sync(drug_ids)


def _kegg_ddi_sync(drug_ids: str | Iterable[str]):
    """
    Query drug-drug interactions, one blocking request for each ID.
    """

    return list(
        itertools.chain.from_iterable(
            _kegg_general('ddi', drug_id)
            for drug_id in common.to_list(drug_ids)
        )
    )


async def _kegg_ddi_async(drug_ids):
    """
    Placeholder for the asynchronous variant of ``_kegg_ddi_sync``.
    """

    # TODO: Yet to be implemented.
    # The requests behind the coroutines are still blocking; this stays
    # here so a real implementation can be added later without changing
    # the structure of the module.
    result = []

    for response in asyncio.as_completed([
        _kegg_general_async('ddi', drug_id)
        for drug_id in common.to_list(drug_ids)
    ]):

        the_response = await response
        result.extend(common.to_list(the_response))

    return result


def _kegg_relations(
        source_db: Literal['gene', 'pathway', 'disease', 'drug'],
        target_db: Literal['gene', 'pathway', 'disease', 'drug'],
        organism: str | None = None,
    ) -> list[tuple]:
    """
    Relations between the entries of two KEGG databases.

    Args
        source_db:
            Entity type of the first partner.
        target_db:
            Entity type of the second partner.
        organism:
            Organism, as understood by the KEGG API. Used only if one of
            the databases is `gene`; in that case it defaults to human
            (`hsa`).

    Returns
        List of pairs of `KeggEntry` named tuples, the first one from
        the source, the second from the target database. Besides ID,
        name and type, gene entries carry NCBI Gene and UniProt IDs,
        drug entries carry ChEBI IDs.
    """

    if organism is None and 'gene' in (source_db, target_db):

        # genes can not be queried without organism, fall back to human
        organism = 'hsa'

    l_organism = common.to_list(organism)
    # Only these resources are built with the organism: the ChEBI table
    # takes no organism at all, while the other databases do not use it
    # in their query.
    organism_specific = {'gene', 'ncbi', 'uniprot'}
    # loaded resources, each downloaded only once
    data = {}

    record = collections.namedtuple(
        'KeggEntry',
        (
            'id',
            'name',
            'type',
            'ncbi_gene_ids',
            'uniprot_ids',
            'chebi_ids',
        ),
    )

    def get_data(name, cls_prefix=''):

        if name not in data:

            cls = f'_{cls_prefix}{name.capitalize()}'
            args = l_organism if name in organism_specific else ()
            data[name] = globals()[cls](*args)

        return data[name]

    def db(name):

        return get_data(name)

    def ids(name):

        return get_data(name, cls_prefix='KeggTo')

    def process(entry, type_):

        id_ = db(type_).proc_key(entry)
        name = db(type_).get(id_, None)
        ncbi = ids('ncbi').get(id_) if type_ == 'gene' else ()
        uniprot = ids('uniprot').get(id_) if type_ == 'gene' else ()
        chebi = ids('chebi').get(id_) if type_ == 'drug' else ()

        return record(
            id=id_,
            name=name,
            type=type_,
            ncbi_gene_ids=ncbi,
            uniprot_ids=uniprot,
            chebi_ids=chebi,
        )

    # in the link query genes are addressed by the organism
    args = [
        organism if db_name == 'gene' else db_name
        for db_name in (source_db, target_db)
    ]
    entries = _kegg_link(*args)

    return [
        (process(e[0], source_db), process(e[1], target_db))
        for e in entries
    ]


class _KeggDatabase(ABC):
    """
    Base class of the KEGG database listings: a dict of entries built
    from the output of the `list` operation.
    """

    _data = None
    # leading arguments for `_kegg_list`, typically the database name
    _query_args = None

    def __init__(self, *args):

        self.load(*args)

    @abstractmethod
    def proc_key(self, entry):
        """
        Processes the identifier field of a record.
        """

        return entry

    @abstractmethod
    def proc_value(self, entry):
        """
        Processes the description field of a record.
        """

        return entry

    def load(self, *args):
        """
        Downloads the listing; first field is the key, second the value.
        """

        entries = _kegg_list(*common.to_list(self._query_args), *args)

        self._data = {
            self.proc_key(entry[0]): self.proc_value(entry[1])
            for entry in entries
        }

    def get(self, index, default=None):

        return self._data.get(index, default)

    def __getitem__(self, index):

        return self.get(index)

    @property
    def data(self):

        return self._data


class _Organism(_KeggDatabase):
    """
    The organisms in KEGG, keyed by the second field of each record.
    """

    _query_args = 'organism'

    def load(self, *args):

        entries = _kegg_list(*common.to_list(self._query_args), *args)

        self._data = {
            self.proc_key(entry[1]): self.proc_value(entry[0], entry[2])
            for entry in entries
        }

    def proc_value(self, entry, *fields):
        """
        Combines the fields of one record into a tuple.

        `load` passes two fields (first and third of the record), these
        are returned as a tuple. A call with one single argument keeps
        the earlier behaviour of looking up that argument in the table.
        """

        if fields:

            return (entry,) + fields

        return self.get(entry)

    def proc_key(self, entry):

        return entry


class _Gene(_KeggDatabase):
    """
    The genes of one organism; the organism serves as database name.
    """

    def __init__(self, organism):

        super().__init__(organism)

    def load(self, *args):

        entries = _kegg_list(*common.to_list(self._query_args), *args)

        # the description is taken from the last field of each record
        self._data = {
            self.proc_key(entry[0]): self.proc_value(entry[-1])
            for entry in entries
        }

    def proc_key(self, entry):

        return entry

    def proc_value(self, entry):

        # keep only the part after the last semicolon
        return entry.rsplit(';', maxsplit=1)[-1].strip(' ')


class _Pathway(_KeggDatabase):
    """
    The pathways in KEGG, keyed by `map` prefixed pathway numbers.
    """

    _re_pathway = re.compile(r'\d+')
    _query_args = 'pathway'

    def proc_value(self, entry):

        return entry

    def proc_key(self, entry):

        pathway_id = self._re_pathway.search(entry)

        if pathway_id is None:

            # nothing to normalise without a number in the identifier
            return entry

        # NOTE(review): is this correct? There are pathway prefixes in
        # KEGG other than "map".
        return f'map{pathway_id.group()}'


class _SplitDatabase(_KeggDatabase):
    """
    Databases whose identifiers may carry a `prefix:` to be removed.
    """

    def proc_key(self, entry):

        # `entry` is the identifier string itself: keep what is after
        # the prefix, or all of it if there is no prefix
        return entry.split(':')[-1]

    def proc_value(self, entry):

        # `entry` is the description string itself
        return entry


class _Disease(_SplitDatabase):

    _query_args = 'disease'


class _Drug(_SplitDatabase):

    _query_args = 'drug'


class _Compound(_SplitDatabase):

    _query_args = 'compound'


class _ConversionTable:
    """
    Translation table between two ID types, loaded by ``_kegg_conv``.
    """

    def __init__(
            self,
            *id_types: str,
            source_split: bool = False,
            target_split: bool = False,
        ):

        self._id_types = id_types
        self._splits = {
            'source_split': source_split,
            'target_split': target_split,
        }
        # One dict for each instance: a class level dict would be shared
        # and mix up the contents of all the tables.
        self._table = {}
        self.load()

    def load(self):

        self._table.update(_kegg_conv(*self._id_types, **self._splits))

    def get(self, index, default=None):

        return self._table.get(index, default)

    def __getitem__(self, index):

        return self._table.get(index, None)

    @property
    def table(self):

        return self._table


class _KeggToNcbi(_ConversionTable):

    def __init__(self, organism):

        super().__init__(organism, 'ncbi-geneid', target_split=True)


class _NcbiToKegg(_ConversionTable):

    def __init__(self, organism):

        super().__init__('ncbi-geneid', organism, source_split=True)


class _KeggToUniprot(_ConversionTable):

    def __init__(self, organism):

        super().__init__(organism, 'uniprot', target_split=True)


class _UniprotToKegg(_ConversionTable):

    def __init__(self, organism):

        super().__init__('uniprot', organism, source_split=True)


class _KeggToChebi(_ConversionTable):

    def __init__(self):

        super().__init__(
            'drug',
            'chebi',
            source_split=True,
            target_split=True,
        )


class _ChebiToKegg(_ConversionTable):

    def __init__(self):

        super().__init__(
            'chebi',
            'drug',
            source_split=True,
            target_split=True,
        )


# populate the module with the generated public functions
_generate_relation_functions()
_generate_conv_functions()