#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#from__future__importannotationsfromtypingimportOptionalimportosimportreimportcsvimportcollectionsimportbase64fromlxmlimportetreefromzipfileimportZipFileimportpypath.resources.urlsasurlsimportpypath.share.curlascurlimportpypath.share.commonascommonimportpypath.share.sessionassessionimportpypath.share.settingsassettingsimportpypath.inputs.credentialsascredentials_logger=session.Logger(name='drugbank_input')_log=_logger._logdef_drugbank_credentials(user:Optional[str]=None,passwd:Optional[str]=None,credentials_fname:Optional[str]=None,)->tuple[str,str]:returncredentials.credentials(user=user,passwd=passwd,resource='DrugBank',from_file=credentials_fname,)def_drugbank_download(*args,user:Optional[str]=None,passwd:Optional[str]=None,credentials_fname:Optional[str]=None,**kwargs)->Optional[curl.Curl]:try:cred=_drugbank_credentials(user=user,passwd=passwd,credentials_fname=credentials_fname,)exceptRuntimeError:_log('No credentials available for the DrugBank website.')returnNonedefaults={'large':True,'silent':False,'compr':'zip',}defaults.update(kwargs)auth_str=base64.b64encode(f"{cred['user']}:{cred['passwd']}".encode())defaults['req_headers']=[f'Authorization: Basic {auth_str.decode()}',settings.get('user_agent'),]returncurl.Curl(*args,**defaults)
def drugbank_raw_interactions(
        user: Optional[str] = None,
        passwd: Optional[str] = None,
        credentials_fname: Optional[str] = None,
        pharma_active: bool = False,
    ) -> list[tuple]:
    """
    Retrieves protein identifiers from Drugbank.

    Downloads the carrier, enzyme, target and transporter identifier
    tables and emits one record for each drug-protein pair.

    Args
        user: E-mail address with registered DrugBank account.
        passwd: Password for the DrugBank account.
        credentials_fname: File name or path to a file with DrugBank
            login credentials.
        pharma_active: Only pharmacologically active relations.

    Returns
        List of drug-protein relations: named tuples with the fields
        `drugbank_id`, `uniprot_id` and `relation`. Relation types for
        which the download is not available are skipped.
    """

    # zero based position of the UniProt ID column; the semicolon
    # separated DrugBank IDs are in the last column
    uniprot_col = 5

    csv_name = 'pharmacologically_active.csv' if pharma_active else 'all.csv'

    fields = (
        'drugbank_id',
        'uniprot_id',
        'relation',
    )

    DrugbankRawInteraction = collections.namedtuple(
        'DrugbankRawInteraction',
        fields,
        defaults=(None,) * len(fields),
    )

    result = []

    for rel in ('carrier', 'enzyme', 'target', 'transporter'):

        url = urls.urls['drugbank'][f'drug_{rel}_identifiers']

        c = _drugbank_download(
            url=url,
            user=user,
            passwd=passwd,
            credentials_fname=credentials_fname,
            files_needed=(csv_name,),
        )

        if not c:

            continue

        # a proper CSV parser, because a plain split on commas shifts the
        # columns whenever a quoted field contains a comma
        rows = csv.reader(c.result[csv_name])

        # the first row is the header
        next(rows, None)

        for row in rows:

            # blank or truncated line
            if len(row) <= uniprot_col:

                continue

            uniprot = row[uniprot_col].strip()

            result.extend(
                DrugbankRawInteraction(
                    drugbank_id=drug.strip(),
                    uniprot_id=uniprot,
                    relation=rel,
                )
                for drug in row[-1].split(';')
                # a trailing separator would yield an empty drug ID
                if drug.strip()
            )

    return result
def drugbank_interactions(
        user: Optional[str] = None,
        passwd: Optional[str] = None,
        credentials_fname: Optional[str] = None,
        pharma_active: bool = False,
    ) -> list[tuple]:
    """
    Drug-protein and protein-drug interactions from Drugbank.

    Proteins are identified by the UniProt IDs of the raw relations,
    drugs by their PubChem compound IDs. Relations of drugs without a
    PubChem compound ID are dropped.

    Args
        user: E-mail address with registered DrugBank account.
        passwd: Password for the DrugBank account.
        credentials_fname: File name or path to a file with DrugBank
            login credentials.
        pharma_active: Only pharmacologically active interactions.

    Returns
        List of drug-protein and protein-drug interactions: named tuples
        with the fields `source`, `target`, `source_entity_type`,
        `target_entity_type` and `interaction_type`. For the `target`
        relation the drug is the source and the protein is the target;
        for all other relations the protein is the source.
    """

    raw = drugbank_raw_interactions(
        user=user,
        passwd=passwd,
        pharma_active=pharma_active,
        credentials_fname=credentials_fname,
    )

    # the credentials file has to be passed here too, otherwise the drug
    # table can not be retrieved when the credentials come from a file
    drugs = {
        d.drugbank: d
        for d in drugbank_drugs(
            user=user,
            passwd=passwd,
            credentials_fname=credentials_fname,
        )
    }

    DrugbankInteraction = collections.namedtuple(
        'DrugbankInteraction',
        (
            'source',
            'target',
            'source_entity_type',
            'target_entity_type',
            'interaction_type',
        )
    )

    result = []

    for r in raw:

        drug = drugs.get(r.drugbank_id)

        # TODO: later engage the mapping module here
        if not drug or not drug.pubchem_cid:

            continue

        partners = (r.uniprot_id, drug.pubchem_cid)
        entity_types = ('protein', 'drug')

        if r.relation == 'target':

            # the drug acts on its target: drug first, protein second
            partners = partners[::-1]
            entity_types = entity_types[::-1]

        result.append(
            DrugbankInteraction(
                *partners,
                *entity_types,
                interaction_type=r.relation,
            )
        )

    return result
def drugbank_drugs(
        user: Optional[str] = None,
        passwd: Optional[str] = None,
        credentials_fname: Optional[str] = None,
    ) -> list[tuple]:
    """
    Retrieves drug identifiers from Drugbank.

    Each drug is annotated by its various database cross-references.
    The records are built from the "structure links" table and are
    completed with three fields (`type`, `pharmgkb`, `het`) from the
    "drug links" table.

    Args
        user: E-mail address with registered DrugBank account.
        passwd: Password for the DrugBank account.
        credentials_fname: File name or path to a file with DrugBank
            login credentials.

    Returns
        List of named tuples, each field corresponding to various
        identifiers. The list is empty if the structure table could not
        be downloaded; if only the drug table is missing, the fields
        coming from it are None.
    """

    fields = (
        'drugbank',
        'name',
        'type',
        'groups',
        'cas',
        'inchikey',
        'inchi',
        'smiles',
        'formula',
        'kegg_compound',
        'kegg_drug',
        'pubchem_cid',
        'pubchem_sid',
        'chebi',
        'chembl',
        'pharmgkb',
        'het',
    )

    raw = {}

    for table in ('drug', 'structure'):

        csv_ = f'{table} links.csv'

        c = _drugbank_download(
            url=urls.urls['drugbank'][f'all_{table}s'],
            user=user,
            passwd=passwd,
            credentials_fname=credentials_fname,
            files_needed=(csv_,),
        )

        if not c:

            continue

        raw[table] = {
            rec['DrugBank ID']: rec
            for rec in csv.DictReader(c.result[csv_], delimiter=',')
        }

    DrugbankDrug = collections.namedtuple(
        'DrugbankDrug',
        fields,
        defaults=(None,) * len(fields),
    )

    # a failed download leaves its table out of `raw`: fall back to empty
    # tables instead of failing with KeyError
    structures = raw.get('structure', {})
    drug_links = raw.get('drug', {})

    result = []

    for dbid, struct in structures.items():

        drug = drug_links.get(dbid, {})

        result.append(
            DrugbankDrug(
                drugbank=dbid,
                name=struct['Name'],
                type=drug.get('Drug Type', None),
                groups=struct['Drug Groups'],
                cas=struct['CAS Number'],
                inchikey=struct['InChIKey'],
                inchi=struct['InChI'],
                smiles=struct['SMILES'],
                formula=struct['Formula'],
                kegg_compound=struct['KEGG Compound ID'],
                kegg_drug=struct['KEGG Drug ID'],
                pubchem_cid=struct['PubChem Compound ID'],
                pubchem_sid=struct['PubChem Substance ID'],
                chebi=struct['ChEBI ID'],
                chembl=struct['ChEMBL ID'],
                pharmgkb=drug.get('PharmGKB ID', None),
                het=drug.get('HET ID', None),
            )
        )

    return result
def drugbank_annotations(
        user: Optional[str] = None,
        passwd: Optional[str] = None,
        credentials_fname: Optional[str] = None,
    ) -> dict[str, set[tuple]]:
    """
    Drug annotations from Drugbank.

    The annotations are restricted to the drug molecule type and drug
    status.

    Args
        user: E-mail address with registered DrugBank account.
        passwd: Password for the DrugBank account.
        credentials_fname: File name or path to a file with DrugBank
            login credentials.

    Returns
        Dict of drug annotations: PubChem compound IDs as keys and sets
        of named tuples with the fields `type` and `status` as values.
        Drugs without a PubChem compound ID are omitted. In `status` the
        comma separated drug groups are joined by semicolons.
    """

    drugs = drugbank_drugs(
        user=user,
        passwd=passwd,
        credentials_fname=credentials_fname,
    )

    DrugbankAnnotation = collections.namedtuple(
        'DrugbankAnnotation',
        (
            'type',
            'status',
        )
    )

    # raw string: `\s` is an invalid escape in a plain string literal
    group_sep = re.compile(r',\s*')

    result = collections.defaultdict(set)

    for d in drugs:

        if not d.pubchem_cid:

            continue

        result[d.pubchem_cid].add(
            DrugbankAnnotation(
                type=d.type,
                # a missing groups value is treated as empty string
                status=group_sep.sub(';', d.groups or ''),
            )
        )

    return dict(result)
def drugbank_mapping(
        id_type: str,
        target_id_type: str,
        user: Optional[str] = None,
        passwd: Optional[str] = None,
        credentials_fname: Optional[str] = None,
    ) -> dict[str, set[str]]:
    """
    Identifier translation table from DrugBank.

    Available ID types: drugbank, name, type, groups, cas, inchikey,
    inchi, smiles, formula, kegg_compound, kegg_drug, pubchem_compound,
    pubchem_substance, chebi, chembl, pharmgkb, het.

    The ID type labels are lowercased, a trailing "id" together with the
    character before it is removed (unless that character is "c" or "s"),
    and spaces are replaced by underscores before they are looked up
    among the fields of the drug records.

    Args
        id_type: The identifier type to be used as keys.
        target_id_type: The identifier type that will be collected into
            the values.
        user: E-mail address with registered DrugBank account.
        passwd: Password for the DrugBank account.
        credentials_fname: File name or path to a file with DrugBank
            login credentials.

    Returns
        An identifier translation table: a dict with sets of target
        identifiers as values. Only drugs having both identifiers are
        included.
    """

    aliases = {
        'pubchem_compound': 'pubchem_cid',
        'pubchem_substance': 'pubchem_sid',
    }

    def _field_name(label):
        # translate a user provided label to a field of the drug records

        label = re.sub('[^cs]id$', '', label.lower())
        label = label.replace(' ', '_')

        return aliases.get(label, label)

    records = drugbank_drugs(
        user=user,
        passwd=passwd,
        credentials_fname=credentials_fname,
    )

    key_field = _field_name(id_type)
    value_field = _field_name(target_id_type)

    table = {}

    for rec in records:

        key = getattr(rec, key_field)
        value = getattr(rec, value_field)

        if key and value:

            table.setdefault(key, set()).add(value)

    return table
class DrugbankFull:
    """
    This is a wrapper around the Drugbank full database XML file.
    Provides access to the full Drugbank database.

    The class provides four methods:

    - `drugbank_drugs_full`: a list of namedtuples, each of which
      represents a drug.
    - `drugbank_targets_full`: a list of namedtuples, each of which
      represents a drug's target.
    - `drugbank_external_ids_full`: the external identifiers of drugs.
    - `drugbank_properties_full`: the calculated properties of drugs.

    Args
        user: E-mail address with registered DrugBank account.
        passwd: Password for the DrugBank account.
    """

    # NOTE(review): no constructor is visible in this section. The methods
    # below read `self.drugs` (iterated, each item queried by `xpath`) and
    # `self.ns` (passed as `namespaces` to `xpath`); presumably these are
    # set up when the XML file is loaded -- confirm.

    def _primary_id(self, drug) -> str:
        """
        Primary DrugBank ID of a drug element.

        Raises IndexError if none of the `drugbank-id` elements is marked
        by `primary="true"`.
        """

        return [
            i
            for i in drug.xpath('db:drugbank-id', namespaces=self.ns)
            if i.attrib.get('primary') == 'true'
        ][0].text

    def drugbank_drugs_full(
            self,
            fields: str | list[str] | None = None,
        ) -> list[tuple]:
        """
        Returns a list of namedtuples containing detailed information
        about drugs.

        Args
            fields: The fields to return. If None, all XML fields are
                returned. Default: None

        Returns
            A list of namedtuples containing information about drugs.
            The field `drugbank_id` is always included. Fields with one
            value are provided as a single string, fields with multiple
            values as a list of unique strings in the order of the XML,
            empty fields are None.
        """

        basic_fields = [
            'drugbank_id',
            'type',
            'name',
            'description',
            'cas_number',
            'unii',
            'average_mass',
            'monoisotopic_mass',
            'state',
            'synthesis_reference',
            'indication',
            'pharmacodynamics',
            'mechanism_of_action',
            'toxicity',
            'metabolism',
            'absorption',
            'half_life',
            'protein_binding',
            'route_of_elimination',
            'volume_of_distribution',
            'clearance',
            'fda_label',
            'msds',
        ]

        # `path` is appended to the element of the field; if `key` is
        # present, the value is read from that attribute instead of the
        # text of the element
        fields_w_subfields = {
            'groups': {'path': '/db:group'},
            'general_references': {
                'path': '/db:articles/db:article/db:pubmed-id',
            },
            'classification': {'path': '/db:class'},
            'synonyms': {'path': '/db:synonym'},
            'products': {'path': '/db:product/db:name'},
            'international_brands': {
                'path': '/db:international-brand/db:name',
            },
            'mixtures': {'path': '/db:mixture/db:name'},
            'packagers': {'path': '/db:packager/db:name'},
            'manufacturers': {'path': '/db:manufacturer/db:name'},
            'categories': {'path': '/db:category/db:mesh-id'},
            'affected_organisms': {'path': '/db:affected-organism'},
            'atc_codes': {'path': '/db:atc-code', 'key': 'code'},
            'ahfs_codes': {'path': '/db:ahfs-code', 'key': 'code'},
            'pdb_entries': {'path': '/db:pdb-entry'},
            'patents': {'path': '/db:patent/db:number'},
            'food_interactions': {'path': '/db:food-interaction'},
            'drug_interactions': {
                'path': '/db:drug-interaction/db:drugbank-id',
            },
            'pathways': {'path': '/db:pathway/db:smpdb-id'},
        }

        # TODO: later process and engage fields below
        # future_fields: 'salts', 'prices', 'dosages', 'sequences',
        #                'experimental_properties', 'external_links',
        #                'reactions', 'snp_effects',
        #                'snp_adverse_drug_reactions'

        fields = fields or basic_fields + list(fields_w_subfields.keys())
        # a copy, so the insert below never alters the list of the caller
        fields = list(common.to_list(fields))

        if 'drugbank_id' not in fields:

            fields.insert(0, 'drugbank_id')

        result = []
        record = collections.namedtuple('DrugbankDrug', fields)

        for drug in self.drugs:

            field_dict = {}

            for field in fields:

                if field == 'drugbank_id':

                    field_dict[field] = self._primary_id(drug)
                    continue

                if field == 'type':

                    field_dict[field] = drug.get('type')
                    continue

                subfield = fields_w_subfields.get(field, {})
                path_to_field = (
                    f"db:{field.replace('_', '-')}"
                    f"{subfield.get('path', '')}"
                )
                elements = drug.xpath(path_to_field, namespaces=self.ns)

                if 'key' in subfield:

                    values = [f.get(subfield['key']) for f in elements]

                else:

                    values = [f.text for f in elements]

                # unique values, in a reproducible (document) order
                field_dict[field] = list(dict.fromkeys(values))

            for k, v in field_dict.items():

                if v and not isinstance(v, str):

                    v = [elem.replace('\r\n', ' ') for elem in v if elem]

                    if len(v) == 1:

                        v = v[0]

                # empty strings and empty lists become None
                field_dict[k] = v or None

            result.append(record(**field_dict))

        return result

    def drugbank_targets_full(
            self,
            fields: str | list[str] | None = None,
        ) -> list[tuple]:
        """
        Returns a list of namedtuples containing detailed information
        about drug-target interactions.

        Args
            fields: The fields to return. Available fields: drugbank_id,
                id, name, organism, actions, references, known_action,
                polypeptide. If None, all of them are returned.
                Default: None

        Returns
            A list of namedtuples containing information about the
            target of drugs, one for each target of each drug. The field
            `drugbank_id` is always included. Fields with one value are
            provided as a single value, fields with multiple values as a
            list, empty fields are None. The values of `polypeptide` are
            tuples of the `id` and `source` attributes.
        """

        # fields read from the text of the child element of the same name
        simple_fields = {'id', 'name', 'organism', 'known_action'}

        # fields read from the text of elements deeper in the tree
        nested_paths = {
            'actions': 'db:actions/db:action',
            'references': 'db:references/db:articles/db:article/db:pubmed-id',
        }

        all_fields = [
            'drugbank_id',
            'id',
            'name',
            'organism',
            'actions',
            'references',
            'known_action',
            'polypeptide',
        ]

        # a copy, so the insert below never alters the list of the caller
        fields = list(common.to_list(fields or all_fields))

        if 'drugbank_id' not in fields:

            fields.insert(0, 'drugbank_id')

        result = []
        record = collections.namedtuple('DrugbankTarget', fields)

        for drug in self.drugs:

            db_id = self._primary_id(drug)

            for target in drug.xpath(
                'db:targets/db:target',
                namespaces=self.ns,
            ):

                target_dict = {'drugbank_id': db_id}

                for field in fields:

                    if field in simple_fields:

                        target_dict[field] = [
                            f.text
                            for f in target.xpath(
                                f"db:{field.replace('_', '-')}",
                                namespaces=self.ns,
                            )
                        ]

                    elif field in nested_paths:

                        target_dict[field] = [
                            f.text
                            for f in target.xpath(
                                nested_paths[field],
                                namespaces=self.ns,
                            )
                        ]

                    elif field == 'polypeptide':

                        target_dict[field] = [
                            (f.get('id'), f.get('source'))
                            for f in target.xpath(
                                'db:polypeptide',
                                namespaces=self.ns,
                            )
                        ]

                for k, v in target_dict.items():

                    if not v:

                        target_dict[k] = None

                    elif not isinstance(v, str) and len(v) == 1:

                        # unwrap lists of one element
                        target_dict[k] = v[0]

                result.append(record(**target_dict))

        return result

    def drugbank_external_ids_full(self) -> dict[str, dict]:
        """
        Returns a dictionary containing all external identifiers of drugs.

        Returns
            Dict with primary DrugBank IDs as keys and dicts of resource
            name - identifier pairs as values. Drugs without external
            identifiers are not included; if a resource occurs more than
            once for a drug, its last identifier is kept.
        """

        result = {}

        for drug in self.drugs:

            db_id = self._primary_id(drug)

            for ext_id in drug.xpath(
                'db:external-identifiers/db:external-identifier',
                namespaces=self.ns,
            ):

                source = ext_id.xpath(
                    'db:resource',
                    namespaces=self.ns,
                )[0].text
                identifier = ext_id.xpath(
                    'db:identifier',
                    namespaces=self.ns,
                )[0].text

                result.setdefault(db_id, {})[source] = identifier

        return result

    def drugbank_properties_full(self) -> dict[str, dict]:
        """
        Returns a dictionary containing calculated properties of drugs.

        Returns
            Dict with primary DrugBank IDs as keys and dicts of property
            kind - value pairs as values. Drugs without calculated
            properties are not included.
        """

        result = {}

        for drug in self.drugs:

            db_id = self._primary_id(drug)

            for prop in drug.xpath(
                'db:calculated-properties/db:property',
                namespaces=self.ns,
            ):

                kind = prop.xpath('db:kind', namespaces=self.ns)[0].text
                value = prop.xpath('db:value', namespaces=self.ns)[0].text

                result.setdefault(db_id, {})[kind] = value

        return result