#!/usr/bin/env python# -*- coding: utf-8 -*-## This file is part of the `pypath` python module## Copyright 2014-2023# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University## Authors: see the file `README.rst`# Contact: Dénes Türei (turei.denes@gmail.com)## Distributed under the GPLv3 License.# See accompanying file LICENSE.txt or copy at# https://www.gnu.org/licenses/gpl-3.0.html## Website: https://pypath.omnipathdb.org/#fromtypingimportLiteralimportjsonimportcollectionsimportpypath.share.curlascurlimportpypath.resources.urlsasurls
def chembl_targets() -> list[tuple]:
    """
    Retrieves targets data from ChEMBL.

    Downloads the target listing page by page, following the
    `page_meta.next` link of each response until it is empty.

    Returns
        List of drug target records as named tuples with the fields
        `accession` and `target_chembl_id`. `accession` is taken from
        the first target component, or is None if the target has no
        components.
    """

    fields_target = (
        'accession',
        'target_chembl_id',
    )

    ChemblTarget = collections.namedtuple(
        'ChemblTarget',
        fields_target,
        defaults = (None,) * len(fields_target),
    )

    tgt_lst = []
    base_url = urls.urls['chembl']['url']
    url = f"{base_url}{urls.urls['chembl']['target']}"

    while url:

        c = curl.Curl(url, large = True, silent = False)

        # one file is opened per page: close it as soon as it is parsed,
        # otherwise handles accumulate over the whole download
        with open(c.fileobj.name, encoding = 'utf-8') as fp:

            page_dct = json.load(fp)

        tgt_lst.extend(
            ChemblTarget(
                accession = (
                    tgt['target_components'][0]['accession']
                    if tgt['target_components'] else
                    None
                ),
                target_chembl_id = tgt['target_chembl_id'],
            )
            for tgt in page_dct['targets']
        )

        # an empty `next` means this was the last page
        next_page = page_dct['page_meta']['next']
        url = f"{base_url}{next_page}" if next_page else None

    return tgt_lst
def chembl_assays() -> list[tuple]:
    """
    Retrieves assays data from ChEMBL.

    Downloads the assay listing page by page, following the
    `page_meta.next` link of each response until it is empty.

    Returns
        List of assay records as named tuples with the fields
        `assay_chembl_id`, `assay_organism`, `assay_type`,
        `confidence_score` and `target_chembl_id`.
    """

    fields_assay = (
        'assay_chembl_id',
        'assay_organism',
        'assay_type',
        'confidence_score',
        'target_chembl_id',
    )

    ChemblAssay = collections.namedtuple(
        'ChemblAssay',
        fields_assay,
        defaults = (None,) * len(fields_assay),
    )

    assay_lst = []
    base_url = urls.urls['chembl']['url']
    url = f"{base_url}{urls.urls['chembl']['assay']}"

    while url:

        c = curl.Curl(url, large = True, silent = False)

        # one file is opened per page: close it as soon as it is parsed,
        # otherwise handles accumulate over the whole download
        with open(c.fileobj.name, encoding = 'utf-8') as fp:

            page_dct = json.load(fp)

        assay_lst.extend(
            # the record fields have the same names as the JSON keys
            ChemblAssay(**{key: assy_attr[key] for key in fields_assay})
            for assy_attr in page_dct['assays']
        )

        # an empty `next` means this was the last page
        next_page = page_dct['page_meta']['next']
        url = f"{base_url}{next_page}" if next_page else None

    return assay_lst
def chembl_molecules() -> list[tuple]:
    """
    Retrieves molecules data from ChEMBL.

    Downloads the molecule listing page by page, following the
    `page_meta.next` link of each response until it is empty.

    Returns
        Molecule records as named tuples. Values that live in the nested
        `molecule_hierarchy`, `molecule_properties` and
        `molecule_structures` sections of a record are None if the
        section or the key is missing. `xrefs` is a list of dicts with
        the keys `xref_id` and `xref_src`, or None if the molecule has
        no cross references.
    """

    def _get(mol, key0, key1):
        # look up `key1` in the `molecule_<key0>` section of a record;
        # the section may be absent or null, in both cases return None
        section = mol.get(f'molecule_{key0}')

        return section.get(key1) if section else None


    fields_molecule = (
        'name',
        'alogp',
        'canonical_smiles',
        'chirality',
        'full_mwt',
        'heavy_atoms',
        'species',
        'qed_weighted',
        'type',
        'structure_type',
        'chembl',
        'parent_chembl',
        'prodrug',
        'std_inchi_key',
        'std_inchi',
        'xrefs',
    )

    ChemblMolecule = collections.namedtuple(
        'ChemblMolecule',
        fields_molecule,
        defaults = (None,) * len(fields_molecule),
    )

    mol_lst = []
    base_url = urls.urls['chembl']['url']
    url = base_url + urls.urls['chembl']['molecule']

    while url:

        # exactly one request per page (the first page used to be
        # requested twice)
        c = curl.Curl(url, large = True, silent = False)

        # one file is opened per page: close it as soon as it is parsed,
        # otherwise handles accumulate over the whole download
        with open(c.fileobj.name, encoding = 'utf-8') as fp:

            page_dct = json.load(fp)

        mol_lst.extend(
            ChemblMolecule(
                name = mol['pref_name'],
                chirality = mol['chirality'],
                type = mol['molecule_type'],
                prodrug = mol['prodrug'],
                structure_type = mol['structure_type'],

                chembl = _get(mol, 'hierarchy', 'molecule_chembl_id'),
                parent_chembl = _get(mol, 'hierarchy', 'parent_chembl_id'),

                alogp = _get(mol, 'properties', 'alogp'),
                full_mwt = _get(mol, 'properties', 'full_mwt'),
                heavy_atoms = _get(mol, 'properties', 'heavy_atoms'),
                species = _get(mol, 'properties', 'molecular_species'),
                qed_weighted = _get(mol, 'properties', 'qed_weighted'),

                canonical_smiles = _get(
                    mol, 'structures', 'canonical_smiles'
                ),
                std_inchi_key = _get(
                    mol, 'structures', 'standard_inchi_key'
                ),
                std_inchi = _get(mol, 'structures', 'standard_inchi'),

                xrefs = (
                    [
                        {
                            'xref_id': rec['xref_id'],
                            'xref_src': rec['xref_src'],
                        }
                        for rec in mol['cross_references']
                    ]
                    if mol['cross_references'] else
                    None
                ),
            )
            for mol in page_dct['molecules']
        )

        # an empty `next` means this was the last page
        next_page = page_dct['page_meta']['next']
        url = f"{base_url}{next_page}" if next_page else None

    return mol_lst
def chembl_activities(
        #TODO: are these below all the allowed values?
        standard_relation: Literal['=', '>', '<', '>=', '<='],
        pchembl_value_none: bool = False,
    ) -> list[tuple]:
    """
    Retrieves activities data from ChEMBL.

    Downloads the activity listing page by page, following the
    `page_meta.next` link of each response until it is empty.

    Args
        standard_relation:
            Which standard relation is needed. Sent to the server as the
            `standard_relation__exact` query parameter.
        pchembl_value_none:
            Sent to the server as the `pchembl_value__isnull` query
            parameter (`true` or `false`). Presumably True selects only
            records without a pchembl value and False only records that
            have one -- TODO: confirm against the ChEMBL API.

    Returns
        List of activity records as named tuples.
        `standard_units` attribute is not included in the returned
        records. Only records without `data_validity_comment` are
        returned, hence the `data_validity_comment` field of every
        returned record is None.
    """

    fields_activity = (
        'assay_chembl',
        'data_validity_comment',
        'chembl',
        'pchembl',
        'standard_flag',
        'standard_relation',
        'standard_value',
        'standard_type',
        'target_chembl',
        'document',
    )

    ChemblActivity = collections.namedtuple(
        'ChemblActivity',
        fields_activity,
        defaults = (None,) * len(fields_activity),
    )

    activity_lst = []
    base_url = urls.urls['chembl']['url']
    url = (
        f"{base_url}"
        f"{urls.urls['chembl']['activity']}"
        f"&pchembl_value__isnull={str(pchembl_value_none).lower()}"
        f"&standard_relation__exact={standard_relation}"
    )

    while url:

        c = curl.Curl(url, large = True, silent = False)

        # one file is opened per page: close it as soon as it is parsed,
        # otherwise handles accumulate over the whole download
        with open(c.fileobj.name, encoding = 'utf-8') as fp:

            page_dct = json.load(fp)

        activity_lst.extend(
            ChemblActivity(
                assay_chembl = act['assay_chembl_id'],
                data_validity_comment = act['data_validity_comment'],
                chembl = act['molecule_chembl_id'],
                pchembl = act['pchembl_value'],
                standard_flag = act['standard_flag'] == 1,
                standard_relation = act['standard_relation'],
                standard_value = act['standard_value'],
                standard_type = act['standard_type'],
                target_chembl = act['target_chembl_id'],
                document = act['document_chembl_id'],
            )
            for act in page_dct['activities']
            # skip records carrying a data validity comment
            if act['data_validity_comment'] is None
        )

        # an empty `next` means this was the last page
        next_page = page_dct['page_meta']['next']
        url = f"{base_url}{next_page}" if next_page else None

    return activity_lst
def chembl_documents() -> dict[str, str]:
    """
    Retrieves ChEMBL document ID to PubMed ID conversion.

    Downloads the document listing page by page, following the
    `page_meta.next` link of each response until it is empty.

    Returns
        Dictionary of ChEMBL document IDs as keys and PubMed IDs as
        values. Documents without a PubMed ID are left out.
    """

    document_dict = {}
    base_url = urls.urls['chembl']['url']
    url = f"{base_url}{urls.urls['chembl']['document']}"

    while url:

        c = curl.Curl(url, large = True, silent = False)

        # one file is opened per page: close it as soon as it is parsed,
        # otherwise handles accumulate over the whole download
        with open(c.fileobj.name, encoding = 'utf-8') as fp:

            page_dct = json.load(fp)

        document_dict.update(
            (doc['document_chembl_id'], doc['pubmed_id'])
            for doc in page_dct['documents']
            if doc['pubmed_id']
        )

        # an empty `next` means this was the last page
        next_page = page_dct['page_meta']['next']
        url = f"{base_url}{next_page}" if next_page else None

    return document_dict
def chembl_drug_indications(
        max_phase_threshold: int = 0,
    ) -> list[tuple]:
    """
    Retrieves drug indications data from ChEMBL.

    Downloads the drug indication listing page by page, following the
    `page_meta.next` link of each response until it is empty.

    Args
        max_phase_threshold:
            The threshold for maximum phase of the drug for which the
            indication is valid. With the default value 0 all records
            are returned; with any other value only the records with
            a maximum phase strictly greater than the threshold.

    Returns
        List of drug indications as namedtuples. `max_phase` is
        converted to float.
    """

    fields_indication = (
        'efo_id',
        'efo_term',
        'max_phase',
        'mesh_heading',
        'mesh_id',
        'molecule_chembl',
    )

    ChemblIndication = collections.namedtuple(
        'ChemblIndication',
        fields_indication,
        defaults = (None,) * len(fields_indication),
    )

    indication_lst = []
    base_url = urls.urls['chembl']['url']
    url = f"{base_url}{urls.urls['chembl']['drug_indication']}"

    while url:

        c = curl.Curl(url, large = True, silent = False)

        # one file is opened per page: close it as soon as it is parsed,
        # otherwise handles accumulate over the whole download
        with open(c.fileobj.name, encoding = 'utf-8') as fp:

            page_dct = json.load(fp)

        for ind in page_dct['drug_indications']:

            # converted once, used both for filtering and in the record
            max_phase = float(ind['max_phase_for_ind'])

            # threshold 0 disables the filtering
            if max_phase_threshold != 0 and max_phase <= max_phase_threshold:

                continue

            indication_lst.append(
                ChemblIndication(
                    efo_id = ind['efo_id'],
                    efo_term = ind['efo_term'],
                    max_phase = max_phase,
                    mesh_heading = ind['mesh_heading'],
                    mesh_id = ind['mesh_id'],
                    molecule_chembl = ind['molecule_chembl_id'],
                )
            )

        # an empty `next` means this was the last page
        next_page = page_dct['page_meta']['next']
        url = f"{base_url}{next_page}" if next_page else None

    return indication_lst
def chembl_mechanisms() -> list[tuple]:
    """
    Retrieves mechanism data from ChEMBL.

    Downloads the mechanism listing page by page, following the
    `page_meta.next` link of each response until it is empty.

    Returns
        List of mechanisms as namedtuples. `direct_interaction` and
        `disease_efficacy` are booleans: True if the value in the
        record equals 1, False otherwise.
    """

    fields_mechanism = (
        'action_type',
        'direct_interaction',
        'disease_efficacy',
        'mechanism_of_action',
        'chembl',
        'target_chembl',
    )

    ChemblMechanism = collections.namedtuple(
        'ChemblMechanism',
        fields_mechanism,
        defaults = (None,) * len(fields_mechanism),
    )

    mechanism_lst = []
    base_url = urls.urls['chembl']['url']
    url = f"{base_url}{urls.urls['chembl']['mechanism']}"

    while url:

        c = curl.Curl(url, large = True, silent = False)

        # one file is opened per page: close it as soon as it is parsed,
        # otherwise handles accumulate over the whole download
        with open(c.fileobj.name, encoding = 'utf-8') as fp:

            page_dct = json.load(fp)

        mechanism_lst.extend(
            ChemblMechanism(
                action_type = mech['action_type'],
                direct_interaction = mech['direct_interaction'] == 1,
                disease_efficacy = mech['disease_efficacy'] == 1,
                mechanism_of_action = mech['mechanism_of_action'],
                chembl = mech['molecule_chembl_id'],
                target_chembl = mech['target_chembl_id'],
            )
            for mech in page_dct['mechanisms']
        )

        # an empty `next` means this was the last page
        next_page = page_dct['page_meta']['next']
        url = f"{base_url}{next_page}" if next_page else None

    return mechanism_lst