#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from typing import Literal
import json
import collections
import pypath.share.curl as curl
import pypath.resources.urls as urls
[docs]
def chembl_targets() -> list[tuple]:
    """
    Retrieves targets data from ChEMBL.

    The target endpoint is paginated: the first page is requested from the
    URL configured under `urls.urls['chembl']`, then the `next` link found
    in each page's `page_meta` is followed until it is empty.

    Returns
        List of drug target records as named tuples with the fields
        `accession` (accession of the first target component, or `None`
        if the target has no components) and `target_chembl_id`.
    """

    fields_target = (
        'accession',
        'target_chembl_id',
    )

    ChemblTarget = collections.namedtuple(
        'ChemblTarget',
        fields_target,
        defaults = (None,) * len(fields_target),
    )

    tgt_lst = []
    # Empty dict means "no page fetched yet"; afterwards it holds the
    # most recently parsed page, including its paging metadata.
    page_dct = {}

    while True:

        if not page_dct:

            url = (
                f"{urls.urls['chembl']['url']}"
                f"{urls.urls['chembl']['target']}"
            )

        elif page_dct['page_meta']['next']:

            url = (
                f"{urls.urls['chembl']['url']}"
                f"{page_dct['page_meta']['next']}"
            )

        else:

            break

        c = curl.Curl(url, large=True, silent=False)

        # The downloaded file is re-opened by path as UTF-8 text; the
        # context manager makes sure the handle is closed for every page.
        with open(c.fileobj.name, encoding='utf-8') as fileobj:

            page_dct = json.load(fileobj)

        tgt_lst.extend(
            ChemblTarget(
                accession = (
                    # Only the first component is used, even if the
                    # target has several.
                    tgt['target_components'][0]['accession']
                        if tgt['target_components'] else
                    None
                ),
                target_chembl_id = tgt['target_chembl_id'],
            )
            for tgt in page_dct['targets']
        )

    return tgt_lst
[docs]
def chembl_assays() -> list[tuple]:
    """
    Retrieves assays data from ChEMBL.

    The assay endpoint is paginated: the first page is requested from the
    URL configured under `urls.urls['chembl']`, then the `next` link found
    in each page's `page_meta` is followed until it is empty.

    Returns
        List of assay records as named tuples with the fields
        `assay_chembl_id`, `assay_organism`, `assay_type`,
        `confidence_score` and `target_chembl_id`, copied unchanged
        from the API records.
    """

    fields_assay = (
        'assay_chembl_id',
        'assay_organism',
        'assay_type',
        'confidence_score',
        'target_chembl_id',
    )

    ChemblAssay = collections.namedtuple(
        'ChemblAssay',
        fields_assay,
        defaults = (None,) * len(fields_assay),
    )

    assay_lst = []
    # Empty dict means "no page fetched yet"; afterwards it holds the
    # most recently parsed page, including its paging metadata.
    page_dct = {}

    while True:

        if not page_dct:

            url = (
                f"{urls.urls['chembl']['url']}"
                f"{urls.urls['chembl']['assay']}"
            )

        elif page_dct['page_meta']['next']:

            url = (
                f"{urls.urls['chembl']['url']}"
                f"{page_dct['page_meta']['next']}"
            )

        else:

            break

        c = curl.Curl(url, large=True, silent=False)

        # The downloaded file is re-opened by path as UTF-8 text; the
        # context manager makes sure the handle is closed for every page.
        with open(c.fileobj.name, encoding='utf-8') as fileobj:

            page_dct = json.load(fileobj)

        assay_lst.extend(
            # The named tuple fields carry the same names as the keys
            # of the API record.
            ChemblAssay(**{key: assy_attr[key] for key in fields_assay})
            for assy_attr in page_dct['assays']
        )

    return assay_lst
[docs]
def chembl_molecules() -> list[tuple]:
    """
    Retrieves molecules data from ChEMBL.

    The molecule endpoint is paginated: the first page is requested from
    the URL configured under `urls.urls['chembl']`, then the `next` link
    found in each page's `page_meta` is followed until it is empty.

    Returns
        Molecule records as a list of named tuples. Top level attributes
        (`name`, `chirality`, `type`, `prodrug`, `structure_type`) are
        copied from the record; the remaining fields are looked up in the
        `molecule_hierarchy`, `molecule_properties` and
        `molecule_structures` sub-records and are `None` if the sub-record
        or the key is missing. `xrefs` is a list of dicts with the keys
        `xref_id` and `xref_src`, or `None` if the molecule has no
        cross references.
    """

    def _get(mol, key0, key1):
        """
        Value of `key1` in the `molecule_<key0>` sub-record of `mol`;
        `None` if the sub-record is absent, empty or lacks the key.
        """

        return (mol.get(f'molecule_{key0}') or {}).get(key1)


    fields_molecule = (
        'name',
        'alogp',
        'canonical_smiles',
        'chirality',
        'full_mwt',
        'heavy_atoms',
        'species',
        'qed_weighted',
        'type',
        'structure_type',
        'chembl',
        'parent_chembl',
        'prodrug',
        'std_inchi_key',
        'std_inchi',
        'xrefs',
    )

    ChemblMolecule = collections.namedtuple(
        'ChemblMolecule',
        fields_molecule,
        defaults = (None,) * len(fields_molecule),
    )

    mol_lst = []
    # Empty dict means "no page fetched yet"; afterwards it holds the
    # most recently parsed page, including its paging metadata.
    page_dct = {}

    while True:

        if not page_dct:

            url = (
                f"{urls.urls['chembl']['url']}"
                f"{urls.urls['chembl']['molecule']}"
            )

        elif page_dct['page_meta']['next']:

            url = (
                f"{urls.urls['chembl']['url']}"
                f"{page_dct['page_meta']['next']}"
            )

        else:

            break

        # Exactly one request per page, the first page included.
        c = curl.Curl(url, large=True, silent=False)

        # The downloaded file is re-opened by path as UTF-8 text; the
        # context manager makes sure the handle is closed for every page.
        with open(c.fileobj.name, encoding='utf-8') as fileobj:

            page_dct = json.load(fileobj)

        mol_lst.extend(
            ChemblMolecule(
                name = mol['pref_name'],
                chirality = mol['chirality'],
                type = mol['molecule_type'],
                prodrug = mol['prodrug'],
                structure_type = mol['structure_type'],

                chembl = _get(mol, 'hierarchy', 'molecule_chembl_id'),
                parent_chembl = _get(mol, 'hierarchy', 'parent_chembl_id'),

                alogp = _get(mol, 'properties', 'alogp'),
                full_mwt = _get(mol, 'properties', 'full_mwt'),
                heavy_atoms = _get(mol, 'properties', 'heavy_atoms'),
                species = _get(mol, 'properties', 'molecular_species'),
                qed_weighted = _get(mol, 'properties', 'qed_weighted'),

                canonical_smiles = _get(mol, 'structures', 'canonical_smiles'),
                std_inchi_key = _get(mol, 'structures', 'standard_inchi_key'),
                std_inchi = _get(mol, 'structures', 'standard_inchi'),

                xrefs = (
                    [
                        {
                            'xref_id': rec['xref_id'],
                            'xref_src': rec['xref_src'],
                        }
                        for rec in mol['cross_references']
                    ]
                        if mol['cross_references'] else
                    None
                )
            )
            for mol in page_dct['molecules']
        )

    return mol_lst
[docs]
def chembl_activities(
        #TODO: are these below all the allowed values?
        standard_relation: Literal['=', '>', '<', '>=', '<='],
        pchembl_value_none: bool = False,
    ) -> list[tuple]:
    """
    Retrieves activities data from ChEMBL.

    The activity endpoint is paginated: the first page is requested with
    the two filters below appended to the query, then the `next` link
    found in each page's `page_meta` is followed until it is empty.

    Args
        standard_relation:
            Which standard relation is needed. Sent to the API as the
            `standard_relation__exact` filter.
        pchembl_value_none:
            Sent to the API, lowercased, as the `pchembl_value__isnull`
            filter.
            # TODO: it is allowed to be None or must be None?

    Returns
        List of activity records as named tuples.
        `standard_units` attribute is not included in the returned records.
        Only records without `data_validity_comment` are returned, hence
        the `data_validity_comment` field is always `None`.
        # TODO: then why the data_validity_comment is part of the records?
        `standard_flag` is converted to a boolean.
    """

    fields_activity = (
        'assay_chembl',
        'data_validity_comment',
        'chembl',
        'pchembl',
        'standard_flag',
        'standard_relation',
        'standard_value',
        'standard_type',
        'target_chembl',
        'document'
    )

    ChemblActivity = collections.namedtuple(
        'ChemblActivity',
        fields_activity,
        defaults = (None,) * len(fields_activity),
    )

    activity_lst = []
    # Empty dict means "no page fetched yet"; afterwards it holds the
    # most recently parsed page, including its paging metadata.
    page_dct = {}

    while True:

        if not page_dct:

            # NOTE(review): `standard_relation` is interpolated into the
            # query string as is, without URL-encoding -- confirm that
            # the download layer or the server copes with `<`, `>`, `=`.
            url = (
                f"{urls.urls['chembl']['url']}"
                f"{urls.urls['chembl']['activity']}"
                f"&pchembl_value__isnull={str(pchembl_value_none).lower()}"
                f"&standard_relation__exact={standard_relation}"
            )

        elif page_dct['page_meta']['next']:

            # The filters are not appended again here; only the `next`
            # link from the previous page is used.
            url = (
                f"{urls.urls['chembl']['url']}"
                f"{page_dct['page_meta']['next']}"
            )

        else:

            break

        c = curl.Curl(url, large=True, silent=False)

        # The downloaded file is re-opened by path as UTF-8 text; the
        # context manager makes sure the handle is closed for every page.
        with open(c.fileobj.name, encoding='utf-8') as fileobj:

            page_dct = json.load(fileobj)

        activity_lst.extend(
            ChemblActivity(
                assay_chembl = act['assay_chembl_id'],
                data_validity_comment = act['data_validity_comment'],
                chembl = act['molecule_chembl_id'],
                pchembl = act['pchembl_value'],
                standard_flag = act['standard_flag'] == 1,
                standard_relation = act['standard_relation'],
                standard_value = act['standard_value'],
                standard_type = act['standard_type'],
                target_chembl = act['target_chembl_id'],
                document = act['document_chembl_id'],
            )
            for act in page_dct['activities']
            # Records carrying a validity comment are dropped.
            if act['data_validity_comment'] is None
        )

    return activity_lst
[docs]
def chembl_documents() -> dict[str, str]:
    """
    Retrieves ChEMBL document ID to PubMed ID conversion.

    The document endpoint is paginated: the first page is requested from
    the URL configured under `urls.urls['chembl']`, then the `next` link
    found in each page's `page_meta` is followed until it is empty.

    Returns
        Dictionary of ChEMBL document IDs as keys and PubMed IDs as values.
        Documents with an empty `pubmed_id` are left out.
    """

    # Empty dict means "no page fetched yet"; afterwards it holds the
    # most recently parsed page, including its paging metadata.
    page_dct = {}
    document_dict = {}

    while True:

        if not page_dct:

            url = (
                f"{urls.urls['chembl']['url']}"
                f"{urls.urls['chembl']['document']}"
            )

        elif page_dct['page_meta']['next']:

            url = (
                f"{urls.urls['chembl']['url']}"
                f"{page_dct['page_meta']['next']}"
            )

        else:

            break

        c = curl.Curl(url, large=True, silent=False)

        # The downloaded file is re-opened by path as UTF-8 text; the
        # context manager makes sure the handle is closed for every page.
        with open(c.fileobj.name, encoding='utf-8') as fileobj:

            page_dct = json.load(fileobj)

        document_dict.update(
            (doc['document_chembl_id'], doc['pubmed_id'])
            for doc in page_dct['documents']
            if doc['pubmed_id']
        )

    return document_dict
[docs]
def chembl_drug_indications(
        max_phase_threshold: int = 0,
    ) -> list[tuple]:
    """
    Retrieves drug indications data from ChEMBL.

    The drug indication endpoint is paginated: the first page is requested
    from the URL configured under `urls.urls['chembl']`, then the `next`
    link found in each page's `page_meta` is followed until it is empty.

    Args
        max_phase_threshold:
            The threshold for maximum phase of the drug
            for which the indication is valid. With the default `0`
            no filtering happens; otherwise only indications with a
            maximum phase strictly above the threshold are kept.

    Returns
        List of drug indications as namedtuples. `max_phase` is a float,
        or `None` if the record has no `max_phase_for_ind` value.
    """

    fields_indication = (
        'efo_id',
        'efo_term',
        'max_phase',
        'mesh_heading',
        'mesh_id',
        'molecule_chembl',
    )

    ChemblIndication = collections.namedtuple(
        'ChemblIndication',
        fields_indication,
        defaults = (None,) * len(fields_indication),
    )

    indication_lst = []
    # Empty dict means "no page fetched yet"; afterwards it holds the
    # most recently parsed page, including its paging metadata.
    page_dct = {}

    while True:

        if not page_dct:

            url = (
                f"{urls.urls['chembl']['url']}"
                f"{urls.urls['chembl']['drug_indication']}"
            )

        elif page_dct['page_meta']['next']:

            url = (
                f"{urls.urls['chembl']['url']}"
                f"{page_dct['page_meta']['next']}"
            )

        else:

            break

        c = curl.Curl(url, large=True, silent=False)

        # The downloaded file is re-opened by path as UTF-8 text; the
        # context manager makes sure the handle is closed for every page.
        with open(c.fileobj.name, encoding='utf-8') as fileobj:

            page_dct = json.load(fileobj)

        for ind in page_dct['drug_indications']:

            # Convert once per record.
            # NOTE(review): a null `max_phase_for_ind` is treated as an
            # unknown phase instead of raising a TypeError from `float`
            # -- confirm whether the API can return null here.
            raw_phase = ind['max_phase_for_ind']
            max_phase = None if raw_phase is None else float(raw_phase)

            # Threshold 0 disables the filter; otherwise the phase has
            # to be known and strictly above the threshold.
            if max_phase_threshold != 0 and (
                max_phase is None or
                max_phase <= max_phase_threshold
            ):

                continue

            indication_lst.append(
                ChemblIndication(
                    efo_id = ind['efo_id'],
                    efo_term = ind['efo_term'],
                    max_phase = max_phase,
                    mesh_heading = ind['mesh_heading'],
                    mesh_id = ind['mesh_id'],
                    molecule_chembl = ind['molecule_chembl_id'],
                )
            )

    return indication_lst
[docs]
def chembl_mechanisms() -> list[tuple]:
    """
    Retrieves mechanism data from ChEMBL.

    The mechanism endpoint is paginated: the first page is requested from
    the URL configured under `urls.urls['chembl']`, then the `next` link
    found in each page's `page_meta` is followed until it is empty.

    Returns
        List of mechanisms as namedtuples with the fields `action_type`,
        `direct_interaction`, `disease_efficacy`, `mechanism_of_action`,
        `chembl` (the molecule ChEMBL ID) and `target_chembl`.
        `direct_interaction` and `disease_efficacy` are converted
        to booleans.
    """

    fields_mechanism = (
        'action_type',
        'direct_interaction',
        'disease_efficacy',
        'mechanism_of_action',
        'chembl',
        'target_chembl',
    )

    ChemblMechanism = collections.namedtuple(
        'ChemblMechanism',
        fields_mechanism,
        defaults = (None,) * len(fields_mechanism),
    )

    mechanism_lst = []
    # Empty dict means "no page fetched yet"; afterwards it holds the
    # most recently parsed page, including its paging metadata.
    page_dct = {}

    while True:

        if not page_dct:

            url = (
                f"{urls.urls['chembl']['url']}"
                f"{urls.urls['chembl']['mechanism']}"
            )

        elif page_dct['page_meta']['next']:

            url = (
                f"{urls.urls['chembl']['url']}"
                f"{page_dct['page_meta']['next']}"
            )

        else:

            break

        c = curl.Curl(url, large=True, silent=False)

        # The downloaded file is re-opened by path as UTF-8 text; the
        # context manager makes sure the handle is closed for every page.
        with open(c.fileobj.name, encoding='utf-8') as fileobj:

            page_dct = json.load(fileobj)

        mechanism_lst.extend(
            ChemblMechanism(
                action_type = mech['action_type'],
                # Anything other than 1 (null included) becomes False.
                direct_interaction = mech['direct_interaction'] == 1,
                disease_efficacy = mech['disease_efficacy'] == 1,
                mechanism_of_action = mech['mechanism_of_action'],
                chembl = mech['molecule_chembl_id'],
                target_chembl = mech['target_chembl_id'],
            )
            for mech in page_dct['mechanisms']
        )

    return mechanism_lst