# Source code for pypath.inputs.pathophenodb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#  This file is part of the `pypath` python module
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#  Website:

import os
import pickle
import hashlib
import collections

import pypath.resources.urls as urls
import pypath.share.cache as cache
import pypath.share.session as session

_logger = session.Logger(name = 'pathophenodb_input')
_log = _logger._log

DiseasePathogen = collections.namedtuple(

def disease_pathogen_interactions():
    """
    Retrieves disease pathogen relationships from PathoPhenoDb.

    The relationships are fetched by a SPARQL query sent to the endpoint
    configured under ``urls.urls['pathophenodb']['url']``. The processed
    result is pickled into the cache directory, in a file named by the MD5
    hex digest of the URL and the query; later calls load that file
    instead of querying the endpoint again.

    Returns:
        Disease-pathogen relationships as a list of ``DiseasePathogen``
        tuples, deduplicated, in arbitrary order. An empty list if the
        ``SPARQLWrapper`` module is not installed.
    """

    # The first line of the query is a SPARQL comment, so it must be
    # terminated by a line break; otherwise it swallows the whole query.
    # NOTE(review): the IRIs between `<` and `>` are empty in this copy of
    # the file -- presumably stripped at some point; confirm the prefix and
    # graph IRIs against the upstream source before relying on this query.
    query = """#EX3:List all diseases which caused by pathogens
        PREFIX SIO: <>
        PREFIX RO: <>
        PREFIX rdfs: <>
        SELECT distinct ?Disease_ID ?Disease ?Pathogen_ID ?Pathogen ?evidence_Code
        FROM <>
        WHERE {
            ?Disease_ID SIO:000255 ?o .
            ?o RO:0002558 ?o1 .
            ?o RO:0002556 ?Pathogen_ID .
            ?Disease_ID rdfs:label ?Disease .
            ?Pathogen_ID rdfs:label ?Pathogen .
            ?o1 rdfs:label ?evidence_Code .
        }
    """

    url = urls.urls['pathophenodb']['url']
    # The cache key covers both the endpoint and the query text, so a change
    # in either of them invalidates the previously cached result.
    urlmd5 = hashlib.md5(f'{url}{query}'.encode()).hexdigest()
    cache_path = os.path.join(cache.get_cachedir(), urlmd5)

    if os.path.exists(cache_path):

        try:

            _log(f'Loading from cache: `{cache_path}`.')

            # NOTE: unpickling executes whatever the file dictates; this
            # trusts the contents of the local cache directory.
            with open(cache_path, 'rb') as fp:

                return pickle.load(fp)

        # Unpickling a damaged or outdated file may raise various exception
        # types; any of them means falling back to a fresh download, while
        # KeyboardInterrupt and SystemExit are allowed to propagate.
        except Exception:

            _log(
                f'Failed to load from `{cache_path}`, '
                'falling back to download.'
            )

    # Optional dependency: imported here so that the module can be imported
    # without it.
    try:

        from SPARQLWrapper import SPARQLWrapper, JSON

    except ModuleNotFoundError:

        _logger._console(
            'No module `SPARQLWrapper` is available. '
            'Please install it to access PathoPhenoDB: '
            'pip install sparqlwrapper'
        )
        _log('Returning empty result!')

        return []

    sparql = SPARQLWrapper(url)
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)
    response = sparql.queryAndConvert()

    # A set removes duplicate records before the conversion to list.
    result = {
        DiseasePathogen(
            # Last path segment of the IRI, underscores replaced by colons.
            disease_id = (
                r['Disease_ID']['value'].split('/')[-1].replace('_', ':')
            ),
            disease = r['Disease']['value'],
            # Second underscore-separated piece of the last path segment;
            # presumably the numeric part of an `<prefix>_<taxid>` name.
            pathogen_taxid = (
                r['Pathogen_ID']['value'].split('/')[-1].split('_')[1]
            ),
            pathogen = r['Pathogen']['value'],
            evidence = r['evidence_Code']['value'],
        )
        for r in response['results']['bindings']
    }
    result = list(result)

    # The cache is only an optimization: if writing it fails, the freshly
    # retrieved data is still returned instead of being lost to an error.
    try:

        with open(cache_path, 'wb') as fp:

            _log(f'Saving to cache: `{cache_path}`.')
            pickle.dump(
                obj = result,
                file = fp,
            )

    except OSError:

        _log(f'Failed to save to cache: `{cache_path}`.')

    return result