Source code for pypath.inputs.pathophenodb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

import os
import pickle
import hashlib
import collections

import pypath.resources.urls as urls
import pypath.share.cache as cache
import pypath.share.session as session

# Module-level logger, created with the name `pathophenodb_input`.
_logger = session.Logger(name = 'pathophenodb_input')
# Shortcut to the logger's `_log` method, used for all log messages below.
_log = _logger._log


# Record type for one disease-pathogen relationship, as produced by
# `disease_pathogen_interactions`.
DiseasePathogen = collections.namedtuple(
    'DiseasePathogen',
    [
        # last element of the disease URI, with `_` replaced by `:`
        'disease_id',
        # label of the disease
        'disease',
        # second `_`-separated part of the last element of the pathogen URI
        'pathogen_taxid',
        # label of the pathogen
        'pathogen',
        # label of the evidence code
        'evidence',
    ],
)


def disease_pathogen_interactions():
    """
    Retrieves disease pathogen relationships from PathoPhenoDb.

    The data is fetched by a SPARQL query from the endpoint configured
    under ``urls.urls['pathophenodb']['url']``. The processed result is
    pickled into the cache directory, in a file named by the MD5 digest of
    the URL and the query text; if such a file exists and can be loaded,
    its contents are returned without contacting the endpoint.

    The third party module ``SPARQLWrapper`` is required for the download.
    If it is not installed, a message is printed to the console and an
    empty list is returned.

    Returns:
        Disease-pathogen relationships as a list of tuples
        (``DiseasePathogen`` named tuples). The records are unique, their
        order is arbitrary as they are collected in a set.
    """

    # The first line is a SPARQL comment: it has to be terminated by a
    # newline, otherwise it would comment out the rest of the query.
    query = """#EX3:List all diseases which caused by pathogens
        PREFIX SIO: <http://semanticscience.org/resource/SIO_>
        PREFIX RO: <http://purl.obolibrary.org/obo/RO_>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT distinct ?Disease_ID ?Disease ?Pathogen_ID ?Pathogen ?evidence_Code
        FROM <http://patho.phenomebrowser.net>
        WHERE {
            ?Disease_ID SIO:000255 ?o .
            ?o RO:0002558 ?o1 .
            ?o RO:0002556 ?Pathogen_ID .
            ?Disease_ID rdfs:label ?Disease .
            ?Pathogen_ID rdfs:label ?Pathogen .
            ?o1 rdfs:label ?evidence_Code .
        }
    """

    url = urls.urls['pathophenodb']['url']
    # The cache key depends both on the URL and the query text, hence any
    # change in either of them results in a fresh download.
    urlmd5 = hashlib.md5(f'{url}{query}'.encode()).hexdigest()
    cache_path = os.path.join(cache.get_cachedir(), urlmd5)

    if os.path.exists(cache_path):

        try:

            _log(f'Loading from cache: `{cache_path}`.')

            # NOTE: unpickling executes code from the file, this is safe
            # only as long as the cache directory is trusted.
            with open(cache_path, 'rb') as fp:

                return pickle.load(fp)

        # A corrupted or unreadable cache file must not be fatal, but
        # KeyboardInterrupt and SystemExit should still propagate, hence
        # no bare `except` here.
        except Exception:

            _log(
                f'Failed to load from `{cache_path}`, '
                'falling back to download.'
            )

    # Optional dependency, imported only when a download is necessary.
    try:

        from SPARQLWrapper import SPARQLWrapper, JSON

    except ModuleNotFoundError:

        _logger._console(
            'No module `SPARQLWrapper` is available. '
            'Please install it to access PathoPhenoDB: '
            'pip install sparqlwrapper'
        )
        _log('Returning empty result!')

        return []

    sparql = SPARQLWrapper(url)
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)
    response = sparql.queryAndConvert()

    # Collected in a set in order to drop duplicate records.
    result = set()

    for r in response['results']['bindings']:

        pair = DiseasePathogen(
            # last element of the URI, with `_` replaced by `:`
            disease_id = (
                r['Disease_ID']['value'].split('/')[-1].replace('_', ':')
            ),
            disease = r['Disease']['value'],
            # the part after the first `_` in the last element of the URI
            pathogen_taxid = (
                r['Pathogen_ID']['value'].split('/')[-1].split('_')[1]
            ),
            pathogen = r['Pathogen']['value'],
            evidence = r['evidence_Code']['value'],
        )

        result.add(pair)

    result = list(result)

    with open(cache_path, 'wb') as fp:

        _log(f'Saving to cache: `{cache_path}`.')
        pickle.dump(
            obj = result,
            file = fp,
        )

    return result