Source code for pypath.inputs.pubmed

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `pypath` python module
#
#  Copyright 2014-2023
#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
#  Authors: see the file `README.rst`
#  Contact: Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      https://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: https://pypath.omnipathdb.org/
#

from future.utils import iteritems
from past.builtins import xrange, range

import sys
import json
import webbrowser

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath_common._constants as _const
import pypath.share.progress as progress
import pypath.inputs.eutils as eutils


[docs] def open_pubmed(pmid): """ Opens PubMed record in web browser. @pmid : str or int PubMed ID """ pmid = str(pmid) url = urls.urls['pubmed']['url'] % pmid webbrowser.open(url)
[docs] def only_pmids(idList, strict = True): """ Return elements unchanged which comply with the PubMed ID format, and attempts to translate the DOIs and PMC IDs using NCBI E-utils. Returns list containing only PMIDs. @idList : list, str List of IDs or one single ID. @strict : bool Whether keep in the list those IDs which are not PMIDs, neither DOIs or PMC IDs or NIH manuscript IDs. """ if type(idList) in _const.SIMPLE_TYPES: idList = [idList] pmids = {i for i in idList if isinstance(i, int) or i.isdigit()} pmcids = [i for i in idList if i.startswith('PMC')] dois = [i for i in idList if '/' in i] manuscids = [i for i in idList if i.startswith('NIHMS')] if not strict: pmids = set(pmids) | set(dois) | set(pmcids) | set(manuscids) if len(pmcids) > 0: pmids = pmids | set(pmids_list(pmcids)) if len(dois) > 0: pmids = pmids | set(pmids_list(dois)) return list(pmids)
[docs] def get_pmid(idList): """ For a list of doi or PMC IDs fetches the corresponding PMIDs. """ if type(idList) in _const.SIMPLE_TYPES: idList = [idList] url = urls.urls['eutils']['pmc-idconv'] % ','.join(str(i) for i in idList) c = curl.Curl(url, silent = True) data = c.result try: js = json.loads(data) except: js = {} return js
[docs] def pmids_dict(idList): jsn = get_pmid(idList) result = {'doi': {}, 'pmc': {}} if 'records' in jsn: for r in jsn['records']: if 'pmid' in r: if 'doi' in r: result['doi'][r['pmid']] = r['doi'] if 'pmcid' in r: result['pmc'][r['pmid']] = r['pmcid'] return result
[docs] def pmids_list(idList): jsn = get_pmid(idList) result = [] if 'records' in jsn: for r in jsn['records']: if 'pmid' in r: result.append(r['pmid']) return result
[docs] def get_pubmeds(pmids: list[str], cache_small: int = 10) -> dict: """ Metadata about PubMed records. Args: pmids: One or more PubMed IDs. cache_small: Small requests querying less than 10 IDs by default are not cached, except if this parameter is True or is set to a lower number. """ return eutils.esummary( ids = pmids, db = 'pubmed', cache_small = cache_small, )