#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#

"""
Clients to the NCBI Entrez Utils (E-utils) API.
"""

from __future__ import annotations

from typing import Iterable

import json
import re

import pypath.share.progress as progress
import pypath.share.curl as curl
import pypath.resources.urls as urls
import pypath.share.session as session
import pypath.share.common as common

# Module level logger; messages are labelled with `eutils_input`.
_logger = session.Logger(name='eutils_input')

# Strips an optional run of ASCII letters in front of a run of digits,
# keeping only the digits (used as `_REPREFIX.sub('\\1', identifier)`).
# The class is spelled `A-Za-z` on purpose: the range `A-z` would also
# cover the punctuation between `Z` and `a` in ASCII.
_REPREFIX = re.compile(r'[A-Za-z]*(\d+)')
def esummary(
        ids: str | Iterable[str],
        db: str,
        cache_small: bool | int = 10,
    ) -> list | dict:
    """
    Record metainformation from NCBI databases.

    A client to the eSummary API endpoint:
    https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESummary

    The identifiers are queried in chunks of 100 by POST requests; each
    chunk is attempted up to three times. A chunk that fails in all
    attempts is skipped, hence the result might be incomplete.

    Args:
        ids:
            One or more record identifiers for the database of interest
            (`db`). The API understands the numeric IDs, letter prefixes
            if present will be stripped.
        db:
            The database, such as "pubmed" or "geoprofiles".
        cache_small:
            Small requests querying less than 10 IDs by default are not
            cached, except if this parameter is True or is set to a lower
            number. If False, the default threshold of 10 applies.

    Returns:
        A dict assembled from the contents of the `result` field of the
        response of each chunk.
    """

    chunk_size = 100
    max_attempts = 3
    default_small = 10

    ids = list(map(str, common.to_list(ids)))
    ids = sorted(_REPREFIX.sub('\\1', i) for i in ids)
    url = urls.urls['eutils']['esummary']

    # Only requests at or above the threshold are cached; `True` means
    # caching regardless of the size of the request.
    if cache_small is True:

        cache = True

    else:

        threshold = default_small if cache_small is False else cache_small
        cache = len(ids) >= threshold

    result = {}

    _logger._log(
        f'Retrieving data from NCBI E-utils eSummary: querying {len(ids)} '
        f'identifiers ({",".join(ids[:5])}...).'
    )

    prg = progress.Progress(
        len(ids) / chunk_size + 1,
        'Retrieving data from NCBI E-utils',
        1,
        percent=False,
    )

    for chunk in common.paginate(ids, chunk_size):

        prg.step()

        post = {
            'id': ','.join(chunk),
            'retmode': 'json',
            'db': db,
        }

        for _ in range(max_attempts):

            try:

                c = curl.Curl(
                    url,
                    silent=False,
                    cache=cache,
                    post=post,
                    override_post=True,
                )
                res = json.loads(c.result)

            except (ValueError, TypeError):

                # ValueError: the response is not valid JSON;
                # TypeError: the response is not a string at all.
                _logger._log('Failed to process JSON from NCBI E-utils:')
                _logger._log_traceback()
                continue

            if 'error' in res:

                _logger._log(
                    f'Error from NCBI E-utils: `{res["error"]}`.'
                )

            records = res.get('result') if isinstance(res, dict) else None

            if records is None:

                # nothing to collect from this response, try again
                continue

            result.update(records)
            break

        else:

            _logger._log(
                f'Giving up on a chunk of {len(chunk)} identifiers after '
                f'{max_attempts} attempts.'
            )

    prg.terminate()

    _logger._log(
        'Finished retrieving data from NCBI E-utils eSummary, '
        f'result contains {len(result)} items.'
    )

    return result