Source code for pypath.inputs.science
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from future.utils import iteritems
import os
import pypath.share.curl as curl
import pypath.share.session as session
# Module-level logger for this input module, labelled `science_input`.
_logger = session.Logger(name = 'science_input')
# Shorthand for the logger's `_log` callable, used below to write log messages.
_log = _logger._log
[docs]
def _cookies_from_headers(resp_headers):
    """
    Collects the cookies from raw HTTP response header lines.

    Args
        resp_headers (list):
            Response header lines, either bytes or str. `None` or an
            empty list results in an empty dict.

    Returns
        Dict with cookie names as keys and cookie values as values. If
        the same cookie is set more than once, the last value is kept.
    """

    cookies = {}

    for header in resp_headers or ():

        if isinstance(header, bytes):

            header = header.decode('utf-8', 'replace')

        if not header.lower().startswith('set-cookie'):

            continue

        # Split only at the first colon: the cookie value itself may
        # contain further colons which must not be cut off.
        _, colon, content = header.partition(':')

        if not colon:

            continue

        # The first `;` separated field is the `name=value` pair, the
        # rest are attributes (Path, Expires, etc.) that we do not
        # send back to the server.
        pair = content.split(';', 1)[0].strip()
        name, equals, value = pair.partition('=')

        # A field without `=` is not a valid cookie pair, skip it
        # instead of failing on it.
        if equals:

            cookies[name] = value

    return cookies


def science_download(url):
    """
    Downloads a supplementary material from the Science journal webpage.

    If the file is already in the cache, no request is made. Otherwise
    a first request is sent only to collect the cookies set by the
    server, and a second request, carrying those cookies, downloads
    the file.

    Args
        url (str):
            URL of the supplementary material.

    Returns
        The path of the downloaded file.
    """

    # This instance makes no request, we use it only to find out the
    # path of the cache file belonging to this URL.
    c_nocall = curl.Curl(
        url,
        call = False,
        setup = False,
        process = False,
        silent = True,
    )
    c_nocall.get_cache_file_name()
    path = c_nocall.cache_file_name

    if os.path.exists(path):

        return path

    req_headers = ['user-agent: curl/7.69.1']

    # Initial request: neither reading nor writing the cache, not
    # following redirects; only the response headers are of interest.
    c_init = curl.Curl(
        url,
        silent = True,
        large = False,
        cache = False,
        follow = False,
        retries = 1,
        empty_attempt_again = False,
        req_headers = req_headers,
        write_cache = False,
    )

    cookies = _cookies_from_headers(c_init.resp_headers)

    cookie_header = (
        'Cookie: %s' % '; '.join(
            '%s=%s' % ck
            for ck in cookies.items()
        )
            if cookies else
        ''
    )

    # Do not add an empty `Cookie` header if the server set no cookies.
    if cookie_header:

        req_headers.append(cookie_header)

    _log(
        'HTTP %u; cookies: `%s`.' % (
            c_init.status,
            cookie_header,
        )
    )

    # The actual download, sending back the cookies collected above.
    c_main = curl.Curl(
        url,
        silent = False,
        large = True,
        empty_attempt_again = False,
        req_headers = req_headers,
    )

    path = c_main.cache_file_name
    # Only the path is returned, hence the open file handle is released.
    c_main.fileobj.close()

    return path