# Source code for pypath.inputs.cell
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `pypath` python module
#
# Copyright 2014-2023
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# Authors: see the file `README.rst`
# Contact: Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# https://www.gnu.org/licenses/gpl-3.0.html
#
# Website: https://pypath.omnipathdb.org/
#
from future.utils import iteritems
import os
import pypath.share.curl as curl
import pypath.share.session as session
# Module-level logger labelled `cell_input`; `_log` is its bound logging
# method, used by the functions below to emit messages.
_logger = session.Logger(name = 'cell_input')
_log = _logger._log
# [docs]
def cell_supplementary(supp_url: str, article_url: str) -> str:
    """
    Downloads a supplementary material from the Cell journal webpage.

    If the file is not yet present in the cache, the article page is
    requested first, following up to three HTTP 302 redirects by hand, in
    order to collect the cookies set by the server. These cookies are then
    sent along with the request for the supplementary file itself.

    Args
        supp_url:
            URL of the supplementary material.
        article_url:
            URL of the article page.

    Return
        The path of the downloaded file.
    """

    user_agent = 'user-agent: curl/7.69.1'

    # This Curl object is created with `call = False`: it is used only to
    # find out the cache path belonging to the supplementary URL.
    c_nocall = curl.Curl(
        supp_url,
        call = False,
        setup = False,
        process = False,
        silent = True,
    )
    c_nocall.get_cache_file_name()
    path = c_nocall.cache_file_name
    req_headers = []

    if not os.path.exists(path):

        cookies = {}
        init_url = article_url

        # Redirects are followed manually (`follow = False`) so that the
        # cookies of every intermediate response can be collected.
        for _ in range(3):

            c_init = curl.Curl(
                init_url,
                silent = True,
                large = True,
                cache = False,
                follow = False,
                req_headers = req_headers + [user_agent],
                bypass_url_encoding = True,
                retries = 1,
                empty_attempt_again = False,
            )

            for header in c_init.resp_headers:

                lowered = header.lower()

                if lowered.startswith(b'set-cookie'):

                    # Split only at the first colon: the cookie value
                    # itself may legally contain colons.
                    _, _, content = header.decode().partition(':')
                    pair = content.split(';')[0].strip()
                    key, eq, value = pair.partition('=')

                    # Ignore malformed cookies without a `name=value` pair.
                    if eq:

                        cookies[key] = value

                elif lowered.startswith(b'location'):

                    init_url = (
                        header.decode().split(':', maxsplit = 1)[1].strip()
                    )

            # This cookie is deliberately not sent back to the server.
            cookies.pop('__cflb', None)

            req_headers = (
                [
                    'Cookie: %s' % '; '.join(
                        '%s=%s' % cookie
                        for cookie in cookies.items()
                    )
                ]
                if cookies else
                []
            )

            _log(
                'HTTP %u; location: `%s`, cookies: `%s`.' % (
                    c_init.status,
                    init_url,
                    req_headers[0] if req_headers else '',
                )
            )

            # Anything other than a redirect ends the cookie collection.
            if c_init.status != 302:

                break

    # The actual download; the cookies collected above (if any) are sent.
    c_table = curl.Curl(
        supp_url,
        silent = False,
        large = True,
        empty_attempt_again = False,
        req_headers = req_headers + [user_agent],
    )
    path = c_table.cache_file_name
    # Only the path is returned, so the open file handle is released here.
    c_table.fileobj.close()

    return path