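"""Download a single issue PDF from the heise.de select archive.

The script logs in to heise.de, polls the download URL until the file is
ready, saves it to disk and logs out again.
"""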
import logging
import os
import sys
import time

import requests
from rich.logging import RichHandler
|
|
|
|
|
|
# Log records consist of the bare message only.
FORMAT = "%(message)s"

# Root configuration: every module logs at WARNING unless told otherwise,
# and all output goes through a single RichHandler.
logging.basicConfig(
    level="WARNING",
    format=FORMAT,
    datefmt="[%X]",
    handlers=[RichHandler()],
)

# This script's own logger gets a separate level; this is the one to tune
# when more or less output is wanted.
log = logging.getLogger("rich")
log.setLevel(logging.INFO)
|
|
|
|
|
|
# To manually adjust the log level of a specific module, first list all
# modules that themselves use the 'logging' module as shown below.
# See also https://stackoverflow.com/a/36208664.
# for key in logging.Logger.manager.loggerDict:
#     print(key)
|
|
|
|
|
|
# Credentials are read from the environment so that no secrets have to be
# stored in this file. An unset variable yields an empty string.
username = os.environ.get("HEISE_USERNAME", "")
password = os.environ.get("HEISE_PASSWORD", "")

# Target directory for the download; defaults to the current working
# directory when HEISE_DL_DIR is not set.
dl_dir = os.environ.get("HEISE_DL_DIR", os.getcwd())
# Trailing backslashes are stripped because the separator is added again
# when the target file path is built.
dl_dir = dl_dir.rstrip("\\")

# File name the downloaded issue is saved under, and the URL it is fetched from.
file_name_on_disk = r"""2021-5.pdf"""
dl_url = r"""https://www.heise.de/select/ct/archiv/2021/5/download"""

# Maximum number of seconds to wait for the download to become available.
dl_wait_timeout = 60
|
|
|
|
|
|
def logout(logged_in_session: requests.Session) -> None:
    """Call the heise.de logout endpoint using the given session.

    Args:
        logged_in_session: Session used to send the logout request.
    """
    log.debug("Logging out ...")
    logout_url = "https://www.heise.de/sso/login/logout"
    # The context manager releases the response once the status is logged.
    with logged_in_session.get(logout_url) as response:
        status = response.status_code
        log.debug(f"Logout got HTTP status code {status}")
|
|
|
|
|
|
def login(login_username: str, login_password: str) -> requests.Session:
    """Create a session and log it in to heise.de.

    The credentials are posted to the heise.de SSO login endpoint without
    following redirects. The login counts as successful when the response
    carries a ``Set-Cookie`` header.

    Args:
        login_username: Account name sent as the ``username`` form field.
        login_password: Password sent as the ``password`` form field.

    Returns:
        The session that received the login cookie.

    Raises:
        RuntimeError: If the login response contains no ``Set-Cookie``
            header, i.e. no session cookie was issued.
    """
    log.debug("Logging in ...")
    session = requests.Session()
    payload = {
        "username": login_username,
        "password": login_password,
    }
    response = session.post(
        "https://www.heise.de/sso/login/login",
        data=payload,
        allow_redirects=False,
    )
    if "Set-Cookie" not in response.headers:
        # Fail loudly instead of implicitly returning None, which would only
        # surface later as an AttributeError in the caller.
        session.close()
        raise RuntimeError(
            f"Login failed: HTTP status code {response.status_code}, no cookie received"
        )
    log.debug("Logged in, cookie received")
    return session
|
|
|
|
|
|
def _reported_size(response: requests.Response) -> int:
    """Return the response's Content-Length as an int, or 0 if absent or invalid."""
    try:
        return int(response.headers.get("Content-Length", 0))
    except (TypeError, ValueError):
        return 0


def download_mag(**kwargs: requests.Session) -> None:
    """Download the configured issue to disk and log out afterwards.

    The download URL is polled every 3 seconds until the response reports a
    Content-Length above 1,000,000 bytes; smaller responses are treated as
    "not ready yet". The first sufficiently large response is streamed
    straight to ``dl_dir`` under ``file_name_on_disk``. If nothing suitable
    arrives within ``dl_wait_timeout`` seconds the script exits with status 1.

    Args:
        **kwargs: Optional ``logged_in_session``, a session to reuse. When it
            is missing or falsy, a new session is created via ``login``.

    Raises:
        SystemExit: With code 1 when no session is available or the wait
            timeout is exceeded.
    """
    log.info("Preparing heise.de session ...")
    logged_in_session = kwargs.get("logged_in_session")
    if not logged_in_session:
        logged_in_session = login(username, password)
    if logged_in_session is None:
        # Guard against a login that yields no session at all.
        log.error("Login did not return a session. Exiting 1 ...")
        sys.exit(1)

    # NOTE(review): r"\\" is two backslash characters, so the path contains a
    # doubled separator. Kept as in the original; presumably tolerated on
    # Windows - confirm.
    target_path = dl_dir + r"\\" + file_name_on_disk

    try:
        have_printed_wait_notice = False
        started_waiting = time.time()
        log.info("Requesting AWS download ...")
        while True:
            if time.time() - started_waiting > dl_wait_timeout:
                log.warning(f"We've waited {dl_wait_timeout}s for download. Exiting 1 ...")
                sys.exit(1)
            # stream=True fetches only the headers up front, so polling does
            # not pull the whole body just to inspect Content-Length.
            with logged_in_session.get(dl_url, stream=True) as dl_request:
                if _reported_size(dl_request) > 1000000:
                    log.info("Downloading ...")
                    # Write the already-open response in chunks instead of
                    # requesting the file a second time and buffering it whole.
                    with open(target_path, "wb") as file:
                        for chunk in dl_request.iter_content(chunk_size=64 * 1024):
                            file.write(chunk)
                    break
            if not have_printed_wait_notice:
                log.debug("Waiting for heise's AWS backend ...")
                have_printed_wait_notice = True
            time.sleep(3)
        log.info("Download complete")
    finally:
        # Log out on every path, including timeout and errors.
        logout(logged_in_session)
|
|
|
|
|
|
# Run the download, then terminate explicitly with exit status 0.
download_mag()
log.debug("Done, exiting 0 ...")
raise SystemExit(0)