"""Download one c't magazine issue as a PDF from heise.de.

The script logs in to heise.de, polls the issue's download URL until the
response is large enough to be the finished PDF, writes it to disk and logs
out again.
"""
import logging
import os
import sys
import time

import requests
from rich.logging import RichHandler

FORMAT = "%(message)s"

# We're defaulting to log level WARNING for all modules
logging.basicConfig(
    level="WARNING", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]
)
log = logging.getLogger("rich")
# Our own code gets its own log level, that's probably what we want to play with most
log.setLevel(logging.INFO)

# If we need to manually adjust log level for a specific module here's how you get a list of all modules that
# themselves use the 'logging' module. See also https://stackoverflow.com/a/36208664.
#for key in logging.Logger.manager.loggerDict:
#    print(key)

# Credentials and target directory come from the environment so that no
# secrets have to live in this file. NOTE(review): the variable names below
# are this script's own convention -- adjust them to your setup.
username = os.environ.get("HEISE_USERNAME", "")
password = os.environ.get("HEISE_PASSWORD", "")
# Falls back to the current directory when HEISE_DL_DIR is not set.
dl_dir = os.environ.get("HEISE_DL_DIR", ".")
# Drop trailing backslashes; exactly one separator is re-added when the
# target path is built.
dl_dir = dl_dir.rstrip("\\")

file_name_on_disk = r"""2021-5.pdf"""
dl_url = r"""https://www.heise.de/select/ct/archiv/2021/5/download"""

# Give up when the download has not become available after this many seconds.
dl_wait_timeout = 60
# Seconds to sleep between two polls of dl_url.
dl_poll_interval = 3
# A response only counts as the download once its Content-Length exceeds this
# many bytes (presumably anything smaller is a placeholder -- TODO confirm).
dl_min_size = 1000000
# Per-request timeout in seconds so a stalled connection cannot hang forever.
dl_request_timeout = 60


def logout(logged_in_session: requests.Session) -> None:
    """Request the heise.de logout URL with the given session.

    Args:
        logged_in_session: Session to send the logout request with.
    """
    log.debug("Logging out ...")
    with logged_in_session.get(
        "https://www.heise.de/sso/login/logout", timeout=dl_request_timeout
    ) as logout_request:
        log.debug(
            "Logout got HTTP status code %s", logout_request.status_code
        )


def login(login_username: str, login_password: str) -> requests.Session:
    """Create a session and post the credentials to the heise.de login URL.

    The session is returned even when the response carries no ``Set-Cookie``
    header; that case is logged as a warning instead of passing silently.

    Args:
        login_username: Value sent as the ``username`` form field.
        login_password: Value sent as the ``password`` form field.

    Returns:
        The session the login request was sent with.
    """
    log.debug("Logging in ...")
    session = requests.Session()
    payload = {
        "username": login_username,
        "password": login_password,
    }
    response = session.post(
        "https://www.heise.de/sso/login/login",
        data=payload,
        allow_redirects=False,
        timeout=dl_request_timeout,
    )
    if "Set-Cookie" in response.headers:
        log.debug("Logged in, cookie received")
    else:
        log.warning(
            "Login got HTTP status code %s and no cookie",
            response.status_code,
        )
    return session


def _wait_for_download(session: requests.Session) -> requests.Response:
    """Poll ``dl_url`` until the response is big enough to be the download.

    Requests are streamed, so only the headers are fetched while polling and
    the accepted response can be reused for the actual download.

    Args:
        session: Session used for the polling requests.

    Returns:
        The still-open streamed response whose ``Content-Length`` exceeds
        ``dl_min_size``. The caller is responsible for closing it.

    Raises:
        SystemExit: With exit code 1 once ``dl_wait_timeout`` seconds passed.
    """
    have_printed_wait_notice = False
    started_waiting = time.monotonic()
    while True:
        if time.monotonic() - started_waiting > dl_wait_timeout:
            log.warning(
                "We've waited %ss for download. Exiting 1 ...",
                dl_wait_timeout,
            )
            sys.exit(1)
        response = session.get(
            dl_url, stream=True, timeout=dl_request_timeout
        )
        try:
            # A missing or malformed header counts as "not ready yet".
            content_length = int(response.headers.get("Content-Length", 0))
        except ValueError:
            content_length = 0
        if content_length > dl_min_size:
            return response
        response.close()
        if not have_printed_wait_notice:
            log.debug("Waiting for heise's AWS backend ...")
            have_printed_wait_notice = True
        time.sleep(dl_poll_interval)


def download_mag(**kwargs: requests.Session) -> None:
    """Download the magazine PDF to ``dl_dir`` and log out afterwards.

    Args:
        **kwargs: May contain ``logged_in_session``, an existing session to
            use. Without it a new session is created via ``login`` using the
            module-level ``username`` and ``password``.

    Raises:
        SystemExit: With exit code 1 when the download does not become
            available within ``dl_wait_timeout`` seconds.
    """
    log.info("Preparing heise.de session ...")
    logged_in_session = kwargs.get("logged_in_session")
    if not logged_in_session:
        logged_in_session = login(username, password)

    try:
        log.info("Requesting AWS download ...")
        with _wait_for_download(logged_in_session) as dl_request:
            log.info("Downloading ...")
            # Fetch the whole body before opening the target file so a failed
            # transfer does not leave a truncated file behind.
            pdf_bytes = dl_request.content
        # dl_dir had its trailing backslashes stripped; appending os.sep makes
        # sure exactly one platform separator precedes the file name.
        target_path = os.path.join(dl_dir + os.sep, file_name_on_disk)
        with open(target_path, "wb") as file:
            file.write(pdf_bytes)
        log.info("Download complete")
    finally:
        # Log out on success, on errors and on the timeout exit alike.
        logout(logged_in_session)


def main() -> None:
    """Run the download and exit with status 0."""
    download_mag()
    log.debug("Done, exiting 0 ...")
    sys.exit(0)


if __name__ == "__main__":
    main()