Compare commits
No commits in common. "9765ec16b50420039d98b8b8ab6517504e613538" and "8362c4184a064ecd2b865baad15d87b9877c0bd4" have entirely different histories.
9765ec16b5
...
8362c4184a
2
.gitignore
vendored
2
.gitignore
vendored
@ -231,5 +231,3 @@ fabric.properties
|
|||||||
# Android studio 3.1+ serialized cache file
|
# Android studio 3.1+ serialized cache file
|
||||||
.idea/caches/build_file_checksums.ser
|
.idea/caches/build_file_checksums.ser
|
||||||
|
|
||||||
# ---> Custom
|
|
||||||
.idea/deployment.xml
|
|
5
.idea/codeStyles/codeStyleConfig.xml
generated
5
.idea/codeStyles/codeStyleConfig.xml
generated
@ -1,5 +0,0 @@
|
|||||||
<component name="ProjectCodeStyleConfiguration">
|
|
||||||
<state>
|
|
||||||
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
|
|
||||||
</state>
|
|
||||||
</component>
|
|
8
.idea/heiseselect-dl.iml
generated
8
.idea/heiseselect-dl.iml
generated
@ -1,8 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<module type="PYTHON_MODULE" version="4">
|
|
||||||
<component name="NewModuleRootManager">
|
|
||||||
<content url="file://$MODULE_DIR$" />
|
|
||||||
<orderEntry type="jdk" jdkName="Python 3.11 (heiseselect-dl)" jdkType="Python SDK" />
|
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
|
||||||
</component>
|
|
||||||
</module>
|
|
26
.idea/inspectionProfiles/Project_Default.xml
generated
26
.idea/inspectionProfiles/Project_Default.xml
generated
@ -1,26 +0,0 @@
|
|||||||
<component name="InspectionProjectProfileManager">
|
|
||||||
<profile version="1.0">
|
|
||||||
<option name="myName" value="Project Default" />
|
|
||||||
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
|
||||||
<option name="ignoredPackages">
|
|
||||||
<value>
|
|
||||||
<list size="1">
|
|
||||||
<item index="0" class="java.lang.String" itemvalue="google" />
|
|
||||||
</list>
|
|
||||||
</value>
|
|
||||||
</option>
|
|
||||||
</inspection_tool>
|
|
||||||
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
|
||||||
<option name="ignoredIdentifiers">
|
|
||||||
<list>
|
|
||||||
<option value="str.decode" />
|
|
||||||
</list>
|
|
||||||
</option>
|
|
||||||
</inspection_tool>
|
|
||||||
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
|
|
||||||
<option name="processCode" value="true" />
|
|
||||||
<option name="processLiterals" value="true" />
|
|
||||||
<option name="processComments" value="true" />
|
|
||||||
</inspection_tool>
|
|
||||||
</profile>
|
|
||||||
</component>
|
|
6
.idea/inspectionProfiles/profiles_settings.xml
generated
6
.idea/inspectionProfiles/profiles_settings.xml
generated
@ -1,6 +0,0 @@
|
|||||||
<component name="InspectionProjectProfileManager">
|
|
||||||
<settings>
|
|
||||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
|
||||||
<version value="1.0" />
|
|
||||||
</settings>
|
|
||||||
</component>
|
|
4
.idea/misc.xml
generated
4
.idea/misc.xml
generated
@ -1,4 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project version="4">
|
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (heiseselect-dl)" project-jdk-type="Python SDK" />
|
|
||||||
</project>
|
|
8
.idea/modules.xml
generated
8
.idea/modules.xml
generated
@ -1,8 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project version="4">
|
|
||||||
<component name="ProjectModuleManager">
|
|
||||||
<modules>
|
|
||||||
<module fileurl="file://$PROJECT_DIR$/.idea/heiseselect-dl.iml" filepath="$PROJECT_DIR$/.idea/heiseselect-dl.iml" />
|
|
||||||
</modules>
|
|
||||||
</component>
|
|
||||||
</project>
|
|
6
.idea/vcs.xml
generated
6
.idea/vcs.xml
generated
@ -1,6 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project version="4">
|
|
||||||
<component name="VcsDirectoryMappings">
|
|
||||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
|
||||||
</component>
|
|
||||||
</project>
|
|
79
main.py
79
main.py
@ -1,79 +0,0 @@
|
|||||||
import sys
|
|
||||||
import time
|
|
||||||
import requests
|
|
||||||
import logging
|
|
||||||
from rich.logging import RichHandler
|
|
||||||
|
|
||||||
|
|
||||||
FORMAT = "%(message)s"
|
|
||||||
# We're defaulting to log level WARNING for all modules
|
|
||||||
logging.basicConfig(
|
|
||||||
level="WARNING", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]
|
|
||||||
)
|
|
||||||
log = logging.getLogger("rich")
|
|
||||||
# Our own code gets its own log level, that's probably what we want to play with most
|
|
||||||
log.setLevel(logging.INFO)
|
|
||||||
|
|
||||||
|
|
||||||
# If we need to manually adjust log level for a specific module here's how you get a list of all modules that
|
|
||||||
# themselves use the 'logging' module. See also https://stackoverflow.com/a/36208664.
|
|
||||||
#for key in logging.Logger.manager.loggerDict:
|
|
||||||
# print(key)
|
|
||||||
|
|
||||||
|
|
||||||
username =
|
|
||||||
password =
|
|
||||||
dl_dir =
|
|
||||||
dl_dir = dl_dir.rstrip("\\")
|
|
||||||
file_name_on_disk = r"""2021-5.pdf"""
|
|
||||||
dl_url = r"""https://www.heise.de/select/ct/archiv/2021/5/download"""
|
|
||||||
dl_wait_timeout = 60
|
|
||||||
|
|
||||||
|
|
||||||
def logout(logged_in_session: requests.Session) -> None:
|
|
||||||
log.debug(f"Logging out ...")
|
|
||||||
with logged_in_session.get("https://www.heise.de/sso/login/logout") as logout_request:
|
|
||||||
log.debug(f"Logout got HTTP status code {logout_request.status_code}")
|
|
||||||
|
|
||||||
|
|
||||||
def login(login_username: str, login_password: str) -> requests.Session:
|
|
||||||
log.debug(f"Logging in ...")
|
|
||||||
session = requests.Session()
|
|
||||||
payload = {
|
|
||||||
"username": login_username,
|
|
||||||
"password": login_password
|
|
||||||
}
|
|
||||||
response = session.post("https://www.heise.de/sso/login/login", data=payload, allow_redirects=False)
|
|
||||||
if "Set-Cookie" in response.headers:
|
|
||||||
log.debug(f"Logged in, cookie received")
|
|
||||||
return session
|
|
||||||
|
|
||||||
|
|
||||||
def download_mag(**kwargs: requests.Session) -> None:
|
|
||||||
log.info(f"Preparing heise.de session ...")
|
|
||||||
have_printed_wait_notice = False
|
|
||||||
logged_in_session = kwargs.get("logged_in_session")
|
|
||||||
if not logged_in_session:
|
|
||||||
logged_in_session = login(username, password)
|
|
||||||
started_waiting = time.time()
|
|
||||||
log.info(f"Requesting AWS download ...")
|
|
||||||
while True:
|
|
||||||
if time.time() - started_waiting > dl_wait_timeout:
|
|
||||||
log.warning(f"We've waited {dl_wait_timeout}s for download. Exiting 1 ...")
|
|
||||||
sys.exit(1)
|
|
||||||
if int(logged_in_session.get(dl_url).headers['Content-Length']) > 1000000:
|
|
||||||
break
|
|
||||||
if not have_printed_wait_notice: log.debug(f"Waiting for heise's AWS backend ...")
|
|
||||||
have_printed_wait_notice = True
|
|
||||||
time.sleep(3)
|
|
||||||
log.info(f"Downloading ...")
|
|
||||||
with logged_in_session.get(dl_url) as dl_request:
|
|
||||||
with open(dl_dir + r"\\" + file_name_on_disk, "wb") as file:
|
|
||||||
file.write(dl_request.content)
|
|
||||||
log.info(f"Download complete")
|
|
||||||
logout(logged_in_session)
|
|
||||||
|
|
||||||
|
|
||||||
download_mag()
|
|
||||||
log.debug("Done, exiting 0 ...")
|
|
||||||
sys.exit(0)
|
|
@ -1,3 +0,0 @@
|
|||||||
requests
|
|
||||||
lxml
|
|
||||||
rich
|
|
@ -1,26 +0,0 @@
|
|||||||
#
|
|
||||||
# This file is autogenerated by pip-compile with python 3.10
|
|
||||||
# To update, run:
|
|
||||||
#
|
|
||||||
# pip-compile
|
|
||||||
#
|
|
||||||
certifi==2021.10.8
|
|
||||||
# via requests
|
|
||||||
charset-normalizer==2.0.12
|
|
||||||
# via requests
|
|
||||||
colorama==0.4.4
|
|
||||||
# via rich
|
|
||||||
commonmark==0.9.1
|
|
||||||
# via rich
|
|
||||||
idna==3.3
|
|
||||||
# via requests
|
|
||||||
lxml==4.8.0
|
|
||||||
# via -r requirements.in
|
|
||||||
pygments==2.11.2
|
|
||||||
# via rich
|
|
||||||
requests==2.27.1
|
|
||||||
# via -r requirements.in
|
|
||||||
rich==11.2.0
|
|
||||||
# via -r requirements.in
|
|
||||||
urllib3==1.26.8
|
|
||||||
# via requests
|
|
Loading…
x
Reference in New Issue
Block a user