Compare commits: d1130baa10...master (59 commits)
| SHA1 |
| --- |
| eabf595ff5 |
| ab0a82c626 |
| 03b449c768 |
| e269a110a6 |
| a3a375d142 |
| 81ce5812a6 |
| 83921912a4 |
| 65e3ec83b1 |
| 266d3189dc |
| 563ff4d342 |
| 287a755e65 |
| d70766bae0 |
| 380fb4bf2e |
| e395309011 |
| 029d9ffb7e |
| ec612de2dd |
| 2905ff5c74 |
| 0cfe47465d |
| 5eff7876bc |
| 4809846edf |
| b5dff485d9 |
| e78659b2de |
| 27004a5294 |
| d0552f9e67 |
| dd6464de5d |
| facfe4e7d3 |
| c0a271d0eb |
| 11b9daa729 |
| 9a5ce9469d |
| 3f6bc46d52 |
| 601583afc3 |
| 7081c6b50a |
| 7b391be89e |
| c5f74ce479 |
| 4c327a35f5 |
| 5e321686d0 |
| e02c0bf52e |
| 25d0059f14 |
| 296e2ca7e5 |
| 34ef348929 |
| 47bb1f761a |
| d92236a2f2 |
| 376932583e |
| 6aeecabc9b |
| bb4defdb82 |
| 0f68ef558e |
| 32932fe803 |
| c5830aeadc |
| 3b33fd65a7 |
| ffb9e9d5f4 |
| 4ce2163e19 |
| 65fe4b707c |
| 1110f3b024 |
| 98d307db73 |
| affa15e191 |
| 6686e91236 |
| f0130ce2fb |
| 576b3a56a9 |
| 720493d0de |
.gitignore (vendored, 5 changed lines)
@@ -235,4 +235,7 @@ fabric.properties
.idea/deployment.xml
.idea/misc.xml
.idea/remote-mappings.xml
.idea/mvw-dl.iml
.idea/*.iml

# ---> Project-specific
data
.idea/.gitignore (generated, vendored, 8 lines removed)
@@ -1,8 +0,0 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
.idea/inspectionProfiles/Project_Default.xml (generated, 26 lines removed)
@@ -1,26 +0,0 @@
<component name="InspectionProjectProfileManager">
  <profile version="1.0">
    <option name="myName" value="Project Default" />
    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
      <option name="ignoredPackages">
        <value>
          <list size="1">
            <item index="0" class="java.lang.String" itemvalue="google" />
          </list>
        </value>
      </option>
    </inspection_tool>
    <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
      <option name="ignoredIdentifiers">
        <list>
          <option value="str.decode" />
        </list>
      </option>
    </inspection_tool>
    <inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
      <option name="processCode" value="true" />
      <option name="processLiterals" value="true" />
      <option name="processComments" value="true" />
    </inspection_tool>
  </profile>
</component>
.idea/inspectionProfiles/profiles_settings.xml (generated, 1 changed line)
@@ -1,5 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="PROJECT_PROFILE" value="Default" />
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
config.ini (21 lines removed)
@@ -1,21 +0,0 @@
[DEFAULT]
self_name = mvw-dl
tmp_base_dir = /tmp/%(self_name)s
state_base_dir = /var/lib/%(self_name)s
state_files_dir = %(state_base_dir)s/state
state_file_retention = 50
state_file_name_prefix = state-
state_file_name_suffix = .log

[maus]
min_duration = 1200
max_duration = 2700
query = @maus-query.json
# query = {"queries":[{"fields":["topic"],"query":"die sendung mit der maus"},{"fields":["channel"],"query":"ARD"}],"sortBy":"timestamp","sortOrder":"desc","future":false,"offset":0,"size":50}
# state_file_name = maus
# tmp_base_dir = %(tmp_base_dir)s/maus

[test]
min_duration = 100
max_duration = 200
query = {"queries":[{"fields":["topic"],"query":"die sendung mit der maus"},{"fields":["channel"],"query":"ARD"}],"sortBy":"timestamp","sortOrder":"desc","future":false,"offset":0,"size":50}
examples/config.ini.example (new file, 25 lines)
@@ -0,0 +1,25 @@
[DEFAULT]
self_name = mvw-dl
tmp_base_dir = /tmp/%(self_name)s
state_base_dir = /var/lib/%(self_name)s
state_files_dir = %(state_base_dir)s/state
state_file_retention = 50
state_file_name_prefix = state-
state_file_name_suffix = .log
mvw_endpoint = http://localhost:8000/api/query
title_dedup_winner = first
dl_progress_update_interval = 10
dl_threads = 2
dl_filename_pattern = &(channel)s - &(publish_date)s - &(topic)s - &(title)s.&(ext)s
publish_date_srtftime_pattern = %%Y%%m%%d
dl_filename_replace_spaces_with =
dl_filename_all_lowercase = no

[maus]
min_duration = 1200
max_duration = 3000
query = @maus-query.json.example
title_not_regex = audiodeskription|gebärdensprache|hörfassung
dl_filename_pattern = &(publish_date)s.&(ext)s
publish_date_srtftime_pattern = S%%YE%%Y%%m%%d01
dl_dir = /tmp/kodi-nfo-feeder/maus
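The "&(...)s" filename placeholders are resolved by mvw-dl.py itself, but the "%(...)s" references and the doubled "%%" above are plain configparser interpolation. A minimal sketch (not part of this commit) of how those values resolve once the example file is read:

```python
# Illustration only, not part of this commit: how configparser resolves the
# %(...)s interpolation and the %%-escaped strftime pattern used above.
import configparser
import datetime

config = configparser.ConfigParser()
config.read("examples/config.ini.example")

# %(self_name)s is pulled from the [DEFAULT] section:
print(config.get("maus", "tmp_base_dir"))  # /tmp/mvw-dl

# %% collapses to a single literal % on read, leaving a usable strftime pattern:
pattern = config.get("maus", "publish_date_srtftime_pattern")
print(pattern)  # S%YE%Y%m%d01
print(datetime.datetime(2022, 3, 6).strftime(pattern))  # S2022E2022030601
```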
examples/maus-query.json.example (new file, 21 lines)
@@ -0,0 +1,21 @@
{
  "queries": [
    {
      "fields": [
        "topic"
      ],
      "query": "die sendung mit der maus"
    },
    {
      "fields": [
        "channel"
      ],
      "query": "ARD"
    }
  ],
  "sortBy": "timestamp",
  "sortOrder": "desc",
  "future": false,
  "offset": 0,
  "size": 20
}
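This query is what mvw-dl.py posts to the mvw_endpoint defined in the config. A minimal sketch (not part of this commit) of sending it by hand, assuming a MediathekViewWeb instance is listening on localhost:8000:

```python
# Illustration only, not part of this commit: posting the example query the way
# get_json_response() in mvw-dl.py does. Assumes MediathekViewWeb on localhost:8000.
import requests

with open("examples/maus-query.json.example", "r", encoding="utf-8") as f:
    payload = f.read()

response = requests.post(
    "http://localhost:8000/api/query",
    data=payload,
    # mvw-dl.py sends the JSON body with this content type rather than application/json
    headers={"Content-Type": "text/plain"},
)
results = response.json()["result"]["results"]
print(f"{len(results)} shows matched")
```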
examples/mvw-dl.service.example (new file, 12 lines)
@@ -0,0 +1,12 @@
[Unit]
Description=MediathekViewWeb download helper
After=multi-user.target

[Service]
Type=oneshot
RemainAfterExit=no
Environment='PATH=/usr/local/sbin:/usr/local/bin:/usr/bin'
ExecStart=/opt/miniconda3/envs/mvw-dl/bin/python /opt/python/mvw-dl/dev/mvw-dl.py

[Install]
WantedBy=multi-user.target
examples/mvw-dl.timer.example (new file, 9 lines)
@@ -0,0 +1,9 @@
[Unit]
Description=Run MediathekViewWeb download helper

[Timer]
OnCalendar=0/2:2
Persistent=true

[Install]
WantedBy=timers.target
main.py (102 lines removed)
@@ -1,102 +0,0 @@
import configparser
import logging
import os
import sys
from rich.logging import RichHandler


# Exit codes
# 1: Config file invalid, it has no sections
# 2: Config file invalid, sections must define at least CONST.CFG_MANDATORY


class CONST(object):
    __slots__ = ()
    LOG_FORMAT = "%(message)s"
    CFG_DEFAULT_FILENAME = "config.ini"
    CFG_DEFAULT_ABS_PATH = os.path.join(os.getcwd(), CFG_DEFAULT_FILENAME)
    CFG_MANDATORY = "query"


CONST = CONST()
logging.basicConfig(
    level="NOTSET",
    format=CONST.LOG_FORMAT,
    datefmt="[%X]",
    handlers=[RichHandler(
        show_time=False if "SYSTEMD_EXEC_PID" in os.environ else True,
        rich_tracebacks=True
    )]
)
log = logging.getLogger("rich")
log.setLevel(logging.DEBUG)


config = configparser.ConfigParser()
config.read(CONST.CFG_DEFAULT_FILENAME)


def validate_config_defaults(config_obj: configparser.ConfigParser()) -> None:
    log.debug(f"Loading config from file '{CONST.CFG_DEFAULT_ABS_PATH}' ...")
    if not config_obj.sections():
        log.error(f"No config sections found in '{CONST.CFG_DEFAULT_ABS_PATH}'. Exiting 1 ...")
        sys.exit(1)
    if config.defaults():
        for default in config_obj.defaults():
            log.debug(f"  {default} = {config_obj[config_obj.default_section][default]}")
    else:
        log.debug(f"No defaults defined")


def config_has_valid_section(config_obj: configparser.ConfigParser()) -> bool:
    has_valid_section = False
    for config_obj_section in config_obj.sections():
        if CONST.CFG_MANDATORY in config_obj.options(config_obj_section):
            has_valid_section = True
            break
    return has_valid_section


#def option_has_default_value(config_option: configparser.ConfigParser(), section: str) -> bool:
#
#~ local option that doesn't exist in defaults section
#+ local option that overrides a default
#* global option

def validate_config_sections(config_obj: configparser.ConfigParser()) -> None:
    for section in config_obj.sections():
        log.debug(f"Loading config '[{section}]' ...")
        for key in config_obj[section]:
            # log.debug(config_obj.has_option(section, key))
            is_local_option = True if config_obj.has_option(section, key) else False
            log.debug(f"""  {key}{"/" if is_local_option else ".."} = {config_obj[section][key]}""")


validate_config_defaults(config)
if config_has_valid_section(config):
    validate_config_sections(config)
else:
    log.error(f"No valid config section found. A valid config section has at least the '{CONST.CFG_MANDATORY}' "
              f"option set. Exiting 2 ...")
    sys.exit(2)


# https://stackoverflow.com/a/2451872
quit()

# This is a sample Python script.

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.


def print_hi(name):
    # Use a breakpoint in the code line below to debug your script.
    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    print_hi('PyCharm')

# See PyCharm help at https://www.jetbrains.com/help/pycharm/
mvw-dl.py (new file, 783 lines)
@@ -0,0 +1,783 @@
import configparser
import datetime as d
import json
import logging
import os
import pathlib
import re
import shutil
import sys
import time

import filelock
import humanize
import requests
import inflect
from rich.logging import RichHandler
from rich.traceback import install
import typing as t
from rich.console import Console
from filelock import Timeout, FileLock
import uuid

import type_def.mvw_json_response
from type_def.mvw_json_request import MVWJSONRequest
from type_def.mvw_json_response import MVWJSONResponse

# Downloading
import os.path
import sys
from concurrent.futures import ThreadPoolExecutor
import signal
from functools import partial
from threading import Event
from typing import Iterable
from urllib.request import urlopen


from rich.progress import (
    BarColumn,
    DownloadColumn,
    Progress,
    TaskID,
    TextColumn,
    TimeRemainingColumn,
    TransferSpeedColumn,
)

# TODO set locale for datetime and others to globally stick to en_US
# TODO thread log messages display timestamp in systemd journal
# TODO Increment file name suffix more than once if needed
# TODO [23:15:14] DEBUG [thread]
# TODO Clean mvw-dl.timer
# TODO Reset maus-query.json
download_start_time = 0
download_last_update_time = 0
total_content_length = 0
size_downloaded_for_progress_tracking = 0
size_downloaded_for_speed_tracking = 0
file_lock_timeout = 1
state_lock_file_ext = ".lock"

progress = Progress(
    TextColumn("[bold blue]{task.fields[filename]}", justify="right"),
    BarColumn(bar_width=None),
    "[progress.percentage]{task.percentage:>3.1f}%",
    "•",
    DownloadColumn(),
    "•",
    TransferSpeedColumn(),
    "•",
    TimeRemainingColumn(),
)
# Downloading

# Without width
console = Console(width=180)
p = inflect.engine()

# We use Python 3.5+ type hints; we're working with JSON objects; we're following a 2016 suggestion from
# Python's "typing" GitHub issue tracker on how to create a "JSONType" hint since such a thing does not yet
# officially exist: https://github.com/python/typing/issues/182#issuecomment-186684288
# JSONType = t.Union[str, int, float, bool, None, t.Dict[str, t.Any], t.List[t.Any]]
JSONType = t.Union[str, int, float, bool, None, t.Dict[str, t.Any], t.List[t.Any]]


# Exit codes
# 1: Config file invalid, it has no sections
# 2: Config file invalid, sections must define at least CONST.CFG_MANDATORY
# 3: No search results to download
# 4: State file already exists, has more than 0 bytes size but doesn't contain usable JSON
# 5: State file lock cannot be acquired within file_lock_timeout
# 6: Unable to create state directory


class CONST(object):
    __slots__ = ()
    LOG_FORMAT = "%(message)s"
    CFG_THIS_FILE_DIRNAME = os.path.dirname(__file__)
    CFG_DEFAULT_FILENAME = "config.ini"
    CFG_DEFAULT_ABS_PATH = os.path.join(CFG_THIS_FILE_DIRNAME, CFG_DEFAULT_FILENAME)
    CFG_KNOWN_DEFAULTS = [
        {"key": "self_name", "value": "mvw-dl"},
        {"key": "tmp_base_dir", "value": os.path.join(CFG_THIS_FILE_DIRNAME, "data/tmp/%(self_name)s")},
        {"key": "state_base_dir", "value": os.path.join(CFG_THIS_FILE_DIRNAME, "data/var/lib/%(self_name)s")},
        {"key": "state_files_dir", "value": "%(state_base_dir)s/state"},
        {"key": "state_file_retention", "value": "50"},
        {"key": "state_file_name_prefix", "value": "state-"},
        {"key": "state_file_name_suffix", "value": ".log"},
        {"key": "mvw_endpoint", "value": "http://localhost:8000/api/query"},
        {"key": "title_dedup_winner", "value": "first"},
        {"key": "dl_progress_update_interval", "value": "10"},
        {"key": "dl_threads", "value": "2"},
        {"key": "dl_filename_pattern", "value": "&(channel)s - &(publish_date)s - &(topic)s - &(title)s"},
        {"key": "publish_date_srtftime_pattern", "value": "%%Y%%m%%d"},
        {"key": "dl_filename_replace_spaces_with", "value": "_"},
        {"key": "dl_filename_all_lowercase", "value": "yes"}
    ]
    CFG_KNOWN_SECTION = [
        {"key": "min_duration", "is_mandatory": False},
        {"key": "max_duration", "is_mandatory": False},
        {"key": "title_not_regex", "is_mandatory": False},
        {"key": "query", "is_mandatory": True},
        {"key": "dl_dir", "is_mandatory": True}
    ]
    CFG_MANDATORY = [section_cfg["key"] for section_cfg in CFG_KNOWN_SECTION if section_cfg["is_mandatory"]]


CONST = CONST()
logging.basicConfig(
    # Default for all modules is NOTSET so log everything
    level="NOTSET",
    format=CONST.LOG_FORMAT,
    datefmt="[%X]",
    handlers=[RichHandler(
        show_time=False,
        rich_tracebacks=True
    )]
)
log = logging.getLogger("rich")
# Our own code logs with this level
log.setLevel(logging.DEBUG)
# connectionpool and filelock log with WARNING, we don't need their verbosity
logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)
logging.getLogger("filelock").setLevel(logging.WARNING)
install(show_locals=True)


class ConfigParser(
        configparser.ConfigParser):
    """Can get options() without defaults

    Taken from https://stackoverflow.com/a/12600066.
    """

    def options(self, section, no_defaults=False, **kwargs):
        if no_defaults:
            try:
                return list(self._sections[section].keys())
            except KeyError:
                raise configparser.NoSectionError(section)
        else:
            return super().options(section)


ini_defaults = []
internal_defaults = {default["key"]: default["value"] for default in CONST.CFG_KNOWN_DEFAULTS}
config = ConfigParser(defaults=internal_defaults)
config.read(CONST.CFG_DEFAULT_ABS_PATH)


def print_section_header(
        header: str) -> str:
    return f"Loading config section '[{header}]' ..."


def validate_default_section(
        config_obj: configparser.ConfigParser()) -> None:
    log.debug(f"Loading config from file '{CONST.CFG_DEFAULT_ABS_PATH}' ...")
    if not config_obj.sections():
        log.error(f"No config sections found in '{CONST.CFG_DEFAULT_ABS_PATH}'. Exiting 1 ...")
        sys.exit(1)
    if config.defaults():
        log.debug(f"Symbol legend:\n"
                  f"* Global default from section '[{config_obj.default_section}]'\n"
                  f"~ Local option, doesn't exist in '[{config_obj.default_section}]'\n"
                  f"+ Local override of a value from '[{config_obj.default_section}]'\n"
                  f"= Local override, same value as in '[{config_obj.default_section}]'")
        log.debug(print_section_header(config_obj.default_section))
        for default in config_obj.defaults():
            ini_defaults.append({default: config_obj[config_obj.default_section][default]})
            log.debug(f"* {default} = {config_obj[config_obj.default_section][default]}")
    else:
        log.debug(f"No defaults defined")


def config_has_valid_section(
        config_obj: configparser.ConfigParser()) -> bool:
    has_valid_section = False
    for config_obj_section in config_obj.sections():
        if set(CONST.CFG_MANDATORY).issubset(config_obj.options(config_obj_section)):
            has_valid_section = True
            break
    return has_valid_section


def is_default(
        config_key: str) -> bool:
    return any(config_key in ini_default for ini_default in ini_defaults)


def is_same_as_default(
        config_kv_pair: dict) -> bool:
    return config_kv_pair in ini_defaults


def validate_config_sections(
        config_obj: configparser.ConfigParser()) -> None:
    for this_section in config_obj.sections():
        log.debug(print_section_header(this_section))
        if not set(CONST.CFG_MANDATORY).issubset(config_obj.options(this_section, no_defaults=True)):
            log.warning(f"Config section '[{this_section}]' does not have all mandatory options "
                        f"{CONST.CFG_MANDATORY} set, skipping section ...")
            config_obj.remove_section(this_section)
        else:
            for key in config_obj.options(this_section, no_defaults=True):
                kv_prefix = "~"
                if is_default(key):
                    kv_prefix = "+"
                    if is_same_as_default({key: config_obj[this_section][key]}):
                        kv_prefix = "="
                log.debug(f"{kv_prefix} {key} = {config_obj[this_section][key]}")


def query_string_from_file(
        filename: str) -> str:
    filename_abs_path = os.path.join(CONST.CFG_THIS_FILE_DIRNAME, filename)
    with open(filename_abs_path, "r", encoding="utf-8") as jsonfile:
        query_string = jsonfile.read()
    return query_string


def get_query_payload(
        section_name: str,
        config_obj: configparser.ConfigParser()) -> MVWJSONRequest:
    log.debug(f"Generating HTTP POST JSON payload ...")
    query = config_obj.get(section_name, "query")
    if query[0] == "@":
        query = query.split("@", 1)[1]
        query = query_string_from_file(query)
    got_query_payload = MVWJSONRequest(**json.loads(query))
    return got_query_payload


def get_json_response(
        section_name: str,
        config_obj: configparser.ConfigParser(),
        payload: MVWJSONRequest) -> MVWJSONResponse:
    log.debug(f"Downloading JSON list of Mediathek files that match search criteria")
    serialized_payload = payload.json()
    url = config_obj.get(section_name, "mvw_endpoint")
    req_header = {"Content-Type": "text/plain"}
    s = requests.Session()
    req = requests.Request("POST", url, data=serialized_payload, headers=req_header)
    prepped = req.prepare()
    newline = "\n"
    log.debug(f"Request method: {req.method}\n"
              f"URL: {req.url}\n"
              f"""{newline.join(f"Header '{header}': '{value}'" for header, value in list(req.headers.items()))}\n"""
              f"Payload: {payload}")
    with s.send(prepped) as s:
        got_json_response = MVWJSONResponse(**json.loads(s.content))
    return got_json_response


def no_downloads_needed() -> None:
    log.info(f"No search results to download, exiting 3 ...")
    sys.exit(3)


def remove_result(
        json_obj: MVWJSONResponse,
        result_obj: type_def.mvw_json_response.Show) -> MVWJSONResponse:
    json_obj.result.results.remove(result_obj)
    json_obj.result.queryInfo.resultCount -= 1
    if json_obj.result.queryInfo.resultCount:
        return json_obj
    else:
        no_downloads_needed()


def log_result_count(result_count: int, pre_filter: bool = True) -> None:
    if pre_filter:
        log.debug(f"""Search result contains {result_count} {p.plural("show", result_count)} going in""")
    else:
        log.debug(f"""Search result now contains {result_count} {p.plural("show", result_count)}""")


def filter_json_by_duration(
        section_name: str,
        config_obj: configparser.ConfigParser(),
        json_obj: MVWJSONResponse) -> MVWJSONResponse:
    min_duration = config_obj.getint(section_name, "min_duration")
    max_duration = config_obj.getint(section_name, "max_duration")
    log_result_count(json_obj.result.queryInfo.resultCount)
    if min_duration >= 0:
        log.debug(f"Filtering '[{section_name}]' JSON for minimum length of {min_duration} "
                  f"""{p.plural("second", min_duration)} ...""")
        for result in json_obj.result.results.copy():
            if not result.duration >= min_duration:
                remove_result(json_obj, result)
    if max_duration >= 0:
        log.debug(f"Filtering '[{section_name}]' JSON for maximum length of {max_duration} "
                  f"""{p.plural("second", max_duration)} ...""")
        for result in json_obj.result.results.copy():
            if not result.duration <= max_duration:
                remove_result(json_obj, result)
    log_result_count(json_obj.result.queryInfo.resultCount, False)
    return json_obj


def filter_json_by_title_regex(
        section_name: str,
        config_obj: configparser.ConfigParser(),
        json_obj: MVWJSONResponse) -> MVWJSONResponse:
    title_not_regex = re.compile(config_obj.get(section_name, "title_not_regex"), re.IGNORECASE)
    log_result_count(json_obj.result.queryInfo.resultCount)
    log.debug(f"Filtering '[{section_name}]' JSON by title regular expression")
    for result in json_obj.result.results.copy():
        if title_not_regex.search(result.title):
            remove_result(json_obj, result)
    log_result_count(json_obj.result.queryInfo.resultCount, False)
    return json_obj


def dedup_json_titles(
        section_name: str,
        config_obj: configparser.ConfigParser(),
        json_obj: MVWJSONResponse) -> MVWJSONResponse:
    title_dedup_winner = config_obj.get(section_name, "title_dedup_winner")
    titles_list = {}
    log_result_count(json_obj.result.queryInfo.resultCount)
    for result in json_obj.result.results.copy():
        if result.title not in titles_list:
            titles_list[result.title] = {}
        if result.id not in titles_list[result.title]:
            titles_list[result.title][result.id] = result.timestamp
    for result in titles_list.copy():
        if title_dedup_winner == "first":
            dd_winner = min(titles_list[result], key=str)
        else:
            dd_winner = max(titles_list[result], key=str)
        titles_list[result] = dd_winner
    for result in json_obj.result.results.copy():
        if result.title in titles_list:
            if result.id != titles_list[result.title]:
                log.debug(f"""Deduplicating '[{section_name}]' result "{result.title}" ...""")
                remove_result(json_obj, result)
    log_result_count(json_obj.result.queryInfo.resultCount, False)
    return json_obj


def expanded_dest_dir(
        raw_dest_dir: str) -> str:
    user_expanded_dest_dir = os.path.expanduser(raw_dest_dir)
    all_expanded_dest_dir = os.path.expandvars(user_expanded_dest_dir)
    return all_expanded_dest_dir


def filename_replace_pattern(
        section_name: str,
        config_obj: configparser.ConfigParser(),
        show: type_def.mvw_json_response.Show,
        max_quality_url: str,
        shorthand_uuid: str) -> str:

    filename = config_obj.get(section_name, "dl_filename_pattern")
    ext = pathlib.Path(max_quality_url).suffix.lstrip(".")
    publish_date = d.datetime.utcfromtimestamp(show.timestamp).strftime(
        config_obj.get(section_name, "publish_date_srtftime_pattern"))
    show_extended = {"ext": ext, "publish_date": publish_date}
    show_attrs = [attr for attr in dir(show) if not attr.startswith('_') and not callable(getattr(show, attr))]

    for attr in show_attrs:
        attr_re = re.compile(r"&\(" + re.escape(attr) + r"\)s")
        if re.search(attr_re, filename):
            log.debug(f"{shorthand_uuid} Replacing filename pattern '&({attr})s' ...")
            filename = re.sub(attr_re, str(getattr(show, attr)), filename)
            log.debug(f"{shorthand_uuid} New filename: '{filename}'")
    for extended_attr in show_extended:
        extended_attr_re = re.compile(r"&\(" + re.escape(extended_attr) + r"\)s")
        if re.search(extended_attr_re, filename):
            log.debug(f"{shorthand_uuid} Replacing filename pattern '&({extended_attr})s' ...")
            filename = re.sub(extended_attr_re, show_extended[extended_attr], filename)
            log.debug(f"{shorthand_uuid} New filename: '{filename}'")
    return filename


def get_safe_filename(
        dirty_filename: str,
        shorthand_uuid: str) -> str:
    """https://stackoverflow.com/a/71199182"""

    log.debug(f"{shorthand_uuid} Removing question marks from file name ...")
    clean_filename = re.sub(r"""[?]""", "", dirty_filename)

    log.debug(f"{shorthand_uuid} Replacing unsafe characters in filename with dashes ...")
    clean_filename = re.sub(r"""[/\\?%*:|"<>\x7F\x00-\x1F]""", "-", clean_filename)

    log.debug(f"{shorthand_uuid} New filename: '{clean_filename}'")
    return clean_filename


def filename_replace_spaces_with_underscores(
        section_name: str,
        config_obj: configparser.ConfigParser(),
        filename: str,
        shorthand_uuid: str) -> str:

    space_replace_string = config_obj.get(section_name, "dl_filename_replace_spaces_with")
    log.debug(f"{shorthand_uuid} Replacing space characters with '{space_replace_string}' ...")
    underscored_filename = re.sub(
        r"\s",
        space_replace_string,
        filename)
    log.debug(f"{shorthand_uuid} New filename: '{underscored_filename}'")
    return underscored_filename


def get_filename(
        section_name: str,
        config_obj: configparser.ConfigParser(),
        show: type_def.mvw_json_response.Show,
        max_quality_url: str,
        shorthand_uuid: str) -> str:

    log.debug(f"{shorthand_uuid} Generating final filename ...")

    filename_replaced_patterns = filename_replace_pattern(
        section_name,
        config_obj,
        show,
        max_quality_url,
        shorthand_uuid)

    filename_safe = get_safe_filename(
        filename_replaced_patterns,
        shorthand_uuid)

    if config.get(section_name, "dl_filename_replace_spaces_with"):
        filename_safe = filename_replace_spaces_with_underscores(
            section_name,
            config_obj,
            filename_safe,
            shorthand_uuid)

    if config.getboolean(section_name, "dl_filename_all_lowercase"):
        log.debug(f"{shorthand_uuid} Lowercasing all filename characters ...")
        filename_safe = filename_safe.lower()
        log.debug(f"{shorthand_uuid} New filename: '{filename_safe}'")

    return filename_safe


def get_state_file_abs_path(
        section_name: str,
        config_obj: configparser.ConfigParser()) -> str:

    state_dir = config_obj.get(section_name, "state_files_dir")
    try:
        os.makedirs(state_dir, exist_ok=True)
    except OSError:
        log.error(f"Unable to create '[{section}]' state directory '{state_dir}'. "
                  f"We're not going to be able to log state information. Exiting 6 ...")
        sys.exit(6)
    else:
        state_file = \
            config_obj.get(section_name, "state_file_name_prefix") + \
            section_name + \
            config_obj.get(section_name, "state_file_name_suffix")
        state_file_abs_path = os.path.join(state_dir, state_file)
        return state_file_abs_path


def state_file_none_or_valid_json(
        state_file_abs_path: str) -> bool:

    if os.path.exists(state_file_abs_path):
        if os.path.getsize(state_file_abs_path) > 0:
            with open(state_file_abs_path, "r", encoding="utf-8") as state_file:
                try:
                    json.loads(state_file.read())
                    return True
                except json.JSONDecodeError:
                    log.warning(f"State file '{state_file_abs_path}' does not contain valid JSON. We're not going to "
                                f"be able to log anything into it. Exiting 4 ...")
                    sys.exit(4)
        else:
            return True
    else:
        return True


def truncate_log(
        json_data: json.loads,
        max_log_entries: int) -> json.loads:

    for i in range(len(json_data)):
        del json_data[0]
        if len(json_data) <= max_log_entries:
            break
    return json_data


def get_state_file_lock(
        state_lock_file: str) -> filelock.BaseFileLock:
    global file_lock_timeout
    try:
        lock = FileLock(state_lock_file, timeout=file_lock_timeout)
        return lock
    except filelock.Timeout:
        log.error(f"Unable to acquire lock on state lock file '{state_lock_file}' "
                  f"""within {file_lock_timeout} {p.plural("second", file_lock_timeout)}, exiting 5 ...""")
        sys.exit(5)


def log_successful_download(
        section_name: str,
        config_obj: configparser.ConfigParser(),
        show: type_def.mvw_json_response.Show,
        state_file_abs_path: str,
        job_uuid: str,
        shorthand_uuid: str) -> None:

    timestamp_now = int(time.time())
    os.makedirs(os.path.dirname(state_file_abs_path), exist_ok=True)
    state_lock_file = state_file_abs_path + state_lock_file_ext

    state_body = show.dict(include={"topic", "title"})
    state_body["dl_complete_timestamp_epoch"] = timestamp_now
    state_body["dl_complete_timestamp_human"] = \
        d.datetime.utcfromtimestamp(timestamp_now).strftime("%Y-%m-%d %H%M%S UTC")
    state_entry = {job_uuid: state_body}
    json_state = None

    lock = get_state_file_lock(state_lock_file)

    with lock:
        state_file_none_or_valid_json(state_file_abs_path)
        state_file_open_mode = "r+" if os.path.exists(state_file_abs_path) else "w+"
        with open(state_file_abs_path, state_file_open_mode, encoding="utf-8") as state_file:
            try:
                json_state = json.load(state_file)
            except json.JSONDecodeError:
                if json_state is None:
                    state_file.truncate()
                    json_state = []

        log.debug(f"{shorthand_uuid} Writing log entry to '{state_file_abs_path}' ...")
        with open(state_file_abs_path, "w", encoding="utf-8") as state_file:
            json_state.append(state_entry)
            max_log_entries = config_obj.getint(section_name, "state_file_retention")
            if len(json_state) > max_log_entries:
                json_state = truncate_log(json_state, max_log_entries)
            json.dump(json_state, state_file, indent=4, sort_keys=True, ensure_ascii=False)


def copy_url(
        section_name: str,
        config_obj: configparser.ConfigParser(),
        show: type_def.mvw_json_response.Show,
        video_metadata: dict,
        state_file_abs_path: str,
        show_name: str,
        job_uuid: str,
        shorthand_uuid: str,
        tmp_dir: str,
        dest_dir: str) -> None:
    """Copy data from a url to a local file."""

    global download_start_time
    global download_last_update_time
    global size_downloaded_for_progress_tracking
    global size_downloaded_for_speed_tracking

    update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
    max_quality_url = video_metadata["url"]
    filename = get_filename(section_name, config_obj, show, max_quality_url, shorthand_uuid)
    resume_header = {}
    tmp_file_open_mode = "wb"
    tmp_file_size = 0
    tmp_path = os.path.join(tmp_dir, filename)
    dest_path = os.path.join(dest_dir, filename)

    os.makedirs(os.path.dirname(tmp_path), exist_ok=True)
    log.info(f"{shorthand_uuid} Download location resolved to '{tmp_path}'")
    if os.path.exists(tmp_path):
        tmp_file_size = os.path.getsize(tmp_path)
        log.debug(f"{shorthand_uuid} Temporary file '{tmp_path}' exists likely from a previous incomplete "
                  f"download attempt, size is {humanize.naturalsize(tmp_file_size, binary=True)}. Resuming ...")
        tmp_file_open_mode = "ab"
    try:
        with open(tmp_path, tmp_file_open_mode) as tmp_file:
            log.info(f"""{shorthand_uuid} Downloading "{show_name}" ...""")
            if tmp_file_size > 0:
                resume_header = {"range": f"bytes={tmp_file_size}-"}
                log.debug(f"resume_header: {resume_header}")
                size_downloaded_for_progress_tracking += tmp_file_size
            r = requests.get(max_quality_url, headers=resume_header, stream=True)
            for chunk in r.iter_content(32768):
                size_downloaded_for_progress_tracking += len(chunk)
                size_downloaded_for_speed_tracking += len(chunk)
                tmp_file.write(chunk)
                if time.time() - download_last_update_time >= update_interval:
                    download_last_update_time = time.time()
                    time_in_progress = download_last_update_time - download_start_time
                    dl_speed_so_far = size_downloaded_for_speed_tracking / time_in_progress
                    human_dl_speed_so_far = f"{humanize.naturalsize(dl_speed_so_far, binary=True)}/s"
                    data_missing = total_content_length - size_downloaded_for_progress_tracking
                    time_til_completion = 1 / dl_speed_so_far * data_missing
                    human_time_til_completion = humanize.naturaldelta(d.timedelta(seconds=time_til_completion))
                    percentage_done = size_downloaded_for_progress_tracking / total_content_length * 100
                    human_pct = "{:.1f}".format(percentage_done)
                    human_size_dl = humanize.naturalsize(size_downloaded_for_progress_tracking, binary=True)
                    human_total_dl = humanize.naturalsize(total_content_length, binary=True)
                    log.debug(f"[thread] Downloaded {human_pct}% ({human_size_dl}/{human_total_dl} "
                              f"at an average {human_dl_speed_so_far}, approximately {human_time_til_completion} "
                              f"left til completion.)")
            log.info(f"""{shorthand_uuid} Download of "{show_name}" done""")
    except IOError:
        log.error(f"{shorthand_uuid} IOError during download. Aborting this download thread ...")
        return

    log.info(f"{shorthand_uuid} Moving file to final location '{dest_path}' ...")
    try:
        shutil.move(tmp_path, dest_path)
    except OSError as ose:
        log.error(f"{shorthand_uuid} Failed moving file with an OSError\n"
                  f"{ose}\n"
                  f"Other threads continue unhindered.")
    else:
        log_successful_download(section_name, config_obj, show, state_file_abs_path, job_uuid, shorthand_uuid)
        log.info(f"{shorthand_uuid} Done moving")


def get_max_quality_url(
        show: type_def.mvw_json_response.Show) -> str:
    if show.url_video_hd:
        max_quality_url = show.url_video_hd
    elif show.url_video:
        max_quality_url = show.url_video
    else:
        max_quality_url = show.url_video_low
    return max_quality_url


def get_content_length(
        video_url: str) -> int:
    r = requests.head(video_url)
    if r.status_code == requests.codes.ok:
        return int(r.headers["content-length"])
    else:
        return 0


def get_json_state(
        state_file_abs_path: str) -> json.loads:
    try:
        with open(state_file_abs_path, "r", encoding="utf-8") as state_file:
            try:
                json_state = json.load(state_file)
            except json.JSONDecodeError:
                return []
            else:
                return json_state
    except FileNotFoundError:
        log.debug(f"State file does not exist (yet), assuming no previous downloads have ever happened ...")
        return []


def is_already_downloaded(
        show: type_def.mvw_json_response.Show,
        json_state: json.loads,
        show_name: str) -> bool:

    for log_entry in json_state:
        for log_data in [key for key in log_entry]:
            if show.topic == log_entry[log_data]["topic"] and show.title == log_entry[log_data]["title"]:
                log.debug(f"""Show "{show_name}" already downloaded, won't queue""")
                return True


def download_media(
        section_name: str,
        config_obj: configparser.ConfigParser(),
        json_obj: MVWJSONResponse) -> None:

    global download_start_time
    global download_last_update_time
    global total_content_length

    dl_threads = config_obj.getint(section_name, "dl_threads")
    state_file_abs_path = get_state_file_abs_path(section_name, config_obj)
    state_lock_file = state_file_abs_path + state_lock_file_ext
    video_metadata = {}

    tmp_dir = expanded_dest_dir(config_obj.get(section_name, "tmp_base_dir"))
    dest_dir = expanded_dest_dir(config_obj.get(section_name, "dl_dir"))
    log.info(f"""Download location is {tmp_dir}""")
    log.info(f"""Final location is {dest_dir}""")
    log.info(f"Limiting parallel downloads to {dl_threads} ...")

    lock = get_state_file_lock(state_lock_file)

    with lock:
        state_file_none_or_valid_json(state_file_abs_path)
        json_state = get_json_state(state_file_abs_path)

    with ThreadPoolExecutor(max_workers=dl_threads) as pool:
        download_last_update_time = time.time()
        download_start_time = download_last_update_time
        update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
        log.debug(f"""Will provide updates every {update_interval} {p.plural("second", update_interval)}""")
        for result in json_obj.result.results.copy():
            show_name = f"{result.topic} - {result.title}"
            future = None
            if not is_already_downloaded(result, json_state, show_name):
                max_quality_url = get_max_quality_url(result)
                content_length = get_content_length(max_quality_url)
                video_metadata[result.id] = {"url": max_quality_url, "content_length": content_length}
                total_content_length += video_metadata[result.id]["content_length"]
                log.debug(f"Total download size upped to "
                          f"{humanize.naturalsize(total_content_length, binary=True)}")

                job_uuid = str(uuid.uuid4())
                shorthand_uuid = f"[{job_uuid[:2]}..{job_uuid[-2:]}]"
                log.debug(f"{shorthand_uuid} Job UUID {job_uuid} generated, shorthand is {shorthand_uuid}")
                log.debug(f"""{shorthand_uuid} Queuing "{show_name}" for download ...""")
                future = pool.submit(
                    copy_url,
                    section_name,
                    config_obj,
                    result,
                    video_metadata[result.id],
                    state_file_abs_path,
                    show_name,
                    job_uuid,
                    shorthand_uuid,
                    tmp_dir,
                    dest_dir)
            if future is not None:
                future.result()


if __name__ == '__main__':
    validate_default_section(config)
    if config_has_valid_section(config):
        validate_config_sections(config)
    else:
        log.error(f"No valid config section found. A valid config section has at least the mandatory options "
                  f"{CONST.CFG_MANDATORY} set. Exiting 2 ...")
        sys.exit(2)

    log.debug(f"Iterating over config sections ...")
    for section in config.sections():
        log.debug(f"Processing section '[{section}]' ...")
        query_payload = get_query_payload(section, config)
        json_response = get_json_response(section, config, query_payload)

        log.debug(f"Filtering results by duration where applicable ...")
        if config.has_option(section, "min_duration") or config.has_option(section, "max_duration"):
            json_response = filter_json_by_duration(section, config, json_response)

        log.debug(f"Filtering results by title regular expression where applicable ...")
        if config.has_option(section, "title_not_regex"):
            json_response = filter_json_by_title_regex(section, config, json_response)

        log.debug(f"Deduplicating results by title where needed ...")
        if config.has_option(section, "title_not_regex"):
            json_response = dedup_json_titles(section, config, json_response)

        log.debug(f"Interested in {json_response.result.queryInfo.resultCount} "
                  f"""{p.plural("show", json_response.result.queryInfo.resultCount)} ...""")
        download_media(section, config, json_response)
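For orientation, log_successful_download() above appends one entry per finished download to the per-section state file, and is_already_downloaded() later skips shows whose topic and title already appear there. A sketch of the entry shape, with made-up values:

```python
# Illustration only, not part of this commit: shape of one state file entry as
# written by log_successful_download(). UUID, titles and timestamps are made up.
state_entry = {
    "1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed": {  # job_uuid generated per download
        "topic": "Die Sendung mit der Maus",
        "title": "Sendung vom 06.03.2022",
        "dl_complete_timestamp_epoch": 1646553600,
        "dl_complete_timestamp_human": "2022-03-06 080000 UTC",
    }
}
# On the next run a show is skipped when both "topic" and "title" match an entry.
```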
requirements.in
@@ -1 +1,6 @@
rich
requests
inflect
pydantic
humanize
filelock
requirements.txt
@@ -4,9 +4,29 @@
#
# pip-compile
#
certifi==2021.10.8
    # via requests
charset-normalizer==2.0.12
    # via requests
commonmark==0.9.1
    # via rich
filelock==3.6.0
    # via -r requirements.in
humanize==4.0.0
    # via -r requirements.in
idna==3.3
    # via requests
inflect==5.4.0
    # via -r requirements.in
pydantic==1.9.0
    # via -r requirements.in
pygments==2.11.2
    # via rich
requests==2.27.1
    # via -r requirements.in
rich==12.0.0
    # via -r requirements.in
typing-extensions==4.1.1
    # via pydantic
urllib3==1.26.9
    # via requests
type_def/__init__.py (new file, empty)
type_def/mvw_json_request.py (new file, 17 lines)
@@ -0,0 +1,17 @@
from __future__ import annotations
from typing import List
from pydantic import BaseModel


class Query(BaseModel):
    fields: List[str]
    query: str


class MVWJSONRequest(BaseModel):
    queries: List[Query]
    sortBy: str
    sortOrder: str
    future: bool
    offset: int
    size: int
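A minimal sketch (not part of this commit) of how this request model is used by get_query_payload() in mvw-dl.py: the example query file is parsed into MVWJSONRequest and serialized back with pydantic's .json() before being POSTed:

```python
# Illustration only, not part of this commit: building and serializing the
# request model from the example query file, as get_query_payload() does.
import json

from type_def.mvw_json_request import MVWJSONRequest

with open("examples/maus-query.json.example", "r", encoding="utf-8") as f:
    payload = MVWJSONRequest(**json.loads(f.read()))

print(payload.queries[0].query)  # die sendung mit der maus
print(payload.json())            # serialized payload sent to the mvw_endpoint
```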
type_def/mvw_json_response.py (new file, 37 lines)
@@ -0,0 +1,37 @@
from __future__ import annotations
from typing import List, Optional
from pydantic import BaseModel


class Show(BaseModel):
    channel: str
    topic: str
    title: str
    description: str
    timestamp: int
    duration: int
    size: Optional[int]
    url_website: str
    url_subtitle: str
    url_video: str
    url_video_low: str
    url_video_hd: str
    filmlisteTimestamp: str
    id: str


class QueryInfo(BaseModel):
    filmlisteTimestamp: str
    searchEngineTime: str
    resultCount: int
    totalResults: int


class Result(BaseModel):
    results: List[Show] = []
    queryInfo: QueryInfo


class MVWJSONResponse(BaseModel):
    result: Result
    err: Optional[str] = None
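Likewise, get_json_response() in mvw-dl.py feeds the API reply into MVWJSONResponse. A sketch (not part of this commit) with a minimal made-up reply:

```python
# Illustration only, not part of this commit: parsing an API reply into the
# response model, as get_json_response() does. The reply dict is made up.
from type_def.mvw_json_response import MVWJSONResponse

reply = {
    "result": {
        "results": [],
        "queryInfo": {
            "filmlisteTimestamp": "1646553600",
            "searchEngineTime": "3.1",
            "resultCount": 0,
            "totalResults": 0,
        },
    },
    "err": None,
}
response = MVWJSONResponse(**reply)
print(response.result.queryInfo.resultCount)  # 0
```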