mvw-dl/mvw-dl.py

239 lines
9.3 KiB
Python

import configparser
import json
import logging
import os
import sys
import requests
import inflect
from rich.logging import RichHandler
from rich.traceback import install
from rich import print
import typing as t
from rich.console import Console
from type_def.mvw_json_request import MVWJSONRequest
from type_def.mvw_json_response import MVWJSONResponse
# Without width
console = Console(width=180)
p = inflect.engine()
# We use Python 3.5+ type hints; we're working with JSON objects; we're following a 2016 suggestion from
# Python's "typing" GitHub issue tracker on how to create a "JSONType" hint since such a thing does not yet
# officially exist: https://github.com/python/typing/issues/182#issuecomment-186684288
# JSONType = t.Union[str, int, float, bool, None, t.Dict[str, t.Any], t.List[t.Any]]
JSONType = t.Union[str, int, float, bool, None, t.Dict[str, t.Any], t.List[t.Any]]
# Exit codes
# 1: Config file invalid, it has no sections
# 2: Config file invalid, sections must define at least CONST.CFG_MANDATORY
class CONST(object):
__slots__ = ()
LOG_FORMAT = "%(message)s"
CFG_THIS_FILE_DIRNAME = os.path.dirname(__file__)
CFG_DEFAULT_FILENAME = "config.ini"
CFG_DEFAULT_ABS_PATH = os.path.join(CFG_THIS_FILE_DIRNAME, CFG_DEFAULT_FILENAME)
CFG_KNOWN_DEFAULTS = [
{"key": "self_name", "value": "mvw-dl"},
{"key": "tmp_base_dir", "value": os.path.join(CFG_THIS_FILE_DIRNAME, "data/tmp/%(self_name)s")},
{"key": "state_base_dir", "value": os.path.join(CFG_THIS_FILE_DIRNAME, "data/var/lib/%(self_name)s")},
{"key": "state_files_dir", "value": "%(state_base_dir)s/state"},
{"key": "state_file_retention", "value": "50"},
{"key": "state_file_name_prefix", "value": "state-"},
{"key": "state_file_name_suffix", "value": ".log"},
{"key": "mvw_endpoint", "value": "http://localhost:8000/api/query"},
{"key": "title_dedup_winner", "value": "first"}
]
CFG_KNOWN_SECTION = [
{"key": "min_duration", "is_mandatory": False},
{"key": "max_duration", "is_mandatory": False},
{"key": "title_not_regex", "is_mandatory": False},
{"key": "query", "is_mandatory": True},
{"key": "dl_dir", "is_mandatory": True}
]
CFG_MANDATORY = [section_cfg["key"] for section_cfg in CFG_KNOWN_SECTION if section_cfg["is_mandatory"]]
CONST = CONST()
logging.basicConfig(
# Default for all modules in NOTSET so log everything
level="NOTSET",
format=CONST.LOG_FORMAT,
datefmt="[%X]",
handlers=[RichHandler(
show_time=False if "SYSTEMD_EXEC_PID" in os.environ else True,
rich_tracebacks=True
)]
)
log = logging.getLogger("rich")
# Our own code logs with this level
log.setLevel(logging.DEBUG)
# connectionpool logs with WARNING, we don't need its verbosity
log_connectionpool = logging.getLogger("urllib3.connectionpool")
log_connectionpool.setLevel(logging.WARNING)
install(show_locals=True)
class ConfigParser(configparser.ConfigParser):
"""Can get options() without defaults
Taken from https://stackoverflow.com/a/12600066.
"""
def options(self, section, no_defaults=False, **kwargs):
if no_defaults:
try:
return list(self._sections[section].keys())
except KeyError:
raise configparser.NoSectionError(section)
else:
return super().options(section)
ini_defaults = []
internal_defaults = {default["key"]: default["value"] for default in CONST.CFG_KNOWN_DEFAULTS}
config = ConfigParser(defaults=internal_defaults)
config.read(CONST.CFG_DEFAULT_FILENAME)
def print_section_header(header: str) -> str:
return f"Loading config section '[{header}]' ..."
def validate_default_section(config_obj: configparser.ConfigParser()) -> None:
log.debug(f"Loading config from file '{CONST.CFG_DEFAULT_ABS_PATH}' ...")
if not config_obj.sections():
log.error(f"No config sections found in '{CONST.CFG_DEFAULT_ABS_PATH}'. Exiting 1 ...")
sys.exit(1)
if config.defaults():
log.debug(f"Symbol legend:\n"
f"* Global default from section '[{config_obj.default_section}]'\n"
f"~ Local option, doesn't exist in '[{config_obj.default_section}]'\n"
f"+ Local override of a value from '[{config_obj.default_section}]'\n"
f"= Local override, same value as in '[{config_obj.default_section}]'")
log.debug(print_section_header(config_obj.default_section))
for default in config_obj.defaults():
ini_defaults.append({default: config_obj[config_obj.default_section][default]})
log.debug(f"* {default} = {config_obj[config_obj.default_section][default]}")
else:
log.debug(f"No defaults defined")
def config_has_valid_section(config_obj: configparser.ConfigParser()) -> bool:
has_valid_section = False
for config_obj_section in config_obj.sections():
if set(CONST.CFG_MANDATORY).issubset(config_obj.options(config_obj_section)):
has_valid_section = True
break
return has_valid_section
def is_default(config_key: str) -> bool:
return any(config_key in ini_default for ini_default in ini_defaults)
def is_same_as_default(config_kv_pair: dict) -> bool:
return config_kv_pair in ini_defaults
def validate_config_sections(config_obj: configparser.ConfigParser()) -> None:
for this_section in config_obj.sections():
log.debug(print_section_header(this_section))
if not set(CONST.CFG_MANDATORY).issubset(config_obj.options(this_section, no_defaults=True)):
log.warning(f"Config section '[{this_section}]' does not have all mandatory options "
f"{CONST.CFG_MANDATORY} set, skipping section ...")
config_obj.remove_section(this_section)
else:
for key in config_obj.options(this_section, no_defaults=True):
kv_prefix = "~"
if is_default(key):
kv_prefix = "+"
if is_same_as_default({key: config_obj[this_section][key]}):
kv_prefix = "="
log.debug(f"{kv_prefix} {key} = {config_obj[this_section][key]}")
def query_string_from_file(filename: str) -> str:
with open(filename, "r") as jsonfile:
query_string = jsonfile.read()
return query_string
def get_query_payload(
section_name: str,
config_obj: configparser.ConfigParser()) -> MVWJSONRequest:
log.debug(f"Generating HTTP POST JSON payload ...")
query = config_obj.get(section_name, "query")
if query[0] == "@":
query = query.split("@", 1)[1]
query = query_string_from_file(query)
got_query_payload = MVWJSONRequest(**json.loads(query))
return got_query_payload
def get_json_response(
section_name: str,
config_obj: configparser.ConfigParser(),
payload: MVWJSONRequest) -> MVWJSONResponse:
log.debug(f"Downloading JSON list of Mediathek files that match search criteria")
serialized_payload = json.dumps(payload)
url = config_obj.get(section_name, "mvw_endpoint")
req_header = {"Content-Type": "text/plain"}
s = requests.Session()
req = requests.Request("POST", url, data=serialized_payload, headers=req_header)
prepped = req.prepare()
newline = "\n"
log.debug(f"Request method: {req.method}\n"
f"URL: {req.url}\n"
f"""{newline.join(f"Header '{header}': '{value}'" for header, value in list(req.headers.items()))}\n"""
f"Payload: {payload}")
with s.send(prepped) as s:
# return json.loads(s.content)
got_json_response = MVWJSONResponse(**json.loads(s.content))
return got_json_response
def filter_json_by_duration(
section_name: str,
config_obj: configparser.ConfigParser(),
json_obj: MVWJSONResponse) -> MVWJSONResponse:
min_length = config_obj.getint(section_name, "min_duration")
if min_length >= 0:
log.debug(f"""Filtering JSON for minimum length of {min_length} {p.plural("second", min_length)}""")
# console.print(json_obj["result"]["results"][0])
for result in json_obj["result"]["results"]:
console.log(result)
console.log(f"0000000000000000000000")
if not result["duration"] >= min_length:
pass
# json_str.
# console.log(f"{result}\n......................")
# console.log(json_obj)
# console.log(f"ssssssssss")
# json_matches_min_length =
pass
if __name__ == '__main__':
validate_default_section(config)
if config_has_valid_section(config):
validate_config_sections(config)
else:
log.error(f"No valid config section found. A valid config section has at least the mandatory options "
f"{CONST.CFG_MANDATORY} set. Exiting 2 ...")
sys.exit(2)
log.debug(f"Iterating over config sections ...")
for section in config.sections():
log.debug(f"Processing section '[{section}]' ...")
query_payload = get_query_payload(section, config)
json_response = get_json_response(section, config, query_payload)
log.debug(CONST.CFG_KNOWN_SECTION[0])
if config.has_option(section, "min_duration") or config.has_option(section, "max_duration"):
json_matches_duration = filter_json_by_duration(section, config, json_response)
# console.log(json_response)