Compare commits

...

42 Commits

Author SHA1 Message Date
eabf595ff5 systemd timer unit example has an 'OnCalendar' instruction 2022-03-26 23:12:42 +01:00
ab0a82c626 Hide log timestamps, intended use case is inside a systemd service unit anyway where systemd provides timestamps 2022-03-26 23:11:53 +01:00
03b449c768 systemd service unit will run on a timer, change unit type to oneshot and supply a timer unit file 2022-03-23 23:40:41 +01:00
e269a110a6 When cleaning file name remove question marks instead of replacing them with dashes 2022-03-23 23:39:32 +01:00
a3a375d142 Config and JSON files by default live in script's dir 2022-03-23 23:39:04 +01:00
81ce5812a6 Account for situations where a state file does not (yet) exist 2022-03-23 23:38:25 +01:00
83921912a4 Add to-dos 2022-03-23 23:37:19 +01:00
65e3ec83b1 Add example systemd service unit file 2022-03-23 15:53:31 +01:00
266d3189dc Replace our maus-query.json file with an example with sane defaults 2022-03-23 15:49:54 +01:00
563ff4d342 Replace our config.ini with a sane example 2022-03-23 15:47:33 +01:00
287a755e65 Streamline download selection 2022-03-20 02:35:10 +01:00
d70766bae0 If a partial download exists in temp dir we resume it 2022-03-20 02:34:15 +01:00
380fb4bf2e Calculate total downloadable content length after deciding which downloads we need 2022-03-20 02:33:32 +01:00
e395309011 Cosmetics, remove unnecessary lines, rewrite help texts a bit 2022-03-20 02:31:55 +01:00
029d9ffb7e When replacing filename pattern strings only log the ones we're seeing in config file 2022-03-20 02:30:23 +01:00
ec612de2dd Remove event handler from downloads 2022-03-20 02:29:22 +01:00
2905ff5c74 Fix encoding for JSON files 2022-03-20 02:28:14 +01:00
0cfe47465d Fix encoding for JSON files 2022-03-20 02:27:55 +01:00
5eff7876bc Cosmetics, lower urllib3.connectionpool log level back to WARNING 2022-03-20 02:25:39 +01:00
4809846edf Correctly calculate download speed if we're resuming 2022-03-20 02:25:02 +01:00
b5dff485d9 Move downloaded file into target location even across file system boundaries 2022-03-20 02:22:12 +01:00
e78659b2de Example JSON query uses 100 results 2022-03-20 02:17:36 +01:00
27004a5294 By default 'hörfassung' isn't needed for downloads 2022-03-20 02:16:52 +01:00
d0552f9e67 Check if file was previously downloaded 2022-03-19 16:58:27 +01:00
dd6464de5d Start implementing range downloading 2022-03-19 16:58:04 +01:00
facfe4e7d3 Check state file for previous downloads, improve logging per thread 2022-03-19 16:08:12 +01:00
c0a271d0eb Cosmetics, add trailing newline 2022-03-19 14:13:45 +01:00
11b9daa729 Update urllib3 to 1.26.9 2022-03-19 14:13:17 +01:00
9a5ce9469d Use filelock to make sure we don't try to write state info from multiple download threads simultaneously 2022-03-19 14:12:42 +01:00
3f6bc46d52 Cosmetics, remove unused variable 2022-03-19 08:51:31 +01:00
601583afc3 Maintain a state file 2022-03-19 08:50:51 +01:00
7081c6b50a Generate safe filename 2022-03-19 03:52:26 +01:00
7b391be89e Cosmetics, replace safe filename regex with triple-quoted string so we don't have to backslash-escape literal double quote 2022-03-19 01:53:39 +01:00
c5f74ce479 Separate function to expand download directory 2022-03-19 01:52:31 +01:00
4c327a35f5 Cosmetics, put imports at top of file 2022-03-19 01:51:44 +01:00
5e321686d0 Cosmetics, put imports at top of file 2022-03-19 01:51:11 +01:00
e02c0bf52e Add options to customize output filename 2022-03-19 01:50:05 +01:00
25d0059f14 Prep for logging and safe filenames 2022-03-17 18:41:17 +01:00
296e2ca7e5 Use user's download location, resolve mentions of ~ (tilde) and environment vars 2022-03-17 17:58:44 +01:00
34ef348929 Download episodes and provide regular updates 2022-03-17 17:37:57 +01:00
47bb1f761a Get total content_length and provide just one global stream of download progress updates 2022-03-17 17:23:41 +01:00
d92236a2f2 Prior to submitting download job to thread pool determine URL for highest quality 2022-03-17 16:13:13 +01:00
8 changed files with 467 additions and 110 deletions

View File

@@ -1,26 +0,0 @@
[DEFAULT]
self_name = mvw-dl
tmp_base_dir = /tmp/%(self_name)s
state_base_dir = /var/lib/%(self_name)s
state_files_dir = %(state_base_dir)s/state
state_file_retention = 50
state_file_name_prefix = state-
state_file_name_suffix = .log
mvw_endpoint = http://localhost:8000/api/query
title_dedup_winner = first
[maus]
min_duration = 1200
max_duration = 2700
query = @maus-query.json
title_not_regex = audiodeskription|gebärdensprache
# query = {"queries":[{"fields":["topic"],"query":"die sendung mit der maus"},{"fields":["channel"],"query":"ARD"}],"sortBy":"timestamp","sortOrder":"desc","future":false,"offset":0,"size":50}
# state_file_name = maus
# tmp_base_dir = %(tmp_base_dir)s/maus
dl_dir = ~/maus
#[test]
#min_duration = 100
#max_duration = 200
#query = {"queries":[{"fields":["topic"],"query":"die sendung mit der maus"},{"fields":["channel"],"query":"ARD"}],"sortBy":"timestamp","sortOrder":"desc","future":false,"offset":0,"size":50}
#dl_dir = test

View File

@@ -0,0 +1,25 @@
[DEFAULT]
self_name = mvw-dl
tmp_base_dir = /tmp/%(self_name)s
state_base_dir = /var/lib/%(self_name)s
state_files_dir = %(state_base_dir)s/state
state_file_retention = 50
state_file_name_prefix = state-
state_file_name_suffix = .log
mvw_endpoint = http://localhost:8000/api/query
title_dedup_winner = first
dl_progress_update_interval = 10
dl_threads = 2
dl_filename_pattern = &(channel)s - &(publish_date)s - &(topic)s - &(title)s.&(ext)s
publish_date_srtftime_pattern = %%Y%%m%%d
dl_filename_replace_spaces_with =
dl_filename_all_lowercase = no
[maus]
min_duration = 1200
max_duration = 3000
query = @maus-query.json.example
title_not_regex = audiodeskription|gebärdensprache|hörfassung
dl_filename_pattern = &(publish_date)s.&(ext)s
publish_date_srtftime_pattern = S%%YE%%Y%%m%%d01
dl_dir = /tmp/kodi-nfo-feeder/maus
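
For reference, a minimal sketch (not part of the repository) of how this example config resolves when read with Python's stock configparser. The filename config.ini.example and the use of a plain ConfigParser with its default BasicInterpolation are assumptions made for the sketch; the script itself subclasses ConfigParser.

# Sketch only: shows %(...)s interpolation, %% escaping and the script's own
# &(...)s filename placeholders. Assumes the example config above was saved
# as 'config.ini.example' (hypothetical name).
import configparser
import datetime
import re

config = configparser.ConfigParser()
config.read("config.ini.example")

# %(self_name)s is expanded from the [DEFAULT] section by configparser:
print(config.get("maus", "tmp_base_dir"))    # /tmp/mvw-dl

# '%%' collapses to a literal '%', so the strftime pattern survives interpolation:
pattern = config.get("maus", "publish_date_srtftime_pattern")
print(pattern)                                # S%YE%Y%m%d01
print(datetime.datetime(2022, 3, 26).strftime(pattern))   # S2022E2022032601

# The &(...)s placeholders are not configparser syntax; the script substitutes
# them itself with regexes, roughly like this:
filename = config.get("maus", "dl_filename_pattern")      # &(publish_date)s.&(ext)s
filename = re.sub(r"&\(publish_date\)s", "20220326", filename)
filename = re.sub(r"&\(ext\)s", "mp4", filename)
print(filename)                               # 20220326.mp4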

View File

@@ -17,5 +17,5 @@
"sortOrder": "desc",
"future": false,
"offset": 0,
"size": 50
"size": 20
}

View File

@@ -0,0 +1,12 @@
[Unit]
Description=MediathekViewWeb download helper
After=multi-user.target
[Service]
Type=oneshot
RemainAfterExit=no
Environment='PATH=/usr/local/sbin:/usr/local/bin:/usr/bin'
ExecStart=/opt/miniconda3/envs/mvw-dl/bin/python /opt/python/mvw-dl/dev/mvw-dl.py
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Run MediathekViewWeb download helper
[Timer]
OnCalendar=0/2:2
Persistent=true
[Install]
WantedBy=timers.target
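
A note on the timer settings: if I read the systemd.time syntax correctly, OnCalendar=0/2:2 normalizes to *-*-* 00/2:02:00, i.e. the timer fires every two hours at two minutes past the hour (systemd-analyze calendar '0/2:2' prints the normalized form and the next elapse). Persistent=true makes a run that was missed while the machine was off fire once on the next start, and each trigger runs the Type=oneshot service above.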

490
mvw-dl.py
View File

@@ -3,9 +3,13 @@ import datetime as d
import json
import logging
import os
import pathlib
import re
import shutil
import sys
import time
import filelock
import humanize
import requests
import inflect
@@ -13,6 +17,8 @@ from rich.logging import RichHandler
from rich.traceback import install
import typing as t
from rich.console import Console
from filelock import Timeout, FileLock
import uuid
import type_def.mvw_json_response
from type_def.mvw_json_request import MVWJSONRequest
@@ -28,6 +34,7 @@ from threading import Event
from typing import Iterable
from urllib.request import urlopen
from rich.progress import (
BarColumn,
DownloadColumn,
@@ -38,6 +45,20 @@ from rich.progress import (
TransferSpeedColumn,
)
# TODO set locale for datetime and others to globally stick to en_US
# TODO thread log messages display timestamp in systemd journal
# TODO Increment file name suffix more than once if needed
# TODO [23:15:14] DEBUG [thread]
# TODO Clean mvw-dl.timer
# TODO Reset maus-query.json
download_start_time = 0
download_last_update_time = 0
total_content_length = 0
size_downloaded_for_progress_tracking = 0
size_downloaded_for_speed_tracking = 0
file_lock_timeout = 1
state_lock_file_ext = ".lock"
progress = Progress(
TextColumn("[bold blue]{task.fields[filename]}", justify="right"),
BarColumn(bar_width=None),
@@ -66,6 +87,9 @@ JSONType = t.Union[str, int, float, bool, None, t.Dict[str, t.Any], t.List[t.Any
# 1: Config file invalid, it has no sections
# 2: Config file invalid, sections must define at least CONST.CFG_MANDATORY
# 3: No search results to download
# 4: State file already exists, has more than 0 bytes size but doesn't contain usable JSON
# 5: State file lock cannot be acquired within file_lock_timeout
# 6: Unable to create state directory
class CONST(object):
@@ -83,7 +107,13 @@ class CONST(object):
{"key": "state_file_name_prefix", "value": "state-"},
{"key": "state_file_name_suffix", "value": ".log"},
{"key": "mvw_endpoint", "value": "http://localhost:8000/api/query"},
{"key": "title_dedup_winner", "value": "first"}
{"key": "title_dedup_winner", "value": "first"},
{"key": "dl_progress_update_interval", "value": "10"},
{"key": "dl_threads", "value": "2"},
{"key": "dl_filename_pattern", "value": "&(channel)s - &(publish_date)s - &(topic)s - &(title)s"},
{"key": "publish_date_srtftime_pattern", "value": "%%Y%%m%%d"},
{"key": "dl_filename_replace_spaces_with", "value": "_"},
{"key": "dl_filename_all_lowercase", "value": "yes"}
]
CFG_KNOWN_SECTION = [
{"key": "min_duration", "is_mandatory": False},
@@ -102,16 +132,16 @@ logging.basicConfig(
format=CONST.LOG_FORMAT,
datefmt="[%X]",
handlers=[RichHandler(
show_time=False if "SYSTEMD_EXEC_PID" in os.environ else True,
show_time=False,
rich_tracebacks=True
)]
)
log = logging.getLogger("rich")
# Our own code logs with this level
log.setLevel(logging.DEBUG)
# connectionpool logs with WARNING, we don't need its verbosity
log_connectionpool = logging.getLogger("urllib3.connectionpool")
log_connectionpool.setLevel(logging.WARNING)
# connectionpool and filelock log with WARNING, we don't need their verbosity
logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)
logging.getLogger("filelock").setLevel(logging.WARNING)
install(show_locals=True)
@@ -135,7 +165,7 @@ class ConfigParser(
ini_defaults = []
internal_defaults = {default["key"]: default["value"] for default in CONST.CFG_KNOWN_DEFAULTS}
config = ConfigParser(defaults=internal_defaults)
config.read(CONST.CFG_DEFAULT_FILENAME)
config.read(CONST.CFG_DEFAULT_ABS_PATH)
def print_section_header(
@@ -203,7 +233,8 @@ def validate_config_sections(
def query_string_from_file(
filename: str) -> str:
with open(filename, "r") as jsonfile:
filename_abs_path = os.path.join(CONST.CFG_THIS_FILE_DIRNAME, filename)
with open(filename_abs_path, "r", encoding="utf-8") as jsonfile:
query_string = jsonfile.read()
return query_string
@@ -328,92 +359,396 @@ def dedup_json_titles(
return json_obj
done_event = Event()
def expanded_dest_dir(
raw_dest_dir: str) -> str:
user_expanded_dest_dir = os.path.expanduser(raw_dest_dir)
all_expanded_dest_dir = os.path.expandvars(user_expanded_dest_dir)
return all_expanded_dest_dir
def handle_sigint(signum, frame):
done_event.set()
def filename_replace_pattern(
section_name: str,
config_obj: configparser.ConfigParser(),
show: type_def.mvw_json_response.Show,
max_quality_url: str,
shorthand_uuid: str) -> str:
filename = config_obj.get(section_name, "dl_filename_pattern")
ext = pathlib.Path(max_quality_url).suffix.lstrip(".")
publish_date = d.datetime.utcfromtimestamp(show.timestamp).strftime(
config_obj.get(section_name, "publish_date_srtftime_pattern"))
show_extended = {"ext": ext, "publish_date": publish_date}
show_attrs = [attr for attr in dir(show) if not attr.startswith('_') and not callable(getattr(show, attr))]
for attr in show_attrs:
attr_re = re.compile(r"&\(" + re.escape(attr) + r"\)s")
if re.search(attr_re, filename):
log.debug(f"{shorthand_uuid} Replacing filename pattern '&({attr})s' ...")
filename = re.sub(attr_re, str(getattr(show, attr)), filename)
log.debug(f"{shorthand_uuid} New filename: '{filename}'")
for extended_attr in show_extended:
extended_attr_re = re.compile(r"&\(" + re.escape(extended_attr) + r"\)s")
if re.search(extended_attr_re, filename):
log.debug(f"{shorthand_uuid} Replacing filename pattern '&({extended_attr})s' ...")
filename = re.sub(extended_attr_re, show_extended[extended_attr], filename)
log.debug(f"{shorthand_uuid} New filename: '{filename}'")
return filename
signal.signal(signal.SIGINT, handle_sigint)
def get_safe_filename(
dirty_filename: str,
shorthand_uuid: str) -> str:
"""https://stackoverflow.com/a/71199182"""
log.debug(f"{shorthand_uuid} Removing question marks from file name ...")
clean_filename = re.sub(r"""[?]""", "", dirty_filename)
log.debug(f"{shorthand_uuid} Replacing unsafe characters in filename with dashes ...")
clean_filename = re.sub(r"""[/\\?%*:|"<>\x7F\x00-\x1F]""", "-", clean_filename)
log.debug(f"{shorthand_uuid} New filename: '{clean_filename}'")
return clean_filename
def filename_replace_spaces_with_underscores(
section_name: str,
config_obj: configparser.ConfigParser(),
filename: str,
shorthand_uuid: str) -> str:
space_replace_string = config_obj.get(section_name, "dl_filename_replace_spaces_with")
log.debug(f"{shorthand_uuid} Replacing space characters with '{space_replace_string}' ...")
underscored_filename = re.sub(
r"\s",
space_replace_string,
filename)
log.debug(f"{shorthand_uuid} New filename: '{underscored_filename}'")
return underscored_filename
def get_filename(
section_name: str,
config_obj: configparser.ConfigParser(),
show: type_def.mvw_json_response.Show,
max_quality_url: str,
shorthand_uuid: str) -> str:
log.debug(f"{shorthand_uuid} Generating final filename ...")
filename_replaced_patterns = filename_replace_pattern(
section_name,
config_obj,
show,
max_quality_url,
shorthand_uuid)
filename_safe = get_safe_filename(
filename_replaced_patterns,
shorthand_uuid)
if config.get(section_name, "dl_filename_replace_spaces_with"):
filename_safe = filename_replace_spaces_with_underscores(
section_name,
config_obj,
filename_safe,
shorthand_uuid)
if config.getboolean(section_name, "dl_filename_all_lowercase"):
log.debug(f"{shorthand_uuid} Lowercasing all filename characters ...")
filename_safe = filename_safe.lower()
log.debug(f"{shorthand_uuid} New filename: '{filename_safe}'")
return filename_safe
def get_state_file_abs_path(
section_name: str,
config_obj: configparser.ConfigParser()) -> str:
state_dir = config_obj.get(section_name, "state_files_dir")
try:
os.makedirs(state_dir, exist_ok=True)
except OSError:
log.error(f"Unable to create '[{section}]' state directory '{state_dir}'. "
f"We're not going to be able to log state information. Exiting 6 ...")
sys.exit(6)
else:
state_file = \
config_obj.get(section_name, "state_file_name_prefix") + \
section_name + \
config_obj.get(section_name, "state_file_name_suffix")
state_file_abs_path = os.path.join(state_dir, state_file)
return state_file_abs_path
def state_file_none_or_valid_json(
state_file_abs_path: str) -> bool:
if os.path.exists(state_file_abs_path):
if os.path.getsize(state_file_abs_path) > 0:
with open(state_file_abs_path, "r", encoding="utf-8") as state_file:
try:
json.loads(state_file.read())
return True
except json.JSONDecodeError:
log.warning(f"State file '{state_file_abs_path}' does not contain valid JSON. We're not going to "
f"be able to log anything into it. Exiting 4 ...")
sys.exit(4)
else:
return True
else:
return True
def truncate_log(
json_data: json.loads,
max_log_entries: int) -> json.loads:
for i in range(len(json_data)):
del json_data[0]
if len(json_data) <= max_log_entries:
break
return json_data
def get_state_file_lock(
state_lock_file: str) -> filelock.BaseFileLock:
global file_lock_timeout
try:
lock = FileLock(state_lock_file, timeout=file_lock_timeout)
return lock
except filelock.Timeout:
log.error(f"Unable to acquire lock on state lock file '{state_lock_file}' "
f"""within {file_lock_timeout} {p.plural("second", file_lock_timeout)}, exiting 5 ...""")
sys.exit(5)
def log_successful_download(
section_name: str,
config_obj: configparser.ConfigParser(),
show: type_def.mvw_json_response.Show,
state_file_abs_path: str,
job_uuid: str,
shorthand_uuid: str) -> None:
timestamp_now = int(time.time())
os.makedirs(os.path.dirname(state_file_abs_path), exist_ok=True)
state_lock_file = state_file_abs_path + state_lock_file_ext
state_body = show.dict(include={"topic", "title"})
state_body["dl_complete_timestamp_epoch"] = timestamp_now
state_body["dl_complete_timestamp_human"] = \
d.datetime.utcfromtimestamp(timestamp_now).strftime("%Y-%m-%d %H%M%S UTC")
state_entry = {job_uuid: state_body}
json_state = None
lock = get_state_file_lock(state_lock_file)
with lock:
state_file_none_or_valid_json(state_file_abs_path)
state_file_open_mode = "r+" if os.path.exists(state_file_abs_path) else "w+"
with open(state_file_abs_path, state_file_open_mode, encoding="utf-8") as state_file:
try:
json_state = json.load(state_file)
except json.JSONDecodeError:
if json_state is None:
state_file.truncate()
json_state = []
log.debug(f"{shorthand_uuid} Writing log entry to '{state_file_abs_path}' ...")
with open(state_file_abs_path, "w", encoding="utf-8") as state_file:
json_state.append(state_entry)
max_log_entries = config_obj.getint(section_name, "state_file_retention")
if len(json_state) > max_log_entries:
json_state = truncate_log(json_state, max_log_entries)
json.dump(json_state, state_file, indent=4, sort_keys=True, ensure_ascii=False)
def copy_url(
show: type_def.mvw_json_response.Show) -> None:
section_name: str,
config_obj: configparser.ConfigParser(),
show: type_def.mvw_json_response.Show,
video_metadata: dict,
state_file_abs_path: str,
show_name: str,
job_uuid: str,
shorthand_uuid: str,
tmp_dir: str,
dest_dir: str) -> None:
"""Copy data from a url to a local file."""
url = show.url_video_hd
filename = url.split("/")[-1]
dest_path = os.path.join("./", filename)
release_timestamp = d.datetime.utcfromtimestamp(show.timestamp).strftime('%A %x %X')
#s = requests.Session()
#newline = "\n"
#log.debug(f"Request method: {req.method}\n"
# f"URL: {req.url}\n"
# f"""{newline.join(f"Header '{header}': '{value}'" for header, value in list(req.headers.items()))}\n""")
show_name = f"{show.topic} - {show.title}"
log.debug(f"""Downloading "{show_name}" posted {release_timestamp} ...""")
with open(dest_path, "wb") as dest_file:
last_update_time = time.time()
r = requests.get(url, stream=True)
total_length = int(r.headers.get('content-length'))
size_downloaded = 0
global download_start_time
global download_last_update_time
global size_downloaded_for_progress_tracking
global size_downloaded_for_speed_tracking
update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
max_quality_url = video_metadata["url"]
filename = get_filename(section_name, config_obj, show, max_quality_url, shorthand_uuid)
resume_header = {}
tmp_file_open_mode = "wb"
tmp_file_size = 0
tmp_path = os.path.join(tmp_dir, filename)
dest_path = os.path.join(dest_dir, filename)
os.makedirs(os.path.dirname(tmp_path), exist_ok=True)
log.info(f"{shorthand_uuid} Download location resolved to '{tmp_path}'")
if os.path.exists(tmp_path):
tmp_file_size = os.path.getsize(tmp_path)
log.debug(f"{shorthand_uuid} Temporary file '{tmp_path}' exists likely from a previous incomplete "
f"download attempt, size is {humanize.naturalsize(tmp_file_size, binary=True)}. Resuming ...")
tmp_file_open_mode = "ab"
try:
with open(tmp_path, tmp_file_open_mode) as tmp_file:
log.info(f"""{shorthand_uuid} Downloading "{show_name}" ...""")
if tmp_file_size > 0:
resume_header = {"range": f"bytes={tmp_file_size}-"}
log.debug(f"resume_header: {resume_header}")
size_downloaded_for_progress_tracking += tmp_file_size
r = requests.get(max_quality_url, headers=resume_header, stream=True)
for chunk in r.iter_content(32768):
size_downloaded += len(chunk)
dest_file.write(chunk)
if time.time() - last_update_time >= 10:
percentage_done = size_downloaded / total_length * 100
size_downloaded_for_progress_tracking += len(chunk)
size_downloaded_for_speed_tracking += len(chunk)
tmp_file.write(chunk)
if time.time() - download_last_update_time >= update_interval:
download_last_update_time = time.time()
time_in_progress = download_last_update_time - download_start_time
dl_speed_so_far = size_downloaded_for_speed_tracking / time_in_progress
human_dl_speed_so_far = f"{humanize.naturalsize(dl_speed_so_far, binary=True)}/s"
data_missing = total_content_length - size_downloaded_for_progress_tracking
time_til_completion = 1 / dl_speed_so_far * data_missing
human_time_til_completion = humanize.naturaldelta(d.timedelta(seconds=time_til_completion))
percentage_done = size_downloaded_for_progress_tracking / total_content_length * 100
human_pct = "{:.1f}".format(percentage_done)
human_size_dl = humanize.naturalsize(size_downloaded)
human_total_dl = humanize.naturalsize(total_length)
last_update_time = time.time()
log.debug(f"""Download of "{show_name}" at {human_pct}% ({human_size_dl}/{human_total_dl})""")
if done_event.is_set():
log.debug(f"done_event")
human_size_dl = humanize.naturalsize(size_downloaded_for_progress_tracking, binary=True)
human_total_dl = humanize.naturalsize(total_content_length, binary=True)
log.debug(f"[thread] Downloaded {human_pct}% ({human_size_dl}/{human_total_dl} "
f"at an average {human_dl_speed_so_far}, approximately {human_time_til_completion} "
f"left til completion.)")
log.info(f"""{shorthand_uuid} Download of "{show_name}" done""")
except IOError:
log.error(f"{shorthand_uuid} IOError during download. Aborting this download thread ...")
return
#got_json_response = MVWJSONResponse(**json.loads(s.content))
#return got_json_response
# progress.console.log(f"Requesting {url}")
# response = urlopen(url)
# # This will break if the response doesn't contain content length
# progress.update(task_id, total=int(response.info()["Content-length"]))
# with open(path, "wb") as dest_file:
# progress.start_task(task_id)
# for data in iter(partial(response.read, 32768), b""):
# dest_file.write(data)
# progress.update(task_id, advance=len(data))
# if done_event.is_set():
# return
# progress.console.log(f"Downloaded {path}")
log.info(f"{shorthand_uuid} Moving file to final location '{dest_path}' ...")
try:
shutil.move(tmp_path, dest_path)
except OSError as ose:
log.error(f"{shorthand_uuid} Failed moving file with an OSError\n"
f"{ose}\n"
f"Other threads continue unhindered.")
else:
log_successful_download(section_name, config_obj, show, state_file_abs_path, job_uuid, shorthand_uuid)
log.info(f"{shorthand_uuid} Done moving")
#def download(urls: Iterable[str], dest_dir: str):
# """Download multuple files to the given directory."""
#
# with progress:
# with ThreadPoolExecutor(max_workers=1) as pool:
# for url in urls:
# filename = url.split("/")[-1]
# dest_path = os.path.join(dest_dir, filename)
# task_id = progress.add_task("download", filename=filename, start=False)
# pool.submit(copy_url, task_id, url, dest_path)
def get_max_quality_url(
show: type_def.mvw_json_response.Show) -> str:
if show.url_video_hd:
max_quality_url = show.url_video_hd
elif show.url_video:
max_quality_url = show.url_video
else:
max_quality_url = show.url_video_low
return max_quality_url
def get_content_length(
video_url: str) -> int:
r = requests.head(video_url)
if r.status_code == requests.codes.ok:
return int(r.headers["content-length"])
else:
return 0
def get_json_state(
state_file_abs_path: str) -> json.loads:
try:
with open(state_file_abs_path, "r", encoding="utf-8") as state_file:
try:
json_state = json.load(state_file)
except json.JSONDecodeError:
return []
else:
return json_state
except FileNotFoundError:
log.debug(f"State file does not exist (yet), assuming no previous downloads have ever happened ...")
return []
def is_already_downloaded(
show: type_def.mvw_json_response.Show,
json_state: json.loads,
show_name: str) -> bool:
for log_entry in json_state:
for log_data in [key for key in log_entry]:
if show.topic == log_entry[log_data]["topic"] and show.title == log_entry[log_data]["title"]:
log.debug(f"""Show "{show_name}" already downloaded, won't queue""")
return True
def download_media(
section_name: str,
config_obj: configparser.ConfigParser()) -> None:
with ThreadPoolExecutor(max_workers=2) as pool:
for result in json_response.result.results.copy():
# filename = url.split("/")[-1]
# dest_path = os.path.join(dest_dir, filename)
# task_id = progress.add_task("download", filename=filename, start=False)
pool.submit(copy_url, result)
# TODO before sending into pool validate which url we're going to use
# TODO from each url get total content-length
# TODO use total content-length for overall progress of what we want to download
pass
config_obj: configparser.ConfigParser(),
json_obj: MVWJSONResponse) -> None:
global download_start_time
global download_last_update_time
global total_content_length
dl_threads = config_obj.getint(section_name, "dl_threads")
state_file_abs_path = get_state_file_abs_path(section_name, config_obj)
state_lock_file = state_file_abs_path + state_lock_file_ext
video_metadata = {}
tmp_dir = expanded_dest_dir(config_obj.get(section_name, "tmp_base_dir"))
dest_dir = expanded_dest_dir(config_obj.get(section_name, "dl_dir"))
log.info(f"""Download location is {tmp_dir}""")
log.info(f"""Final location is {dest_dir}""")
log.info(f"Limiting parallel downloads to {dl_threads} ...")
lock = get_state_file_lock(state_lock_file)
with lock:
state_file_none_or_valid_json(state_file_abs_path)
json_state = get_json_state(state_file_abs_path)
with ThreadPoolExecutor(max_workers=dl_threads) as pool:
download_last_update_time = time.time()
download_start_time = download_last_update_time
update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
log.debug(f"""Will provide updates every {update_interval} {p.plural("second", update_interval)}""")
for result in json_obj.result.results.copy():
show_name = f"{result.topic} - {result.title}"
future = None
if not is_already_downloaded(result, json_state, show_name):
max_quality_url = get_max_quality_url(result)
content_length = get_content_length(max_quality_url)
video_metadata[result.id] = {"url": max_quality_url, "content_length": content_length}
total_content_length += video_metadata[result.id]["content_length"]
log.debug(f"Total download size upped to "
f"{humanize.naturalsize(total_content_length, binary=True)}")
job_uuid = str(uuid.uuid4())
shorthand_uuid = f"[{job_uuid[:2]}..{job_uuid[-2:]}]"
log.debug(f"{shorthand_uuid} Job UUID {job_uuid} generated, shorthand is {shorthand_uuid}")
log.debug(f"""{shorthand_uuid} Queuing "{show_name}" for download ...""")
future = pool.submit(
copy_url,
section_name,
config_obj,
result,
video_metadata[result.id],
state_file_abs_path,
show_name,
job_uuid,
shorthand_uuid,
tmp_dir,
dest_dir)
if future is not None:
future.result()
if __name__ == '__main__':
@@ -443,7 +778,6 @@ if __name__ == '__main__':
if config.has_option(section, "title_not_regex"):
json_response = dedup_json_titles(section, config, json_response)
log.debug(f"Downloading shows ...")
download_media(section, config)
# console.print_json(json_response.json())
log.debug(f"Interested in {json_response.result.queryInfo.resultCount} "
f"""{p.plural("show", json_response.result.queryInfo.resultCount)} ...""")
download_media(section, config, json_response)
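
Condensed from the hunk above, the partial-download resume in copy_url() boils down to roughly the following pattern. This is a sketch under assumptions, not the actual function: fetch_with_resume, url and tmp_path are made-up names, and the progress bookkeeping, state-file locking and error handling of the real code are left out.

# Sketch of the resume flow implemented above; names here are placeholders.
import os
import requests

def fetch_with_resume(url: str, tmp_path: str, chunk_size: int = 32768) -> None:
    # A HEAD request gives the total size up front, as in get_content_length().
    total_length = int(requests.head(url).headers.get("content-length", 0))

    # A leftover partial file means we append to it and ask the server to
    # continue from its current size via a Range header.
    already_have = os.path.getsize(tmp_path) if os.path.exists(tmp_path) else 0
    headers = {"Range": f"bytes={already_have}-"} if already_have else {}
    mode = "ab" if already_have else "wb"

    with requests.get(url, headers=headers, stream=True) as response, \
            open(tmp_path, mode) as tmp_file:
        for chunk in response.iter_content(chunk_size):
            tmp_file.write(chunk)
            already_have += len(chunk)

    if total_length and already_have < total_length:
        raise IOError(f"connection dropped at {already_have}/{total_length} bytes")
    # copy_url() then shutil.move()s the finished file to its final location
    # and records the download in the state file.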

View File

@@ -3,3 +3,4 @@ requests
inflect
pydantic
humanize
filelock

View File

@@ -10,6 +10,8 @@ charset-normalizer==2.0.12
# via requests
commonmark==0.9.1
# via rich
filelock==3.6.0
# via -r requirements.in
humanize==4.0.0
# via -r requirements.in
idna==3.3
@@ -26,5 +28,5 @@ rich==12.0.0
# via -r requirements.in
typing-extensions==4.1.1
# via pydantic
urllib3==1.26.8
urllib3==1.26.9
# via requests