Start implementing range downloading

2022-03-19 16:58:04 +01:00
parent facfe4e7d3
commit dd6464de5d
1 changed file with 60 additions and 35 deletions
--- a/mvw-dl.py
+++ b/mvw-dl.py
@@ -131,7 +131,7 @@ log = logging.getLogger("rich")
 # Our own code logs with this level
 log.setLevel(logging.DEBUG)
 # connectionpool and filelock log with WARNING, we don't need its verbosity
-logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)
+# logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)
 logging.getLogger("filelock").setLevel(logging.WARNING)
 install(show_locals=True)

@@ -381,13 +381,13 @@ def filename_replace_pattern(
    show_attrs = [attr for attr in dir(show) if not attr.startswith('_') and not callable(getattr(show, attr))]

    for attr in show_attrs:
-        log.debug(f"{shorthand_uuid} Replacing filename pattern '&({attr})s' ...")
+        # log.debug(f"{shorthand_uuid} Replacing filename pattern '&({attr})s' ...")
        filename = re.sub(r"&\(" + re.escape(attr) + r"\)s", str(getattr(show, attr)), filename)
-        log.debug(f"{shorthand_uuid} New filename: '{filename}'")
+        # log.debug(f"{shorthand_uuid} New filename: '{filename}'")
    for extended_attr in show_extended:
-        log.debug(f"{shorthand_uuid} Replacing filename pattern '&({extended_attr})s' ...")
+        # log.debug(f"{shorthand_uuid} Replacing filename pattern '&({extended_attr})s' ...")
        filename = re.sub(r"&\(" + re.escape(extended_attr) + r"\)s", show_extended[extended_attr], filename)
-        log.debug(f"{shorthand_uuid} New filename: '{filename}'")
+        # log.debug(f"{shorthand_uuid} New filename: '{filename}'")
    return filename


@@ -558,7 +558,9 @@ def copy_url(
        state_file_abs_path: str,
        show_name: str,
        job_uuid: str,
-        shorthand_uuid: str) -> None:
+        shorthand_uuid: str,
+        tmp_dir: str,
+        dest_dir: str) -> None:
    """Copy data from a url to a local file."""

    global download_start_time
@@ -568,20 +570,27 @@ def copy_url(
    update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
    max_quality_url = video_metadata["url"]
    filename = get_filename(section_name, config_obj, show, max_quality_url, shorthand_uuid)
-    dest_dir = expanded_dest_dir(config_obj.get(section_name, "tmp_base_dir"))
+    resume_header = {}
+    tmp_file_size = 0
+    tmp_path = os.path.join(tmp_dir, filename)
    dest_path = os.path.join(dest_dir, filename)

-    os.makedirs(os.path.dirname(dest_path), exist_ok=True)
-    # TODO quit
-    log_successful_download(section_name, config_obj, show, state_file_abs_path, job_uuid, shorthand_uuid)
-    quit()
-    with open(dest_path, "wb") as dest_file:
+    os.makedirs(os.path.dirname(tmp_path), exist_ok=True)
+    log.info(f"{shorthand_uuid} Download location resolved to '{tmp_path}'")
+    if os.path.exists(tmp_path):
+        tmp_file_size = os.path.getsize(tmp_path)
+        log.debug(f"{shorthand_uuid} Temporary file '{tmp_path}' exists likely from a previous incomplete "
+                  f"download attempt, size is {humanize.naturalsize(tmp_file_size, binary=True)}. Resuming ...")
+    try:
+        with open(tmp_path, "wb") as tmp_file:
            log.info(f"""{shorthand_uuid} Downloading "{show_name}" ...""")
-        log.info(f"{shorthand_uuid} Download location resolved to {dest_path}")
-        r = requests.get(max_quality_url, stream=True)
+            if tmp_file_size > 0:
+                resume_header = {"range": f"bytes={tmp_file_size}-"}
+            log.info(f"resume_header: {resume_header}")
+            r = requests.get(max_quality_url, headers=resume_header, stream=True)
            for chunk in r.iter_content(32768):
                size_downloaded += len(chunk)
-            dest_file.write(chunk)
+                tmp_file.write(chunk)
                if time.time() - download_last_update_time >= update_interval:
                    download_last_update_time = time.time()
                    dl_speed_so_far = size_downloaded / (download_last_update_time - download_start_time)
@@ -590,13 +599,24 @@ def copy_url(
                    human_pct = "{:.1f}".format(percentage_done)
                    human_size_dl = humanize.naturalsize(size_downloaded, binary=True)
                    human_total_dl = humanize.naturalsize(total_content_length, binary=True)
-                log.debug(f"{shorthand_uuid} Downloaded {human_pct}% ({human_size_dl}/{human_total_dl} at an average "
-                          f"{human_dl_speed_so_far})")
+                    log.debug(f"[thread] Downloaded {human_pct}% ({human_size_dl}/{human_total_dl} "
+                              f"at an average {human_dl_speed_so_far})")
                if done_event.is_set():
                    log.info(f"""{shorthand_uuid} Download of "{show_name}" interrupted""")
                    return
            log.info(f"""{shorthand_uuid} Download of "{show_name}" done""")
-    # log_successful_download(show)
+    except IOError:
+        log.error(f"{shorthand_uuid} IOError during download. Aborting this download thread ...")
+        return
+
+    log.info(f"{shorthand_uuid} Moving file to final location '{dest_path}' ...")
+    try:
+        os.rename(tmp_path, dest_path)
+        log_successful_download(section_name, config_obj, show, state_file_abs_path, job_uuid, shorthand_uuid)
+        log.info(f"{shorthand_uuid} Done moving")
+    except Exception:
+        console.print_exception(show_locals=True)
+        log.error(f"{shorthand_uuid} Failed moving file")


 def get_max_quality_url(
@@ -640,7 +660,10 @@ def download_media(
    for video in video_metadata:
        total_content_length += video_metadata[video]["content_length"]
    video_metadata["total_content_length"] = total_content_length
-    log.info(f"""Download location is {config_obj.get(section_name, "dl_dir")}""")
+    tmp_dir = expanded_dest_dir(config_obj.get(section_name, "tmp_base_dir"))
+    dest_dir = expanded_dest_dir(config_obj.get(section_name, "dl_dir"))
+    log.info(f"""Download location is {tmp_dir}""")
+    log.info(f"""Final location is {dest_dir}""")
    log.info(f"Limiting parallel downloads to {dl_threads} ...")

    lock = get_state_file_lock(state_lock_file)
@@ -668,7 +691,9 @@ def download_media(
                        state_file_abs_path,
                        show_name,
                        job_uuid,
-                        shorthand_uuid)
+                        shorthand_uuid,
+                        tmp_dir,
+                        dest_dir)


 if __name__ == '__main__':