Start implementing range downloading

This commit is contained in:
hygienic-books 2022-03-19 16:58:04 +01:00
parent facfe4e7d3
commit dd6464de5d

View File

@ -131,7 +131,7 @@ log = logging.getLogger("rich")
# Our own code logs with this level
log.setLevel(logging.DEBUG)
# connectionpool and filelock log at WARNING level only; we don't need their verbosity
logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)
# logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)
logging.getLogger("filelock").setLevel(logging.WARNING)
install(show_locals=True)
@ -381,13 +381,13 @@ def filename_replace_pattern(
show_attrs = [attr for attr in dir(show) if not attr.startswith('_') and not callable(getattr(show, attr))]
for attr in show_attrs:
log.debug(f"{shorthand_uuid} Replacing filename pattern '&({attr})s' ...")
# log.debug(f"{shorthand_uuid} Replacing filename pattern '&({attr})s' ...")
filename = re.sub(r"&\(" + re.escape(attr) + r"\)s", str(getattr(show, attr)), filename)
log.debug(f"{shorthand_uuid} New filename: '{filename}'")
# log.debug(f"{shorthand_uuid} New filename: '{filename}'")
for extended_attr in show_extended:
log.debug(f"{shorthand_uuid} Replacing filename pattern '&({extended_attr})s' ...")
# log.debug(f"{shorthand_uuid} Replacing filename pattern '&({extended_attr})s' ...")
filename = re.sub(r"&\(" + re.escape(extended_attr) + r"\)s", show_extended[extended_attr], filename)
log.debug(f"{shorthand_uuid} New filename: '{filename}'")
# log.debug(f"{shorthand_uuid} New filename: '{filename}'")
return filename
@ -558,7 +558,9 @@ def copy_url(
state_file_abs_path: str,
show_name: str,
job_uuid: str,
shorthand_uuid: str) -> None:
shorthand_uuid: str,
tmp_dir: str,
dest_dir: str) -> None:
"""Copy data from a url to a local file."""
global download_start_time
@ -568,20 +570,27 @@ def copy_url(
update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
max_quality_url = video_metadata["url"]
filename = get_filename(section_name, config_obj, show, max_quality_url, shorthand_uuid)
dest_dir = expanded_dest_dir(config_obj.get(section_name, "tmp_base_dir"))
resume_header = {}
tmp_file_size = 0
tmp_path = os.path.join(tmp_dir, filename)
dest_path = os.path.join(dest_dir, filename)
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
# TODO quit
log_successful_download(section_name, config_obj, show, state_file_abs_path, job_uuid, shorthand_uuid)
quit()
with open(dest_path, "wb") as dest_file:
os.makedirs(os.path.dirname(tmp_path), exist_ok=True)
log.info(f"{shorthand_uuid} Download location resolved to '{tmp_path}'")
if os.path.exists(tmp_path):
tmp_file_size = os.path.getsize(tmp_path)
log.debug(f"{shorthand_uuid} Temporary file '{tmp_path}' exists likely from a previous incomplete "
f"download attempt, size is {humanize.naturalsize(tmp_file_size, binary=True)}. Resuming ...")
try:
with open(tmp_path, "wb") as tmp_file:
log.info(f"""{shorthand_uuid} Downloading "{show_name}" ...""")
log.info(f"{shorthand_uuid} Download location resolved to {dest_path}")
r = requests.get(max_quality_url, stream=True)
if tmp_file_size > 0:
resume_header = {"range": f"bytes={tmp_file_size}-"}
log.info(f"resume_header: {resume_header}")
r = requests.get(max_quality_url, headers=resume_header, stream=True)
for chunk in r.iter_content(32768):
size_downloaded += len(chunk)
dest_file.write(chunk)
tmp_file.write(chunk)
if time.time() - download_last_update_time >= update_interval:
download_last_update_time = time.time()
dl_speed_so_far = size_downloaded / (download_last_update_time - download_start_time)
@ -590,13 +599,24 @@ def copy_url(
human_pct = "{:.1f}".format(percentage_done)
human_size_dl = humanize.naturalsize(size_downloaded, binary=True)
human_total_dl = humanize.naturalsize(total_content_length, binary=True)
log.debug(f"{shorthand_uuid} Downloaded {human_pct}% ({human_size_dl}/{human_total_dl} at an average "
f"{human_dl_speed_so_far})")
log.debug(f"[thread] Downloaded {human_pct}% ({human_size_dl}/{human_total_dl} "
f"at an average {human_dl_speed_so_far})")
if done_event.is_set():
log.info(f"""{shorthand_uuid} Download of "{show_name}" interrupted""")
return
log.info(f"""{shorthand_uuid} Download of "{show_name}" done""")
# log_successful_download(show)
except IOError:
log.error(f"{shorthand_uuid} IOError during download. Aborting this download thread ...")
return
log.info(f"{shorthand_uuid} Moving file to final location '{dest_path}' ...")
try:
os.rename(tmp_path, dest_path)
log_successful_download(section_name, config_obj, show, state_file_abs_path, job_uuid, shorthand_uuid)
log.info(f"{shorthand_uuid} Done moving")
except Exception:
console.print_exception(show_locals=True)
log.error(f"{shorthand_uuid} Failed moving file")
def get_max_quality_url(
@ -640,7 +660,10 @@ def download_media(
for video in video_metadata:
total_content_length += video_metadata[video]["content_length"]
video_metadata["total_content_length"] = total_content_length
log.info(f"""Download location is {config_obj.get(section_name, "dl_dir")}""")
tmp_dir = expanded_dest_dir(config_obj.get(section_name, "tmp_base_dir"))
dest_dir = expanded_dest_dir(config_obj.get(section_name, "dl_dir"))
log.info(f"""Download location is {tmp_dir}""")
log.info(f"""Final location is {dest_dir}""")
log.info(f"Limiting parallel downloads to {dl_threads} ...")
lock = get_state_file_lock(state_lock_file)
@ -668,7 +691,9 @@ def download_media(
state_file_abs_path,
show_name,
job_uuid,
shorthand_uuid)
shorthand_uuid,
tmp_dir,
dest_dir)
if __name__ == '__main__':