Get total content_length and provide just one global stream of download progress updates

hygienic-books 2022-03-17 17:23:41 +01:00
parent d92236a2f2
commit 47bb1f761a
3 changed files with 60 additions and 57 deletions

@@ -8,6 +8,7 @@ state_file_name_prefix = state-
 state_file_name_suffix = .log
 mvw_endpoint = http://localhost:8000/api/query
 title_dedup_winner = first
+dl_progress_update_interval = 10
 
 [maus]
 min_duration = 1200
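Note: the new dl_progress_update_interval option is read with configparser's getint(). A minimal sketch of that lookup, assuming a file named config.ini laid out like the section above (the file name and the "maus" section are assumptions):

import configparser

# Minimal sketch: read the new polling interval from a config laid out
# like the diff above. "config.ini" and section "maus" are assumptions.
config = configparser.ConfigParser()
config.read("config.ini")

# fallback mirrors the CONST default added further down in mvw-dl.py.
update_interval = config.getint("maus", "dl_progress_update_interval", fallback=10)
print(update_interval)  # -> 10 with the values shown above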

@@ -17,5 +17,5 @@
     "sortOrder": "desc",
     "future": false,
     "offset": 0,
-    "size": 50
+    "size": 20
 }
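This JSON is the request body the script sends to mvw_endpoint; lowering "size" from 50 to 20 caps how many shows one query returns. A hedged sketch of posting such a payload, assuming the endpoint from the config above and a payload file named query.json (MediathekViewWeb's API conventionally expects a text/plain content type, treated here as an assumption):

import json
import requests

# Sketch only: send the query payload to the configured mvw_endpoint.
with open("query.json") as f:
    payload = json.load(f)

r = requests.post(
    "http://localhost:8000/api/query",
    data=json.dumps(payload),
    headers={"Content-Type": "text/plain"},  # assumed MediathekViewWeb quirk
)
r.raise_for_status()
print(r.json()["result"]["queryInfo"]["resultCount"])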

mvw-dl.py (114 changed lines)

@@ -28,6 +28,11 @@ from threading import Event
 from typing import Iterable
 from urllib.request import urlopen
 
+# TODO set locale for datetime and others to globally stick to en_US
+download_start_time = 0
+download_last_update_time = 0
+size_downloaded = 0
+
 from rich.progress import (
     BarColumn,
     DownloadColumn,
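Moving download_start_time, download_last_update_time and size_downloaded to module scope is what lets every worker thread feed one shared progress stream. One caveat worth flagging: += on a shared int is not atomic across threads, so simultaneous workers can lose updates. A sketch of the usual safeguard; the Lock is an illustrative addition, not part of this commit:

import threading

# Illustrative only: serialize updates to the shared byte counter.
size_downloaded = 0
progress_lock = threading.Lock()

def count_chunk(chunk: bytes) -> None:
    global size_downloaded
    with progress_lock:
        size_downloaded += len(chunk)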
@@ -83,7 +88,8 @@ class CONST(object):
         {"key": "state_file_name_prefix", "value": "state-"},
         {"key": "state_file_name_suffix", "value": ".log"},
         {"key": "mvw_endpoint", "value": "http://localhost:8000/api/query"},
-        {"key": "title_dedup_winner", "value": "first"}
+        {"key": "title_dedup_winner", "value": "first"},
+        {"key": "dl_progress_update_interval", "value": "10"}
     ]
     CFG_KNOWN_SECTION = [
         {"key": "min_duration", "is_mandatory": False},
@@ -339,67 +345,39 @@ signal.signal(signal.SIGINT, handle_sigint)
 def copy_url(
+        section_name: str,
+        config_obj: configparser.ConfigParser(),
         show: type_def.mvw_json_response.Show,
-        max_quality_url: str) -> None:
+        video_metadata: dict,
+        total_content_length: int) -> None:
     """Copy data from a url to a local file."""
+    global download_start_time
+    global download_last_update_time
+    global size_downloaded
+    update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
+    max_quality_url = video_metadata["url"]
     filename = max_quality_url.split("/")[-1]
     dest_path = os.path.join("./", filename)
-    release_timestamp = d.datetime.utcfromtimestamp(show.timestamp).strftime('%A %x %X')
-    #s = requests.Session()
-    #newline = "\n"
-    #log.debug(f"Request method: {req.method}\n"
-    #          f"URL: {req.url}\n"
-    #          f"""{newline.join(f"Header '{header}': '{value}'" for header, value in list(req.headers.items()))}\n""")
-    show_name = f"{show.topic} - {show.title}"
-    log.debug(f"""Downloading "{show_name}" posted {release_timestamp} ...""")
     with open(dest_path, "wb") as dest_file:
-        last_update_time = time.time()
         r = requests.get(max_quality_url, stream=True)
-        total_length = int(r.headers.get('content-length'))
-        size_downloaded = 0
         for chunk in r.iter_content(32768):
             size_downloaded += len(chunk)
             dest_file.write(chunk)
-            if time.time() - last_update_time >= 10:
-                percentage_done = size_downloaded / total_length * 100
+            if time.time() - download_last_update_time >= update_interval:
+                download_last_update_time = time.time()
+                dl_speed_so_far = size_downloaded / (download_last_update_time - download_start_time)
+                human_dl_speed_so_far = f"{humanize.naturalsize(dl_speed_so_far, binary=True)}/s"
+                percentage_done = size_downloaded / total_content_length * 100
                 human_pct = "{:.1f}".format(percentage_done)
-                human_size_dl = humanize.naturalsize(size_downloaded)
-                human_total_dl = humanize.naturalsize(total_length)
-                last_update_time = time.time()
-                log.debug(f"""Download of "{show_name}" at {human_pct}% ({human_size_dl}/{human_total_dl})""")
+                human_size_dl = humanize.naturalsize(size_downloaded, binary=True)
+                human_total_dl = humanize.naturalsize(total_content_length, binary=True)
+                log.debug(f"Downloaded {human_pct}% ({human_size_dl}/{human_total_dl} at an average "
+                          f"{human_dl_speed_so_far})")
             if done_event.is_set():
                 log.debug(f"done_event")
                 return
-    #got_json_response = MVWJSONResponse(**json.loads(s.content))
-    #return got_json_response
-
-# progress.console.log(f"Requesting {url}")
-# response = urlopen(url)
-# # This will break if the response doesn't contain content length
-# progress.update(task_id, total=int(response.info()["Content-length"]))
-# with open(path, "wb") as dest_file:
-#     progress.start_task(task_id)
-#     for data in iter(partial(response.read, 32768), b""):
-#         dest_file.write(data)
-#         progress.update(task_id, advance=len(data))
-#         if done_event.is_set():
-#             return
-# progress.console.log(f"Downloaded {path}")
-
-#def download(urls: Iterable[str], dest_dir: str):
-#    """Download multuple files to the given directory."""
-#
-#    with progress:
-#        with ThreadPoolExecutor(max_workers=1) as pool:
-#            for url in urls:
-#                filename = url.split("/")[-1]
-#                dest_path = os.path.join(dest_dir, filename)
-#                task_id = progress.add_task("download", filename=filename, start=False)
-#                pool.submit(copy_url, task_id, url, dest_path)
 
 
 def get_max_quality_url(
         show: type_def.mvw_json_response.Show) -> str:
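The new average-speed figure is simply bytes downloaded so far divided by wall time since download_start_time (download_last_update_time has just been reset to "now", so the difference is the total elapsed time), rendered by humanize. A worked example with invented numbers:

import humanize

# Invented numbers: 48 MiB fetched in the 30 s since download_start_time.
size_downloaded = 48 * 1024 * 1024
elapsed = 30.0

dl_speed_so_far = size_downloaded / elapsed  # bytes per second
print(f"{humanize.naturalsize(dl_speed_so_far, binary=True)}/s")  # -> 1.6 MiB/s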
@@ -412,20 +390,43 @@ def get_max_quality_url(
     return max_quality_url
 
 
+def get_content_length(
+        video_url: str) -> int:
+    r = requests.head(video_url)
+    if r.status_code == requests.codes.ok:
+        return int(r.headers["content-length"])
+    else:
+        return 0
+
+
 def download_media(
         section_name: str,
         config_obj: configparser.ConfigParser(),
         json_obj: MVWJSONResponse) -> None:
-    with ThreadPoolExecutor(max_workers=2) as pool:
-        for result in json_obj.result.results.copy():
-            # filename = url.split("/")[-1]
-            # dest_path = os.path.join(dest_dir, filename)
-            # task_id = progress.add_task("download", filename=filename, start=False)
-            max_quality_url = get_max_quality_url(result)
-            pool.submit(copy_url, result, max_quality_url)
-            # TODO from each url get total content-length
-            # TODO use total content-length for overall progress of what we want to download
-    pass
+    global download_start_time
+    global download_last_update_time
+    video_metadata = {}
+    for result in json_obj.result.results.copy():
+        max_quality_url = get_max_quality_url(result)
+        content_length = get_content_length(max_quality_url)
+        video_metadata[result.id] = {"url": max_quality_url, "content_length": content_length}
+    total_content_length = 0
+    for video in video_metadata:
+        total_content_length += video_metadata[video]["content_length"]
+    video_metadata["total_content_length"] = total_content_length
+    with ThreadPoolExecutor(max_workers=2) as pool:
+        download_last_update_time = time.time()
+        download_start_time = download_last_update_time
+        update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
+        log.debug(f"""Will provide updates every {update_interval} {p.plural("second", update_interval)}""")
+        for result in json_obj.result.results.copy():
+            pool.submit(
+                copy_url,
+                section_name,
+                config_obj,
+                result,
+                video_metadata[result.id],
+                video_metadata["total_content_length"])
 
 
 if __name__ == '__main__':
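After the metadata pass, video_metadata maps each show id to its URL and size, then gets one scalar "total_content_length" key mixed into the same dict; that only works because the total is summed before the key is added. A shape sketch with invented ids, URLs and sizes:

# Shape sketch of video_metadata; ids, URLs and sizes are invented.
video_metadata = {
    "abc123": {"url": "https://example.org/a.mp4", "content_length": 50_331_648},
    "def456": {"url": "https://example.org/b.mp4", "content_length": 25_165_824},
}
total_content_length = sum(v["content_length"] for v in video_metadata.values())
video_metadata["total_content_length"] = total_content_length  # 75497472

Keeping the total in a separate variable (or a small dataclass) would avoid the mixed-type dict, but the commit's ordering is safe as written.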
@@ -455,7 +456,8 @@ if __name__ == '__main__':
         if config.has_option(section, "title_not_regex"):
             json_response = dedup_json_titles(section, config, json_response)
 
-        log.debug(f"Downloading shows ...")
+        log.debug(f"Downloading {json_response.result.queryInfo.resultCount} "
+                  f"""{p.plural("show", json_response.result.queryInfo.resultCount)} ...""")
         download_media(section, config, json_response)
         # console.print_json(json_response.json())
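The reworked log line leans on inflect (the p object) so one show and several shows both read naturally. A tiny check of that behaviour:

import inflect

p = inflect.engine()
print(p.plural("show", 1))  # -> show
print(p.plural("show", 2))  # -> shows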