diff --git a/config.ini b/config.ini
index c0685af..2c42196 100644
--- a/config.ini
+++ b/config.ini
@@ -8,6 +8,7 @@ state_file_name_prefix = state-
 state_file_name_suffix = .log
 mvw_endpoint = http://localhost:8000/api/query
 title_dedup_winner = first
+dl_progress_update_interval = 10
 
 [maus]
 min_duration = 1200
diff --git a/maus-query.json b/maus-query.json
index 731ee89..7703479 100644
--- a/maus-query.json
+++ b/maus-query.json
@@ -17,5 +17,5 @@
   "sortOrder": "desc",
   "future": false,
   "offset": 0,
-  "size": 50
+  "size": 20
 }
diff --git a/mvw-dl.py b/mvw-dl.py
index b26d7d4..7f3457b 100644
--- a/mvw-dl.py
+++ b/mvw-dl.py
@@ -28,6 +28,11 @@ from threading import Event
 from typing import Iterable
 from urllib.request import urlopen
 
+# TODO set locale for datetime and others to globally stick to en_US
+download_start_time = 0
+download_last_update_time = 0
+size_downloaded = 0
+
 from rich.progress import (
     BarColumn,
     DownloadColumn,
@@ -83,7 +88,8 @@ class CONST(object):
         {"key": "state_file_name_prefix", "value": "state-"},
         {"key": "state_file_name_suffix", "value": ".log"},
         {"key": "mvw_endpoint", "value": "http://localhost:8000/api/query"},
-        {"key": "title_dedup_winner", "value": "first"}
+        {"key": "title_dedup_winner", "value": "first"},
+        {"key": "dl_progress_update_interval", "value": "10"}
     ]
     CFG_KNOWN_SECTION = [
         {"key": "min_duration", "is_mandatory": False},
@@ -339,67 +345,39 @@ signal.signal(signal.SIGINT, handle_sigint)
 
 
 def copy_url(
+        section_name: str,
+        config_obj: configparser.ConfigParser,
         show: type_def.mvw_json_response.Show,
-        max_quality_url: str) -> None:
+        video_metadata: dict,
+        total_content_length: int) -> None:
     """Copy data from a url to a local file."""
+    global download_start_time
+    global download_last_update_time
+    global size_downloaded
+    update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
+    max_quality_url = video_metadata["url"]
     filename = max_quality_url.split("/")[-1]
     dest_path = os.path.join("./", filename)
-    release_timestamp = d.datetime.utcfromtimestamp(show.timestamp).strftime('%A %x %X')
-    #s = requests.Session()
-    #newline = "\n"
-    #log.debug(f"Request method: {req.method}\n"
-    #          f"URL: {req.url}\n"
-    #          f"""{newline.join(f"Header '{header}': '{value}'" for header, value in list(req.headers.items()))}\n""")
-
-    show_name = f"{show.topic} - {show.title}"
-    log.debug(f"""Downloading "{show_name}" posted {release_timestamp} ...""")
     with open(dest_path, "wb") as dest_file:
-        last_update_time = time.time()
         r = requests.get(max_quality_url, stream=True)
-        total_length = int(r.headers.get('content-length'))
-        size_downloaded = 0
         for chunk in r.iter_content(32768):
             size_downloaded += len(chunk)
             dest_file.write(chunk)
-            if time.time() - last_update_time >= 10:
-                percentage_done = size_downloaded / total_length * 100
+            if time.time() - download_last_update_time >= update_interval:
+                download_last_update_time = time.time()
+                dl_speed_so_far = size_downloaded / (download_last_update_time - download_start_time)
+                human_dl_speed_so_far = f"{humanize.naturalsize(dl_speed_so_far, binary=True)}/s"
+                percentage_done = size_downloaded / total_content_length * 100
                 human_pct = "{:.1f}".format(percentage_done)
-                human_size_dl = humanize.naturalsize(size_downloaded)
-                human_total_dl = humanize.naturalsize(total_length)
-                last_update_time = time.time()
-                log.debug(f"""Download of "{show_name}" at {human_pct}% ({human_size_dl}/{human_total_dl})""")
+                human_size_dl = humanize.naturalsize(size_downloaded, binary=True)
+                human_total_dl = humanize.naturalsize(total_content_length, binary=True)
+                log.debug(f"Downloaded {human_pct}% ({human_size_dl}/{human_total_dl}) "
+                          f"at an average {human_dl_speed_so_far}")
             if done_event.is_set():
                 log.debug(f"done_event")
                 return
-    #got_json_response = MVWJSONResponse(**json.loads(s.content))
-    #return got_json_response
-
-    # progress.console.log(f"Requesting {url}")
-    # response = urlopen(url)
-    # # This will break if the response doesn't contain content length
-    # progress.update(task_id, total=int(response.info()["Content-length"]))
-    # with open(path, "wb") as dest_file:
-    #     progress.start_task(task_id)
-    #     for data in iter(partial(response.read, 32768), b""):
-    #         dest_file.write(data)
-    #         progress.update(task_id, advance=len(data))
-    #         if done_event.is_set():
-    #             return
-    #     progress.console.log(f"Downloaded {path}")
-
-
-#def download(urls: Iterable[str], dest_dir: str):
-#    """Download multuple files to the given directory."""
-#
-#    with progress:
-#        with ThreadPoolExecutor(max_workers=1) as pool:
-#            for url in urls:
-#                filename = url.split("/")[-1]
-#                dest_path = os.path.join(dest_dir, filename)
-#                task_id = progress.add_task("download", filename=filename, start=False)
-#                pool.submit(copy_url, task_id, url, dest_path)
-
 
 def get_max_quality_url(
         show: type_def.mvw_json_response.Show) -> str:
@@ -412,20 +390,43 @@ def get_max_quality_url(
     return max_quality_url
 
 
+def get_content_length(
+        video_url: str) -> int:
+    r = requests.head(video_url)
+    if r.status_code == requests.codes.ok:
+        return int(r.headers["content-length"])
+    else:
+        return 0
+
+
 def download_media(
         section_name: str,
         config_obj: configparser.ConfigParser(),
         json_obj: MVWJSONResponse) -> None:
+    global download_start_time
+    global download_last_update_time
+    video_metadata = {}
+    for result in json_obj.result.results.copy():
+        max_quality_url = get_max_quality_url(result)
+        content_length = get_content_length(max_quality_url)
+        video_metadata[result.id] = {"url": max_quality_url, "content_length": content_length}
+    total_content_length = 0
+    for video in video_metadata:
+        total_content_length += video_metadata[video]["content_length"]
+    video_metadata["total_content_length"] = total_content_length
     with ThreadPoolExecutor(max_workers=2) as pool:
+        download_last_update_time = time.time()
+        download_start_time = download_last_update_time
+        update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
+        log.debug(f"""Will provide updates every {update_interval} {p.plural("second", update_interval)}""")
         for result in json_obj.result.results.copy():
-            # filename = url.split("/")[-1]
-            # dest_path = os.path.join(dest_dir, filename)
-            # task_id = progress.add_task("download", filename=filename, start=False)
-            max_quality_url = get_max_quality_url(result)
-            pool.submit(copy_url, result, max_quality_url)
-            # TODO from each url get total content-length
-            # TODO use total content-length for overall progress of what we want to download
-            pass
+            pool.submit(
+                copy_url,
+                section_name,
+                config_obj,
+                result,
+                video_metadata[result.id],
+                video_metadata["total_content_length"])
 
 
 if __name__ == '__main__':
@@ -455,7 +456,8 @@ if __name__ == '__main__':
         if config.has_option(section, "title_not_regex"):
             json_response = dedup_json_titles(section, config, json_response)
 
-        log.debug(f"Downloading shows ...")
+        log.debug(f"Downloading {json_response.result.queryInfo.resultCount} "
+                  f"""{p.plural("show", json_response.result.queryInfo.resultCount)} ...""")
         download_media(section, config, json_response)
 
 # console.print_json(json_response.json())
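Review note: size_downloaded, download_start_time and download_last_update_time are module-level
globals that every copy_url() worker mutates, and the pool above runs two workers. CPython's GIL
makes the bare += usually come out right in practice, but it is still an unsynchronized
read-modify-write. A minimal sketch of a lock-guarded progress object that could replace the three
globals; the class and method names are illustrative, not part of this patch:

    import threading
    import time


    class DownloadProgress:
        """Aggregate download progress shared by all worker threads."""

        def __init__(self, total_content_length: int, update_interval: int) -> None:
            self._lock = threading.Lock()
            self.total_content_length = total_content_length
            self.update_interval = update_interval
            self.start_time = time.time()
            self._last_update_time = self.start_time
            self._size_downloaded = 0

        def add(self, chunk_size: int) -> tuple:
            """Record one chunk; return (bytes_so_far, elapsed_seconds, due_for_update)."""
            with self._lock:
                self._size_downloaded += chunk_size
                now = time.time()
                due = (now - self._last_update_time) >= self.update_interval
                if due:
                    # Claim this update slot while still holding the lock, so
                    # only one worker logs per interval.
                    self._last_update_time = now
                return self._size_downloaded, now - self.start_time, due

Each copy_url() worker would then call progress.add(len(chunk)) inside its iter_content() loop and
only format and log when due_for_update is true, which also removes the need for the global
declarations at the top of the module.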
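Review note: get_content_length() assumes a 200 response that carries a Content-Length header.
requests does not follow redirects for head() by default, so a mirror answering 301/302 yields 0,
and a 200 without the header raises KeyError; if every probe returns 0, copy_url() later divides by
a total_content_length of 0. A slightly more defensive sketch, assuming redirect-following and a
10-second timeout are acceptable (both are assumptions, not taken from the patch):

    import requests


    def get_content_length(video_url: str) -> int:
        """Best-effort Content-Length via HEAD; returns 0 when the size is unknown."""
        try:
            r = requests.head(video_url, allow_redirects=True, timeout=10)
        except requests.RequestException:
            return 0
        if r.status_code == requests.codes.ok:
            # Header may be absent (e.g. chunked transfer encoding); 0 signals "unknown"
            return int(r.headers.get("content-length", 0))
        return 0

Since 0 remains a legitimate return value, guarding the percentage computation in copy_url() with
an `if total_content_length > 0` check would keep the progress log robust against unknown sizes.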