def copy_url(
        show: type_def.mvw_json_response.Show) -> None:
    """Download the HD video of ``show`` into the current working directory.

    The destination file name is the last path segment of
    ``show.url_video_hd``. The body is streamed in 32 KiB chunks, progress is
    logged at most once every 10 seconds, and the transfer stops early as
    soon as ``done_event`` is set.

    Args:
        show: Search result; ``url_video_hd``, ``timestamp``, ``topic`` and
            ``title`` are read from it.

    Returns:
        None. Request failures (connection errors, HTTP error statuses,
        broken streams) are logged and not raised. The destination file is
        only opened once the server answered with a success status.
    """
    url = show.url_video_hd
    filename = url.split("/")[-1]
    dest_path = os.path.join("./", filename)
    # Timezone-aware replacement for the deprecated utcfromtimestamp(); the
    # formatted text is identical.
    release_timestamp = d.datetime.fromtimestamp(
        show.timestamp, tz=d.timezone.utc).strftime('%A %x %X')
    show_name = f"{show.topic} - {show.title}"
    log.debug(f"""Downloading "{show_name}" posted {release_timestamp} ...""")

    try:
        # The context manager releases the connection on every exit path,
        # including the early return on done_event.
        with requests.get(url, stream=True) as response:
            # Do not save an HTML error page under the video's file name.
            response.raise_for_status()

            # Content-Length is optional (e.g. chunked transfer encoding);
            # treat a missing or malformed value as "size unknown".
            raw_length = response.headers.get('content-length')
            if raw_length is not None and raw_length.strip().isdigit():
                total_length = int(raw_length)
            else:
                total_length = 0
            human_total_dl = humanize.naturalsize(total_length)

            size_downloaded = 0
            # monotonic() is immune to wall-clock adjustments.
            last_update_time = time.monotonic()
            with open(dest_path, "wb") as dest_file:
                for chunk in response.iter_content(32768):
                    size_downloaded += len(chunk)
                    dest_file.write(chunk)
                    if time.monotonic() - last_update_time >= 10:
                        last_update_time = time.monotonic()
                        human_size_dl = humanize.naturalsize(size_downloaded)
                        if total_length:
                            percentage_done = size_downloaded / total_length * 100
                            human_pct = "{:.1f}".format(percentage_done)
                            log.debug(f"""Download of "{show_name}" at {human_pct}% ({human_size_dl}/{human_total_dl})""")
                        else:
                            log.debug(f"""Download of "{show_name}" at {human_size_dl} (total size unknown)""")
                    if done_event.is_set():
                        # Abort requested; the partial file is left in place.
                        log.debug("done_event")
                        return
    except requests.RequestException as err:
        log.error(f"""Download of "{show_name}" from {url} failed: {err}""")
def download_media(
        section_name: str,
        config_obj: configparser.ConfigParser,
        json_obj: "MVWJSONResponse | None" = None) -> None:
    """Download every show of a search result with two worker threads.

    Each entry of ``json_obj.result.results`` is handed to ``copy_url`` in a
    thread pool. The function returns once all downloads have finished.

    Args:
        section_name: Config section the results belong to. Currently unused.
        config_obj: Parsed configuration. Currently unused.
        json_obj: Search result to download. When omitted, the module-level
            ``json_response`` is used, which keeps the existing two-argument
            call working.

    Returns:
        None. An exception raised inside a worker is logged here rather than
        being dropped together with its future.
    """
    if json_obj is None:
        # Backward compatibility: the original implementation read the
        # global that the ``__main__`` section assigns.
        json_obj = json_response

    # TODO before sending into pool validate which url we're going to use
    # TODO from each url get total content-length
    # TODO use total content-length for overall progress of what we want to download
    with ThreadPoolExecutor(max_workers=2) as pool:
        # Iterate over a copy, as before, so the result list may change
        # while downloads are being queued.
        futures = {
            pool.submit(copy_url, show): show
            for show in json_obj.result.results.copy()
        }
        for future, show in futures.items():
            # exception() waits for the worker to finish and hands back
            # whatever it raised, or None on success.
            err = future.exception()
            if err is not None:
                log.error(f"""Download of "{show.topic} - {show.title}" raised {err!r}""")