Dabble in downloading with Will McGugan's downloader example at https://github.com/Textualize/rich/blob/master/examples/downloader.py

2022-03-17 04:11:44 +01:00
parent 6aeecabc9b
commit 376932583e
1 changed files with 128 additions and 5 deletions
--- a/mvw-dl.py
+++ b/mvw-dl.py
@@ -1,9 +1,12 @@
 import configparser
+import datetime as d
 import json
 import logging
 import os
 import re
 import sys
+import time
+import humanize
 import requests
 import inflect
 from rich.logging import RichHandler
@@ -15,6 +18,39 @@ import type_def.mvw_json_response
 from type_def.mvw_json_request import MVWJSONRequest
 from type_def.mvw_json_response import MVWJSONResponse

+# Downloading
+import os.path
+import sys
+from concurrent.futures import ThreadPoolExecutor
+import signal
+from functools import partial
+from threading import Event
+from typing import Iterable
+from urllib.request import urlopen
+
+from rich.progress import (
+    BarColumn,
+    DownloadColumn,
+    Progress,
+    TaskID,
+    TextColumn,
+    TimeRemainingColumn,
+    TransferSpeedColumn,
+)
+
+# Shared rich progress display. Columns, left to right: file name, bar,
+# percentage, downloaded size, transfer speed, estimated time remaining.
+# NOTE(review): in the code visible here it is only referenced from
+# commented-out code and is never started or updated - confirm it is still needed.
+progress = Progress(
+    # Every task added to this display must carry a ``filename`` field.
+    TextColumn("[bold blue]{task.fields[filename]}", justify="right"),
+    BarColumn(bar_width=None),
+    "[progress.percentage]{task.percentage:>3.1f}%",
+    "•",
+    DownloadColumn(),
+    "•",
+    TransferSpeedColumn(),
+    "•",
+    TimeRemainingColumn(),
+)
+# Downloading
+
 # Without width
 console = Console(width=180)
 p = inflect.engine()
@@ -292,6 +328,94 @@ def dedup_json_titles(
    return json_obj


+# Set when the user presses Ctrl-C; copy_url() polls it between chunks and
+# stops early once it is set.
+done_event = Event()
+
+
+def handle_sigint(signum, frame):
+    """Handle SIGINT by asking running downloads to stop.
+
+    Args:
+        signum: Number of the received signal (unused).
+        frame: Stack frame that was interrupted (unused).
+    """
+    done_event.set()
+
+
+# Installed at import time. This replaces Python's default SIGINT handler, so
+# Ctrl-C sets done_event instead of raising KeyboardInterrupt.
+signal.signal(signal.SIGINT, handle_sigint)
+
+
+def copy_url(
+        show: type_def.mvw_json_response.Show) -> None:
+    """Download a show's HD video into the current working directory.
+
+    The destination file name is the last path segment of
+    ``show.url_video_hd``. Progress is written to the debug log at most once
+    every ten seconds. Once ``done_event`` is set the download stops and the
+    partially written file is left on disk.
+
+    Args:
+        show: Result entry to download. Its ``url_video_hd``, ``timestamp``,
+            ``topic`` and ``title`` attributes are read.
+
+    Raises:
+        requests.RequestException: If the request fails, times out, or the
+            server answers with an HTTP error status.
+    """
+
+    # Jobs still queued when the user interrupts must not start a new transfer.
+    if done_event.is_set():
+        log.debug("done_event")
+        return
+
+    chunk_size = 32768
+    log_interval_seconds = 10
+    # (connect, read) timeouts in seconds so a stalled server cannot block a
+    # worker thread forever; requests applies no timeout by default.
+    request_timeout = (10, 60)
+
+    url = show.url_video_hd
+    filename = url.split("/")[-1]
+    dest_path = os.path.join("./", filename)
+    # utcfromtimestamp() is deprecated since Python 3.12; an aware UTC datetime
+    # formats to the same string.
+    release_timestamp = d.datetime.fromtimestamp(
+        show.timestamp, tz=d.timezone.utc).strftime('%A %x %X')
+    show_name = f"{show.topic} - {show.title}"
+    log.debug(f"""Downloading "{show_name}" posted {release_timestamp} ...""")
+
+    # Send the request before opening the destination so that a failed request
+    # does not leave an empty file behind.
+    with requests.get(url, stream=True, timeout=request_timeout) as r:
+        # Otherwise an HTTP error page would be saved as if it were the video.
+        r.raise_for_status()
+        # Content-Length is optional (e.g. with chunked transfer encoding);
+        # 0 means "unknown" and disables the percentage in the progress log.
+        content_length = r.headers.get('content-length')
+        total_length = int(content_length) if content_length else 0
+        size_downloaded = 0
+        # Monotonic clock: the log interval must not be affected by wall-clock jumps.
+        last_update_time = time.monotonic()
+        with open(dest_path, "wb") as dest_file:
+            for chunk in r.iter_content(chunk_size):
+                size_downloaded += len(chunk)
+                dest_file.write(chunk)
+                if time.monotonic() - last_update_time >= log_interval_seconds:
+                    last_update_time = time.monotonic()
+                    human_size_dl = humanize.naturalsize(size_downloaded)
+                    if total_length:
+                        percentage_done = size_downloaded / total_length * 100
+                        human_pct = "{:.1f}".format(percentage_done)
+                        human_total_dl = humanize.naturalsize(total_length)
+                        log.debug(f"""Download of "{show_name}" at {human_pct}% ({human_size_dl}/{human_total_dl})""")
+                    else:
+                        log.debug(f"""Download of "{show_name}" at {human_size_dl} (total size unknown)""")
+                if done_event.is_set():
+                    log.debug("done_event")
+                    return
+
+    # NOTE(review): the commented-out block below looks like the rich-progress
+    # variant from the upstream downloader example, presumably kept as a
+    # template for progress-bar reporting - confirm before deleting.
+    # progress.console.log(f"Requesting {url}")
+    # response = urlopen(url)
+    # # This will break if the response doesn't contain content length
+    # progress.update(task_id, total=int(response.info()["Content-length"]))
+    # with open(path, "wb") as dest_file:
+    #     progress.start_task(task_id)
+    #     for data in iter(partial(response.read, 32768), b""):
+    #         dest_file.write(data)
+    #         progress.update(task_id, advance=len(data))
+    #         if done_event.is_set():
+    #             return
+    # progress.console.log(f"Downloaded {path}")
+
+
+#def download(urls: Iterable[str], dest_dir: str):
+#    """Download multiple files to the given directory."""
+#
+#    with progress:
+#        with ThreadPoolExecutor(max_workers=1) as pool:
+#            for url in urls:
+#                filename = url.split("/")[-1]
+#                dest_path = os.path.join(dest_dir, filename)
+#                task_id = progress.add_task("download", filename=filename, start=False)
+#                pool.submit(copy_url, task_id, url, dest_path)
+
+
+def download_media(
+        section_name: str,
+        config_obj: configparser.ConfigParser) -> None:
+    """Download every show listed in the module-level ``json_response``.
+
+    Each entry of ``json_response.result.results`` is handed to ``copy_url``
+    on a pool of two worker threads. The call returns once all submitted
+    downloads have finished; failed downloads are reported in the error log.
+
+    Args:
+        section_name: Config section being processed. Currently unused.
+        config_obj: Parsed configuration. Currently unused.
+    """
+    with ThreadPoolExecutor(max_workers=2) as pool:
+        # TODO before sending into pool validate which url we're going to use
+        # TODO from each url get total content-length
+        # TODO use total content-length for overall progress of what we want to download
+        futures = [pool.submit(copy_url, result)
+                   for result in json_response.result.results]
+
+    # Leaving the ``with`` block waits for every worker, so all futures are
+    # done here. An exception raised inside a worker is stored on its future
+    # and would be lost silently if the future were simply discarded.
+    for future in futures:
+        error = future.exception()
+        if error is not None:
+            log.error(f"Download failed: {error!r}")
+
+
 if __name__ == '__main__':
    validate_default_section(config)
    if config_has_valid_section(config):
@@ -319,8 +443,7 @@ if __name__ == '__main__':
        if config.has_option(section, "title_not_regex"):
            json_response = dedup_json_titles(section, config, json_response)

-        #if json_response.result.queryInfo.resultCount:
-        #    log.debug(f"Downloading shows ...")
-        #else
-        #
-        #    # console.print_json(json_response.json())
+        log.debug(f"Downloading shows ...")
+        download_media(section, config)
+
+            # console.print_json(json_response.json())