Dabble in downloading with Will McGugan's downloader example at https://github.com/Textualize/rich/blob/master/examples/downloader.py

This commit is contained in:
hygienic-books 2022-03-17 04:11:44 +01:00
parent 6aeecabc9b
commit 376932583e

133
mvw-dl.py
View File

@@ -1,9 +1,12 @@
import configparser
import datetime as d
import json
import logging
import os
import re
import sys
import time
import humanize
import requests
import inflect
from rich.logging import RichHandler
@@ -15,6 +18,39 @@ import type_def.mvw_json_response
from type_def.mvw_json_request import MVWJSONRequest
from type_def.mvw_json_response import MVWJSONResponse
# Downloading
import os.path
import sys
from concurrent.futures import ThreadPoolExecutor
import signal
from functools import partial
from threading import Event
from typing import Iterable
from urllib.request import urlopen
from rich.progress import (
BarColumn,
DownloadColumn,
Progress,
TaskID,
TextColumn,
TimeRemainingColumn,
TransferSpeedColumn,
)
# Shared rich progress display: filename, bar, percentage, bytes downloaded,
# transfer speed, and estimated time remaining.
_progress_columns = (
    TextColumn("[bold blue]{task.fields[filename]}", justify="right"),
    BarColumn(bar_width=None),
    "[progress.percentage]{task.percentage:>3.1f}%",
    "",
    DownloadColumn(),
    "",
    TransferSpeedColumn(),
    "",
    TimeRemainingColumn(),
)
progress = Progress(*_progress_columns)
# Downloading
# Without width
console = Console(width=180)
p = inflect.engine()
@@ -292,6 +328,94 @@ def dedup_json_titles(
    return json_obj
# Event shared by all download threads; set once the user requests shutdown.
done_event = Event()


def handle_sigint(signum, frame):
    """SIGINT (Ctrl-C) handler: flag all in-flight downloads to stop."""
    done_event.set()


signal.signal(signal.SIGINT, handle_sigint)
def copy_url(
        show: type_def.mvw_json_response.Show) -> None:
    """Download a show's HD video into the current working directory.

    The destination filename is the last path component of the video URL.
    Progress is logged at most once every 10 seconds.  If the module-level
    ``done_event`` has been set (by the SIGINT handler) the download aborts
    early, leaving a partial file on disk.

    :param show: a single result object from the MediathekViewWeb JSON
        response; ``url_video_hd``, ``timestamp``, ``topic`` and ``title``
        are read.
    """
    url = show.url_video_hd
    filename = url.split("/")[-1]
    dest_path = os.path.join("./", filename)
    release_timestamp = d.datetime.utcfromtimestamp(show.timestamp).strftime('%A %x %X')
    show_name = f"{show.topic} - {show.title}"
    log.debug(f"""Downloading "{show_name}" posted {release_timestamp} ...""")
    # Context managers make sure both the HTTP response and the destination
    # file are closed, even on early return or error.
    with requests.get(url, stream=True) as r, open(dest_path, "wb") as dest_file:
        # Fail before writing anything if the server answered with an error
        # page instead of the video stream.
        r.raise_for_status()
        # Servers are not required to send Content-Length; fall back to 0 so
        # the download still works, just without a percentage.
        total_length = int(r.headers.get('content-length') or 0)
        size_downloaded = 0
        last_update_time = time.time()
        for chunk in r.iter_content(32768):
            size_downloaded += len(chunk)
            dest_file.write(chunk)
            if time.time() - last_update_time >= 10:
                last_update_time = time.time()
                human_size_dl = humanize.naturalsize(size_downloaded)
                if total_length:
                    human_pct = "{:.1f}".format(size_downloaded / total_length * 100)
                    human_total_dl = humanize.naturalsize(total_length)
                    log.debug(f"""Download of "{show_name}" at {human_pct}% ({human_size_dl}/{human_total_dl})""")
                else:
                    log.debug(f"""Download of "{show_name}" at {human_size_dl} (total size unknown)""")
            if done_event.is_set():
                log.debug(f"""Download of "{show_name}" interrupted by SIGINT""")
                return
#got_json_response = MVWJSONResponse(**json.loads(s.content))
#return got_json_response
# progress.console.log(f"Requesting {url}")
# response = urlopen(url)
# # This will break if the response doesn't contain content length
# progress.update(task_id, total=int(response.info()["Content-length"]))
# with open(path, "wb") as dest_file:
# progress.start_task(task_id)
# for data in iter(partial(response.read, 32768), b""):
# dest_file.write(data)
# progress.update(task_id, advance=len(data))
# if done_event.is_set():
# return
# progress.console.log(f"Downloaded {path}")
#def download(urls: Iterable[str], dest_dir: str):
# """Download multuple files to the given directory."""
#
# with progress:
# with ThreadPoolExecutor(max_workers=1) as pool:
# for url in urls:
# filename = url.split("/")[-1]
# dest_path = os.path.join(dest_dir, filename)
# task_id = progress.add_task("download", filename=filename, start=False)
# pool.submit(copy_url, task_id, url, dest_path)
def download_media(
        section_name: str,
        config_obj: configparser.ConfigParser) -> None:
    """Download every show in the module-level ``json_response`` concurrently.

    Each result is handed to ``copy_url`` on a small thread pool; the pool's
    context manager blocks until all submitted downloads have finished.

    NOTE(review): ``section_name`` and ``config_obj`` are currently unused;
    presumably they will select per-section download options later — confirm.
    """
    # TODO before sending into pool validate which url we're going to use
    # TODO from each url get total content-length
    # TODO use total content-length for overall progress of what we want to download
    with ThreadPoolExecutor(max_workers=2) as pool:
        # Iterate over a copy so concurrent mutation of the result list
        # cannot break the loop.
        for result in json_response.result.results.copy():
            pool.submit(copy_url, result)
if __name__ == '__main__': if __name__ == '__main__':
validate_default_section(config) validate_default_section(config)
if config_has_valid_section(config): if config_has_valid_section(config):
@@ -319,8 +443,7 @@ if __name__ == '__main__':
if config.has_option(section, "title_not_regex"): if config.has_option(section, "title_not_regex"):
json_response = dedup_json_titles(section, config, json_response) json_response = dedup_json_titles(section, config, json_response)
#if json_response.result.queryInfo.resultCount: log.debug(f"Downloading shows ...")
# log.debug(f"Downloading shows ...") download_media(section, config)
#else
# # console.print_json(json_response.json())
# # console.print_json(json_response.json())