Compare commits

..

4 Commits

4 changed files with 143 additions and 3 deletions

137
mvw-dl.py
View File

@ -1,9 +1,12 @@
import configparser
import datetime as d
import json
import logging
import os
import re
import sys
import time
import humanize
import requests
import inflect
from rich.logging import RichHandler
@ -15,6 +18,39 @@ import type_def.mvw_json_response
from type_def.mvw_json_request import MVWJSONRequest
from type_def.mvw_json_response import MVWJSONResponse
# Downloading
import os.path
import sys
from concurrent.futures import ThreadPoolExecutor
import signal
from functools import partial
from threading import Event
from typing import Iterable
from urllib.request import urlopen
from rich.progress import (
BarColumn,
DownloadColumn,
Progress,
TaskID,
TextColumn,
TimeRemainingColumn,
TransferSpeedColumn,
)
# Columns handed to the shared rich ``Progress`` display, in the order given.
# The first column renders the ``filename`` field attached to each task.
_progress_columns = [
    TextColumn("[bold blue]{task.fields[filename]}", justify="right"),
    BarColumn(bar_width=None),
    "[progress.percentage]{task.percentage:>3.1f}%",
    "•",
    DownloadColumn(),
    "•",
    TransferSpeedColumn(),
    "•",
    TimeRemainingColumn(),
]
progress = Progress(*_progress_columns)
# Downloading
# Without width
console = Console(width=180)
p = inflect.engine()
@ -29,6 +65,7 @@ JSONType = t.Union[str, int, float, bool, None, t.Dict[str, t.Any], t.List[t.Any
# Exit codes
# 1: Config file invalid, it has no sections
# 2: Config file invalid, sections must define at least CONST.CFG_MANDATORY
# 3: No search results to download
class CONST(object):
@ -204,12 +241,20 @@ def get_json_response(
return got_json_response
def no_downloads_needed() -> None:
    """Log that nothing is left to download, then terminate the program.

    The process ends with exit code 3, the status this script documents as
    "No search results to download".
    """
    exit_code = 3
    log.info("No search results to download, exiting 3 ...")
    sys.exit(exit_code)
def remove_result(
        json_obj: MVWJSONResponse,
        result_obj: type_def.mvw_json_response.Show) -> MVWJSONResponse:
    """Drop one show from the response and keep its result counter in sync.

    Args:
        json_obj: Response whose result list is modified in place.
        result_obj: The show to remove; it must be present in the list.

    Returns:
        The same ``json_obj`` as long as at least one result remains. When
        the counter reaches zero, ``no_downloads_needed()`` is called instead.
    """
    payload = json_obj.result
    payload.results.remove(result_obj)

    query_info = payload.queryInfo
    query_info.resultCount = query_info.resultCount - 1

    if not query_info.resultCount:
        # Nothing left to work with; hand over to the "no downloads" path.
        no_downloads_needed()
    else:
        return json_obj
def log_result_count(result_count: int, pre_filter: bool = True) -> None:
@ -283,6 +328,94 @@ def dedup_json_titles(
return json_obj
# Flag set when SIGINT is received; copy_url() checks it after each written
# chunk and stops downloading once it is set.
done_event = Event()


def handle_sigint(signum, frame):
    """Signal handler that only sets ``done_event``.

    ``signum`` and ``frame`` are the arguments a signal handler receives;
    neither is used here.
    """
    done_event.set()


# Register the handler for SIGINT. This happens at import time.
signal.signal(signal.SIGINT, handle_sigint)
def copy_url(
        show: type_def.mvw_json_response.Show) -> None:
    """Download one show's video into the current working directory.

    The file name is the last path segment of the video URL. Progress is
    logged at debug level roughly every 10 seconds. If ``done_event`` is set
    while downloading, the function returns early and leaves the partially
    written file on disk.

    Request failures (connection errors, timeouts, HTTP error statuses,
    invalid URLs) are logged as errors instead of being raised, so a failed
    download is visible even when the caller ignores the worker's result.

    Args:
        show: Search result describing the video to fetch. Reads
            ``url_video_hd``, ``url_video``, ``timestamp``, ``topic`` and
            ``title``.
    """
    # Prefer the HD URL. NOTE(review): falls back to the standard URL when
    # the HD one is empty -- confirm this matches the intended URL policy.
    url = show.url_video_hd or show.url_video
    filename = url.split("/")[-1]
    dest_path = os.path.join("./", filename)
    # Timezone-aware replacement for the deprecated utcfromtimestamp(); the
    # formatted output is the same.
    release_timestamp = d.datetime.fromtimestamp(
        show.timestamp, tz=d.timezone.utc).strftime('%A %x %X')
    show_name = f"{show.topic} - {show.title}"
    log.debug(f"""Downloading "{show_name}" posted {release_timestamp} ...""")

    progress_interval = 10  # seconds between progress log lines
    # Without a timeout a stalled connection would block this worker forever
    # and the done_event check below would never be reached.
    request_timeout = 30

    try:
        # The context manager releases the streamed connection on every exit
        # path, including the early return on done_event.
        with requests.get(url, stream=True, timeout=request_timeout) as r:
            # Do not store an HTTP error page as if it were the video.
            r.raise_for_status()

            # Content-Length may be absent (e.g. chunked transfer); treat the
            # total as unknown (0) instead of crashing on int(None).
            content_length = r.headers.get('content-length')
            if content_length and content_length.isdigit():
                total_length = int(content_length)
            else:
                total_length = 0
            human_total_dl = humanize.naturalsize(total_length)

            size_downloaded = 0
            last_update_time = time.monotonic()
            # Open the destination only after the request succeeded so that
            # failed requests do not leave empty files behind.
            with open(dest_path, "wb") as dest_file:
                for chunk in r.iter_content(32768):
                    size_downloaded += len(chunk)
                    dest_file.write(chunk)
                    if time.monotonic() - last_update_time >= progress_interval:
                        last_update_time = time.monotonic()
                        human_size_dl = humanize.naturalsize(size_downloaded)
                        if total_length:
                            percentage_done = size_downloaded / total_length * 100
                            human_pct = "{:.1f}".format(percentage_done)
                            log.debug(f"""Download of "{show_name}" at {human_pct}% ({human_size_dl}/{human_total_dl})""")
                        else:
                            log.debug(f"""Download of "{show_name}" at {human_size_dl} (total size unknown)""")
                    if done_event.is_set():
                        log.debug("done_event")
                        return
    except requests.RequestException as exc:
        log.error(f"""Download of "{show_name}" from "{url}" failed: {exc}""")
#got_json_response = MVWJSONResponse(**json.loads(s.content))
#return got_json_response
# progress.console.log(f"Requesting {url}")
# response = urlopen(url)
# # This will break if the response doesn't contain content length
# progress.update(task_id, total=int(response.info()["Content-length"]))
# with open(path, "wb") as dest_file:
# progress.start_task(task_id)
# for data in iter(partial(response.read, 32768), b""):
# dest_file.write(data)
# progress.update(task_id, advance=len(data))
# if done_event.is_set():
# return
# progress.console.log(f"Downloaded {path}")
#def download(urls: Iterable[str], dest_dir: str):
# """Download multiple files to the given directory."""
#
# with progress:
# with ThreadPoolExecutor(max_workers=1) as pool:
# for url in urls:
# filename = url.split("/")[-1]
# dest_path = os.path.join(dest_dir, filename)
# task_id = progress.add_task("download", filename=filename, start=False)
# pool.submit(copy_url, task_id, url, dest_path)
def download_media(
        section_name: str,
        config_obj: configparser.ConfigParser,
        json_obj: "t.Optional[MVWJSONResponse]" = None) -> None:
    """Download every show in the search results using a small thread pool.

    Each result is passed to ``copy_url`` on a pool of two worker threads.
    The function returns once all downloads have finished; any exception a
    worker raised is logged as an error rather than silently discarded.

    Args:
        section_name: Name of the config section being processed (currently
            unused).
        config_obj: Parsed configuration (currently unused).
        json_obj: Response whose results are downloaded. When omitted, the
            module-level ``json_response`` is used, which keeps existing
            two-argument calls working.
    """
    if json_obj is None:
        json_obj = json_response

    # Work on a snapshot so changes to the result list made elsewhere cannot
    # affect the iteration.
    shows = list(json_obj.result.results)

    with ThreadPoolExecutor(max_workers=2) as pool:
        # TODO before sending into pool validate which url we're going to use
        # TODO from each url get total content-length
        # TODO use total content-length for overall progress of what we want to download
        downloads = [(show, pool.submit(copy_url, show)) for show in shows]

    # Leaving the ``with`` block waits for all workers, so every future is
    # finished here and ``exception()`` does not block.
    for show, download in downloads:
        error = download.exception()
        if error is not None:
            log.error(f"""Download of "{show.topic} - {show.title}" failed: {error}""")
if __name__ == '__main__':
validate_default_section(config)
if config_has_valid_section(config):
@ -309,4 +442,8 @@ if __name__ == '__main__':
log.debug(f"Deduplicating results by title where needed ...")
if config.has_option(section, "title_not_regex"):
json_response = dedup_json_titles(section, config, json_response)
log.debug(f"Downloading shows ...")
download_media(section, config)
# console.print_json(json_response.json())

View File

@ -2,3 +2,4 @@ rich
requests
inflect
pydantic
humanize

View File

@ -10,6 +10,8 @@ charset-normalizer==2.0.12
# via requests
commonmark==0.9.1
# via rich
humanize==4.0.0
# via -r requirements.in
idna==3.3
# via requests
inflect==5.4.0

View File

@ -10,7 +10,7 @@ class Show(BaseModel):
description: str
timestamp: int
duration: int
size: int
size: Optional[int]
url_website: str
url_subtitle: str
url_video: str