Compare commits

...

4 Commits

4 changed files with 143 additions and 3 deletions

mvw-dl.py

@@ -1,9 +1,12 @@
 import configparser
+import datetime as d
 import json
 import logging
 import os
 import re
 import sys
+import time
+import humanize
 import requests
 import inflect
 from rich.logging import RichHandler
@@ -15,6 +18,39 @@ import type_def.mvw_json_response
 from type_def.mvw_json_request import MVWJSONRequest
 from type_def.mvw_json_response import MVWJSONResponse
+
+# Downloading
+import os.path
+import sys
+from concurrent.futures import ThreadPoolExecutor
+import signal
+from functools import partial
+from threading import Event
+from typing import Iterable
+from urllib.request import urlopen
+
+from rich.progress import (
+    BarColumn,
+    DownloadColumn,
+    Progress,
+    TaskID,
+    TextColumn,
+    TimeRemainingColumn,
+    TransferSpeedColumn,
+)
+
+progress = Progress(
+    TextColumn("[bold blue]{task.fields[filename]}", justify="right"),
+    BarColumn(bar_width=None),
+    "[progress.percentage]{task.percentage:>3.1f}%",
+    "•",
+    DownloadColumn(),
+    "•",
+    TransferSpeedColumn(),
+    "•",
+    TimeRemainingColumn(),
+)
+# Downloading
+
 # Without width
 console = Console(width=180)
 p = inflect.engine()
@@ -29,6 +65,7 @@ JSONType = t.Union[str, int, float, bool, None, t.Dict[str, t.Any], t.List[t.Any
 # Exit codes
 # 1: Config file invalid, it has no sections
 # 2: Config file invalid, sections must define at least CONST.CFG_MANDATORY
+# 3: No search results to download

 class CONST(object):
@@ -204,12 +241,20 @@ def get_json_response(
     return got_json_response

+def no_downloads_needed() -> None:
+    log.info(f"No search results to download, exiting 3 ...")
+    sys.exit(3)
+
+
 def remove_result(
         json_obj: MVWJSONResponse,
         result_obj: type_def.mvw_json_response.Show) -> MVWJSONResponse:
     json_obj.result.results.remove(result_obj)
     json_obj.result.queryInfo.resultCount -= 1
-    return json_obj
+    if json_obj.result.queryInfo.resultCount:
+        return json_obj
+    else:
+        no_downloads_needed()

 def log_result_count(result_count: int, pre_filter: bool = True) -> None:
@@ -283,6 +328,94 @@ def dedup_json_titles(
     return json_obj

+done_event = Event()
+
+
+def handle_sigint(signum, frame):
+    done_event.set()
+
+
+signal.signal(signal.SIGINT, handle_sigint)
+
+
+def copy_url(
+        show: type_def.mvw_json_response.Show) -> None:
+    """Copy data from a url to a local file."""
+    url = show.url_video_hd
+    filename = url.split("/")[-1]
+    dest_path = os.path.join("./", filename)
+    release_timestamp = d.datetime.utcfromtimestamp(show.timestamp).strftime('%A %x %X')
+    #s = requests.Session()
+    #newline = "\n"
+    #log.debug(f"Request method: {req.method}\n"
+    #          f"URL: {req.url}\n"
+    #          f"""{newline.join(f"Header '{header}': '{value}'" for header, value in list(req.headers.items()))}\n""")
+    show_name = f"{show.topic} - {show.title}"
+    log.debug(f"""Downloading "{show_name}" posted {release_timestamp} ...""")
+    with open(dest_path, "wb") as dest_file:
+        last_update_time = time.time()
+        r = requests.get(url, stream=True)
+        total_length = int(r.headers.get('content-length'))
+        size_downloaded = 0
+        for chunk in r.iter_content(32768):
+            size_downloaded += len(chunk)
+            dest_file.write(chunk)
+            if time.time() - last_update_time >= 10:
+                percentage_done = size_downloaded / total_length * 100
+                human_pct = "{:.1f}".format(percentage_done)
+                human_size_dl = humanize.naturalsize(size_downloaded)
+                human_total_dl = humanize.naturalsize(total_length)
+                last_update_time = time.time()
+                log.debug(f"""Download of "{show_name}" at {human_pct}% ({human_size_dl}/{human_total_dl})""")
+            if done_event.is_set():
+                log.debug(f"done_event")
+                return
+
+    #got_json_response = MVWJSONResponse(**json.loads(s.content))
+    #return got_json_response
+
+#    progress.console.log(f"Requesting {url}")
+#    response = urlopen(url)
+#    # This will break if the response doesn't contain content length
+#    progress.update(task_id, total=int(response.info()["Content-length"]))
+#    with open(path, "wb") as dest_file:
+#        progress.start_task(task_id)
+#        for data in iter(partial(response.read, 32768), b""):
+#            dest_file.write(data)
+#            progress.update(task_id, advance=len(data))
+#            if done_event.is_set():
+#                return
+#    progress.console.log(f"Downloaded {path}")
+
+
+#def download(urls: Iterable[str], dest_dir: str):
+#    """Download multuple files to the given directory."""
+#
+#    with progress:
+#        with ThreadPoolExecutor(max_workers=1) as pool:
+#            for url in urls:
+#                filename = url.split("/")[-1]
+#                dest_path = os.path.join(dest_dir, filename)
+#                task_id = progress.add_task("download", filename=filename, start=False)
+#                pool.submit(copy_url, task_id, url, dest_path)
+
+
+def download_media(
+        section_name: str,
+        config_obj: configparser.ConfigParser()) -> None:
+    with ThreadPoolExecutor(max_workers=2) as pool:
+        for result in json_response.result.results.copy():
+            # filename = url.split("/")[-1]
+            # dest_path = os.path.join(dest_dir, filename)
+            # task_id = progress.add_task("download", filename=filename, start=False)
+            pool.submit(copy_url, result)
+    # TODO before sending into pool validate which url we're going to use
+    # TODO from each url get total content-length
+    # TODO use total content-length for overall progress of what we want to download
+    pass
+
+
 if __name__ == '__main__':
     validate_default_section(config)
     if config_has_valid_section(config):
@@ -309,4 +442,8 @@ if __name__ == '__main__':
             log.debug(f"Deduplicating results by title where needed ...")
             if config.has_option(section, "title_not_regex"):
                 json_response = dedup_json_titles(section, config, json_response)
+
+            log.debug(f"Downloading shows ...")
+            download_media(section, config)
+
             # console.print_json(json_response.json())
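
The TODO comments at the end of download_media() describe the intended next step: decide which URL to download for each show, fetch each URL's Content-Length up front, and feed the summed size into an overall progress bar built from the Progress columns defined at the top of the file. The following is a minimal sketch of that idea and is not part of these commits: pick_url(), preflight_content_length() and download_media_with_totals() are hypothetical helpers, and the sketch assumes the module-level progress object, the copy_url() function and the Show fields (url_video_hd, url_video) seen in the diff above.

# Hypothetical sketch of the TODOs in download_media(); not part of these commits.
import requests
from concurrent.futures import ThreadPoolExecutor

def pick_url(show) -> str:
    # TODO 1: validate which URL to use; prefer HD, fall back to the default quality.
    return show.url_video_hd or show.url_video

def preflight_content_length(url: str) -> int:
    # TODO 2: ask the server for the size without downloading the body.
    r = requests.head(url, allow_redirects=True, timeout=10)
    return int(r.headers.get("content-length", 0))

def download_media_with_totals(shows) -> None:
    # TODO 3: use the summed Content-Length as the total of one overall progress task.
    sizes = [preflight_content_length(pick_url(show)) for show in shows]
    with progress:
        overall = progress.add_task("download", filename="all shows", total=sum(sizes))
        with ThreadPoolExecutor(max_workers=2) as pool:
            for show in shows:
                # copy_url() would need a progress.update(overall, advance=len(chunk))
                # call in its download loop for the overall bar to actually advance.
                pool.submit(copy_url, show)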

requirements.in

@@ -2,3 +2,4 @@ rich
 requests
 inflect
 pydantic
+humanize
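
humanize, added here and pinned below, backs the human-readable byte counts in copy_url()'s log lines. For illustration, naturalsize() uses decimal units by default and binary units with binary=True:

import humanize

humanize.naturalsize(32768)               # '32.8 kB'
humanize.naturalsize(32768, binary=True)  # '32.0 KiB'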

requirements.txt

@@ -10,6 +10,8 @@ charset-normalizer==2.0.12
     # via requests
 commonmark==0.9.1
     # via rich
+humanize==4.0.0
+    # via -r requirements.in
 idna==3.3
     # via requests
 inflect==5.4.0

type_def/mvw_json_response.py

@@ -10,7 +10,7 @@ class Show(BaseModel):
     description: str
     timestamp: int
     duration: int
-    size: int
+    size: Optional[int]
     url_website: str
     url_subtitle: str
     url_video: str
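
The only change to the Show model loosens size from int to Optional[int], presumably so results that report no size (or a null one) no longer fail validation. A minimal sketch of the effect on a toy one-field model, assuming pydantic v1, where an Optional field without an explicit default is treated as not required:

from typing import Optional
from pydantic import BaseModel

class Show(BaseModel):
    size: Optional[int]  # pydantic v1: not required, defaults to None

Show.parse_obj({"size": None})  # validates; with `size: int` this raised a ValidationError
Show.parse_obj({})              # also validates under v1, size defaults to None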