|
|
@@ -3,9 +3,13 @@ import datetime as d
|
|
|
|
import json
|
|
|
|
import json
|
|
|
|
import logging
|
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import os
|
|
|
|
|
|
|
|
import pathlib
|
|
|
|
import re
|
|
|
|
import re
|
|
|
|
|
|
|
|
import shutil
|
|
|
|
import sys
|
|
|
|
import sys
|
|
|
|
import time
|
|
|
|
import time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import filelock
|
|
|
|
import humanize
|
|
|
|
import humanize
|
|
|
|
import requests
|
|
|
|
import requests
|
|
|
|
import inflect
|
|
|
|
import inflect
|
|
|
@@ -13,6 +17,8 @@ from rich.logging import RichHandler
|
|
|
|
from rich.traceback import install
|
|
|
|
from rich.traceback import install
|
|
|
|
import typing as t
|
|
|
|
import typing as t
|
|
|
|
from rich.console import Console
|
|
|
|
from rich.console import Console
|
|
|
|
|
|
|
|
from filelock import Timeout, FileLock
|
|
|
|
|
|
|
|
import uuid
|
|
|
|
|
|
|
|
|
|
|
|
import type_def.mvw_json_response
|
|
|
|
import type_def.mvw_json_response
|
|
|
|
from type_def.mvw_json_request import MVWJSONRequest
|
|
|
|
from type_def.mvw_json_request import MVWJSONRequest
|
|
|
@@ -28,6 +34,7 @@ from threading import Event
|
|
|
|
from typing import Iterable
|
|
|
|
from typing import Iterable
|
|
|
|
from urllib.request import urlopen
|
|
|
|
from urllib.request import urlopen
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from rich.progress import (
|
|
|
|
from rich.progress import (
|
|
|
|
BarColumn,
|
|
|
|
BarColumn,
|
|
|
|
DownloadColumn,
|
|
|
|
DownloadColumn,
|
|
|
@@ -38,6 +45,20 @@ from rich.progress import (
|
|
|
|
TransferSpeedColumn,
|
|
|
|
TransferSpeedColumn,
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# TODO set locale for datetime and others to globally stick to en_US
|
|
|
|
|
|
|
|
# TODO thread log messages display timestamp in systemd journal
|
|
|
|
|
|
|
|
# TODO Increment file name suffix more than once if needed
|
|
|
|
|
|
|
|
# TODO [23:15:14] DEBUG [thread]
|
|
|
|
|
|
|
|
# TODO Clean mvw-dl.timer
|
|
|
|
|
|
|
|
# TODO Reset maus-query.json
|
|
|
|
|
|
|
|
# Shared download bookkeeping. download_media() initialises the timestamps and
# sums up the total size; copy_url() updates the byte counters per chunk.
# NOTE(review): copy_url() runs inside a ThreadPoolExecutor and updates these
# counters with "+=" without a lock - confirm whether lost updates matter.

# time.time() value taken when download_media() opens its thread pool.
download_start_time = 0
# time.time() value of the most recent progress log line.
download_last_update_time = 0
# Sum of the content lengths of all shows queued by download_media().
total_content_length = 0
# Bytes counted as done: received chunks plus the size of resumed temporary files.
size_downloaded_for_progress_tracking = 0
# Bytes received as chunks only; sizes of resumed temporary files are not added.
size_downloaded_for_speed_tracking = 0
# Timeout handed to FileLock for state files; log messages report it in seconds.
file_lock_timeout = 1
# Appended to a state file path to build the name of its lock file.
state_lock_file_ext = ".lock"
|
|
|
|
|
|
|
|
|
|
|
|
progress = Progress(
|
|
|
|
progress = Progress(
|
|
|
|
TextColumn("[bold blue]{task.fields[filename]}", justify="right"),
|
|
|
|
TextColumn("[bold blue]{task.fields[filename]}", justify="right"),
|
|
|
|
BarColumn(bar_width=None),
|
|
|
|
BarColumn(bar_width=None),
|
|
|
@@ -66,6 +87,9 @@ JSONType = t.Union[str, int, float, bool, None, t.Dict[str, t.Any], t.List[t.Any
|
|
|
|
# 1: Config file invalid, it has no sections
|
|
|
|
# 1: Config file invalid, it has no sections
|
|
|
|
# 2: Config file invalid, sections must define at least CONST.CFG_MANDATORY
|
|
|
|
# 2: Config file invalid, sections must define at least CONST.CFG_MANDATORY
|
|
|
|
# 3: No search results to download
|
|
|
|
# 3: No search results to download
|
|
|
|
|
|
|
|
# 4: State file already exists, has more than 0 bytes size but doesn't contain usable JSON
|
|
|
|
|
|
|
|
# 5: State file lock cannot be acquired within file_lock_timeout
|
|
|
|
|
|
|
|
# 6: Unable to create state directory
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class CONST(object):
|
|
|
|
class CONST(object):
|
|
|
@@ -83,7 +107,13 @@ class CONST(object):
|
|
|
|
{"key": "state_file_name_prefix", "value": "state-"},
|
|
|
|
{"key": "state_file_name_prefix", "value": "state-"},
|
|
|
|
{"key": "state_file_name_suffix", "value": ".log"},
|
|
|
|
{"key": "state_file_name_suffix", "value": ".log"},
|
|
|
|
{"key": "mvw_endpoint", "value": "http://localhost:8000/api/query"},
|
|
|
|
{"key": "mvw_endpoint", "value": "http://localhost:8000/api/query"},
|
|
|
|
{"key": "title_dedup_winner", "value": "first"}
|
|
|
|
{"key": "title_dedup_winner", "value": "first"},
|
|
|
|
|
|
|
|
{"key": "dl_progress_update_interval", "value": "10"},
|
|
|
|
|
|
|
|
{"key": "dl_threads", "value": "2"},
|
|
|
|
|
|
|
|
{"key": "dl_filename_pattern", "value": "&(channel)s - &(publish_date)s - &(topic)s - &(title)s"},
|
|
|
|
|
|
|
|
{"key": "publish_date_srtftime_pattern", "value": "%%Y%%m%%d"},
|
|
|
|
|
|
|
|
{"key": "dl_filename_replace_spaces_with", "value": "_"},
|
|
|
|
|
|
|
|
{"key": "dl_filename_all_lowercase", "value": "yes"}
|
|
|
|
]
|
|
|
|
]
|
|
|
|
CFG_KNOWN_SECTION = [
|
|
|
|
CFG_KNOWN_SECTION = [
|
|
|
|
{"key": "min_duration", "is_mandatory": False},
|
|
|
|
{"key": "min_duration", "is_mandatory": False},
|
|
|
@@ -102,16 +132,16 @@ logging.basicConfig(
|
|
|
|
format=CONST.LOG_FORMAT,
|
|
|
|
format=CONST.LOG_FORMAT,
|
|
|
|
datefmt="[%X]",
|
|
|
|
datefmt="[%X]",
|
|
|
|
handlers=[RichHandler(
|
|
|
|
handlers=[RichHandler(
|
|
|
|
show_time=False if "SYSTEMD_EXEC_PID" in os.environ else True,
|
|
|
|
show_time=False,
|
|
|
|
rich_tracebacks=True
|
|
|
|
rich_tracebacks=True
|
|
|
|
)]
|
|
|
|
)]
|
|
|
|
)
|
|
|
|
)
|
|
|
|
log = logging.getLogger("rich")
|
|
|
|
log = logging.getLogger("rich")
|
|
|
|
# Our own code logs with this level
|
|
|
|
# Our own code logs with this level
|
|
|
|
log.setLevel(logging.DEBUG)
|
|
|
|
log.setLevel(logging.DEBUG)
|
|
|
|
# connectionpool logs with WARNING, we don't need its verbosity
|
|
|
|
# Limit urllib3.connectionpool and filelock to WARNING, we don't need their verbosity
|
|
|
|
log_connectionpool = logging.getLogger("urllib3.connectionpool")
|
|
|
|
logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)
|
|
|
|
log_connectionpool.setLevel(logging.WARNING)
|
|
|
|
logging.getLogger("filelock").setLevel(logging.WARNING)
|
|
|
|
install(show_locals=True)
|
|
|
|
install(show_locals=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -135,7 +165,7 @@ class ConfigParser(
|
|
|
|
ini_defaults = []
|
|
|
|
ini_defaults = []
|
|
|
|
internal_defaults = {default["key"]: default["value"] for default in CONST.CFG_KNOWN_DEFAULTS}
|
|
|
|
internal_defaults = {default["key"]: default["value"] for default in CONST.CFG_KNOWN_DEFAULTS}
|
|
|
|
config = ConfigParser(defaults=internal_defaults)
|
|
|
|
config = ConfigParser(defaults=internal_defaults)
|
|
|
|
config.read(CONST.CFG_DEFAULT_FILENAME)
|
|
|
|
config.read(CONST.CFG_DEFAULT_ABS_PATH)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def print_section_header(
|
|
|
|
def print_section_header(
|
|
|
@@ -203,7 +233,8 @@ def validate_config_sections(
|
|
|
|
|
|
|
|
|
|
|
|
def query_string_from_file(
        filename: str) -> str:
    """Return the content of a query file as a string.

    filename is joined onto CONST.CFG_THIS_FILE_DIRNAME and the resulting
    path is read as UTF-8 text.
    """
    query_file_path = os.path.join(CONST.CFG_THIS_FILE_DIRNAME, filename)
    with open(query_file_path, "r", encoding="utf-8") as query_file:
        return query_file.read()
|
|
|
|
|
|
|
|
|
|
|
@@ -328,92 +359,396 @@ def dedup_json_titles(
|
|
|
|
return json_obj
|
|
|
|
return json_obj
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
done_event = Event()
|
|
|
|
def expanded_dest_dir(
        raw_dest_dir: str) -> str:
    """Return raw_dest_dir with "~" and environment variables expanded.

    The user's home directory is expanded first, environment variables second.
    """
    return os.path.expandvars(os.path.expanduser(raw_dest_dir))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def handle_sigint(signum, frame):
|
|
|
|
def filename_replace_pattern(
        section_name: str,
        config_obj: configparser.ConfigParser,
        show: type_def.mvw_json_response.Show,
        max_quality_url: str,
        shorthand_uuid: str) -> str:
    """Fill the "&(name)s" placeholders of the configured file name pattern.

    Placeholders are resolved from the public, non-callable attributes of
    show first, followed by two derived values: "ext" (suffix of
    max_quality_url without the dot) and "publish_date" (show.timestamp
    formatted with the "publish_date_srtftime_pattern" option).

    Args:
        section_name: Config section to read the patterns from.
        config_obj: Parsed configuration.
        show: Show whose attributes supply the placeholder values.
        max_quality_url: Download URL, used to derive the file extension.
        shorthand_uuid: Job prefix for log messages.

    Returns:
        The pattern with every known placeholder substituted.
    """
    filename = config_obj.get(section_name, "dl_filename_pattern")
    ext = pathlib.Path(max_quality_url).suffix.lstrip(".")
    publish_date = d.datetime.utcfromtimestamp(show.timestamp).strftime(
        config_obj.get(section_name, "publish_date_srtftime_pattern"))

    show_attrs = [attr for attr in dir(show) if not attr.startswith('_') and not callable(getattr(show, attr))]
    substitutions = [(attr, str(getattr(show, attr))) for attr in show_attrs]
    substitutions += [("ext", ext), ("publish_date", publish_date)]

    for key, value in substitutions:
        placeholder = f"&({key})s"
        if placeholder not in filename:
            continue
        log.debug(f"{shorthand_uuid} Replacing filename pattern '{placeholder}' ...")
        # Plain string replacement: values such as show titles may contain
        # backslashes, which re.sub() would interpret as template escapes.
        filename = filename.replace(placeholder, value)
        log.debug(f"{shorthand_uuid} New filename: '{filename}'")
    return filename
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
signal.signal(signal.SIGINT, handle_sigint)
|
|
|
|
def get_safe_filename(
        dirty_filename: str,
        shorthand_uuid: str) -> str:
    """Remove question marks and replace unsafe file name characters with dashes.

    Approach taken from https://stackoverflow.com/a/71199182
    """
    question_mark_re = re.compile(r"""[?]""")
    unsafe_char_re = re.compile(r"""[/\\?%*:|"<>\x7F\x00-\x1F]""")

    log.debug(f"{shorthand_uuid} Removing question marks from file name ...")
    without_question_marks = question_mark_re.sub("", dirty_filename)

    log.debug(f"{shorthand_uuid} Replacing unsafe characters in filename with dashes ...")
    sanitized_filename = unsafe_char_re.sub("-", without_question_marks)

    log.debug(f"{shorthand_uuid} New filename: '{sanitized_filename}'")
    return sanitized_filename
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def filename_replace_spaces_with_underscores(
        section_name: str,
        config_obj: configparser.ConfigParser,
        filename: str,
        shorthand_uuid: str) -> str:
    """Replace every whitespace character in filename with the configured string.

    Args:
        section_name: Config section providing "dl_filename_replace_spaces_with".
        config_obj: Parsed configuration.
        filename: File name to process.
        shorthand_uuid: Job prefix for log messages.

    Returns:
        filename with each whitespace character replaced.
    """
    space_replace_string = config_obj.get(section_name, "dl_filename_replace_spaces_with")
    log.debug(f"{shorthand_uuid} Replacing space characters with '{space_replace_string}' ...")
    # A callable replacement inserts the configured string literally; passed as
    # a plain string, re.sub() would treat backslashes in it as template escapes.
    underscored_filename = re.sub(
        r"\s",
        lambda _match: space_replace_string,
        filename)
    log.debug(f"{shorthand_uuid} New filename: '{underscored_filename}'")
    return underscored_filename
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_filename(
        section_name: str,
        config_obj: configparser.ConfigParser,
        show: type_def.mvw_json_response.Show,
        max_quality_url: str,
        shorthand_uuid: str) -> str:
    """Build the final file name for a show.

    Steps: fill the file name pattern, sanitize unsafe characters, optionally
    replace whitespace ("dl_filename_replace_spaces_with") and optionally
    lowercase the result ("dl_filename_all_lowercase").

    Args:
        section_name: Config section to read file name options from.
        config_obj: Parsed configuration.
        show: Show the file name is generated for.
        max_quality_url: Download URL of the show.
        shorthand_uuid: Job prefix for log messages.

    Returns:
        The file name to store the download under.
    """
    log.debug(f"{shorthand_uuid} Generating final filename ...")

    filename_replaced_patterns = filename_replace_pattern(
        section_name,
        config_obj,
        show,
        max_quality_url,
        shorthand_uuid)

    filename_safe = get_safe_filename(
        filename_replaced_patterns,
        shorthand_uuid)

    # Read options from the config object we were handed, not from the
    # module-level "config", so the function honours its own argument.
    if config_obj.get(section_name, "dl_filename_replace_spaces_with"):
        filename_safe = filename_replace_spaces_with_underscores(
            section_name,
            config_obj,
            filename_safe,
            shorthand_uuid)

    if config_obj.getboolean(section_name, "dl_filename_all_lowercase"):
        log.debug(f"{shorthand_uuid} Lowercasing all filename characters ...")
        filename_safe = filename_safe.lower()
        log.debug(f"{shorthand_uuid} New filename: '{filename_safe}'")

    return filename_safe
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_state_file_abs_path(
        section_name: str,
        config_obj: configparser.ConfigParser) -> str:
    """Return the path of the state file for a config section.

    The state directory ("state_files_dir") is created if missing. The file
    name is "state_file_name_prefix" + section_name + "state_file_name_suffix".

    Args:
        section_name: Config section the state file belongs to.
        config_obj: Parsed configuration.

    Returns:
        State directory joined with the state file name.

    Raises:
        SystemExit: With code 6 when the state directory cannot be created.
    """
    state_dir = config_obj.get(section_name, "state_files_dir")
    try:
        os.makedirs(state_dir, exist_ok=True)
    except OSError:
        # Use our own parameter here; the previous message referenced a name
        # ("section") that this function does not define.
        log.error(f"Unable to create '[{section_name}]' state directory '{state_dir}'. "
                  f"We're not going to be able to log state information. Exiting 6 ...")
        sys.exit(6)

    state_file = \
        config_obj.get(section_name, "state_file_name_prefix") + \
        section_name + \
        config_obj.get(section_name, "state_file_name_suffix")
    return os.path.join(state_dir, state_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def state_file_none_or_valid_json(
        state_file_abs_path: str) -> bool:
    """Verify that the state file is absent, empty or parseable as JSON.

    Returns:
        True when the file does not exist, has zero size or contains valid JSON.

    Raises:
        SystemExit: With code 4 when the file has content that is not valid JSON.
    """
    # Nothing to validate for a missing or empty file.
    if not os.path.exists(state_file_abs_path):
        return True
    if not os.path.getsize(state_file_abs_path) > 0:
        return True

    with open(state_file_abs_path, "r", encoding="utf-8") as state_file:
        try:
            json.loads(state_file.read())
        except json.JSONDecodeError:
            log.warning(f"State file '{state_file_abs_path}' does not contain valid JSON. We're not going to "
                        f"be able to log anything into it. Exiting 4 ...")
            sys.exit(4)
    return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def truncate_log(
        json_data: list,
        max_log_entries: int) -> list:
    """Trim json_data in place to at most max_log_entries entries.

    Entries are removed from the front of the list; callers append new
    entries at the end, so the front holds the oldest ones.

    Args:
        json_data: List of log entries, modified in place.
        max_log_entries: Number of entries to keep; values below 0 count as 0.

    Returns:
        The same list object, shortened if it was too long.
    """
    # Deleting by a rising index while the list shrinks removes every other
    # entry and can run past the end; one slice deletion avoids both.
    excess = len(json_data) - max(max_log_entries, 0)
    if excess > 0:
        del json_data[:excess]
    return json_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_state_file_lock(
        state_lock_file: str) -> filelock.BaseFileLock:
    """Create a FileLock for state_lock_file using file_lock_timeout.

    The lock object is returned without being acquired; callers enter it
    with a "with" statement.

    NOTE(review): constructing a FileLock presumably never raises
    filelock.Timeout (acquisition happens when the lock is entered), so the
    handler below looks unreachable - confirm against the filelock docs and
    consider handling Timeout where the lock is entered.

    Raises:
        SystemExit: With code 5 if filelock.Timeout is raised here.
    """
    try:
        return FileLock(state_lock_file, timeout=file_lock_timeout)
    except filelock.Timeout:
        log.error(f"Unable to acquire lock on state lock file '{state_lock_file}' "
                  f"""within {file_lock_timeout} {p.plural("second", file_lock_timeout)}, exiting 5 ...""")
        sys.exit(5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def log_successful_download(
        section_name: str,
        config_obj: configparser.ConfigParser,
        show: type_def.mvw_json_response.Show,
        state_file_abs_path: str,
        job_uuid: str,
        shorthand_uuid: str) -> None:
    """Append a completed download to the section's JSON state file.

    The entry maps job_uuid to the show's topic and title plus completion
    timestamps (epoch and human-readable UTC). The file is read and rewritten
    while holding the state file lock and is trimmed to the
    "state_file_retention" option.

    Args:
        section_name: Config section the download belongs to.
        config_obj: Parsed configuration.
        show: The downloaded show.
        state_file_abs_path: Path of the state file to update.
        job_uuid: UUID of the download job, used as the entry key.
        shorthand_uuid: Job prefix for log messages.
    """
    timestamp_now = int(time.time())
    os.makedirs(os.path.dirname(state_file_abs_path), exist_ok=True)
    state_lock_file = state_file_abs_path + state_lock_file_ext

    state_body = show.dict(include={"topic", "title"})
    state_body["dl_complete_timestamp_epoch"] = timestamp_now
    state_body["dl_complete_timestamp_human"] = \
        d.datetime.utcfromtimestamp(timestamp_now).strftime("%Y-%m-%d %H%M%S UTC")
    state_entry = {job_uuid: state_body}
    # Read the retention limit up front: a config error must surface before
    # the state file is touched.
    max_log_entries = config_obj.getint(section_name, "state_file_retention")

    lock = get_state_file_lock(state_lock_file)

    with lock:
        state_file_none_or_valid_json(state_file_abs_path)
        try:
            with open(state_file_abs_path, "r", encoding="utf-8") as state_file:
                json_state = json.load(state_file)
        except (FileNotFoundError, json.JSONDecodeError):
            # Missing or empty state file: start a fresh log.
            json_state = []

        json_state.append(state_entry)
        if len(json_state) > max_log_entries:
            json_state = truncate_log(json_state, max_log_entries)
        # Serialize before opening the file for writing. Opening with "w"
        # truncates immediately, so any failure after that point would
        # otherwise leave an empty state file behind.
        serialized_state = json.dumps(json_state, indent=4, sort_keys=True, ensure_ascii=False)

        log.debug(f"{shorthand_uuid} Writing log entry to '{state_file_abs_path}' ...")
        with open(state_file_abs_path, "w", encoding="utf-8") as state_file:
            state_file.write(serialized_state)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def copy_url(
        section_name: str,
        config_obj: configparser.ConfigParser,
        show: type_def.mvw_json_response.Show,
        video_metadata: dict,
        state_file_abs_path: str,
        show_name: str,
        job_uuid: str,
        shorthand_uuid: str,
        tmp_dir: str,
        dest_dir: str) -> None:
    """Copy data from a url to a local file.

    The show is downloaded into tmp_dir, resuming an existing temporary file
    via an HTTP range request, then moved to dest_dir and recorded in the
    state file. Byte counters shared with other download threads are updated
    per chunk and a progress line is logged every
    "dl_progress_update_interval" seconds.

    Args:
        section_name: Config section the download belongs to.
        config_obj: Parsed configuration.
        show: Show to download.
        video_metadata: Dict with the keys "url" and "content_length".
        state_file_abs_path: State file to record the finished download in.
        show_name: Human-readable show name for log messages.
        job_uuid: UUID of this download job.
        shorthand_uuid: Job prefix for log messages.
        tmp_dir: Directory to download into.
        dest_dir: Directory the finished file is moved to.
    """

    global download_start_time
    global download_last_update_time
    global size_downloaded_for_progress_tracking
    global size_downloaded_for_speed_tracking

    update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
    max_quality_url = video_metadata["url"]
    filename = get_filename(section_name, config_obj, show, max_quality_url, shorthand_uuid)
    resume_header = {}
    tmp_file_open_mode = "wb"
    tmp_file_size = 0
    tmp_path = os.path.join(tmp_dir, filename)
    dest_path = os.path.join(dest_dir, filename)

    os.makedirs(os.path.dirname(tmp_path), exist_ok=True)
    log.info(f"{shorthand_uuid} Download location resolved to '{tmp_path}'")
    if os.path.exists(tmp_path):
        tmp_file_size = os.path.getsize(tmp_path)
        log.debug(f"{shorthand_uuid} Temporary file '{tmp_path}' exists likely from a previous incomplete "
                  f"download attempt, size is {humanize.naturalsize(tmp_file_size, binary=True)}. Resuming ...")
        tmp_file_open_mode = "ab"

    # A temporary file that already has the announced size needs no further
    # request; asking for a range past the end would only yield an HTTP error.
    already_complete = tmp_file_size > 0 and tmp_file_size == video_metadata.get("content_length")
    if already_complete:
        log.info(f"{shorthand_uuid} Temporary file '{tmp_path}' already has the full size, skipping download ...")
        size_downloaded_for_progress_tracking += tmp_file_size
    else:
        try:
            with open(tmp_path, tmp_file_open_mode) as tmp_file:
                log.info(f"""{shorthand_uuid} Downloading "{show_name}" ...""")
                if tmp_file_size > 0:
                    resume_header = {"range": f"bytes={tmp_file_size}-"}
                    log.debug(f"resume_header: {resume_header}")
                    size_downloaded_for_progress_tracking += tmp_file_size
                with requests.get(max_quality_url, headers=resume_header, stream=True) as r:
                    # An HTTP error body must never end up in the video file.
                    # HTTPError derives from IOError and is handled below.
                    r.raise_for_status()
                    if tmp_file_size > 0 and r.status_code != requests.codes.partial_content:
                        # The server ignored the range request and sends the
                        # whole file: drop the partial data, don't append to it.
                        log.debug(f"{shorthand_uuid} Server does not resume, restarting download from scratch ...")
                        tmp_file.seek(0)
                        tmp_file.truncate()
                        size_downloaded_for_progress_tracking -= tmp_file_size
                    for chunk in r.iter_content(32768):
                        size_downloaded_for_progress_tracking += len(chunk)
                        size_downloaded_for_speed_tracking += len(chunk)
                        tmp_file.write(chunk)
                        if time.time() - download_last_update_time >= update_interval:
                            download_last_update_time = time.time()
                            _log_download_progress()
                log.info(f"""{shorthand_uuid} Download of "{show_name}" done""")
        except IOError as ioe:
            log.error(f"{shorthand_uuid} IOError during download. Aborting this download thread ...\n"
                      f"{ioe}")
            return

    log.info(f"{shorthand_uuid} Moving file to final location '{dest_path}' ...")
    try:
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
        shutil.move(tmp_path, dest_path)
    except OSError as ose:
        log.error(f"{shorthand_uuid} Failed moving file with an OSError\n"
                  f"{ose}\n"
                  f"Other threads continue unhindered.")
    else:
        log_successful_download(section_name, config_obj, show, state_file_abs_path, job_uuid, shorthand_uuid)
        log.info(f"{shorthand_uuid} Done moving")


def _log_download_progress() -> None:
    """Log overall percentage, average speed and estimated time left for all downloads."""
    time_in_progress = download_last_update_time - download_start_time
    # Guard every division: elapsed time, speed and total size can all be 0
    # (e.g. when no content length could be determined).
    dl_speed_so_far = size_downloaded_for_speed_tracking / time_in_progress if time_in_progress > 0 else 0
    human_dl_speed_so_far = f"{humanize.naturalsize(dl_speed_so_far, binary=True)}/s"
    data_missing = max(total_content_length - size_downloaded_for_progress_tracking, 0)
    if dl_speed_so_far > 0:
        time_til_completion = data_missing / dl_speed_so_far
        human_time_til_completion = humanize.naturaldelta(d.timedelta(seconds=time_til_completion))
    else:
        human_time_til_completion = "an unknown amount of time"
    if total_content_length > 0:
        percentage_done = size_downloaded_for_progress_tracking / total_content_length * 100
    else:
        percentage_done = 0.0
    human_pct = "{:.1f}".format(percentage_done)
    human_size_dl = humanize.naturalsize(size_downloaded_for_progress_tracking, binary=True)
    human_total_dl = humanize.naturalsize(total_content_length, binary=True)
    log.debug(f"[thread] Downloaded {human_pct}% ({human_size_dl}/{human_total_dl} "
              f"at an average {human_dl_speed_so_far}, approximately {human_time_til_completion} "
              f"left til completion.)")
|
|
|
|
# dest_file.write(data)
|
|
|
|
|
|
|
|
# progress.update(task_id, advance=len(data))
|
|
|
|
|
|
|
|
# if done_event.is_set():
|
|
|
|
|
|
|
|
# return
|
|
|
|
|
|
|
|
# progress.console.log(f"Downloaded {path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#def download(urls: Iterable[str], dest_dir: str):
|
|
|
|
def get_max_quality_url(
        show: type_def.mvw_json_response.Show) -> str:
    """Return the best available video URL of a show.

    Preference order: url_video_hd, then url_video, then url_video_low. The
    first non-empty value wins; url_video_low is returned as-is when the
    other two are empty.
    """
    return show.url_video_hd or show.url_video or show.url_video_low
|
|
|
|
# pool.submit(copy_url, task_id, url, dest_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_content_length(
        video_url: str) -> int:
    """Return the size in bytes announced for video_url, or 0 if unknown.

    A HEAD request is sent and the "content-length" response header is read.

    Args:
        video_url: URL to query.

    Returns:
        The content length, or 0 when the response status is not OK or the
        header is missing or not a number.
    """
    # requests.head() does not follow redirects unless asked to; without this
    # a redirecting URL would be reported as size 0.
    r = requests.head(video_url, allow_redirects=True)
    if r.status_code != requests.codes.ok:
        return 0
    try:
        return int(r.headers.get("content-length", 0))
    except ValueError:
        return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_json_state(
        state_file_abs_path: str) -> json.loads:
    """Load and return the parsed JSON content of the state file.

    Returns:
        The parsed content, or an empty list when the file does not exist or
        does not contain valid JSON.
    """
    try:
        with open(state_file_abs_path, "r", encoding="utf-8") as state_file:
            return json.load(state_file)
    except json.JSONDecodeError:
        return []
    except FileNotFoundError:
        log.debug(f"State file does not exist (yet), assuming no previous downloads have ever happened ...")
        return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_already_downloaded(
        show: type_def.mvw_json_response.Show,
        json_state: list,
        show_name: str) -> bool:
    """Tell whether the state log already contains a show.

    Each log entry is a dict mapping a job UUID to the logged data; a show
    counts as downloaded when both its topic and title match a logged one.

    Args:
        show: Show to look up.
        json_state: Parsed state file content (list of log entries).
        show_name: Human-readable show name for the log message.

    Returns:
        True if a matching entry exists, otherwise False.
    """
    for log_entry in json_state:
        for log_data in log_entry.values():
            if show.topic == log_data.get("topic") and show.title == log_data.get("title"):
                log.debug(f"""Show "{show_name}" already downloaded, won't queue""")
                return True
    # Explicit result instead of falling off the end and returning None.
    return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def download_media(
        section_name: str,
        config_obj: configparser.ConfigParser,
        json_obj: MVWJSONResponse) -> None:
    """Download all shows of a search result that are not in the state log yet.

    Shows already recorded in the section's state file are skipped. For the
    remaining ones the best URL and its content length are determined first,
    then all downloads are handed to a thread pool of "dl_threads" workers.

    Args:
        section_name: Config section the search result belongs to.
        config_obj: Parsed configuration.
        json_obj: Search response whose result.results are downloaded.
    """

    global download_start_time
    global download_last_update_time
    global total_content_length

    dl_threads = config_obj.getint(section_name, "dl_threads")
    state_file_abs_path = get_state_file_abs_path(section_name, config_obj)
    state_lock_file = state_file_abs_path + state_lock_file_ext
    video_metadata = {}

    tmp_dir = expanded_dest_dir(config_obj.get(section_name, "tmp_base_dir"))
    dest_dir = expanded_dest_dir(config_obj.get(section_name, "dl_dir"))
    log.info(f"""Download location is {tmp_dir}""")
    log.info(f"""Final location is {dest_dir}""")
    log.info(f"Limiting parallel downloads to {dl_threads} ...")

    lock = get_state_file_lock(state_lock_file)

    with lock:
        state_file_none_or_valid_json(state_file_abs_path)
        json_state = get_json_state(state_file_abs_path)

    # Phase 1: collect every job before starting any download, so the total
    # size is complete by the time worker threads report progress against it.
    jobs = []
    for result in json_obj.result.results.copy():
        show_name = f"{result.topic} - {result.title}"
        if is_already_downloaded(result, json_state, show_name):
            continue
        max_quality_url = get_max_quality_url(result)
        content_length = get_content_length(max_quality_url)
        video_metadata[result.id] = {"url": max_quality_url, "content_length": content_length}
        total_content_length += video_metadata[result.id]["content_length"]
        log.debug(f"Total download size upped to "
                  f"{humanize.naturalsize(total_content_length, binary=True)}")

        job_uuid = str(uuid.uuid4())
        shorthand_uuid = f"[{job_uuid[:2]}..{job_uuid[-2:]}]"
        log.debug(f"{shorthand_uuid} Job UUID {job_uuid} generated, shorthand is {shorthand_uuid}")
        jobs.append((result, show_name, job_uuid, shorthand_uuid))

    # Phase 2: run the downloads.
    with ThreadPoolExecutor(max_workers=dl_threads) as pool:
        download_last_update_time = time.time()
        download_start_time = download_last_update_time
        update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
        log.debug(f"""Will provide updates every {update_interval} {p.plural("second", update_interval)}""")
        futures = []
        for result, show_name, job_uuid, shorthand_uuid in jobs:
            log.debug(f"""{shorthand_uuid} Queuing "{show_name}" for download ...""")
            futures.append(pool.submit(
                copy_url,
                section_name,
                config_obj,
                result,
                video_metadata[result.id],
                state_file_abs_path,
                show_name,
                job_uuid,
                shorthand_uuid,
                tmp_dir,
                dest_dir))
        # Wait for every job, not just one, so an exception raised in any
        # worker surfaces here - and only after all jobs have been queued.
        for future in futures:
            future.result()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
if __name__ == '__main__':
|
|
|
@@ -443,7 +778,6 @@ if __name__ == '__main__':
|
|
|
|
if config.has_option(section, "title_not_regex"):
|
|
|
|
if config.has_option(section, "title_not_regex"):
|
|
|
|
json_response = dedup_json_titles(section, config, json_response)
|
|
|
|
json_response = dedup_json_titles(section, config, json_response)
|
|
|
|
|
|
|
|
|
|
|
|
log.debug(f"Downloading shows ...")
|
|
|
|
log.debug(f"Interested in {json_response.result.queryInfo.resultCount} "
|
|
|
|
download_media(section, config)
|
|
|
|
f"""{p.plural("show", json_response.result.queryInfo.resultCount)} ...""")
|
|
|
|
|
|
|
|
download_media(section, config, json_response)
|
|
|
|
# console.print_json(json_response.json())
|
|
|
|
|
|
|
|