Compare commits
13 Commits
d0552f9e67
...
287a755e65
Author | SHA1 | Date | |
---|---|---|---|
287a755e65 | |||
d70766bae0 | |||
380fb4bf2e | |||
e395309011 | |||
029d9ffb7e | |||
ec612de2dd | |||
2905ff5c74 | |||
0cfe47465d | |||
5eff7876bc | |||
4809846edf | |||
b5dff485d9 | |||
e78659b2de | |||
27004a5294 |
@ -19,7 +19,7 @@ dl_filename_all_lowercase = no
|
||||
min_duration = 1200
|
||||
max_duration = 2700
|
||||
query = @maus-query.json
|
||||
title_not_regex = audiodeskription|gebärdensprache
|
||||
title_not_regex = audiodeskription|gebärdensprache|hörfassung
|
||||
# dl_filename_pattern = &(publish_date)s.&(ext)s
|
||||
# publish_date_srtftime_pattern = S%%YE%%Y%%m%%d01
|
||||
# query = {"queries":[{"fields":["topic"],"query":"die sendung mit der maus"},{"fields":["channel"],"query":"ARD"}],"sortBy":"timestamp","sortOrder":"desc","future":false,"offset":0,"size":50}
|
||||
|
@ -17,5 +17,5 @@
|
||||
"sortOrder": "desc",
|
||||
"future": false,
|
||||
"offset": 0,
|
||||
"size": 15
|
||||
"size": 100
|
||||
}
|
||||
|
144
mvw-dl.py
144
mvw-dl.py
@ -5,6 +5,7 @@ import logging
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
|
||||
@ -47,7 +48,9 @@ from rich.progress import (
|
||||
# TODO set locale for datetime and others to globally stick to en_US
|
||||
download_start_time = 0
|
||||
download_last_update_time = 0
|
||||
size_downloaded = 0
|
||||
total_content_length = 0
|
||||
size_downloaded_for_progress_tracking = 0
|
||||
size_downloaded_for_speed_tracking = 0
|
||||
file_lock_timeout = 1
|
||||
state_lock_file_ext = ".lock"
|
||||
|
||||
@ -131,7 +134,7 @@ log = logging.getLogger("rich")
|
||||
# Our own code logs with this level
|
||||
log.setLevel(logging.DEBUG)
|
||||
# connectionpool and filelock log with WARNING, we don't need its verbosity
|
||||
# logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)
|
||||
logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)
|
||||
logging.getLogger("filelock").setLevel(logging.WARNING)
|
||||
install(show_locals=True)
|
||||
|
||||
@ -224,7 +227,7 @@ def validate_config_sections(
|
||||
|
||||
def query_string_from_file(
|
||||
filename: str) -> str:
|
||||
with open(filename, "r") as jsonfile:
|
||||
with open(filename, "r", encoding="utf-8") as jsonfile:
|
||||
query_string = jsonfile.read()
|
||||
return query_string
|
||||
|
||||
@ -349,16 +352,6 @@ def dedup_json_titles(
|
||||
return json_obj
|
||||
|
||||
|
||||
done_event = Event()
|
||||
|
||||
|
||||
def handle_sigint(signum, frame):
|
||||
done_event.set()
|
||||
|
||||
|
||||
signal.signal(signal.SIGINT, handle_sigint)
|
||||
|
||||
|
||||
def expanded_dest_dir(
|
||||
raw_dest_dir: str) -> str:
|
||||
user_expanded_dest_dir = os.path.expanduser(raw_dest_dir)
|
||||
@ -381,13 +374,17 @@ def filename_replace_pattern(
|
||||
show_attrs = [attr for attr in dir(show) if not attr.startswith('_') and not callable(getattr(show, attr))]
|
||||
|
||||
for attr in show_attrs:
|
||||
# log.debug(f"{shorthand_uuid} Replacing filename pattern '&({attr})s' ...")
|
||||
filename = re.sub(r"&\(" + re.escape(attr) + r"\)s", str(getattr(show, attr)), filename)
|
||||
# log.debug(f"{shorthand_uuid} New filename: '{filename}'")
|
||||
attr_re = re.compile(r"&\(" + re.escape(attr) + r"\)s")
|
||||
if re.search(attr_re, filename):
|
||||
log.debug(f"{shorthand_uuid} Replacing filename pattern '&({attr})s' ...")
|
||||
filename = re.sub(attr_re, str(getattr(show, attr)), filename)
|
||||
log.debug(f"{shorthand_uuid} New filename: '{filename}'")
|
||||
for extended_attr in show_extended:
|
||||
# log.debug(f"{shorthand_uuid} Replacing filename pattern '&({extended_attr})s' ...")
|
||||
filename = re.sub(r"&\(" + re.escape(extended_attr) + r"\)s", show_extended[extended_attr], filename)
|
||||
# log.debug(f"{shorthand_uuid} New filename: '{filename}'")
|
||||
extended_attr_re = re.compile(r"&\(" + re.escape(extended_attr) + r"\)s")
|
||||
if re.search(extended_attr_re, filename):
|
||||
log.debug(f"{shorthand_uuid} Replacing filename pattern '&({extended_attr})s' ...")
|
||||
filename = re.sub(extended_attr_re, show_extended[extended_attr], filename)
|
||||
log.debug(f"{shorthand_uuid} New filename: '{filename}'")
|
||||
return filename
|
||||
|
||||
|
||||
@ -398,6 +395,7 @@ def get_safe_filename(
|
||||
|
||||
log.debug(f"{shorthand_uuid} Replacing unsafe characters in filename with dashes ...")
|
||||
clean_filename = re.sub(r"""[/\\?%*:|"<>\x7F\x00-\x1F]""", "-", dirty_filename)
|
||||
|
||||
log.debug(f"{shorthand_uuid} New filename: '{clean_filename}'")
|
||||
return clean_filename
|
||||
|
||||
@ -450,7 +448,6 @@ def get_filename(
|
||||
filename_safe = filename_safe.lower()
|
||||
log.debug(f"{shorthand_uuid} New filename: '{filename_safe}'")
|
||||
|
||||
log.debug(f"{shorthand_uuid} {filename_safe}")
|
||||
return filename_safe
|
||||
|
||||
|
||||
@ -472,7 +469,7 @@ def state_file_none_or_valid_json(
|
||||
|
||||
if os.path.exists(state_file_abs_path):
|
||||
if os.path.getsize(state_file_abs_path) > 0:
|
||||
with open(state_file_abs_path, "r") as state_file:
|
||||
with open(state_file_abs_path, "r", encoding="utf-8") as state_file:
|
||||
try:
|
||||
json.loads(state_file.read())
|
||||
return True
|
||||
@ -532,7 +529,7 @@ def log_successful_download(
|
||||
|
||||
with lock:
|
||||
state_file_none_or_valid_json(state_file_abs_path)
|
||||
with open(state_file_abs_path, "r+") as state_file:
|
||||
with open(state_file_abs_path, "r+", encoding="utf-8") as state_file:
|
||||
try:
|
||||
json_state = json.load(state_file)
|
||||
except json.JSONDecodeError:
|
||||
@ -541,12 +538,12 @@ def log_successful_download(
|
||||
json_state = []
|
||||
|
||||
log.debug(f"{shorthand_uuid} Writing log entry to '{state_file_abs_path}' ...")
|
||||
with open(state_file_abs_path, "w") as state_file:
|
||||
with open(state_file_abs_path, "w", encoding="utf-8") as state_file:
|
||||
json_state.append(state_entry)
|
||||
max_log_entries = config_obj.getint(section_name, "state_file_retention")
|
||||
if len(json_state) > max_log_entries:
|
||||
json_state = truncate_log(json_state, max_log_entries)
|
||||
json.dump(json_state, state_file, indent=4, sort_keys=True)
|
||||
json.dump(json_state, state_file, indent=4, sort_keys=True, ensure_ascii=False)
|
||||
|
||||
|
||||
def copy_url(
|
||||
@ -554,7 +551,6 @@ def copy_url(
|
||||
config_obj: configparser.ConfigParser(),
|
||||
show: type_def.mvw_json_response.Show,
|
||||
video_metadata: dict,
|
||||
total_content_length: int,
|
||||
state_file_abs_path: str,
|
||||
show_name: str,
|
||||
job_uuid: str,
|
||||
@ -565,12 +561,14 @@ def copy_url(
|
||||
|
||||
global download_start_time
|
||||
global download_last_update_time
|
||||
global size_downloaded
|
||||
global size_downloaded_for_progress_tracking
|
||||
global size_downloaded_for_speed_tracking
|
||||
|
||||
update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
|
||||
max_quality_url = video_metadata["url"]
|
||||
filename = get_filename(section_name, config_obj, show, max_quality_url, shorthand_uuid)
|
||||
resume_header = {}
|
||||
tmp_file_open_mode = "wb"
|
||||
tmp_file_size = 0
|
||||
tmp_path = os.path.join(tmp_dir, filename)
|
||||
dest_path = os.path.join(dest_dir, filename)
|
||||
@ -581,29 +579,34 @@ def copy_url(
|
||||
tmp_file_size = os.path.getsize(tmp_path)
|
||||
log.debug(f"{shorthand_uuid} Temporary file '{tmp_path}' exists likely from a previous incomplete "
|
||||
f"download attempt, size is {humanize.naturalsize(tmp_file_size, binary=True)}. Resuming ...")
|
||||
tmp_file_open_mode = "ab"
|
||||
try:
|
||||
with open(tmp_path, "wb") as tmp_file:
|
||||
with open(tmp_path, tmp_file_open_mode) as tmp_file:
|
||||
log.info(f"""{shorthand_uuid} Downloading "{show_name}" ...""")
|
||||
if tmp_file_size > 0:
|
||||
resume_header = {"range": f"bytes={tmp_file_size}-"}
|
||||
log.info(f"resume_header: {resume_header}")
|
||||
log.debug(f"resume_header: {resume_header}")
|
||||
size_downloaded_for_progress_tracking += tmp_file_size
|
||||
r = requests.get(max_quality_url, headers=resume_header, stream=True)
|
||||
for chunk in r.iter_content(32768):
|
||||
size_downloaded += len(chunk)
|
||||
size_downloaded_for_progress_tracking += len(chunk)
|
||||
size_downloaded_for_speed_tracking += len(chunk)
|
||||
tmp_file.write(chunk)
|
||||
if time.time() - download_last_update_time >= update_interval:
|
||||
download_last_update_time = time.time()
|
||||
dl_speed_so_far = size_downloaded / (download_last_update_time - download_start_time)
|
||||
time_in_progress = download_last_update_time - download_start_time
|
||||
dl_speed_so_far = size_downloaded_for_speed_tracking / time_in_progress
|
||||
human_dl_speed_so_far = f"{humanize.naturalsize(dl_speed_so_far, binary=True)}/s"
|
||||
percentage_done = size_downloaded / total_content_length * 100
|
||||
data_missing = total_content_length - size_downloaded_for_progress_tracking
|
||||
time_til_completion = 1 / dl_speed_so_far * data_missing
|
||||
human_time_til_completion = humanize.naturaldelta(d.timedelta(seconds=time_til_completion))
|
||||
percentage_done = size_downloaded_for_progress_tracking / total_content_length * 100
|
||||
human_pct = "{:.1f}".format(percentage_done)
|
||||
human_size_dl = humanize.naturalsize(size_downloaded, binary=True)
|
||||
human_size_dl = humanize.naturalsize(size_downloaded_for_progress_tracking, binary=True)
|
||||
human_total_dl = humanize.naturalsize(total_content_length, binary=True)
|
||||
log.debug(f"[thread] Downloaded {human_pct}% ({human_size_dl}/{human_total_dl} "
|
||||
f"at an average {human_dl_speed_so_far})")
|
||||
if done_event.is_set():
|
||||
log.info(f"""{shorthand_uuid} Download of "{show_name}" interrupted""")
|
||||
return
|
||||
f"at an average {human_dl_speed_so_far}, approximately {human_time_til_completion} "
|
||||
f"left til completion.)")
|
||||
log.info(f"""{shorthand_uuid} Download of "{show_name}" done""")
|
||||
except IOError:
|
||||
log.error(f"{shorthand_uuid} IOError during download. Aborting this download thread ...")
|
||||
@ -611,12 +614,14 @@ def copy_url(
|
||||
|
||||
log.info(f"{shorthand_uuid} Moving file to final location '{dest_path}' ...")
|
||||
try:
|
||||
os.rename(tmp_path, dest_path)
|
||||
shutil.move(tmp_path, dest_path)
|
||||
except OSError as ose:
|
||||
log.error(f"{shorthand_uuid} Failed moving file with an OSError\n"
|
||||
f"{ose}\n"
|
||||
f"Other threads continue unhindered.")
|
||||
else:
|
||||
log_successful_download(section_name, config_obj, show, state_file_abs_path, job_uuid, shorthand_uuid)
|
||||
log.info(f"{shorthand_uuid} Done moving")
|
||||
except Exception:
|
||||
console.print_exception(show_locals=True)
|
||||
log.error(f"{shorthand_uuid} Failed moving file")
|
||||
|
||||
|
||||
def get_max_quality_url(
|
||||
@ -639,22 +644,28 @@ def get_content_length(
|
||||
return 0
|
||||
|
||||
|
||||
def is_already_downloaded(
|
||||
show: type_def.mvw_json_response.Show,
|
||||
state_file_abs_path: str,
|
||||
show_name: str) -> bool:
|
||||
def get_json_state(
|
||||
state_file_abs_path: str) -> json.loads:
|
||||
|
||||
with open(state_file_abs_path, "r") as state_file:
|
||||
with open(state_file_abs_path, "r", encoding="utf-8") as state_file:
|
||||
try:
|
||||
json_state = json.load(state_file)
|
||||
except json.JSONDecodeError:
|
||||
return []
|
||||
else:
|
||||
return json_state
|
||||
|
||||
|
||||
def is_already_downloaded(
|
||||
show: type_def.mvw_json_response.Show,
|
||||
json_state: json.loads,
|
||||
show_name: str) -> bool:
|
||||
|
||||
for log_entry in json_state:
|
||||
for log_data in [key for key in log_entry]:
|
||||
if show.topic == log_entry[log_data]["topic"] and show.title == log_entry[log_data]["title"]:
|
||||
log.debug(f"""Show "{show_name}" already downloaded, won't queue""")
|
||||
return True
|
||||
except json.JSONDecodeError:
|
||||
return False
|
||||
return False
|
||||
|
||||
|
||||
def download_media(
|
||||
@ -664,20 +675,13 @@ def download_media(
|
||||
|
||||
global download_start_time
|
||||
global download_last_update_time
|
||||
global total_content_length
|
||||
|
||||
dl_threads = config_obj.getint(section_name, "dl_threads")
|
||||
state_file_abs_path = get_state_file_abs_path(section_name, config_obj)
|
||||
state_lock_file = state_file_abs_path + state_lock_file_ext
|
||||
video_metadata = {}
|
||||
|
||||
for result in json_obj.result.results.copy():
|
||||
max_quality_url = get_max_quality_url(result)
|
||||
content_length = get_content_length(max_quality_url)
|
||||
video_metadata[result.id] = {"url": max_quality_url, "content_length": content_length}
|
||||
total_content_length = 0
|
||||
for video in video_metadata:
|
||||
total_content_length += video_metadata[video]["content_length"]
|
||||
video_metadata["total_content_length"] = total_content_length
|
||||
tmp_dir = expanded_dest_dir(config_obj.get(section_name, "tmp_base_dir"))
|
||||
dest_dir = expanded_dest_dir(config_obj.get(section_name, "dl_dir"))
|
||||
log.info(f"""Download location is {tmp_dir}""")
|
||||
@ -686,32 +690,44 @@ def download_media(
|
||||
|
||||
lock = get_state_file_lock(state_lock_file)
|
||||
|
||||
with lock:
|
||||
state_file_none_or_valid_json(state_file_abs_path)
|
||||
json_state = get_json_state(state_file_abs_path)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=dl_threads) as pool:
|
||||
download_last_update_time = time.time()
|
||||
download_start_time = download_last_update_time
|
||||
update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
|
||||
log.debug(f"""Will provide updates every {update_interval} {p.plural("second", update_interval)}""")
|
||||
with lock:
|
||||
state_file_none_or_valid_json(state_file_abs_path)
|
||||
for result in json_obj.result.results.copy():
|
||||
show_name = f"{result.topic} - {result.title}"
|
||||
if not is_already_downloaded(result, state_file_abs_path, show_name):
|
||||
future = None
|
||||
if not is_already_downloaded(result, json_state, show_name):
|
||||
max_quality_url = get_max_quality_url(result)
|
||||
content_length = get_content_length(max_quality_url)
|
||||
video_metadata[result.id] = {"url": max_quality_url, "content_length": content_length}
|
||||
total_content_length += video_metadata[result.id]["content_length"]
|
||||
log.debug(f"Total download size upped to "
|
||||
f"{humanize.naturalsize(total_content_length, binary=True)}")
|
||||
|
||||
job_uuid = str(uuid.uuid4())
|
||||
shorthand_uuid = f"[{job_uuid[:2]}..{job_uuid[-2:]}]"
|
||||
log.debug(f"""Queuing "{show_name}" for download ...""")
|
||||
pool.submit(
|
||||
log.debug(f"{shorthand_uuid} Job UUID {job_uuid} generated, shorthand is {shorthand_uuid}")
|
||||
log.debug(f"""{shorthand_uuid} Queuing "{show_name}" for download ...""")
|
||||
future = pool.submit(
|
||||
copy_url,
|
||||
section_name,
|
||||
config_obj,
|
||||
result,
|
||||
video_metadata[result.id],
|
||||
video_metadata["total_content_length"],
|
||||
state_file_abs_path,
|
||||
show_name,
|
||||
job_uuid,
|
||||
shorthand_uuid,
|
||||
tmp_dir,
|
||||
dest_dir)
|
||||
if future is not None:
|
||||
future.result()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
@ -741,8 +757,6 @@ if __name__ == '__main__':
|
||||
if config.has_option(section, "title_not_regex"):
|
||||
json_response = dedup_json_titles(section, config, json_response)
|
||||
|
||||
log.debug(f"Downloading {json_response.result.queryInfo.resultCount} "
|
||||
log.debug(f"Interested in {json_response.result.queryInfo.resultCount} "
|
||||
f"""{p.plural("show", json_response.result.queryInfo.resultCount)} ...""")
|
||||
download_media(section, config, json_response)
|
||||
|
||||
# console.print_json(json_response.json())
|
||||
|
Loading…
x
Reference in New Issue
Block a user