Maintain a state file

commit 601583afc3
parent 7081c6b50a
@@ -12,7 +12,7 @@ dl_progress_update_interval = 10
 dl_threads = 2
 dl_filename_pattern = &(channel)s - &(publish_date)s - &(topic)s - &(title)s.&(ext)s
 publish_date_srtftime_pattern = %%Y%%m%%d
-dl_filename_replace_spaces =
+dl_filename_replace_spaces_with =
 dl_filename_all_lowercase = no

 [maus]
@@ -20,6 +20,8 @@ min_duration = 1200
 max_duration = 2700
 query = @maus-query.json
 title_not_regex = audiodeskription|gebärdensprache
+dl_filename_pattern = &(publish_date)s.&(ext)s
+publish_date_srtftime_pattern = S%%YE%%Y%%m%%d01
 # query = {"queries":[{"fields":["topic"],"query":"die sendung mit der maus"},{"fields":["channel"],"query":"ARD"}],"sortBy":"timestamp","sortOrder":"desc","future":false,"offset":0,"size":50}
 # state_file_name = maus
 # tmp_base_dir = %(tmp_base_dir)s/maus
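Note: the state-file code added to mvw-dl.py below reads four options that this hunk doesn't show: state_files_dir, state_file_name_prefix, state_file_name_suffix and state_file_retention. A minimal sketch of how they might look in the config (key names come from the code, the values are assumptions):

    state_files_dir = %(tmp_base_dir)s/state
    state_file_name_prefix = state-
    state_file_name_suffix = .log
    state_file_retention = 50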
mvw-dl.py (99 lines changed)
@@ -72,6 +72,7 @@ JSONType = t.Union[str, int, float, bool, None, t.Dict[str, t.Any], t.List[t.Any
 # 1: Config file invalid, it has no sections
 # 2: Config file invalid, sections must define at least CONST.CFG_MANDATORY
 # 3: No search results to download
+# 4: State file already exists and is non-empty but doesn't contain usable JSON


 class CONST(object):
@@ -94,7 +95,7 @@ class CONST(object):
         {"key": "dl_threads", "value": "2"},
         {"key": "dl_filename_pattern", "value": "&(channel)s - &(publish_date)s - &(topic)s - &(title)s"},
         {"key": "publish_date_srtftime_pattern", "value": "%%Y%%m%%d"},
-        {"key": "dl_filename_replace_spaces", "value": "_"},
+        {"key": "dl_filename_replace_spaces_with", "value": "_"},
         {"key": "dl_filename_all_lowercase", "value": "yes"}
     ]
     CFG_KNOWN_SECTION = [
@@ -394,7 +395,7 @@ def filename_replace_spaces_with_underscores(
         section_name: str,
         config_obj: configparser.ConfigParser(),
         filename: str) -> str:
-    space_replace_string = config_obj.get(section_name, "dl_filename_replace_spaces")
+    space_replace_string = config_obj.get(section_name, "dl_filename_replace_spaces_with")
     log.debug(f"Replacing space characters with '{space_replace_string}' ...")
     underscored_filename = re.sub(
         r"\s",
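The renamed key still feeds the same re.sub(r"\s", ...) call, so every whitespace character in the filename gets replaced with the configured string. A standalone illustration, with an assumed filename:

    import re

    space_replace_string = "_"  # what dl_filename_replace_spaces_with might be set to
    filename = "ARD - 20220101 - Die Sendung mit der Maus.mp4"  # assumed example
    print(re.sub(r"\s", space_replace_string, filename))
    # ARD_-_20220101_-_Die_Sendung_mit_der_Maus.mp4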
@@ -411,19 +412,91 @@ def get_filename(
         max_quality_url: str) -> str:
     filename_replaced_patterns = filename_replace_pattern(section_name, config_obj, show, max_quality_url)
     filename_safe = get_safe_filename(filename_replaced_patterns)
-    if config.get(section_name, "dl_filename_replace_spaces"):
+    if config.get(section_name, "dl_filename_replace_spaces_with"):
         filename_safe = filename_replace_spaces_with_underscores(section_name, config_obj, filename_safe)
     if config.getboolean(section_name, "dl_filename_all_lowercase"):
         log.debug(f"Lowercasing all filename characters ...")
         filename_safe = filename_safe.lower()
     log.debug(f"New filename: '{filename_safe}'")
     log.debug(filename_safe)
-    quit()
+    return filename_safe


+def get_state_file_abs_path(
+        section_name: str,
+        config_obj: configparser.ConfigParser()) -> str:
+
+    state_dir = config_obj.get(section_name, "state_files_dir")
+    state_file = \
+        config_obj.get(section_name, "state_file_name_prefix") + \
+        section_name + \
+        config_obj.get(section_name, "state_file_name_suffix")
+    state_file_abs_path = os.path.join(state_dir, state_file)
+    return state_file_abs_path
+
+
+def state_file_none_or_valid_json(
+        state_file_abs_path: str) -> bool:
+
+    if os.path.exists(state_file_abs_path):
+        if os.path.getsize(state_file_abs_path) > 0:
+            with open(state_file_abs_path, "r") as state_file:
+                try:
+                    json.loads(state_file.read())
+                    return True
+                except json.JSONDecodeError:
+                    log.warning(f"State file '{state_file_abs_path}' does not contain valid JSON. We're not going to "
+                                f"be able to log anything into it. Exiting 4 ...")
+                    sys.exit(4)
+        else:
+            return True
+    else:
+        return True
+
+
+def truncate_log(
+        json_data: list,
+        max_log_entries: int) -> list:
+
+    # Drop the oldest entries from the front of the list until the retention limit is met
+    while len(json_data) > max_log_entries:
+        del json_data[0]
+    return json_data


 def log_successful_download(
-        show: type_def.mvw_json_response.Show) -> None:
-    pass
+        section_name: str,
+        config_obj: configparser.ConfigParser(),
+        show: type_def.mvw_json_response.Show,
+        state_file_abs_path: str) -> None:
+
+    timestamp_now = int(time.time())
+    state_file_none_or_valid_json(state_file_abs_path)
+    os.makedirs(os.path.dirname(state_file_abs_path), exist_ok=True)
+
+    state_body = show.dict(include={"topic", "title"})
+    state_body["dl_complete_timestamp_epoch"] = timestamp_now
+    state_body["dl_complete_timestamp_human"] = \
+        d.datetime.utcfromtimestamp(timestamp_now).strftime("%Y-%m-%d %H%M%S UTC")
+    state_entry = {timestamp_now: state_body}
+
+    log.debug(f"Writing log entry to '{state_file_abs_path}' ...")
+    # "a+" creates the state file on first use; "r+" would raise FileNotFoundError here
+    with open(state_file_abs_path, "a+") as state_file:
+        state_file.seek(0)
+        try:
+            json_state = json.load(state_file)
+        except json.JSONDecodeError:
+            json_state = []
+
+    with open(state_file_abs_path, "w") as state_file:
+        json_state.append(state_entry)
+        max_log_entries = config_obj.getint(section_name, "state_file_retention")
+        if len(json_state) > max_log_entries:
+            json_state = truncate_log(json_state, max_log_entries)
+        json.dump(json_state, state_file, indent=4, sort_keys=True)


 def copy_url(
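For reference, log_successful_download appends one entry per finished download, keyed by the completion timestamp, and dumps the list with indent=4 and sort_keys=True. A state file with a single entry would therefore look roughly like this (the values are invented for illustration):

    [
        {
            "1646000000": {
                "dl_complete_timestamp_epoch": 1646000000,
                "dl_complete_timestamp_human": "2022-02-27 221320 UTC",
                "title": "Die Sendung mit der Maus",
                "topic": "Die Sendung mit der Maus"
            }
        }
    ]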
@@ -431,7 +504,8 @@ def copy_url(
         config_obj: configparser.ConfigParser(),
         show: type_def.mvw_json_response.Show,
         video_metadata: dict,
-        total_content_length: int) -> None:
+        total_content_length: int,
+        state_file_abs_path: str) -> None:
     """Copy data from a url to a local file."""

     global download_start_time
@@ -447,6 +521,9 @@ def copy_url(
     publish_date = d.datetime.utcfromtimestamp(show.timestamp).strftime('%Y%m%d')

     os.makedirs(os.path.dirname(dest_path), exist_ok=True)
+    # TODO remove after testing: exercise the state file write and quit early
+    log_successful_download(section_name, config_obj, show, state_file_abs_path)
+    quit()
     with open(dest_path, "wb") as dest_file:
         log.info(f"""Downloading "{show_name}" ...""")
         log.info(f"Download location resolved to {dest_path}")
@@ -468,7 +545,7 @@ def copy_url(
             log.info(f"""Download of "{show_name}" interrupted""")
             return
     log.info(f"""Download of "{show_name}" done""")
-    log_successful_download(show)
+    # log_successful_download(show)


 def get_max_quality_url(
@@ -500,6 +577,8 @@ def download_media(
     global download_last_update_time

     dl_threads = config_obj.getint(section_name, "dl_threads")
+    state_file_abs_path = get_state_file_abs_path(section_name, config_obj)
+    state_file_none_or_valid_json(state_file_abs_path)
     video_metadata = {}

     for result in json_obj.result.results.copy():
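download_media now resolves the state file path once per section and validates it before any download starts. With assumed values for the three path options, get_state_file_abs_path composes the path like this (a standalone sketch, not part of the commit):

    import configparser
    import os

    config_obj = configparser.ConfigParser()
    config_obj.read_dict({"maus": {
        "state_files_dir": "/var/lib/mvw-dl",  # assumed value
        "state_file_name_prefix": "state-",    # assumed value
        "state_file_name_suffix": ".log",      # assumed value
    }})

    section_name = "maus"
    state_file = config_obj.get(section_name, "state_file_name_prefix") + \
        section_name + \
        config_obj.get(section_name, "state_file_name_suffix")
    print(os.path.join(config_obj.get(section_name, "state_files_dir"), state_file))
    # /var/lib/mvw-dl/state-maus.log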
@@ -512,6 +591,7 @@ def download_media(
         video_metadata["total_content_length"] = total_content_length
     log.info(f"""Download location is {config_obj.get(section_name, "dl_dir")}""")
     log.info(f"Limiting parallel downloads to {dl_threads} ...")
+    # TODO prior to download check state file
     with ThreadPoolExecutor(max_workers=dl_threads) as pool:
         download_last_update_time = time.time()
         download_start_time = download_last_update_time
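The TODO above points at the intended follow-up: consult the state file before queueing a download so finished shows aren't fetched again. A minimal sketch of what that check could look like, assuming the JSON layout written by log_successful_download (is_already_downloaded is a hypothetical helper, not part of this commit):

    import json
    import os

    def is_already_downloaded(state_file_abs_path: str, topic: str, title: str) -> bool:
        # Hypothetical helper: report whether a show was already logged as downloaded
        if not os.path.exists(state_file_abs_path) or os.path.getsize(state_file_abs_path) == 0:
            return False
        with open(state_file_abs_path, "r") as state_file:
            json_state = json.load(state_file)
        # json_state is a list of {"<epoch>": {"topic": ..., "title": ..., ...}} entries
        for state_entry in json_state:
            for state_body in state_entry.values():
                if state_body.get("topic") == topic and state_body.get("title") == title:
                    return True
        return False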
@@ -524,7 +604,8 @@ def download_media(
                 config_obj,
                 result,
                 video_metadata[result.id],
-                video_metadata["total_content_length"])
+                video_metadata["total_content_length"],
+                state_file_abs_path)


 if __name__ == '__main__':