Generate safe filename

This commit is contained in:
hygienic-books 2022-03-19 03:52:26 +01:00
parent 7b391be89e
commit 7081c6b50a
2 changed files with 63 additions and 4 deletions

View File

@ -11,8 +11,9 @@ title_dedup_winner = first
dl_progress_update_interval = 10
dl_threads = 2
dl_filename_pattern = &(channel)s - &(publish_date)s - &(topic)s - &(title)s.&(ext)s
dl_filename_spaces_to_underscores = yes
dl_filename_all_lowercase = yes
publish_date_srtftime_pattern = %%Y%%m%%d
dl_filename_replace_spaces =
dl_filename_all_lowercase = no
[maus]
min_duration = 1200

View File

@ -3,6 +3,7 @@ import datetime as d
import json
import logging
import os
import pathlib
import re
import sys
import time
@ -92,7 +93,8 @@ class CONST(object):
{"key": "dl_progress_update_interval", "value": "10"},
{"key": "dl_threads", "value": "2"},
{"key": "dl_filename_pattern", "value": "&(channel)s - &(publish_date)s - &(topic)s - &(title)s"},
{"key": "dl_filename_spaces_to_underscores", "value": "yes"},
{"key": "publish_date_srtftime_pattern", "value": "%%Y%%m%%d"},
{"key": "dl_filename_replace_spaces", "value": "_"},
{"key": "dl_filename_all_lowercase", "value": "yes"}
]
CFG_KNOWN_SECTION = [
@ -355,14 +357,70 @@ def expanded_dest_dir(
return all_expanded_dest_dir
def filename_replace_pattern(
        section_name: str,
        config_obj: configparser.ConfigParser,
        show: type_def.mvw_json_response.Show,
        max_quality_url: str) -> str:
    """Expand the ``&(name)s`` placeholders of the configured filename pattern.

    The pattern is read from option ``dl_filename_pattern`` of ``section_name``.
    Placeholders are filled from two sources, in this order:

    1. every public, non-callable attribute of ``show`` (converted with ``str``);
    2. the derived values ``ext`` (suffix of ``max_quality_url`` without leading
       dots) and ``publish_date`` (``show.timestamp`` read as a UTC timestamp and
       formatted with option ``publish_date_srtftime_pattern``).

    Values are inserted literally; the result is NOT yet sanitized for use as a
    filename.

    :param section_name: Config section to read the options from.
    :param config_obj: Parsed configuration.
    :param show: Show whose attributes feed the placeholders.
    :param max_quality_url: Download URL the file extension is taken from.
    :return: The pattern with all known placeholders substituted.
    """

    filename = config_obj.get(section_name, "dl_filename_pattern")
    ext = pathlib.Path(max_quality_url).suffix.lstrip(".")
    # NOTE: the option name is spelled "srtftime" (sic); it must match the key
    # used in the config file and defaults.
    publish_date = d.datetime.utcfromtimestamp(show.timestamp).strftime(
        config_obj.get(section_name, "publish_date_srtftime_pattern"))
    show_extended = {"ext": ext, "publish_date": publish_date}
    show_attrs = [attr for attr in dir(show) if not attr.startswith('_') and not callable(getattr(show, attr))]

    # str.replace() is used instead of re.sub() on purpose: re.sub() treats the
    # replacement as a template, so a backslash inside a title or topic (e.g.
    # "\1" or "\d") would raise re.error or be silently rewritten.
    for attr in show_attrs:
        log.debug(f"Replacing filename pattern '&({attr})s' ...")
        filename = filename.replace(f"&({attr})s", str(getattr(show, attr)))
        log.debug(f"New filename: '{filename}'")
    for extended_attr, extended_value in show_extended.items():
        log.debug(f"Replacing filename pattern '&({extended_attr})s' ...")
        filename = filename.replace(f"&({extended_attr})s", extended_value)
        log.debug(f"New filename: '{filename}'")
    return filename
def get_safe_filename(
        dirty_filename: str) -> str:
    """Return ``dirty_filename`` with unsafe characters replaced by dashes.

    Each of ``/ \\ ? % * : | " < >``, the DEL character (0x7F) and every control
    character in the range 0x00-0x1F is replaced by a single ``-``.

    Approach taken from https://stackoverflow.com/a/71199182

    :param dirty_filename: Filename candidate that may contain unsafe characters.
    :return: The filename with every unsafe character turned into ``-``.
    """

    log.debug("Replacing unsafe characters in filename with dashes ...")
    unsafe_characters = re.compile(r"""[/\\?%*:|"<>\x7F\x00-\x1F]""")
    sanitized = unsafe_characters.sub("-", dirty_filename)
    log.debug(f"New filename: '{sanitized}'")
    return sanitized
def filename_replace_spaces_with_underscores(
        section_name: str,
        config_obj: configparser.ConfigParser,
        filename: str) -> str:
    """Replace every whitespace character in ``filename`` with a configured string.

    Despite the function name the replacement is not fixed to an underscore: it
    is whatever option ``dl_filename_replace_spaces`` of ``section_name`` holds.

    :param section_name: Config section to read the option from.
    :param config_obj: Parsed configuration.
    :param filename: Filename to process.
    :return: ``filename`` with each whitespace character (regex ``\\s``)
        substituted by the configured string.
    """

    space_replace_string = config_obj.get(section_name, "dl_filename_replace_spaces")
    log.debug(f"Replacing space characters with '{space_replace_string}' ...")
    # A callable replacement makes re.sub() insert the configured string
    # literally. Passing the string directly would have it parsed as a
    # replacement template, so a backslash in the config value would raise
    # re.error or be misinterpreted as an escape/group reference.
    underscored_filename = re.sub(
        r"\s",
        lambda _match: space_replace_string,
        filename)
    log.debug(f"New filename: '{underscored_filename}'")
    return underscored_filename
def get_filename(
        section_name: str,
        config_obj: configparser.ConfigParser,
        show: type_def.mvw_json_response.Show,
        max_quality_url: str) -> str:
    """Build the final download filename for ``show``.

    Steps, in order:

    1. expand the configured filename pattern (``filename_replace_pattern``);
    2. replace unsafe characters (``get_safe_filename``);
    3. if option ``dl_filename_replace_spaces`` is non-empty, replace whitespace
       with that value;
    4. if option ``dl_filename_all_lowercase`` is true, lowercase the result.

    :param section_name: Config section to read the options from.
    :param config_obj: Parsed configuration.
    :param show: Show the filename is generated for.
    :param max_quality_url: Download URL, used for the file extension.
    :return: The generated filename.
    """

    filename_replaced_patterns = filename_replace_pattern(section_name, config_obj, show, max_quality_url)
    filename_safe = get_safe_filename(filename_replaced_patterns)
    # Read options from the config object handed in by the caller rather than
    # from a module-level global, so the function honours its own parameter.
    if config_obj.get(section_name, "dl_filename_replace_spaces"):
        filename_safe = filename_replace_spaces_with_underscores(section_name, config_obj, filename_safe)
    if config_obj.getboolean(section_name, "dl_filename_all_lowercase"):
        log.debug("Lowercasing all filename characters ...")
        filename_safe = filename_safe.lower()
        log.debug(f"New filename: '{filename_safe}'")
    log.debug(filename_safe)
    # Return the result instead of terminating the process: the caller assigns
    # the return value and joins it onto the destination directory.
    return filename_safe
def log_successful_download(
        show: type_def.mvw_json_response.Show) -> None:
    """Placeholder for recording a successfully downloaded show.

    Not implemented yet: the function currently ignores ``show`` and does
    nothing.

    :param show: The show whose download succeeded.
    """

    return None
@ -382,7 +440,7 @@ def copy_url(
update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
max_quality_url = video_metadata["url"]
filename = max_quality_url.split("/")[-1]
filename = get_filename(section_name, config_obj, show, max_quality_url)
dest_dir = expanded_dest_dir(config_obj.get(section_name, "dl_dir"))
dest_path = os.path.join(dest_dir, filename)
show_name = f"{show.topic} - {show.title}"