Compare commits

5 Commits: 376932583e...25d0059f14

Author | SHA1 | Date
---|---|---
 | 25d0059f14 |
 | 296e2ca7e5 |
 | 34ef348929 |
 | 47bb1f761a |
 | d92236a2f2 |
@@ -8,6 +8,8 @@ state_file_name_prefix = state-
 state_file_name_suffix = .log
 mvw_endpoint = http://localhost:8000/api/query
 title_dedup_winner = first
+dl_progress_update_interval = 10
+dl_threads = 2
 
 [maus]
 min_duration = 1200
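These two settings drive the reworked downloader below: a progress line every 10 seconds and two parallel downloads. A minimal sketch of reading them back (the `[maus]` section name comes from this diff; the `config.ini` file name and the assumption that the keys above live in `[DEFAULT]` are mine):

    import configparser

    config = configparser.ConfigParser()
    config.read("config.ini")  # assumed file name

    # If the keys sit in [DEFAULT], every section such as [maus] inherits them
    update_interval = config.getint("maus", "dl_progress_update_interval")  # 10
    dl_threads = config.getint("maus", "dl_threads")  # 2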
@@ -17,5 +17,5 @@
   "sortOrder": "desc",
   "future": false,
   "offset": 0,
-  "size": 50
+  "size": 20
 }
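This JSON is the query payload sent to the mvw_endpoint configured above; the change caps each query at 20 results instead of 50. A minimal sketch of issuing such a query (field paths match this diff's json_response usage; the payload file name and the text/plain content type are assumptions based on MediathekViewWeb convention):

    import json
    import requests

    MVW_ENDPOINT = "http://localhost:8000/api/query"  # from the config above

    with open("query.json") as f:  # assumed payload file name
        payload = json.load(f)

    # MediathekViewWeb's query endpoint is commonly called with text/plain
    r = requests.post(MVW_ENDPOINT, data=json.dumps(payload),
                      headers={"Content-Type": "text/plain"})
    r.raise_for_status()
    print(r.json()["result"]["queryInfo"]["resultCount"])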
mvw-dl.py (170 lines changed)
@@ -28,6 +28,11 @@ from threading import Event
 from typing import Iterable
 from urllib.request import urlopen
 
+# TODO set locale for datetime and others to globally stick to en_US
+download_start_time = 0
+download_last_update_time = 0
+size_downloaded = 0
+
 from rich.progress import (
     BarColumn,
     DownloadColumn,
@@ -83,7 +88,9 @@ class CONST(object):
     {"key": "state_file_name_prefix", "value": "state-"},
     {"key": "state_file_name_suffix", "value": ".log"},
     {"key": "mvw_endpoint", "value": "http://localhost:8000/api/query"},
-    {"key": "title_dedup_winner", "value": "first"}
+    {"key": "title_dedup_winner", "value": "first"},
+    {"key": "dl_progress_update_interval", "value": "10"},
+    {"key": "dl_threads", "value": "2"}
 ]
 CFG_KNOWN_SECTION = [
     {"key": "min_duration", "is_mandatory": False},
@@ -338,82 +345,120 @@ def handle_sigint(signum, frame):
 signal.signal(signal.SIGINT, handle_sigint)
 
 
-def copy_url(
+def get_safe_filename(
+        dirty_filename: str) -> str:
+    """https://stackoverflow.com/a/71199182"""
+
+    clean_filename = re.sub(r"[/\\?%*:|\"<>\x7F\x00-\x1F]", "-", dirty_filename)
+    return clean_filename
+
+
+def log_successful_download(
         show: type_def.mvw_json_response.Show) -> None:
+    pass
+
+
+def copy_url(
+        section_name: str,
+        config_obj: configparser.ConfigParser(),
+        show: type_def.mvw_json_response.Show,
+        video_metadata: dict,
+        total_content_length: int) -> None:
     """Copy data from a url to a local file."""
 
-    url = show.url_video_hd
-    filename = url.split("/")[-1]
-    dest_path = os.path.join("./", filename)
     release_timestamp = d.datetime.utcfromtimestamp(show.timestamp).strftime('%A %x %X')
-    #s = requests.Session()
-    #newline = "\n"
-    #log.debug(f"Request method: {req.method}\n"
-    #          f"URL: {req.url}\n"
-    #          f"""{newline.join(f"Header '{header}': '{value}'" for header, value in list(req.headers.items()))}\n""")
+    global download_start_time
+    global download_last_update_time
+    global size_downloaded
+
+    update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
+    max_quality_url = video_metadata["url"]
+    filename = max_quality_url.split("/")[-1]
+    dest_dir = config_obj.get(section_name, "dl_dir")
+    dest_path = os.path.join(dest_dir, filename)
+    dest_path = os.path.expanduser(dest_path)
+    dest_path = os.path.expandvars(dest_path)
     show_name = f"{show.topic} - {show.title}"
     log.debug(f"""Downloading "{show_name}" posted {release_timestamp} ...""")
     publish_date = d.datetime.utcfromtimestamp(show.timestamp).strftime('%Y%m%d')
 
     os.makedirs(os.path.dirname(dest_path), exist_ok=True)
     with open(dest_path, "wb") as dest_file:
-        last_update_time = time.time()
-        r = requests.get(url, stream=True)
-        total_length = int(r.headers.get('content-length'))
         size_downloaded = 0
+        log.info(f"""Downloading "{show_name}" ...""")
+        log.info(f"Download location resolved to {dest_path}")
+        r = requests.get(max_quality_url, stream=True)
         for chunk in r.iter_content(32768):
             size_downloaded += len(chunk)
             dest_file.write(chunk)
-            if time.time() - last_update_time >= 10:
-                percentage_done = size_downloaded / total_length * 100
+            if time.time() - download_last_update_time >= update_interval:
+                download_last_update_time = time.time()
+                dl_speed_so_far = size_downloaded / (download_last_update_time - download_start_time)
+                human_dl_speed_so_far = f"{humanize.naturalsize(dl_speed_so_far, binary=True)}/s"
+                percentage_done = size_downloaded / total_content_length * 100
                 human_pct = "{:.1f}".format(percentage_done)
-                human_size_dl = humanize.naturalsize(size_downloaded)
-                human_total_dl = humanize.naturalsize(total_length)
-                last_update_time = time.time()
-                log.debug(f"""Download of "{show_name}" at {human_pct}% ({human_size_dl}/{human_total_dl})""")
+                human_size_dl = humanize.naturalsize(size_downloaded, binary=True)
+                human_total_dl = humanize.naturalsize(total_content_length, binary=True)
+                log.debug(f"Downloaded {human_pct}% ({human_size_dl}/{human_total_dl} at an average "
+                          f"{human_dl_speed_so_far})")
             if done_event.is_set():
                 log.debug(f"done_event")
                 log.info(f"""Download of "{show_name}" interrupted""")
                 return
 
     #got_json_response = MVWJSONResponse(**json.loads(s.content))
     #return got_json_response
 
     # progress.console.log(f"Requesting {url}")
     # response = urlopen(url)
     # # This will break if the response doesn't contain content length
     # progress.update(task_id, total=int(response.info()["Content-length"]))
     # with open(path, "wb") as dest_file:
     #     progress.start_task(task_id)
     #     for data in iter(partial(response.read, 32768), b""):
     #         dest_file.write(data)
     #         progress.update(task_id, advance=len(data))
     #         if done_event.is_set():
     #             return
     # progress.console.log(f"Downloaded {path}")
     log.info(f"""Download of "{show_name}" done""")
+    log_successful_download(show)
 
 
 #def download(urls: Iterable[str], dest_dir: str):
 #    """Download multuple files to the given directory."""
 #
 #    with progress:
 #        with ThreadPoolExecutor(max_workers=1) as pool:
 #            for url in urls:
 #                filename = url.split("/")[-1]
 #                dest_path = os.path.join(dest_dir, filename)
 #                task_id = progress.add_task("download", filename=filename, start=False)
 #                pool.submit(copy_url, task_id, url, dest_path)
+def get_max_quality_url(
+        show: type_def.mvw_json_response.Show) -> str:
+    if show.url_video_hd:
+        max_quality_url = show.url_video_hd
+    elif show.url_video:
+        max_quality_url = show.url_video
+    else:
+        max_quality_url = show.url_video_low
+    return max_quality_url
+
+
+def get_content_length(
+        video_url: str) -> int:
+    r = requests.head(video_url)
+    if r.status_code == requests.codes.ok:
+        return int(r.headers["content-length"])
+    else:
+        return 0
+
+
 def download_media(
         section_name: str,
-        config_obj: configparser.ConfigParser()) -> None:
-    with ThreadPoolExecutor(max_workers=2) as pool:
-        for result in json_response.result.results.copy():
-            # filename = url.split("/")[-1]
-            # dest_path = os.path.join(dest_dir, filename)
-            # task_id = progress.add_task("download", filename=filename, start=False)
-            pool.submit(copy_url, result)
-    # TODO before sending into pool validate which url we're going to use
-    # TODO from each url get total content-length
-    # TODO use total content-length for overall progress of what we want to download
-    pass
+        config_obj: configparser.ConfigParser(),
+        json_obj: MVWJSONResponse) -> None:
+
+    global download_start_time
+    global download_last_update_time
+
+    dl_threads = config_obj.getint(section_name, "dl_threads")
+    video_metadata = {}
+
+    for result in json_obj.result.results.copy():
+        max_quality_url = get_max_quality_url(result)
+        content_length = get_content_length(max_quality_url)
+        video_metadata[result.id] = {"url": max_quality_url, "content_length": content_length}
+    total_content_length = 0
+    for video in video_metadata:
+        total_content_length += video_metadata[video]["content_length"]
+    video_metadata["total_content_length"] = total_content_length
+    log.info(f"""Download location is {config_obj.get(section_name, "dl_dir")}""")
+    log.info(f"Limiting parallel downloads to {dl_threads} ...")
+    with ThreadPoolExecutor(max_workers=dl_threads) as pool:
+        download_last_update_time = time.time()
+        download_start_time = download_last_update_time
+        update_interval = config_obj.getint(section_name, "dl_progress_update_interval")
+        log.debug(f"""Will provide updates every {update_interval} {p.plural("second", update_interval)}""")
+        for result in json_obj.result.results.copy():
+            pool.submit(
+                copy_url,
+                section_name,
+                config_obj,
+                result,
+                video_metadata[result.id],
+                video_metadata["total_content_length"])
 
 
 if __name__ == '__main__':
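The two helpers added above first pick the best available stream per show, then size it with a HEAD request so download_media can report progress against a grand total. A self-contained sketch of that pattern (plain URLs stand in for this PR's Show objects; the example URL is hypothetical):

    from typing import Optional

    import requests

    def best_url(url_hd: Optional[str], url_sd: Optional[str],
                 url_low: Optional[str]) -> str:
        # Prefer HD, fall back to SD, then to the low-bitrate stream
        return url_hd or url_sd or url_low

    def content_length(url: str) -> int:
        # HEAD reads the size without downloading the body;
        # 0 when the server does not report content-length
        r = requests.head(url, allow_redirects=True)
        return int(r.headers.get("content-length", 0)) if r.ok else 0

    urls = [best_url(None, "https://example.org/video.mp4", None)]
    total = sum(content_length(u) for u in urls)
    print(f"{total} bytes queued for download")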
@@ -443,7 +488,8 @@ if __name__ == '__main__':
     if config.has_option(section, "title_not_regex"):
         json_response = dedup_json_titles(section, config, json_response)
 
-    log.debug(f"Downloading shows ...")
-    download_media(section, config)
+    log.debug(f"Downloading {json_response.result.queryInfo.resultCount} "
+              f"""{p.plural("show", json_response.result.queryInfo.resultCount)} ...""")
+    download_media(section, config, json_response)
 
 # console.print_json(json_response.json())
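The reworked log line uses inflect to pluralize "show" based on the result count; a quick illustration (assuming p = inflect.engine(), as the p.plural calls elsewhere in this diff suggest):

    import inflect

    p = inflect.engine()
    print(p.plural("show", 1))   # show
    print(p.plural("show", 20))  # shows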