112 lines
4.1 KiB
Python
112 lines
4.1 KiB
Python
#!/usr/bin/env python3
|
|
from pathlib import Path
|
|
import subprocess
|
|
import time
|
|
|
|
import click
|
|
|
|
|
|
def _read_channels(channel_file):
    """Parse CHANNEL_FILE into a ``{directory name: URL}`` mapping.

    Blank lines and lines starting with ``#`` are skipped. Every other line
    must look like ``<directory>;<url>``; only the first semicolon separates
    the two fields, so URLs may themselves contain semicolons.

    Raises:
        click.ClickException: if a line has no semicolon or an empty field.
    """
    channels = {}
    with open(channel_file) as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue

            dir_name, separator, url = line.partition(';')
            if not separator or not dir_name or not url:
                raise click.ClickException(
                    f"{channel_file}:{line_no}: expected '<directory>;<url>', got {line!r}")
            channels[dir_name] = url
    return channels


def _build_yt_dlp_command(url, archive_file, max_video_height, sleep_interval, max_sleep_interval,
                          with_playlist_index):
    """Return the yt-dlp argument list used to mirror a single channel URL."""
    # NOTE: Using %(formats.:.height)s doesn't work, because the
    # thumbnail doesn't take its own output template even when
    # provided with -o 'thumbnail:…'
    height_suffix = f" - {max_video_height}" if max_video_height else ''
    name_fmt = f"%(upload_date)s - %(title)s - %(id)s{height_suffix}.%(ext)s"
    if with_playlist_index:
        name_fmt = f"%(playlist_index)s - {name_fmt}"

    cmd = [
        'yt-dlp',
        '-o', name_fmt,
        '--sleep-interval', str(sleep_interval),
        '--max-sleep-interval', str(max_sleep_interval),
        # Explicit str(): keeps argv homogeneous and printable in error messages.
        '--download-archive', str(archive_file),
        '--break-on-existing',
        '--write-thumbnail',
        '--embed-thumbnail',
    ]
    if max_video_height:
        cmd.extend(['-f', f"bv*[height<={max_video_height}]+ba"])
    cmd.append(url)
    return cmd


def _run_streaming(cmd, cwd):
    """Run *cmd* inside *cwd*, echoing its stdout while it is still running.

    Returns:
        A ``(returncode, stdout_text, stderr_text)`` tuple. Both texts are
        decoded as UTF-8 with undecodable bytes replaced.
    """
    # Local import so this block does not depend on a new file-level import.
    import codecs

    # Incremental decoding: a chunk boundary may fall in the middle of a
    # multi-byte character, which a plain bytes.decode() would choke on.
    decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
    shown = 0  # number of stdout bytes already echoed

    with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd) as proc:
        # Loop until communicate() itself succeeds instead of polling the
        # process: that guarantees stdout_data/stderr_data are always bound
        # and that the tail of the output is never lost.
        while True:
            try:
                stdout_data, stderr_data = proc.communicate(timeout=0.5)
            except subprocess.TimeoutExpired as exc:
                # exc.output holds everything captured so far (may be None,
                # e.g. on platforms that do not report partial output).
                partial = exc.output or b''
                if len(partial) > shown:
                    print(decoder.decode(partial[shown:]), end='', flush=True)
                    shown = len(partial)
            else:
                break

        print(decoder.decode(stdout_data[shown:], final=True))
        return (proc.returncode,
                stdout_data.decode('utf-8', errors='replace'),
                stderr_data.decode('utf-8', errors='replace'))


# Developer notes (kept out of the docstring, which click shows as --help text):
#   * Unless --allow-unknown is given, every directory named in CHANNEL_FILE
#     must already exist below --base-dir; otherwise a click.ClickException
#     is raised before anything is downloaded.
#   * Each channel is downloaded by a separate yt-dlp process running inside
#     the channel directory, with a '.archive' download archive stored there.
#   * subprocess.CalledProcessError is raised when yt-dlp fails in a way that
#     is not recognised as a normal end of a mirror run.
@click.command()
@click.option('--allow-unknown', is_flag=True,
              help='Allow channels whose directory does not exist yet and create it; by default the command '
                   'errors out if one of the directories from the given list does not exist')
@click.option('--base-dir', type=Path, default='.', show_default=True,
              help='Base directory where the channel directories get created')
@click.option('--max-video-height', type=int,
              help='Maximum height of the video e.g. 720 or 1080 to filter video formats by')
@click.option('--sleep-interval', type=int, default=30, show_default=True,
              help="Minimum sleep interval between downloads")
@click.option('--max-sleep-interval', type=int, default=300, show_default=True,
              help="Maximum sleep interval between downloads (see also --sleep-interval)")
@click.option('--with-playlist-index', is_flag=True, help="Prepend the playlist index to the output file name")
@click.argument('CHANNEL_FILE', required=True)
def download(allow_unknown, base_dir, max_video_height, sleep_interval, max_sleep_interval, with_playlist_index,
             channel_file):
    """Mirror channels from YouTube

    The list of channels needs to be provided via CHANNEL_FILE. The format of the file should be one channel per line,
    each line consisting of the directory name and the URL to download the videos joined by a semicolon. Blank lines
    and lines starting with '#' are ignored.
    """
    base_dir = base_dir.resolve()
    channels = _read_channels(channel_file)

    if not allow_unknown:
        # Report every unknown channel before failing, not just the first one.
        unknown = [dir_name for dir_name in channels if not (base_dir / dir_name).is_dir()]
        for dir_name in unknown:
            print(f"Error: '{dir_name}' is an unknown channel")
        if unknown:
            raise click.ClickException("Unknown channels and '--allow-unknown' not given")

    for dir_name, url in channels.items():
        target_dir = base_dir / dir_name

        print(f"Starting download for {dir_name}")
        target_dir.mkdir(exist_ok=True)

        cmd = _build_yt_dlp_command(url, target_dir / '.archive', max_video_height, sleep_interval,
                                    max_sleep_interval, with_playlist_index)
        returncode, stdout_text, stderr_text = _run_streaming(cmd, target_dir)

        stdout_lines = stdout_text.splitlines()
        finished_playlist = bool(stdout_lines) and 'Finished downloading playlist' in stdout_lines[-1]

        # Outcomes treated as a normal end of one channel:
        #   0   - yt-dlp finished without errors; carry on with the next
        #         channel (this used to end the whole run after the first
        #         fully downloaded channel),
        #   101 - presumably yt-dlp's "download cancelled" code produced by
        #         --break-on-existing; verify against the yt-dlp docs,
        #   1   - tolerated only when the playlist was still processed to
        #         its end (individual videos failed).
        succeeded = returncode in (0, 101) or (returncode == 1 and finished_playlist)
        if not succeeded:
            if stderr_text:
                # stderr is captured, so surface it; otherwise the failure
                # reason would be lost.
                click.echo(stderr_text, err=True)
            raise subprocess.CalledProcessError(returncode=returncode, cmd=cmd,
                                                output=stdout_text, stderr=stderr_text)

        sleep_seconds = 5
        print(f"Sleeping {sleep_seconds}s")
        time.sleep(sleep_seconds)
|
|
|
|
|
|
# Script entry point: invoke the click command only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    download()
|