script-dump/mirror-yt.py

93 lines
3.0 KiB
Python

#!/usr/bin/env python3
import codecs
import subprocess
import time
from pathlib import Path

import click
# Exit status that the code treats as "stopped early on purpose". The command
# line below passes --break-on-existing; yt-dlp is expected to signal that stop
# with 101 (see the yt-dlp documentation).
_YT_DLP_EARLY_STOP_EXIT_CODE = 101

# Pause between two channels, in seconds.
_PAUSE_BETWEEN_CHANNELS = 5


def _read_channel_list(channel_file):
    """Parse CHANNEL_FILE into a ``{directory_name: url}`` mapping.

    Blank lines and lines starting with '#' are skipped. Every other line
    must have the form ``DIRECTORY;URL``; only the first semicolon separates
    the two parts, so URLs may themselves contain semicolons.

    Raises:
        click.ClickException: if a line has no semicolon or an empty part.
    """
    channels = {}
    with open(channel_file) as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue
            dir_name, separator, url = line.partition(';')
            if not separator or not dir_name or not url:
                raise click.ClickException(
                    f"{channel_file}:{line_no}: expected 'DIRECTORY;URL', got {line!r}"
                )
            channels[dir_name] = url
    return channels


def _run_streaming(cmd, cwd):
    """Run *cmd* inside *cwd*, echoing its stdout while it is running.

    Returns:
        A ``(returncode, stdout_text, stderr_text)`` tuple.
    """
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
    # Output arrives in arbitrary byte chunks; an incremental decoder keeps a
    # multi-byte UTF-8 character that is split across two polls intact.
    decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
    printed_bytes = 0
    while True:
        try:
            # communicate() is always called at least once, so the pipes get
            # drained even when the process has already exited.
            stdout_data, stderr_data = proc.communicate(timeout=0.5)
        except subprocess.TimeoutExpired as e:
            # e.output holds everything captured so far (or None when the
            # platform does not provide partial output).
            partial = e.output or b''
            text = decoder.decode(partial[printed_bytes:])
            if text:
                print(text, end='', flush=True)
            printed_bytes = len(partial)
        else:
            break
    stdout_data = stdout_data or b''
    stderr_data = stderr_data or b''
    print(decoder.decode(stdout_data[printed_bytes:], final=True))
    return (
        proc.returncode,
        stdout_data.decode('utf-8', errors='replace'),
        stderr_data.decode('utf-8', errors='replace'),
    )


def _download_succeeded(returncode, stdout_text):
    """Decide whether a finished yt-dlp run counts as a successful mirror."""
    if returncode in (0, _YT_DLP_EARLY_STOP_EXIT_CODE):
        return True
    lines = stdout_text.splitlines()
    last_line = lines[-1] if lines else ''
    # Exit status 1 is tolerated when the last line of output still reports
    # the playlist as finished (presumably single videos failed - confirm
    # against yt-dlp behaviour).
    return returncode == 1 and 'Finished downloading playlist' in last_line


@click.command()
@click.option('--allow-unknown', is_flag=True,
              help='Create missing channel directories instead of erroring out when '
                   'a directory from the given list does not exist')
@click.option('--base-dir', type=Path, default='.', show_default=True,
              help='Base directory where the channel directories get created')
@click.argument('CHANNEL_FILE', required=True)
def download(allow_unknown, base_dir, channel_file):
    """Mirror channels from YouTube

    The list of channels needs to be provided via CHANNEL_FILE. The file
    contains one channel per line, each line consisting of the directory name
    and the URL to download the videos from, joined by a semicolon. Blank
    lines and lines starting with '#' are ignored.
    """
    # Parameters (kept out of the docstring because click shows it as --help):
    #   allow_unknown: when False, every listed directory must already exist
    #       below base_dir, otherwise the command aborts before downloading.
    #   base_dir: directory below which one sub-directory per channel lives.
    #   channel_file: path of the channel list described above.
    # Raises click.ClickException for unknown channels or malformed lines and
    # subprocess.CalledProcessError when yt-dlp fails for a channel.
    base_dir = base_dir.resolve()
    channels = _read_channel_list(channel_file)

    if not allow_unknown:
        unknown = [name for name in channels if not (base_dir / name).is_dir()]
        for name in unknown:
            print(f"Error: '{name}' is an unknown channel")
        if unknown:
            raise click.ClickException("Unknown channels and '--allow-unknown' not given")

    for dir_name, url in channels.items():
        target_dir = base_dir / dir_name
        print(f"Starting download for {dir_name}")
        target_dir.mkdir(parents=True, exist_ok=True)
        cmd = [
            'yt-dlp',
            '-o', '%(upload_date)s - %(title)s - %(id)s.%(ext)s',
            '--sleep-interval', '30',
            '--max-sleep-interval', '300',
            '--download-archive', str(target_dir / '.archive'),
            '--break-on-existing',
            '--write-thumbnail',
            url,
        ]
        returncode, stdout_text, stderr_text = _run_streaming(cmd, target_dir)
        if not _download_succeeded(returncode, stdout_text):
            # stderr is captured, so surface it before failing; otherwise the
            # reason for the failure would be lost.
            if stderr_text:
                click.echo(stderr_text, err=True)
            raise subprocess.CalledProcessError(
                returncode=returncode, cmd=cmd, output=stdout_text, stderr=stderr_text
            )
        # A clean exit (status 0) moves on to the next channel as well; the
        # function used to return here, which skipped every remaining channel.
        print(f"Sleeping {_PAUSE_BETWEEN_CHANNELS}s")
        time.sleep(_PAUSE_BETWEEN_CHANNELS)
# Run the command only when this file is executed as a script, not on import.
if __name__ == "__main__":
    download()