From 171163b3d917951d8366be1fb7707c6f67b3710e Mon Sep 17 00:00:00 2001
From: MasterofJOKers
Date: Fri, 8 Mar 2024 16:21:32 +0100
Subject: [PATCH] Add mirror-yt.py - a wrapper around yt-dlp

This script is supposed to run regularly to mirror videos of certain
YouTube channels locally for archiving purposes.
---
 mirror-yt.py | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 mirror-yt.py

diff --git a/mirror-yt.py b/mirror-yt.py
new file mode 100644
index 0000000..6601edb
--- /dev/null
+++ b/mirror-yt.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+"""Mirror the videos of selected YouTube channels locally using yt-dlp."""
+from pathlib import Path
+import subprocess
+
+import click
+
+# Exit status yt-dlp reports when it stops early on purpose, e.g. because
+# --break-on-existing reached a video that is already in the archive.
+YT_DLP_CANCELLED = 101
+
+
+def _read_channels(channel_file):
+    """Parse CHANNEL_FILE into a mapping of directory name to URL.
+
+    Blank lines and lines starting with '#' are skipped. Every other line
+    must look like ``DIRECTORY;URL``; only the first semicolon separates
+    the two fields, so the URL itself may contain semicolons.
+
+    Raises:
+        click.ClickException: if a line lacks a directory name or a URL.
+    """
+    channels = {}
+    with open(channel_file, encoding='utf-8') as f:
+        for line_no, line in enumerate(f, start=1):
+            line = line.strip()
+            if not line or line.startswith('#'):
+                continue
+
+            dir_name, sep, url = line.partition(';')
+            dir_name, url = dir_name.strip(), url.strip()
+            if not (sep and dir_name and url):
+                raise click.ClickException(
+                    f"{channel_file}:{line_no}: expected 'DIRECTORY;URL', got '{line}'")
+
+            channels[dir_name] = url
+    return channels
+
+
+def _mirror_channel(target_dir, url):
+    """Run yt-dlp for one channel, storing new videos in target_dir.
+
+    Raises:
+        subprocess.CalledProcessError: if yt-dlp exits with a status other
+            than success or its deliberate early stop.
+    """
+    cmd = [
+        'yt-dlp',
+        '-o', '%(upload_date)s - %(title)s - %(id)s.%(ext)s',
+        '--sleep-interval', '30',
+        '--max-sleep-interval', '300',
+        '--download-archive', str(target_dir / '.archive'),
+        '--break-on-existing',
+        '--write-thumbnail',
+        url,
+    ]
+    # No check=True: stopping at the first already archived video is the
+    # normal outcome of a repeated run and must not abort the other channels.
+    result = subprocess.run(cmd, cwd=target_dir)
+    if result.returncode not in (0, YT_DLP_CANCELLED):
+        raise subprocess.CalledProcessError(result.returncode, cmd)
+
+
+@click.command()
+@click.option('--allow-unknown', is_flag=True,
+              help='Create missing channel directories instead of erroring out if one of '
+                   'the directories from the given list does not exist')
+@click.option('--base-dir', type=Path, default='.', show_default=True,
+              help='Base directory where the channel directories get created')
+@click.argument('CHANNEL_FILE', required=True)
+def download(allow_unknown, base_dir, channel_file):
+    """Mirror channels from YouTube
+
+    The list of channels needs to be provided via CHANNEL_FILE. The format of the file
+    should be one channel per line, each line consisting of the directory name and the
+    URL to download the videos joined by a semicolon. Blank lines and lines starting
+    with '#' are ignored.
+    """
+    base_dir = base_dir.resolve()
+    channels = _read_channels(channel_file)
+
+    if not allow_unknown:
+        # Guard against typos in the channel list: without --allow-unknown a
+        # channel is only mirrored into a directory that already exists.
+        unknown = [name for name in channels if not (base_dir / name).is_dir()]
+        for dir_name in unknown:
+            click.echo(f"Error: '{dir_name}' is an unknown channel", err=True)
+        if unknown:
+            raise click.ClickException("Unknown channels and '--allow-unknown' not given")
+
+    for dir_name, url in channels.items():
+        target_dir = base_dir / dir_name
+
+        click.echo(f"Starting download for {dir_name}")
+
+        target_dir.mkdir(parents=True, exist_ok=True)
+        _mirror_channel(target_dir, url)
+
+
+if __name__ == '__main__':
+    download()