Files
patreon-downloader/batch.py
Space-Banane f34c90b999
Some checks failed
Code Check - Quality and Syntax / syntax-lint (3.14) (push) Has been cancelled
Code Check - Quality and Syntax / syntax-lint (3.12) (push) Has been cancelled
Code Check - Quality and Syntax / syntax-lint (3.11) (push) Has been cancelled
Code Check - Quality and Syntax / syntax-lint (3.13) (push) Has been cancelled
refactor: improve error handling and response formatting in API endpoints
2026-04-06 23:09:06 +02:00

219 lines
6.2 KiB
Python

import subprocess
import json
from pathlib import Path
import os
# Get cookies using the browser extension "Get cookies.txt" and export while
# logged in to your account.
COOKIES = "cookies.txt"
# Change this if you aren't on Windows. Get the latest release from GitHub.
YTDLP = "yt-dlp.exe"
# Destination root; one sanitized per-post subfolder is created beneath this.
OUTPUT_DIR = Path(__file__).parent / "YOUR_OUTPUT_FOLDER"
# Input options:
# - If `urls.json` exists in the project root it will be used.
# The JSON should be an array of strings (URLs).
# - Otherwise set URLs below or pass a custom JSON path via the
# `URLS_JSON` environment variable.
URLS = [
# fallback list — leave empty if you'll use urls.json
]
# Default JSON path sits next to this script; the URLS_JSON env var overrides it.
default_urls = str(Path(__file__).parent / "urls.json")
URLS_JSON = os.environ.get("URLS_JSON", default_urls)
# ANSI escape sequences for colored terminal output.
RESET = "\033[0m"
BOLD = "\033[1m"
RED = "\033[91m"
GREEN = "\033[92m"
YELLOW = "\033[93m"
CYAN = "\033[96m"
DIM = "\033[2m"
def log(msg, color=RESET):
    """Print *msg* wrapped in the given ANSI color code, flushing immediately."""
    colored = "".join((color, str(msg), RESET))
    print(colored, flush=True)
def get_post_id(url):
    """Return the trailing id segment of a post URL (text after the last '-').

    If the URL contains no '-', the whole (slash-trimmed) URL is returned.
    """
    trimmed = url.rstrip("/")
    # rpartition yields ('', '', trimmed) when '-' is absent, so [2] always works.
    return trimmed.rpartition("-")[2]
def fetch_info(url):
    """Probe *url* with yt-dlp and return its metadata.

    Returns a (info, error) pair: (dict, None) on success, or (None, reason)
    when yt-dlp exits non-zero, its output is not valid JSON, or the probe
    exceeds the 30 s timeout.
    """
    cmd = [
        YTDLP,
        "--dump-json",
        "--no-playlist",
        "--cookies",
        COOKIES,
        "--extractor-args",
        "generic:impersonate",
        url,
    ]
    cwd_dir = Path(__file__).parent
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
            cwd=cwd_dir,
        )
    except subprocess.TimeoutExpired:
        # Fix: an uncaught TimeoutExpired previously crashed the whole batch;
        # report a slow probe as a normal per-URL failure instead.
        return None, "metadata fetch timed out after 30s"
    if result.returncode != 0:
        return None, result.stderr.strip()
    try:
        return json.loads(result.stdout), None
    except json.JSONDecodeError:
        return None, "Failed to parse JSON"
def download(url, out_dir):
    """Download the best available media for *url* into *out_dir*.

    Returns (success, combined_output): success mirrors yt-dlp's exit
    status, combined_output is stdout+stderr stripped, for diagnostics.
    """
    output_template = str(out_dir / "%(title)s.%(ext)s")
    args = [
        YTDLP,
        "-f",
        "bestvideo+bestaudio/best",
        "--prefer-free-formats",
        "--cookies",
        COOKIES,
        "--extractor-args",
        "generic:impersonate",
        "--merge-output-format",
        "mp4",
        "-o",
        output_template,
        url,
    ]
    proc = subprocess.run(
        args, capture_output=True, text=True, cwd=Path(__file__).parent
    )
    combined = (proc.stdout + proc.stderr).strip()
    return proc.returncode == 0, combined
def sanitize(name, max_len=60):
    """Make *name* safe to use as a folder name.

    Every character that is not alphanumeric or one of " _-." becomes "_";
    the result is whitespace-stripped, truncated to *max_len*, and trimmed
    of leading/trailing "_", "." and spaces.
    """
    allowed_extras = " _-."
    replaced = []
    for ch in name:
        replaced.append(ch if ch.isalnum() or ch in allowed_extras else "_")
    safe = "".join(replaced)
    return safe.strip()[:max_len].strip("_. ")
def main():
    """Batch-download every configured Patreon post URL via yt-dlp.

    URLs come from the URLS_JSON file when it exists and contains a
    non-empty JSON array of strings; otherwise the module-level URLS list
    is used. Each post gets its own sanitized folder under OUTPUT_DIR.
    Prints a colored per-post progress log and a final summary of
    downloaded, skipped (no media) and failed URLs.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    total = len(URLS)
    # Load from JSON if present and non-empty; any error falls back to URLS.
    try:
        if Path(URLS_JSON).exists():
            with open(URLS_JSON, "r", encoding="utf-8") as fh:
                data = json.load(fh)
            if isinstance(data, list) and data:
                URLS.clear()
                for u in data:
                    if isinstance(u, str) and u.strip():
                        URLS.append(u)
                total = len(URLS)
    except Exception as e:
        log(f"Failed to load URLs from {URLS_JSON}: {e}", YELLOW)
    ok, skipped, failed = [], [], []
    # Fix: the original separator lines were `"" * 55` — always empty, so the
    # banner rules never rendered (box-drawing characters presumably lost to a
    # mis-encoding). Restored with a plain dashed rule.
    rule = "─" * 55
    log("\n" + rule, CYAN)
    log(" yt-dlp batch — {} posts → {}/".format(total, OUTPUT_DIR.name), BOLD)
    log(rule + "\n", CYAN)
    for i, url in enumerate(URLS, 1):
        post_id = get_post_id(url)
        header = "[{:02d}/{:d}]".format(i, total)
        log(header + " " + DIM + url + RESET)
        # Fetch metadata first to get the title
        log(" fetching info…", DIM)
        info, err = fetch_info(url)
        if info is None:
            # Likely a text post or unavailable
            reason = err[:80] if err else "no video found"
            msg = (
                " "
                + YELLOW
                + "⚠ skipped — no media ("
                + reason
                + ")"
                + RESET
            )
            log(msg)
            skipped.append((url, err))
            print()
            continue
        title = info.get("title") or f"post_{post_id}"
        folder_name = sanitize(title)
        out_dir = OUTPUT_DIR / folder_name
        out_dir.mkdir(parents=True, exist_ok=True)
        log(" title: " + BOLD + title[:55] + RESET)
        log(" dir: " + str(out_dir))
        log(" downloading…", DIM)
        success, output = download(url, out_dir)
        if success:
            # Find what was downloaded
            files = list(out_dir.iterdir())
            sizes = []
            for f in files:
                if f.is_file():
                    sizes.append("{:.1f} MB".format(f.stat().st_size / 1e6))
            msg = ", ".join(sizes) if sizes else "file saved"
            log(" " + GREEN + "✓ done — " + msg + RESET)
            ok.append(url)
        else:
            # Check if it's just no video
            # (text post that slipped through info check)
            if "no video" in output.lower() or "no formats" in output.lower():
                log(" " + YELLOW + "⚠ skipped — text post" + RESET)
                skipped.append((url, "no video content"))
                # Remove empty dir
                try:
                    out_dir.rmdir()
                except OSError:
                    pass
            else:
                log(" " + RED + "✗ failed" + RESET)
                # Print last few lines of output for context
                for line in output.splitlines()[-3:]:
                    log(" " + DIM + line + RESET)
                if output:
                    last_err = output.splitlines()[-1]
                else:
                    last_err = "unknown error"
                failed.append((url, last_err))
        print()
    # Summary
    log(rule, CYAN)
    log(" Summary", BOLD)
    log(rule, CYAN)
    log(" " + GREEN + "✓ downloaded: " + str(len(ok)) + RESET)
    skipped_msg = (
        " " + YELLOW + "⚠ skipped (text/no media): "
        + str(len(skipped))
        + RESET
    )
    log(skipped_msg)
    failed_msg = (
        " " + RED + "✗ failed: "
        + str(len(failed))
        + RESET
    )
    log(failed_msg)
    if failed:
        log("\n Failed URLs:", RED)
        for url, reason in failed:
            # Fix: prefix was the empty string — URL printed flush-left;
            # indent restored to match the reason line below.
            log("   " + url, RED)
            log(" " + DIM + reason[:80] + RESET)
    log(rule + "\n", CYAN)
if __name__ == "__main__":
main()