Files
patreon-downloader/batch.py
Space-Banane f34c90b999
Some checks failed
Code Check - Quality and Syntax / syntax-lint (3.14) (push) Has been cancelled
Code Check - Quality and Syntax / syntax-lint (3.12) (push) Has been cancelled
Code Check - Quality and Syntax / syntax-lint (3.11) (push) Has been cancelled
Code Check - Quality and Syntax / syntax-lint (3.13) (push) Has been cancelled
refactor: improve error handling and response formatting in API endpoints
2026-04-06 23:09:06 +02:00

219 lines
6.2 KiB
Python

import subprocess
import json
from pathlib import Path
import os
# Get cookies using the browser extension "Get cookies.txt" and export while
# logged in to your account.
COOKIES = "cookies.txt"
# Change this if you aren't on Windows. Get the latest release from GitHub.
YTDLP = "yt-dlp.exe"
# Destination root; one sanitized per-post subfolder is created beneath this.
OUTPUT_DIR = Path(__file__).parent / "YOUR_OUTPUT_FOLDER"
# Input options:
# - If `urls.json` exists in the project root it will be used.
# The JSON should be an array of strings (URLs).
# - Otherwise set URLs below or pass a custom JSON path via the
# `URLS_JSON` environment variable.
URLS = [
# fallback list — leave empty if you'll use urls.json
]
# Default JSON path sits next to this script; the URLS_JSON env var overrides it.
default_urls = str(Path(__file__).parent / "urls.json")
URLS_JSON = os.environ.get("URLS_JSON", default_urls)
# ANSI escape sequences for colored terminal output.
RESET = "\033[0m"
BOLD = "\033[1m"
RED = "\033[91m"
GREEN = "\033[92m"
YELLOW = "\033[93m"
CYAN = "\033[96m"
DIM = "\033[2m"
def log(msg, color=RESET):
    """Print *msg* wrapped in the given ANSI color code, flushing immediately."""
    colored = "".join((color, str(msg), RESET))
    print(colored, flush=True)
def get_post_id(url):
    """Return the trailing id segment of a post URL (text after the last '-').

    If the URL contains no '-', the whole (slash-trimmed) URL is returned.
    """
    trimmed = url.rstrip("/")
    # rpartition yields ('', '', trimmed) when '-' is absent, so [2] always works.
    return trimmed.rpartition("-")[2]
def fetch_info(url):
    """Probe *url* with yt-dlp and return its metadata.

    Returns a (info, error) pair: (dict, None) on success, or (None, reason)
    when yt-dlp exits non-zero, its output is not valid JSON, or the probe
    exceeds the 30 s timeout.
    """
    cmd = [
        YTDLP,
        "--dump-json",
        "--no-playlist",
        "--cookies",
        COOKIES,
        "--extractor-args",
        "generic:impersonate",
        url,
    ]
    cwd_dir = Path(__file__).parent
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
            cwd=cwd_dir,
        )
    except subprocess.TimeoutExpired:
        # Fix: an uncaught TimeoutExpired previously crashed the whole batch;
        # report a slow probe as a normal per-URL failure instead.
        return None, "metadata fetch timed out after 30s"
    if result.returncode != 0:
        return None, result.stderr.strip()
    try:
        return json.loads(result.stdout), None
    except json.JSONDecodeError:
        return None, "Failed to parse JSON"
def download(url, out_dir):
    """Download the best available media for *url* into *out_dir*.

    Returns (success, combined_output): success mirrors yt-dlp's exit
    status, combined_output is stdout+stderr stripped, for diagnostics.
    """
    output_template = str(out_dir / "%(title)s.%(ext)s")
    args = [
        YTDLP,
        "-f",
        "bestvideo+bestaudio/best",
        "--prefer-free-formats",
        "--cookies",
        COOKIES,
        "--extractor-args",
        "generic:impersonate",
        "--merge-output-format",
        "mp4",
        "-o",
        output_template,
        url,
    ]
    proc = subprocess.run(
        args, capture_output=True, text=True, cwd=Path(__file__).parent
    )
    combined = (proc.stdout + proc.stderr).strip()
    return proc.returncode == 0, combined
def sanitize(name, max_len=60):
    """Make *name* safe to use as a folder name.

    Every character that is not alphanumeric or one of " _-." becomes "_";
    the result is whitespace-stripped, truncated to *max_len*, and trimmed
    of leading/trailing "_", "." and spaces.
    """
    allowed_extras = " _-."
    replaced = []
    for ch in name:
        replaced.append(ch if ch.isalnum() or ch in allowed_extras else "_")
    safe = "".join(replaced)
    return safe.strip()[:max_len].strip("_. ")
def main():
    """Batch-download every configured Patreon post URL via yt-dlp.

    URLs come from the URLS_JSON file when it exists and contains a
    non-empty JSON array of strings; otherwise the module-level URLS list
    is used. Each post gets its own sanitized folder under OUTPUT_DIR.
    Prints a colored per-post progress log and a final summary of
    downloaded, skipped (no media) and failed URLs.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    total = len(URLS)
    # Load from JSON if present and non-empty; any error falls back to URLS.
    try:
        if Path(URLS_JSON).exists():
            with open(URLS_JSON, "r", encoding="utf-8") as fh:
                data = json.load(fh)
            if isinstance(data, list) and data:
                URLS.clear()
                for u in data:
                    if isinstance(u, str) and u.strip():
                        URLS.append(u)
                total = len(URLS)
    except Exception as e:
        log(f"Failed to load URLs from {URLS_JSON}: {e}", YELLOW)
    ok, skipped, failed = [], [], []
    # Fix: the original separator lines were `"" * 55` — always empty, so the
    # banner rules never rendered (box-drawing characters presumably lost to a
    # mis-encoding). Restored with a plain dashed rule.
    rule = "─" * 55
    log("\n" + rule, CYAN)
    log(" yt-dlp batch — {} posts → {}/".format(total, OUTPUT_DIR.name), BOLD)
    log(rule + "\n", CYAN)
    for i, url in enumerate(URLS, 1):
        post_id = get_post_id(url)
        header = "[{:02d}/{:d}]".format(i, total)
        log(header + " " + DIM + url + RESET)
        # Fetch metadata first to get the title
        log(" fetching info…", DIM)
        info, err = fetch_info(url)
        if info is None:
            # Likely a text post or unavailable
            reason = err[:80] if err else "no video found"
            msg = (
                " "
                + YELLOW
                + "⚠ skipped — no media ("
                + reason
                + ")"
                + RESET
            )
            log(msg)
            skipped.append((url, err))
            print()
            continue
        title = info.get("title") or f"post_{post_id}"
        folder_name = sanitize(title)
        out_dir = OUTPUT_DIR / folder_name
        out_dir.mkdir(parents=True, exist_ok=True)
        log(" title: " + BOLD + title[:55] + RESET)
        log(" dir: " + str(out_dir))
        log(" downloading…", DIM)
        success, output = download(url, out_dir)
        if success:
            # Find what was downloaded
            files = list(out_dir.iterdir())
            sizes = []
            for f in files:
                if f.is_file():
                    sizes.append("{:.1f} MB".format(f.stat().st_size / 1e6))
            msg = ", ".join(sizes) if sizes else "file saved"
            log(" " + GREEN + "✓ done — " + msg + RESET)
            ok.append(url)
        else:
            # Check if it's just no video
            # (text post that slipped through info check)
            if "no video" in output.lower() or "no formats" in output.lower():
                log(" " + YELLOW + "⚠ skipped — text post" + RESET)
                skipped.append((url, "no video content"))
                # Remove empty dir
                try:
                    out_dir.rmdir()
                except OSError:
                    pass
            else:
                log(" " + RED + "✗ failed" + RESET)
                # Print last few lines of output for context
                for line in output.splitlines()[-3:]:
                    log(" " + DIM + line + RESET)
                if output:
                    last_err = output.splitlines()[-1]
                else:
                    last_err = "unknown error"
                failed.append((url, last_err))
        print()
    # Summary
    log(rule, CYAN)
    log(" Summary", BOLD)
    log(rule, CYAN)
    log(" " + GREEN + "✓ downloaded: " + str(len(ok)) + RESET)
    skipped_msg = (
        " " + YELLOW + "⚠ skipped (text/no media): "
        + str(len(skipped))
        + RESET
    )
    log(skipped_msg)
    failed_msg = (
        " " + RED + "✗ failed: "
        + str(len(failed))
        + RESET
    )
    log(failed_msg)
    if failed:
        log("\n Failed URLs:", RED)
        for url, reason in failed:
            # Fix: prefix was the empty string — URL printed flush-left;
            # indent restored to match the reason line below.
            log("   " + url, RED)
            log(" " + DIM + reason[:80] + RESET)
    log(rule + "\n", CYAN)
if __name__ == "__main__":
main()