Initial commit: add core functionality for Patreon media downloading with UI and batch processing support

2026-04-06 23:00:10 +02:00
commit 75b071c5be
7 changed files with 786 additions and 0 deletions
--- a/batch.py
+++ b/batch.py
@@ -0,0 +1,154 @@
+import subprocess
+import json
+from pathlib import Path
+import os
+
+# Cookie file handed to yt-dlp via --cookies. Export it with a browser extension
+# such as "Get cookies.txt" while logged in to your account. yt-dlp is started
+# with this script's folder as its working directory, so the relative path is
+# looked up there.
+COOKIES = "cookies.txt"
+# yt-dlp executable to run. Change this if you aren't on Windows; the latest
+# release is available on GitHub.
+YTDLP = "yt-dlp.exe"
+# Root folder for downloads (placeholder name — replace it). main() creates it
+# if it does not exist yet.
+OUTPUT_DIR = Path(__file__).parent / "YOUR_OUTPUT_FOLDER"
+
+# Input options:
+# - If the file named by URLS_JSON exists and holds a non-empty JSON array, its
+#   string entries replace the contents of URLS.
+# - Otherwise the URLS list below is used as-is.
+URLS = [
+    # fallback list — leave empty if you'll use urls.json
+]
+
+# Path of the JSON URL list: the URLS_JSON environment variable when set,
+# otherwise urls.json next to this script.
+URLS_JSON = os.environ.get("URLS_JSON", str(Path(__file__).parent / "urls.json"))
+
+# ANSI escape sequences used to style terminal output. RESET clears any styling
+# applied by the others.
+RESET  = "\033[0m"
+BOLD   = "\033[1m"
+RED    = "\033[91m"
+GREEN  = "\033[92m"
+YELLOW = "\033[93m"
+CYAN   = "\033[96m"
+DIM    = "\033[2m"
+
+def log(msg, color=RESET):
+    """Print *msg* wrapped in the ANSI *color* code and followed by RESET.
+
+    The output is flushed immediately so progress lines show up as they happen.
+    """
+    styled = "{}{}{}".format(color, msg, RESET)
+    print(styled, flush=True)
+
+def get_post_id(url):
+    """Return the trailing dash-separated token of the last path segment of *url*.
+
+    For ``https://www.patreon.com/posts/some-title-12345`` this is ``"12345"``.
+    Any query string or fragment is dropped first, and only the last path
+    segment is inspected, so dashes in the host name or in tracking parameters
+    cannot leak into the result. A segment without a dash is returned whole.
+    """
+    # Cut off "#fragment" and "?query" before looking at the path.
+    path = url.split("#", 1)[0].split("?", 1)[0].rstrip("/")
+    last_segment = path.rsplit("/", 1)[-1]
+    return last_segment.rsplit("-", 1)[-1]
+
+def fetch_info(url, timeout=30):
+    """Ask yt-dlp for the metadata of *url* without downloading anything.
+
+    Args:
+        url: Address handed to yt-dlp.
+        timeout: Seconds to wait for yt-dlp before giving up.
+
+    Returns:
+        A ``(info, error)`` pair. On success ``info`` is the decoded JSON value
+        and ``error`` is ``None``. On failure ``info`` is ``None`` and ``error``
+        is a short message (yt-dlp's stderr, a timeout/launch error, or
+        ``"Failed to parse JSON"``).
+    """
+    cmd = [
+        YTDLP,
+        "--dump-json", "--no-playlist",
+        "--cookies", COOKIES,
+        "--extractor-args", "generic:impersonate",
+        url
+    ]
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout, cwd=Path(__file__).parent)
+    except subprocess.TimeoutExpired:
+        # Report a hung lookup as an ordinary failure so one slow URL cannot
+        # abort the whole batch with a traceback.
+        return None, f"Timed out after {timeout}s while fetching info"
+    except OSError as exc:
+        # Raised when the executable is missing or cannot be started.
+        return None, f"Could not run {YTDLP}: {exc}"
+    if result.returncode != 0:
+        return None, result.stderr.strip()
+    try:
+        return json.loads(result.stdout), None
+    except json.JSONDecodeError:
+        pass
+    # yt-dlp prints one JSON document per line when a URL yields several
+    # entries; fall back to the first non-empty line in that case.
+    for line in result.stdout.splitlines():
+        if not line.strip():
+            continue
+        try:
+            return json.loads(line), None
+        except json.JSONDecodeError:
+            break
+    return None, "Failed to parse JSON"
+
+def download(url, out_dir):
+    """Run yt-dlp to download *url* into *out_dir*.
+
+    yt-dlp is started from this script's folder with the configured cookie
+    file, and the file name is built from the ``%(title)s.%(ext)s`` template.
+    No timeout is applied to the download.
+
+    Returns:
+        ``(success, output)`` where ``success`` is ``True`` when yt-dlp exits
+        with status 0 and ``output`` is its stdout followed by its stderr,
+        with surrounding whitespace stripped.
+    """
+    output_template = str(out_dir / "%(title)s.%(ext)s")
+
+    argv = [YTDLP]
+    argv += ["-f", "bestvideo+bestaudio/best", "--prefer-free-formats"]
+    argv += ["--cookies", COOKIES]
+    argv += ["--extractor-args", "generic:impersonate"]
+    argv += ["--merge-output-format", "mp4"]
+    argv += ["-o", output_template, url]
+
+    proc = subprocess.run(argv, cwd=Path(__file__).parent, text=True, capture_output=True)
+    succeeded = proc.returncode == 0
+    combined = proc.stdout + proc.stderr
+    return succeeded, combined.strip()
+
+def sanitize(name, max_len=60):
+    """Turn *name* into a string that is safe to use as a folder name.
+
+    Every character that is neither alphanumeric nor one of space, ``_``,
+    ``-`` or ``.`` becomes ``_``. The result is stripped of surrounding
+    whitespace, cut to *max_len* characters, and then stripped of leading and
+    trailing underscores, dots and spaces. The result may be empty.
+    """
+    allowed_punctuation = " _-."
+    cleaned = []
+    for ch in name:
+        keep = ch.isalnum() or ch in allowed_punctuation
+        cleaned.append(ch if keep else "_")
+    trimmed = "".join(cleaned).strip()
+    return trimmed[:max_len].strip("_. ")
+
+def _load_urls_from_json(path):
+    """Return the stripped, non-blank string entries of the JSON array at *path*.
+
+    Returns ``None`` when the file is missing, cannot be read or parsed, or does
+    not hold a non-empty list, so the caller keeps its fallback URLs.
+    """
+    try:
+        if not Path(path).exists():
+            return None
+        with open(path, "r", encoding="utf-8") as fh:
+            data = json.load(fh)
+    except (OSError, ValueError) as e:
+        # ValueError covers both malformed JSON and undecodable bytes.
+        log(f"Failed to load URLs from {path}: {e}", YELLOW)
+        return None
+    if not (isinstance(data, list) and data):
+        return None
+    # Store the stripped form so stray whitespace never reaches yt-dlp.
+    return [u.strip() for u in data if isinstance(u, str) and u.strip()]
+
+
+def _print_summary(ok, skipped, failed):
+    """Print the per-category counts and the list of failed URLs with reasons."""
+    log(f"{'━'*55}", CYAN)
+    log("  Summary", BOLD)
+    log(f"{'━'*55}", CYAN)
+    log(f"  {GREEN}✓ downloaded: {len(ok)}{RESET}")
+    log(f"  {YELLOW}⚠ skipped (text/no media): {len(skipped)}{RESET}")
+    log(f"  {RED}✗ failed: {len(failed)}{RESET}")
+
+    if failed:
+        log("\n  Failed URLs:", RED)
+        for url, reason in failed:
+            log(f"    • {url}", RED)
+            log(f"      {DIM}{reason[:80]}{RESET}")
+
+    log(f"{'━'*55}\n", CYAN)
+
+
+def main():
+    """Download every configured URL into its own folder and print a summary.
+
+    URLs come from the JSON file named by ``URLS_JSON`` when it holds a
+    non-empty array (the module-level ``URLS`` list is replaced in place),
+    otherwise from ``URLS`` as written. For each URL the metadata is fetched
+    first to obtain a title, a folder named after the sanitized title is
+    created under ``OUTPUT_DIR``, and the media is downloaded into it. URLs
+    without media are counted as skipped; other errors are counted as failed.
+    """
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+    loaded = _load_urls_from_json(URLS_JSON)
+    if loaded is not None:
+        URLS[:] = loaded
+    total = len(URLS)
+
+    ok, skipped, failed = [], [], []
+
+    log(f"\n{'━'*55}", CYAN)
+    log(f"  yt-dlp batch — {total} posts → {OUTPUT_DIR.name}/", BOLD)
+    log(f"{'━'*55}\n", CYAN)
+
+    if not URLS:
+        log(f"  no URLs to process — fill in {URLS_JSON} or the URLS list\n", YELLOW)
+
+    for i, url in enumerate(URLS, 1):
+        post_id = get_post_id(url)
+        log(f"[{i:02d}/{total}] {DIM}{url}{RESET}")
+
+        # Fetch metadata first to get the title
+        log("       fetching info…", DIM)
+        info, err = fetch_info(url)
+
+        if info is None:
+            # Likely a text post or unavailable
+            log(f"       {YELLOW}⚠ skipped — no media ({err[:80] if err else 'no video found'}){RESET}")
+            skipped.append((url, err))
+            print()
+            continue
+
+        title = info.get("title") or f"post_{post_id}"
+        # A title made only of symbols sanitizes to "", which would make
+        # out_dir equal OUTPUT_DIR itself; fall back to a name built from the
+        # post id so every post keeps its own folder.
+        folder_name = sanitize(title) or sanitize(f"post_{post_id}")
+        out_dir = OUTPUT_DIR / folder_name
+        out_dir.mkdir(parents=True, exist_ok=True)
+
+        log(f"       title: {BOLD}{title[:55]}{RESET}")
+        log(f"       dir:   {out_dir}")
+        log("       downloading…", DIM)
+
+        success, output = download(url, out_dir)
+
+        if success:
+            # Report the size of every file now present in the post's folder
+            sizes = [f"{f.stat().st_size / 1e6:.1f} MB" for f in out_dir.iterdir() if f.is_file()]
+            log(f"       {GREEN}✓ done — {', '.join(sizes) if sizes else 'file saved'}{RESET}")
+            ok.append(url)
+        elif "no video" in output.lower() or "no formats" in output.lower():
+            # Text post that slipped through the info check
+            log(f"       {YELLOW}⚠ skipped — text post{RESET}")
+            skipped.append((url, "no video content"))
+            # Best-effort cleanup: rmdir only succeeds on an empty folder
+            try:
+                out_dir.rmdir()
+            except OSError:
+                pass
+        else:
+            log(f"       {RED}✗ failed{RESET}")
+            # Print last few lines of output for context
+            tail = output.splitlines()
+            for line in tail[-3:]:
+                log(f"         {DIM}{line}{RESET}")
+            failed.append((url, tail[-1] if tail else "unknown error"))
+
+        print()
+
+    _print_summary(ok, skipped, failed)
+
+# Run the batch only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()