Some checks failed
Code Check - Quality and Syntax / syntax-lint (3.14) (push) Has been cancelled
Code Check - Quality and Syntax / syntax-lint (3.12) (push) Has been cancelled
Code Check - Quality and Syntax / syntax-lint (3.11) (push) Has been cancelled
Code Check - Quality and Syntax / syntax-lint (3.13) (push) Has been cancelled
219 lines
6.2 KiB
Python
219 lines
6.2 KiB
Python
import json
import os
import subprocess
from pathlib import Path

# Cookie jar exported with the "Get cookies.txt" browser extension while
# signed in to your account.
COOKIES = "cookies.txt"

# yt-dlp binary name. Change this if you aren't on Windows; grab the latest
# release from GitHub.
YTDLP = "yt-dlp.exe"

OUTPUT_DIR = Path(__file__).parent / "YOUR_OUTPUT_FOLDER"

# URL input resolution:
#   1. `urls.json` in the project root (a JSON array of URL strings), if present.
#   2. Otherwise the fallback URLS list below.
#   3. A custom JSON path may be supplied via the `URLS_JSON` env variable.
URLS = []  # fallback list -- leave empty if you'll use urls.json

default_urls = str(Path(__file__).parent / "urls.json")
URLS_JSON = os.environ.get("URLS_JSON", default_urls)

# ANSI escape sequences for colored terminal output.
RESET = "\x1b[0m"
BOLD = "\x1b[1m"
DIM = "\x1b[2m"
RED = "\x1b[91m"
GREEN = "\x1b[92m"
YELLOW = "\x1b[93m"
CYAN = "\x1b[96m"
|
|
|
|
|
|
def log(msg, color=RESET):
    """Print *msg* wrapped in the given ANSI color code, flushing stdout."""
    print("%s%s%s" % (color, msg, RESET), flush=True)
|
|
|
|
|
|
def get_post_id(url):
    """Return the last hyphen-delimited segment of *url* (the post id).

    Trailing slashes are ignored; a URL without any hyphen is returned whole.
    """
    trimmed = url.rstrip("/")
    return trimmed[trimmed.rfind("-") + 1:]
|
|
|
|
|
|
def fetch_info(url):
    """Probe *url* with yt-dlp and return its metadata.

    Returns a ``(info, error)`` pair: ``(dict, None)`` on success, or
    ``(None, message)`` when yt-dlp fails, times out, cannot be launched,
    or emits unparseable JSON.  Never raises for those expected failures,
    so one bad URL cannot abort the whole batch.
    """
    cmd = [
        YTDLP,
        "--dump-json",
        "--no-playlist",
        "--cookies",
        COOKIES,
        "--extractor-args",
        "generic:impersonate",
        url,
    ]
    cwd_dir = Path(__file__).parent
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
            cwd=cwd_dir,
        )
    except subprocess.TimeoutExpired:
        # Previously propagated and crashed the run mid-batch.
        return None, "yt-dlp timed out after 30s"
    except OSError as e:
        # e.g. the yt-dlp binary is missing from the script directory/PATH.
        return None, f"failed to launch {YTDLP}: {e}"
    if result.returncode != 0:
        return None, result.stderr.strip()
    try:
        return json.loads(result.stdout), None
    except json.JSONDecodeError:
        return None, "Failed to parse JSON"
|
|
|
|
|
|
def download(url, out_dir):
    """Download the media at *url* into *out_dir* with yt-dlp.

    Picks best video+audio (preferring free formats), merges into mp4, and
    names the file after the post title.  Returns ``(succeeded, output)``
    where *output* is the combined stdout+stderr text, stripped.
    """
    target_template = str(out_dir / "%(title)s.%(ext)s")
    argv = [YTDLP]
    argv += ["-f", "bestvideo+bestaudio/best"]
    argv += ["--prefer-free-formats"]
    argv += ["--cookies", COOKIES]
    argv += ["--extractor-args", "generic:impersonate"]
    argv += ["--merge-output-format", "mp4"]
    argv += ["-o", target_template]
    argv.append(url)
    proc = subprocess.run(
        argv, capture_output=True, text=True, cwd=Path(__file__).parent
    )
    combined = (proc.stdout + proc.stderr).strip()
    return proc.returncode == 0, combined
|
|
|
|
|
|
def sanitize(name, max_len=60):
    """Turn *name* into a filesystem-safe directory name.

    Alphanumerics plus ``" _-."`` are kept; every other character becomes
    ``_``.  The result is whitespace-stripped, truncated to *max_len*, and
    trimmed of leading/trailing ``_``, ``.`` and spaces.  Returns
    ``"untitled"`` instead of an empty string so an all-punctuation title
    cannot collapse the target folder into the output root.
    """
    safe = "".join(c if c.isalnum() or c in " _-." else "_" for c in name)
    safe = safe.strip()[:max_len].strip("_. ")
    return safe or "untitled"
|
|
|
|
|
|
def main():
    """Batch-download every URL in URLS (or urls.json) with yt-dlp.

    For each URL the post metadata is fetched first so text-only or
    unavailable posts are skipped before any download attempt; media is
    saved into a per-title subdirectory of OUTPUT_DIR.  A colored progress
    line is printed per item, followed by a downloaded/skipped/failed
    summary.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    total = len(URLS)
    # Load from JSON if present and non-empty
    try:
        if Path(URLS_JSON).exists():
            with open(URLS_JSON, "r", encoding="utf-8") as fh:
                data = json.load(fh)
            # Only replace the in-file fallback when the JSON is a
            # non-empty array; URLS is mutated in place.
            if isinstance(data, list) and data:
                URLS.clear()
                for u in data:
                    # Keep non-empty strings only; other entries are dropped.
                    if isinstance(u, str) and u.strip():
                        URLS.append(u)
                total = len(URLS)
    except Exception as e:
        # Unreadable/malformed JSON: warn and fall back to the in-file URLS.
        log(f"Failed to load URLs from {URLS_JSON}: {e}", YELLOW)

    ok, skipped, failed = [], [], []

    log("\n" + "━" * 55, CYAN)
    log(" yt-dlp batch — {} posts → {}/".format(total, OUTPUT_DIR.name), BOLD)
    log("" + "━" * 55 + "\n", CYAN)

    for i, url in enumerate(URLS, 1):
        post_id = get_post_id(url)
        header = "[{:02d}/{:d}]".format(i, total)
        log(header + " " + DIM + url + RESET)

        # Fetch metadata first to get the title
        log("  fetching info…", DIM)
        info, err = fetch_info(url)

        if info is None:
            # Likely a text post or unavailable
            reason = err[:80] if err else "no video found"
            msg = (
                "  "
                + YELLOW
                + "⚠ skipped — no media ("
                + reason
                + ")"
                + RESET
            )
            log(msg)
            skipped.append((url, err))
            print()
            continue

        # Build a per-post output folder from the sanitized title.
        title = info.get("title") or f"post_{post_id}"
        folder_name = sanitize(title)
        out_dir = OUTPUT_DIR / folder_name
        out_dir.mkdir(parents=True, exist_ok=True)

        log("  title: " + BOLD + title[:55] + RESET)
        log("  dir:   " + str(out_dir))
        log("  downloading…", DIM)

        success, output = download(url, out_dir)

        if success:
            # Find what was downloaded
            files = list(out_dir.iterdir())
            sizes = []
            for f in files:
                if f.is_file():
                    sizes.append("{:.1f} MB".format(f.stat().st_size / 1e6))
            msg = ", ".join(sizes) if sizes else "file saved"
            log("  " + GREEN + "✓ done — " + msg + RESET)
            ok.append(url)
        else:
            # Check if it's just no video
            # (text post that slipped through info check)
            if "no video" in output.lower() or "no formats" in output.lower():
                log("  " + YELLOW + "⚠ skipped — text post" + RESET)
                skipped.append((url, "no video content"))
                # Remove empty dir
                try:
                    out_dir.rmdir()
                except OSError:
                    # Dir not empty (partial files) — leave it in place.
                    pass
            else:
                log("  " + RED + "✗ failed" + RESET)
                # Print last few lines of output for context
                for line in output.splitlines()[-3:]:
                    log("    " + DIM + line + RESET)
                if output:
                    last_err = output.splitlines()[-1]
                else:
                    last_err = "unknown error"
                failed.append((url, last_err))

        print()

    # Summary
    log("" + "━" * 55, CYAN)
    log(" Summary", BOLD)
    log("" + "━" * 55, CYAN)
    log("  " + GREEN + "✓ downloaded: " + str(len(ok)) + RESET)
    skipped_msg = (
        "  " + YELLOW + "⚠ skipped (text/no media): "
        + str(len(skipped))
        + RESET
    )
    log(skipped_msg)
    failed_msg = (
        "  " + RED + "✗ failed: "
        + str(len(failed))
        + RESET
    )
    log(failed_msg)

    if failed:
        log("\n Failed URLs:", RED)
        for url, reason in failed:
            log("  • " + url, RED)
            log("    " + DIM + reason[:80] + RESET)

    log("" + "━" * 55 + "\n", CYAN)
|
|
|
|
|
|
# Run the batch only when executed as a script (not when imported).
if __name__ == "__main__":
    main()
|