From 12de3f13df90961cea87a476482240286076fe5d Mon Sep 17 00:00:00 2001 From: Luna Date: Tue, 7 Apr 2026 22:28:18 +0200 Subject: [PATCH] Initial commit: latest3 YouTube extractor --- .gitignore | 2 + README.md | 32 +++++++++++++ latest3.py | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100755 latest3.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7a60b85 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +*.pyc diff --git a/README.md b/README.md new file mode 100644 index 0000000..1d53322 --- /dev/null +++ b/README.md @@ -0,0 +1,32 @@ +# youtube-latest3 + +Fetch the latest uploads from a YouTube channel URL (including dynamic `@handle` URLs like `https://www.youtube.com/@ludwig/videos`). + +## Why this approach + +This tool resolves the channel ID from the exact channel URL, then reads YouTube's official uploads feed: + +- `https://www.youtube.com/feeds/videos.xml?channel_id=CHANNEL_ID` + +That feed is ordered by newest uploads, so fetching the first 3 entries gives the latest 3 videos. + +## Usage + +```bash +python3 latest3.py "https://www.youtube.com/@ludwig/videos" +python3 latest3.py "@ludwig" --json +python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --json +``` + +## Output fields + +- `id` - video ID +- `title` - video title +- `url` - watch URL +- `published` - ISO-8601 timestamp from the feed + +## Notes + +- Works with `@handle` URLs and plain `@handle` input. +- No API key required. +- If YouTube changes page markup for channel ID extraction, update `extract_channel_id()`. 
#!/usr/bin/env python3
"""Fetch the latest uploads from a YouTube channel URL or @handle.

Resolves the channel ID from the channel page HTML, then reads YouTube's
official Atom uploads feed (feeds/videos.xml?channel_id=...), which is
ordered newest-first, so the first N entries are the N latest videos.
No API key required.
"""
import argparse
import json
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

# Desktop-browser User-Agent: YouTube serves the full markup (including the
# channelId metadata we scrape) to browser UAs.
UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"


def http_get(url: str, timeout: int = 20) -> str:
    """GET *url* and return the response body decoded as UTF-8.

    Undecodable bytes are dropped ("ignore"): we only scan the page for
    ASCII markers, so lossy decoding is acceptable here.
    """
    req = urllib.request.Request(url, headers={"User-Agent": UA})
    with urllib.request.urlopen(req, timeout=timeout) as r:
        data = r.read()
    return data.decode("utf-8", "ignore")


def normalize_channel_url(raw: str) -> str:
    """Normalize user input (full URL or bare "@handle") to a canonical
    https://www.youtube.com/<path>/videos URL.

    Raises:
        ValueError: on empty input or a URL not on a YouTube host.
    """
    raw = raw.strip()
    if not raw:
        raise ValueError("Empty URL/handle")

    # Bare handle input, e.g. "@ludwig".
    if raw.startswith("@"):
        return f"https://www.youtube.com/{raw}/videos"

    if not raw.startswith("http://") and not raw.startswith("https://"):
        raw = "https://" + raw

    p = urllib.parse.urlparse(raw)
    if "youtube.com" not in p.netloc and "youtu.be" not in p.netloc:
        raise ValueError("Not a YouTube URL/handle")

    path = p.path or "/"
    # For handle URLs, land on the /videos tab (matches the bare-handle branch).
    if path.startswith("/@") and not path.rstrip("/").endswith("/videos"):
        path = path.rstrip("/") + "/videos"

    return urllib.parse.urlunparse(("https", "www.youtube.com", path, "", "", ""))


def extract_channel_id(html: str) -> str:
    """Extract the "UC..." channel ID from a channel page's HTML.

    Tries several known markers in order.

    Raises:
        RuntimeError: if no marker matches (e.g. YouTube changed its markup).
    """
    patterns = [
        r'"channelId":"(UC[0-9A-Za-z_-]{22})"',
        r'"externalId":"(UC[0-9A-Za-z_-]{22})"',
        # Fallback: the canonical <link> tag on channel pages.
        # (The previous revision had an empty pattern here, which always
        # matched and made m.group(1) raise IndexError, masking the
        # RuntimeError below.)
        r'<link rel="canonical" href="https://www\.youtube\.com/channel/(UC[0-9A-Za-z_-]{22})"',
    ]
    for pat in patterns:
        m = re.search(pat, html)
        if m:
            return m.group(1)
    raise RuntimeError("Could not resolve channel ID from URL")


def fetch_latest_from_feed(channel_id: str, limit: int = 3):
    """Return up to *limit* of the channel's newest uploads.

    Reads the official Atom feed for *channel_id* and returns a list of
    dicts with keys: id, title, url, published.

    Raises:
        RuntimeError: if the feed contains no entries.
    """
    feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
    xml_text = http_get(feed_url)
    root = ET.fromstring(xml_text)

    ns = {
        "atom": "http://www.w3.org/2005/Atom",
        "yt": "http://www.youtube.com/xml/schemas/2015",
    }

    out = []
    # Feed entries are ordered newest-first, so a prefix slice suffices.
    for entry in root.findall("atom:entry", ns)[:limit]:
        vid = entry.findtext("yt:videoId", default="", namespaces=ns)
        title = entry.findtext("atom:title", default="", namespaces=ns)
        published = entry.findtext("atom:published", default="", namespaces=ns)
        link_el = entry.find("atom:link", ns)
        # Prefer the feed's own link; fall back to a constructed watch URL.
        url = (
            link_el.attrib.get("href")
            if link_el is not None
            else (f"https://www.youtube.com/watch?v={vid}" if vid else "")
        )
        out.append(
            {
                "id": vid,
                "title": title,
                "url": url,
                "published": published,
            }
        )

    if not out:
        raise RuntimeError("No videos found in channel feed")
    return out


def get_latest_videos(channel_url_or_handle: str, limit: int = 3):
    """Resolve *channel_url_or_handle* and return its latest uploads.

    Returns a dict with: input (original argument), resolved_url,
    channel_id, and videos (see fetch_latest_from_feed).
    """
    normalized = normalize_channel_url(channel_url_or_handle)
    html = http_get(normalized)
    channel_id = extract_channel_id(html)
    videos = fetch_latest_from_feed(channel_id, limit=limit)
    return {
        "input": channel_url_or_handle,
        "resolved_url": normalized,
        "channel_id": channel_id,
        "videos": videos,
    }


def main():
    """CLI entry point: parse arguments, fetch, print text or JSON."""
    ap = argparse.ArgumentParser(description="Get latest YouTube uploads from a channel URL/handle")
    ap.add_argument("channel", help="YouTube channel URL (including @handle) or @handle")
    ap.add_argument("--limit", type=int, default=3, help="How many latest videos to return (default: 3)")
    ap.add_argument("--json", action="store_true", help="Print full JSON output")
    args = ap.parse_args()

    if args.limit < 1 or args.limit > 20:
        print("--limit must be between 1 and 20", file=sys.stderr)
        sys.exit(2)

    try:
        data = get_latest_videos(args.channel, limit=args.limit)
    except urllib.error.URLError as e:  # HTTPError is a subclass of URLError
        print(f"Network error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    if args.json:
        print(json.dumps(data, indent=2, ensure_ascii=False))
        return

    print(f"Channel ID: {data['channel_id']}")
    for i, v in enumerate(data["videos"], start=1):
        print(f"{i}. {v['title']}")
        print(f"   {v['url']}")
        print(f"   published: {v['published']}")


if __name__ == "__main__":
    main()