#!/usr/bin/env python3
"""Fetch the latest uploads of a YouTube channel, no API key required.

Resolves a channel URL or bare @handle to its UC... channel ID by scraping
the channel page, then reads the channel's public Atom feed
(https://www.youtube.com/feeds/videos.xml) to list recent videos.
"""

import argparse
import json
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET
from functools import lru_cache

# Browser-like UA: YouTube serves a different (sparser) page to unknown agents.
UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"


def http_get(url: str, timeout: int = 20) -> str:
    """GET *url* with a browser User-Agent and return the body as text.

    Undecodable bytes are dropped (errors="ignore") so scraped pages can
    never raise UnicodeDecodeError.
    """
    req = urllib.request.Request(url, headers={"User-Agent": UA})
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return resp.read().decode("utf-8", "ignore")


def normalize_channel_url(raw: str) -> str:
    """Turn a channel URL or bare "@handle" into a canonical https URL.

    "@handle" inputs and "/@handle" paths are pointed at the channel's
    /videos tab (where the channel ID markers are present).

    Raises:
        ValueError: on empty input or a non-YouTube host.
    """
    raw = raw.strip()
    if not raw:
        raise ValueError("Empty URL/handle")
    if raw.startswith("@"):
        return f"https://www.youtube.com/{raw}/videos"
    if not raw.startswith(("http://", "https://")):
        raw = "https://" + raw
    parts = urllib.parse.urlparse(raw)
    if "youtube.com" not in parts.netloc and "youtu.be" not in parts.netloc:
        raise ValueError("Not a YouTube URL/handle")
    path = parts.path or "/"
    if path.startswith("/@") and not path.rstrip("/").endswith("/videos"):
        path = path.rstrip("/") + "/videos"
    # Rebuild on the canonical host, dropping query/fragment.
    return urllib.parse.urlunparse(("https", "www.youtube.com", path, "", "", ""))


def extract_channel_id(html: str) -> str:
    """Extract the "UC..." channel ID from a channel page's HTML.

    Tries several known markers in order. (The original code had an empty
    regex here, which matched every page and made ``m.group(1)`` raise
    IndexError instead of reaching the RuntimeError below.)

    Raises:
        RuntimeError: when no marker matches.
    """
    patterns = [
        r'"channelId":"(UC[0-9A-Za-z_-]{22})"',
        r'"externalId":"(UC[0-9A-Za-z_-]{22})"',
        # Fallback: canonical link on channel pages points at /channel/UC...
        r'href="https://www\.youtube\.com/channel/(UC[0-9A-Za-z_-]{22})"',
    ]
    for pat in patterns:
        m = re.search(pat, html)
        if m:
            return m.group(1)
    raise RuntimeError("Could not resolve channel ID from URL")


@lru_cache(maxsize=512)
def is_live_content(video_id: str) -> bool:
    """Return True if the watch page marks the video as live content.

    Fetches the watch page (one request per unique ID; results are cached)
    and checks the "isLiveContent" flag in the embedded player JSON.
    Returns False when the flag is absent.
    """
    watch_url = f"https://www.youtube.com/watch?v={video_id}"
    html = http_get(watch_url)
    m = re.search(r'"isLiveContent":(true|false)', html)
    return bool(m and m.group(1) == "true")


def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = False, no_vods: bool = False):
    """Return up to *limit* recent videos from the channel's Atom feed.

    Each item is a dict with "id", "title", "url", "published".
    With *no_shorts*, entries whose URL contains "/shorts/" are skipped;
    with *no_vods*, entries whose watch page reports isLiveContent=true are
    skipped (costs one extra request per candidate video).

    Raises:
        RuntimeError: when the feed yields no (matching) entries.
    """
    feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
    xml_text = http_get(feed_url)
    root = ET.fromstring(xml_text)
    ns = {
        "atom": "http://www.w3.org/2005/Atom",
        "yt": "http://www.youtube.com/xml/schemas/2015",
        "media": "http://search.yahoo.com/mrss/",
    }
    out = []
    for entry in root.findall("atom:entry", ns):
        vid = entry.findtext("yt:videoId", default="", namespaces=ns)
        title = entry.findtext("atom:title", default="", namespaces=ns)
        published = entry.findtext("atom:published", default="", namespaces=ns)
        link_el = entry.find("atom:link", ns)
        # Prefer the feed's own link; fall back to a constructed watch URL.
        url = link_el.attrib.get("href") if link_el is not None else (f"https://www.youtube.com/watch?v={vid}" if vid else "")
        if no_shorts and "/shorts/" in (url or ""):
            continue
        if no_vods and vid and is_live_content(vid):
            continue
        out.append(
            {
                "id": vid,
                "title": title,
                "url": url,
                "published": published,
            }
        )
        if len(out) >= limit:
            break
    if not out:
        raise RuntimeError("No videos found in channel feed")
    return out


def get_latest_videos(channel_url_or_handle: str, limit: int = 3, no_shorts: bool = False, no_vods: bool = False):
    """Resolve a channel and return its latest videos.

    Returns a dict with "input", "resolved_url", "channel_id", "videos".
    Propagates ValueError / RuntimeError / urllib errors from the helpers.
    """
    normalized = normalize_channel_url(channel_url_or_handle)
    html = http_get(normalized)
    channel_id = extract_channel_id(html)
    videos = fetch_latest_from_feed(channel_id, limit=limit, no_shorts=no_shorts, no_vods=no_vods)
    return {
        "input": channel_url_or_handle,
        "resolved_url": normalized,
        "channel_id": channel_id,
        "videos": videos,
    }


def main():
    """CLI entry point: parse args, fetch, print text or JSON.

    Exit codes: 0 success, 1 fetch/parse failure, 2 bad --limit.
    """
    ap = argparse.ArgumentParser(description="Get latest YouTube uploads from a channel URL/handle")
    ap.add_argument("channel", help="YouTube channel URL (including @handle) or @handle")
    ap.add_argument("--limit", type=int, default=3, help="How many latest videos to return (default: 3)")
    ap.add_argument("--json", action="store_true", help="Print full JSON output")
    ap.add_argument("--no-shorts", action="store_true", help="Exclude Shorts URLs")
    ap.add_argument("--no-vods", action="store_true", help="Exclude livestream VODs (isLiveContent=true)")
    args = ap.parse_args()

    if args.limit < 1 or args.limit > 20:
        print("--limit must be between 1 and 20", file=sys.stderr)
        sys.exit(2)

    try:
        data = get_latest_videos(
            args.channel,
            limit=args.limit,
            no_shorts=args.no_shorts,
            no_vods=args.no_vods,
        )
    # URLError already covers HTTPError (its subclass).
    except urllib.error.URLError as e:
        print(f"Network error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    if args.json:
        print(json.dumps(data, indent=2, ensure_ascii=False))
        return

    print(f"Channel ID: {data['channel_id']}")
    for i, v in enumerate(data["videos"], start=1):
        print(f"{i}. {v['title']}")
        print(f"   {v['url']}")
        print(f"   published: {v['published']}")


if __name__ == "__main__":
    main()