# youtube-latest3/latest3.py

#!/usr/bin/env python3
import argparse
import json
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

# Browser-like User-Agent header value sent with every request made by http_get().
UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"


def http_get(url: str, timeout: int = 20) -> str:
    """Fetch *url* and return the response body as text.

    The request carries the module-level ``UA`` string as its User-Agent
    header. The body is decoded as UTF-8 and undecodable bytes are dropped.

    Args:
        url: Address to fetch.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        The decoded response body.
    """
    request = urllib.request.Request(url)
    request.add_header("User-Agent", UA)
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode("utf-8", "ignore")


def normalize_channel_url(raw: str) -> str:
    """Turn a channel URL or ``@handle`` into a canonical www.youtube.com URL.

    Accepted inputs are a bare handle (``@name``), or a URL with or without
    scheme whose host is ``youtube.com`` / ``youtu.be`` or a subdomain of
    either. The result always uses ``https`` and the ``www.youtube.com``
    host; query string and fragment are dropped.

    For handle paths (``/@name...``) everything after the handle is replaced
    by ``/videos``, so ``/@name``, ``/@name/``, ``/@name/featured`` and
    ``/@name/videos`` all map to ``/@name/videos``. Any other path is kept
    as given.

    Args:
        raw: Channel URL or handle as typed by the user.

    Returns:
        The normalized absolute URL.

    Raises:
        ValueError: If the input is empty or its host is not a YouTube host.
    """
    raw = raw.strip()
    if not raw:
        raise ValueError("Empty URL/handle")

    if raw.startswith("@"):
        # Route bare handles through the same parsing as full URLs so that a
        # trailing "/videos", "/" or "?query" is handled in one place.
        raw = "https://www.youtube.com/" + raw
    elif not raw.lower().startswith(("http://", "https://")):
        raw = "https://" + raw

    parsed = urllib.parse.urlparse(raw)

    # Compare the parsed hostname (lower-cased, without port/userinfo) against
    # the allowed domains exactly; a substring test would accept look-alike
    # hosts such as "notyoutube.com".
    host = (parsed.hostname or "").rstrip(".")
    allowed_domains = ("youtube.com", "youtu.be")
    if not any(host == d or host.endswith("." + d) for d in allowed_domains):
        raise ValueError("Not a YouTube URL/handle")

    path = parsed.path or "/"
    if path.startswith("/@"):
        # Keep only the handle segment; appending "/videos" to a path that
        # already names another tab would yield a non-existent page.
        handle = path[1:].split("/", 1)[0]
        path = f"/{handle}/videos"

    # Percent-encode characters that are not legal in a URL path (e.g.
    # non-ASCII handles) while leaving existing escapes and path punctuation
    # untouched, so the URL can be sent by urllib.
    path = urllib.parse.quote(path, safe="/@%!$&'()*+,;=:")

    return urllib.parse.urlunparse(("https", "www.youtube.com", path, "", "", ""))


def extract_channel_id(html: str) -> str:
    """Pull the ``UC...`` channel ID out of a channel page's HTML.

    Three markers are tried in priority order: the ``"channelId"`` JSON key,
    the ``"externalId"`` JSON key, then the ``channelId`` meta tag. The first
    marker that matches anywhere in *html* wins.

    Args:
        html: Page source to search.

    Returns:
        The 24-character channel ID.

    Raises:
        RuntimeError: If none of the markers is present.
    """
    # Lazy generator: later patterns are only searched when earlier ones miss.
    attempts = (
        re.search(marker, html)
        for marker in (
            r'"channelId":"(UC[0-9A-Za-z_-]{22})"',
            r'"externalId":"(UC[0-9A-Za-z_-]{22})"',
            r'<meta itemprop="channelId" content="(UC[0-9A-Za-z_-]{22})">',
        )
    )
    hit = next((found for found in attempts if found), None)
    if hit is None:
        raise RuntimeError("Could not resolve channel ID from URL")
    return hit.group(1)


def fetch_latest_from_feed(channel_id: str, limit: int = 3):
    """Download a channel's Atom feed and return its first *limit* entries.

    Args:
        channel_id: Channel ID placed in the feed URL's ``channel_id``
            query parameter.
        limit: Maximum number of feed entries to return.

    Returns:
        A list of dicts, one per entry in feed order, with the string keys
        ``id``, ``title``, ``url`` and ``published``. Missing fields are
        empty strings.

    Raises:
        RuntimeError: If no entries were collected from the feed.
    """
    # urlencode keeps the query well-formed even for unexpected characters.
    query = urllib.parse.urlencode({"channel_id": channel_id})
    feed_url = f"https://www.youtube.com/feeds/videos.xml?{query}"
    xml_text = http_get(feed_url)
    root = ET.fromstring(xml_text)

    ns = {
        "atom": "http://www.w3.org/2005/Atom",
        "yt": "http://www.youtube.com/xml/schemas/2015",
        "media": "http://search.yahoo.com/mrss/",
    }

    out = []
    for entry in root.findall("atom:entry", ns)[:limit]:
        vid = entry.findtext("yt:videoId", default="", namespaces=ns)
        title = entry.findtext("atom:title", default="", namespaces=ns)
        published = entry.findtext("atom:published", default="", namespaces=ns)

        link_el = entry.find("atom:link", ns)
        href = link_el.get("href") if link_el is not None else None
        # Fall back to a watch URL when the link element is missing *or* has
        # no href, so "url" is always a string and never None.
        url = href or (f"https://www.youtube.com/watch?v={vid}" if vid else "")

        out.append(
            {
                "id": vid,
                "title": title,
                "url": url,
                "published": published,
            }
        )

    if not out:
        raise RuntimeError("No videos found in channel feed")
    return out


def get_latest_videos(channel_url_or_handle: str, limit: int = 3):
    """Resolve a channel reference and list its latest feed entries.

    The input is normalized to a channel page URL, that page is downloaded
    to find the channel ID, and the ID is used to read the channel feed.

    Args:
        channel_url_or_handle: Channel URL or ``@handle`` as given by the user.
        limit: Maximum number of videos to return.

    Returns:
        A dict with the keys ``input`` (the argument as given),
        ``resolved_url``, ``channel_id`` and ``videos``.
    """
    page_url = normalize_channel_url(channel_url_or_handle)
    channel_id = extract_channel_id(http_get(page_url))
    return {
        "input": channel_url_or_handle,
        "resolved_url": page_url,
        "channel_id": channel_id,
        "videos": fetch_latest_from_feed(channel_id, limit=limit),
    }


def main():
    """Command-line entry point.

    Parses the arguments, fetches the latest videos for the given channel
    and prints them either as JSON (``--json``) or as a numbered text list.
    Exits with status 2 when ``--limit`` is outside 1..20 and with status 1
    when fetching fails.
    """
    parser = argparse.ArgumentParser(description="Get latest YouTube uploads from a channel URL/handle")
    parser.add_argument("channel", help="YouTube channel URL (including @handle) or @handle")
    parser.add_argument("--limit", type=int, default=3, help="How many latest videos to return (default: 3)")
    parser.add_argument("--json", action="store_true", help="Print full JSON output")
    opts = parser.parse_args()

    if not 1 <= opts.limit <= 20:
        print("--limit must be between 1 and 20", file=sys.stderr)
        sys.exit(2)

    try:
        result = get_latest_videos(opts.channel, limit=opts.limit)
    except urllib.error.URLError as exc:
        # HTTPError is a subclass of URLError, so this covers both.
        print(f"Network error: {exc}", file=sys.stderr)
        sys.exit(1)
    except Exception as exc:
        # Top-level boundary: report any other failure and exit non-zero.
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)

    if opts.json:
        print(json.dumps(result, indent=2, ensure_ascii=False))
    else:
        print(f"Channel ID: {result['channel_id']}")
        for position, video in enumerate(result["videos"], start=1):
            print(
                f"{position}. {video['title']}\n"
                f"   {video['url']}\n"
                f"   published: {video['published']}"
            )

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()