diff --git a/README.md b/README.md index 9607f56..325340c 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,8 @@ python3 latest3.py "https://www.youtube.com/@ludwig/videos" python3 latest3.py "@ludwig" --json python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --json python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-shorts +python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-vods +python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-shorts --no-vods ``` ## Output fields @@ -31,6 +33,7 @@ python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-short - `--limit N` how many videos to return (1-20) - `--json` output as JSON - `--no-shorts` exclude URLs matching `/shorts/` +- `--no-vods` exclude livestream VODs (`isLiveContent=true` on watch page) ## Notes diff --git a/latest3.py b/latest3.py index 5bd7927..82d3ae6 100755 --- a/latest3.py +++ b/latest3.py @@ -7,6 +7,7 @@ import urllib.error import urllib.parse import urllib.request import xml.etree.ElementTree as ET +from functools import lru_cache UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" @@ -53,7 +54,15 @@ def extract_channel_id(html: str) -> str: raise RuntimeError("Could not resolve channel ID from URL") -def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = False): +@lru_cache(maxsize=512) +def is_live_content(video_id: str) -> bool: + watch_url = f"https://www.youtube.com/watch?v={video_id}" + html = http_get(watch_url) + m = re.search(r'"isLiveContent":(true|false)', html) + return bool(m and m.group(1) == "true") + + +def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = False, no_vods: bool = False): feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}" xml_text = http_get(feed_url) root = ET.fromstring(xml_text) @@ -75,6 +84,9 @@ def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = Fa if no_shorts and "/shorts/" in (url or ""): continue + if no_vods and vid and is_live_content(vid): + continue + out.append( { "id": vid, @@ -92,11 +104,11 @@ def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = Fa return out -def get_latest_videos(channel_url_or_handle: str, limit: int = 3, no_shorts: bool = False): +def get_latest_videos(channel_url_or_handle: str, limit: int = 3, no_shorts: bool = False, no_vods: bool = False): normalized = normalize_channel_url(channel_url_or_handle) html = http_get(normalized) channel_id = extract_channel_id(html) - videos = fetch_latest_from_feed(channel_id, limit=limit, no_shorts=no_shorts) + videos = fetch_latest_from_feed(channel_id, limit=limit, no_shorts=no_shorts, no_vods=no_vods) return { "input": channel_url_or_handle, "resolved_url": normalized, @@ -111,6 +123,7 @@ def main(): ap.add_argument("--limit", type=int, default=3, help="How many latest videos to return (default: 3)") ap.add_argument("--json", action="store_true", help="Print full JSON output") ap.add_argument("--no-shorts", action="store_true", help="Exclude Shorts URLs") + ap.add_argument("--no-vods", action="store_true", help="Exclude livestream VODs (isLiveContent=true)") args = ap.parse_args() if args.limit < 1 or args.limit > 20: @@ -118,7 +131,12 @@ def main(): sys.exit(2) try: - data = get_latest_videos(args.channel, limit=args.limit, no_shorts=args.no_shorts) + data = get_latest_videos( + args.channel, + limit=args.limit, + no_shorts=args.no_shorts, + no_vods=args.no_vods, + ) except (urllib.error.URLError, urllib.error.HTTPError) as e: print(f"Network error: {e}", file=sys.stderr) sys.exit(1)