Add --no-vods flag using isLiveContent detection

This commit is contained in:
2026-04-07 22:32:04 +02:00
parent 2533b63597
commit 35ab1bdaf6
2 changed files with 25 additions and 4 deletions

View File

@@ -17,6 +17,8 @@ python3 latest3.py "https://www.youtube.com/@ludwig/videos"
python3 latest3.py "@ludwig" --json
python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --json
python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-shorts
python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-vods
python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-shorts --no-vods
```
## Output fields
@@ -31,6 +33,7 @@ python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-short
- `--limit N` how many videos to return (1-20)
- `--json` output as JSON
- `--no-shorts` exclude URLs matching `/shorts/`
- `--no-vods` exclude livestream VODs (`isLiveContent=true` on watch page)
## Notes

View File

@@ -7,6 +7,7 @@ import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET
from functools import lru_cache
UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
@@ -53,7 +54,15 @@ def extract_channel_id(html: str) -> str:
raise RuntimeError("Could not resolve channel ID from URL")
def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = False):
@lru_cache(maxsize=512)
def is_live_content(video_id: str) -> bool:
    """Return True when the watch page flags *video_id* as live content.

    Downloads https://www.youtube.com/watch?v=<video_id> via http_get and
    scans the embedded player JSON for the ``"isLiveContent"`` boolean.
    Results are memoized per video ID (up to 512 entries) so repeated
    checks do not re-fetch the page. Returns False when the flag is
    absent or set to ``false``.
    """
    page_html = http_get(f"https://www.youtube.com/watch?v={video_id}")
    flag = re.search(r'"isLiveContent":(true|false)', page_html)
    if flag is None:
        return False
    return flag.group(1) == "true"
def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = False, no_vods: bool = False):
feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
xml_text = http_get(feed_url)
root = ET.fromstring(xml_text)
@@ -75,6 +84,9 @@ def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = Fa
if no_shorts and "/shorts/" in (url or ""):
continue
if no_vods and vid and is_live_content(vid):
continue
out.append(
{
"id": vid,
@@ -92,11 +104,11 @@ def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = Fa
return out
def get_latest_videos(channel_url_or_handle: str, limit: int = 3, no_shorts: bool = False):
def get_latest_videos(channel_url_or_handle: str, limit: int = 3, no_shorts: bool = False, no_vods: bool = False):
normalized = normalize_channel_url(channel_url_or_handle)
html = http_get(normalized)
channel_id = extract_channel_id(html)
videos = fetch_latest_from_feed(channel_id, limit=limit, no_shorts=no_shorts)
videos = fetch_latest_from_feed(channel_id, limit=limit, no_shorts=no_shorts, no_vods=no_vods)
return {
"input": channel_url_or_handle,
"resolved_url": normalized,
@@ -111,6 +123,7 @@ def main():
ap.add_argument("--limit", type=int, default=3, help="How many latest videos to return (default: 3)")
ap.add_argument("--json", action="store_true", help="Print full JSON output")
ap.add_argument("--no-shorts", action="store_true", help="Exclude Shorts URLs")
ap.add_argument("--no-vods", action="store_true", help="Exclude livestream VODs (isLiveContent=true)")
args = ap.parse_args()
if args.limit < 1 or args.limit > 20:
@@ -118,7 +131,12 @@ def main():
sys.exit(2)
try:
data = get_latest_videos(args.channel, limit=args.limit, no_shorts=args.no_shorts)
data = get_latest_videos(
args.channel,
limit=args.limit,
no_shorts=args.no_shorts,
no_vods=args.no_vods,
)
except (urllib.error.URLError, urllib.error.HTTPError) as e:
print(f"Network error: {e}", file=sys.stderr)
sys.exit(1)