Add --no-vods flag using isLiveContent detection
This commit is contained in:
@@ -17,6 +17,8 @@ python3 latest3.py "https://www.youtube.com/@ludwig/videos"
|
|||||||
python3 latest3.py "@ludwig" --json
|
python3 latest3.py "@ludwig" --json
|
||||||
python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --json
|
python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --json
|
||||||
python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-shorts
|
python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-shorts
|
||||||
|
python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-vods
|
||||||
|
python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-shorts --no-vods
|
||||||
```
|
```
|
||||||
|
|
||||||
## Output fields
|
## Output fields
|
||||||
@@ -31,6 +33,7 @@ python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --no-short
|
|||||||
- `--limit N` how many videos to return (1-20)
|
- `--limit N` how many videos to return (1-20)
|
||||||
- `--json` output as JSON
|
- `--json` output as JSON
|
||||||
- `--no-shorts` exclude URLs matching `/shorts/`
|
- `--no-shorts` exclude URLs matching `/shorts/`
|
||||||
|
- `--no-vods` exclude livestream VODs (videos whose watch page reports `isLiveContent=true`; this fetches the watch page of each video that is checked)
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
|
|
||||||
|
|||||||
26
latest3.py
26
latest3.py
@@ -7,6 +7,7 @@ import urllib.error
|
|||||||
import urllib.parse
|
import urllib.parse
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
from functools import lru_cache
|
||||||
|
|
||||||
UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
|
UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
|
||||||
|
|
||||||
@@ -53,7 +54,15 @@ def extract_channel_id(html: str) -> str:
|
|||||||
raise RuntimeError("Could not resolve channel ID from URL")
|
raise RuntimeError("Could not resolve channel ID from URL")
|
||||||
|
|
||||||
|
|
||||||
def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = False):
|
@lru_cache(maxsize=512)
def is_live_content(video_id: str) -> bool:
    """Report whether a video's watch page marks it as live content.

    Fetches the watch page for ``video_id`` and looks for the
    ``"isLiveContent":true`` marker in the returned text. A page that has
    no ``isLiveContent`` marker at all is treated as not live.

    Results are memoized per ``video_id`` (up to 512 entries) so that a
    repeated lookup of the same ID does not fetch the page again.

    Args:
        video_id: The video ID placed in the ``v=`` query parameter.

    Returns:
        True only when the marker is present with the value ``true``.
    """
    # NOTE(review): http_get is defined elsewhere in this file; presumably
    # it returns the response body as text -- confirm.
    page_text = http_get(f"https://www.youtube.com/watch?v={video_id}")
    marker = re.search(r'"isLiveContent":(true|false)', page_text)
    if not marker:
        # Marker absent: fall back to "not a livestream VOD".
        return False
    return marker.group(1) == "true"
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = False, no_vods: bool = False):
|
||||||
feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
|
feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
|
||||||
xml_text = http_get(feed_url)
|
xml_text = http_get(feed_url)
|
||||||
root = ET.fromstring(xml_text)
|
root = ET.fromstring(xml_text)
|
||||||
@@ -75,6 +84,9 @@ def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = Fa
|
|||||||
if no_shorts and "/shorts/" in (url or ""):
|
if no_shorts and "/shorts/" in (url or ""):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if no_vods and vid and is_live_content(vid):
|
||||||
|
continue
|
||||||
|
|
||||||
out.append(
|
out.append(
|
||||||
{
|
{
|
||||||
"id": vid,
|
"id": vid,
|
||||||
@@ -92,11 +104,11 @@ def fetch_latest_from_feed(channel_id: str, limit: int = 3, no_shorts: bool = Fa
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
def get_latest_videos(channel_url_or_handle: str, limit: int = 3, no_shorts: bool = False):
|
def get_latest_videos(channel_url_or_handle: str, limit: int = 3, no_shorts: bool = False, no_vods: bool = False):
|
||||||
normalized = normalize_channel_url(channel_url_or_handle)
|
normalized = normalize_channel_url(channel_url_or_handle)
|
||||||
html = http_get(normalized)
|
html = http_get(normalized)
|
||||||
channel_id = extract_channel_id(html)
|
channel_id = extract_channel_id(html)
|
||||||
videos = fetch_latest_from_feed(channel_id, limit=limit, no_shorts=no_shorts)
|
videos = fetch_latest_from_feed(channel_id, limit=limit, no_shorts=no_shorts, no_vods=no_vods)
|
||||||
return {
|
return {
|
||||||
"input": channel_url_or_handle,
|
"input": channel_url_or_handle,
|
||||||
"resolved_url": normalized,
|
"resolved_url": normalized,
|
||||||
@@ -111,6 +123,7 @@ def main():
|
|||||||
ap.add_argument("--limit", type=int, default=3, help="How many latest videos to return (default: 3)")
|
ap.add_argument("--limit", type=int, default=3, help="How many latest videos to return (default: 3)")
|
||||||
ap.add_argument("--json", action="store_true", help="Print full JSON output")
|
ap.add_argument("--json", action="store_true", help="Print full JSON output")
|
||||||
ap.add_argument("--no-shorts", action="store_true", help="Exclude Shorts URLs")
|
ap.add_argument("--no-shorts", action="store_true", help="Exclude Shorts URLs")
|
||||||
|
ap.add_argument("--no-vods", action="store_true", help="Exclude livestream VODs (isLiveContent=true)")
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
|
|
||||||
if args.limit < 1 or args.limit > 20:
|
if args.limit < 1 or args.limit > 20:
|
||||||
@@ -118,7 +131,12 @@ def main():
|
|||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = get_latest_videos(args.channel, limit=args.limit, no_shorts=args.no_shorts)
|
data = get_latest_videos(
|
||||||
|
args.channel,
|
||||||
|
limit=args.limit,
|
||||||
|
no_shorts=args.no_shorts,
|
||||||
|
no_vods=args.no_vods,
|
||||||
|
)
|
||||||
except (urllib.error.URLError, urllib.error.HTTPError) as e:
|
except (urllib.error.URLError, urllib.error.HTTPError) as e:
|
||||||
print(f"Network error: {e}", file=sys.stderr)
|
print(f"Network error: {e}", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|||||||
Reference in New Issue
Block a user