Initial commit: latest3 YouTube extractor
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
__pycache__/
|
||||
*.pyc
|
||||
32
README.md
Normal file
32
README.md
Normal file
@@ -0,0 +1,32 @@
|
||||
# youtube-latest3
|
||||
|
||||
Fetch the latest uploads from a YouTube channel URL (including dynamic `@handle` URLs like `https://www.youtube.com/@ludwig/videos`).
|
||||
|
||||
## Why this approach
|
||||
|
||||
This tool resolves the channel ID from the exact channel URL, then reads YouTube's official uploads feed:
|
||||
|
||||
- `https://www.youtube.com/feeds/videos.xml?channel_id=<CHANNEL_ID>`
|
||||
|
||||
That feed is ordered by newest uploads, so fetching the first 3 entries gives the latest 3 videos.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
python3 latest3.py "https://www.youtube.com/@ludwig/videos"
|
||||
python3 latest3.py "@ludwig" --json
|
||||
python3 latest3.py "https://www.youtube.com/@ludwig/videos" --limit 3 --json
|
||||
```
|
||||
|
||||
## Output fields
|
||||
|
||||
- `id` - video ID
|
||||
- `title` - video title
|
||||
- `url` - watch URL
|
||||
- `published` - ISO-8601 timestamp from the feed
|
||||
|
||||
## Notes
|
||||
|
||||
- Works with `@handle` URLs and plain `@handle` input.
|
||||
- No API key required.
|
||||
- If YouTube changes page markup for channel ID extraction, update `extract_channel_id()`.
|
||||
134
latest3.py
Executable file
134
latest3.py
Executable file
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
# Desktop-browser User-Agent sent with every request.
# NOTE(review): presumably needed so YouTube serves the full desktop markup
# that contains the channel-ID markers extract_channel_id() looks for —
# confirm against a response fetched without this header.
UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
|
||||
|
||||
|
||||
def http_get(url: str, timeout: int = 20) -> str:
    """Fetch *url* over HTTP(S) and return the body decoded as UTF-8.

    Sends the module-level browser User-Agent header. Bytes that do not
    decode as UTF-8 are dropped ("ignore") rather than raising.
    """
    request = urllib.request.Request(url, headers={"User-Agent": UA})
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode("utf-8", "ignore")
|
||||
|
||||
|
||||
def normalize_channel_url(raw: str) -> str:
    """Turn a channel URL or bare ``@handle`` into a canonical channel URL.

    Bare handles become ``https://www.youtube.com/@handle/videos``; URLs
    are re-rooted at ``https://www.youtube.com`` and ``@handle`` paths get
    ``/videos`` appended when missing.

    Raises:
        ValueError: on empty input or a non-YouTube host.
    """
    text = raw.strip()
    if not text:
        raise ValueError("Empty URL/handle")

    # Bare "@handle" input: build the canonical uploads-tab URL directly.
    if text.startswith("@"):
        return f"https://www.youtube.com/{text}/videos"

    if not text.startswith(("http://", "https://")):
        text = "https://" + text

    parsed = urllib.parse.urlparse(text)
    host = parsed.netloc
    if "youtube.com" not in host and "youtu.be" not in host:
        raise ValueError("Not a YouTube URL/handle")

    path = parsed.path or "/"
    # Handle-style paths should point at the uploads tab.
    if path.startswith("/@"):
        trimmed = path.rstrip("/")
        if not trimmed.endswith("/videos"):
            path = trimmed + "/videos"

    # Canonicalize scheme/host regardless of what the caller supplied.
    return urllib.parse.urlunparse(("https", "www.youtube.com", path, "", "", ""))
|
||||
|
||||
|
||||
def extract_channel_id(html: str) -> str:
    """Pull the canonical ``UC...`` channel ID out of channel-page HTML.

    Tries several known page markers in order of reliability.

    Raises:
        RuntimeError: when no marker matches (e.g. YouTube changed markup).
    """
    # All markers share the same 24-char channel-ID capture group.
    uc_id = r"(UC[0-9A-Za-z_-]{22})"
    for marker in (
        r'"channelId":"%s"' % uc_id,
        r'"externalId":"%s"' % uc_id,
        r'<meta itemprop="channelId" content="%s">' % uc_id,
    ):
        if (found := re.search(marker, html)) is not None:
            return found.group(1)
    raise RuntimeError("Could not resolve channel ID from URL")
|
||||
|
||||
|
||||
def fetch_latest_from_feed(channel_id: str, limit: int = 3):
    """Read the channel's public Atom uploads feed and return the newest videos.

    Returns up to *limit* dicts with ``id``/``title``/``url``/``published``
    keys, in feed order (newest first).

    Raises:
        RuntimeError: when the feed contains no entries.
    """
    feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
    document = ET.fromstring(http_get(feed_url))

    # Namespaces used by YouTube's uploads feed.
    namespaces = {
        "atom": "http://www.w3.org/2005/Atom",
        "yt": "http://www.youtube.com/xml/schemas/2015",
        "media": "http://search.yahoo.com/mrss/",
    }

    videos = []
    for entry in document.findall("atom:entry", namespaces)[:limit]:
        video_id = entry.findtext("yt:videoId", default="", namespaces=namespaces)

        # Prefer the entry's own link; fall back to a constructed watch URL.
        link = entry.find("atom:link", namespaces)
        if link is not None:
            watch_url = link.attrib.get("href")
        elif video_id:
            watch_url = f"https://www.youtube.com/watch?v={video_id}"
        else:
            watch_url = ""

        videos.append(
            {
                "id": video_id,
                "title": entry.findtext("atom:title", default="", namespaces=namespaces),
                "url": watch_url,
                "published": entry.findtext("atom:published", default="", namespaces=namespaces),
            }
        )

    if not videos:
        raise RuntimeError("No videos found in channel feed")
    return videos
|
||||
|
||||
|
||||
def get_latest_videos(channel_url_or_handle: str, limit: int = 3):
    """Resolve a channel URL/handle and fetch its newest uploads.

    Normalizes the input, scrapes the channel page for its ``UC...`` ID,
    then reads the official uploads feed. Returns a dict echoing the
    input plus ``resolved_url``, ``channel_id``, and the ``videos`` list.
    """
    resolved = normalize_channel_url(channel_url_or_handle)
    page_html = http_get(resolved)
    cid = extract_channel_id(page_html)
    return {
        "input": channel_url_or_handle,
        "resolved_url": resolved,
        "channel_id": cid,
        "videos": fetch_latest_from_feed(cid, limit=limit),
    }
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, fetch, and print the latest uploads."""
    parser = argparse.ArgumentParser(description="Get latest YouTube uploads from a channel URL/handle")
    parser.add_argument("channel", help="YouTube channel URL (including @handle) or @handle")
    parser.add_argument("--limit", type=int, default=3, help="How many latest videos to return (default: 3)")
    parser.add_argument("--json", action="store_true", help="Print full JSON output")
    args = parser.parse_args()

    # Keep the request bounded; exit code 2 signals a usage error.
    if not 1 <= args.limit <= 20:
        print("--limit must be between 1 and 20", file=sys.stderr)
        sys.exit(2)

    try:
        result = get_latest_videos(args.channel, limit=args.limit)
    except (urllib.error.URLError, urllib.error.HTTPError) as exc:
        print(f"Network error: {exc}", file=sys.stderr)
        sys.exit(1)
    except Exception as exc:
        # Top-level CLI boundary: report anything else and exit non-zero.
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)

    if args.json:
        print(json.dumps(result, indent=2, ensure_ascii=False))
        return

    # Human-readable listing, one numbered video per group of lines.
    print(f"Channel ID: {result['channel_id']}")
    for index, video in enumerate(result["videos"], start=1):
        print(f"{index}. {video['title']}")
        print(f" {video['url']}")
        print(f" published: {video['published']}")
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user