Initial commit: twitter-cli v0.1.0

2026-03-04 17:56:42 +08:00
commit 16752c3115
14 changed files with 2133 additions and 0 deletions
--- a/twitter_cli/client.py
+++ b/twitter_cli/client.py
@@ -0,0 +1,470 @@
+"""Twitter GraphQL API client.
+
+Uses the same internal GraphQL endpoint that the Twitter web app uses,
+authenticated via cookies (auth_token + ct0). QueryId is resolved
+dynamically using a three-tier strategy: JS bundle scan, GitHub lookup, hardcoded fallback.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import math
+import re
+import ssl
+import urllib.request
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+from .models import Author, Metrics, Tweet, TweetMedia
+
+logger = logging.getLogger(__name__)
+
+# Public bearer token shared by all Twitter web clients; sent in the Authorization header
+BEARER_TOKEN = (
+    "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs"
+    "%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
+)
+
+# Last-resort (tier 3) query IDs, used only when the bundle scan and the GitHub lookup both fail
+FALLBACK_QUERY_IDS = {
+    "HomeTimeline": "HJFjzBgCs16TqxewQOeLNg",
+    "Bookmarks": "VFdMm9iVZxlU6hD86gfW_A",
+}
+
+# Community-maintained API definition (auto-updated daily); read as JSON keyed by operation name
+TWITTER_OPENAPI_URL = (
+    "https://raw.githubusercontent.com/fa0311/twitter-openapi/"
+    "main/src/config/placeholder.json"
+)
+
+# Default feature flags, JSON-encoded into the `features` query parameter of every timeline request
+FEATURES = {
+    "rweb_tipjar_consumption_enabled": True,
+    "responsive_web_graphql_exclude_directive_enabled": True,
+    "verified_phone_label_enabled": False,
+    "creator_subscriptions_tweet_preview_api_enabled": True,
+    "responsive_web_graphql_timeline_navigation_enabled": True,
+    "responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
+    "communities_web_enable_tweet_community_results_fetch": True,
+    "c9s_tweet_anatomy_moderator_badge_enabled": True,
+    "articles_preview_enabled": True,
+    "responsive_web_edit_tweet_api_enabled": True,
+    "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
+    "view_counts_everywhere_api_enabled": True,
+    "longform_notetweets_consumption_enabled": True,
+    "responsive_web_twitter_article_tweet_consumption_enabled": True,
+    "tweet_awards_web_tipping_enabled": False,
+    "creator_subscriptions_quote_tweet_preview_enabled": False,
+    "freedom_of_speech_not_reach_fetch_enabled": True,
+    "standardized_nudges_misinfo": True,
+    "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
+    "rweb_video_timestamps_enabled": True,
+    "longform_notetweets_rich_text_read_enabled": True,
+    "longform_notetweets_inline_media_enabled": True,
+    "responsive_web_enhance_cards_enabled": False,
+}
+
+USER_AGENT = (
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
+    "AppleWebKit/537.36 (KHTML, like Gecko) "
+    "Chrome/131.0.0.0 Safari/537.36"
+)
+
+# Module-level cache mapping operation name -> queryId, filled by all three resolution tiers
+_cached_query_ids = {}  # type: Dict[str, str]
+_bundles_scanned = False  # set to True by _scan_bundles() so the scan runs at most once per process
+
+
+def _create_ssl_context():
+    # type: () -> ssl.SSLContext
+    """Create the SSL context for urllib requests: unmodified stdlib defaults, so certificate verification stays enabled."""
+    ctx = ssl.create_default_context()
+    return ctx
+
+
+def _url_fetch(url, headers=None):
+    # type: (str, Optional[Dict[str, str]]) -> str
+    """Fetch `url` with urllib (optional extra request headers, 30 s timeout) and return the body decoded as UTF-8."""
+    req = urllib.request.Request(url)
+    if headers:
+        for k, v in headers.items():
+            req.add_header(k, v)
+    ctx = _create_ssl_context()
+    with urllib.request.urlopen(req, context=ctx, timeout=30) as resp:
+        return resp.read().decode("utf-8")  # errors (HTTP, network, decoding) propagate to the caller
+
+
+def _scan_bundles():
+    # type: () -> None
+    """Tier 1: Fetch x.com once per process and scan its client-web JS bundles for queryId/operationName pairs, filling the cache."""
+    global _bundles_scanned
+    if _bundles_scanned:
+        return
+    _bundles_scanned = True  # marked before any fetch, so a failed scan is not retried
+
+    try:
+        html = _url_fetch("https://x.com", {"user-agent": USER_AGENT})
+
+        script_pattern = re.compile(
+            r'(?:src|href)=["\']'
+            r'(https://abs\.twimg\.com/responsive-web/client-web[^"\']+\.js)'
+            r'["\']'
+        )
+        script_urls = script_pattern.findall(html)
+
+        for url in script_urls:
+            try:
+                js = _url_fetch(url)
+                op_pattern = re.compile(
+                    r'queryId:\s*"([A-Za-z0-9_-]+)"[^}]{0,200}'
+                    r'operationName:\s*"([^"]+)"'
+                )
+                for m in op_pattern.finditer(js):
+                    qid, name = m.group(1), m.group(2)
+                    if name not in _cached_query_ids:  # first match wins; existing entries are never overwritten
+                        _cached_query_ids[name] = qid
+            except Exception:  # best-effort: a bundle that fails to download or decode is skipped silently
+                continue
+
+        count = len(_cached_query_ids)  # total cache size, including entries cached before this scan
+        logger.info("Scanned %d JS bundles, found %d operations", len(script_urls), count)
+    except Exception as e:
+        logger.warning("Failed to scan JS bundles: %s", e)
+
+
+def _fetch_from_github(operation_name):
+    # type: (str) -> Optional[str]
+    """Tier 2: Download the twitter-openapi JSON and return the queryId for `operation_name`, or None if absent or on any error."""
+    try:
+        logger.info("Fetching latest queryId from GitHub (twitter-openapi)...")
+        data_str = _url_fetch(TWITTER_OPENAPI_URL)
+        data = json.loads(data_str)
+        op = data.get(operation_name, {})  # unknown operation -> empty dict -> qid is None below
+        qid = op.get("queryId")
+        if qid:
+            logger.info("Found %s queryId from GitHub: %s", operation_name, qid)
+            return qid
+        return None
+    except Exception as e:  # network, JSON, or unexpected-shape errors are logged and treated as "not found"
+        logger.warning("GitHub lookup failed: %s", e)
+        return None
+
+
+def _resolve_query_id(operation_name):
+    # type: (str) -> str
+    """Return the queryId for `operation_name`: cache first, then bundle scan -> GitHub -> fallback; raise RuntimeError if all fail."""
+    if operation_name in _cached_query_ids:
+        return _cached_query_ids[operation_name]
+
+    logger.info("Auto-detecting %s queryId...", operation_name)
+
+    # Tier 1: JS bundle scan (a no-op after the first call; it writes straight into the cache)
+    _scan_bundles()
+    if operation_name in _cached_query_ids:
+        logger.info("Found %s queryId: %s", operation_name, _cached_query_ids[operation_name])
+        return _cached_query_ids[operation_name]
+
+    # Tier 2: GitHub (result is cached so the file is not downloaded again for this operation)
+    github_id = _fetch_from_github(operation_name)
+    if github_id:
+        _cached_query_ids[operation_name] = github_id
+        return github_id
+
+    # Tier 3: Hardcoded fallback (also cached, so later calls return it without re-running tiers 1-2)
+    fallback = FALLBACK_QUERY_IDS.get(operation_name)
+    if fallback:
+        logger.info("Using hardcoded fallback queryId for %s: %s", operation_name, fallback)
+        _cached_query_ids[operation_name] = fallback
+        return fallback
+
+    raise RuntimeError(
+        'Cannot resolve queryId for "%s" — all detection methods failed' % operation_name
+    )
+
+
+class TwitterClient:
+    """Client for Twitter's GraphQL timeline endpoints (home timeline, bookmarks), authenticated with the auth_token and ct0 cookies."""
+
+    def __init__(self, auth_token, ct0):
+        # type: (str, str) -> None
+        self._auth_token = auth_token  # sent as the auth_token cookie
+        self._ct0 = ct0  # sent as the ct0 cookie and as the X-Csrf-Token header
+
+    def fetch_home_timeline(self, count=20):
+        # type: (int) -> List[Tweet]
+        """Fetch up to `count` tweets from the home timeline, paginating as needed."""
+        query_id = _resolve_query_id("HomeTimeline")
+        return self._fetch_timeline(
+            query_id,
+            "HomeTimeline",
+            count,
+            lambda data: _deep_get(data, "data", "home", "home_timeline_urt", "instructions"),
+        )
+
+    def fetch_bookmarks(self, count=50):
+        # type: (int) -> List[Tweet]
+        """Fetch up to `count` bookmarked tweets, paginating as needed."""
+        query_id = _resolve_query_id("Bookmarks")
+
+        def get_instructions(data):
+            # type: (Any) -> Any
+            result = _deep_get(data, "data", "bookmark_timeline", "timeline", "instructions")
+            if result is None:  # fall back to the bookmark_timeline_v2 response shape
+                result = _deep_get(data, "data", "bookmark_timeline_v2", "timeline", "instructions")
+            return result
+
+        return self._fetch_timeline(query_id, "Bookmarks", count, get_instructions)
+
+    def _fetch_timeline(self, query_id, operation_name, count, get_instructions, extra_variables=None):
+        # type: (str, str, int, Callable, Optional[Dict[str, Any]]) -> List[Tweet]
+        """Page through `operation_name` until `count` unique tweets are collected, no cursor remains, a page is empty, or attempts run out."""
+        tweets = []  # type: List[Tweet]
+        cursor = None  # type: Optional[str]
+        attempts = 0
+        max_attempts = int(math.ceil(count / 20.0)) + 2  # request budget: one per 20 requested tweets, plus 2 spare
+
+        while len(tweets) < count and attempts < max_attempts:
+            attempts += 1
+            variables = {
+                "count": min(count - len(tweets) + 5, 40),  # ask for 5 more than still needed, capped at 40
+                "includePromotedContent": False,
+                "latestControlAvailable": True,
+                "requestContext": "launch",
+            }  # type: Dict[str, Any]
+
+            if extra_variables:
+                variables.update(extra_variables)  # caller-supplied variables override the defaults above
+            if cursor:
+                variables["cursor"] = cursor
+
+            url = "https://x.com/i/api/graphql/%s/%s?" % (query_id, operation_name)
+            url += "variables=%s&features=%s" % (
+                urllib.request.quote(json.dumps(variables)),  # NOTE(review): quote is urllib.parse.quote as re-exported by urllib.request
+                urllib.request.quote(json.dumps(FEATURES)),
+            )
+
+            data = self._api_get(url)
+            new_tweets, next_cursor = self._parse_timeline_response(data, get_instructions)
+
+            seen_ids = {t.id for t in tweets}  # rebuilt each page from everything collected so far
+            for tweet in new_tweets:
+                if tweet.id not in seen_ids:
+                    tweets.append(tweet)
+                    seen_ids.add(tweet.id)
+
+            if not next_cursor or not new_tweets:  # stop when there is no further cursor or the page parsed to no tweets
+                break
+            cursor = next_cursor
+
+        return tweets[:count]
+
+    def _build_headers(self):
+        # type: () -> Dict[str, str]
+        return {  # headers attached to every authenticated API request made by _api_get
+            "Authorization": "Bearer %s" % BEARER_TOKEN,
+            "Cookie": "auth_token=%s; ct0=%s" % (self._auth_token, self._ct0),
+            "X-Csrf-Token": self._ct0,  # same value as the ct0 cookie
+            "X-Twitter-Active-User": "yes",
+            "X-Twitter-Auth-Type": "OAuth2Session",
+            "X-Twitter-Client-Language": "en",
+            "Content-Type": "application/json",
+            "User-Agent": USER_AGENT,
+            "Referer": "https://x.com/home",
+            "Accept": "*/*",
+            "Accept-Language": "en-US,en;q=0.9",
+        }
+
+    def _api_get(self, url):
+        # type: (str) -> Any
+        """GET `url` with the auth headers and return the parsed JSON body; raise RuntimeError (status + body prefix) on HTTP errors."""
+        headers = self._build_headers()
+        req = urllib.request.Request(url)
+        for k, v in headers.items():
+            req.add_header(k, v)
+
+        ctx = _create_ssl_context()
+        try:
+            with urllib.request.urlopen(req, context=ctx, timeout=30) as resp:
+                body = resp.read().decode("utf-8")
+                return json.loads(body)
+        except urllib.error.HTTPError as e:  # NOTE(review): urllib.error is not imported explicitly; relies on urllib.request loading it
+            body = e.read().decode("utf-8", errors="replace")
+            raise RuntimeError("Twitter API error %d: %s" % (e.code, body[:500]))  # only the first 500 chars of the body
+
+    def _parse_timeline_response(self, data, get_instructions):
+        # type: (Any, Callable) -> Tuple[List[Tweet], Optional[str]]
+        """Parse a timeline GraphQL response into (tweets, next_cursor); on a parse error, log it and return what was collected so far."""
+        tweets = []  # type: List[Tweet]
+        next_cursor = None  # type: Optional[str]
+
+        try:
+            instructions = get_instructions(data)
+            if not isinstance(instructions, list):
+                logger.warning("No instructions found in response")
+                return tweets, next_cursor
+
+            for instruction in instructions:
+                entries = instruction.get("entries") or instruction.get("moduleItems") or []
+
+                for entry in entries:
+                    content = entry.get("content", {})
+
+                    # Handle cursor entries: any Bottom cursor or TimelineTimelineCursor entry; the last one with a value wins
+                    if content.get("cursorType") == "Bottom" or content.get("entryType") == "TimelineTimelineCursor":
+                        val = content.get("value")
+                        if val:
+                            next_cursor = val
+                        continue
+
+                    # Handle single tweet entries (content.itemContent.tweet_results.result)
+                    item_content = content.get("itemContent", {})
+                    tweet_results = item_content.get("tweet_results", {})
+                    result = tweet_results.get("result")
+                    if result:
+                        tweet = self._parse_tweet_result(result)
+                        if tweet:
+                            tweets.append(tweet)
+
+                    # Handle conversation module (tweet threads): tweets nested under content.items
+                    items = content.get("items", [])
+                    for item in items:
+                        nested = (
+                            item.get("item", {})
+                            .get("itemContent", {})
+                            .get("tweet_results", {})
+                            .get("result")
+                        )
+                        if nested:
+                            tweet = self._parse_tweet_result(nested)
+                            if tweet:
+                                tweets.append(tweet)
+        except Exception as e:
+            logger.warning("Error parsing timeline response: %s", e)
+
+        return tweets, next_cursor
+
+    def _parse_tweet_result(self, result):
+        # type: (Dict[str, Any]) -> Optional[Tweet]
+        """Convert one tweet result dict into a Tweet; return None for tombstones, results missing legacy/core, or on any parse error."""
+        try:
+            tweet_data = result
+
+            # Handle TweetWithVisibilityResults wrapper (the real tweet sits under "tweet")
+            if result.get("__typename") == "TweetWithVisibilityResults" and result.get("tweet"):
+                tweet_data = result["tweet"]
+
+            if tweet_data.get("__typename") == "TweetTombstone":
+                return None
+            if not tweet_data.get("legacy") or not tweet_data.get("core"):
+                return None
+
+            legacy = tweet_data["legacy"]
+            user = tweet_data["core"]["user_results"]["result"]
+            user_legacy = user.get("legacy", {})
+            user_core = user.get("core", {})
+
+            # Check if this is a retweet (legacy.retweeted_status_result.result is present)
+            is_retweet = bool(legacy.get("retweeted_status_result", {}).get("result"))
+            actual_data = tweet_data  # actual_* describe the tweet that is reported: the retweeted original when usable
+            actual_legacy = legacy
+            actual_user = user
+            actual_user_legacy = user_legacy
+
+            if is_retweet:
+                rt_result = legacy["retweeted_status_result"]["result"]
+                # Handle wrapped retweet
+                if rt_result.get("__typename") == "TweetWithVisibilityResults" and rt_result.get("tweet"):
+                    rt_result = rt_result["tweet"]
+                if rt_result.get("legacy") and rt_result.get("core"):  # otherwise keep the outer tweet's data
+                    actual_data = rt_result
+                    actual_legacy = rt_result["legacy"]
+                    actual_user = rt_result["core"]["user_results"]["result"]
+                    actual_user_legacy = actual_user.get("legacy", {})
+
+            # Parse media (photos, videos and animated GIFs; other media types are ignored)
+            media = []  # type: List[TweetMedia]
+            ext_media = actual_legacy.get("extended_entities", {}).get("media", [])
+            for m in ext_media:
+                m_type = m.get("type", "")
+                if m_type == "photo":
+                    media.append(TweetMedia(
+                        type="photo",
+                        url=m.get("media_url_https", ""),
+                        width=_deep_get(m, "original_info", "width"),
+                        height=_deep_get(m, "original_info", "height"),
+                    ))
+                elif m_type in ("video", "animated_gif"):
+                    variants = m.get("video_info", {}).get("variants", [])
+                    mp4_variants = [v for v in variants if v.get("content_type") == "video/mp4"]
+                    mp4_variants.sort(key=lambda v: v.get("bitrate", 0), reverse=True)  # highest bitrate first
+                    video_url = mp4_variants[0]["url"] if mp4_variants else m.get("media_url_https", "")
+                    media.append(TweetMedia(
+                        type=m_type,
+                        url=video_url,
+                        width=_deep_get(m, "original_info", "width"),
+                        height=_deep_get(m, "original_info", "height"),
+                    ))
+
+            # Parse URLs (expanded form of each entry in entities.urls)
+            urls = [u.get("expanded_url", "") for u in actual_legacy.get("entities", {}).get("urls", [])]
+
+            # Parse quoted tweet (recursively, with the same parser)
+            quoted_tweet = None  # type: Optional[Tweet]
+            quoted_result = actual_data.get("quoted_status_result", {}).get("result")
+            if quoted_result:
+                quoted_tweet = self._parse_tweet_result(quoted_result)
+
+            # Extract user info — try user.core (new API), then user.legacy (old API), then top-level user fields
+            au = actual_user
+            aul = actual_user_legacy
+            auc = au.get("core", {})
+            user_name = auc.get("name") or aul.get("name") or au.get("name", "Unknown")
+            user_screen_name = auc.get("screen_name") or aul.get("screen_name") or au.get("screen_name", "unknown")
+            user_profile_image = au.get("avatar", {}).get("image_url") or aul.get("profile_image_url_https", "")
+            user_verified = au.get("is_blue_verified") or aul.get("verified", False)
+
+            # Retweeted-by info: screen name of the outer tweet's user (the account that retweeted)
+            rt_screen_name = None  # type: Optional[str]
+            if is_retweet:
+                rt_screen_name = user_core.get("screen_name") or user_legacy.get("screen_name", "unknown")
+
+            return Tweet(
+                id=actual_data.get("rest_id", ""),
+                text=actual_legacy.get("full_text", ""),
+                author=Author(
+                    id=au.get("rest_id", ""),
+                    name=user_name,
+                    screen_name=user_screen_name,
+                    profile_image_url=user_profile_image,
+                    verified=bool(user_verified),
+                ),
+                metrics=Metrics(
+                    likes=actual_legacy.get("favorite_count", 0),
+                    retweets=actual_legacy.get("retweet_count", 0),
+                    replies=actual_legacy.get("reply_count", 0),
+                    quotes=actual_legacy.get("quote_count", 0),
+                    views=int(actual_data.get("views", {}).get("count", "0") or "0"),  # missing or empty count becomes 0
+                    bookmarks=actual_legacy.get("bookmark_count", 0),
+                ),
+                created_at=actual_legacy.get("created_at", ""),
+                media=media,
+                urls=urls,
+                is_retweet=is_retweet,
+                retweeted_by=rt_screen_name,
+                quoted_tweet=quoted_tweet,
+                lang=actual_legacy.get("lang", ""),
+            )
+        except Exception as e:  # any unexpected shape is logged and the tweet is dropped
+            logger.warning("Failed to parse tweet: %s", e)
+            return None
+
+
+def _deep_get(d, *keys):
+    # type: (Any, *str) -> Any
+    """Walk nested dicts along `keys` and return the final value; return None if a key is missing or a non-dict is reached."""
+    for key in keys:
+        if isinstance(d, dict):
+            d = d.get(key)  # a missing key yields None, which ends the walk on the next step
+        else:
+            return None
+    return d