twitter-cli-cookiefile/twitter_cli/client.py

"""Twitter GraphQL API client."""

from __future__ import annotations

import json
import logging
import math
import random
import re
import time
import urllib.parse

import bs4
from curl_cffi import requests as _cffi_requests
from x_client_transaction import ClientTransaction
from x_client_transaction.utils import generate_headers as _gen_ct_headers, get_ondemand_file_url

from .constants import BEARER_TOKEN, USER_AGENT, SEC_CH_UA, SEC_CH_UA_MOBILE, SEC_CH_UA_PLATFORM
from .models import Author, Metrics, Tweet, TweetMedia, UserProfile

logger = logging.getLogger(__name__)

# Shared curl_cffi session — impersonates Chrome 133 TLS/JA3/HTTP2 fingerprint
_cffi_session = None  # type: Optional[Any]  # lazy init


FALLBACK_QUERY_IDS = {
    # Read operations
    "HomeTimeline": "c-CzHF1LboFilMpsx4ZCrQ",
    "HomeLatestTimeline": "BKB7oi212Fi7kQtCBGE4zA",
    "Bookmarks": "VFdMm9iVZxlU6hD86gfW_A",
    "UserByScreenName": "1VOOyvKkiI3FMmkeDNxM9A",
    "UserTweets": "E3opETHurmVJflFsUBVuUQ",
    "SearchTimeline": "nWemVnGJ6A5eQAR5-oQeAg",
    "Likes": "lIDpu_NWL7_VhimGGt0o6A",
    "TweetDetail": "xd_EMdYvB9hfZsZ6Idri0w",
    "ListLatestTweetsTimeline": "RlZzktZY_9wJynoepm8ZsA",
    "Followers": "IOh4aS6UdGWGJUYTqliQ7Q",
    "Following": "zx6e-TLzRkeDO_a7p4b3JQ",
    # Write operations
    "CreateTweet": "IID9x6WsdMnTlXnzXGq8ng",
    "DeleteTweet": "VaenaVgh5q5ih7kvyVjgtg",
    "FavoriteTweet": "lI07N6Otwv1PhnEgXILM7A",
    "UnfavoriteTweet": "ZYKSe-w7KEslx3JhSIk5LA",
    "CreateRetweet": "ojPdsZsimiJrUGLR1sjUtA",
    "DeleteRetweet": "iQtK4dl5hBmXewYZuEOKVw",
    "CreateBookmark": "aoDbu3RHznuiSkQ9aNM67Q",
    "DeleteBookmark": "Wlmlj2-xzyS1GN3a6cj-mQ",
}

TWITTER_OPENAPI_URL = (
    "https://raw.githubusercontent.com/fa0311/twitter-openapi/"
    "main/src/config/placeholder.json"
)

_DEFAULT_FEATURES = {
    "rweb_video_screen_enabled": False,
    "profile_label_improvements_pcf_label_in_post_enabled": True,
    "responsive_web_profile_redirect_enabled": False,
    "rweb_tipjar_consumption_enabled": False,
    "verified_phone_label_enabled": False,
    "creator_subscriptions_tweet_preview_api_enabled": True,
    "responsive_web_graphql_timeline_navigation_enabled": True,
    "responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
    "premium_content_api_read_enabled": False,
    "communities_web_enable_tweet_community_results_fetch": True,
    "c9s_tweet_anatomy_moderator_badge_enabled": True,
    "responsive_web_grok_analyze_button_fetch_trends_enabled": False,
    "responsive_web_grok_analyze_post_followups_enabled": True,
    "responsive_web_jetfuel_frame": True,
    "responsive_web_grok_share_attachment_enabled": True,
    "responsive_web_grok_annotations_enabled": True,
    "articles_preview_enabled": True,
    "responsive_web_edit_tweet_api_enabled": True,
    "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
    "view_counts_everywhere_api_enabled": True,
    "longform_notetweets_consumption_enabled": True,
    "responsive_web_twitter_article_tweet_consumption_enabled": True,
    "tweet_awards_web_tipping_enabled": False,
    "content_disclosure_indicator_enabled": True,
    "content_disclosure_ai_generated_indicator_enabled": True,
    "responsive_web_grok_show_grok_translated_post": True,
    "responsive_web_grok_analysis_button_from_backend": True,
    "post_ctas_fetch_enabled": True,
    "freedom_of_speech_not_reach_fetch_enabled": True,
    "standardized_nudges_misinfo": True,
    "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
    "longform_notetweets_rich_text_read_enabled": True,
    "longform_notetweets_inline_media_enabled": False,
    "responsive_web_grok_image_annotation_enabled": True,
    "responsive_web_grok_imagine_annotation_enabled": True,
    "responsive_web_grok_community_note_auto_translation_is_enabled": False,
    "responsive_web_enhance_cards_enabled": False,
}

# Features dict that gets updated dynamically from x.com JS bundles
FEATURES = dict(_DEFAULT_FEATURES)

# Module-level caches (not thread-safe — CLI is single-threaded)
_cached_query_ids = {}  # type: Dict[str, str]
_bundles_scanned = False


class TwitterAPIError(RuntimeError):
    """Represents HTTP/network errors from Twitter APIs."""

    def __init__(self, status_code, message):
        # type: (int, str) -> None
        super().__init__(message)
        self.status_code = status_code

def _best_chrome_target():
    # type: () -> str
    """Detect the best available Chrome impersonation target at runtime.

    curl_cffi versions differ in which Chrome targets they ship.
    e.g. 0.14.0 has chrome133a but not chrome133.
    """
    try:
        from curl_cffi.requests import BrowserType
        available = {e.value for e in BrowserType}
    except Exception:
        available = set()

    # Preference order: exact chrome versions, then suffixed variants
    for target in ("chrome133", "chrome133a", "chrome136", "chrome131", "chrome130"):
        if target in available:
            return target
    # Fallback: pick highest chrome* with a pure numeric suffix
    chrome_targets = sorted(
        [v for v in available if v.startswith("chrome") and v.replace("chrome", "").isdigit()],
        key=lambda x: int(x.replace("chrome", "")),
        reverse=True,
    )
    return chrome_targets[0] if chrome_targets else "chrome131"


def _get_cffi_session():
    # type: () -> Any
    """Return shared curl_cffi session with Chrome impersonation and optional proxy."""
    global _cffi_session
    if _cffi_session is None:
        import os
        proxy = os.environ.get("TWITTER_PROXY", "")
        target = _best_chrome_target()
        _cffi_session = _cffi_requests.Session(
            impersonate=target,
            proxies={"https": proxy, "http": proxy} if proxy else None,
        )
        logger.info("curl_cffi impersonating %s", target)
        if proxy:
            logger.info("Using proxy: %s", proxy[:20] + "...")
    return _cffi_session


def _url_fetch(url, headers=None):
    # type: (str, Optional[Dict[str, str]]) -> str
    """URL fetch using curl_cffi for proper TLS fingerprint."""
    session = _get_cffi_session()
    resp = session.get(url, headers=headers or {}, timeout=30)
    resp.raise_for_status()
    return resp.text


def _build_graphql_url(query_id, operation_name, variables, features, field_toggles=None):
    # type: (str, str, Dict[str, Any], Dict[str, Any], Optional[Dict[str, Any]]) -> str
    """Build GraphQL GET URL with encoded variables/features/fieldToggles."""
    url = "https://x.com/i/api/graphql/%s/%s?variables=%s&features=%s" % (
        query_id,
        operation_name,
        urllib.parse.quote(json.dumps(variables, separators=(",", ":"))),
        urllib.parse.quote(json.dumps(features, separators=(",", ":"))),
    )
    if field_toggles:
        url += "&fieldToggles=%s" % urllib.parse.quote(
            json.dumps(field_toggles, separators=(",", ":"))
        )
    return url


def _scan_bundles():
    # type: () -> None
    """Scan Twitter JS bundles and cache queryId mappings."""
    global _bundles_scanned
    if _bundles_scanned:
        return
    _bundles_scanned = True

    try:
        html = _url_fetch("https://x.com", {"user-agent": USER_AGENT})
        script_pattern = re.compile(
            r'(?:src|href)=["\']'
            r'(https://abs\.twimg\.com/responsive-web/client-web[^"\']+\.js)'
            r'["\']'
        )
        script_urls = script_pattern.findall(html)
    except Exception as exc:  # pragma: no cover - network-dependent branch
        logger.warning("Failed to scan JS bundles: %s", exc)
        return

    for script_url in script_urls:
        try:
            bundle = _url_fetch(script_url)
            op_pattern = re.compile(
                r'queryId:\s*"([A-Za-z0-9_-]+)"[^}]{0,200}'
                r'operationName:\s*"([^"]+)"'
            )
            for match in op_pattern.finditer(bundle):
                query_id, operation_name = match.group(1), match.group(2)
                _cached_query_ids.setdefault(operation_name, query_id)
        except Exception:
            continue

    logger.info("Scanned %d JS bundles, cached %d query IDs", len(script_urls), len(_cached_query_ids))


def _update_features_from_html(html):
    # type: (str) -> None
    """Extract live feature flags from x.com HTML and update the global FEATURES dict.

    Twitter embeds feature switch config in inline scripts on the homepage.
    We parse these to keep FEATURES in sync with the current frontend.
    """
    try:
        # Look for feature flags in inline script content
        # Pattern: "featureSwitch":{"...":{"value":true/false},...}
        # Also try: features:{key:!0, key2:!1, ...} in JS bundles
        feature_pattern = re.compile(
            r'"([a-z][a-z0-9_]+)":\s*\{\s*"value"\s*:\s*(true|false)',
            re.IGNORECASE,
        )
        found = 0
        for match in feature_pattern.finditer(html):
            key = match.group(1)
            value = match.group(2).lower() == "true"
            # Only update keys that look like feature flags
            if any(prefix in key for prefix in ("responsive_web_", "rweb_", "longform_", "creator_", "communities_", "c9s_")):
                FEATURES[key] = value
                found += 1
        if found:
            logger.info("Updated %d feature flags from x.com", found)
    except Exception as exc:
        logger.debug("Feature extraction from HTML failed: %s", exc)


def _fetch_from_github(operation_name):
    # type: (str) -> Optional[str]
    """Fetch queryId from community-maintained twitter-openapi file."""
    try:
        payload = _url_fetch(TWITTER_OPENAPI_URL)
        parsed = json.loads(payload)
        operation = parsed.get(operation_name, {})
        query_id = operation.get("queryId")
        if isinstance(query_id, str) and query_id:
            return query_id
    except Exception as exc:  # pragma: no cover - network-dependent branch
        logger.debug("GitHub queryId lookup failed: %s", exc)
    return None


def _invalidate_query_id(operation_name):
    # type: (str) -> None
    """Remove a cached queryId for an operation."""
    _cached_query_ids.pop(operation_name, None)


def _resolve_query_id(operation_name, prefer_fallback=True):
    # type: (str, bool) -> str
    """Resolve queryId using cache, remote sources, and fallback constants."""
    cached = _cached_query_ids.get(operation_name)
    if cached:
        return cached

    fallback = FALLBACK_QUERY_IDS.get(operation_name)
    if prefer_fallback and fallback:
        _cached_query_ids[operation_name] = fallback
        return fallback

    github_query_id = _fetch_from_github(operation_name)
    if github_query_id:
        _cached_query_ids[operation_name] = github_query_id
        return github_query_id

    _scan_bundles()
    cached = _cached_query_ids.get(operation_name)
    if cached:
        return cached

    if fallback:
        _cached_query_ids[operation_name] = fallback
        return fallback

    raise RuntimeError('Cannot resolve queryId for "%s"' % operation_name)


# Hard ceiling to prevent accidental massive fetches
_ABSOLUTE_MAX_COUNT = 500


class TwitterClient:
    """Twitter GraphQL API client using cookie authentication."""

    def __init__(self, auth_token, ct0, rate_limit_config=None, cookie_string=None):
        # type: (str, str, Optional[Dict[str, Any]], Optional[str]) -> None
        self._auth_token = auth_token
        self._ct0 = ct0
        self._cookie_string = cookie_string  # Full browser cookie string
        rl = rate_limit_config or {}
        self._request_delay = float(rl.get("requestDelay", 2.5))
        self._max_retries = int(rl.get("maxRetries", 3))
        self._retry_base_delay = float(rl.get("retryBaseDelay", 5.0))
        self._max_count = min(int(rl.get("maxCount", 200)), _ABSOLUTE_MAX_COUNT)
        self._client_transaction = None  # type: Optional[Any]
        self._ct_init_attempted = False
        # Eagerly initialize ClientTransaction on construction
        self._ensure_client_transaction()

    def fetch_home_timeline(self, count=20):
        # type: (int) -> List[Tweet]
        """Fetch home timeline tweets."""
        return self._fetch_timeline(
            "HomeTimeline",
            count,
            lambda data: _deep_get(data, "data", "home", "home_timeline_urt", "instructions"),
        )

    def fetch_following_feed(self, count=20):
        # type: (int) -> List[Tweet]
        """Fetch chronological following feed."""
        return self._fetch_timeline(
            "HomeLatestTimeline",
            count,
            lambda data: _deep_get(data, "data", "home", "home_timeline_urt", "instructions"),
        )

    def fetch_bookmarks(self, count=50):
        # type: (int) -> List[Tweet]
        """Fetch bookmarked tweets."""
        def get_instructions(data):
            # type: (Any) -> Any
            instructions = _deep_get(data, "data", "bookmark_timeline", "timeline", "instructions")
            if instructions is None:
                instructions = _deep_get(data, "data", "bookmark_timeline_v2", "timeline", "instructions")
            return instructions

        return self._fetch_timeline("Bookmarks", count, get_instructions)

    def fetch_user(self, screen_name):
        # type: (str) -> UserProfile
        """Fetch user profile by screen name."""
        variables = {
            "screen_name": screen_name,
            "withSafetyModeUserFields": True,
        }
        features = {
            "hidden_profile_subscriptions_enabled": True,
            "rweb_tipjar_consumption_enabled": True,
            "responsive_web_graphql_exclude_directive_enabled": True,
            "verified_phone_label_enabled": False,
            "subscriptions_verification_info_is_identity_verified_enabled": True,
            "subscriptions_verification_info_verified_since_enabled": True,
            "highlights_tweets_tab_ui_enabled": True,
            "responsive_web_twitter_article_notes_tab_enabled": True,
            "subscriptions_feature_can_gift_premium": True,
            "creator_subscriptions_tweet_preview_api_enabled": True,
            "responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
            "responsive_web_graphql_timeline_navigation_enabled": True,
        }
        data = self._graphql_get("UserByScreenName", variables, features)
        result = _deep_get(data, "data", "user", "result")
        if not result:
            raise RuntimeError("User @%s not found" % screen_name)

        legacy = result.get("legacy", {})
        return UserProfile(
            id=result.get("rest_id", ""),
            name=legacy.get("name", ""),
            screen_name=legacy.get("screen_name", screen_name),
            bio=legacy.get("description", ""),
            location=legacy.get("location", ""),
            url=_deep_get(legacy, "entities", "url", "urls", 0, "expanded_url") or "",
            followers_count=_parse_int(legacy.get("followers_count"), 0),
            following_count=_parse_int(legacy.get("friends_count"), 0),
            tweets_count=_parse_int(legacy.get("statuses_count"), 0),
            likes_count=_parse_int(legacy.get("favourites_count"), 0),
            verified=bool(result.get("is_blue_verified") or legacy.get("verified", False)),
            profile_image_url=legacy.get("profile_image_url_https", ""),
            created_at=legacy.get("created_at", ""),
        )

    def fetch_user_tweets(self, user_id, count=20):
        # type: (str, int) -> List[Tweet]
        """Fetch tweets posted by a user."""
        return self._fetch_timeline(
            "UserTweets",
            count,
            lambda data: _deep_get(data, "data", "user", "result", "timeline_v2", "timeline", "instructions"),
            extra_variables={
                "userId": user_id,
                "withQuickPromoteEligibilityTweetFields": True,
                "withVoice": True,
                "withV2Timeline": True,
            },
        )

    def fetch_user_likes(self, user_id, count=20):
        # type: (str, int) -> List[Tweet]
        """Fetch tweets liked by a user."""
        return self._fetch_timeline(
            "Likes",
            count,
            lambda data: _deep_get(data, "data", "user", "result", "timeline_v2", "timeline", "instructions"),
            extra_variables={
                "userId": user_id,
                "includePromotedContent": False,
                "withClientEventToken": False,
                "withBirdwatchNotes": False,
                "withVoice": True,
            },
            override_base_variables=True,
        )

    def fetch_search(self, query, count=20, product="Top"):
        # type: (str, int, str) -> List[Tweet]
        """Search tweets by query.

        Args:
            query: Search query string.
            count: Max number of tweets to return.
            product: Search tab — "Top", "Latest", "People", "Photos", "Videos".
        """
        return self._fetch_timeline(
            "SearchTimeline",
            count,
            lambda data: _deep_get(
                data, "data", "search_by_raw_query", "search_timeline", "timeline", "instructions",
            ),
            extra_variables={
                "rawQuery": query,
                "querySource": "typed_query",
                "product": product,
            },
            override_base_variables=True,
        )

    def fetch_tweet_detail(self, tweet_id, count=20):
        # type: (str, int) -> List[Tweet]
        """Fetch a tweet and its conversation thread (replies)."""
        return self._fetch_timeline(
            "TweetDetail",
            count,
            lambda data: _deep_get(data, "data", "tweetResult", "result", "timeline", "instructions")
            or _deep_get(data, "data", "threaded_conversation_with_injections_v2", "instructions"),
            extra_variables={
                "focalTweetId": tweet_id,
                "referrer": "tweet",
                "with_rux_injections": False,
                "includePromotedContent": True,
                "rankingMode": "Relevance",
                "withCommunity": True,
                "withQuickPromoteEligibilityTweetFields": True,
                "withBirdwatchNotes": True,
                "withVoice": True,
            },
            override_base_variables=True,
            field_toggles={
                "withArticleRichContentState": True,
                "withArticlePlainText": False,
                "withGrokAnalyze": False,
                "withDisallowedReplyControls": False,
            },
        )

    def fetch_list_timeline(self, list_id, count=20):
        # type: (str, int) -> List[Tweet]
        """Fetch tweets from a Twitter List."""
        return self._fetch_timeline(
            "ListLatestTweetsTimeline",
            count,
            lambda data: _deep_get(data, "data", "list", "tweets_timeline", "timeline", "instructions"),
            extra_variables={"listId": list_id},
            override_base_variables=True,
        )

    def fetch_followers(self, user_id, count=20):
        # type: (str, int) -> List[UserProfile]
        """Fetch followers of a user."""
        return self._fetch_user_list(
            "Followers", user_id, count,
            lambda data: _deep_get(data, "data", "user", "result", "timeline", "timeline", "instructions"),
        )

    def fetch_following(self, user_id, count=20):
        # type: (str, int) -> List[UserProfile]
        """Fetch users that a user is following."""
        return self._fetch_user_list(
            "Following", user_id, count,
            lambda data: _deep_get(data, "data", "user", "result", "timeline", "timeline", "instructions"),
        )

    # ── Write operations ────────────────────────────────────────────────

    def _write_delay(self):
        # type: () -> None
        """Sleep a random interval after write operations to avoid rate limits."""
        delay = random.uniform(1.5, 4.0)
        logger.debug("Write operation delay: %.1fs", delay)
        time.sleep(delay)

    def create_tweet(self, text, reply_to_id=None):
        # type: (str, Optional[str]) -> str
        """Post a new tweet.  Returns the new tweet ID."""
        variables = {
            "tweet_text": text,
            "media": {"media_entities": [], "possibly_sensitive": False},
            "semantic_annotation_ids": [],
            "dark_request": False,
        }  # type: Dict[str, Any]
        if reply_to_id:
            variables["reply"] = {
                "in_reply_to_tweet_id": reply_to_id,
                "exclude_reply_user_ids": [],
            }
        data = self._graphql_post("CreateTweet", variables, FEATURES)
        self._write_delay()
        result = _deep_get(data, "data", "create_tweet", "tweet_results", "result")
        if result:
            return result.get("rest_id", "")
        raise RuntimeError("Failed to create tweet")

    def delete_tweet(self, tweet_id):
        # type: (str) -> bool
        """Delete a tweet.  Returns True on success."""
        variables = {"tweet_id": tweet_id, "dark_request": False}
        self._graphql_post("DeleteTweet", variables)
        self._write_delay()
        return True

    def like_tweet(self, tweet_id):
        # type: (str) -> bool
        """Like a tweet.  Returns True on success."""
        self._graphql_post("FavoriteTweet", {"tweet_id": tweet_id})
        self._write_delay()
        return True

    def unlike_tweet(self, tweet_id):
        # type: (str) -> bool
        """Unlike a tweet.  Returns True on success."""
        self._graphql_post("UnfavoriteTweet", {"tweet_id": tweet_id, "dark_request": False})
        self._write_delay()
        return True

    def retweet(self, tweet_id):
        # type: (str) -> bool
        """Retweet a tweet.  Returns True on success."""
        self._graphql_post("CreateRetweet", {"tweet_id": tweet_id, "dark_request": False})
        self._write_delay()
        return True

    def unretweet(self, tweet_id):
        # type: (str) -> bool
        """Undo a retweet.  Returns True on success."""
        self._graphql_post("DeleteRetweet", {"source_tweet_id": tweet_id, "dark_request": False})
        self._write_delay()
        return True

    def bookmark_tweet(self, tweet_id):
        # type: (str) -> bool
        """Bookmark a tweet.  Returns True on success."""
        self._graphql_post("CreateBookmark", {"tweet_id": tweet_id})
        self._write_delay()
        return True

    def unbookmark_tweet(self, tweet_id):
        # type: (str) -> bool
        """Remove a tweet from bookmarks.  Returns True on success."""
        self._graphql_post("DeleteBookmark", {"tweet_id": tweet_id})
        self._write_delay()
        return True

    def _fetch_timeline(self, operation_name, count, get_instructions, extra_variables=None, override_base_variables=False, field_toggles=None):
        # type: (str, int, Callable[[Any], Any], Optional[Dict[str, Any]], bool, Optional[Dict[str, Any]]) -> List[Tweet]
        """Generic timeline fetcher with pagination and deduplication.

        Args:
            override_base_variables: If True, use only extra_variables + count/cursor
                instead of the default timeline base variables. Needed for
                endpoints like SearchTimeline that reject unknown variables.
        """
        if count <= 0:
            return []

        # Enforce max count cap
        count = min(count, self._max_count)

        tweets = []  # type: List[Tweet]
        seen_ids = set()  # type: Set[str]
        cursor = None  # type: Optional[str]
        attempts = 0
        max_attempts = int(math.ceil(count / 20.0)) + 2

        while len(tweets) < count and attempts < max_attempts:
            attempts += 1
            if override_base_variables:
                variables = {"count": min(count - len(tweets) + 5, 40)}  # type: Dict[str, Any]
            else:
                variables = {
                    "count": min(count - len(tweets) + 5, 40),
                    "includePromotedContent": False,
                    "latestControlAvailable": True,
                    "requestContext": "launch",
                }  # type: Dict[str, Any]
            if extra_variables:
                variables.update(extra_variables)
            if cursor:
                variables["cursor"] = cursor

            data = self._graphql_get(operation_name, variables, FEATURES, field_toggles=field_toggles)
            new_tweets, next_cursor = self._parse_timeline_response(data, get_instructions)

            for tweet in new_tweets:
                if tweet.id and tweet.id not in seen_ids:
                    seen_ids.add(tweet.id)
                    tweets.append(tweet)

            if not next_cursor or not new_tweets:
                break
            cursor = next_cursor

            # Rate-limit: sleep between paginated requests with jitter
            if len(tweets) < count and self._request_delay > 0:
                jitter = self._request_delay * random.uniform(0.7, 1.5)
                logger.debug("Sleeping %.1fs between requests", jitter)
                time.sleep(jitter)

        return tweets[:count]

    def _graphql_get(self, operation_name, variables, features, field_toggles=None):
        # type: (str, Dict[str, Any], Dict[str, Any], Optional[Dict[str, Any]]) -> Dict[str, Any]
        """Issue GraphQL GET request with automatic stale-fallback retry."""
        query_id = _resolve_query_id(operation_name, prefer_fallback=True)
        using_fallback = query_id == FALLBACK_QUERY_IDS.get(operation_name)
        url = _build_graphql_url(query_id, operation_name, variables, features, field_toggles)

        try:
            return self._api_get(url)
        except TwitterAPIError as exc:
            # Fallback query IDs can go stale. Retry with live lookup if 404.
            if exc.status_code == 404 and using_fallback:
                logger.info("Retrying %s with live queryId after 404", operation_name)
                _invalidate_query_id(operation_name)
                refreshed_query_id = _resolve_query_id(operation_name, prefer_fallback=False)
                retry_url = _build_graphql_url(refreshed_query_id, operation_name, variables, features, field_toggles)
                return self._api_get(retry_url)
            raise RuntimeError(str(exc))

    def _ensure_client_transaction(self):
        # type: () -> None
        """Initialize ClientTransaction for x-client-transaction-id header.

        Also attempts to extract live feature flags from JS bundles.
        """
        if self._ct_init_attempted:
            return
        self._ct_init_attempted = True
        try:
            # Use curl_cffi for ClientTransaction init to maintain consistent
            # Chrome TLS fingerprint. Using Python requests here would leak
            # a different TLS fingerprint on the same IP — a detection vector.
            cffi_session = _get_cffi_session()
            ct_headers = _gen_ct_headers()
            home_page = cffi_session.get(
                "https://x.com", headers=ct_headers, timeout=10,
            )
            home_page_response = bs4.BeautifulSoup(home_page.content, "html.parser")
            ondemand_url = get_ondemand_file_url(response=home_page_response)
            ondemand_file = cffi_session.get(
                ondemand_url, headers=ct_headers, timeout=10,
            )
            self._client_transaction = ClientTransaction(
                home_page_response=home_page_response,
                ondemand_file_response=ondemand_file.text,
            )
            logger.info("ClientTransaction initialized for x-client-transaction-id")

            # Try to extract live FEATURES from the homepage JS bundles
            _update_features_from_html(home_page.text)
        except Exception as exc:
            logger.warning("Failed to init ClientTransaction: %s", exc)

    def _build_headers(self, url="", method="GET"):
        # type: (str, str) -> Dict[str, str]
        """Build shared headers for authenticated API calls."""
        headers = {
            "Authorization": "Bearer %s" % BEARER_TOKEN,
            "Cookie": self._cookie_string or "auth_token=%s; ct0=%s" % (self._auth_token, self._ct0),
            "X-Csrf-Token": self._ct0,
            "X-Twitter-Active-User": "yes",
            "X-Twitter-Auth-Type": "OAuth2Session",
            "X-Twitter-Client-Language": "en",
            "User-Agent": USER_AGENT,
            "Origin": "https://x.com",
            "Referer": "https://x.com",
            "Accept": "*/*",
            "Accept-Language": "en-US,en;q=0.9",
            "sec-ch-ua": SEC_CH_UA,
            "sec-ch-ua-mobile": SEC_CH_UA_MOBILE,
            "sec-ch-ua-platform": SEC_CH_UA_PLATFORM,
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
        }
        if method == "POST":
            headers["Content-Type"] = "application/json"
        # Generate x-client-transaction-id if available
        if self._client_transaction and url:
            try:
                path = urllib.parse.urlparse(url).path
                tid = self._client_transaction.generate_transaction_id(
                    method=method, path=path,
                )
                headers["X-Client-Transaction-Id"] = tid
            except Exception as exc:
                logger.debug("Failed to generate transaction id: %s", exc)
        return headers

    def _api_get(self, url):
        # type: (str) -> Dict[str, Any]
        """Make authenticated GET request to Twitter API."""
        return self._api_request(url, method="GET")

    def _graphql_post(self, operation_name, variables, features=None):
        # type: (str, Dict[str, Any], Optional[Dict[str, Any]]) -> Dict[str, Any]
        """Issue GraphQL POST request with automatic stale-fallback retry."""
        query_id = _resolve_query_id(operation_name, prefer_fallback=True)
        using_fallback = query_id == FALLBACK_QUERY_IDS.get(operation_name)

        def _do_post(qid):
            # type: (str) -> Dict[str, Any]
            url = "https://x.com/i/api/graphql/%s/%s" % (qid, operation_name)
            body = {"variables": variables, "queryId": qid}  # type: Dict[str, Any]
            if features:
                body["features"] = features
            return self._api_request(url, method="POST", body=body)

        try:
            return _do_post(query_id)
        except TwitterAPIError as exc:
            if exc.status_code == 404 and using_fallback:
                logger.info("Retrying POST %s with live queryId after 404", operation_name)
                _invalidate_query_id(operation_name)
                refreshed = _resolve_query_id(operation_name, prefer_fallback=False)
                return _do_post(refreshed)
            raise RuntimeError(str(exc))

    def _api_request(self, url, method="GET", body=None):
        # type: (str, str, Optional[Dict[str, Any]]) -> Dict[str, Any]
        """Make authenticated request to Twitter API with retry on rate limits.

        Uses curl_cffi for Chrome TLS/JA3/HTTP2 fingerprint impersonation.
        Handles both GET and POST. Retries on HTTP 429 and JSON error code 88.
        """
        headers = self._build_headers(url=url, method=method)
        session = _get_cffi_session()
        json_body = body  # curl_cffi handles JSON serialization

        for attempt in range(self._max_retries + 1):
            try:
                if method == "POST":
                    response = session.post(
                        url, headers=headers, json=json_body, timeout=30,
                    )
                else:
                    response = session.get(url, headers=headers, timeout=30)

                status_code = response.status_code
                if status_code == 429 and attempt < self._max_retries:
                    wait = self._retry_base_delay * (2 ** attempt) + random.uniform(0, 2)
                    logger.warning(
                        "Rate limited (429), retrying in %.1fs (attempt %d/%d)",
                        wait, attempt + 1, self._max_retries,
                    )
                    time.sleep(wait)
                    continue
                if status_code >= 400:
                    message = "Twitter API error %d: %s" % (status_code, response.text[:500])
                    raise TwitterAPIError(status_code, message)

                payload = response.text
            except TwitterAPIError:
                raise
            except Exception as exc:
                raise TwitterAPIError(0, "Twitter API network error: %s" % exc)

            try:
                parsed = json.loads(payload)
            except (json.JSONDecodeError, ValueError):
                raise TwitterAPIError(0, "Twitter API returned invalid JSON")

            if isinstance(parsed, dict) and parsed.get("errors"):
                err_msg = parsed["errors"][0].get("message", "Unknown error")
                # Rate limit can also surface as a JSON error (code 88)
                err_code = parsed["errors"][0].get("code", 0)
                if err_code == 88 and attempt < self._max_retries:
                    wait = self._retry_base_delay * (2 ** attempt) + random.uniform(0, 2)
                    logger.warning(
                        "Rate limited (code 88), retrying in %.1fs (attempt %d/%d)",
                        wait, attempt + 1, self._max_retries,
                    )
                    time.sleep(wait)
                    continue
                # Write operation rate limits (retweet/like/bookmark limits)
                # Code 348 = "retweet limit", 327 = "already retweeted"
                # Provide user-friendly message
                if err_code in (348, 349):
                    raise TwitterAPIError(
                        429, "Rate limited: %s (try again later, recommended wait: 15+ minutes)" % err_msg
                    )
                raise TwitterAPIError(0, "Twitter API returned errors: %s" % err_msg)

            # GraphQL write mutations return errors in data.errors (separate from top-level)
            if isinstance(parsed, dict) and "data" in parsed:
                data_obj = parsed["data"]
                if isinstance(data_obj, dict):
                    for key, val in data_obj.items():
                        if isinstance(val, dict) and val.get("errors"):
                            inner_errors = val["errors"]
                            if inner_errors:
                                inner_msg = inner_errors[0].get("message", "Unknown error")
                                raise TwitterAPIError(0, "Twitter API: %s" % inner_msg)

            return parsed

        # Should not be reached, but just in case
        raise TwitterAPIError(429, "Rate limited after %d retries" % self._max_retries)

    def _fetch_user_list(self, operation_name, user_id, count, get_instructions):
        # type: (str, str, int, Callable[[Any], Any]) -> List[UserProfile]
        """Generic user list fetcher (for followers/following) with pagination."""
        if count <= 0:
            return []
        count = min(count, self._max_count)
        users = []  # type: List[UserProfile]
        seen_ids = set()  # type: Set[str]
        cursor = None  # type: Optional[str]
        attempts = 0
        max_attempts = int(math.ceil(count / 20.0)) + 2

        while len(users) < count and attempts < max_attempts:
            attempts += 1
            variables = {
                "userId": user_id,
                "count": min(count - len(users) + 5, 40),
                "includePromotedContent": False,
            }  # type: Dict[str, Any]
            if cursor:
                variables["cursor"] = cursor

            data = self._graphql_get(operation_name, variables, FEATURES)
            instructions = get_instructions(data)
            if not instructions:
                logger.warning("No user list instructions found")
                break

            new_users = []  # type: List[UserProfile]
            next_cursor = None  # type: Optional[str]
            for instruction in instructions:
                entries = instruction.get("entries", [])
                for entry in entries:
                    content = entry.get("content", {})
                    entry_type = content.get("entryType", "")

                    if entry_type == "TimelineTimelineItem":
                        item = content.get("itemContent", {})
                        user_results = _deep_get(item, "user_results", "result")
                        if user_results:
                            user = self._parse_user_result(user_results)
                            if user:
                                new_users.append(user)
                    elif entry_type == "TimelineTimelineCursor":
                        if content.get("cursorType") == "Bottom":
                            next_cursor = content.get("value")

            for user in new_users:
                if user.id and user.id not in seen_ids:
                    seen_ids.add(user.id)
                    users.append(user)

            if not next_cursor or not new_users:
                break
            cursor = next_cursor

            if len(users) < count and self._request_delay > 0:
                time.sleep(self._request_delay * random.uniform(0.7, 1.5))

        return users[:count]

    @staticmethod
    def _parse_user_result(user_data):
        # type: (Dict[str, Any]) -> Optional[UserProfile]
        """Parse a user result object into UserProfile."""
        if user_data.get("__typename") == "UserUnavailable":
            return None
        legacy = user_data.get("legacy", {})
        if not legacy:
            return None
        return UserProfile(
            id=user_data.get("rest_id", ""),
            name=legacy.get("name", ""),
            screen_name=legacy.get("screen_name", ""),
            bio=legacy.get("description", ""),
            location=legacy.get("location", ""),
            url=_deep_get(legacy, "entities", "url", "urls", 0, "expanded_url") or "",
            followers_count=legacy.get("followers_count", 0),
            following_count=legacy.get("friends_count", 0),
            tweets_count=legacy.get("statuses_count", 0),
            likes_count=legacy.get("favourites_count", 0),
            verified=user_data.get("is_blue_verified", False) or legacy.get("verified", False),
            profile_image_url=legacy.get("profile_image_url_https", ""),
            created_at=legacy.get("created_at", ""),
        )

    def _parse_timeline_response(self, data, get_instructions):
        # type: (Any, Callable[[Any], Any]) -> Tuple[List[Tweet], Optional[str]]
        """Parse timeline GraphQL response into tweets and next cursor."""
        tweets = []  # type: List[Tweet]
        next_cursor = None  # type: Optional[str]

        instructions = get_instructions(data)
        if not isinstance(instructions, list):
            logger.warning("No timeline instructions found")
            return tweets, next_cursor

        for instruction in instructions:
            entries = instruction.get("entries") or instruction.get("moduleItems") or []
            for entry in entries:
                content = entry.get("content", {})
                next_cursor = _extract_cursor(content) or next_cursor

                item_content = content.get("itemContent", {})
                result = _deep_get(item_content, "tweet_results", "result")
                if result:
                    tweet = self._parse_tweet_result(result)
                    if tweet:
                        tweets.append(tweet)

                for nested_item in content.get("items", []):
                    nested_result = _deep_get(
                        nested_item,
                        "item",
                        "itemContent",
                        "tweet_results",
                        "result",
                    )
                    if nested_result:
                        tweet = self._parse_tweet_result(nested_result)
                        if tweet:
                            tweets.append(tweet)

        return tweets, next_cursor

    def _parse_tweet_result(self, result, depth=0):
        # type: (Dict[str, Any], int) -> Optional[Tweet]
        """Parse a single TweetResult into a Tweet dataclass."""
        if depth > 2:
            return None

        tweet_data = result
        if result.get("__typename") == "TweetWithVisibilityResults" and result.get("tweet"):
            tweet_data = result["tweet"]
        if tweet_data.get("__typename") == "TweetTombstone":
            return None

        legacy = tweet_data.get("legacy")
        core = tweet_data.get("core")
        if not isinstance(legacy, dict) or not isinstance(core, dict):
            return None

        user = _deep_get(core, "user_results", "result") or {}
        user_legacy = user.get("legacy", {})
        user_core = user.get("core", {})

        is_retweet = bool(_deep_get(legacy, "retweeted_status_result", "result"))
        actual_data = tweet_data
        actual_legacy = legacy
        actual_user = user
        actual_user_legacy = user_legacy

        if is_retweet:
            retweet_result = _deep_get(legacy, "retweeted_status_result", "result") or {}
            if retweet_result.get("__typename") == "TweetWithVisibilityResults" and retweet_result.get("tweet"):
                retweet_result = retweet_result["tweet"]
            rt_legacy = retweet_result.get("legacy")
            rt_core = retweet_result.get("core")
            if isinstance(rt_legacy, dict) and isinstance(rt_core, dict):
                actual_data = retweet_result
                actual_legacy = rt_legacy
                actual_user = _deep_get(rt_core, "user_results", "result") or {}
                actual_user_legacy = actual_user.get("legacy", {})

        media = _extract_media(actual_legacy)
        urls = [item.get("expanded_url", "") for item in _deep_get(actual_legacy, "entities", "urls") or []]
        quoted = _deep_get(actual_data, "quoted_status_result", "result")
        quoted_tweet = self._parse_tweet_result(quoted, depth=depth + 1) if isinstance(quoted, dict) else None
        author = _extract_author(actual_user, actual_user_legacy)

        retweeted_by = None  # type: Optional[str]
        if is_retweet:
            retweeted_by = user_core.get("screen_name") or user_legacy.get("screen_name", "unknown")

        return Tweet(
            id=actual_data.get("rest_id", ""),
            text=actual_legacy.get("full_text", ""),
            author=author,
            metrics=Metrics(
                likes=_parse_int(actual_legacy.get("favorite_count"), 0),
                retweets=_parse_int(actual_legacy.get("retweet_count"), 0),
                replies=_parse_int(actual_legacy.get("reply_count"), 0),
                quotes=_parse_int(actual_legacy.get("quote_count"), 0),
                views=_parse_int(_deep_get(actual_data, "views", "count"), 0),
                bookmarks=_parse_int(actual_legacy.get("bookmark_count"), 0),
            ),
            created_at=actual_legacy.get("created_at", ""),
            media=media,
            urls=urls,
            is_retweet=is_retweet,
            retweeted_by=retweeted_by,
            quoted_tweet=quoted_tweet,
            lang=actual_legacy.get("lang", ""),
        )


def _extract_media(legacy):
    # type: (Dict[str, Any]) -> List[TweetMedia]
    """Extract media items from tweet legacy data."""
    media = []  # type: List[TweetMedia]
    for media_item in _deep_get(legacy, "extended_entities", "media") or []:
        media_type = media_item.get("type", "")
        if media_type == "photo":
            media.append(
                TweetMedia(
                    type="photo",
                    url=media_item.get("media_url_https", ""),
                    width=_deep_get(media_item, "original_info", "width"),
                    height=_deep_get(media_item, "original_info", "height"),
                )
            )
        elif media_type in {"video", "animated_gif"}:
            variants = media_item.get("video_info", {}).get("variants", [])
            mp4_variants = [v for v in variants if v.get("content_type") == "video/mp4"]
            mp4_variants.sort(key=lambda v: v.get("bitrate", 0), reverse=True)
            media.append(
                TweetMedia(
                    type=media_type,
                    url=mp4_variants[0]["url"] if mp4_variants else media_item.get("media_url_https", ""),
                    width=_deep_get(media_item, "original_info", "width"),
                    height=_deep_get(media_item, "original_info", "height"),
                )
            )
    return media


def _extract_author(user_data, user_legacy):
    # type: (Dict[str, Any], Dict[str, Any]) -> Author
    """Extract Author from user result data."""
    user_core = user_data.get("core", {})
    return Author(
        id=user_data.get("rest_id", ""),
        name=user_core.get("name") or user_legacy.get("name") or user_data.get("name", "Unknown"),
        screen_name=(
            user_core.get("screen_name")
            or user_legacy.get("screen_name")
            or user_data.get("screen_name", "unknown")
        ),
        profile_image_url=(
            user_data.get("avatar", {}).get("image_url")
            or user_legacy.get("profile_image_url_https", "")
        ),
        verified=bool(user_data.get("is_blue_verified") or user_legacy.get("verified", False)),
    )


def _deep_get(data, *keys):
    # type: (Any, *Any) -> Any
    """Safely get nested dict/list values.  Supports int keys for list access."""
    current = data
    for key in keys:
        if isinstance(key, int):
            if isinstance(current, list) and 0 <= key < len(current):
                current = current[key]
            else:
                return None
        elif isinstance(current, dict):
            current = current.get(key)
        else:
            return None
    return current


def _extract_cursor(content):
    # type: (Dict[str, Any]) -> Optional[str]
    """Extract Bottom pagination cursor from timeline content."""
    if content.get("cursorType") == "Bottom":
        return content.get("value")
    return None


def _parse_int(value, default):
    # type: (Any, int) -> int
    """Best-effort integer conversion.  Handles commas and float strings."""
    try:
        text = str(value).replace(",", "").strip()
        if not text:
            return default
        return int(float(text))
    except (TypeError, ValueError):
        return default