refactor: split client.py into graphql.py + parser.py modules
Split the monolithic client.py (1341 lines) into three focused modules: - graphql.py (~200 lines): queryId resolution, URL building, JS bundle scanning, feature flag management - parser.py (~270 lines): Tweet/User/Media/Article parsing, utility functions (_deep_get, _parse_int, _extract_cursor, _extract_media) - client.py (~700 lines): TwitterClient class with HTTP engine, anti-detection, session management, and all public API methods Backward compatibility: client.py re-exports all previously public symbols so existing test imports work without modification. 88/88 tests pass.
This commit is contained in:
221
twitter_cli/graphql.py
Normal file
221
twitter_cli/graphql.py
Normal file
@@ -0,0 +1,221 @@
|
||||
"""GraphQL infrastructure for Twitter API.
|
||||
|
||||
Handles queryId resolution, URL building, JS bundle scanning,
|
||||
and feature flag management.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import urllib.parse
|
||||
from typing import Dict
|
||||
|
||||
from .exceptions import QueryIdError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── Community OpenAPI queryId source ─────────────────────────────────────
|
||||
TWITTER_OPENAPI_URL = (
|
||||
"https://raw.githubusercontent.com/fa0311/"
|
||||
"twitter-openapi/refs/heads/main/src/config/placeholder.json"
|
||||
)
|
||||
|
||||
# ── Fallback (hardcoded) queryIds ────────────────────────────────────────
|
||||
FALLBACK_QUERY_IDS = {
|
||||
"HomeTimeline": "HCosKfLNW1AcOo3la3mMgg",
|
||||
"HomeLatestTimeline": "U0cdisy7QFIoTfu3-Okw0A",
|
||||
"UserByScreenName": "qRednkZG-rn1P6b48NINmQ",
|
||||
"UserTweets": "E3opETHurmVJflFsUBVuUQ",
|
||||
"TweetDetail": "nBS-WpgA6ZG0CyNHD517JQ",
|
||||
"Likes": "aeJWz7GtGNHHO2Z3GrjCWg",
|
||||
"SearchTimeline": "MJpyQGqgklrVl_0X9gNy3A",
|
||||
"Bookmarks": "uzboyXSHSJrR-mGJqep0TQ",
|
||||
"ListLatestTweetsTimeline": "ZBbXrl0FVnTqp7K6EAADog",
|
||||
"Followers": "t-BPOrMIduGUJWO_LxcvNQ",
|
||||
"Following": "iSicc7LrzWGBgDPL0tM_TQ",
|
||||
"CreateTweet": "bDE2rBtZb3uyrczSZ_pI9g",
|
||||
"DeleteTweet": "VaenaVgh5q5ih7kvyVjgtg",
|
||||
"FavoriteTweet": "lI07N6Otwv1PhnEgXILM7A",
|
||||
"UnfavoriteTweet": "ZYKSe-w7KEslx3JhSIk5LA",
|
||||
"CreateRetweet": "ojPdsZsimiJrUGLR1sjVsA",
|
||||
"DeleteRetweet": "iQtK4dl5hBmXewYZuEOKVw",
|
||||
"CreateBookmark": "aoDbu3RHznuiSkQ9aNM67Q",
|
||||
"DeleteBookmark": "Wlmlj2-xISYCixDmuS8KNg",
|
||||
}
|
||||
|
||||
# ── Default feature flags ────────────────────────────────────────────────
|
||||
_DEFAULT_FEATURES = {
|
||||
"responsive_web_graphql_exclude_directive_enabled": True,
|
||||
"verified_phone_label_enabled": False,
|
||||
"creator_subscriptions_tweet_preview_api_enabled": True,
|
||||
"responsive_web_graphql_timeline_navigation_enabled": True,
|
||||
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
|
||||
"c9s_tweet_anatomy_moderator_badge_enabled": True,
|
||||
"tweetypie_unmention_optimization_enabled": True,
|
||||
"responsive_web_edit_tweet_api_enabled": True,
|
||||
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
|
||||
"view_counts_everywhere_api_enabled": True,
|
||||
"longform_notetweets_consumption_enabled": True,
|
||||
"responsive_web_twitter_article_tweet_consumption_enabled": True,
|
||||
"tweet_awards_web_tipping_enabled": False,
|
||||
"longform_notetweets_rich_text_read_enabled": True,
|
||||
"longform_notetweets_inline_media_enabled": True,
|
||||
"rweb_video_timestamps_enabled": True,
|
||||
"responsive_web_media_download_video_enabled": True,
|
||||
"freedom_of_speech_not_reach_fetch_enabled": True,
|
||||
"standardized_nudges_misinfo": True,
|
||||
"responsive_web_enhance_cards_enabled": False,
|
||||
}
|
||||
|
||||
# Features dict that gets updated dynamically from x.com JS bundles
|
||||
FEATURES = dict(_DEFAULT_FEATURES)
|
||||
|
||||
# Module-level caches (not thread-safe — CLI is single-threaded)
|
||||
_cached_query_ids: Dict[str, str] = {}
|
||||
_bundles_scanned = False
|
||||
|
||||
|
||||
def _build_graphql_url(query_id, operation_name, variables, features, field_toggles=None):
|
||||
# type: (str, str, Dict[str, Any], Dict[str, Any], Optional[Dict[str, Any]]) -> str
|
||||
"""Build GraphQL GET URL with encoded variables/features/fieldToggles.
|
||||
|
||||
Only includes True-valued feature flags in the URL to avoid 414 URI Too Long.
|
||||
Twitter's API defaults missing features to False.
|
||||
"""
|
||||
# Compact features: omit False values to keep URL under server limits
|
||||
compact_features = {k: v for k, v in features.items() if v is not False}
|
||||
url = "https://x.com/i/api/graphql/%s/%s?variables=%s&features=%s" % (
|
||||
query_id,
|
||||
operation_name,
|
||||
urllib.parse.quote(json.dumps(variables, separators=(",", ":"))),
|
||||
urllib.parse.quote(json.dumps(compact_features, separators=(",", ":"))),
|
||||
)
|
||||
if field_toggles:
|
||||
url += "&fieldToggles=%s" % urllib.parse.quote(
|
||||
json.dumps(field_toggles, separators=(",", ":"))
|
||||
)
|
||||
return url
|
||||
|
||||
|
||||
def _scan_bundles(url_fetch_fn):
|
||||
# type: (Any) -> None
|
||||
"""Scan Twitter JS bundles and cache queryId mappings.
|
||||
|
||||
Args:
|
||||
url_fetch_fn: Function to fetch URLs (injected to avoid circular import).
|
||||
"""
|
||||
global _bundles_scanned
|
||||
if _bundles_scanned:
|
||||
return
|
||||
_bundles_scanned = True
|
||||
|
||||
try:
|
||||
from .constants import get_user_agent
|
||||
html = url_fetch_fn("https://x.com", {"user-agent": get_user_agent()})
|
||||
script_pattern = re.compile(
|
||||
r'(?:src|href)=["\']'
|
||||
r'(https://abs\.twimg\.com/responsive-web/client-web[^"\']+'
|
||||
r'\.js)'
|
||||
r'["\']'
|
||||
)
|
||||
script_urls = script_pattern.findall(html)
|
||||
except Exception as exc: # pragma: no cover - network-dependent branch
|
||||
logger.warning("Failed to scan JS bundles: %s", exc)
|
||||
return
|
||||
|
||||
for script_url in script_urls:
|
||||
try:
|
||||
bundle = url_fetch_fn(script_url)
|
||||
op_pattern = re.compile(
|
||||
r'queryId:\s*"([A-Za-z0-9_-]+)"[^}]{0,200}'
|
||||
r'operationName:\s*"([^"]+)"'
|
||||
)
|
||||
for match in op_pattern.finditer(bundle):
|
||||
query_id, operation_name = match.group(1), match.group(2)
|
||||
_cached_query_ids.setdefault(operation_name, query_id)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
logger.info("Scanned %d JS bundles, cached %d query IDs", len(script_urls), len(_cached_query_ids))
|
||||
|
||||
|
||||
def _update_features_from_html(html):
|
||||
# type: (str) -> None
|
||||
"""Extract live feature flags from x.com HTML and update the global FEATURES dict.
|
||||
|
||||
Twitter embeds feature switch config in inline scripts on the homepage.
|
||||
We parse these to keep FEATURES in sync with the current frontend.
|
||||
Only UPDATES existing keys — never adds new ones to avoid URL bloat.
|
||||
"""
|
||||
try:
|
||||
feature_pattern = re.compile(
|
||||
r'"([a-z][a-z0-9_]+)":\s*\{\s*"value"\s*:\s*(true|false)',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
found = 0
|
||||
for match in feature_pattern.finditer(html):
|
||||
key = match.group(1)
|
||||
value = match.group(2).lower() == "true"
|
||||
# Only update keys already in FEATURES — never add new ones
|
||||
# Adding new keys inflates URL length, causing 414/431 errors
|
||||
if key in FEATURES and FEATURES[key] != value:
|
||||
logger.debug("Feature flag updated: %s = %s -> %s", key, FEATURES[key], value)
|
||||
FEATURES[key] = value
|
||||
found += 1
|
||||
if found:
|
||||
logger.info("Updated %d feature flags from x.com", found)
|
||||
except Exception as exc:
|
||||
logger.debug("Feature extraction from HTML failed: %s", exc)
|
||||
|
||||
|
||||
def _fetch_from_github(url_fetch_fn, operation_name):
|
||||
# type: (Any, str) -> Optional[str]
|
||||
"""Fetch queryId from community-maintained twitter-openapi file."""
|
||||
try:
|
||||
payload = url_fetch_fn(TWITTER_OPENAPI_URL)
|
||||
parsed = json.loads(payload)
|
||||
operation = parsed.get(operation_name, {})
|
||||
query_id = operation.get("queryId")
|
||||
if isinstance(query_id, str) and query_id:
|
||||
return query_id
|
||||
except Exception as exc: # pragma: no cover - network-dependent branch
|
||||
logger.debug("GitHub queryId lookup failed: %s", exc)
|
||||
return None
|
||||
|
||||
|
||||
def _invalidate_query_id(operation_name):
|
||||
# type: (str) -> None
|
||||
"""Remove a cached queryId for an operation."""
|
||||
_cached_query_ids.pop(operation_name, None)
|
||||
|
||||
|
||||
def _resolve_query_id(operation_name, prefer_fallback=True, url_fetch_fn=None):
|
||||
# type: (str, bool, Any) -> str
|
||||
"""Resolve queryId using cache, remote sources, and fallback constants."""
|
||||
cached = _cached_query_ids.get(operation_name)
|
||||
if cached:
|
||||
return cached
|
||||
|
||||
fallback = FALLBACK_QUERY_IDS.get(operation_name)
|
||||
if prefer_fallback and fallback:
|
||||
_cached_query_ids[operation_name] = fallback
|
||||
return fallback
|
||||
|
||||
if url_fetch_fn:
|
||||
github_query_id = _fetch_from_github(url_fetch_fn, operation_name)
|
||||
if github_query_id:
|
||||
_cached_query_ids[operation_name] = github_query_id
|
||||
return github_query_id
|
||||
|
||||
_scan_bundles(url_fetch_fn)
|
||||
cached = _cached_query_ids.get(operation_name)
|
||||
if cached:
|
||||
return cached
|
||||
|
||||
if fallback:
|
||||
_cached_query_ids[operation_name] = fallback
|
||||
return fallback
|
||||
|
||||
raise QueryIdError('Cannot resolve queryId for "%s"' % operation_name)
|
||||
Reference in New Issue
Block a user