fix: harden pagination auth and runtime headers

This commit is contained in:
jackwener
2026-03-10 12:33:04 +08:00
parent 4f144d1591
commit d71ad45a0a
8 changed files with 256 additions and 70 deletions

View File

@@ -1,8 +1,8 @@
# twitter-cli
[![CI](https://github.com/jackwener/twitter-cli/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/jackwener/twitter-cli/actions/workflows/ci.yml)
[![PyPI version](https://img.shields.io/pypi/v/twitter-cli.svg)](https://pypi.org/project/twitter-cli/)
[![Python versions](https://img.shields.io/pypi/pyversions/twitter-cli.svg)](https://pypi.org/project/twitter-cli/)
[![PyPI version](https://badge.fury.io/py/twitter-cli.svg)](https://pypi.org/project/twitter-cli/)
[![Python](https://img.shields.io/badge/python-%3E%3D3.8-blue.svg)](https://pypi.org/project/twitter-cli/)
A terminal-first CLI for Twitter/X: read timelines, bookmarks, and user profiles without API keys.
@@ -36,7 +36,7 @@ A terminal-first CLI for Twitter/X: read timelines, bookmarks, and user profiles
**Auth & Anti-Detection:**
- Cookie auth: use browser cookies or environment variables
- Full cookie forwarding: extracts ALL browser cookies for true browser fingerprint
- Full cookie forwarding: extracts ALL browser cookies for richer browser context
- TLS fingerprint impersonation: `curl_cffi` with dynamic Chrome version matching
- `x-client-transaction-id` header generation
- Request timing jitter to avoid pattern detection
@@ -128,7 +128,7 @@ twitter-cli uses this auth priority:
1. **Environment variables**: `TWITTER_AUTH_TOKEN` + `TWITTER_CT0`
2. **Browser cookies** (recommended): auto-extract from Arc/Chrome/Edge/Firefox/Brave
Browser extraction is recommended — it forwards ALL Twitter cookies (not just `auth_token` + `ct0`), making requests indistinguishable from real browser traffic.
Browser extraction is recommended — it forwards ALL Twitter cookies (not just `auth_token` + `ct0`) and aligns request headers with your local runtime, which is closer to normal browser traffic than minimal cookie auth.
After loading cookies, the CLI performs lightweight verification. Commands that require account access fail fast on clear auth errors (`401/403`).
@@ -235,6 +235,8 @@ uv run ruff check .
uv run pytest -q
```
Current CI validates the project on Python 3.12.
### Project Structure
```text
@@ -299,7 +301,7 @@ After installation, OpenClaw can call `twitter-cli` commands directly.
**认证与反风控:**
- Cookie 认证:支持环境变量和浏览器自动提取
- 完整 Cookie 转发:提取浏览器中所有 Twitter Cookie
- 完整 Cookie 转发:提取浏览器中所有 Twitter Cookie,保留更多浏览器上下文
- TLS 指纹伪装:`curl_cffi` 动态匹配 Chrome 版本
- `x-client-transaction-id` 请求头生成
- 请求时序随机化(jitter)
@@ -364,7 +366,7 @@ twitter unbookmark 1234567890
1. **环境变量**:`TWITTER_AUTH_TOKEN` + `TWITTER_CT0`
2. **浏览器提取**(推荐):Arc/Chrome/Edge/Firefox/Brave 全量 Cookie 提取
推荐使用浏览器提取方式,会转发所有 Twitter Cookie让请求和真实浏览器完全一致
推荐使用浏览器提取方式,会转发所有 Twitter Cookie,并按本机运行环境生成语言和平台请求头;它比仅发送 `auth_token` + `ct0` 更接近普通浏览器流量,但不等于完整浏览器自动化。
### 代理支持

View File

@@ -17,12 +17,6 @@ classifiers = [
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Utilities",
]
dependencies = [

View File

@@ -125,7 +125,7 @@ def test_extract_via_subprocess_script_includes_arc(monkeypatch) -> None:
seen = {}
def _run(cmd, capture_output=True, text=True, timeout=15):
script = cmd[2]
script = cmd[-1]
seen["script"] = script
return Completed(json.dumps({"error": "No Twitter cookies found", "attempts": []}))
@@ -137,6 +137,29 @@ def test_extract_via_subprocess_script_includes_arc(monkeypatch) -> None:
assert '("arc", browser_cookie3.arc)' in seen["script"]
def test_extract_via_subprocess_retries_uv_when_current_env_has_no_output(monkeypatch) -> None:
    """Empty stdout from the current interpreter must trigger the ``uv`` retry."""

    class Completed:
        """Bare-bones object exposing the ``stdout``/``stderr`` attributes."""

        def __init__(self, stdout: str, stderr: str = "") -> None:
            self.stdout = stdout
            self.stderr = stderr

    invocations = []
    uv_payload = json.dumps({"auth_token": "token", "ct0": "csrf", "browser": "arc"})

    def _run(cmd, capture_output=True, text=True, timeout=15):
        invocations.append(cmd)
        # The first launcher (current interpreter) stays silent; any other
        # launcher answers with a valid cookie payload.
        launched_by_current_env = cmd[0] == sys.executable
        if launched_by_current_env:
            return Completed("", "")
        return Completed(uv_payload)

    monkeypatch.setattr(auth.subprocess, "run", _run)

    cookies = auth._extract_via_subprocess()

    assert cookies == {"auth_token": "token", "ct0": "csrf"}
    assert len(invocations) == 2
    assert invocations[1][:5] == ["uv", "run", "--with", "browser-cookie3", "python"]
def test_verify_cookies_logs_attempt_summary_on_non_auth_failures(monkeypatch, caplog) -> None:
class Response:
def __init__(self, status_code: int, payload=None) -> None:

View File

@@ -275,9 +275,19 @@ class TestBuildHeaders:
assert "User-Agent" in headers
assert "sec-ch-ua" in headers
@patch("twitter_cli.client.get_sec_ch_ua_platform", return_value='"Linux"')
@patch("twitter_cli.client.get_accept_language", return_value="zh-CN,zh;q=0.9,en;q=0.8")
@patch("twitter_cli.client.get_twitter_client_language", return_value="zh")
@patch("twitter_cli.client._get_cffi_session")
@patch("twitter_cli.client._gen_ct_headers", return_value={})
def test_cookie_string_used_when_available(self, mock_ct_headers, mock_session):
def test_cookie_string_used_when_available(
self,
mock_ct_headers,
mock_session,
mock_client_language,
mock_accept_language,
mock_platform,
):
mock_session.return_value = MagicMock()
mock_session.return_value.get = MagicMock(side_effect=Exception("skip"))
@@ -294,6 +304,57 @@ class TestBuildHeaders:
headers = client._build_headers()
assert headers["Cookie"] == "auth_token=x; ct0=y; other=z"
assert headers["X-Twitter-Client-Language"] == "zh"
assert headers["Accept-Language"] == "zh-CN,zh;q=0.9,en;q=0.8"
assert headers["sec-ch-ua-platform"] == '"Linux"'
class TestPaginationBehavior:
    """Checks when ``TwitterClient._fetch_timeline`` keeps or stops paginating."""

    def test_continues_when_cursor_advances_without_new_tweets(self):
        """A page with no tweets but a fresh cursor must not end pagination."""
        client = TwitterClient.__new__(TwitterClient)
        client._request_delay = 0.0
        client._max_count = 200

        pages = iter([{"page": 1}, {"page": 2}])

        def _graphql_get(operation_name, variables, features, field_toggles=None):
            return next(pages)

        def _parse_timeline_response(data, get_instructions):
            # Page 1: nothing to show, but a cursor pointing at page 2.
            is_first_page = data["page"] == 1
            if is_first_page:
                return [], "cursor-2"
            # Page 2: one tweet and no further cursor.
            return [MagicMock(id="tweet-1")], None

        client._graphql_get = _graphql_get
        client._parse_timeline_response = _parse_timeline_response

        tweets = client._fetch_timeline("HomeTimeline", 1, lambda data: data)

        collected_ids = [tweet.id for tweet in tweets]
        assert collected_ids == ["tweet-1"]

    def test_stops_when_cursor_does_not_advance(self):
        """A cursor that repeats itself must terminate the loop."""
        client = TwitterClient.__new__(TwitterClient)
        client._request_delay = 0.0
        client._max_count = 200

        calls = []

        def _graphql_get(operation_name, variables, features, field_toggles=None):
            # Record the cursor each request was issued with.
            calls.append(variables.get("cursor"))
            return {"page": len(calls)}

        def _parse_timeline_response(data, get_instructions):
            # Every page is empty and always hands back the same cursor.
            return ([], "cursor-same")

        client._graphql_get = _graphql_get
        client._parse_timeline_response = _parse_timeline_response

        tweets = client._fetch_timeline("HomeTimeline", 1, lambda data: data)

        assert tweets == []
        assert calls == [None, "cursor-same"]
# ── TwitterClient._parse_tweet_result ─────────────────────────────────────
@@ -407,3 +468,26 @@ class TestTwitterAPIError:
def test_is_runtime_error(self):
err = TwitterAPIError(500, "Server error")
assert isinstance(err, RuntimeError)
class TestParseUserResult:
    """Checks for ``TwitterClient._parse_user_result``."""

    def test_coerces_count_fields_to_int(self):
        """Count fields given as strings or ``None`` come back as plain ints."""
        legacy = {
            "name": "Alice",
            "screen_name": "alice",
            "followers_count": "1,234",  # thousands separator
            "friends_count": "56",  # numeric string
            "statuses_count": "78.9",  # fractional string
            "favourites_count": None,  # explicit null
        }
        payload = {"rest_id": "user-1", "legacy": legacy}

        user = TwitterClient._parse_user_result(payload)

        assert user is not None
        assert user.followers_count == 1234
        assert user.following_count == 56
        assert user.tweets_count == 78
        assert user.likes_count == 0

View File

@@ -237,35 +237,59 @@ print(json.dumps({
sys.exit(1)
'''
try:
result = subprocess.run(
[sys.executable, "-c", extract_script],
capture_output=True,
text=True,
timeout=15,
)
output = result.stdout.strip()
if not output:
stderr = result.stderr.strip()
if stderr:
logger.debug("Cookie extraction stderr from current env: %s", stderr[:300])
# Maybe browser-cookie3 not installed, try with uv.
result2 = subprocess.run(
["uv", "run", "--with", "browser-cookie3", "python3", "-c", extract_script],
capture_output=True,
text=True,
timeout=30,
)
output = result2.stdout.strip()
if not output:
logger.debug("Cookie extraction stderr from uv fallback: %s", result2.stderr.strip()[:300])
return None
def _run_extract_command(cmd, timeout, label):
# type: (list[str], int, str) -> tuple[Optional[dict], bool]
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=timeout,
)
except subprocess.TimeoutExpired:
logger.debug("Cookie extraction %s timed out", label)
return None, False
except FileNotFoundError as exc:
logger.debug("Cookie extraction %s launcher missing: %s", label, exc)
return None, False
output = result.stdout.strip()
stderr = result.stderr.strip()
if stderr:
logger.debug("Cookie extraction stderr from %s: %s", label, stderr[:300])
if not output:
logger.debug("Cookie extraction from %s produced no stdout", label)
return None, True
try:
data = json.loads(output)
except json.JSONDecodeError as exc:
logger.debug("Cookie extraction %s returned invalid JSON: %s", label, exc)
return None, True
data = json.loads(output)
if "error" in data:
attempts = data.get("attempts") or []
if attempts:
logger.debug("Subprocess extraction attempts: %s", ", ".join(str(item) for item in attempts))
logger.debug("Subprocess extraction attempts (%s): %s", label, ", ".join(str(item) for item in attempts))
retryable = data.get("error") == "browser-cookie3 not installed"
return None, retryable
return data, False
try:
data, retry_with_uv = _run_extract_command(
[sys.executable, "-c", extract_script],
timeout=15,
label="current env",
)
if data is None and retry_with_uv:
data, _ = _run_extract_command(
["uv", "run", "--with", "browser-cookie3", "python", "-c", extract_script],
timeout=30,
label="uv fallback",
)
if data is None:
return None
logger.info("Found cookies in %s (subprocess)", data.get("browser", "unknown"))
@@ -277,18 +301,9 @@ sys.exit(1)
cookies["cookie_string"] = cookie_str
logger.info("Extracted %d total cookies for full browser fingerprint", len(all_cookies))
return cookies
except subprocess.TimeoutExpired:
logger.debug("Cookie extraction subprocess timed out")
return None
except json.JSONDecodeError as exc:
logger.debug("Cookie extraction subprocess returned invalid JSON: %s", exc)
return None
except KeyError as exc:
logger.debug("Cookie extraction subprocess returned incomplete payload: %s", exc)
return None
except FileNotFoundError as exc:
logger.debug("Cookie extraction subprocess launcher missing: %s", exc)
return None
def extract_from_browser() -> Optional[Dict[str, str]]:

View File

@@ -196,6 +196,26 @@ def _fetch_and_display(fetch_fn, label, emoji, max_count, as_json, output_file,
console.print()
def _run_bookmarks_command(max_count, as_json, output_file, do_filter):
    # type: (Optional[int], bool, Optional[str], bool) -> None
    """Fetch the user's bookmarks and hand them to the shared display routine.

    Common implementation behind the ``favorites`` and ``bookmarks`` CLI
    commands, which both delegate here with their parsed options.
    """
    config = load_config()

    def _fetch_bookmarks_and_render():
        # The client is created inside the guarded callable, so a failure in
        # ``_get_client`` happens under ``_run_guarded`` as well.
        client = _get_client(config)

        def _fetch(count):
            return client.fetch_bookmarks(count)

        _fetch_and_display(
            _fetch, "bookmarks", "🔖", max_count, as_json, output_file, do_filter, config,
        )

    _run_guarded(_fetch_bookmarks_and_render)
@cli.command()
@click.option(
"--type",
@@ -258,14 +278,7 @@ def feed(feed_type, max_count, as_json, input_file, output_file, do_filter):
def favorites(max_count, as_json, output_file, do_filter):
# type: (Optional[int], bool, Optional[str], bool) -> None
"""Fetch bookmarked (favorite) tweets."""
config = load_config()
def _run():
client = _get_client(config)
_fetch_and_display(
lambda count: client.fetch_bookmarks(count),
"bookmarks", "🔖", max_count, as_json, output_file, do_filter, config,
)
_run_guarded(_run)
_run_bookmarks_command(max_count, as_json, output_file, do_filter)
@cli.command(name="bookmarks")
@@ -276,7 +289,7 @@ def favorites(max_count, as_json, output_file, do_filter):
def bookmarks(max_count, as_json, output_file, do_filter):
# type: (Optional[int], bool, Optional[str], bool) -> None
"""Fetch bookmarked tweets."""
favorites.callback(max_count=max_count, as_json=as_json, output_file=output_file, do_filter=do_filter)
_run_bookmarks_command(max_count, as_json, output_file, do_filter)
@cli.command()

View File

@@ -16,8 +16,14 @@ from x_client_transaction import ClientTransaction
from x_client_transaction.utils import generate_headers as _gen_ct_headers, get_ondemand_file_url
from .constants import (
BEARER_TOKEN, SEC_CH_UA_MOBILE, SEC_CH_UA_PLATFORM,
get_sec_ch_ua, get_user_agent, sync_chrome_version,
BEARER_TOKEN,
SEC_CH_UA_MOBILE,
get_accept_language,
get_sec_ch_ua,
get_sec_ch_ua_platform,
get_twitter_client_language,
get_user_agent,
sync_chrome_version,
)
from .models import Author, Metrics, Tweet, TweetMedia, UserProfile
@@ -614,10 +620,16 @@ class TwitterClient:
seen_ids.add(tweet.id)
tweets.append(tweet)
if not next_cursor or not new_tweets:
if not next_cursor:
break
if next_cursor == cursor:
logger.debug("Timeline pagination stopped because cursor did not advance: %s", next_cursor)
break
cursor = next_cursor
if not new_tweets:
logger.debug("Timeline page returned no tweets but exposed next cursor; continuing pagination")
# Rate-limit: sleep between paginated requests with jitter
if len(tweets) < count and self._request_delay > 0:
jitter = self._request_delay * random.uniform(0.7, 1.5)
@@ -688,15 +700,15 @@ class TwitterClient:
"X-Csrf-Token": self._ct0,
"X-Twitter-Active-User": "yes",
"X-Twitter-Auth-Type": "OAuth2Session",
"X-Twitter-Client-Language": "en",
"X-Twitter-Client-Language": get_twitter_client_language(),
"User-Agent": get_user_agent(),
"Origin": "https://x.com",
"Referer": "https://x.com",
"Accept": "*/*",
"Accept-Language": "en-US,en;q=0.9",
"Accept-Language": get_accept_language(),
"sec-ch-ua": get_sec_ch_ua(),
"sec-ch-ua-mobile": SEC_CH_UA_MOBILE,
"sec-ch-ua-platform": SEC_CH_UA_PLATFORM,
"sec-ch-ua-platform": get_sec_ch_ua_platform(),
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
@@ -902,10 +914,10 @@ class TwitterClient:
bio=legacy.get("description", ""),
location=legacy.get("location", ""),
url=_deep_get(legacy, "entities", "url", "urls", 0, "expanded_url") or "",
followers_count=legacy.get("followers_count", 0),
following_count=legacy.get("friends_count", 0),
tweets_count=legacy.get("statuses_count", 0),
likes_count=legacy.get("favourites_count", 0),
followers_count=_parse_int(legacy.get("followers_count"), 0),
following_count=_parse_int(legacy.get("friends_count"), 0),
tweets_count=_parse_int(legacy.get("statuses_count"), 0),
likes_count=_parse_int(legacy.get("favourites_count"), 0),
verified=user_data.get("is_blue_verified", False) or legacy.get("verified", False),
profile_image_url=legacy.get("profile_image_url_https", ""),
created_at=legacy.get("created_at", ""),

View File

@@ -1,6 +1,8 @@
"""Shared constants for twitter-cli."""
import os
import re
import sys
BEARER_TOKEN = (
"AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs"
@@ -26,10 +28,16 @@ def sync_chrome_version(impersonate_target):
def get_user_agent():
# type: () -> str
if sys.platform == "darwin":
platform = "Macintosh; Intel Mac OS X 10_15_7"
elif sys.platform.startswith("win"):
platform = "Windows NT 10.0; Win64; x64"
else:
platform = "X11; Linux x86_64"
return (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
"Mozilla/5.0 (%s) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/%s.0.0.0 Safari/537.36" % _chrome_version
"Chrome/%s.0.0.0 Safari/537.36" % (platform, _chrome_version)
)
@@ -40,9 +48,44 @@ def get_sec_ch_ua():
)
def _get_locale_tag():
# type: () -> str
raw = (
os.environ.get("LC_ALL")
or os.environ.get("LC_MESSAGES")
or os.environ.get("LANG")
or "en_US.UTF-8"
)
tag = raw.split(".", 1)[0].replace("_", "-")
return tag or "en-US"
def get_accept_language():
    # type: () -> str
    """Build the ``Accept-Language`` header value from the process locale.

    The full locale tag comes first, then its primary language at
    ``q=0.9`` (only when it differs from the full tag), then ``en`` at
    ``q=0.8`` (only when English is not already the primary language).
    Skipping the repeated entries avoids values such as
    ``fr,fr;q=0.9,en;q=0.8`` or ``en-US,en;q=0.9,en;q=0.8``.

    Examples: ``zh-CN`` -> ``zh-CN,zh;q=0.9,en;q=0.8``;
    ``en-US`` -> ``en-US,en;q=0.9``; ``fr`` -> ``fr,en;q=0.8``.
    """
    tag = _get_locale_tag()
    language = tag.split("-", 1)[0] or "en"
    ranges = [tag]
    if language != tag:
        ranges.append("%s;q=0.9" % language)
    if language.lower() != "en":
        ranges.append("en;q=0.8")
    return ",".join(ranges)
def get_twitter_client_language():
    # type: () -> str
    """Return the primary language subtag of the locale tag, or ``en`` if empty."""
    primary_subtag = _get_locale_tag().split("-", 1)[0]
    if primary_subtag:
        return primary_subtag
    return "en"
def get_sec_ch_ua_platform():
    # type: () -> str
    """Return the quoted platform name for the ``sec-ch-ua-platform`` header.

    ``sys.platform`` selects ``"macOS"`` for ``darwin`` and ``"Windows"``
    for any value starting with ``win``; everything else is reported as
    ``"Linux"``. The double quotes are part of the returned value.
    """
    current = sys.platform
    if current.startswith("win"):
        return '"Windows"'
    return '"macOS"' if current == "darwin" else '"Linux"'
# Static Client Hints
SEC_CH_UA_MOBILE = "?0"
SEC_CH_UA_PLATFORM = '"macOS"'
SEC_CH_UA_PLATFORM = get_sec_ch_ua_platform()
# Legacy aliases — modules that import these get the default value.
# _build_headers() should use get_user_agent() / get_sec_ch_ua() instead.