diff --git a/README.md b/README.md index df51ebe..90b925e 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # twitter-cli [![CI](https://github.com/jackwener/twitter-cli/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/jackwener/twitter-cli/actions/workflows/ci.yml) -[![PyPI version](https://img.shields.io/pypi/v/twitter-cli.svg)](https://pypi.org/project/twitter-cli/) -[![Python versions](https://img.shields.io/pypi/pyversions/twitter-cli.svg)](https://pypi.org/project/twitter-cli/) +[![PyPI version](https://badge.fury.io/py/twitter-cli.svg)](https://pypi.org/project/twitter-cli/) +[![Python](https://img.shields.io/badge/python-%3E%3D3.8-blue.svg)](https://pypi.org/project/twitter-cli/) A terminal-first CLI for Twitter/X: read timelines, bookmarks, and user profiles without API keys. @@ -36,7 +36,7 @@ A terminal-first CLI for Twitter/X: read timelines, bookmarks, and user profiles **Auth & Anti-Detection:** - Cookie auth: use browser cookies or environment variables -- Full cookie forwarding: extracts ALL browser cookies for true browser fingerprint +- Full cookie forwarding: extracts ALL browser cookies for richer browser context - TLS fingerprint impersonation: `curl_cffi` with dynamic Chrome version matching - `x-client-transaction-id` header generation - Request timing jitter to avoid pattern detection @@ -128,7 +128,7 @@ twitter-cli uses this auth priority: 1. **Environment variables**: `TWITTER_AUTH_TOKEN` + `TWITTER_CT0` 2. **Browser cookies** (recommended): auto-extract from Arc/Chrome/Edge/Firefox/Brave -Browser extraction is recommended — it forwards ALL Twitter cookies (not just `auth_token` + `ct0`), making requests indistinguishable from real browser traffic. +Browser extraction is recommended — it forwards ALL Twitter cookies (not just `auth_token` + `ct0`) and aligns request headers with your local runtime, which is closer to normal browser traffic than minimal cookie auth. After loading cookies, the CLI performs lightweight verification. Commands that require account access fail fast on clear auth errors (`401/403`). @@ -235,6 +235,8 @@ uv run ruff check . uv run pytest -q ``` +Current CI validates the project on Python 3.12. + ### Project Structure ```text @@ -299,7 +301,7 @@ After installation, OpenClaw can call `twitter-cli` commands directly. **认证与反风控:** - Cookie 认证:支持环境变量和浏览器自动提取 -- 完整 Cookie 转发:提取浏览器中所有 Twitter Cookie +- 完整 Cookie 转发:提取浏览器中所有 Twitter Cookie,保留更多浏览器上下文 - TLS 指纹伪装:`curl_cffi` 动态匹配 Chrome 版本 - `x-client-transaction-id` 请求头生成 - 请求时序随机化(jitter) @@ -364,7 +366,7 @@ twitter unbookmark 1234567890 1. **环境变量**:`TWITTER_AUTH_TOKEN` + `TWITTER_CT0` 2. **浏览器提取**(推荐):Arc/Chrome/Edge/Firefox/Brave 全量 Cookie 提取 -推荐使用浏览器提取方式,会转发所有 Twitter Cookie,让请求和真实浏览器完全一致。 +推荐使用浏览器提取方式,会转发所有 Twitter Cookie,并按本机运行环境生成语言和平台请求头;它比仅发送 `auth_token` + `ct0` 更接近普通浏览器流量,但不等于完整浏览器自动化。 ### 代理支持 diff --git a/pyproject.toml b/pyproject.toml index f2ba6d7..f212f21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,12 +17,6 @@ classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", "Topic :: Utilities", ] dependencies = [ diff --git a/tests/test_auth.py b/tests/test_auth.py index af49302..72e4999 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -125,7 +125,7 @@ def test_extract_via_subprocess_script_includes_arc(monkeypatch) -> None: seen = {} def _run(cmd, capture_output=True, text=True, timeout=15): - script = cmd[2] + script = cmd[-1] seen["script"] = script return Completed(json.dumps({"error": "No Twitter cookies found", "attempts": []})) @@ -137,6 +137,29 @@ def test_extract_via_subprocess_script_includes_arc(monkeypatch) -> None: assert '("arc", browser_cookie3.arc)' in seen["script"] +def test_extract_via_subprocess_retries_uv_when_current_env_has_no_output(monkeypatch) -> None: + class Completed: + def __init__(self, stdout: str, stderr: str = "") -> None: + self.stdout = stdout + self.stderr = stderr + + calls = [] + + def _run(cmd, capture_output=True, text=True, timeout=15): + calls.append(cmd) + if cmd[0] == sys.executable: + return Completed("", "") + return Completed(json.dumps({"auth_token": "token", "ct0": "csrf", "browser": "arc"})) + + monkeypatch.setattr(auth.subprocess, "run", _run) + + cookies = auth._extract_via_subprocess() + + assert cookies == {"auth_token": "token", "ct0": "csrf"} + assert len(calls) == 2 + assert calls[1][:5] == ["uv", "run", "--with", "browser-cookie3", "python"] + + def test_verify_cookies_logs_attempt_summary_on_non_auth_failures(monkeypatch, caplog) -> None: class Response: def __init__(self, status_code: int, payload=None) -> None: diff --git a/tests/test_client.py b/tests/test_client.py index 48b9af9..ac388f9 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -275,9 +275,19 @@ class TestBuildHeaders: assert "User-Agent" in headers assert "sec-ch-ua" in headers + @patch("twitter_cli.client.get_sec_ch_ua_platform", return_value='"Linux"') + @patch("twitter_cli.client.get_accept_language", return_value="zh-CN,zh;q=0.9,en;q=0.8") + @patch("twitter_cli.client.get_twitter_client_language", return_value="zh") @patch("twitter_cli.client._get_cffi_session") @patch("twitter_cli.client._gen_ct_headers", return_value={}) - def test_cookie_string_used_when_available(self, mock_ct_headers, mock_session): + def test_cookie_string_used_when_available( + self, + mock_ct_headers, + mock_session, + mock_client_language, + mock_accept_language, + mock_platform, + ): mock_session.return_value = MagicMock() mock_session.return_value.get = MagicMock(side_effect=Exception("skip")) @@ -294,6 +304,57 @@ class TestBuildHeaders: headers = client._build_headers() assert headers["Cookie"] == "auth_token=x; ct0=y; other=z" + assert headers["X-Twitter-Client-Language"] == "zh" + assert headers["Accept-Language"] == "zh-CN,zh;q=0.9,en;q=0.8" + assert headers["sec-ch-ua-platform"] == '"Linux"' + + +class TestPaginationBehavior: + def test_continues_when_cursor_advances_without_new_tweets(self): + client = TwitterClient.__new__(TwitterClient) + client._request_delay = 0.0 + client._max_count = 200 + + responses = iter( + [ + {"page": 1}, + {"page": 2}, + ] + ) + + def _graphql_get(operation_name, variables, features, field_toggles=None): + return next(responses) + + def _parse_timeline_response(data, get_instructions): + if data["page"] == 1: + return [], "cursor-2" + return [MagicMock(id="tweet-1")], None + + client._graphql_get = _graphql_get + client._parse_timeline_response = _parse_timeline_response + + tweets = client._fetch_timeline("HomeTimeline", 1, lambda data: data) + + assert [tweet.id for tweet in tweets] == ["tweet-1"] + + def test_stops_when_cursor_does_not_advance(self): + client = TwitterClient.__new__(TwitterClient) + client._request_delay = 0.0 + client._max_count = 200 + + calls = [] + + def _graphql_get(operation_name, variables, features, field_toggles=None): + calls.append(variables.get("cursor")) + return {"page": len(calls)} + + client._graphql_get = _graphql_get + client._parse_timeline_response = lambda data, get_instructions: ([], "cursor-same") + + tweets = client._fetch_timeline("HomeTimeline", 1, lambda data: data) + + assert tweets == [] + assert calls == [None, "cursor-same"] # ── TwitterClient._parse_tweet_result ───────────────────────────────────── @@ -407,3 +468,26 @@ class TestTwitterAPIError: def test_is_runtime_error(self): err = TwitterAPIError(500, "Server error") assert isinstance(err, RuntimeError) + + +class TestParseUserResult: + def test_coerces_count_fields_to_int(self): + user = TwitterClient._parse_user_result( + { + "rest_id": "user-1", + "legacy": { + "name": "Alice", + "screen_name": "alice", + "followers_count": "1,234", + "friends_count": "56", + "statuses_count": "78.9", + "favourites_count": None, + }, + } + ) + + assert user is not None + assert user.followers_count == 1234 + assert user.following_count == 56 + assert user.tweets_count == 78 + assert user.likes_count == 0 diff --git a/twitter_cli/auth.py b/twitter_cli/auth.py index f8d50e3..ebe25df 100644 --- a/twitter_cli/auth.py +++ b/twitter_cli/auth.py @@ -237,35 +237,59 @@ print(json.dumps({ sys.exit(1) ''' - try: - result = subprocess.run( - [sys.executable, "-c", extract_script], - capture_output=True, - text=True, - timeout=15, - ) - output = result.stdout.strip() - if not output: - stderr = result.stderr.strip() - if stderr: - logger.debug("Cookie extraction stderr from current env: %s", stderr[:300]) - # Maybe browser-cookie3 not installed, try with uv. - result2 = subprocess.run( - ["uv", "run", "--with", "browser-cookie3", "python3", "-c", extract_script], - capture_output=True, - text=True, - timeout=30, - ) - output = result2.stdout.strip() - if not output: - logger.debug("Cookie extraction stderr from uv fallback: %s", result2.stderr.strip()[:300]) - return None + def _run_extract_command(cmd, timeout, label): + # type: (list[str], int, str) -> tuple[Optional[dict], bool] + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=timeout, + ) + except subprocess.TimeoutExpired: + logger.debug("Cookie extraction %s timed out", label) + return None, False + except FileNotFoundError as exc: + logger.debug("Cookie extraction %s launcher missing: %s", label, exc) + return None, False + + output = result.stdout.strip() + stderr = result.stderr.strip() + if stderr: + logger.debug("Cookie extraction stderr from %s: %s", label, stderr[:300]) + if not output: + logger.debug("Cookie extraction from %s produced no stdout", label) + return None, True + + try: + data = json.loads(output) + except json.JSONDecodeError as exc: + logger.debug("Cookie extraction %s returned invalid JSON: %s", label, exc) + return None, True - data = json.loads(output) if "error" in data: attempts = data.get("attempts") or [] if attempts: - logger.debug("Subprocess extraction attempts: %s", ", ".join(str(item) for item in attempts)) + logger.debug("Subprocess extraction attempts (%s): %s", label, ", ".join(str(item) for item in attempts)) + retryable = data.get("error") == "browser-cookie3 not installed" + return None, retryable + + return data, False + + try: + data, retry_with_uv = _run_extract_command( + [sys.executable, "-c", extract_script], + timeout=15, + label="current env", + ) + if data is None and retry_with_uv: + data, _ = _run_extract_command( + ["uv", "run", "--with", "browser-cookie3", "python", "-c", extract_script], + timeout=30, + label="uv fallback", + ) + + if data is None: return None logger.info("Found cookies in %s (subprocess)", data.get("browser", "unknown")) @@ -277,18 +301,9 @@ sys.exit(1) cookies["cookie_string"] = cookie_str logger.info("Extracted %d total cookies for full browser fingerprint", len(all_cookies)) return cookies - except subprocess.TimeoutExpired: - logger.debug("Cookie extraction subprocess timed out") - return None - except json.JSONDecodeError as exc: - logger.debug("Cookie extraction subprocess returned invalid JSON: %s", exc) - return None except KeyError as exc: logger.debug("Cookie extraction subprocess returned incomplete payload: %s", exc) return None - except FileNotFoundError as exc: - logger.debug("Cookie extraction subprocess launcher missing: %s", exc) - return None def extract_from_browser() -> Optional[Dict[str, str]]: diff --git a/twitter_cli/cli.py b/twitter_cli/cli.py index b8b4614..8412553 100644 --- a/twitter_cli/cli.py +++ b/twitter_cli/cli.py @@ -196,6 +196,26 @@ def _fetch_and_display(fetch_fn, label, emoji, max_count, as_json, output_file, console.print() +def _run_bookmarks_command(max_count, as_json, output_file, do_filter): + # type: (Optional[int], bool, Optional[str], bool) -> None + config = load_config() + + def _run(): + client = _get_client(config) + _fetch_and_display( + lambda count: client.fetch_bookmarks(count), + "bookmarks", + "🔖", + max_count, + as_json, + output_file, + do_filter, + config, + ) + + _run_guarded(_run) + + @cli.command() @click.option( "--type", @@ -258,14 +278,7 @@ def feed(feed_type, max_count, as_json, input_file, output_file, do_filter): def favorites(max_count, as_json, output_file, do_filter): # type: (Optional[int], bool, Optional[str], bool) -> None """Fetch bookmarked (favorite) tweets.""" - config = load_config() - def _run(): - client = _get_client(config) - _fetch_and_display( - lambda count: client.fetch_bookmarks(count), - "bookmarks", "🔖", max_count, as_json, output_file, do_filter, config, - ) - _run_guarded(_run) + _run_bookmarks_command(max_count, as_json, output_file, do_filter) @cli.command(name="bookmarks") @@ -276,7 +289,7 @@ def favorites(max_count, as_json, output_file, do_filter): def bookmarks(max_count, as_json, output_file, do_filter): # type: (Optional[int], bool, Optional[str], bool) -> None """Fetch bookmarked tweets.""" - favorites.callback(max_count=max_count, as_json=as_json, output_file=output_file, do_filter=do_filter) + _run_bookmarks_command(max_count, as_json, output_file, do_filter) @cli.command() diff --git a/twitter_cli/client.py b/twitter_cli/client.py index bc879df..16aa195 100644 --- a/twitter_cli/client.py +++ b/twitter_cli/client.py @@ -16,8 +16,14 @@ from x_client_transaction import ClientTransaction from x_client_transaction.utils import generate_headers as _gen_ct_headers, get_ondemand_file_url from .constants import ( - BEARER_TOKEN, SEC_CH_UA_MOBILE, SEC_CH_UA_PLATFORM, - get_sec_ch_ua, get_user_agent, sync_chrome_version, + BEARER_TOKEN, + SEC_CH_UA_MOBILE, + get_accept_language, + get_sec_ch_ua, + get_sec_ch_ua_platform, + get_twitter_client_language, + get_user_agent, + sync_chrome_version, ) from .models import Author, Metrics, Tweet, TweetMedia, UserProfile @@ -614,10 +620,16 @@ class TwitterClient: seen_ids.add(tweet.id) tweets.append(tweet) - if not next_cursor or not new_tweets: + if not next_cursor: + break + if next_cursor == cursor: + logger.debug("Timeline pagination stopped because cursor did not advance: %s", next_cursor) break cursor = next_cursor + if not new_tweets: + logger.debug("Timeline page returned no tweets but exposed next cursor; continuing pagination") + # Rate-limit: sleep between paginated requests with jitter if len(tweets) < count and self._request_delay > 0: jitter = self._request_delay * random.uniform(0.7, 1.5) @@ -688,15 +700,15 @@ class TwitterClient: "X-Csrf-Token": self._ct0, "X-Twitter-Active-User": "yes", "X-Twitter-Auth-Type": "OAuth2Session", - "X-Twitter-Client-Language": "en", + "X-Twitter-Client-Language": get_twitter_client_language(), "User-Agent": get_user_agent(), "Origin": "https://x.com", "Referer": "https://x.com", "Accept": "*/*", - "Accept-Language": "en-US,en;q=0.9", + "Accept-Language": get_accept_language(), "sec-ch-ua": get_sec_ch_ua(), "sec-ch-ua-mobile": SEC_CH_UA_MOBILE, - "sec-ch-ua-platform": SEC_CH_UA_PLATFORM, + "sec-ch-ua-platform": get_sec_ch_ua_platform(), "Sec-Fetch-Dest": "empty", "Sec-Fetch-Mode": "cors", "Sec-Fetch-Site": "same-origin", @@ -902,10 +914,10 @@ class TwitterClient: bio=legacy.get("description", ""), location=legacy.get("location", ""), url=_deep_get(legacy, "entities", "url", "urls", 0, "expanded_url") or "", - followers_count=legacy.get("followers_count", 0), - following_count=legacy.get("friends_count", 0), - tweets_count=legacy.get("statuses_count", 0), - likes_count=legacy.get("favourites_count", 0), + followers_count=_parse_int(legacy.get("followers_count"), 0), + following_count=_parse_int(legacy.get("friends_count"), 0), + tweets_count=_parse_int(legacy.get("statuses_count"), 0), + likes_count=_parse_int(legacy.get("favourites_count"), 0), verified=user_data.get("is_blue_verified", False) or legacy.get("verified", False), profile_image_url=legacy.get("profile_image_url_https", ""), created_at=legacy.get("created_at", ""), diff --git a/twitter_cli/constants.py b/twitter_cli/constants.py index 0114bb4..5c50474 100644 --- a/twitter_cli/constants.py +++ b/twitter_cli/constants.py @@ -1,6 +1,8 @@ """Shared constants for twitter-cli.""" +import os import re +import sys BEARER_TOKEN = ( "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs" @@ -26,10 +28,16 @@ def sync_chrome_version(impersonate_target): def get_user_agent(): # type: () -> str + if sys.platform == "darwin": + platform = "Macintosh; Intel Mac OS X 10_15_7" + elif sys.platform.startswith("win"): + platform = "Windows NT 10.0; Win64; x64" + else: + platform = "X11; Linux x86_64" return ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + "Mozilla/5.0 (%s) " "AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/%s.0.0.0 Safari/537.36" % _chrome_version + "Chrome/%s.0.0.0 Safari/537.36" % (platform, _chrome_version) ) @@ -40,9 +48,44 @@ def get_sec_ch_ua(): ) +def _get_locale_tag(): + # type: () -> str + raw = ( + os.environ.get("LC_ALL") + or os.environ.get("LC_MESSAGES") + or os.environ.get("LANG") + or "en_US.UTF-8" + ) + tag = raw.split(".", 1)[0].replace("_", "-") + return tag or "en-US" + + +def get_accept_language(): + # type: () -> str + tag = _get_locale_tag() + language = tag.split("-", 1)[0] or "en" + if tag == language: + return "%s,%s;q=0.9,en;q=0.8" % (tag, language) + return "%s,%s;q=0.9,en;q=0.8" % (tag, language) + + +def get_twitter_client_language(): + # type: () -> str + return _get_locale_tag().split("-", 1)[0] or "en" + + +def get_sec_ch_ua_platform(): + # type: () -> str + if sys.platform == "darwin": + return '"macOS"' + if sys.platform.startswith("win"): + return '"Windows"' + return '"Linux"' + + # Static Client Hints SEC_CH_UA_MOBILE = "?0" -SEC_CH_UA_PLATFORM = '"macOS"' +SEC_CH_UA_PLATFORM = get_sec_ch_ua_platform() # Legacy aliases — modules that import these get the default value. # _build_headers() should use get_user_agent() / get_sec_ch_ua() instead.