diff --git a/README.md b/README.md index a110a17..59beb17 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,7 @@ twitter feed --filter ```bash # Feed twitter feed --max 50 +twitter feed --cursor "<nextCursor from previous page>" twitter feed --full-text twitter feed --output tweets.json twitter feed --input tweets.json @@ -427,6 +428,7 @@ twitter feed twitter feed -t following twitter feed --filter twitter feed --full-text +twitter feed --cursor "<上一页返回的 nextCursor>" # 收藏 twitter bookmarks diff --git a/SCHEMA.md b/SCHEMA.md index 558fee7..2b8a64c 100644 --- a/SCHEMA.md +++ b/SCHEMA.md @@ -8,6 +8,8 @@ ok: true schema_version: "1" data: ... +pagination: + nextCursor: "optional-cursor" ``` ## Error @@ -25,6 +27,7 @@ error: - `--yaml` and `--json` both use this envelope - non-TTY stdout defaults to YAML - tweet and user lists are returned under `data` +- timeline-style list commands may also return `pagination.nextCursor` - `article` returns a single tweet object under `data` - `status` returns `data.authenticated` plus `data.user` - `whoami` returns `data.user` diff --git a/tests/conftest.py b/tests/conftest.py index 645b981..d435289 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -39,6 +39,7 @@ def tweet_factory(): article_title=overrides.pop("article_title", None), article_text=overrides.pop("article_text", None), is_subscriber_only=overrides.pop("is_subscriber_only", False), + is_promoted=overrides.pop("is_promoted", False), ) return _make_tweet diff --git a/tests/test_cli.py b/tests/test_cli.py index cdf74ce..97944f4 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -57,6 +57,68 @@ def test_cli_feed_input_accepts_structured_json_envelope(tmp_path, tweet_factory assert '"id": "1"' in result.output +def test_cli_feed_passes_include_promoted(monkeypatch, tweet_factory) -> None: + class FakeClient: + def fetch_home_timeline( + self, + count: int, + include_promoted: bool = False, + cursor: str | None = None, + return_cursor: bool = False, + ): + assert count == 20 + assert 
include_promoted is True + assert cursor is None + assert return_cursor is True + return [tweet_factory("1", is_promoted=True)], "cursor-next" + + monkeypatch.setattr("twitter_cli.cli._get_client", lambda config=None, quiet=False: FakeClient()) + monkeypatch.setattr( + "twitter_cli.cli.load_config", + lambda: {"fetch": {"count": 20}, "filter": {}, "rateLimit": {}}, + ) + runner = CliRunner() + + result = runner.invoke(cli, ["feed", "--json", "--include-promoted"]) + + assert result.exit_code == 0 + payload = json.loads(result.output) + assert payload["ok"] is True + assert payload["data"][0]["isPromoted"] is True + assert payload["pagination"]["nextCursor"] == "cursor-next" + + +def test_cli_feed_accepts_cursor_and_emits_pagination(monkeypatch) -> None: + class FakeClient: + def fetch_following_feed( + self, + count: int, + include_promoted: bool = False, + cursor: str | None = None, + return_cursor: bool = False, + ): + assert count == 20 + assert include_promoted is False + assert cursor == "cursor-prev" + assert return_cursor is True + return [], "cursor-next" + + monkeypatch.setattr("twitter_cli.cli._get_client", lambda config=None, quiet=False: FakeClient()) + monkeypatch.setattr( + "twitter_cli.cli.load_config", + lambda: {"fetch": {"count": 20}, "filter": {}, "rateLimit": {}}, + ) + runner = CliRunner() + + result = runner.invoke(cli, ["feed", "-t", "following", "--cursor", "cursor-prev", "--json"]) + + assert result.exit_code == 0 + payload = json.loads(result.output) + assert payload["ok"] is True + assert payload["data"] == [] + assert payload["pagination"]["nextCursor"] == "cursor-next" + + def test_print_tweet_table_truncates_text_by_default(tweet_factory) -> None: long_text = "A" * 140 console = Console(record=True, width=400) diff --git a/tests/test_client.py b/tests/test_client.py index 66fbb5e..d0a6971 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -333,6 +333,24 @@ class TestBuildHeaders: class TestPaginationBehavior: + def 
test_fetch_timeline_can_include_promoted_content(self): + client = TwitterClient.__new__(TwitterClient) + client._request_delay = 0.0 + client._max_count = 200 + + calls = [] + + def _graphql_get(operation_name, variables, features, field_toggles=None): + calls.append(variables.copy()) + return {"page": 1} + + client._graphql_get = _graphql_get + + with patch('twitter_cli.client.parse_timeline_response', return_value=([], None)): + client._fetch_timeline("HomeTimeline", 1, lambda data: data, include_promoted=True) + + assert calls[0]["includePromotedContent"] is True + def test_continues_when_cursor_advances_without_new_tweets(self): client = TwitterClient.__new__(TwitterClient) client._request_delay = 0.0 @@ -379,6 +397,33 @@ class TestPaginationBehavior: assert tweets == [] assert calls == [None, "cursor-same"] + def test_fetch_timeline_returns_continuation_cursor(self): + client = TwitterClient.__new__(TwitterClient) + client._request_delay = 0.0 + client._max_count = 200 + + calls = [] + + def _graphql_get(operation_name, variables, features, field_toggles=None): + calls.append(variables.copy()) + return {"page": 1} + + client._graphql_get = _graphql_get + + tweet = MagicMock(id="tweet-1") + with patch('twitter_cli.client.parse_timeline_response', return_value=([tweet], "cursor-next")): + tweets, cursor = client._fetch_timeline( + "HomeTimeline", + 1, + lambda data: data, + start_cursor="cursor-prev", + return_cursor=True, + ) + + assert [item.id for item in tweets] == ["tweet-1"] + assert cursor == "cursor-next" + assert calls[0]["cursor"] == "cursor-prev" + def test_user_list_continues_when_cursor_advances_without_new_users(self): client = TwitterClient.__new__(TwitterClient) client._request_delay = 0.0 diff --git a/tests/test_parser_fixtures.py b/tests/test_parser_fixtures.py index c6b752d..0e37183 100644 --- a/tests/test_parser_fixtures.py +++ b/tests/test_parser_fixtures.py @@ -36,6 +36,21 @@ def test_parse_home_timeline_fixture(fixture_loader) -> None: 
assert tweets[1].quoted_tweet.id == "30" +def test_parse_home_timeline_fixture_marks_promoted_entries(fixture_loader) -> None: + payload = fixture_loader("home_timeline.json") + entry = payload["data"]["home"]["home_timeline_urt"]["instructions"][0]["entries"][0] + entry["entryId"] = "promoted-tweet-1-demo" + entry["content"]["itemContent"]["promotedMetadata"] = {"impressionId": "demo"} + + tweets, _ = parse_timeline_response( + payload, + lambda data: _deep_get(data, "data", "home", "home_timeline_urt", "instructions"), + ) + + assert tweets[0].is_promoted is True + assert tweets[1].is_promoted is False + + def test_parse_tweet_detail_fixture_with_nested_items(fixture_loader) -> None: payload = fixture_loader("tweet_detail.json") diff --git a/tests/test_serialization.py b/tests/test_serialization.py index 22f2d83..cd11545 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -85,3 +85,11 @@ def test_tweet_roundtrip_preserves_subscriber_only(tweet_factory) -> None: assert payload["isSubscriberOnly"] is True restored = tweet_from_dict(payload) assert restored.is_subscriber_only is True + + +def test_tweet_roundtrip_preserves_promoted_flag(tweet_factory) -> None: + tweet = tweet_factory("100", is_promoted=True) + payload = tweet_to_dict(tweet) + assert payload["isPromoted"] is True + restored = tweet_from_dict(payload) + assert restored.is_promoted is True diff --git a/twitter_cli/cli.py b/twitter_cli/cli.py index 88784ec..5393bbc 100644 --- a/twitter_cli/cli.py +++ b/twitter_cli/cli.py @@ -349,6 +349,15 @@ def _fetch_and_display(fetch_fn, label, emoji, max_count, as_json, as_yaml, outp console.print() +def _emit_timeline_structured(tweets, next_cursor, *, as_json, as_yaml): + # type: (TweetList, Optional[str], bool, bool) -> bool + """Emit timeline data with pagination metadata while keeping `data` a tweet list.""" + payload = success_payload(tweets_to_data(tweets)) + if next_cursor: + payload["pagination"] = {"nextCursor": next_cursor} + 
return emit_structured(payload, as_json=as_json, as_yaml=as_yaml) + + def _run_bookmarks_command(max_count, as_json, as_yaml, output_file, do_filter, compact=False, full_text=False): # type: (Optional[int], bool, bool, Optional[str], bool, bool, bool) -> None config = load_config() @@ -401,17 +410,24 @@ def _inherit_flag(ctx, name, value): help="Feed type: for-you (algorithmic) or following (chronological).", ) @click.option("--max", "-n", "max_count", type=int, default=None, help="Max number of tweets to fetch.") +@click.option("--cursor", type=str, default=None, help="Pagination cursor for continuing a previous feed request.") @structured_output_options @click.option("--input", "-i", "input_file", type=str, default=None, help="Load tweets from JSON file.") @click.option("--output", "-o", "output_file", type=str, default=None, help="Save filtered tweets to JSON file.") @click.option("--filter", "do_filter", is_flag=True, help="Enable score-based filtering.") @click.option("--full-text", is_flag=True, help="Show full tweet text in table output.") +@click.option( + "--include-promoted/--no-include-promoted", + default=False, + help="Include promoted tweets when the timeline endpoint exposes them.", +) @click.pass_context -def feed(ctx, feed_type, max_count, as_json, as_yaml, input_file, output_file, do_filter, full_text): - # type: (Any, str, Optional[int], bool, bool, Optional[str], Optional[str], bool, bool) -> None +def feed(ctx, feed_type, max_count, cursor, as_json, as_yaml, input_file, output_file, do_filter, full_text, include_promoted): + # type: (Any, str, Optional[int], Optional[str], bool, bool, Optional[str], Optional[str], bool, bool, bool) -> None """Fetch home timeline with optional filtering.""" compact = ctx.obj.get("compact", False) rich_output = use_rich_output(as_json=as_json, as_yaml=as_yaml, compact=compact) + next_cursor = None # type: Optional[str] config = load_config() try: if input_file: @@ -428,9 +444,19 @@ def feed(ctx, feed_type, 
max_count, as_json, as_yaml, input_file, output_file, d console.print("📡 Fetching %s (%d tweets)...\n" % (label, fetch_count)) start = time.time() if feed_type == "following": - tweets = client.fetch_following_feed(fetch_count) + tweets, next_cursor = client.fetch_following_feed( + fetch_count, + include_promoted=include_promoted, + cursor=cursor, + return_cursor=True, + ) else: - tweets = client.fetch_home_timeline(fetch_count) + tweets, next_cursor = client.fetch_home_timeline( + fetch_count, + include_promoted=include_promoted, + cursor=cursor, + return_cursor=True, + ) elapsed = time.time() - start if rich_output: console.print("✅ Fetched %d tweets in %.1fs\n" % (len(tweets), elapsed)) @@ -450,7 +476,7 @@ def feed(ctx, feed_type, max_count, as_json, as_yaml, input_file, output_file, d save_tweet_cache(filtered) - if emit_structured(tweets_to_data(filtered), as_json=as_json, as_yaml=as_yaml): + if _emit_timeline_structured(filtered, next_cursor, as_json=as_json, as_yaml=as_yaml): return title = "👥 Following" if feed_type == "following" else "📱 Twitter" diff --git a/twitter_cli/client.py b/twitter_cli/client.py index 9ef4f5a..4fc36ce 100644 --- a/twitter_cli/client.py +++ b/twitter_cli/client.py @@ -153,22 +153,28 @@ class TwitterClient: # ── Read operations ────────────────────────────────────────────── - def fetch_home_timeline(self, count=20): - # type: (int) -> List[Tweet] + def fetch_home_timeline(self, count=20, include_promoted=False, cursor=None, return_cursor=False): + # type: (int, bool, Optional[str], bool) -> Any """Fetch home timeline tweets.""" return self._fetch_timeline( "HomeTimeline", count, lambda data: _deep_get(data, "data", "home", "home_timeline_urt", "instructions"), + include_promoted=include_promoted, + start_cursor=cursor, + return_cursor=return_cursor, ) - def fetch_following_feed(self, count=20): - # type: (int) -> List[Tweet] + def fetch_following_feed(self, count=20, include_promoted=False, cursor=None, return_cursor=False): + # 
type: (int, bool, Optional[str], bool) -> Any """Fetch chronological following feed.""" return self._fetch_timeline( "HomeLatestTimeline", count, lambda data: _deep_get(data, "data", "home", "home_timeline_urt", "instructions"), + include_promoted=include_promoted, + start_cursor=cursor, + return_cursor=return_cursor, ) def fetch_bookmarks(self, count=50): @@ -732,8 +738,8 @@ class TwitterClient: # ── Internal: timeline / user list fetchers ────────────────────── - def _fetch_timeline(self, operation_name, count, get_instructions, extra_variables=None, override_base_variables=False, field_toggles=None, use_post=False): - # type: (str, int, Callable[[Any], Any], Optional[Dict[str, Any]], bool, Optional[Dict[str, Any]], bool) -> List[Tweet] + def _fetch_timeline(self, operation_name, count, get_instructions, extra_variables=None, override_base_variables=False, field_toggles=None, use_post=False, include_promoted=False, start_cursor=None, return_cursor=False): + # type: (str, int, Callable[[Any], Any], Optional[Dict[str, Any]], bool, Optional[Dict[str, Any]], bool, bool, Optional[str], bool) -> Any """Generic timeline fetcher with pagination and deduplication. 
Args: @@ -751,7 +757,8 @@ class TwitterClient: tweets = [] # type: List[Tweet] seen_ids = set() # type: Set[str] - cursor = None # type: Optional[str] + cursor = start_cursor # type: Optional[str] + continuation_cursor = None # type: Optional[str] attempts = 0 max_attempts = int(math.ceil(count / 20.0)) + 2 @@ -763,7 +770,7 @@ class TwitterClient: else: variables = { "count": min(count - len(tweets) + 5, 40), - "includePromotedContent": False, + "includePromotedContent": include_promoted, "latestControlAvailable": True, "requestContext": "launch", } @@ -784,10 +791,13 @@ class TwitterClient: tweets.append(tweet) if not next_cursor: + continuation_cursor = None break if next_cursor == cursor: logger.debug("Timeline pagination stopped because cursor did not advance: %s", next_cursor) + continuation_cursor = None break + continuation_cursor = next_cursor cursor = next_cursor if not new_tweets: @@ -799,6 +809,8 @@ class TwitterClient: logger.debug("Sleeping %.1fs between requests", jitter) time.sleep(jitter) + if return_cursor: + return tweets[:count], continuation_cursor return tweets[:count] def _fetch_user_list(self, operation_name, user_id, count, get_instructions): diff --git a/twitter_cli/models.py b/twitter_cli/models.py index 02f10f8..ee1d314 100644 --- a/twitter_cli/models.py +++ b/twitter_cli/models.py @@ -53,6 +53,7 @@ class Tweet: article_title: Optional[str] = None article_text: Optional[str] = None is_subscriber_only: bool = False + is_promoted: bool = False @dataclass diff --git a/twitter_cli/parser.py b/twitter_cli/parser.py index a92374b..6dd6f6f 100644 --- a/twitter_cli/parser.py +++ b/twitter_cli/parser.py @@ -504,6 +504,10 @@ def parse_timeline_response(data, get_instructions): if result: tweet = parse_tweet_result(result) if tweet: + tweet.is_promoted = bool( + str(entry.get("entryId") or "").startswith("promoted-") + or item_content.get("promotedMetadata") + ) tweets.append(tweet) for nested_item in content.get("items", []): @@ -517,6 +521,11 @@ 
def parse_timeline_response(data, get_instructions): if nested_result: tweet = parse_tweet_result(nested_result) if tweet: + nested_item_content = _deep_get(nested_item, "item", "itemContent") or {} + tweet.is_promoted = bool( + str(_deep_get(nested_item, "entryId") or "").startswith("promoted-") + or nested_item_content.get("promotedMetadata") + ) tweets.append(tweet) return tweets, next_cursor diff --git a/twitter_cli/serialization.py b/twitter_cli/serialization.py index f20be6d..25ceede 100644 --- a/twitter_cli/serialization.py +++ b/twitter_cli/serialization.py @@ -47,6 +47,7 @@ def tweet_to_dict(tweet: Tweet) -> Dict[str, Any]: "lang": tweet.lang, "score": tweet.score, "isSubscriberOnly": tweet.is_subscriber_only, + "isPromoted": tweet.is_promoted, } if tweet.article_title is not None: data["articleTitle"] = tweet.article_title @@ -124,6 +125,7 @@ def tweet_from_dict(data: Dict[str, Any]) -> Tweet: article_title=_optional_str(data.get("articleTitle")), article_text=_optional_str(data.get("articleText")), is_subscriber_only=bool(data.get("isSubscriberOnly", False)), + is_promoted=bool(data.get("isPromoted", False)), )