Feed cursor pagination (#49)

* Expose promoted tweets in feed output
* Add cursor-based feed pagination output
2026-04-10 01:20:18 +08:00
parent e3545ab069
commit 7816f8d813
12 changed files with 199 additions and 13 deletions
--- a/README.md
+++ b/README.md
@@ -97,6 +97,7 @@ twitter feed --filter
 ```bash
 # Feed
 twitter feed --max 50
 twitter feed --cursor "<next-cursor-from-previous-response>"
 twitter feed --full-text
 twitter feed --output tweets.json
 twitter feed --input tweets.json
@@ -427,6 +428,7 @@ twitter feed
 twitter feed -t following
 twitter feed --filter
 twitter feed --full-text
 twitter feed --cursor "<上一页返回的 nextCursor>"
 # 收藏
 twitter bookmarks
--- a/SCHEMA.md
+++ b/SCHEMA.md
@@ -8,6 +8,8 @@
 ok: true
 schema_version: "1"
 data: ...
 pagination:
  nextCursor: "optional-cursor"
 ```
 ## Error
@@ -25,6 +27,7 @@ error:
 - `--yaml` and `--json` both use this envelope
 - non-TTY stdout defaults to YAML
 - tweet and user lists are returned under `data`
 - timeline-style list commands may also return `pagination.nextCursor`
 - `article` returns a single tweet object under `data`
 - `status` returns `data.authenticated` plus `data.user`
 - `whoami` returns `data.user`
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -39,6 +39,7 @@ def tweet_factory():
            article_title=overrides.pop("article_title", None),
            article_text=overrides.pop("article_text", None),
            is_subscriber_only=overrides.pop("is_subscriber_only", False),
            is_promoted=overrides.pop("is_promoted", False),
        )
    return _make_tweet
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -57,6 +57,68 @@ def test_cli_feed_input_accepts_structured_json_envelope(tmp_path, tweet_factory
    assert '"id": "1"' in result.output
 def test_cli_feed_passes_include_promoted(monkeypatch, tweet_factory) -> None:
    """`feed --json --include-promoted` forwards the flag and emits `pagination.nextCursor`."""

    class FakeClient:
        """Stub client whose home-timeline fetch validates the arguments it receives."""

        def fetch_home_timeline(
            self,
            count: int,
            include_promoted: bool = False,
            cursor: str | None = None,
            return_cursor: bool = False,
        ):
            assert count == 20
            assert include_promoted is True
            assert cursor is None
            assert return_cursor is True
            # One promoted tweet plus a continuation cursor.
            return [tweet_factory("1", is_promoted=True)], "cursor-next"

    def _fake_get_client(config=None, quiet=False):
        return FakeClient()

    def _fake_load_config():
        return {"fetch": {"count": 20}, "filter": {}, "rateLimit": {}}

    monkeypatch.setattr("twitter_cli.cli._get_client", _fake_get_client)
    monkeypatch.setattr("twitter_cli.cli.load_config", _fake_load_config)

    outcome = CliRunner().invoke(cli, ["feed", "--json", "--include-promoted"])
    assert outcome.exit_code == 0

    envelope = json.loads(outcome.output)
    assert envelope["ok"] is True
    assert envelope["data"][0]["isPromoted"] is True
    assert envelope["pagination"]["nextCursor"] == "cursor-next"
 def test_cli_feed_accepts_cursor_and_emits_pagination(monkeypatch) -> None:
    """`feed -t following --cursor ... --json` forwards the cursor and reports the next one."""

    class FakeClient:
        """Stub client whose following-feed fetch validates the arguments it receives."""

        def fetch_following_feed(
            self,
            count: int,
            include_promoted: bool = False,
            cursor: str | None = None,
            return_cursor: bool = False,
        ):
            assert count == 20
            assert include_promoted is False
            assert cursor == "cursor-prev"
            assert return_cursor is True
            # No tweets on this page, but a continuation cursor is returned.
            return [], "cursor-next"

    def _fake_get_client(config=None, quiet=False):
        return FakeClient()

    def _fake_load_config():
        return {"fetch": {"count": 20}, "filter": {}, "rateLimit": {}}

    monkeypatch.setattr("twitter_cli.cli._get_client", _fake_get_client)
    monkeypatch.setattr("twitter_cli.cli.load_config", _fake_load_config)

    argv = ["feed", "-t", "following", "--cursor", "cursor-prev", "--json"]
    outcome = CliRunner().invoke(cli, argv)
    assert outcome.exit_code == 0

    envelope = json.loads(outcome.output)
    assert envelope["ok"] is True
    assert envelope["data"] == []
    assert envelope["pagination"]["nextCursor"] == "cursor-next"
 def test_print_tweet_table_truncates_text_by_default(tweet_factory) -> None:
    long_text = "A" * 140
    console = Console(record=True, width=400)
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -333,6 +333,24 @@ class TestBuildHeaders:
 class TestPaginationBehavior:
    def test_fetch_timeline_can_include_promoted_content(self):
        """`include_promoted=True` shows up as `includePromotedContent` in the request variables."""
        recorded = []

        def _graphql_get(operation_name, variables, features, field_toggles=None):
            # Snapshot the variables of every request for inspection below.
            recorded.append(variables.copy())
            return {"page": 1}

        # __new__ bypasses __init__, so only the attributes set here exist.
        client = TwitterClient.__new__(TwitterClient)
        client._request_delay = 0.0
        client._max_count = 200
        client._graphql_get = _graphql_get

        with patch('twitter_cli.client.parse_timeline_response', return_value=([], None)):
            client._fetch_timeline("HomeTimeline", 1, lambda data: data, include_promoted=True)

        first_request = recorded[0]
        assert first_request["includePromotedContent"] is True
    def test_continues_when_cursor_advances_without_new_tweets(self):
        client = TwitterClient.__new__(TwitterClient)
        client._request_delay = 0.0
@@ -379,6 +397,33 @@ class TestPaginationBehavior:
        assert tweets == []
        assert calls == [None, "cursor-same"]
    def test_fetch_timeline_returns_continuation_cursor(self):
        """With `return_cursor=True` the fetcher returns `(tweets, cursor)` and starts from `start_cursor`."""
        recorded = []

        def _graphql_get(operation_name, variables, features, field_toggles=None):
            # Snapshot the variables of every request for inspection below.
            recorded.append(variables.copy())
            return {"page": 1}

        # __new__ bypasses __init__, so only the attributes set here exist.
        client = TwitterClient.__new__(TwitterClient)
        client._request_delay = 0.0
        client._max_count = 200
        client._graphql_get = _graphql_get

        tweet = MagicMock(id="tweet-1")
        parsed_page = ([tweet], "cursor-next")
        with patch('twitter_cli.client.parse_timeline_response', return_value=parsed_page):
            tweets, cursor = client._fetch_timeline(
                "HomeTimeline",
                1,
                lambda data: data,
                start_cursor="cursor-prev",
                return_cursor=True,
            )

        fetched_ids = [item.id for item in tweets]
        assert fetched_ids == ["tweet-1"]
        assert cursor == "cursor-next"
        assert recorded[0]["cursor"] == "cursor-prev"
    def test_user_list_continues_when_cursor_advances_without_new_users(self):
        client = TwitterClient.__new__(TwitterClient)
        client._request_delay = 0.0
--- a/tests/test_parser_fixtures.py
+++ b/tests/test_parser_fixtures.py
@@ -36,6 +36,21 @@ def test_parse_home_timeline_fixture(fixture_loader) -> None:
    assert tweets[1].quoted_tweet.id == "30"
 def test_parse_home_timeline_fixture_marks_promoted_entries(fixture_loader) -> None:
    """An entry rewritten to look promoted parses with `is_promoted` set; the next tweet does not."""
    instructions_path = ("data", "home", "home_timeline_urt", "instructions")

    def _instructions(data):
        return _deep_get(data, *instructions_path)

    payload = fixture_loader("home_timeline.json")
    # Turn the first fixture entry into a promoted one (entry id prefix + metadata).
    first_entry = payload["data"]["home"]["home_timeline_urt"]["instructions"][0]["entries"][0]
    first_entry["entryId"] = "promoted-tweet-1-demo"
    first_entry["content"]["itemContent"]["promotedMetadata"] = {"impressionId": "demo"}

    tweets, _ = parse_timeline_response(payload, _instructions)

    leading = tweets[0]
    assert leading.is_promoted is True
    following = tweets[1]
    assert following.is_promoted is False
 def test_parse_tweet_detail_fixture_with_nested_items(fixture_loader) -> None:
    payload = fixture_loader("tweet_detail.json")
--- a/tests/test_serialization.py
+++ b/tests/test_serialization.py
@@ -85,3 +85,11 @@ def test_tweet_roundtrip_preserves_subscriber_only(tweet_factory) -> None:
    assert payload["isSubscriberOnly"] is True
    restored = tweet_from_dict(payload)
    assert restored.is_subscriber_only is True
 def test_tweet_roundtrip_preserves_promoted_flag(tweet_factory) -> None:
    """`is_promoted` survives a `tweet_to_dict` -> `tweet_from_dict` round trip."""
    serialized = tweet_to_dict(tweet_factory("100", is_promoted=True))
    assert serialized["isPromoted"] is True

    roundtripped = tweet_from_dict(serialized)
    assert roundtripped.is_promoted is True
--- a/twitter_cli/cli.py
+++ b/twitter_cli/cli.py
@@ -349,6 +349,15 @@ def _fetch_and_display(fetch_fn, label, emoji, max_count, as_json, as_yaml, outp
    console.print()
 def _emit_timeline_structured(tweets, next_cursor, *, as_json, as_yaml):
    # type: (TweetList, Optional[str], bool, bool) -> bool
    """Emit tweets in the success envelope, adding pagination metadata when a cursor exists.

    ``data`` remains the plain tweet list. A truthy ``next_cursor`` is attached
    as ``pagination.nextCursor``; otherwise no ``pagination`` key is added.
    Returns the result of ``emit_structured``.
    """
    envelope = success_payload(tweets_to_data(tweets))
    if not next_cursor:
        # No continuation available: emit the envelope without pagination.
        return emit_structured(envelope, as_json=as_json, as_yaml=as_yaml)
    envelope["pagination"] = {"nextCursor": next_cursor}
    return emit_structured(envelope, as_json=as_json, as_yaml=as_yaml)
 def _run_bookmarks_command(max_count, as_json, as_yaml, output_file, do_filter, compact=False, full_text=False):
    # type: (Optional[int], bool, bool, Optional[str], bool, bool, bool) -> None
    config = load_config()
@@ -401,17 +410,24 @@ def _inherit_flag(ctx, name, value):
    help="Feed type: for-you (algorithmic) or following (chronological).",
 )
@click.option("--max", "-n", "max_count", type=int, default=None, help="Max number of tweets to fetch.")
@click.option("--cursor", type=str, default=None, help="Pagination cursor for continuing a previous feed request.")
@structured_output_options
@click.option("--input", "-i", "input_file", type=str, default=None, help="Load tweets from JSON file.")
@click.option("--output", "-o", "output_file", type=str, default=None, help="Save filtered tweets to JSON file.")
@click.option("--filter", "do_filter", is_flag=True, help="Enable score-based filtering.")
@click.option("--full-text", is_flag=True, help="Show full tweet text in table output.")
@click.option(
    "--include-promoted/--no-include-promoted",
    default=False,
    help="Include promoted tweets when the timeline endpoint exposes them.",
 )
@click.pass_context
-def feed(ctx, feed_type, max_count, as_json, as_yaml, input_file, output_file, do_filter, full_text):
+def feed(ctx, feed_type, max_count, cursor, as_json, as_yaml, input_file, output_file, do_filter, full_text, include_promoted):
-    # type: (Any, str, Optional[int], bool, bool, Optional[str], Optional[str], bool, bool) -> None
+    # type: (Any, str, Optional[int], Optional[str], bool, bool, Optional[str], Optional[str], bool, bool, bool) -> None
    """Fetch home timeline with optional filtering."""
    compact = ctx.obj.get("compact", False)
    rich_output = use_rich_output(as_json=as_json, as_yaml=as_yaml, compact=compact)
    next_cursor = None  # type: Optional[str]
    config = load_config()
    try:
        if input_file:
@@ -428,9 +444,19 @@ def feed(ctx, feed_type, max_count, as_json, as_yaml, input_file, output_file, d
                console.print("📡 Fetching %s (%d tweets)...\n" % (label, fetch_count))
            start = time.time()
            if feed_type == "following":
-                tweets = client.fetch_following_feed(fetch_count)
+                tweets, next_cursor = client.fetch_following_feed(
                    fetch_count,
                    include_promoted=include_promoted,
                    cursor=cursor,
                    return_cursor=True,
                )
            else:
-                tweets = client.fetch_home_timeline(fetch_count)
+                tweets, next_cursor = client.fetch_home_timeline(
                    fetch_count,
                    include_promoted=include_promoted,
                    cursor=cursor,
                    return_cursor=True,
                )
            elapsed = time.time() - start
            if rich_output:
                console.print("✅ Fetched %d tweets in %.1fs\n" % (len(tweets), elapsed))
@@ -450,7 +476,7 @@ def feed(ctx, feed_type, max_count, as_json, as_yaml, input_file, output_file, d
    save_tweet_cache(filtered)
-    if emit_structured(tweets_to_data(filtered), as_json=as_json, as_yaml=as_yaml):
+    if _emit_timeline_structured(filtered, next_cursor, as_json=as_json, as_yaml=as_yaml):
        return
    title = "👥 Following" if feed_type == "following" else "📱 Twitter"
--- a/twitter_cli/client.py
+++ b/twitter_cli/client.py
@@ -153,22 +153,28 @@ class TwitterClient:
    # ── Read operations ──────────────────────────────────────────────
-    def fetch_home_timeline(self, count=20):
+    def fetch_home_timeline(self, count=20, include_promoted=False, cursor=None, return_cursor=False):
-        # type: (int) -> List[Tweet]
+        # type: (int, bool, Optional[str], bool) -> Any
        """Fetch home timeline tweets."""
        return self._fetch_timeline(
            "HomeTimeline",
            count,
            lambda data: _deep_get(data, "data", "home", "home_timeline_urt", "instructions"),
            include_promoted=include_promoted,
            start_cursor=cursor,
            return_cursor=return_cursor,
        )
-    def fetch_following_feed(self, count=20):
+    def fetch_following_feed(self, count=20, include_promoted=False, cursor=None, return_cursor=False):
-        # type: (int) -> List[Tweet]
+        # type: (int, bool, Optional[str], bool) -> Any
        """Fetch chronological following feed."""
        return self._fetch_timeline(
            "HomeLatestTimeline",
            count,
            lambda data: _deep_get(data, "data", "home", "home_timeline_urt", "instructions"),
            include_promoted=include_promoted,
            start_cursor=cursor,
            return_cursor=return_cursor,
        )
    def fetch_bookmarks(self, count=50):
@@ -732,8 +738,8 @@ class TwitterClient:
    # ── Internal: timeline / user list fetchers ──────────────────────
-    def _fetch_timeline(self, operation_name, count, get_instructions, extra_variables=None, override_base_variables=False, field_toggles=None, use_post=False):
+    def _fetch_timeline(self, operation_name, count, get_instructions, extra_variables=None, override_base_variables=False, field_toggles=None, use_post=False, include_promoted=False, start_cursor=None, return_cursor=False):
-        # type: (str, int, Callable[[Any], Any], Optional[Dict[str, Any]], bool, Optional[Dict[str, Any]], bool) -> List[Tweet]
+        # type: (str, int, Callable[[Any], Any], Optional[Dict[str, Any]], bool, Optional[Dict[str, Any]], bool, bool, Optional[str], bool) -> Any
        """Generic timeline fetcher with pagination and deduplication.
        Args:
@@ -751,7 +757,8 @@ class TwitterClient:
        tweets = []  # type: List[Tweet]
        seen_ids = set()  # type: Set[str]
-        cursor = None  # type: Optional[str]
+        cursor = start_cursor  # type: Optional[str]
        continuation_cursor = None  # type: Optional[str]
        attempts = 0
        max_attempts = int(math.ceil(count / 20.0)) + 2
@@ -763,7 +770,7 @@ class TwitterClient:
            else:
                variables = {
                    "count": min(count - len(tweets) + 5, 40),
-                    "includePromotedContent": False,
+                    "includePromotedContent": include_promoted,
                    "latestControlAvailable": True,
                    "requestContext": "launch",
                }
@@ -784,10 +791,13 @@ class TwitterClient:
                    tweets.append(tweet)
            if not next_cursor:
                continuation_cursor = None
                break
            if next_cursor == cursor:
                logger.debug("Timeline pagination stopped because cursor did not advance: %s", next_cursor)
                continuation_cursor = None
                break
            continuation_cursor = next_cursor
            cursor = next_cursor
            if not new_tweets:
@@ -799,6 +809,8 @@ class TwitterClient:
                logger.debug("Sleeping %.1fs between requests", jitter)
                time.sleep(jitter)
        if return_cursor:
            return tweets[:count], continuation_cursor
        return tweets[:count]
    def _fetch_user_list(self, operation_name, user_id, count, get_instructions):
--- a/twitter_cli/models.py
+++ b/twitter_cli/models.py
@@ -53,6 +53,7 @@ class Tweet:
    article_title: Optional[str] = None
    article_text: Optional[str] = None
    is_subscriber_only: bool = False
    is_promoted: bool = False
@dataclass
--- a/twitter_cli/parser.py
+++ b/twitter_cli/parser.py
@@ -504,6 +504,10 @@ def parse_timeline_response(data, get_instructions):
            if result:
                tweet = parse_tweet_result(result)
                if tweet:
                    tweet.is_promoted = bool(
                        str(entry.get("entryId") or "").startswith("promoted-")
                        or item_content.get("promotedMetadata")
                    )
                    tweets.append(tweet)
            for nested_item in content.get("items", []):
@@ -517,6 +521,11 @@ def parse_timeline_response(data, get_instructions):
                if nested_result:
                    tweet = parse_tweet_result(nested_result)
                    if tweet:
                        nested_item_content = _deep_get(nested_item, "item", "itemContent") or {}
                        tweet.is_promoted = bool(
                            str(_deep_get(nested_item, "entryId") or "").startswith("promoted-")
                            or nested_item_content.get("promotedMetadata")
                        )
                        tweets.append(tweet)
    return tweets, next_cursor
--- a/twitter_cli/serialization.py
+++ b/twitter_cli/serialization.py
@@ -47,6 +47,7 @@ def tweet_to_dict(tweet: Tweet) -> Dict[str, Any]:
        "lang": tweet.lang,
        "score": tweet.score,
        "isSubscriberOnly": tweet.is_subscriber_only,
        "isPromoted": tweet.is_promoted,
    }
    if tweet.article_title is not None:
        data["articleTitle"] = tweet.article_title
@@ -124,6 +125,7 @@ def tweet_from_dict(data: Dict[str, Any]) -> Tweet:
        article_title=_optional_str(data.get("articleTitle")),
        article_text=_optional_str(data.get("articleText")),
        is_subscriber_only=bool(data.get("isSubscriberOnly", False)),
        is_promoted=bool(data.get("isPromoted", False)),
    )