Feed cursor pagination (#49)

* Expose promoted tweets in feed output

* Add cursor-based feed pagination output
This commit is contained in:
Lucius
2026-04-10 01:20:18 +08:00
committed by GitHub
parent e3545ab069
commit 7816f8d813
12 changed files with 199 additions and 13 deletions

View File

@@ -97,6 +97,7 @@ twitter feed --filter
```bash ```bash
# Feed # Feed
twitter feed --max 50 twitter feed --max 50
twitter feed --cursor "<next-cursor-from-previous-response>"
twitter feed --full-text twitter feed --full-text
twitter feed --output tweets.json twitter feed --output tweets.json
twitter feed --input tweets.json twitter feed --input tweets.json
@@ -427,6 +428,7 @@ twitter feed
twitter feed -t following twitter feed -t following
twitter feed --filter twitter feed --filter
twitter feed --full-text twitter feed --full-text
twitter feed --cursor "<上一页返回的 nextCursor>"
# 收藏 # 收藏
twitter bookmarks twitter bookmarks

View File

@@ -8,6 +8,8 @@
ok: true ok: true
schema_version: "1" schema_version: "1"
data: ... data: ...
pagination:
  nextCursor: "optional-cursor"
``` ```
## Error ## Error
@@ -25,6 +27,7 @@ error:
- `--yaml` and `--json` both use this envelope - `--yaml` and `--json` both use this envelope
- non-TTY stdout defaults to YAML - non-TTY stdout defaults to YAML
- tweet and user lists are returned under `data` - tweet and user lists are returned under `data`
- timeline-style list commands may also return `pagination.nextCursor`
- `article` returns a single tweet object under `data` - `article` returns a single tweet object under `data`
- `status` returns `data.authenticated` plus `data.user` - `status` returns `data.authenticated` plus `data.user`
- `whoami` returns `data.user` - `whoami` returns `data.user`

View File

@@ -39,6 +39,7 @@ def tweet_factory():
article_title=overrides.pop("article_title", None), article_title=overrides.pop("article_title", None),
article_text=overrides.pop("article_text", None), article_text=overrides.pop("article_text", None),
is_subscriber_only=overrides.pop("is_subscriber_only", False), is_subscriber_only=overrides.pop("is_subscriber_only", False),
is_promoted=overrides.pop("is_promoted", False),
) )
return _make_tweet return _make_tweet

View File

@@ -57,6 +57,68 @@ def test_cli_feed_input_accepts_structured_json_envelope(tmp_path, tweet_factory
assert '"id": "1"' in result.output assert '"id": "1"' in result.output
def test_cli_feed_passes_include_promoted(monkeypatch, tweet_factory) -> None:
    """`feed --json --include-promoted` forwards the flag and emits the next cursor."""

    class FakeClient:
        """Stand-in client that verifies the arguments the CLI hands to it."""

        def fetch_home_timeline(
            self,
            count: int,
            include_promoted: bool = False,
            cursor: str | None = None,
            return_cursor: bool = False,
        ):
            # No --cursor was given, so the CLI must start from the first page.
            assert cursor is None
            assert count == 20
            assert include_promoted is True
            assert return_cursor is True
            promoted_tweet = tweet_factory("1", is_promoted=True)
            return [promoted_tweet], "cursor-next"

    def _fake_get_client(config=None, quiet=False):
        return FakeClient()

    def _fake_load_config():
        # A fresh dict per call, so nothing leaks between config loads.
        return {"fetch": {"count": 20}, "filter": {}, "rateLimit": {}}

    monkeypatch.setattr("twitter_cli.cli._get_client", _fake_get_client)
    monkeypatch.setattr("twitter_cli.cli.load_config", _fake_load_config)

    outcome = CliRunner().invoke(cli, ["feed", "--json", "--include-promoted"])

    assert outcome.exit_code == 0
    envelope = json.loads(outcome.output)
    assert envelope["ok"] is True
    assert envelope["data"][0]["isPromoted"] is True
    assert envelope["pagination"]["nextCursor"] == "cursor-next"
def test_cli_feed_accepts_cursor_and_emits_pagination(monkeypatch) -> None:
    """`feed -t following --cursor X --json` passes X through and reports the next cursor."""

    class FakeClient:
        """Stand-in client that verifies the arguments the CLI hands to it."""

        def fetch_following_feed(
            self,
            count: int,
            include_promoted: bool = False,
            cursor: str | None = None,
            return_cursor: bool = False,
        ):
            # The value given to --cursor must arrive unchanged.
            assert cursor == "cursor-prev"
            assert count == 20
            assert include_promoted is False
            assert return_cursor is True
            return [], "cursor-next"

    def _fake_get_client(config=None, quiet=False):
        return FakeClient()

    def _fake_load_config():
        # A fresh dict per call, so nothing leaks between config loads.
        return {"fetch": {"count": 20}, "filter": {}, "rateLimit": {}}

    monkeypatch.setattr("twitter_cli.cli._get_client", _fake_get_client)
    monkeypatch.setattr("twitter_cli.cli.load_config", _fake_load_config)

    cli_args = ["feed", "-t", "following", "--cursor", "cursor-prev", "--json"]
    outcome = CliRunner().invoke(cli, cli_args)

    assert outcome.exit_code == 0
    envelope = json.loads(outcome.output)
    assert envelope["ok"] is True
    # An empty page still carries pagination metadata.
    assert envelope["data"] == []
    assert envelope["pagination"]["nextCursor"] == "cursor-next"
def test_print_tweet_table_truncates_text_by_default(tweet_factory) -> None: def test_print_tweet_table_truncates_text_by_default(tweet_factory) -> None:
long_text = "A" * 140 long_text = "A" * 140
console = Console(record=True, width=400) console = Console(record=True, width=400)

View File

@@ -333,6 +333,24 @@ class TestBuildHeaders:
class TestPaginationBehavior: class TestPaginationBehavior:
def test_fetch_timeline_can_include_promoted_content(self):
    """`include_promoted=True` is sent as `includePromotedContent` in the request variables."""
    # Bypass __init__; only the attributes set below are provided to the client.
    client = TwitterClient.__new__(TwitterClient)
    client._max_count = 200
    client._request_delay = 0.0

    recorded_variables = []

    def _recording_graphql_get(operation_name, variables, features, field_toggles=None):
        # Snapshot the variables so later mutation cannot affect the assertion.
        recorded_variables.append(variables.copy())
        return {"page": 1}

    client._graphql_get = _recording_graphql_get

    empty_page = ([], None)
    with patch('twitter_cli.client.parse_timeline_response', return_value=empty_page):
        client._fetch_timeline("HomeTimeline", 1, lambda data: data, include_promoted=True)

    first_request = recorded_variables[0]
    assert first_request["includePromotedContent"] is True
def test_continues_when_cursor_advances_without_new_tweets(self): def test_continues_when_cursor_advances_without_new_tweets(self):
client = TwitterClient.__new__(TwitterClient) client = TwitterClient.__new__(TwitterClient)
client._request_delay = 0.0 client._request_delay = 0.0
@@ -379,6 +397,33 @@ class TestPaginationBehavior:
assert tweets == [] assert tweets == []
assert calls == [None, "cursor-same"] assert calls == [None, "cursor-same"]
def test_fetch_timeline_returns_continuation_cursor(self):
    """With `return_cursor=True` the fetcher starts at `start_cursor` and returns the next cursor."""
    # Bypass __init__; only the attributes set below are provided to the client.
    client = TwitterClient.__new__(TwitterClient)
    client._max_count = 200
    client._request_delay = 0.0

    recorded_variables = []

    def _recording_graphql_get(operation_name, variables, features, field_toggles=None):
        # Snapshot the variables so later mutation cannot affect the assertions.
        recorded_variables.append(variables.copy())
        return {"page": 1}

    client._graphql_get = _recording_graphql_get

    fake_tweet = MagicMock(id="tweet-1")
    parsed_page = ([fake_tweet], "cursor-next")
    with patch('twitter_cli.client.parse_timeline_response', return_value=parsed_page):
        tweets, cursor = client._fetch_timeline(
            "HomeTimeline",
            1,
            lambda data: data,
            start_cursor="cursor-prev",
            return_cursor=True,
        )

    returned_ids = [item.id for item in tweets]
    assert returned_ids == ["tweet-1"]
    assert cursor == "cursor-next"
    # The first request must resume from the caller-supplied cursor.
    assert recorded_variables[0]["cursor"] == "cursor-prev"
def test_user_list_continues_when_cursor_advances_without_new_users(self): def test_user_list_continues_when_cursor_advances_without_new_users(self):
client = TwitterClient.__new__(TwitterClient) client = TwitterClient.__new__(TwitterClient)
client._request_delay = 0.0 client._request_delay = 0.0

View File

@@ -36,6 +36,21 @@ def test_parse_home_timeline_fixture(fixture_loader) -> None:
assert tweets[1].quoted_tweet.id == "30" assert tweets[1].quoted_tweet.id == "30"
def test_parse_home_timeline_fixture_marks_promoted_entries(fixture_loader) -> None:
    """An entry carrying promoted markers is parsed with `is_promoted` set; the next one is not."""
    payload = fixture_loader("home_timeline.json")

    # Turn the first fixture entry into a promoted one by adding both markers.
    instructions = payload["data"]["home"]["home_timeline_urt"]["instructions"]
    first_entry = instructions[0]["entries"][0]
    first_entry["entryId"] = "promoted-tweet-1-demo"
    first_entry["content"]["itemContent"]["promotedMetadata"] = {"impressionId": "demo"}

    def _get_instructions(data):
        return _deep_get(data, "data", "home", "home_timeline_urt", "instructions")

    tweets, _ = parse_timeline_response(payload, _get_instructions)

    promoted, regular = tweets[0], tweets[1]
    assert promoted.is_promoted is True
    assert regular.is_promoted is False
def test_parse_tweet_detail_fixture_with_nested_items(fixture_loader) -> None: def test_parse_tweet_detail_fixture_with_nested_items(fixture_loader) -> None:
payload = fixture_loader("tweet_detail.json") payload = fixture_loader("tweet_detail.json")

View File

@@ -85,3 +85,11 @@ def test_tweet_roundtrip_preserves_subscriber_only(tweet_factory) -> None:
assert payload["isSubscriberOnly"] is True assert payload["isSubscriberOnly"] is True
restored = tweet_from_dict(payload) restored = tweet_from_dict(payload)
assert restored.is_subscriber_only is True assert restored.is_subscriber_only is True
def test_tweet_roundtrip_preserves_promoted_flag(tweet_factory) -> None:
    """`is_promoted` survives serialization to `isPromoted` and back."""
    original = tweet_factory("100", is_promoted=True)

    serialized = tweet_to_dict(original)
    assert serialized["isPromoted"] is True

    round_tripped = tweet_from_dict(serialized)
    assert round_tripped.is_promoted is True

View File

@@ -349,6 +349,15 @@ def _fetch_and_display(fetch_fn, label, emoji, max_count, as_json, as_yaml, outp
console.print() console.print()
def _emit_timeline_structured(tweets, next_cursor, *, as_json, as_yaml):
    # type: (TweetList, Optional[str], bool, bool) -> bool
    """Emit a timeline as a structured success payload, with optional pagination.

    The payload is built by ``success_payload`` from the converted tweet list.
    A top-level ``pagination`` mapping holding ``nextCursor`` is added only
    when ``next_cursor`` is truthy.

    Args:
        tweets: Tweets to convert with ``tweets_to_data``.
        next_cursor: Cursor for the following page; ``None`` or an empty
            string leaves the ``pagination`` key out entirely.
        as_json: Forwarded to ``emit_structured``.
        as_yaml: Forwarded to ``emit_structured``.

    Returns:
        The value returned by ``emit_structured``.
    """
    tweet_rows = tweets_to_data(tweets)
    envelope = success_payload(tweet_rows)
    if next_cursor:
        envelope["pagination"] = {"nextCursor": next_cursor}
    return emit_structured(envelope, as_json=as_json, as_yaml=as_yaml)
def _run_bookmarks_command(max_count, as_json, as_yaml, output_file, do_filter, compact=False, full_text=False): def _run_bookmarks_command(max_count, as_json, as_yaml, output_file, do_filter, compact=False, full_text=False):
# type: (Optional[int], bool, bool, Optional[str], bool, bool, bool) -> None # type: (Optional[int], bool, bool, Optional[str], bool, bool, bool) -> None
config = load_config() config = load_config()
@@ -401,17 +410,24 @@ def _inherit_flag(ctx, name, value):
help="Feed type: for-you (algorithmic) or following (chronological).", help="Feed type: for-you (algorithmic) or following (chronological).",
) )
@click.option("--max", "-n", "max_count", type=int, default=None, help="Max number of tweets to fetch.") @click.option("--max", "-n", "max_count", type=int, default=None, help="Max number of tweets to fetch.")
@click.option("--cursor", type=str, default=None, help="Pagination cursor for continuing a previous feed request.")
@structured_output_options @structured_output_options
@click.option("--input", "-i", "input_file", type=str, default=None, help="Load tweets from JSON file.") @click.option("--input", "-i", "input_file", type=str, default=None, help="Load tweets from JSON file.")
@click.option("--output", "-o", "output_file", type=str, default=None, help="Save filtered tweets to JSON file.") @click.option("--output", "-o", "output_file", type=str, default=None, help="Save filtered tweets to JSON file.")
@click.option("--filter", "do_filter", is_flag=True, help="Enable score-based filtering.") @click.option("--filter", "do_filter", is_flag=True, help="Enable score-based filtering.")
@click.option("--full-text", is_flag=True, help="Show full tweet text in table output.") @click.option("--full-text", is_flag=True, help="Show full tweet text in table output.")
@click.option(
"--include-promoted/--no-include-promoted",
default=False,
help="Include promoted tweets when the timeline endpoint exposes them.",
)
@click.pass_context @click.pass_context
def feed(ctx, feed_type, max_count, as_json, as_yaml, input_file, output_file, do_filter, full_text): def feed(ctx, feed_type, max_count, cursor, as_json, as_yaml, input_file, output_file, do_filter, full_text, include_promoted):
# type: (Any, str, Optional[int], bool, bool, Optional[str], Optional[str], bool, bool) -> None # type: (Any, str, Optional[int], Optional[str], bool, bool, Optional[str], Optional[str], bool, bool, bool) -> None
"""Fetch home timeline with optional filtering.""" """Fetch home timeline with optional filtering."""
compact = ctx.obj.get("compact", False) compact = ctx.obj.get("compact", False)
rich_output = use_rich_output(as_json=as_json, as_yaml=as_yaml, compact=compact) rich_output = use_rich_output(as_json=as_json, as_yaml=as_yaml, compact=compact)
next_cursor = None # type: Optional[str]
config = load_config() config = load_config()
try: try:
if input_file: if input_file:
@@ -428,9 +444,19 @@ def feed(ctx, feed_type, max_count, as_json, as_yaml, input_file, output_file, d
console.print("📡 Fetching %s (%d tweets)...\n" % (label, fetch_count)) console.print("📡 Fetching %s (%d tweets)...\n" % (label, fetch_count))
start = time.time() start = time.time()
if feed_type == "following": if feed_type == "following":
tweets = client.fetch_following_feed(fetch_count) tweets, next_cursor = client.fetch_following_feed(
fetch_count,
include_promoted=include_promoted,
cursor=cursor,
return_cursor=True,
)
else: else:
tweets = client.fetch_home_timeline(fetch_count) tweets, next_cursor = client.fetch_home_timeline(
fetch_count,
include_promoted=include_promoted,
cursor=cursor,
return_cursor=True,
)
elapsed = time.time() - start elapsed = time.time() - start
if rich_output: if rich_output:
console.print("✅ Fetched %d tweets in %.1fs\n" % (len(tweets), elapsed)) console.print("✅ Fetched %d tweets in %.1fs\n" % (len(tweets), elapsed))
@@ -450,7 +476,7 @@ def feed(ctx, feed_type, max_count, as_json, as_yaml, input_file, output_file, d
save_tweet_cache(filtered) save_tweet_cache(filtered)
if emit_structured(tweets_to_data(filtered), as_json=as_json, as_yaml=as_yaml): if _emit_timeline_structured(filtered, next_cursor, as_json=as_json, as_yaml=as_yaml):
return return
title = "👥 Following" if feed_type == "following" else "📱 Twitter" title = "👥 Following" if feed_type == "following" else "📱 Twitter"

View File

@@ -153,22 +153,28 @@ class TwitterClient:
# ── Read operations ────────────────────────────────────────────── # ── Read operations ──────────────────────────────────────────────
def fetch_home_timeline(self, count=20, include_promoted=False, cursor=None, return_cursor=False):
    # type: (int, bool, Optional[str], bool) -> Any
    """Fetch home timeline tweets via the "HomeTimeline" operation.

    Args:
        count: Number of tweets requested from ``_fetch_timeline``.
        include_promoted: Forwarded as ``include_promoted``.
        cursor: Cursor to resume from, forwarded as ``start_cursor``;
            ``None`` passes no starting cursor.
        return_cursor: Forwarded as ``return_cursor``.

    Returns:
        Whatever ``_fetch_timeline`` returns: the tweet list alone, or a
        ``(tweets, continuation_cursor)`` pair when ``return_cursor`` is true.
    """
    def _instructions(data):
        return _deep_get(data, "data", "home", "home_timeline_urt", "instructions")

    return self._fetch_timeline(
        "HomeTimeline",
        count,
        _instructions,
        include_promoted=include_promoted,
        start_cursor=cursor,
        return_cursor=return_cursor,
    )
def fetch_following_feed(self, count=20, include_promoted=False, cursor=None, return_cursor=False):
    # type: (int, bool, Optional[str], bool) -> Any
    """Fetch the chronological following feed via the "HomeLatestTimeline" operation.

    Args:
        count: Number of tweets requested from ``_fetch_timeline``.
        include_promoted: Forwarded as ``include_promoted``.
        cursor: Cursor to resume from, forwarded as ``start_cursor``;
            ``None`` passes no starting cursor.
        return_cursor: Forwarded as ``return_cursor``.

    Returns:
        Whatever ``_fetch_timeline`` returns: the tweet list alone, or a
        ``(tweets, continuation_cursor)`` pair when ``return_cursor`` is true.
    """
    def _instructions(data):
        return _deep_get(data, "data", "home", "home_timeline_urt", "instructions")

    return self._fetch_timeline(
        "HomeLatestTimeline",
        count,
        _instructions,
        include_promoted=include_promoted,
        start_cursor=cursor,
        return_cursor=return_cursor,
    )
def fetch_bookmarks(self, count=50): def fetch_bookmarks(self, count=50):
@@ -732,8 +738,8 @@ class TwitterClient:
# ── Internal: timeline / user list fetchers ────────────────────── # ── Internal: timeline / user list fetchers ──────────────────────
def _fetch_timeline(self, operation_name, count, get_instructions, extra_variables=None, override_base_variables=False, field_toggles=None, use_post=False): def _fetch_timeline(self, operation_name, count, get_instructions, extra_variables=None, override_base_variables=False, field_toggles=None, use_post=False, include_promoted=False, start_cursor=None, return_cursor=False):
# type: (str, int, Callable[[Any], Any], Optional[Dict[str, Any]], bool, Optional[Dict[str, Any]], bool) -> List[Tweet] # type: (str, int, Callable[[Any], Any], Optional[Dict[str, Any]], bool, Optional[Dict[str, Any]], bool, bool, Optional[str], bool) -> Any
"""Generic timeline fetcher with pagination and deduplication. """Generic timeline fetcher with pagination and deduplication.
Args: Args:
@@ -751,7 +757,8 @@ class TwitterClient:
tweets = [] # type: List[Tweet] tweets = [] # type: List[Tweet]
seen_ids = set() # type: Set[str] seen_ids = set() # type: Set[str]
cursor = None # type: Optional[str] cursor = start_cursor # type: Optional[str]
continuation_cursor = None # type: Optional[str]
attempts = 0 attempts = 0
max_attempts = int(math.ceil(count / 20.0)) + 2 max_attempts = int(math.ceil(count / 20.0)) + 2
@@ -763,7 +770,7 @@ class TwitterClient:
else: else:
variables = { variables = {
"count": min(count - len(tweets) + 5, 40), "count": min(count - len(tweets) + 5, 40),
"includePromotedContent": False, "includePromotedContent": include_promoted,
"latestControlAvailable": True, "latestControlAvailable": True,
"requestContext": "launch", "requestContext": "launch",
} }
@@ -784,10 +791,13 @@ class TwitterClient:
tweets.append(tweet) tweets.append(tweet)
if not next_cursor: if not next_cursor:
continuation_cursor = None
break break
if next_cursor == cursor: if next_cursor == cursor:
logger.debug("Timeline pagination stopped because cursor did not advance: %s", next_cursor) logger.debug("Timeline pagination stopped because cursor did not advance: %s", next_cursor)
continuation_cursor = None
break break
continuation_cursor = next_cursor
cursor = next_cursor cursor = next_cursor
if not new_tweets: if not new_tweets:
@@ -799,6 +809,8 @@ class TwitterClient:
logger.debug("Sleeping %.1fs between requests", jitter) logger.debug("Sleeping %.1fs between requests", jitter)
time.sleep(jitter) time.sleep(jitter)
if return_cursor:
return tweets[:count], continuation_cursor
return tweets[:count] return tweets[:count]
def _fetch_user_list(self, operation_name, user_id, count, get_instructions): def _fetch_user_list(self, operation_name, user_id, count, get_instructions):

View File

@@ -53,6 +53,7 @@ class Tweet:
article_title: Optional[str] = None article_title: Optional[str] = None
article_text: Optional[str] = None article_text: Optional[str] = None
is_subscriber_only: bool = False is_subscriber_only: bool = False
is_promoted: bool = False
@dataclass @dataclass

View File

@@ -504,6 +504,10 @@ def parse_timeline_response(data, get_instructions):
if result: if result:
tweet = parse_tweet_result(result) tweet = parse_tweet_result(result)
if tweet: if tweet:
tweet.is_promoted = bool(
str(entry.get("entryId") or "").startswith("promoted-")
or item_content.get("promotedMetadata")
)
tweets.append(tweet) tweets.append(tweet)
for nested_item in content.get("items", []): for nested_item in content.get("items", []):
@@ -517,6 +521,11 @@ def parse_timeline_response(data, get_instructions):
if nested_result: if nested_result:
tweet = parse_tweet_result(nested_result) tweet = parse_tweet_result(nested_result)
if tweet: if tweet:
nested_item_content = _deep_get(nested_item, "item", "itemContent") or {}
tweet.is_promoted = bool(
str(_deep_get(nested_item, "entryId") or "").startswith("promoted-")
or nested_item_content.get("promotedMetadata")
)
tweets.append(tweet) tweets.append(tweet)
return tweets, next_cursor return tweets, next_cursor

View File

@@ -47,6 +47,7 @@ def tweet_to_dict(tweet: Tweet) -> Dict[str, Any]:
"lang": tweet.lang, "lang": tweet.lang,
"score": tweet.score, "score": tweet.score,
"isSubscriberOnly": tweet.is_subscriber_only, "isSubscriberOnly": tweet.is_subscriber_only,
"isPromoted": tweet.is_promoted,
} }
if tweet.article_title is not None: if tweet.article_title is not None:
data["articleTitle"] = tweet.article_title data["articleTitle"] = tweet.article_title
@@ -124,6 +125,7 @@ def tweet_from_dict(data: Dict[str, Any]) -> Tweet:
article_title=_optional_str(data.get("articleTitle")), article_title=_optional_str(data.get("articleTitle")),
article_text=_optional_str(data.get("articleText")), article_text=_optional_str(data.get("articleText")),
is_subscriber_only=bool(data.get("isSubscriberOnly", False)), is_subscriber_only=bool(data.get("isSubscriberOnly", False)),
is_promoted=bool(data.get("isPromoted", False)),
) )