fix: extract full text from note_tweet for long tweets (fixes #20)
Twitter stores the full text of long tweets (more than 280 characters) in note_tweet.note_tweet_results.result.text rather than in legacy.full_text. The parser now prefers the note_tweet text when it is available and falls back to legacy.full_text otherwise.
This commit is contained in:
7
tests/fixtures/home_timeline.json
vendored
7
tests/fixtures/home_timeline.json
vendored
@@ -61,6 +61,13 @@
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"note_tweet": {
|
||||||
|
"note_tweet_results": {
|
||||||
|
"result": {
|
||||||
|
"text": "Hello\nworld\n\nThis is the full text of a long tweet that goes beyond the 280 character limit and contains additional content that would be hidden behind Show More in the Twitter UI."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"views": {
|
"views": {
|
||||||
"count": "1234"
|
"count": "1234"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -26,6 +26,9 @@ def test_parse_home_timeline_fixture(fixture_loader) -> None:
|
|||||||
assert [tweet.id for tweet in tweets] == ["1", "20"]
|
assert [tweet.id for tweet in tweets] == ["1", "20"]
|
||||||
assert cursor == "cursor-bottom-1"
|
assert cursor == "cursor-bottom-1"
|
||||||
assert tweets[0].media[0].type == "photo"
|
assert tweets[0].media[0].type == "photo"
|
||||||
|
# note_tweet full text should be preferred over legacy.full_text for long tweets
|
||||||
|
assert "Show More" in tweets[0].text
|
||||||
|
assert tweets[0].text.startswith("Hello\nworld\n")
|
||||||
assert tweets[0].urls == ["https://example.com/post"]
|
assert tweets[0].urls == ["https://example.com/post"]
|
||||||
assert tweets[1].is_retweet is True
|
assert tweets[1].is_retweet is True
|
||||||
assert tweets[1].retweeted_by == "bob"
|
assert tweets[1].retweeted_by == "bob"
|
||||||
|
|||||||
@@ -246,9 +246,12 @@ def parse_tweet_result(result, depth=0):
|
|||||||
if is_retweet:
|
if is_retweet:
|
||||||
retweeted_by = user_core.get("screen_name") or user_legacy.get("screen_name", "unknown")
|
retweeted_by = user_core.get("screen_name") or user_legacy.get("screen_name", "unknown")
|
||||||
|
|
||||||
|
# Prefer note_tweet full text for long tweets ("Show More")
|
||||||
|
note_text = _deep_get(actual_data, "note_tweet", "note_tweet_results", "result", "text")
|
||||||
|
|
||||||
return Tweet(
|
return Tweet(
|
||||||
id=actual_data.get("rest_id", ""),
|
id=actual_data.get("rest_id", ""),
|
||||||
text=actual_legacy.get("full_text", ""),
|
text=note_text or actual_legacy.get("full_text", ""),
|
||||||
author=author,
|
author=author,
|
||||||
metrics=Metrics(
|
metrics=Metrics(
|
||||||
likes=_parse_int(actual_legacy.get("favorite_count"), 0),
|
likes=_parse_int(actual_legacy.get("favorite_count"), 0),
|
||||||
|
|||||||
Reference in New Issue
Block a user