feat: preserve article atomic markdown blocks (#37)

* fix: preserve atomic markdown blocks in articles

* test: add parser unit coverage for article markdown blocks
This commit is contained in:
alextuan1024
2026-03-17 18:06:02 +08:00
committed by GitHub
parent 8cb5824ed4
commit 90f0635c50
2 changed files with 186 additions and 0 deletions

View File

@@ -24,8 +24,11 @@ from twitter_cli.graphql import (
)
from twitter_cli.parser import (
_deep_get,
_extract_atomic_markdown,
_extract_cursor,
_extract_media,
_normalize_article_entity_map,
_parse_article,
_parse_int,
parse_tweet_result,
parse_user_result,
@@ -414,6 +417,144 @@ class TestPaginationBehavior:
assert [user.screen_name for user in users] == ["alice"]
# ── Article parsing helpers ───────────────────────────────────────────────
class TestNormalizeArticleEntityMap:
    """Unit tests for ``_normalize_article_entity_map``."""

    def test_accepts_dict_entity_map(self):
        """A dict entity map with int and str keys comes back keyed by str."""
        raw = {0: {"type": "MARKDOWN"}, "1": {"type": "LINK"}}
        expected = {"0": {"type": "MARKDOWN"}, "1": {"type": "LINK"}}

        assert _normalize_article_entity_map(raw) == expected

    def test_accepts_list_entity_map(self):
        """A list of ``{"key", "value"}`` records is flattened into a str-keyed dict."""
        markdown_entity = {"type": "MARKDOWN", "data": {"markdown": "```md\nhi\n```"}}
        link_entity = {"type": "LINK", "data": {"url": "https://example.com"}}
        # One str key and one int key: both must end up as strings.
        raw = [
            {"key": "4", "value": markdown_entity},
            {"key": 5, "value": link_entity},
        ]

        result = _normalize_article_entity_map(raw)

        assert result == {"4": markdown_entity, "5": link_entity}

    def test_rejects_unknown_shapes(self):
        """Inputs that are neither a dict nor a list normalize to an empty dict."""
        for unsupported in (None, "bad"):
            assert _normalize_article_entity_map(unsupported) == {}
class TestExtractAtomicMarkdown:
    """Unit tests for ``_extract_atomic_markdown``."""

    def test_extracts_markdown_entity(self):
        """The markdown payload of a referenced MARKDOWN entity is returned in a list."""
        fenced = "```markdown\nconst answer = 42;\n```"
        atomic_block = {"entityRanges": [{"key": 4}]}
        entities = {"4": {"type": "MARKDOWN", "data": {"markdown": fenced}}}

        assert _extract_atomic_markdown(atomic_block, entities) == [fenced]

    def test_ignores_non_markdown_entities(self):
        """MEDIA and LINK entities contribute nothing to the result."""
        atomic_block = {"entityRanges": [{"key": 0}, {"key": 1}]}
        entities = {
            "0": {"type": "MEDIA", "data": {"mediaItems": []}},
            "1": {"type": "LINK", "data": {"url": "https://example.com"}},
        }

        extracted = _extract_atomic_markdown(atomic_block, entities)

        assert extracted == []

    def test_ignores_blank_markdown(self):
        """A MARKDOWN entity whose payload is only whitespace is dropped."""
        atomic_block = {"entityRanges": [{"key": 4}]}
        whitespace_only = {"type": "MARKDOWN", "data": {"markdown": " \n"}}

        extracted = _extract_atomic_markdown(atomic_block, {"4": whitespace_only})

        assert extracted == []
class TestParseArticle:
    """Checks ``_parse_article`` on article payloads that mix text and atomic markdown blocks."""

    @staticmethod
    def _text_block(key, text):
        """Build an ``unstyled`` content block with no entity ranges."""
        return {"key": key, "type": "unstyled", "text": text, "entityRanges": []}

    @staticmethod
    def _atomic_block(key, entity_key):
        """Build an ``atomic`` block whose single entity range points at ``entity_key``."""
        return {
            "key": key,
            "type": "atomic",
            "text": " ",
            "entityRanges": [{"offset": 0, "length": 1, "key": entity_key}],
        }

    @staticmethod
    def _markdown_entity(key, markdown):
        """Build one list-style entity-map record holding a MARKDOWN entity."""
        return {
            "key": key,
            "value": {"type": "MARKDOWN", "data": {"markdown": markdown}},
        }

    @staticmethod
    def _article_result(title, blocks, entity_map):
        """Wrap a title, blocks and entity map in the nested shape passed to ``_parse_article``."""
        return {
            "article": {
                "article_results": {
                    "result": {
                        "title": title,
                        "content_state": {
                            "blocks": blocks,
                            "entityMap": entity_map,
                        },
                    }
                }
            }
        }

    def test_preserves_atomic_markdown_between_text_blocks(self):
        """An atomic markdown block is emitted in place, between its neighbouring paragraphs."""
        payload = self._article_result(
            "Article title",
            [
                self._text_block("a", "Intro"),
                self._atomic_block("b", 4),
                self._text_block("c", "Outro"),
            ],
            [self._markdown_entity("4", "```markdown\nconst answer = 42;\n```")],
        )

        article = _parse_article(payload)

        assert article == {
            "article_title": "Article title",
            "article_text": "Intro\n\n```markdown\nconst answer = 42;\n```\n\nOutro",
        }

    def test_hooeem_like_payload_keeps_multiple_markdown_blocks(self):
        """Several atomic markdown blocks interleaved with text all survive, in document order."""
        title = "I want to become a Claude architect (full course)."
        content_blocks = [
            self._text_block(
                "a",
                "If you have no idea how to get started go to Claude and paste this prompt which will help you with domain 1:",
            ),
            self._atomic_block("b", 4),
            self._text_block(
                "c",
                "What to build to learn: A multi-tool agent with 3-4 MCP tools.",
            ),
            self._atomic_block("d", 5),
            self._text_block("e", "Done."),
        ]
        entity_records = [
            self._markdown_entity(
                "4",
                "```markdown\nYou are an expert instructor teaching Domain 1.\n```",
            ),
            self._markdown_entity(
                "5",
                "```markdown\nBest for: predictable, structured tasks like code reviews.\n```",
            ),
        ]

        article = _parse_article(
            self._article_result(title, content_blocks, entity_records)
        )

        expected_text = (
            "If you have no idea how to get started go to Claude and paste this prompt which will help you with domain 1:\n\n"
            "```markdown\nYou are an expert instructor teaching Domain 1.\n```\n\n"
            "What to build to learn: A multi-tool agent with 3-4 MCP tools.\n\n"
            "```markdown\nBest for: predictable, structured tasks like code reviews.\n```\n\n"
            "Done."
        )
        assert article == {
            "article_title": "I want to become a Claude architect (full course).",
            "article_text": expected_text,
        }
# ── TwitterClient._parse_tweet_result ─────────────────────────────────────
class TestParseTweetResult:
@@ -514,6 +655,7 @@ class TestParseTweetResult:
assert parse_tweet_result(self.SAMPLE_TWEET_RESULT, depth=3) is None
# ── TwitterAPIError ──────────────────────────────────────────────────────
class TestTwitterAPIError: