improve: add early return, escape markdown brackets in labels and closing parentheses in URLs

- Early return when no entityRanges (skip unnecessary processing)
- Escape [ and ] in link labels to prevent nested bracket issues
- Encode ) in URLs as %29 so that a closing parenthesis inside the URL cannot terminate the markdown link early (e.g. Wikipedia URLs)
- Add 3 new test cases for the above edge cases
This commit is contained in:
jackwener
2026-03-21 17:48:02 +08:00
parent b7c7ef826c
commit b752c31dfd
2 changed files with 30 additions and 3 deletions

View File

@@ -549,6 +549,26 @@ class TestRenderArticleTextBlock:
assert _render_article_text_block(block, entity_map) == "abc" assert _render_article_text_block(block, entity_map) == "abc"
def test_returns_plain_text_when_no_entity_ranges(self):
    """A block without an ``entityRanges`` key renders as its raw text."""
    plain_block = {"text": "Hello world"}
    rendered = _render_article_text_block(plain_block, {})
    assert rendered == "Hello world"
def test_encodes_parentheses_in_url(self):
    """A ``)`` in a link URL is emitted as ``%29``; the ``(`` is left as-is."""
    link_range = {"key": 0, "offset": 4, "length": 4}
    block = {"text": "see Wiki", "entityRanges": [link_range]}
    link_data = {"url": "https://en.wikipedia.org/wiki/Rust_(programming_language)"}
    entity_map = {"0": {"type": "LINK", "data": link_data}}
    # NOTE(review): the expected destination keeps an unmatched "(".
    # CommonMark only permits balanced or escaped parentheses in a bare
    # link destination, so confirm the target renderer still parses this
    # output as a link (encoding "(" as %28 too may be needed).
    expected = "see [Wiki](https://en.wikipedia.org/wiki/Rust_(programming_language%29)"
    assert _render_article_text_block(block, entity_map) == expected
def test_escapes_brackets_in_label(self):
    """``[`` and ``]`` inside the linked label text are backslash-escaped."""
    link_range = {"key": 0, "offset": 4, "length": 6}
    block = {"text": "see [docs] now", "entityRanges": [link_range]}
    entity_map = {"0": {"type": "LINK", "data": {"url": "https://example.com"}}}
    rendered = _render_article_text_block(block, entity_map)
    assert rendered == "see [\\[docs\\]](https://example.com) now"
class TestParseArticle: class TestParseArticle:
def test_preserves_atomic_markdown_between_text_blocks(self): def test_preserves_atomic_markdown_between_text_blocks(self):

View File

@@ -222,9 +222,13 @@ def _render_article_text_block(block, entity_map):
if not isinstance(text, str) or not text: if not isinstance(text, str) or not text:
return "" return ""
entity_ranges = block.get("entityRanges", []) or []
if not entity_ranges:
return text
rendered = text rendered = text
ranges = [] ranges = []
for entity_range in block.get("entityRanges", []) or []: for entity_range in entity_ranges:
if not isinstance(entity_range, dict): if not isinstance(entity_range, dict):
continue continue
entity_key = entity_range.get("key") entity_key = entity_range.get("key")
@@ -248,10 +252,13 @@ def _render_article_text_block(block, entity_map):
label = rendered[offset:offset + length] label = rendered[offset:offset + length]
if not label: if not label:
continue continue
# Escape markdown special chars: [ and ] in labels, ) in URLs
safe_label = label.replace("[", "\\[").replace("]", "\\]")
safe_url = url.replace(")", "%29")
rendered = "%s[%s](%s)%s" % ( rendered = "%s[%s](%s)%s" % (
rendered[:offset], rendered[:offset],
label, safe_label,
url, safe_url,
rendered[offset + length:], rendered[offset + length:],
) )
return rendered return rendered