improve: add early return, escape markdown brackets in labels and parentheses in URLs
- Early return when there are no entityRanges (skips unnecessary processing)
- Escape [ and ] in link labels to prevent nested-bracket issues
- Encode ) in URLs as %29 to prevent malformed markdown links (e.g. Wikipedia URLs)
- Add 3 new test cases covering the edge cases above
This commit is contained in:
@@ -549,6 +549,26 @@ class TestRenderArticleTextBlock:
|
|||||||
|
|
||||||
assert _render_article_text_block(block, entity_map) == "abc"
|
assert _render_article_text_block(block, entity_map) == "abc"
|
||||||
|
|
||||||
|
def test_returns_plain_text_when_no_entity_ranges(self):
|
||||||
|
block = {"text": "Hello world"}
|
||||||
|
assert _render_article_text_block(block, {}) == "Hello world"
|
||||||
|
|
||||||
|
def test_encodes_parentheses_in_url(self):
|
||||||
|
block = {"text": "see Wiki", "entityRanges": [{"key": 0, "offset": 4, "length": 4}]}
|
||||||
|
entity_map = {"0": {"type": "LINK", "data": {"url": "https://en.wikipedia.org/wiki/Rust_(programming_language)"}}}
|
||||||
|
|
||||||
|
assert _render_article_text_block(block, entity_map) == (
|
||||||
|
"see [Wiki](https://en.wikipedia.org/wiki/Rust_(programming_language%29)"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_escapes_brackets_in_label(self):
|
||||||
|
block = {"text": "see [docs] now", "entityRanges": [{"key": 0, "offset": 4, "length": 6}]}
|
||||||
|
entity_map = {"0": {"type": "LINK", "data": {"url": "https://example.com"}}}
|
||||||
|
|
||||||
|
assert _render_article_text_block(block, entity_map) == (
|
||||||
|
"see [\\[docs\\]](https://example.com) now"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestParseArticle:
|
class TestParseArticle:
|
||||||
def test_preserves_atomic_markdown_between_text_blocks(self):
|
def test_preserves_atomic_markdown_between_text_blocks(self):
|
||||||
|
|||||||
@@ -222,9 +222,13 @@ def _render_article_text_block(block, entity_map):
|
|||||||
if not isinstance(text, str) or not text:
|
if not isinstance(text, str) or not text:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
entity_ranges = block.get("entityRanges", []) or []
|
||||||
|
if not entity_ranges:
|
||||||
|
return text
|
||||||
|
|
||||||
rendered = text
|
rendered = text
|
||||||
ranges = []
|
ranges = []
|
||||||
for entity_range in block.get("entityRanges", []) or []:
|
for entity_range in entity_ranges:
|
||||||
if not isinstance(entity_range, dict):
|
if not isinstance(entity_range, dict):
|
||||||
continue
|
continue
|
||||||
entity_key = entity_range.get("key")
|
entity_key = entity_range.get("key")
|
||||||
@@ -248,10 +252,13 @@ def _render_article_text_block(block, entity_map):
|
|||||||
label = rendered[offset:offset + length]
|
label = rendered[offset:offset + length]
|
||||||
if not label:
|
if not label:
|
||||||
continue
|
continue
|
||||||
|
# Escape markdown special chars: [ and ] in labels, ) in URLs
|
||||||
|
safe_label = label.replace("[", "\\[").replace("]", "\\]")
|
||||||
|
safe_url = url.replace(")", "%29")
|
||||||
rendered = "%s[%s](%s)%s" % (
|
rendered = "%s[%s](%s)%s" % (
|
||||||
rendered[:offset],
|
rendered[:offset],
|
||||||
label,
|
safe_label,
|
||||||
url,
|
safe_url,
|
||||||
rendered[offset + length:],
|
rendered[offset + length:],
|
||||||
)
|
)
|
||||||
return rendered
|
return rendered
|
||||||
|
|||||||
Reference in New Issue
Block a user