improve: add early return, escape markdown brackets in labels and parentheses in URLs

- Early return when no entityRanges (skip unnecessary processing)
- Escape [ and ] in link labels to prevent nested bracket issues
- Encode ) in URLs as %29 so a closing parenthesis cannot terminate the markdown link target early (e.g. Wikipedia URLs such as .../Mercury_(planet))
- Add 3 new test cases for the above edge cases
This commit is contained in:
jackwener
2026-03-21 17:48:02 +08:00
parent b7c7ef826c
commit b752c31dfd
2 changed files with 30 additions and 3 deletions

View File

@@ -222,9 +222,13 @@ def _render_article_text_block(block, entity_map):
if not isinstance(text, str) or not text:
return ""
entity_ranges = block.get("entityRanges", []) or []
if not entity_ranges:
return text
rendered = text
ranges = []
for entity_range in block.get("entityRanges", []) or []:
for entity_range in entity_ranges:
if not isinstance(entity_range, dict):
continue
entity_key = entity_range.get("key")
@@ -248,10 +252,13 @@ def _render_article_text_block(block, entity_map):
label = rendered[offset:offset + length]
if not label:
continue
# Escape markdown special chars: [ and ] in labels, ) in URLs
safe_label = label.replace("[", "\\[").replace("]", "\\]")
safe_url = url.replace(")", "%29")
rendered = "%s[%s](%s)%s" % (
rendered[:offset],
label,
url,
safe_label,
safe_url,
rendered[offset + length:],
)
return rendered