fix: harden search validation and release v0.7.1

2026-03-13 01:04:29 +08:00
parent dc832f2ee2
commit 7d1b519c85
13 changed files with 203 additions and 490 deletions
--- a/twitter_cli/cli.py
+++ b/twitter_cli/cli.py
@@ -566,8 +566,8 @@ def user_posts(ctx, screen_name, max_count, as_json, as_yaml, output_file, full_
    multiple=True,
    help="Exclude content type (retweets, replies, links). Repeatable.",
 )
-@click.option("--min-likes", type=int, default=None, help="Minimum number of likes.")
-@click.option("--min-retweets", type=int, default=None, help="Minimum number of retweets.")
+@click.option("--min-likes", type=click.IntRange(min=0), default=None, help="Minimum number of likes.")
+@click.option("--min-retweets", type=click.IntRange(min=0), default=None, help="Minimum number of retweets.")
@click.option("--max", "-n", "max_count", type=int, default=None, help="Max number of tweets to fetch.")
@structured_output_options
@click.option("--output", "-o", "output_file", type=str, default=None, help="Save tweets to JSON file.")
@@ -590,18 +590,21 @@ def search(ctx, query, product, from_user, to_user, lang, since, until, has, exc
    """
    from .search import build_search_query

-    composed_query = build_search_query(
-        query,
-        from_user=from_user,
-        to_user=to_user,
-        lang=lang,
-        since=since,
-        until=until,
-        has=list(has) if has else None,
-        exclude=list(exclude) if exclude else None,
-        min_likes=min_likes,
-        min_retweets=min_retweets,
-    )
+    try:
+        composed_query = build_search_query(
+            query,
+            from_user=from_user,
+            to_user=to_user,
+            lang=lang,
+            since=since,
+            until=until,
+            has=list(has) if has else None,
+            exclude=list(exclude) if exclude else None,
+            min_likes=min_likes,
+            min_retweets=min_retweets,
+        )
+    except ValueError as exc:
+        raise click.UsageError(str(exc)) from exc
    if not composed_query:
        raise click.UsageError("Provide a QUERY or at least one advanced filter (e.g. --from, --lang).")

--- a/twitter_cli/client.py
+++ b/twitter_cli/client.py
@@ -9,7 +9,7 @@ import os
 import random
 import time
 import urllib.parse
-from typing import Any, Callable, Dict, cast
+from typing import TYPE_CHECKING, Any, Callable, cast

 import bs4
 from curl_cffi import requests as _cffi_requests
@@ -52,6 +52,11 @@ from .parser import (
    parse_user_result,
 )

+if TYPE_CHECKING:
+    from typing import Dict, List, Optional, Set, Tuple  # noqa: F401
+
+    from .models import Tweet  # noqa: F401
+
 logger = logging.getLogger(__name__)

 # Shared curl_cffi session (single-threaded CLI)
--- a/twitter_cli/graphql.py
+++ b/twitter_cli/graphql.py
@@ -10,7 +10,10 @@ import json
 import logging
 import re
 import urllib.parse
-from typing import Dict
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from typing import Any, Dict, Optional  # noqa: F401

 from .exceptions import QueryIdError

--- a/twitter_cli/parser.py
+++ b/twitter_cli/parser.py
@@ -7,6 +7,10 @@ Converts raw GraphQL response JSON into domain model objects
 from __future__ import annotations

 import logging
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from typing import Any, Callable, Dict, List, Optional, Tuple  # noqa: F401

 from .models import Author, Metrics, Tweet, TweetMedia, UserProfile

--- a/twitter_cli/search.py
+++ b/twitter_cli/search.py
@@ -8,8 +8,43 @@ Reference: https://help.x.com/en/using-x/x-advanced-search

 from __future__ import annotations

+import re
+from datetime import date
 from typing import List, Optional, Sequence

+_LANG_PATTERN = re.compile(r"^[A-Za-z][A-Za-z-]{1,14}$")
+
+
+def _normalize_handle(value: Optional[str]) -> Optional[str]:
+    """Strip whitespace and leading "@" from a handle; blank becomes None."""
+    if value is None:
+        return None
+    return value.strip().lstrip("@") or None
+
+
+def _normalize_lang(value: Optional[str]) -> Optional[str]:
+    """Lower-case and validate a language code; None/blank yields None."""
+    code = value.strip().lower() if value is not None else ""
+    if not code:
+        return None
+    # Anything that does not match the language-code pattern is a user error.
+    if _LANG_PATTERN.match(code) is None:
+        raise ValueError("--lang must be an ISO language code like en or zh-cn")
+    return code
+
+
+def _normalize_date(flag_name: str, value: Optional[str]) -> Optional[str]:
+    """Return *value* as canonical YYYY-MM-DD, or None when missing/blank."""
+    text = value.strip() if value is not None else ""
+    if not text:
+        return None
+    try:
+        # Python 3.11+ also parses "20260313" / "2026-W11-5"; re-emit the
+        # canonical form so the query and the since/until comparison stay valid.
+        return date.fromisoformat(text).isoformat()
+    except ValueError as exc:
+        raise ValueError("%s must be in YYYY-MM-DD format" % flag_name) from exc
+

 def build_search_query(
    query: str = "",
@@ -44,14 +79,27 @@ def build_search_query(
        Composed query string ready for the rawQuery API parameter.
    """
    parts: List[str] = []
+    query_text = query.strip()
+    from_user = _normalize_handle(from_user)
+    to_user = _normalize_handle(to_user)
+    lang = _normalize_lang(lang)
+    since = _normalize_date("--since", since)
+    until = _normalize_date("--until", until)

-    if query and query.strip():
-        parts.append(query.strip())
+    if min_likes is not None and min_likes < 0:
+        raise ValueError("--min-likes must be greater than or equal to 0")
+    if min_retweets is not None and min_retweets < 0:
+        raise ValueError("--min-retweets must be greater than or equal to 0")
+    if since and until and since > until:
+        raise ValueError("--since must be on or before --until")
+
+    if query_text:
+        parts.append(query_text)

    if from_user:
-        parts.append("from:%s" % from_user.lstrip("@"))
+        parts.append("from:%s" % from_user)
    if to_user:
-        parts.append("to:%s" % to_user.lstrip("@"))
+        parts.append("to:%s" % to_user)
    if lang:
        parts.append("lang:%s" % lang)
    if since:
@@ -60,9 +108,10 @@ def build_search_query(
        parts.append("until:%s" % until)
    if has:
        for item in has:
-            parts.append("filter:%s" % item)
+            parts.append("filter:%s" % item.lower())
    if exclude:
        for item in exclude:
+            item = item.lower()
            if item == "retweets":
                parts.append("-filter:retweets")
            elif item == "replies":
--- a/twitter_cli/serialization.py
+++ b/twitter_cli/serialization.py
@@ -125,6 +125,8 @@ def tweet_from_dict(data: Dict[str, Any]) -> Tweet:
 def tweets_from_json(raw: str) -> List[Tweet]:
    """Parse a JSON string into Tweet objects."""
    payload = json.loads(raw)
+    if isinstance(payload, dict) and payload.get("ok") is True and isinstance(payload.get("data"), list):
+        payload = payload["data"]
    if not isinstance(payload, list):
        raise ValueError("Tweet JSON payload must be a list")
    return [tweet_from_dict(item) for item in payload if isinstance(item, dict)]