fix: harden search validation and release v0.7.1

This commit is contained in:
jackwener
2026-03-13 01:04:29 +08:00
parent dc832f2ee2
commit 7d1b519c85
13 changed files with 203 additions and 490 deletions

View File

@@ -566,8 +566,8 @@ def user_posts(ctx, screen_name, max_count, as_json, as_yaml, output_file, full_
multiple=True,
help="Exclude content type (retweets, replies, links). Repeatable.",
)
@click.option("--min-likes", type=int, default=None, help="Minimum number of likes.")
@click.option("--min-retweets", type=int, default=None, help="Minimum number of retweets.")
@click.option("--min-likes", type=click.IntRange(min=0), default=None, help="Minimum number of likes.")
@click.option("--min-retweets", type=click.IntRange(min=0), default=None, help="Minimum number of retweets.")
@click.option("--max", "-n", "max_count", type=int, default=None, help="Max number of tweets to fetch.")
@structured_output_options
@click.option("--output", "-o", "output_file", type=str, default=None, help="Save tweets to JSON file.")
@@ -590,18 +590,21 @@ def search(ctx, query, product, from_user, to_user, lang, since, until, has, exc
"""
from .search import build_search_query
composed_query = build_search_query(
query,
from_user=from_user,
to_user=to_user,
lang=lang,
since=since,
until=until,
has=list(has) if has else None,
exclude=list(exclude) if exclude else None,
min_likes=min_likes,
min_retweets=min_retweets,
)
try:
composed_query = build_search_query(
query,
from_user=from_user,
to_user=to_user,
lang=lang,
since=since,
until=until,
has=list(has) if has else None,
exclude=list(exclude) if exclude else None,
min_likes=min_likes,
min_retweets=min_retweets,
)
except ValueError as exc:
raise click.UsageError(str(exc)) from exc
if not composed_query:
raise click.UsageError("Provide a QUERY or at least one advanced filter (e.g. --from, --lang).")

View File

@@ -9,7 +9,7 @@ import os
import random
import time
import urllib.parse
from typing import Any, Callable, Dict, cast
from typing import TYPE_CHECKING, Any, Callable, cast
import bs4
from curl_cffi import requests as _cffi_requests
@@ -52,6 +52,11 @@ from .parser import (
parse_user_result,
)
if TYPE_CHECKING:
from typing import Dict, List, Optional, Set, Tuple # noqa: F401
from .models import Tweet # noqa: F401
logger = logging.getLogger(__name__)
# Shared curl_cffi session (single-threaded CLI)

View File

@@ -10,7 +10,10 @@ import json
import logging
import re
import urllib.parse
from typing import Dict
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Any, Dict, Optional # noqa: F401
from .exceptions import QueryIdError

View File

@@ -7,6 +7,10 @@ Converts raw GraphQL response JSON into domain model objects
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Any, Callable, Dict, List, Optional, Tuple # noqa: F401
from .models import Author, Metrics, Tweet, TweetMedia, UserProfile

View File

@@ -8,8 +8,43 @@ Reference: https://help.x.com/en/using-x/x-advanced-search
from __future__ import annotations
import re
from datetime import date
from typing import List, Optional, Sequence
# Shape check for --lang values: one ASCII letter followed by 1-14 letters or
# hyphens (2-15 characters in total). This is a syntactic check only; it does
# not verify that the code is a registered language.
_LANG_PATTERN = re.compile(r"^[A-Za-z][A-Za-z-]{1,14}$")
def _normalize_handle(value: Optional[str]) -> Optional[str]:
if value is None:
return None
text = value.strip().lstrip("@")
return text or None
def _normalize_lang(value: Optional[str]) -> Optional[str]:
    """Lower-case and validate a language code.

    Args:
        value: Raw ``--lang`` input, or ``None``.

    Returns:
        The trimmed, lower-cased code, or ``None`` when ``value`` is ``None``
        or blank.

    Raises:
        ValueError: If the cleaned text does not match ``_LANG_PATTERN``.
    """
    # Treat a missing value like a blank one so both share the same exit.
    cleaned = value.strip().lower() if value is not None else ""
    if not cleaned:
        return None
    if _LANG_PATTERN.match(cleaned) is None:
        raise ValueError("--lang must be an ISO language code like en or zh-cn")
    return cleaned
def _normalize_date(flag_name: str, value: Optional[str]) -> Optional[str]:
if value is None:
return None
text = value.strip()
if not text:
return None
try:
date.fromisoformat(text)
except ValueError as exc:
raise ValueError("%s must be in YYYY-MM-DD format" % flag_name) from exc
return text
def build_search_query(
query: str = "",
@@ -44,14 +79,27 @@ def build_search_query(
Composed query string ready for the rawQuery API parameter.
"""
parts: List[str] = []
query_text = query.strip()
from_user = _normalize_handle(from_user)
to_user = _normalize_handle(to_user)
lang = _normalize_lang(lang)
since = _normalize_date("--since", since)
until = _normalize_date("--until", until)
if query and query.strip():
parts.append(query.strip())
if min_likes is not None and min_likes < 0:
raise ValueError("--min-likes must be greater than or equal to 0")
if min_retweets is not None and min_retweets < 0:
raise ValueError("--min-retweets must be greater than or equal to 0")
if since and until and since > until:
raise ValueError("--since must be on or before --until")
if query_text:
parts.append(query_text)
if from_user:
parts.append("from:%s" % from_user.lstrip("@"))
parts.append("from:%s" % from_user)
if to_user:
parts.append("to:%s" % to_user.lstrip("@"))
parts.append("to:%s" % to_user)
if lang:
parts.append("lang:%s" % lang)
if since:
@@ -60,9 +108,10 @@ def build_search_query(
parts.append("until:%s" % until)
if has:
for item in has:
parts.append("filter:%s" % item)
parts.append("filter:%s" % item.lower())
if exclude:
for item in exclude:
item = item.lower()
if item == "retweets":
parts.append("-filter:retweets")
elif item == "replies":

View File

@@ -125,6 +125,8 @@ def tweet_from_dict(data: Dict[str, Any]) -> Tweet:
def tweets_from_json(raw: str) -> List[Tweet]:
    """Decode a JSON document into a list of ``Tweet`` objects.

    Accepts either a bare JSON array or an object of the form
    ``{"ok": true, "data": [...]}``, in which case the ``data`` array is used.
    Array entries that are not JSON objects are skipped.

    Raises:
        ValueError: If the decoded payload (after unwrapping) is not a list.
    """
    decoded = json.loads(raw)

    # Unwrap the {"ok": true, "data": [...]} envelope when present.
    if isinstance(decoded, dict) and decoded.get("ok") is True:
        inner = decoded.get("data")
        if isinstance(inner, list):
            decoded = inner

    if not isinstance(decoded, list):
        raise ValueError("Tweet JSON payload must be a list")

    tweets = []
    for entry in decoded:
        if isinstance(entry, dict):
            tweets.append(tweet_from_dict(entry))
    return tweets