fix: harden search validation and release v0.7.1
This commit is contained in:
@@ -566,8 +566,8 @@ def user_posts(ctx, screen_name, max_count, as_json, as_yaml, output_file, full_
|
||||
multiple=True,
|
||||
help="Exclude content type (retweets, replies, links). Repeatable.",
|
||||
)
|
||||
@click.option("--min-likes", type=int, default=None, help="Minimum number of likes.")
|
||||
@click.option("--min-retweets", type=int, default=None, help="Minimum number of retweets.")
|
||||
@click.option("--min-likes", type=click.IntRange(min=0), default=None, help="Minimum number of likes.")
|
||||
@click.option("--min-retweets", type=click.IntRange(min=0), default=None, help="Minimum number of retweets.")
|
||||
@click.option("--max", "-n", "max_count", type=int, default=None, help="Max number of tweets to fetch.")
|
||||
@structured_output_options
|
||||
@click.option("--output", "-o", "output_file", type=str, default=None, help="Save tweets to JSON file.")
|
||||
@@ -590,18 +590,21 @@ def search(ctx, query, product, from_user, to_user, lang, since, until, has, exc
|
||||
"""
|
||||
from .search import build_search_query
|
||||
|
||||
composed_query = build_search_query(
|
||||
query,
|
||||
from_user=from_user,
|
||||
to_user=to_user,
|
||||
lang=lang,
|
||||
since=since,
|
||||
until=until,
|
||||
has=list(has) if has else None,
|
||||
exclude=list(exclude) if exclude else None,
|
||||
min_likes=min_likes,
|
||||
min_retweets=min_retweets,
|
||||
)
|
||||
try:
|
||||
composed_query = build_search_query(
|
||||
query,
|
||||
from_user=from_user,
|
||||
to_user=to_user,
|
||||
lang=lang,
|
||||
since=since,
|
||||
until=until,
|
||||
has=list(has) if has else None,
|
||||
exclude=list(exclude) if exclude else None,
|
||||
min_likes=min_likes,
|
||||
min_retweets=min_retweets,
|
||||
)
|
||||
except ValueError as exc:
|
||||
raise click.UsageError(str(exc)) from exc
|
||||
if not composed_query:
|
||||
raise click.UsageError("Provide a QUERY or at least one advanced filter (e.g. --from, --lang).")
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ import os
|
||||
import random
|
||||
import time
|
||||
import urllib.parse
|
||||
from typing import Any, Callable, Dict, cast
|
||||
from typing import TYPE_CHECKING, Any, Callable, cast
|
||||
|
||||
import bs4
|
||||
from curl_cffi import requests as _cffi_requests
|
||||
@@ -52,6 +52,11 @@ from .parser import (
|
||||
parse_user_result,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import Dict, List, Optional, Set, Tuple # noqa: F401
|
||||
|
||||
from .models import Tweet # noqa: F401
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Shared curl_cffi session (single-threaded CLI)
|
||||
|
||||
@@ -10,7 +10,10 @@ import json
|
||||
import logging
|
||||
import re
|
||||
import urllib.parse
|
||||
from typing import Dict
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import Any, Dict, Optional # noqa: F401
|
||||
|
||||
from .exceptions import QueryIdError
|
||||
|
||||
|
||||
@@ -7,6 +7,10 @@ Converts raw GraphQL response JSON into domain model objects
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple # noqa: F401
|
||||
|
||||
from .models import Author, Metrics, Tweet, TweetMedia, UserProfile
|
||||
|
||||
|
||||
@@ -8,8 +8,43 @@ Reference: https://help.x.com/en/using-x/x-advanced-search
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import date
|
||||
from typing import List, Optional, Sequence
|
||||
|
||||
# Accepted shape for a --lang value: one ASCII letter followed by 1 to 14
# letters or hyphens (2-15 characters in total).
_LANG_PATTERN = re.compile(r"^[A-Za-z][A-Za-z-]{1,14}$")
|
||||
|
||||
|
||||
def _normalize_handle(value: Optional[str]) -> Optional[str]:
|
||||
if value is None:
|
||||
return None
|
||||
text = value.strip().lstrip("@")
|
||||
return text or None
|
||||
|
||||
|
||||
def _normalize_lang(value: Optional[str]) -> Optional[str]:
    """Lower-case and validate a language code.

    Args:
        value: Raw language text, or ``None`` when none was given.

    Returns:
        The trimmed, lower-cased code, or ``None`` when *value* is ``None``
        or blank.

    Raises:
        ValueError: If the cleaned text does not match ``_LANG_PATTERN``.
    """
    code = "" if value is None else value.strip().lower()
    if not code:
        return None
    if _LANG_PATTERN.match(code) is None:
        raise ValueError("--lang must be an ISO language code like en or zh-cn")
    return code
|
||||
|
||||
|
||||
def _normalize_date(flag_name: str, value: Optional[str]) -> Optional[str]:
|
||||
if value is None:
|
||||
return None
|
||||
text = value.strip()
|
||||
if not text:
|
||||
return None
|
||||
try:
|
||||
date.fromisoformat(text)
|
||||
except ValueError as exc:
|
||||
raise ValueError("%s must be in YYYY-MM-DD format" % flag_name) from exc
|
||||
return text
|
||||
|
||||
|
||||
def build_search_query(
|
||||
query: str = "",
|
||||
@@ -44,14 +79,27 @@ def build_search_query(
|
||||
Composed query string ready for the rawQuery API parameter.
|
||||
"""
|
||||
parts: List[str] = []
|
||||
query_text = query.strip()
|
||||
from_user = _normalize_handle(from_user)
|
||||
to_user = _normalize_handle(to_user)
|
||||
lang = _normalize_lang(lang)
|
||||
since = _normalize_date("--since", since)
|
||||
until = _normalize_date("--until", until)
|
||||
|
||||
if query and query.strip():
|
||||
parts.append(query.strip())
|
||||
if min_likes is not None and min_likes < 0:
|
||||
raise ValueError("--min-likes must be greater than or equal to 0")
|
||||
if min_retweets is not None and min_retweets < 0:
|
||||
raise ValueError("--min-retweets must be greater than or equal to 0")
|
||||
if since and until and since > until:
|
||||
raise ValueError("--since must be on or before --until")
|
||||
|
||||
if query_text:
|
||||
parts.append(query_text)
|
||||
|
||||
if from_user:
|
||||
parts.append("from:%s" % from_user.lstrip("@"))
|
||||
parts.append("from:%s" % from_user)
|
||||
if to_user:
|
||||
parts.append("to:%s" % to_user.lstrip("@"))
|
||||
parts.append("to:%s" % to_user)
|
||||
if lang:
|
||||
parts.append("lang:%s" % lang)
|
||||
if since:
|
||||
@@ -60,9 +108,10 @@ def build_search_query(
|
||||
parts.append("until:%s" % until)
|
||||
if has:
|
||||
for item in has:
|
||||
parts.append("filter:%s" % item)
|
||||
parts.append("filter:%s" % item.lower())
|
||||
if exclude:
|
||||
for item in exclude:
|
||||
item = item.lower()
|
||||
if item == "retweets":
|
||||
parts.append("-filter:retweets")
|
||||
elif item == "replies":
|
||||
|
||||
@@ -125,6 +125,8 @@ def tweet_from_dict(data: Dict[str, Any]) -> Tweet:
|
||||
def tweets_from_json(raw: str) -> List[Tweet]:
    """Decode a JSON document and build a Tweet for each object it lists.

    Two layouts are accepted: a bare JSON array, or an object whose ``ok``
    field is ``true`` and whose ``data`` field holds the array. Array
    entries that are not JSON objects are skipped.

    Args:
        raw: JSON text to decode.

    Returns:
        One Tweet per object entry, in input order.

    Raises:
        ValueError: If the decoded payload (after unwrapping) is not a list.
    """
    decoded = json.loads(raw)

    # Unwrap the {"ok": true, "data": [...]} layout when present.
    if isinstance(decoded, dict):
        wrapped = decoded.get("data")
        if decoded.get("ok") is True and isinstance(wrapped, list):
            decoded = wrapped

    if not isinstance(decoded, list):
        raise ValueError("Tweet JSON payload must be a list")

    tweets = []
    for entry in decoded:
        if isinstance(entry, dict):
            tweets.append(tweet_from_dict(entry))
    return tweets
|
||||
|
||||
Reference in New Issue
Block a user