feat: add rate limiting, retry with backoff, and max count cap

- Add configurable request delay between paginated API calls (default 1.5s)
- Add retry with exponential backoff on HTTP 429 and Twitter API error code 88 (rate limit exceeded)
- Add hard max count cap (default 200, absolute ceiling 500)
- Add rateLimit config section with requestDelay, maxRetries, retryBaseDelay, maxCount
- Add normalization tests for rateLimit config
This commit is contained in:
jackwener
2026-03-07 19:02:49 +08:00
parent 0f26e20abb
commit 55c48b077b
6 changed files with 125 additions and 31 deletions

View File

@@ -29,6 +29,12 @@ DEFAULT_CONFIG = {
"views_log": 0.5,
},
},
"rateLimit": {
"requestDelay": 1.5,
"maxRetries": 3,
"retryBaseDelay": 5.0,
"maxCount": 200,
},
} # type: Dict[str, Any]
@@ -128,6 +134,17 @@ def _normalize_config(config):
filter_config["weights"] = normalized_weights
merged["filter"] = filter_config
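# Normalize rateLimit section: treat a non-dict value as {}, then clamp each
# setting to its floor (requestDelay >= 0.0, maxRetries >= 0,
# retryBaseDelay >= 1.0, maxCount >= 1).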
rl = merged.get("rateLimit")
if not isinstance(rl, dict):
rl = {}
default_rl = DEFAULT_CONFIG["rateLimit"]
rl["requestDelay"] = max(_as_float(rl.get("requestDelay"), default_rl["requestDelay"]), 0.0)
rl["maxRetries"] = max(_as_int(rl.get("maxRetries"), default_rl["maxRetries"]), 0)
rl["retryBaseDelay"] = max(_as_float(rl.get("retryBaseDelay"), default_rl["retryBaseDelay"]), 1.0)
rl["maxCount"] = max(_as_int(rl.get("maxCount"), default_rl["maxCount"]), 1)
merged["rateLimit"] = rl
return merged