Add lightweight analytics dashboard
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -9,6 +9,24 @@ import src.server as server_module
|
||||
from src.config import AppConfig
|
||||
|
||||
|
||||
# Job statuses that count as "finished" when analytics are aggregated.
_TERMINAL_STATUSES = {"completed", "failed", "cancelled"}
|
||||
|
||||
|
||||
def _objective_category(objective: str) -> str:
|
||||
text = objective.lower()
|
||||
if any(keyword in text for keyword in ("browser", "website", "amazon", "google", "login", "shopping", "checkout", "orders")):
|
||||
return "Browser / web"
|
||||
if any(keyword in text for keyword in ("file", "folder", "directory", "terminal", "shell", "command", "cli", "script", "git", "repo", "install", "pip", "npm")):
|
||||
return "Files / terminal"
|
||||
if any(keyword in text for keyword in ("write", "summary", "document", "docs", "report", "email", "message", "readme", "markdown")):
|
||||
return "Writing / docs"
|
||||
if any(keyword in text for keyword in ("data", "analysis", "csv", "spreadsheet", "sheet", "table", "chart", "dashboard", "metric", "sql")):
|
||||
return "Data / analysis"
|
||||
if any(keyword in text for keyword in ("code", "bug", "fix", "test", "debug", "api", "backend", "frontend", "database", "deploy", "docker", "service", "build")):
|
||||
return "Development / ops"
|
||||
return "Other"
|
||||
|
||||
|
||||
class FakeJobManager:
|
||||
def __init__(self, *, config: AppConfig, db: Any, broadcast: Any = None) -> None:
|
||||
self.config = config
|
||||
@@ -39,6 +57,7 @@ class FakeJobManager:
|
||||
artifacts_dir.mkdir(parents=True, exist_ok=True)
|
||||
screenshot_path = artifacts_dir / "screen_step_001.png"
|
||||
screenshot_path.write_bytes(b"not-a-real-png")
|
||||
created_at = f"2026-05-27T00:00:{self._counter:02d}Z"
|
||||
self.last_submit_payload = {
|
||||
"objective": objective,
|
||||
"model": selected_model,
|
||||
@@ -57,6 +76,10 @@ class FakeJobManager:
|
||||
"objective": objective,
|
||||
"model": selected_model,
|
||||
"status": "running",
|
||||
"created_at": created_at,
|
||||
"started_at": created_at,
|
||||
"ended_at": None,
|
||||
"steps": 1,
|
||||
"result": "Running",
|
||||
"response": {"return": "Running", "data": None},
|
||||
"return": "Running",
|
||||
@@ -149,6 +172,114 @@ class FakeJobManager:
|
||||
"live_running_threads": 0,
|
||||
}
|
||||
|
||||
def analytics(self) -> dict[str, Any]:
    """Aggregate statistics over every job held in ``self._jobs``.

    Each job is counted in three places: the overall totals, a bucket for
    its objective category (via ``_objective_category``) and a bucket for
    its creation day (first 10 characters of ``created_at``, or
    ``"unknown"`` when that is empty).

    Only jobs whose status is in ``_TERMINAL_STATUSES`` contribute to the
    finished/completed/failed/cancelled counters and to the step and cost
    averages; other jobs only increase ``total_jobs``.

    Returns:
        A dict with the overall counters, ``success_rate`` (percentage of
        finished jobs that completed, 0.0 when nothing finished),
        ``avg_steps`` and ``avg_cost_usd`` (``None`` when no finished job
        supplied a value), plus ``by_category`` (sorted by descending
        success rate, then label) and ``timeline`` (sorted by day label).
    """
    # Terminal status -> name of the counter it increments.
    outcome_counter = {
        "completed": "completed_jobs",
        "failed": "failed_jobs",
        "cancelled": "cancelled_jobs",
    }

    def fresh(label: str) -> dict[str, Any]:
        # A zeroed accumulator; every bucket (including the overall one)
        # starts from this shape.
        return {
            "label": label,
            "total_jobs": 0,
            "finished_jobs": 0,
            "completed_jobs": 0,
            "failed_jobs": 0,
            "cancelled_jobs": 0,
            "steps_sum": 0,
            "steps_count": 0,
            "cost_sum": 0.0,
            "cost_count": 0,
        }

    def summarise(acc: dict[str, Any]) -> dict[str, Any]:
        # Turn raw sums/counts into the public row with rates and averages.
        done = acc["finished_jobs"]
        if done:
            rate = round((acc["completed_jobs"] / done) * 100, 2)
        else:
            rate = 0.0
        mean_steps = None
        if acc["steps_count"]:
            mean_steps = round(acc["steps_sum"] / acc["steps_count"], 2)
        mean_cost = None
        if acc["cost_count"]:
            mean_cost = round(acc["cost_sum"] / acc["cost_count"], 6)
        return {
            "label": acc["label"],
            "total_jobs": acc["total_jobs"],
            "finished_jobs": done,
            "completed_jobs": acc["completed_jobs"],
            "failed_jobs": acc["failed_jobs"],
            "cancelled_jobs": acc["cancelled_jobs"],
            "success_rate": rate,
            "avg_steps": mean_steps,
            "avg_cost_usd": mean_cost,
        }

    overall = fresh("")
    per_category: dict[str, dict[str, Any]] = {}
    per_day: dict[str, dict[str, Any]] = {}

    for record in self._jobs.values():
        state = str(record.get("status") or "")
        category = _objective_category(str(record.get("objective") or ""))
        day = str(record.get("created_at") or "")[:10] or "unknown"

        if category not in per_category:
            per_category[category] = fresh(category)
        if day not in per_day:
            per_day[day] = fresh(day)
        targets = (overall, per_category[category], per_day[day])

        for acc in targets:
            acc["total_jobs"] += 1

        # Unfinished jobs contribute nothing beyond the total count.
        if state not in _TERMINAL_STATUSES:
            continue

        # Convert steps before cost so conversion errors surface in the
        # same order as the values are read.
        raw_steps = record.get("steps")
        step_total = int(raw_steps) if raw_steps is not None else None

        raw_cost = (record.get("usage") or {}).get("estimated_cost_usd")
        cost_total = float(raw_cost) if raw_cost is not None else None

        counter_name = outcome_counter.get(state)
        for acc in targets:
            acc["finished_jobs"] += 1
            if counter_name is not None:
                acc[counter_name] += 1
            if step_total is not None:
                acc["steps_sum"] += step_total
                acc["steps_count"] += 1
            if cost_total is not None:
                acc["cost_sum"] += cost_total
                acc["cost_count"] += 1

    # The overall row has the same shape as a bucket row minus its label.
    report = summarise(overall)
    del report["label"]

    category_rows = [summarise(acc) for acc in per_category.values()]
    category_rows.sort(key=lambda row: (-row["success_rate"], row["label"]))
    report["by_category"] = category_rows

    day_rows = [summarise(acc) for acc in per_day.values()]
    day_rows.sort(key=lambda row: row["label"])
    report["timeline"] = day_rows

    return report
|
||||
|
||||
|
||||
def _build_app(tmp_path: Path, monkeypatch: Any, disable_ui: bool = False):
|
||||
monkeypatch.setattr(server_module, "JobManager", FakeJobManager)
|
||||
@@ -276,12 +407,67 @@ def test_replay_endpoint_skips_visual_paths_outside_artifacts(tmp_path: Path, mo
|
||||
assert payload["total_frames"] == 1
|
||||
|
||||
|
||||
def test_analytics_endpoint_groups_by_category_and_time(tmp_path: Path, monkeypatch: Any) -> None:
    """GET /api/analytics reports overall, per-category and per-day stats.

    Three jobs are submitted through the API, then their stored records are
    patched directly on the fake manager to simulate two completed jobs and
    one failed job spread over two days.
    """
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    manager = app.state.manager
    client = TestClient(app)
    headers = {"Authorization": "Bearer test_token"}

    def submit(objective: str) -> str:
        # Create a job via the public endpoint and return its id.
        reply = client.post("/api/jobs", headers=headers, json={"job": objective})
        return reply.json()["job_id"]

    browser_completed = submit("Open amazon.de and checkout")
    browser_failed = submit("Open website and login")
    terminal_completed = submit("Run a shell command to inspect files")

    # (job id, status, ended_at, steps, created_at, estimated cost in USD)
    outcomes = (
        (browser_completed, "completed", "2026-05-27T00:10:00Z", 4, "2026-05-27T00:00:01Z", 0.12),
        (browser_failed, "failed", "2026-05-28T00:10:00Z", 6, "2026-05-28T00:00:01Z", 0.24),
        (terminal_completed, "completed", "2026-05-28T00:15:00Z", 10, "2026-05-28T00:00:02Z", 0.05),
    )
    for job_id, status, ended_at, steps, created_at, cost in outcomes:
        record = manager._jobs[job_id]
        record.update(
            status=status,
            ended_at=ended_at,
            steps=steps,
            created_at=created_at,
            # Keep the existing usage fields and only set the cost.
            usage={**record["usage"], "estimated_cost_usd": cost},
        )

    analytics = client.get("/api/analytics", headers=headers)
    assert analytics.status_code == 200
    payload = analytics.json()

    expected_overall = (
        ("total_jobs", 3),
        ("finished_jobs", 3),
        ("completed_jobs", 2),
        ("failed_jobs", 1),
        ("success_rate", 66.67),
        ("avg_steps", 6.67),
        ("avg_cost_usd", 0.136667),
    )
    for field, expected in expected_overall:
        assert payload[field] == expected

    rows_by_label = {}
    for row in payload["by_category"]:
        # Keep the first row per label, like a first-match lookup would.
        rows_by_label.setdefault(row["label"], row)
    browser = rows_by_label["Browser / web"]
    terminal = rows_by_label["Files / terminal"]
    assert browser["finished_jobs"] == 2
    assert browser["success_rate"] == 50.0
    assert browser["avg_steps"] == 5.0
    assert terminal["success_rate"] == 100.0

    timeline_labels = [row["label"] for row in payload["timeline"]]
    assert timeline_labels == ["2026-05-27", "2026-05-28"]
|
||||
|
||||
|
||||
def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
|
||||
app_enabled, _ = _build_app(tmp_path / "enabled", monkeypatch, disable_ui=False)
|
||||
client_enabled = TestClient(app_enabled)
|
||||
root_enabled = client_enabled.get("/")
|
||||
assert root_enabled.status_code == 200
|
||||
assert "ScreenJob Monitor" in root_enabled.text
|
||||
assert "Success by Objective Category" in root_enabled.text
|
||||
js_enabled = client_enabled.get("/ui/monitoring.js")
|
||||
assert js_enabled.status_code == 200
|
||||
assert "const tokenInput" in js_enabled.text
|
||||
|
||||
@@ -72,3 +72,55 @@ def test_storage_response_fallback_uses_result_when_json_missing(tmp_path: Path)
|
||||
assert job is not None
|
||||
assert job["response"]["return"] == "Legacy result string"
|
||||
assert job["response"]["data"] is None
|
||||
|
||||
|
||||
def test_history_db_analytics_groups_by_category_and_day(tmp_path: Path) -> None:
    """HistoryDB.analytics() aggregates jobs overall, by category and by day.

    Seeds three jobs (two completed, one failed) across two days and checks
    the overall counters, the per-category rows and the timeline ordering.
    """
    db = HistoryDB(tmp_path / "screenjob_test_analytics.db")

    # (job id, objective, created_at, final status, steps, estimated cost in USD)
    seeds = (
        ("job_browser_ok", "Open amazon.de and checkout", "2026-05-27T00:00:01Z", "completed", 4, 0.12),
        ("job_browser_fail", "Open website and login", "2026-05-28T00:00:01Z", "failed", 6, 0.24),
        ("job_terminal_ok", "Run a shell command to inspect files", "2026-05-28T00:00:02Z", "completed", 10, 0.05),
    )
    for job_id, objective, created_at, status, steps, cost in seeds:
        db.create_job(
            job_id=job_id,
            objective=objective,
            model="gpt-5.4-mini",
            created_at=created_at,
            safety_override=False,
            disabled_tools=[],
        )
        db.update_job(job_id, status=status, steps=steps, estimated_cost_usd=cost)

    analytics = db.analytics()

    expected_overall = (
        ("total_jobs", 3),
        ("finished_jobs", 3),
        ("completed_jobs", 2),
        ("failed_jobs", 1),
        ("success_rate", 66.67),
        ("avg_steps", 6.67),
        ("avg_cost_usd", 0.136667),
    )
    for field, expected in expected_overall:
        assert analytics[field] == expected

    rows_by_label = {}
    for row in analytics["by_category"]:
        # Keep the first row per label, like a first-match lookup would.
        rows_by_label.setdefault(row["label"], row)
    browser = rows_by_label["Browser / web"]
    terminal = rows_by_label["Files / terminal"]
    assert browser["finished_jobs"] == 2
    assert browser["success_rate"] == 50.0
    assert browser["avg_steps"] == 5.0
    assert terminal["success_rate"] == 100.0

    timeline_labels = [row["label"] for row in analytics["timeline"]]
    assert timeline_labels == ["2026-05-27", "2026-05-28"]
|
||||
|
||||
Reference in New Issue
Block a user