Add lightweight analytics dashboard #1

Merged
space merged 1 commits from feat/lightweight-dash into master 2026-05-27 22:50:08 +02:00
9 changed files with 644 additions and 1 deletions
Showing only changes of commit 8126b57404 - Show all commits

View File

@@ -156,8 +156,14 @@ Each job payload includes:
- Read-only dashboard (no run controls)
- Requires token input
- Live updates via `/ws`
- Analytics dashboards for success rate by objective category and daily averages
- Set `DISABLE_UI=true` to disable UI
### Analytics API
- `GET /api/analytics`
- Returns objective-category success rates plus average steps/cost over time
## Agent Instructions (Practical)
- Prefer `execute_command` for deterministic actions (opening URLs, filesystem checks).

View File

@@ -16,6 +16,7 @@ from .config import AppConfig, load_app_config
from .storage import HistoryDB
from .task_manager import JobManager
from .ui import monitoring_js_path, monitoring_page_html
from .utils import utc_now_iso
class CreateJobRequest(BaseModel):
@@ -386,6 +387,12 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
def stats(_: None = Depends(require_token)) -> dict[str, Any]:
return manager.stats()
@app.get("/api/analytics")
def analytics(_: None = Depends(require_token)) -> dict[str, Any]:
payload = manager.analytics()
payload["generated_at"] = utc_now_iso()
return payload
if not app_config.disable_ui:
@app.get("/", response_class=HTMLResponse)
def ui_root() -> str:

View File

@@ -7,6 +7,39 @@ from pathlib import Path
from typing import Any
_TERMINAL_STATUSES = {"completed", "failed", "cancelled"}
_CATEGORY_RULES: tuple[tuple[str, tuple[str, ...]], ...] = (
(
"Browser / web",
("browser", "website", "webpage", "chrome", "url", "amazon", "google", "login", "shopping", "checkout", "orders"),
),
(
"Files / terminal",
("file", "folder", "directory", "terminal", "shell", "command", "cli", "script", "git", "repo", "install", "pip", "npm", "powershell", "bash"),
),
(
"Writing / docs",
("write", "summary", "summarize", "document", "docs", "report", "email", "message", "readme", "markdown", "note", "proposal"),
),
(
"Data / analysis",
("data", "analysis", "analyze", "csv", "spreadsheet", "sheet", "table", "chart", "dashboard", "metric", "metrics", "sql"),
),
(
"Development / ops",
("code", "bug", "fix", "test", "debug", "api", "backend", "frontend", "database", "deploy", "docker", "service", "build"),
),
)
def _objective_category(objective: str) -> str:
text = objective.lower()
for category, keywords in _CATEGORY_RULES:
if any(keyword in text for keyword in keywords):
return category
return "Other"
class HistoryDB:
def __init__(self, db_path: Path) -> None:
self.db_path = db_path
@@ -184,6 +217,131 @@ class HistoryDB:
).fetchone()
return dict(totals) if totals else {}
def analytics(self) -> dict[str, Any]:
with self._connect() as conn:
rows = conn.execute(
"""
SELECT job_id, objective, status, steps, estimated_cost_usd, created_at
FROM jobs
ORDER BY created_at ASC, job_id ASC
"""
).fetchall()
total_jobs = 0
finished_jobs = 0
completed_jobs = 0
failed_jobs = 0
cancelled_jobs = 0
steps_sum = 0
steps_count = 0
cost_sum = 0.0
cost_count = 0
by_category: dict[str, dict[str, Any]] = {}
by_day: dict[str, dict[str, Any]] = {}
def _bucket(target: dict[str, dict[str, Any]], key: str) -> dict[str, Any]:
bucket = target.setdefault(
key,
{
"label": key,
"total_jobs": 0,
"finished_jobs": 0,
"completed_jobs": 0,
"failed_jobs": 0,
"cancelled_jobs": 0,
"steps_sum": 0,
"steps_count": 0,
"cost_sum": 0.0,
"cost_count": 0,
},
)
return bucket
for row in rows:
total_jobs += 1
status = str(row["status"] or "")
finished = status in _TERMINAL_STATUSES
completed = status == "completed"
objective = str(row["objective"] or "")
category = _objective_category(objective)
created_at = str(row["created_at"] or "")
day = created_at[:10] if len(created_at) >= 10 else created_at or "unknown"
category_bucket = _bucket(by_category, category)
day_bucket = _bucket(by_day, day)
for bucket in (category_bucket, day_bucket):
bucket["total_jobs"] += 1
if not finished:
continue
finished_jobs += 1
if completed:
completed_jobs += 1
elif status == "failed":
failed_jobs += 1
elif status == "cancelled":
cancelled_jobs += 1
steps = row["steps"]
if steps is not None:
step_value = int(steps)
steps_sum += step_value
steps_count += 1
for bucket in (category_bucket, day_bucket):
bucket["steps_sum"] += step_value
bucket["steps_count"] += 1
estimated_cost = row["estimated_cost_usd"]
if estimated_cost is not None:
cost_value = float(estimated_cost)
cost_sum += cost_value
cost_count += 1
for bucket in (category_bucket, day_bucket):
bucket["cost_sum"] += cost_value
bucket["cost_count"] += 1
for bucket in (category_bucket, day_bucket):
bucket["finished_jobs"] += 1
if completed:
bucket["completed_jobs"] += 1
elif status == "failed":
bucket["failed_jobs"] += 1
elif status == "cancelled":
bucket["cancelled_jobs"] += 1
def _finalize(bucket: dict[str, Any]) -> dict[str, Any]:
finished = bucket["finished_jobs"]
return {
"label": bucket["label"],
"total_jobs": bucket["total_jobs"],
"finished_jobs": finished,
"completed_jobs": bucket["completed_jobs"],
"failed_jobs": bucket["failed_jobs"],
"cancelled_jobs": bucket["cancelled_jobs"],
"success_rate": round((bucket["completed_jobs"] / finished) * 100, 2) if finished else 0.0,
"avg_steps": round(bucket["steps_sum"] / bucket["steps_count"], 2) if bucket["steps_count"] else None,
"avg_cost_usd": round(bucket["cost_sum"] / bucket["cost_count"], 6) if bucket["cost_count"] else None,
}
category_rows = [_finalize(bucket) for bucket in by_category.values()]
category_rows.sort(key=lambda item: (-item["success_rate"], item["label"]))
day_rows = [_finalize(bucket) for bucket in by_day.values()]
day_rows.sort(key=lambda item: item["label"])
return {
"total_jobs": total_jobs,
"finished_jobs": finished_jobs,
"completed_jobs": completed_jobs,
"failed_jobs": failed_jobs,
"cancelled_jobs": cancelled_jobs,
"success_rate": round((completed_jobs / finished_jobs) * 100, 2) if finished_jobs else 0.0,
"avg_steps": round(steps_sum / steps_count, 2) if steps_count else None,
"avg_cost_usd": round(cost_sum / cost_count, 6) if cost_count else None,
"by_category": category_rows,
"timeline": day_rows,
}
def _row_to_job(self, row: sqlite3.Row) -> dict[str, Any]:
disabled_tools: list[str] = []
try:

View File

@@ -351,6 +351,9 @@ class JobManager:
stats["live_running_threads"] = sum(1 for job in self._running.values() if job.thread.is_alive())
return stats
def analytics(self) -> dict[str, Any]:
return self.db.analytics()
def _normalize_job_payload(self, job: dict[str, Any]) -> dict[str, Any]:
response = job.get("response")
if not isinstance(response, dict):

View File

@@ -21,6 +21,30 @@
<section class="grid grid-cols-2 md:grid-cols-6 gap-3" id="stats"></section>
<section class="space-y-3">
<div class="flex items-center justify-between gap-3">
<h2 class="font-semibold">Analytics</h2>
<div id="analyticsMeta" class="text-[11px] text-slate-400"></div>
</div>
<div id="analyticsSummary" class="grid grid-cols-2 md:grid-cols-4 gap-3"></div>
<div class="grid grid-cols-1 xl:grid-cols-2 gap-4">
<div class="bg-slate-900/70 border border-slate-800 rounded-xl p-4 space-y-3">
<div class="flex items-center justify-between gap-3">
<h3 class="font-semibold text-sm">Success by Objective Category</h3>
<div id="analyticsCategorySummary" class="text-[11px] text-slate-400"></div>
</div>
<div id="analyticsCategories" class="space-y-3"></div>
</div>
<div class="bg-slate-900/70 border border-slate-800 rounded-xl p-4 space-y-3">
<div class="flex items-center justify-between gap-3">
<h3 class="font-semibold text-sm">Avg Steps / Cost Over Time</h3>
<div id="analyticsTrendSummary" class="text-[11px] text-slate-400"></div>
</div>
<div id="analyticsTrends" class="space-y-4"></div>
</div>
</div>
</section>
<section class="grid grid-cols-1 lg:grid-cols-5 gap-4">
<div class="lg:col-span-2 bg-slate-900/70 border border-slate-800 rounded-xl p-4">
<div class="flex items-center justify-between mb-3">

View File

@@ -17,6 +17,12 @@ const replayPrevBtn = document.getElementById("replayPrevBtn");
const replayNextBtn = document.getElementById("replayNextBtn");
const replaySpeedEl = document.getElementById("replaySpeed");
const replaySeekEl = document.getElementById("replaySeek");
const analyticsMetaEl = document.getElementById("analyticsMeta");
const analyticsSummaryEl = document.getElementById("analyticsSummary");
const analyticsCategorySummaryEl = document.getElementById("analyticsCategorySummary");
const analyticsCategoriesEl = document.getElementById("analyticsCategories");
const analyticsTrendSummaryEl = document.getElementById("analyticsTrendSummary");
const analyticsTrendsEl = document.getElementById("analyticsTrends");
const state = {
token: localStorage.getItem("screenjob_token") || "",
@@ -35,6 +41,7 @@ const state = {
}
};
const manuallyClosedSockets = new WeakSet();
const analyticsRefreshEvents = new Set(["job_finished", "job_failed", "job_rejected"]);
tokenInput.value = state.token;
function authHeaders() {
@@ -66,6 +73,197 @@ function renderStats(stats) {
`).join("");
}
function escapeHtml(value) {
return String(value ?? "").replace(/[&<>"']/g, (ch) => ({
"&": "&amp;",
"<": "&lt;",
">": "&gt;",
'"': "&quot;",
"'": "&#39;"
})[ch]);
}
function formatNumber(value, digits = 2) {
const num = Number(value);
return Number.isFinite(num) ? num.toFixed(digits) : "—";
}
function formatCurrency(value, digits = 6) {
const num = Number(value);
return Number.isFinite(num) ? `$${num.toFixed(digits)}` : "—";
}
function formatPercent(value) {
const num = Number(value);
return Number.isFinite(num) ? `${num.toFixed(1)}%` : "—";
}
function formatDateLabel(value) {
const dt = new Date(value);
if (Number.isNaN(dt.getTime())) return String(value || "—");
return dt.toLocaleDateString(undefined, { month: "short", day: "numeric" });
}
function renderMetricCard(label, value) {
return `
<div class="bg-slate-950 border border-slate-800 rounded-xl p-3">
<div class="text-[11px] uppercase tracking-wide text-slate-400">${escapeHtml(label)}</div>
<div class="text-xl font-semibold mt-1">${escapeHtml(value)}</div>
</div>
`;
}
function renderLineChart(title, points, options = {}) {
const color = options.color || "#22d3ee";
const valueLabel = options.valueLabel || "";
const sourcePoints = Array.isArray(points)
? points.filter((point) => Number.isFinite(Number(point.value)))
: [];
if (!sourcePoints.length) {
return `
<div class="rounded-lg border border-slate-800 bg-slate-950/70 p-3">
<div class="flex items-center justify-between gap-3">
<div>
<div class="text-xs text-slate-400">${escapeHtml(title)}</div>
<div class="text-sm text-slate-200 font-semibold">No data yet</div>
</div>
</div>
</div>
`;
}
const width = 640;
const height = 220;
const margin = { top: 20, right: 18, bottom: 34, left: 44 };
const values = sourcePoints.map((point) => Number(point.value));
const minValue = Math.min(...values);
const maxValue = Math.max(...values);
const span = maxValue - minValue || 1;
const chartWidth = width - margin.left - margin.right;
const chartHeight = height - margin.top - margin.bottom;
const xStep = sourcePoints.length > 1 ? chartWidth / (sourcePoints.length - 1) : 0;
const coords = sourcePoints.map((point, index) => ({
x: margin.left + (index * xStep),
y: margin.top + ((maxValue - Number(point.value)) / span) * chartHeight,
}));
const linePath = coords.map((point, index) => `${index === 0 ? "M" : "L"} ${point.x} ${point.y}`).join(" ");
const baseline = height - margin.bottom;
const midIndex = Math.floor(sourcePoints.length / 2);
const xLabels = [
{ index: 0, label: sourcePoints[0].label },
{ index: midIndex, label: sourcePoints[midIndex].label },
{ index: sourcePoints.length - 1, label: sourcePoints[sourcePoints.length - 1].label },
].filter((item, index, array) => item.label && array.findIndex((candidate) => candidate.index === item.index) === index);
const minLabel = options.formatValue ? options.formatValue(minValue) : formatNumber(minValue, 2);
const maxLabel = options.formatValue ? options.formatValue(maxValue) : formatNumber(maxValue, 2);
const latest = sourcePoints[sourcePoints.length - 1];
const latestValue = options.formatValue ? options.formatValue(latest.value) : formatNumber(latest.value, 2);
return `
<div class="rounded-lg border border-slate-800 bg-slate-950/70 p-3 space-y-2">
<div class="flex items-center justify-between gap-3">
<div>
<div class="text-xs text-slate-400">${escapeHtml(title)}</div>
<div class="text-sm text-slate-200 font-semibold">${escapeHtml(latestValue)}${valueLabel ? ` <span class="text-slate-500 font-normal">${escapeHtml(valueLabel)}</span>` : ""}</div>
</div>
<div class="text-[11px] text-slate-400 text-right">
<div>${escapeHtml(sourcePoints.length)} points</div>
<div>${escapeHtml(minLabel)} - ${escapeHtml(maxLabel)}</div>
</div>
</div>
<svg viewBox="0 0 ${width} ${height}" class="w-full h-56">
${Array.from({ length: 4 }, (_, idx) => {
const y = margin.top + (chartHeight / 3) * idx;
return `<line x1="${margin.left}" y1="${y}" x2="${width - margin.right}" y2="${y}" stroke="rgba(51, 65, 85, 0.7)" stroke-width="1" />`;
}).join("")}
<line x1="${margin.left}" y1="${baseline}" x2="${width - margin.right}" y2="${baseline}" stroke="rgba(71, 85, 105, 0.8)" stroke-width="1.5" />
<path d="${linePath}" fill="none" stroke="${color}" stroke-width="3" stroke-linecap="round" stroke-linejoin="round" />
${coords.map((point) => `
<circle cx="${point.x}" cy="${point.y}" r="4.5" fill="${color}" />
`).join("")}
<text x="${margin.left - 8}" y="${margin.top + 4}" text-anchor="end" class="fill-slate-400 text-[10px]">${escapeHtml(maxLabel)}</text>
<text x="${margin.left - 8}" y="${baseline}" text-anchor="end" class="fill-slate-400 text-[10px]">${escapeHtml(minLabel)}</text>
${xLabels.map((item) => `
<text x="${coords[item.index].x}" y="${height - 10}" text-anchor="middle" class="fill-slate-500 text-[10px]">${escapeHtml(formatDateLabel(item.label))}</text>
`).join("")}
</svg>
</div>
`;
}
function renderAnalytics(payload) {
const analytics = payload || {};
const categories = Array.isArray(analytics.by_category) ? analytics.by_category : [];
const timeline = Array.isArray(analytics.timeline) ? analytics.timeline : [];
const finishedCategories = categories.filter((row) => Number(row.finished_jobs || 0) > 0);
if (analyticsMetaEl) {
analyticsMetaEl.textContent = analytics.generated_at
? `Updated ${new Date(analytics.generated_at).toLocaleString()}`
: "Historical snapshot";
}
analyticsSummaryEl.innerHTML = [
renderMetricCard("Finished Jobs", analytics.finished_jobs || 0),
renderMetricCard("Success Rate", formatPercent(analytics.success_rate)),
renderMetricCard("Avg Steps", formatNumber(analytics.avg_steps, 1)),
renderMetricCard("Avg Cost", formatCurrency(analytics.avg_cost_usd)),
].join("");
analyticsCategorySummaryEl.textContent = finishedCategories.length
? `${finishedCategories.length} categories`
: "No finished jobs yet";
if (finishedCategories.length) {
analyticsCategoriesEl.innerHTML = finishedCategories.map((row) => {
const successRate = Number(row.success_rate || 0);
const completed = Number(row.completed_jobs || 0);
const finished = Number(row.finished_jobs || 0);
const total = Number(row.total_jobs || 0);
const avgSteps = row.avg_steps == null ? "—" : formatNumber(row.avg_steps, 1);
const avgCost = row.avg_cost_usd == null ? "—" : formatCurrency(row.avg_cost_usd);
return `
<div class="rounded-lg border border-slate-800 bg-slate-950/70 p-3 space-y-2">
<div class="flex items-start justify-between gap-3">
<div>
<div class="font-medium">${escapeHtml(row.label || "Other")}</div>
<div class="text-[11px] text-slate-400">${finished} finished · ${completed} completed · ${total} total</div>
</div>
<div class="text-right">
<div class="text-base font-semibold">${formatPercent(successRate)}</div>
<div class="text-[11px] text-slate-500">success rate</div>
</div>
</div>
<div class="h-2 rounded bg-slate-800 overflow-hidden">
<div class="h-full rounded bg-cyan-400" style="width: ${Math.max(0, Math.min(successRate, 100))}%"></div>
</div>
<div class="grid grid-cols-2 gap-2 text-[11px] text-slate-300">
<div>Avg steps: ${escapeHtml(avgSteps)}</div>
<div>Avg cost: ${escapeHtml(avgCost)}</div>
</div>
</div>
`;
}).join("");
} else {
analyticsCategoriesEl.innerHTML = `
<div class="rounded-lg border border-dashed border-slate-800 bg-slate-950/70 p-4 text-sm text-slate-400">
No finished jobs yet.
</div>
`;
}
analyticsTrendSummaryEl.textContent = timeline.length ? `${timeline.length} days` : "No daily data yet";
analyticsTrendsEl.innerHTML = [
renderLineChart("Average steps per day", timeline.map((row) => ({ label: row.label, value: row.avg_steps })), { color: "#38bdf8" }),
renderLineChart("Average cost per day", timeline.map((row) => ({ label: row.label, value: row.avg_cost_usd })), {
color: "#34d399",
valueLabel: "USD",
formatValue: (value) => formatCurrency(value),
}),
].join("");
}
function renderJobs() {
jobListEl.innerHTML = state.jobs.map((job) => {
const active = job.job_id === state.selectedJobId;
@@ -310,6 +508,11 @@ async function refreshStats() {
renderStats(payload);
}
async function refreshAnalytics() {
const payload = await api("/api/analytics");
renderAnalytics(payload);
}
async function refreshJobDetail() {
if (!state.selectedJobId) return;
const [job, events, replay] = await Promise.all([
@@ -345,6 +548,9 @@ function connectWs() {
}
await refreshJobs();
await refreshStats();
if (analyticsRefreshEvents.has(payload.event_type)) {
await refreshAnalytics();
}
} catch (err) {
console.error(err);
}
@@ -362,6 +568,7 @@ function connectWs() {
async function fullRefresh() {
await refreshJobs();
await refreshStats();
await refreshAnalytics();
await refreshJobDetail();
}

View File

@@ -9,6 +9,24 @@ import src.server as server_module
from src.config import AppConfig
_TERMINAL_STATUSES = {"completed", "failed", "cancelled"}
def _objective_category(objective: str) -> str:
text = objective.lower()
if any(keyword in text for keyword in ("browser", "website", "amazon", "google", "login", "shopping", "checkout", "orders")):
return "Browser / web"
if any(keyword in text for keyword in ("file", "folder", "directory", "terminal", "shell", "command", "cli", "script", "git", "repo", "install", "pip", "npm")):
return "Files / terminal"
if any(keyword in text for keyword in ("write", "summary", "document", "docs", "report", "email", "message", "readme", "markdown")):
return "Writing / docs"
if any(keyword in text for keyword in ("data", "analysis", "csv", "spreadsheet", "sheet", "table", "chart", "dashboard", "metric", "sql")):
return "Data / analysis"
if any(keyword in text for keyword in ("code", "bug", "fix", "test", "debug", "api", "backend", "frontend", "database", "deploy", "docker", "service", "build")):
return "Development / ops"
return "Other"
class FakeJobManager:
def __init__(self, *, config: AppConfig, db: Any, broadcast: Any = None) -> None:
self.config = config
@@ -39,6 +57,7 @@ class FakeJobManager:
artifacts_dir.mkdir(parents=True, exist_ok=True)
screenshot_path = artifacts_dir / "screen_step_001.png"
screenshot_path.write_bytes(b"not-a-real-png")
created_at = f"2026-05-27T00:00:{self._counter:02d}Z"
self.last_submit_payload = {
"objective": objective,
"model": selected_model,
@@ -57,6 +76,10 @@ class FakeJobManager:
"objective": objective,
"model": selected_model,
"status": "running",
"created_at": created_at,
"started_at": created_at,
"ended_at": None,
"steps": 1,
"result": "Running",
"response": {"return": "Running", "data": None},
"return": "Running",
@@ -149,6 +172,114 @@ class FakeJobManager:
"live_running_threads": 0,
}
def analytics(self) -> dict[str, Any]:
by_category: dict[str, dict[str, Any]] = {}
by_day: dict[str, dict[str, Any]] = {}
def bucket(target: dict[str, dict[str, Any]], key: str) -> dict[str, Any]:
return target.setdefault(
key,
{
"label": key,
"total_jobs": 0,
"finished_jobs": 0,
"completed_jobs": 0,
"failed_jobs": 0,
"cancelled_jobs": 0,
"steps_sum": 0,
"steps_count": 0,
"cost_sum": 0.0,
"cost_count": 0,
},
)
total_jobs = 0
finished_jobs = 0
completed_jobs = 0
failed_jobs = 0
cancelled_jobs = 0
steps_sum = 0
steps_count = 0
cost_sum = 0.0
cost_count = 0
for job in self._jobs.values():
total_jobs += 1
status = str(job.get("status") or "")
finished = status in _TERMINAL_STATUSES
category = _objective_category(str(job.get("objective") or ""))
day = str(job.get("created_at") or "")[:10] or "unknown"
category_bucket = bucket(by_category, category)
day_bucket = bucket(by_day, day)
for item in (category_bucket, day_bucket):
item["total_jobs"] += 1
if not finished:
continue
finished_jobs += 1
if status == "completed":
completed_jobs += 1
elif status == "failed":
failed_jobs += 1
elif status == "cancelled":
cancelled_jobs += 1
steps_raw = job.get("steps")
if steps_raw is not None:
steps = int(steps_raw)
steps_sum += steps
steps_count += 1
for item in (category_bucket, day_bucket):
item["steps_sum"] += steps
item["steps_count"] += 1
estimated_cost_raw = (job.get("usage") or {}).get("estimated_cost_usd")
if estimated_cost_raw is not None:
estimated_cost = float(estimated_cost_raw)
cost_sum += estimated_cost
cost_count += 1
for item in (category_bucket, day_bucket):
item["cost_sum"] += estimated_cost
item["cost_count"] += 1
for item in (category_bucket, day_bucket):
item["finished_jobs"] += 1
if status == "completed":
item["completed_jobs"] += 1
elif status == "failed":
item["failed_jobs"] += 1
elif status == "cancelled":
item["cancelled_jobs"] += 1
def finalize(item: dict[str, Any]) -> dict[str, Any]:
finished = item["finished_jobs"]
return {
"label": item["label"],
"total_jobs": item["total_jobs"],
"finished_jobs": finished,
"completed_jobs": item["completed_jobs"],
"failed_jobs": item["failed_jobs"],
"cancelled_jobs": item["cancelled_jobs"],
"success_rate": round((item["completed_jobs"] / finished) * 100, 2) if finished else 0.0,
"avg_steps": round(item["steps_sum"] / item["steps_count"], 2) if item["steps_count"] else None,
"avg_cost_usd": round(item["cost_sum"] / item["cost_count"], 6) if item["cost_count"] else None,
}
return {
"total_jobs": total_jobs,
"finished_jobs": finished_jobs,
"completed_jobs": completed_jobs,
"failed_jobs": failed_jobs,
"cancelled_jobs": cancelled_jobs,
"success_rate": round((completed_jobs / finished_jobs) * 100, 2) if finished_jobs else 0.0,
"avg_steps": round(steps_sum / steps_count, 2) if steps_count else None,
"avg_cost_usd": round(cost_sum / cost_count, 6) if cost_count else None,
"by_category": sorted((finalize(item) for item in by_category.values()), key=lambda item: (-item["success_rate"], item["label"])),
"timeline": sorted((finalize(item) for item in by_day.values()), key=lambda item: item["label"]),
}
def _build_app(tmp_path: Path, monkeypatch: Any, disable_ui: bool = False):
monkeypatch.setattr(server_module, "JobManager", FakeJobManager)
@@ -276,12 +407,67 @@ def test_replay_endpoint_skips_visual_paths_outside_artifacts(tmp_path: Path, mo
assert payload["total_frames"] == 1
def test_analytics_endpoint_groups_by_category_and_time(tmp_path: Path, monkeypatch: Any) -> None:
app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
manager = app.state.manager
client = TestClient(app)
headers = {"Authorization": "Bearer test_token"}
browser_completed = client.post("/api/jobs", headers=headers, json={"job": "Open amazon.de and checkout"}).json()["job_id"]
browser_failed = client.post("/api/jobs", headers=headers, json={"job": "Open website and login"}).json()["job_id"]
terminal_completed = client.post("/api/jobs", headers=headers, json={"job": "Run a shell command to inspect files"}).json()["job_id"]
manager._jobs[browser_completed].update(
status="completed",
ended_at="2026-05-27T00:10:00Z",
steps=4,
created_at="2026-05-27T00:00:01Z",
usage={**manager._jobs[browser_completed]["usage"], "estimated_cost_usd": 0.12},
)
manager._jobs[browser_failed].update(
status="failed",
ended_at="2026-05-28T00:10:00Z",
steps=6,
created_at="2026-05-28T00:00:01Z",
usage={**manager._jobs[browser_failed]["usage"], "estimated_cost_usd": 0.24},
)
manager._jobs[terminal_completed].update(
status="completed",
ended_at="2026-05-28T00:15:00Z",
steps=10,
created_at="2026-05-28T00:00:02Z",
usage={**manager._jobs[terminal_completed]["usage"], "estimated_cost_usd": 0.05},
)
analytics = client.get("/api/analytics", headers=headers)
assert analytics.status_code == 200
payload = analytics.json()
assert payload["total_jobs"] == 3
assert payload["finished_jobs"] == 3
assert payload["completed_jobs"] == 2
assert payload["failed_jobs"] == 1
assert payload["success_rate"] == 66.67
assert payload["avg_steps"] == 6.67
assert payload["avg_cost_usd"] == 0.136667
browser = next(row for row in payload["by_category"] if row["label"] == "Browser / web")
terminal = next(row for row in payload["by_category"] if row["label"] == "Files / terminal")
assert browser["finished_jobs"] == 2
assert browser["success_rate"] == 50.0
assert browser["avg_steps"] == 5.0
assert terminal["success_rate"] == 100.0
assert [row["label"] for row in payload["timeline"]] == ["2026-05-27", "2026-05-28"]
def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
app_enabled, _ = _build_app(tmp_path / "enabled", monkeypatch, disable_ui=False)
client_enabled = TestClient(app_enabled)
root_enabled = client_enabled.get("/")
assert root_enabled.status_code == 200
assert "ScreenJob Monitor" in root_enabled.text
assert "Success by Objective Category" in root_enabled.text
js_enabled = client_enabled.get("/ui/monitoring.js")
assert js_enabled.status_code == 200
assert "const tokenInput" in js_enabled.text

View File

@@ -72,3 +72,55 @@ def test_storage_response_fallback_uses_result_when_json_missing(tmp_path: Path)
assert job is not None
assert job["response"]["return"] == "Legacy result string"
assert job["response"]["data"] is None
def test_history_db_analytics_groups_by_category_and_day(tmp_path: Path) -> None:
db = HistoryDB(tmp_path / "screenjob_test_analytics.db")
db.create_job(
job_id="job_browser_ok",
objective="Open amazon.de and checkout",
model="gpt-5.4-mini",
created_at="2026-05-27T00:00:01Z",
safety_override=False,
disabled_tools=[],
)
db.update_job("job_browser_ok", status="completed", steps=4, estimated_cost_usd=0.12)
db.create_job(
job_id="job_browser_fail",
objective="Open website and login",
model="gpt-5.4-mini",
created_at="2026-05-28T00:00:01Z",
safety_override=False,
disabled_tools=[],
)
db.update_job("job_browser_fail", status="failed", steps=6, estimated_cost_usd=0.24)
db.create_job(
job_id="job_terminal_ok",
objective="Run a shell command to inspect files",
model="gpt-5.4-mini",
created_at="2026-05-28T00:00:02Z",
safety_override=False,
disabled_tools=[],
)
db.update_job("job_terminal_ok", status="completed", steps=10, estimated_cost_usd=0.05)
analytics = db.analytics()
assert analytics["total_jobs"] == 3
assert analytics["finished_jobs"] == 3
assert analytics["completed_jobs"] == 2
assert analytics["failed_jobs"] == 1
assert analytics["success_rate"] == 66.67
assert analytics["avg_steps"] == 6.67
assert analytics["avg_cost_usd"] == 0.136667
browser = next(row for row in analytics["by_category"] if row["label"] == "Browser / web")
terminal = next(row for row in analytics["by_category"] if row["label"] == "Files / terminal")
assert browser["finished_jobs"] == 2
assert browser["success_rate"] == 50.0
assert browser["avg_steps"] == 5.0
assert terminal["success_rate"] == 100.0
assert [row["label"] for row in analytics["timeline"]] == ["2026-05-27", "2026-05-28"]

View File

@@ -20,4 +20,4 @@
## P3
- [x] Add Replay Mode; Ability to replay a session by reconstructing the screen from screenshots and overlaying tool calls and click and type events.
- [Idea] Add lightweight analytics dashboards (success rate by objective category, avg steps/cost over time).
- [x] Add lightweight analytics dashboards (success rate by objective category, avg steps/cost over time).