diff --git a/src/ui_assets/monitoring.js b/src/ui_assets/monitoring.js
index 1df1af3..6d514f4 100644
--- a/src/ui_assets/monitoring.js
+++ b/src/ui_assets/monitoring.js
@@ -17,6 +17,12 @@ const replayPrevBtn = document.getElementById("replayPrevBtn");
const replayNextBtn = document.getElementById("replayNextBtn");
const replaySpeedEl = document.getElementById("replaySpeed");
const replaySeekEl = document.getElementById("replaySeek");
+// Analytics panel elements, looked up once by id when this script runs.
+const analyticsMetaEl = document.getElementById("analyticsMeta");
+const analyticsSummaryEl = document.getElementById("analyticsSummary");
+const analyticsCategorySummaryEl = document.getElementById("analyticsCategorySummary");
+const analyticsCategoriesEl = document.getElementById("analyticsCategories");
+const analyticsTrendSummaryEl = document.getElementById("analyticsTrendSummary");
+const analyticsTrendsEl = document.getElementById("analyticsTrends");
const state = {
token: localStorage.getItem("screenjob_token") || "",
@@ -35,6 +41,7 @@ const state = {
}
};
const manuallyClosedSockets = new WeakSet();
+// Event types that trigger an analytics refresh; matched against payload.event_type.
+const analyticsRefreshEvents = new Set(["job_finished", "job_failed", "job_rejected"]);
tokenInput.value = state.token;
function authHeaders() {
@@ -66,6 +73,197 @@ function renderStats(stats) {
`).join("");
}
+/**
+ * Escapes the five HTML-significant characters so a value can be safely
+ * interpolated into markup that is later assigned through innerHTML.
+ *
+ * @param {*} value - Any value; null and undefined are treated as "".
+ * @returns {string} The stringified value with & < > " ' replaced by entities.
+ */
+function escapeHtml(value) {
+  // Each character must map to its entity. Mapping a character to itself
+  // would leave the output unescaped.
+  const entities = {
+    "&": "&amp;",
+    "<": "&lt;",
+    ">": "&gt;",
+    '"': "&quot;",
+    "'": "&#39;",
+  };
+  return String(value ?? "").replace(/[&<>"']/g, (ch) => entities[ch]);
+}
+
+/**
+ * Formats a numeric value with a fixed number of decimals.
+ *
+ * @param {*} value - Number or numeric string. null, undefined, blank
+ *   strings and non-finite values are treated as missing.
+ * @param {number} [digits=2] - Decimal places passed to toFixed.
+ * @returns {string} The fixed-point text, or "—" when the value is missing.
+ */
+function formatNumber(value, digits = 2) {
+  // Number(null) and Number("") are both 0, so rule them out before
+  // converting; otherwise an absent metric is displayed as a real zero.
+  if (value == null || (typeof value === "string" && value.trim() === "")) return "—";
+  const num = Number(value);
+  return Number.isFinite(num) ? num.toFixed(digits) : "—";
+}
+
+/**
+ * Formats a numeric value as a dollar amount with a fixed number of decimals.
+ *
+ * @param {*} value - Number or numeric string. null, undefined, blank
+ *   strings and non-finite values are treated as missing.
+ * @param {number} [digits=6] - Decimal places passed to toFixed.
+ * @returns {string} "$" followed by the fixed-point text, or "—" when missing.
+ */
+function formatCurrency(value, digits = 6) {
+  // Number(null) and Number("") are both 0; without this guard a missing
+  // cost would be shown as "$0.000000".
+  if (value == null || (typeof value === "string" && value.trim() === "")) return "—";
+  const num = Number(value);
+  return Number.isFinite(num) ? `$${num.toFixed(digits)}` : "—";
+}
+
+/**
+ * Formats a numeric value as a percentage with one decimal place.
+ *
+ * @param {*} value - Number or numeric string already expressed in percent.
+ *   null, undefined, blank strings and non-finite values are treated as missing.
+ * @returns {string} Text such as "66.7%", or "—" when the value is missing.
+ */
+function formatPercent(value) {
+  // Number(null) and Number("") are both 0; treat them as missing rather
+  // than reporting a genuine 0.0%.
+  if (value == null || (typeof value === "string" && value.trim() === "")) return "—";
+  const num = Number(value);
+  return Number.isFinite(num) ? `${num.toFixed(1)}%` : "—";
+}
+
+/**
+ * Turns a date-like value into a short "month day" label in the default locale.
+ *
+ * @param {*} value - Anything accepted by the Date constructor.
+ * @returns {string} The localized label; if the value does not parse to a
+ *   valid date, the value itself as a string, or "—" when it is falsy.
+ */
+function formatDateLabel(value) {
+  const parsed = new Date(value);
+  const isValid = !Number.isNaN(parsed.getTime());
+  return isValid
+    ? parsed.toLocaleDateString(undefined, { month: "short", day: "numeric" })
+    : String(value || "—");
+}
+
+// Returns the HTML fragment for one summary metric; label and value both go through escapeHtml.
+// NOTE(review): no wrapper markup is visible in the template below — presumably it was
+// lost from this copy of the patch; confirm against the real file.
+function renderMetricCard(label, value) {
+ return `
+
+
${escapeHtml(label)}
+
${escapeHtml(value)}
+
+ `;
+}
+
+/**
+ * Builds the HTML fragment for one trend chart from a list of labelled points.
+ *
+ * @param {string} title - Chart heading; escaped before insertion.
+ * @param {Array<{label: *, value: *}>} points - Data points. Anything that is
+ *   not an array is treated as empty; points whose value is null, undefined or
+ *   not a finite number are dropped.
+ * @param {Object} [options]
+ * @param {string} [options.color="#22d3ee"] - Colour setting for the chart.
+ * @param {string} [options.valueLabel=""] - Unit text appended after the latest value.
+ * @param {function(*): string} [options.formatValue] - Formatter for the min,
+ *   max and latest values; defaults to formatNumber with 2 decimals.
+ * @returns {string} Chart markup, or a "No data yet" placeholder when no point
+ *   has a usable value.
+ */
+function renderLineChart(title, points, options = {}) {
+  const color = options.color || "#22d3ee";
+  const valueLabel = options.valueLabel || "";
+  // Number(null) is 0, so a bare isFinite(Number(...)) check would keep points
+  // with no value and plot them as zero. Exclude null/undefined explicitly.
+  const sourcePoints = Array.isArray(points)
+    ? points.filter((point) => point != null && point.value != null && Number.isFinite(Number(point.value)))
+    : [];
+
+  if (!sourcePoints.length) {
+    return `
+
+
+
+
${escapeHtml(title)}
+
No data yet
+
+
+
+ `;
+  }
+
+  // NOTE(review): color, linePath, baseline and xLabels are computed below but are
+  // not referenced by the template text visible here; presumably the SVG markup
+  // that consumed them is missing from this copy — confirm before removing them.
+  const width = 640;
+  const height = 220;
+  const margin = { top: 20, right: 18, bottom: 34, left: 44 };
+  const values = sourcePoints.map((point) => Number(point.value));
+  const minValue = Math.min(...values);
+  const maxValue = Math.max(...values);
+  // Fall back to 1 so a flat series does not divide by zero.
+  const span = maxValue - minValue || 1;
+  const chartWidth = width - margin.left - margin.right;
+  const chartHeight = height - margin.top - margin.bottom;
+  // A single point has no horizontal spacing; it sits at the left margin.
+  const xStep = sourcePoints.length > 1 ? chartWidth / (sourcePoints.length - 1) : 0;
+  const coords = sourcePoints.map((point, index) => ({
+    x: margin.left + (index * xStep),
+    // The largest value maps to the top of the plot area.
+    y: margin.top + ((maxValue - Number(point.value)) / span) * chartHeight,
+  }));
+  const linePath = coords.map((point, index) => `${index === 0 ? "M" : "L"} ${point.x} ${point.y}`).join(" ");
+  const baseline = height - margin.bottom;
+  const midIndex = Math.floor(sourcePoints.length / 2);
+  // First, middle and last labels; entries without a label or repeating an
+  // earlier index (short series) are dropped.
+  const xLabels = [
+    { index: 0, label: sourcePoints[0].label },
+    { index: midIndex, label: sourcePoints[midIndex].label },
+    { index: sourcePoints.length - 1, label: sourcePoints[sourcePoints.length - 1].label },
+  ].filter((item, index, array) => item.label && array.findIndex((candidate) => candidate.index === item.index) === index);
+  const minLabel = options.formatValue ? options.formatValue(minValue) : formatNumber(minValue, 2);
+  const maxLabel = options.formatValue ? options.formatValue(maxValue) : formatNumber(maxValue, 2);
+  const latest = sourcePoints[sourcePoints.length - 1];
+  const latestValue = options.formatValue ? options.formatValue(latest.value) : formatNumber(latest.value, 2);
+
+  return `
+
+
+
+
${escapeHtml(title)}
+
${escapeHtml(latestValue)}${valueLabel ? ` ${escapeHtml(valueLabel)}` : ""}
+
+
+
${escapeHtml(sourcePoints.length)} points
+
${escapeHtml(minLabel)} - ${escapeHtml(maxLabel)}
+
+
+
+
+ `;
+}
+
+// Renders an analytics payload into the analytics panel: the meta line, the four
+// summary cards, the per-category list and the two daily trend charts.
+// A missing payload is treated as an empty object; by_category and timeline
+// default to empty arrays when they are not arrays.
+// NOTE(review): only analyticsMetaEl is null-checked; the other analytics
+// elements are written to unconditionally — confirm they always exist in the page.
+// NOTE(review): the category and empty-state templates show no wrapper markup in
+// this copy of the patch — presumably it was lost; confirm against the real file.
+function renderAnalytics(payload) {
+ const analytics = payload || {};
+ const categories = Array.isArray(analytics.by_category) ? analytics.by_category : [];
+ const timeline = Array.isArray(analytics.timeline) ? analytics.timeline : [];
+ // Only categories with at least one finished job are listed.
+ const finishedCategories = categories.filter((row) => Number(row.finished_jobs || 0) > 0);
+
+ if (analyticsMetaEl) {
+ analyticsMetaEl.textContent = analytics.generated_at
+ ? `Updated ${new Date(analytics.generated_at).toLocaleString()}`
+ : "Historical snapshot";
+ }
+
+ // Each card's text is escaped inside renderMetricCard.
+ analyticsSummaryEl.innerHTML = [
+ renderMetricCard("Finished Jobs", analytics.finished_jobs || 0),
+ renderMetricCard("Success Rate", formatPercent(analytics.success_rate)),
+ renderMetricCard("Avg Steps", formatNumber(analytics.avg_steps, 1)),
+ renderMetricCard("Avg Cost", formatCurrency(analytics.avg_cost_usd)),
+ ].join("");
+
+ analyticsCategorySummaryEl.textContent = finishedCategories.length
+ ? `${finishedCategories.length} categories`
+ : "No finished jobs yet";
+
+ if (finishedCategories.length) {
+ analyticsCategoriesEl.innerHTML = finishedCategories.map((row) => {
+ // Counts are coerced to numbers, so they are interpolated without escaping.
+ const successRate = Number(row.success_rate || 0);
+ const completed = Number(row.completed_jobs || 0);
+ const finished = Number(row.finished_jobs || 0);
+ const total = Number(row.total_jobs || 0);
+ // A null/undefined average shows as a dash instead of being formatted.
+ const avgSteps = row.avg_steps == null ? "—" : formatNumber(row.avg_steps, 1);
+ const avgCost = row.avg_cost_usd == null ? "—" : formatCurrency(row.avg_cost_usd);
+ return `
+
+
+
+
${escapeHtml(row.label || "Other")}
+
${finished} finished · ${completed} completed · ${total} total
+
+
+
${formatPercent(successRate)}
+
success rate
+
+
+
+
+
Avg steps: ${escapeHtml(avgSteps)}
+
Avg cost: ${escapeHtml(avgCost)}
+
+
+ `;
+ }).join("");
+ } else {
+ analyticsCategoriesEl.innerHTML = `
+
+ No finished jobs yet.
+
+ `;
+ }
+
+ // One chart per daily metric, both built from the same timeline rows.
+ analyticsTrendSummaryEl.textContent = timeline.length ? `${timeline.length} days` : "No daily data yet";
+ analyticsTrendsEl.innerHTML = [
+ renderLineChart("Average steps per day", timeline.map((row) => ({ label: row.label, value: row.avg_steps })), { color: "#38bdf8" }),
+ renderLineChart("Average cost per day", timeline.map((row) => ({ label: row.label, value: row.avg_cost_usd })), {
+ color: "#34d399",
+ valueLabel: "USD",
+ formatValue: (value) => formatCurrency(value),
+ }),
+ ].join("");
+}
+
function renderJobs() {
jobListEl.innerHTML = state.jobs.map((job) => {
const active = job.job_id === state.selectedJobId;
@@ -310,6 +508,11 @@ async function refreshStats() {
renderStats(payload);
}
+/**
+ * Fetches the analytics payload from /api/analytics and renders it.
+ *
+ * @returns {Promise<void>} Settles once rendering has run; a rejection from
+ *   api() propagates to the caller.
+ */
+async function refreshAnalytics() {
+  renderAnalytics(await api("/api/analytics"));
+}
+
async function refreshJobDetail() {
if (!state.selectedJobId) return;
const [job, events, replay] = await Promise.all([
@@ -345,6 +548,9 @@ function connectWs() {
}
await refreshJobs();
await refreshStats();
+ if (analyticsRefreshEvents.has(payload.event_type)) {
+ await refreshAnalytics();
+ }
} catch (err) {
console.error(err);
}
@@ -362,6 +568,7 @@ function connectWs() {
+// Reloads jobs, stats, analytics and job detail in that order; each call is awaited before the next starts.
async function fullRefresh() {
await refreshJobs();
await refreshStats();
+ await refreshAnalytics();
await refreshJobDetail();
}
diff --git a/tests/test_server_api.py b/tests/test_server_api.py
index a3b422a..3fe8e45 100644
--- a/tests/test_server_api.py
+++ b/tests/test_server_api.py
@@ -9,6 +9,24 @@ import src.server as server_module
from src.config import AppConfig
+# Job statuses that count as "finished" when FakeJobManager.analytics aggregates jobs.
+_TERMINAL_STATUSES = {"completed", "failed", "cancelled"}
+
+
+def _objective_category(objective: str) -> str:
+    """Classify an objective into a coarse category by keyword matching.
+
+    The objective is lower-cased and the rules are tried in order; the first
+    rule with any keyword occurring as a substring decides the category.
+    Returns "Other" when no rule matches.
+    """
+    rules = (
+        ("Browser / web", ("browser", "website", "amazon", "google", "login", "shopping", "checkout", "orders")),
+        ("Files / terminal", ("file", "folder", "directory", "terminal", "shell", "command", "cli", "script", "git", "repo", "install", "pip", "npm")),
+        ("Writing / docs", ("write", "summary", "document", "docs", "report", "email", "message", "readme", "markdown")),
+        ("Data / analysis", ("data", "analysis", "csv", "spreadsheet", "sheet", "table", "chart", "dashboard", "metric", "sql")),
+        ("Development / ops", ("code", "bug", "fix", "test", "debug", "api", "backend", "frontend", "database", "deploy", "docker", "service", "build")),
+    )
+    lowered = objective.lower()
+    for label, keywords in rules:
+        for keyword in keywords:
+            if keyword in lowered:
+                return label
+    return "Other"
+
+
class FakeJobManager:
def __init__(self, *, config: AppConfig, db: Any, broadcast: Any = None) -> None:
self.config = config
@@ -39,6 +57,7 @@ class FakeJobManager:
artifacts_dir.mkdir(parents=True, exist_ok=True)
screenshot_path = artifacts_dir / "screen_step_001.png"
screenshot_path.write_bytes(b"not-a-real-png")
+ created_at = f"2026-05-27T00:00:{self._counter:02d}Z"
self.last_submit_payload = {
"objective": objective,
"model": selected_model,
@@ -57,6 +76,10 @@ class FakeJobManager:
"objective": objective,
"model": selected_model,
"status": "running",
+ "created_at": created_at,
+ "started_at": created_at,
+ "ended_at": None,
+ "steps": 1,
"result": "Running",
"response": {"return": "Running", "data": None},
"return": "Running",
@@ -149,6 +172,114 @@ class FakeJobManager:
"live_running_threads": 0,
}
+ def analytics(self) -> dict[str, Any]:
+     """Aggregate the in-memory jobs into overall, per-category and per-day figures.
+
+     Every job counts towards ``total_jobs``. Only jobs whose status is in
+     ``_TERMINAL_STATUSES`` contribute to the finished/completed/failed/
+     cancelled counters and to the step and cost averages. Categories come
+     from ``_objective_category``; days are the first 10 characters of
+     ``created_at`` ("unknown" when empty).
+
+     Returns the overall figures plus ``by_category`` (sorted by descending
+     success rate, then label) and ``timeline`` (sorted by label).
+     """
+     status_fields = {
+         "completed": "completed_jobs",
+         "failed": "failed_jobs",
+         "cancelled": "cancelled_jobs",
+     }
+
+     def new_bucket(label: str) -> dict[str, Any]:
+         return {
+             "label": label,
+             "total_jobs": 0,
+             "finished_jobs": 0,
+             "completed_jobs": 0,
+             "failed_jobs": 0,
+             "cancelled_jobs": 0,
+             "steps_sum": 0,
+             "steps_count": 0,
+             "cost_sum": 0.0,
+             "cost_count": 0,
+         }
+
+     def summarize(acc: dict[str, Any]) -> dict[str, Any]:
+         done = acc["finished_jobs"]
+         summary: dict[str, Any] = {
+             field: acc[field]
+             for field in ("label", "total_jobs", "finished_jobs", "completed_jobs", "failed_jobs", "cancelled_jobs")
+         }
+         summary["success_rate"] = round((acc["completed_jobs"] / done) * 100, 2) if done else 0.0
+         summary["avg_steps"] = round(acc["steps_sum"] / acc["steps_count"], 2) if acc["steps_count"] else None
+         summary["avg_cost_usd"] = round(acc["cost_sum"] / acc["cost_count"], 6) if acc["cost_count"] else None
+         return summary
+
+     # The overall totals are tracked in a bucket of the same shape as the
+     # category/day buckets so one update loop covers all three.
+     overall = new_bucket("overall")
+     by_category: dict[str, dict[str, Any]] = {}
+     by_day: dict[str, dict[str, Any]] = {}
+
+     for job in self._jobs.values():
+         status = str(job.get("status") or "")
+         category = _objective_category(str(job.get("objective") or ""))
+         day = str(job.get("created_at") or "")[:10] or "unknown"
+         touched = (
+             overall,
+             by_category.setdefault(category, new_bucket(category)),
+             by_day.setdefault(day, new_bucket(day)),
+         )
+
+         for acc in touched:
+             acc["total_jobs"] += 1
+
+         if status not in _TERMINAL_STATUSES:
+             continue
+
+         steps_raw = job.get("steps")
+         steps = None if steps_raw is None else int(steps_raw)
+         cost_raw = (job.get("usage") or {}).get("estimated_cost_usd")
+         cost = None if cost_raw is None else float(cost_raw)
+         status_field = status_fields.get(status)
+
+         for acc in touched:
+             acc["finished_jobs"] += 1
+             if status_field is not None:
+                 acc[status_field] += 1
+             if steps is not None:
+                 acc["steps_sum"] += steps
+                 acc["steps_count"] += 1
+             if cost is not None:
+                 acc["cost_sum"] += cost
+                 acc["cost_count"] += 1
+
+     # The top-level payload carries the overall figures without a label.
+     report: dict[str, Any] = {
+         field: value for field, value in summarize(overall).items() if field != "label"
+     }
+     report["by_category"] = sorted(
+         map(summarize, by_category.values()),
+         key=lambda row: (-row["success_rate"], row["label"]),
+     )
+     report["timeline"] = sorted(map(summarize, by_day.values()), key=lambda row: row["label"])
+     return report
+
def _build_app(tmp_path: Path, monkeypatch: Any, disable_ui: bool = False):
monkeypatch.setattr(server_module, "JobManager", FakeJobManager)
@@ -276,12 +407,67 @@ def test_replay_endpoint_skips_visual_paths_outside_artifacts(tmp_path: Path, mo
assert payload["total_frames"] == 1
+def test_analytics_endpoint_groups_by_category_and_time(tmp_path: Path, monkeypatch: Any) -> None:
+    """GET /api/analytics reports overall, per-category and per-day aggregates."""
+    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
+    manager = app.state.manager
+    client = TestClient(app)
+    headers = {"Authorization": "Bearer test_token"}
+
+    def submit(objective: str) -> str:
+        response = client.post("/api/jobs", headers=headers, json={"job": objective})
+        return response.json()["job_id"]
+
+    # (objective, final status, ended_at, steps, created_at, estimated cost)
+    fixtures = [
+        ("Open amazon.de and checkout", "completed", "2026-05-27T00:10:00Z", 4, "2026-05-27T00:00:01Z", 0.12),
+        ("Open website and login", "failed", "2026-05-28T00:10:00Z", 6, "2026-05-28T00:00:01Z", 0.24),
+        ("Run a shell command to inspect files", "completed", "2026-05-28T00:15:00Z", 10, "2026-05-28T00:00:02Z", 0.05),
+    ]
+
+    # Submit every job first, then move each one into its terminal state.
+    job_ids = [submit(objective) for objective, *_ in fixtures]
+    for job_id, (_, status, ended_at, steps, created_at, cost) in zip(job_ids, fixtures):
+        record = manager._jobs[job_id]
+        record.update(
+            status=status,
+            ended_at=ended_at,
+            steps=steps,
+            created_at=created_at,
+            usage={**record["usage"], "estimated_cost_usd": cost},
+        )
+
+    analytics = client.get("/api/analytics", headers=headers)
+    assert analytics.status_code == 200
+    payload = analytics.json()
+
+    expected_totals = {
+        "total_jobs": 3,
+        "finished_jobs": 3,
+        "completed_jobs": 2,
+        "failed_jobs": 1,
+        "success_rate": 66.67,
+        "avg_steps": 6.67,
+        "avg_cost_usd": 0.136667,
+    }
+    for field, expected in expected_totals.items():
+        assert payload[field] == expected
+
+    rows_by_label = {row["label"]: row for row in payload["by_category"]}
+    browser = rows_by_label["Browser / web"]
+    terminal = rows_by_label["Files / terminal"]
+    assert browser["finished_jobs"] == 2
+    assert browser["success_rate"] == 50.0
+    assert browser["avg_steps"] == 5.0
+    assert terminal["success_rate"] == 100.0
+
+    day_labels = [row["label"] for row in payload["timeline"]]
+    assert day_labels == ["2026-05-27", "2026-05-28"]
+
+
def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
app_enabled, _ = _build_app(tmp_path / "enabled", monkeypatch, disable_ui=False)
client_enabled = TestClient(app_enabled)
root_enabled = client_enabled.get("/")
assert root_enabled.status_code == 200
assert "ScreenJob Monitor" in root_enabled.text
+ assert "Success by Objective Category" in root_enabled.text
js_enabled = client_enabled.get("/ui/monitoring.js")
assert js_enabled.status_code == 200
assert "const tokenInput" in js_enabled.text
diff --git a/tests/test_storage.py b/tests/test_storage.py
index 155ac7f..02da92c 100644
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -72,3 +72,55 @@ def test_storage_response_fallback_uses_result_when_json_missing(tmp_path: Path)
assert job is not None
assert job["response"]["return"] == "Legacy result string"
assert job["response"]["data"] is None
+
+
+def test_history_db_analytics_groups_by_category_and_day(tmp_path: Path) -> None:
+    """HistoryDB.analytics aggregates stored jobs overall, by category and by day."""
+    db = HistoryDB(tmp_path / "screenjob_test_analytics.db")
+
+    # (job_id, objective, created_at, final status, steps, estimated cost)
+    fixtures = [
+        ("job_browser_ok", "Open amazon.de and checkout", "2026-05-27T00:00:01Z", "completed", 4, 0.12),
+        ("job_browser_fail", "Open website and login", "2026-05-28T00:00:01Z", "failed", 6, 0.24),
+        ("job_terminal_ok", "Run a shell command to inspect files", "2026-05-28T00:00:02Z", "completed", 10, 0.05),
+    ]
+    for job_id, objective, created_at, status, steps, cost in fixtures:
+        db.create_job(
+            job_id=job_id,
+            objective=objective,
+            model="gpt-5.4-mini",
+            created_at=created_at,
+            safety_override=False,
+            disabled_tools=[],
+        )
+        db.update_job(job_id, status=status, steps=steps, estimated_cost_usd=cost)
+
+    analytics = db.analytics()
+    expected_totals = {
+        "total_jobs": 3,
+        "finished_jobs": 3,
+        "completed_jobs": 2,
+        "failed_jobs": 1,
+        "success_rate": 66.67,
+        "avg_steps": 6.67,
+        "avg_cost_usd": 0.136667,
+    }
+    for field, expected in expected_totals.items():
+        assert analytics[field] == expected
+
+    rows_by_label = {row["label"]: row for row in analytics["by_category"]}
+    browser = rows_by_label["Browser / web"]
+    terminal = rows_by_label["Files / terminal"]
+    assert browser["finished_jobs"] == 2
+    assert browser["success_rate"] == 50.0
+    assert browser["avg_steps"] == 5.0
+    assert terminal["success_rate"] == 100.0
+
+    day_labels = [row["label"] for row in analytics["timeline"]]
+    assert day_labels == ["2026-05-27", "2026-05-28"]
diff --git a/todo.md b/todo.md
index 7b75d22..0d54606 100644
--- a/todo.md
+++ b/todo.md
@@ -20,4 +20,4 @@
## P3
- [x] Add Replay Mode; Ability to replay a session by reconstructing the screen from screenshots and overlaying tool calls and click and type events.
-- [Idea] Add lightweight analytics dashboards (success rate by objective category, avg steps/cost over time).
+- [x] Add lightweight analytics dashboards (success rate by objective category, avg steps/cost over time).