Compare commits

..

3 Commits

Author SHA1 Message Date
Space-Banane
a8ef8ee552 Split monitor UI into separate HTML and JS assets
All checks were successful
CI / test (push) Successful in 7s
2026-05-27 22:01:06 +02:00
Space-Banane
111a1e84af feat: implement replay functionality with UI controls and backend support 2026-05-27 21:57:37 +02:00
Space-Banane
620fcc4aa6 removed slop 2026-05-27 21:53:32 +02:00
6 changed files with 808 additions and 307 deletions

View File

@@ -15,7 +15,7 @@ from pydantic import BaseModel, Field
from .config import AppConfig, load_app_config
from .storage import HistoryDB
from .task_manager import JobManager
from .ui import monitoring_page_html
from .ui import monitoring_js_path, monitoring_page_html
class CreateJobRequest(BaseModel):
@@ -30,6 +30,181 @@ class CreateJobRequest(BaseModel):
no_failsafe: bool = False
def _safe_int(value: Any) -> int | None:
try:
return int(value)
except Exception: # noqa: BLE001
return None
def _safe_text(value: Any, limit: int = 180) -> str:
    """Return a stripped string form of *value*, truncated to *limit* characters.

    Falsy values (``None``, ``0``, ``""``, ...) become the empty string. When
    the stripped text is longer than *limit*, the first *limit* characters are
    kept and ``"..."`` is appended.
    """
    cleaned = str(value if value else "").strip()
    return cleaned if len(cleaned) <= limit else cleaned[:limit] + "..."
def _resolve_artifact_path(artifacts_dir: Path | None, path_raw: Any) -> Path | None:
if artifacts_dir is None:
return None
text = str(path_raw or "").strip()
if not text:
return None
candidate = Path(text).resolve()
try:
candidate.relative_to(artifacts_dir)
except ValueError:
return None
return candidate
def _extract_replay_action(
    event: dict[str, Any],
    pending_tool_args: dict[tuple[int, str], list[dict[str, Any]]],
) -> dict[str, Any] | None:
    """Translate a ``tool_called`` / ``tool_result`` event into a replay action.

    Args:
        event: One stored job event (``event_type``, ``payload``, ``step``,
            ``ts``, ``id``).
        pending_tool_args: Mutable queue of call arguments keyed by
            ``(step, tool)``. A ``tool_called`` event appends its arguments;
            the matching ``tool_result`` pops the oldest entry so the result
            can be labelled with what was requested.

    Returns:
        A dict describing the action (always ``ts``, ``step``, ``event_id``,
        ``kind``, ``tool``, ``label`` plus tool-specific extras), or ``None``
        for any other event type.
    """
    event_type = str(event.get("event_type") or "")
    if event_type not in ("tool_called", "tool_result"):
        return None

    payload = event.get("payload")
    if not isinstance(payload, dict):
        payload = {}
    tool = str(payload.get("tool") or "").strip()
    # A malformed step/id must not abort the whole replay, so fall back to 0
    # instead of letting int() raise.
    step = _safe_int(event.get("step")) or 0
    action: dict[str, Any] = {
        "ts": str(event.get("ts") or ""),
        "step": step,
        "event_id": _safe_int(event.get("id")) or 0,
        "kind": event_type,
        "tool": tool,
    }

    if event_type == "tool_called":
        args = payload.get("args")
        if not isinstance(args, dict):
            args = {}
        if tool:
            # Remember the arguments so the later result can be described.
            pending_tool_args.setdefault((step, tool), []).append(args)
        action["label"] = f"Call: {tool}" if tool else "Tool call"
        if tool == "click":
            coord = args.get("coordinate")
            if isinstance(coord, dict):
                x = _safe_int(coord.get("x"))
                y = _safe_int(coord.get("y"))
                if x is not None and y is not None:
                    action["requested_click"] = {"x": x, "y": y}
                    action["label"] = f"Call: click ({x}, {y})"
        elif tool == "type":
            text = _safe_text(args.get("text"), 120)
            if text:
                action["text_preview"] = text
                action["label"] = f'Call: type "{text}"'
        return action

    # event_type == "tool_result"
    result = payload.get("result")
    if not isinstance(result, dict):
        result = {}
    key = (step, tool)
    matching_args: dict[str, Any] = {}
    queued = pending_tool_args.get(key)
    if queued:
        # Results are paired with calls in first-in, first-out order.
        matching_args = queued.pop(0)
        if not queued:
            pending_tool_args.pop(key, None)

    ok = bool(result.get("ok"))
    action["ok"] = ok
    action["label"] = f"Result: {tool}"

    if tool == "click":
        clicked = result.get("clicked")
        if not isinstance(clicked, dict):
            clicked = {}
        x = _safe_int(clicked.get("x"))
        y = _safe_int(clicked.get("y"))
        if x is not None and y is not None:
            action["click"] = {"x": x, "y": y}
            action["label"] = f"Clicked ({x}, {y})" if ok else f"Click failed ({x}, {y})"
    elif tool == "type":
        text = _safe_text(matching_args.get("text"), 120)
        typed_length = _safe_int(result.get("typed_length"))
        if typed_length is not None:
            action["typed_length"] = typed_length
        if text:
            action["text_preview"] = text
            action["label"] = f'Typed "{text}"'
    elif tool == "press_key":
        key_name = _safe_text(result.get("key"), 80)
        if key_name:
            action["label"] = f"Pressed {key_name}"
    elif tool == "execute_command":
        command = _safe_text(matching_args.get("command"), 140)
        if command:
            action["command_preview"] = command
            action["label"] = f"Command: {command}"
    return action
def _build_replay_payload(job_id: str, job: dict[str, Any], events: list[dict[str, Any]]) -> dict[str, Any]:
    """Group a job's events into screenshot frames with their preceding actions.

    Every ``visual_update`` event whose image resolves to an existing file
    inside the job's ``artifacts_dir`` becomes one frame. Tool actions seen
    since the previous frame are attached to it as ``overlays``; actions that
    follow the last frame are returned as ``trailing_events``. Actions
    buffered before a skipped screenshot carry over to the next usable frame.

    Args:
        job_id: Identifier echoed back in the payload.
        job: Job record; only ``artifacts_dir`` is read.
        events: Job events in the order they should be replayed.

    Returns:
        Dict with ``job_id``, ``total_events``, ``total_frames``, ``frames``
        and ``trailing_events``.
    """
    artifacts_dir_raw = str(job.get("artifacts_dir") or "").strip()
    artifacts_dir = Path(artifacts_dir_raw).resolve() if artifacts_dir_raw else None

    pending_tool_args: dict[tuple[int, str], list[dict[str, Any]]] = {}
    buffered_actions: list[dict[str, Any]] = []
    frames: list[dict[str, Any]] = []

    for event in events:
        action = _extract_replay_action(event, pending_tool_args)
        if action is not None:
            buffered_actions.append(action)
        if str(event.get("event_type") or "") != "visual_update":
            continue

        payload = event.get("payload")
        if not isinstance(payload, dict):
            payload = {}
        image_meta = payload.get("image_meta")
        if not isinstance(image_meta, dict):
            image_meta = {}

        resolved = _resolve_artifact_path(artifacts_dir, image_meta.get("path"))
        # is_file() is already False for paths that do not exist.
        if resolved is None or not resolved.is_file():
            continue

        width = _safe_int(image_meta.get("width"))
        height = _safe_int(image_meta.get("height"))
        if width is None or height is None:
            # Fall back to the nested screen_size entry when the top-level
            # dimensions are missing or unusable.
            size = image_meta.get("screen_size")
            if not isinstance(size, dict):
                size = {}
            width = _safe_int(size.get("width"))
            height = _safe_int(size.get("height"))

        kind = str(payload.get("kind") or "")
        is_fullscreen = (
            kind == "see_screen"
            and bool(image_meta.get("grid"))
            and width is not None
            and height is not None
            and width > 0
            and height > 0
        )
        frames.append(
            {
                "frame_index": len(frames),
                # Malformed ids/steps degrade to 0 rather than failing the request.
                "event_id": _safe_int(event.get("id")) or 0,
                "ts": str(event.get("ts") or ""),
                "step": _safe_int(event.get("step")) or 0,
                "kind": kind or "visual_update",
                "image_path": str(resolved),
                "image_meta": image_meta,
                "screen_size": {"width": width, "height": height} if width and height else None,
                "is_fullscreen": is_fullscreen,
                "overlays": buffered_actions,
            }
        )
        # Start a fresh buffer; the frame keeps the list it was given.
        buffered_actions = []

    return {
        "job_id": job_id,
        "total_events": len(events),
        "total_frames": len(frames),
        "frames": frames,
        "trailing_events": buffered_actions,
    }
class _WebSocketHub:
def __init__(self) -> None:
self._connections: set[WebSocket] = set()
@@ -161,6 +336,18 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
raise HTTPException(status_code=404, detail="Job not found")
return {"events": manager.get_events(job_id, limit=limit)}
@app.get("/api/jobs/{job_id}/replay")
def get_job_replay(
    job_id: str,
    limit: int = Query(default=5000, ge=1, le=5000),
    _: None = Depends(require_token),
) -> dict[str, Any]:
    # Token-protected endpoint: 404 for unknown jobs, otherwise the replay
    # payload built from at most `limit` stored events.
    job_record = manager.get_job(job_id)
    if job_record is None:
        raise HTTPException(status_code=404, detail="Job not found")
    return _build_replay_payload(
        job_id,
        job_record,
        manager.get_events(job_id, limit=limit),
    )
@app.post("/api/jobs/{job_id}/cancel")
def cancel_job(job_id: str, _: None = Depends(require_token)) -> dict[str, Any]:
job = manager.get_job(job_id)
@@ -200,6 +387,10 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
def ui_root() -> str:
return monitoring_page_html(device_hostname=device_hostname)
@app.get("/ui/monitoring.js")
def ui_monitoring_js() -> FileResponse:
    # Serves the script that the monitoring page loads via <script src=...>.
    js_file = monitoring_js_path()
    return FileResponse(str(js_file), media_type="application/javascript")
@app.websocket("/ws")
async def ws_endpoint(websocket: WebSocket, token: str = Query(default="")) -> None:
if not token or not secrets.compare_digest(token, app_config.screenjob_token):

310
src/ui.py
View File

@@ -1,307 +1,19 @@
from __future__ import annotations
from html import escape
from pathlib import Path
_UI_DIR = Path(__file__).resolve().parent / "ui_assets"
_HTML_TEMPLATE_PATH = _UI_DIR / "monitoring.html"
_JS_PATH = _UI_DIR / "monitoring.js"
def monitoring_page_html(device_hostname: str = "") -> str:
    """Render the monitoring page from the ``monitoring.html`` template asset.

    The template is read from ``_HTML_TEMPLATE_PATH`` on every call and its
    ``__MONITOR_HOST__`` placeholder is replaced with the HTML-escaped host
    name wrapped in parentheses, or with nothing when no host name is given.

    Args:
        device_hostname: Optional host name shown next to the page title.

    Returns:
        The complete HTML document as a string.
    """
    host_suffix = f" ({escape(device_hostname)})" if device_hostname else ""
    template = _HTML_TEMPLATE_PATH.read_text(encoding="utf-8")
    return template.replace("__MONITOR_HOST__", host_suffix)
def monitoring_js_path() -> Path:
    """Return the filesystem path of the monitoring page's JavaScript asset."""
    return _JS_PATH

View File

@@ -0,0 +1,82 @@
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>ScreenJob Monitor</title>
  <script src="https://cdn.tailwindcss.com"></script>
</head>
<body class="bg-slate-950 text-slate-100 min-h-screen">
  <div class="max-w-7xl mx-auto p-4 md:p-8 space-y-6">
    <!-- Title and token entry. __MONITOR_HOST__ is substituted by the server. -->
    <header class="flex flex-col gap-3 md:flex-row md:items-center md:justify-between">
      <div>
        <h1 class="text-2xl md:text-3xl font-bold tracking-tight">ScreenJob Monitor<span class="text-slate-400 text-base md:text-lg font-medium">__MONITOR_HOST__</span></h1>
        <p class="text-slate-400 text-sm">Read-only monitoring for active and historical tasks.</p>
      </div>
      <div class="flex flex-col md:flex-row gap-2 md:items-center">
        <input id="tokenInput" type="password" placeholder="SCREENJOB_TOKEN" aria-label="ScreenJob API token" class="bg-slate-900 border border-slate-700 rounded px-3 py-2 text-sm w-72" />
        <button id="saveTokenBtn" type="button" class="bg-cyan-500 hover:bg-cyan-400 text-slate-950 font-semibold px-4 py-2 rounded">Connect</button>
      </div>
    </header>

    <!-- Summary cards, filled in by monitoring.js. -->
    <section class="grid grid-cols-2 md:grid-cols-6 gap-3" id="stats"></section>

    <section class="grid grid-cols-1 lg:grid-cols-5 gap-4">
      <!-- Job list -->
      <div class="lg:col-span-2 bg-slate-900/70 border border-slate-800 rounded-xl p-4">
        <div class="flex items-center justify-between mb-3">
          <h2 class="font-semibold">Jobs</h2>
          <button id="refreshBtn" type="button" class="text-xs bg-slate-800 px-2 py-1 rounded">Refresh</button>
        </div>
        <div id="jobList" class="space-y-2 max-h-[62vh] overflow-auto"></div>
      </div>

      <!-- Detail, latest screenshot, replay and live events for the selected job -->
      <div class="lg:col-span-3 bg-slate-900/70 border border-slate-800 rounded-xl p-4 space-y-3">
        <h2 class="font-semibold">Job Detail</h2>
        <pre id="jobDetail" class="bg-slate-950 border border-slate-800 rounded p-3 text-xs overflow-auto max-h-[24vh]"></pre>

        <h3 class="font-semibold text-sm">Latest Visual</h3>
        <div class="bg-slate-950 border border-slate-800 rounded p-2">
          <img id="latestVisual" alt="Latest visual update" class="max-h-[24vh] w-full object-contain rounded" />
        </div>

        <div class="flex items-center justify-between">
          <h3 class="font-semibold text-sm">Replay</h3>
          <div id="replayStatus" class="text-[11px] text-slate-400">No replay loaded.</div>
        </div>
        <div class="flex flex-wrap items-center gap-2">
          <button id="replayPlayBtn" type="button" class="text-xs bg-slate-800 px-2 py-1 rounded">Play</button>
          <button id="replayPrevBtn" type="button" class="text-xs bg-slate-800 px-2 py-1 rounded">Prev</button>
          <button id="replayNextBtn" type="button" class="text-xs bg-slate-800 px-2 py-1 rounded">Next</button>
          <label class="text-xs text-slate-300 flex items-center gap-1">
            Speed
            <select id="replaySpeed" class="bg-slate-900 border border-slate-700 rounded px-1 py-0.5">
              <option value="0.5">0.5x</option>
              <option value="1" selected>1.0x</option>
              <option value="1.5">1.5x</option>
              <option value="2">2.0x</option>
            </select>
          </label>
        </div>
        <input id="replaySeek" type="range" min="0" max="0" value="0" aria-label="Replay position" class="w-full accent-cyan-400" />
        <div class="bg-slate-950 border border-slate-800 rounded p-2">
          <div class="relative w-full min-h-[180px] bg-black/40 rounded">
            <img id="replayVisual" alt="Replay frame" class="max-h-[30vh] w-full object-contain rounded" />
            <!-- Click markers are drawn here by monitoring.js; purely decorative. -->
            <svg id="replayOverlay" aria-hidden="true" class="absolute inset-0 w-full h-full pointer-events-none" preserveAspectRatio="xMidYMid meet"></svg>
          </div>
          <div id="replayFrameMeta" class="text-[11px] text-slate-400 mt-2"></div>
          <div id="replayFrameEvents" class="mt-2 space-y-1"></div>
        </div>

        <div class="flex items-center justify-between">
          <h3 class="font-semibold text-sm">Live Events</h3>
          <label for="eventsViewToggle" class="flex items-center gap-2 text-xs text-slate-300 cursor-pointer select-none">
            <span>Raw</span>
            <input id="eventsViewToggle" type="checkbox" class="accent-cyan-400 h-4 w-4" />
            <span>Beautiful</span>
          </label>
        </div>
        <div id="events" class="bg-slate-950 border border-slate-800 rounded p-3 text-xs overflow-auto max-h-[36vh] space-y-1"></div>
      </div>
    </section>
  </div>
  <script src="/ui/monitoring.js"></script>
</body>
</html>

418
src/ui_assets/monitoring.js Normal file
View File

@@ -0,0 +1,418 @@
// --- DOM handles for the elements declared in monitoring.html ---
const tokenInput = document.getElementById("tokenInput");
const saveTokenBtn = document.getElementById("saveTokenBtn");
const refreshBtn = document.getElementById("refreshBtn");
const jobListEl = document.getElementById("jobList");
const jobDetailEl = document.getElementById("jobDetail");
const eventsEl = document.getElementById("events");
const statsEl = document.getElementById("stats");
const latestVisualEl = document.getElementById("latestVisual");
const eventsViewToggle = document.getElementById("eventsViewToggle");
// Replay panel elements.
const replayVisualEl = document.getElementById("replayVisual");
const replayOverlayEl = document.getElementById("replayOverlay");
const replayFrameMetaEl = document.getElementById("replayFrameMeta");
const replayFrameEventsEl = document.getElementById("replayFrameEvents");
const replayStatusEl = document.getElementById("replayStatus");
const replayPlayBtn = document.getElementById("replayPlayBtn");
const replayPrevBtn = document.getElementById("replayPrevBtn");
const replayNextBtn = document.getElementById("replayNextBtn");
const replaySpeedEl = document.getElementById("replaySpeed");
const replaySeekEl = document.getElementById("replaySeek");

// Single mutable state object shared by every function in this script.
const state = {
  // API token, persisted in localStorage under "screenjob_token".
  token: localStorage.getItem("screenjob_token") || "",
  jobs: [],
  selectedJobId: null,
  ws: null,
  wsReconnectTimer: null,
  // Any stored value other than "beautiful" falls back to "raw".
  eventsViewMode: localStorage.getItem("screenjob_events_view_mode") === "beautiful" ? "beautiful" : "raw",
  // Replay playback state; `timer` holds the pending setTimeout handle.
  replay: {
    frames: [],
    trailingEvents: [],
    frameIndex: 0,
    isPlaying: false,
    speed: 1,
    timer: null
  }
};
// Sockets closed on purpose; the close handler uses this to skip reconnecting.
const manuallyClosedSockets = new WeakSet();
// Pre-fill the token field with the stored token.
tokenInput.value = state.token;
// Builds the Authorization header carrying the current token.
function authHeaders() {
  const bearer = "Bearer " + state.token;
  return { Authorization: bearer };
}
// Authenticated fetch helper: resolves with the parsed JSON body, rejects with
// the response text on a non-2xx status or when no token is set.
async function api(path, opts = {}) {
  if (!state.token) {
    throw new Error("Token required");
  }
  const mergedHeaders = { ...authHeaders(), ...(opts.headers || {}) };
  const res = await fetch(path, { ...opts, headers: mergedHeaders });
  if (res.ok) {
    return res.json();
  }
  throw new Error(await res.text());
}
// Renders the summary cards. Values are written with textContent so that
// nothing returned by the server is ever parsed as HTML.
function renderStats(stats) {
  const cards = [
    ["Total Jobs", stats.total_jobs || 0],
    ["Running", stats.running_jobs || 0],
    ["Completed", stats.completed_jobs || 0],
    ["Failed", stats.failed_jobs || 0],
    ["Cancelled", stats.cancelled_jobs || 0],
    ["Total Cost (USD)", Number(stats.total_estimated_cost || 0).toFixed(4)]
  ];
  const nodes = cards.map(([name, val]) => {
    const card = document.createElement("div");
    card.className = "bg-slate-900/70 border border-slate-800 rounded-xl p-3";
    const label = document.createElement("div");
    label.className = "text-slate-400 text-xs";
    label.textContent = name;
    const value = document.createElement("div");
    value.className = "text-lg font-semibold";
    value.textContent = String(val);
    card.append(label, value);
    return card;
  });
  statsEl.replaceChildren(...nodes);
}
// Renders the job list as buttons. Job fields (notably the free-text
// objective) are inserted with textContent, never as HTML, so a job cannot
// inject markup into the page.
function renderJobs() {
  // Appends <tag class=className> with plain-text content to parent.
  const addText = (parent, tag, className, text) => {
    const el = document.createElement(tag);
    el.className = className;
    el.textContent = String(text ?? "");
    parent.appendChild(el);
    return el;
  };

  const buttons = state.jobs.map((job) => {
    const active = job.job_id === state.selectedJobId;
    const btn = document.createElement("button");
    btn.type = "button";
    btn.setAttribute("data-job-id", job.job_id);
    btn.className = `w-full text-left p-3 rounded border ${active ? "border-cyan-400 bg-slate-800" : "border-slate-800 bg-slate-950"} hover:bg-slate-800`;

    const head = document.createElement("div");
    head.className = "flex items-center justify-between";
    addText(head, "span", "font-medium", job.job_id);
    addText(head, "span", "text-xs px-2 py-0.5 rounded bg-slate-700", job.status);
    btn.appendChild(head);

    addText(btn, "div", "text-xs text-slate-400 mt-1", job.model);
    addText(btn, "div", "text-xs text-slate-300 mt-1 line-clamp-2", job.objective);
    const cost = Number((job.usage && job.usage.estimated_cost_usd) || 0).toFixed(6);
    addText(btn, "div", "text-xs text-slate-500 mt-1", "$" + cost);

    btn.addEventListener("click", () => {
      state.selectedJobId = btn.getAttribute("data-job-id");
      renderJobs();
      // Surface load failures instead of leaving an unhandled rejection.
      Promise.resolve(refreshJobDetail()).catch((err) => alert(err.message));
    });
    return btn;
  });
  jobListEl.replaceChildren(...buttons);
}
// Prepends one event to the live event list, in either the compact "raw"
// layout or the card-style "beautiful" layout, and caps the list at 400 rows.
function pushEventLine(obj) {
  if (!obj || !obj.job_id || !obj.event_type) return;
  const line = document.createElement("div");
  const ts = obj.ts || "-";
  const step = (obj.step ?? "-");
  if (state.eventsViewMode === "raw") {
    line.className = "border-b border-slate-800 pb-1";
    line.textContent = `[${ts}] ${obj.job_id} step=${step} ${obj.event_type} ${JSON.stringify(obj.payload || {})}`;
  } else {
    const toolCallColor = "bg-violet-900/40 text-violet-200 border border-violet-800";
    const typeColors = {
      info: "bg-sky-900/50 text-sky-200 border border-sky-800",
      warning: "bg-amber-900/40 text-amber-200 border border-amber-800",
      error: "bg-rose-900/40 text-rose-200 border border-rose-800",
      visual_update: "bg-emerald-900/40 text-emerald-200 border border-emerald-800",
      tool_call: toolCallColor,
      // The backend's replay code matches the event type "tool_called"; map
      // that spelling too so those events get their colour.
      tool_called: toolCallColor,
      tool_result: "bg-indigo-900/40 text-indigo-200 border border-indigo-800"
    };
    const dt = new Date(ts);
    // Fall back to the raw timestamp string when it does not parse as a date.
    const tsText = Number.isNaN(dt.getTime()) ? ts : dt.toLocaleString();
    const payload = obj.payload || {};
    line.className = "rounded-lg border border-slate-800 bg-slate-900/80 p-2 space-y-2";

    const header = document.createElement("div");
    header.className = "flex flex-wrap items-center gap-2";
    const typePill = document.createElement("span");
    typePill.className = `px-2 py-0.5 rounded text-[10px] font-semibold ${typeColors[obj.event_type] || "bg-slate-800 text-slate-200 border border-slate-700"}`;
    typePill.textContent = obj.event_type;
    const stepPill = document.createElement("span");
    stepPill.className = "px-2 py-0.5 rounded text-[10px] bg-slate-800 text-slate-300 border border-slate-700";
    stepPill.textContent = `step ${step}`;
    const tsSpan = document.createElement("span");
    tsSpan.className = "text-[10px] text-slate-400";
    tsSpan.textContent = tsText;
    header.appendChild(typePill);
    header.appendChild(stepPill);
    header.appendChild(tsSpan);

    const jobLine = document.createElement("div");
    jobLine.className = "text-[11px] text-slate-300 font-medium";
    jobLine.textContent = obj.job_id;
    const body = document.createElement("pre");
    body.className = "bg-slate-950 border border-slate-800 rounded p-2 text-[11px] text-slate-200 overflow-auto";
    body.textContent = JSON.stringify(payload, null, 2);
    line.appendChild(header);
    line.appendChild(jobLine);
    line.appendChild(body);
  }
  eventsEl.prepend(line);
  // Newest rows are on top, so trimming removes the oldest ones.
  while (eventsEl.childNodes.length > 400) {
    eventsEl.removeChild(eventsEl.lastChild);
  }
}
// Cancels the pending replay tick, if one is scheduled.
function clearReplayTimer() {
  const pending = state.replay.timer;
  if (!pending) return;
  clearTimeout(pending);
  state.replay.timer = null;
}
// Halts playback: cancels the pending tick, clears the playing flag and
// restores the button caption.
function stopReplay() {
  clearReplayTimer();
  state.replay.isPlaying = false;
  replayPlayBtn.textContent = "Play";
}
// Builds the artifact URL for a frame image of the selected job. The token is
// passed in the query string because the URL is used as an <img> src.
function replayImageSrc(path) {
  const encodedPath = encodeURIComponent(path || "");
  const encodedToken = encodeURIComponent(state.token);
  const base = `/api/jobs/${state.selectedJobId}/artifact`;
  return `${base}?path=${encodedPath}&token=${encodedToken}`;
}
// Draws a halo and a dot on the SVG overlay for every click result attached to
// the frame. Only frames flagged is_fullscreen with a known screen size get
// markers; for anything else the overlay is cleared and its viewBox removed.
function renderReplayOverlay(frame) {
  const SVG_NS = "http://www.w3.org/2000/svg";
  // Creates an SVG <circle> and applies the given attributes in order.
  const makeCircle = (attrs) => {
    const circle = document.createElementNS(SVG_NS, "circle");
    for (const [key, val] of Object.entries(attrs)) {
      circle.setAttribute(key, val);
    }
    return circle;
  };

  replayOverlayEl.innerHTML = "";
  const size = frame && frame.screen_size;
  const drawable = Boolean(frame && frame.is_fullscreen && size && size.width && size.height);
  if (!drawable) {
    replayOverlayEl.removeAttribute("viewBox");
    return;
  }
  // Use screen coordinates as the SVG coordinate system.
  replayOverlayEl.setAttribute("viewBox", `0 0 ${size.width} ${size.height}`);

  const overlays = Array.isArray(frame.overlays) ? frame.overlays : [];
  overlays.forEach((ev) => {
    const isClickResult = ev && ev.kind === "tool_result" && ev.tool === "click" && ev.click;
    if (!isClickResult) return;
    const x = Number(ev.click.x);
    const y = Number(ev.click.y);
    if (!Number.isFinite(x) || !Number.isFinite(y)) return;
    const halo = makeCircle({
      cx: String(x),
      cy: String(y),
      r: "14",
      fill: "rgba(14, 165, 233, 0.22)",
      stroke: "#38bdf8",
      "stroke-width": "2"
    });
    const dot = makeCircle({
      cx: String(x),
      cy: String(y),
      r: "4",
      fill: "#38bdf8"
    });
    replayOverlayEl.appendChild(halo);
    replayOverlayEl.appendChild(dot);
  });
}
// Lists the last eight actions attached to the frame, or a placeholder line
// when the frame has none. A missing frame just clears the list.
function renderReplayFrameEvents(frame) {
  replayFrameEventsEl.innerHTML = "";
  if (!frame) return;

  const recent = (Array.isArray(frame.overlays) ? frame.overlays : []).slice(-8);
  if (recent.length === 0) {
    const placeholder = document.createElement("div");
    placeholder.className = "text-[11px] text-slate-500";
    placeholder.textContent = "No overlay events for this frame.";
    replayFrameEventsEl.appendChild(placeholder);
    return;
  }

  recent.forEach((ev) => {
    const row = document.createElement("div");
    row.className = "text-[11px] rounded border border-slate-800 bg-slate-900/80 px-2 py-1";
    const fallback = `${ev.kind || "event"} ${ev.tool || ""}`.trim();
    row.textContent = ev.label || fallback;
    replayFrameEventsEl.appendChild(row);
  });
}
// Shows the replay frame at `index` (clamped to the valid range) and syncs the
// seek slider, meta line, status text, overlay and action list. With no frames
// loaded, the whole replay panel is reset.
function setReplayFrame(index) {
  const frames = state.replay.frames;
  if (!frames.length) {
    replayVisualEl.removeAttribute("src");
    // Also clear the overlay and the action list, otherwise markers and
    // actions of a previously shown job stay on screen.
    renderReplayOverlay(null);
    renderReplayFrameEvents(null);
    replayFrameMetaEl.textContent = "No replay frames.";
    replaySeekEl.value = "0";
    replaySeekEl.max = "0";
    replayStatusEl.textContent = "No replay loaded.";
    return;
  }
  // Accept numeric strings (e.g. a slider value) and ignore fractions or NaN
  // so the lookup below always hits a real frame.
  const requested = Math.trunc(Number(index)) || 0;
  const bounded = Math.max(0, Math.min(requested, frames.length - 1));
  state.replay.frameIndex = bounded;
  const frame = frames[bounded];
  replayVisualEl.src = replayImageSrc(frame.image_path);
  replayFrameMetaEl.textContent = `Frame ${bounded + 1}/${frames.length} | step ${frame.step} | ${frame.kind} | ${frame.ts}`;
  // Set max before value so the value is not clamped to a stale maximum.
  replaySeekEl.max = String(Math.max(0, frames.length - 1));
  replaySeekEl.value = String(bounded);
  replayStatusEl.textContent = state.replay.isPlaying ? "Playing replay." : "Replay ready.";
  renderReplayOverlay(frame);
  renderReplayFrameEvents(frame);
}
/**
 * Playback tick: move to the next frame and schedule the following tick.
 *
 * Does nothing unless playback is active and frames exist. On the last frame
 * playback is stopped and the status reads "Replay finished.".
 */
function advanceReplay() {
  const replay = state.replay;
  const frames = replay.frames;
  if (!(replay.isPlaying && frames.length)) return;

  const lastIndex = frames.length - 1;
  if (replay.frameIndex < lastIndex) {
    setReplayFrame(replay.frameIndex + 1);
    clearReplayTimer();
    // Base interval of 700 ms scaled by the speed factor, never below 120 ms.
    const interval = Math.round(700 / (replay.speed || 1));
    replay.timer = setTimeout(advanceReplay, Math.max(120, interval));
    return;
  }

  stopReplay();
  setReplayFrame(lastIndex);
  // Written after setReplayFrame, which sets its own status text.
  replayStatusEl.textContent = "Replay finished.";
}
/**
 * Play/pause handler for the replay controls.
 *
 * Ignored when no frames are loaded. While playing it stops playback and
 * re-renders the current frame; otherwise it marks playback active, updates
 * the button and status text, and runs the first tick.
 */
function toggleReplayPlay() {
  const replay = state.replay;
  if (replay.frames.length === 0) return;

  if (!replay.isPlaying) {
    replay.isPlaying = true;
    replayPlayBtn.textContent = "Pause";
    replayStatusEl.textContent = "Playing replay.";
    advanceReplay();
    return;
  }

  stopReplay();
  // Re-render so the status line reflects the paused state.
  setReplayFrame(replay.frameIndex);
}
/**
 * Load a replay payload into the replay state and render it.
 *
 * A payload for a different job (or no payload) stops playback and rewinds to
 * the first frame. A payload for the job already loaded keeps the current
 * frame position and any running playback, so the periodic detail refreshes
 * triggered by live events do not interrupt the viewer.
 *
 * @param {object|null} payload Replay response; `frames`, `trailing_events`
 *   and `job_id` are read. NOTE(review): assumes the replay endpoint includes
 *   `job_id`; when it is absent the replay is always rewound.
 */
function resetReplay(payload) {
  const replayPayload = payload || {};
  const frames = Array.isArray(replayPayload.frames) ? replayPayload.frames : [];
  const jobId = replayPayload.job_id === undefined ? null : replayPayload.job_id;
  const sameJob = jobId !== null && jobId === state.replay.jobId;
  // Only interrupt playback when the replay really changed identity, or when
  // there is nothing left to play.
  if (!sameJob || !frames.length) stopReplay();
  state.replay.frames = frames;
  state.replay.trailingEvents = Array.isArray(replayPayload.trailing_events) ? replayPayload.trailing_events : [];
  state.replay.jobId = jobId;
  state.replay.frameIndex = sameJob ? (state.replay.frameIndex || 0) : 0;
  // setReplayFrame clamps the index, which covers a shrunken frame list.
  setReplayFrame(state.replay.frameIndex);
}
/**
 * Arm a one-shot websocket reconnect attempt 1200 ms from now.
 *
 * No-op when a reconnect is already pending or no token is configured.
 */
function scheduleWsReconnect() {
  const alreadyPending = Boolean(state.wsReconnectTimer);
  if (alreadyPending || !state.token) return;

  const retry = () => {
    // Clear the handle first so a later close can schedule another attempt.
    state.wsReconnectTimer = null;
    connectWs();
  };
  state.wsReconnectTimer = setTimeout(retry, 1200);
}
/**
 * Point the "latest visual" image at the screenshot named by an event.
 *
 * Only `visual_update` events belonging to the selected job and carrying
 * `payload.image_meta.path` have an effect.
 *
 * @param {object|null} ev Event object (from the events API or the websocket).
 */
function updateLatestVisualFromEvent(ev) {
  const isVisualUpdate = Boolean(ev) && ev.event_type === "visual_update";
  if (!isVisualUpdate) return;

  const jobId = state.selectedJobId;
  if (!jobId || ev.job_id !== jobId) return;

  const meta = ev.payload && ev.payload.image_meta;
  const imagePath = meta && meta.path;
  if (!imagePath) return;

  const pathParam = encodeURIComponent(imagePath);
  const tokenParam = encodeURIComponent(state.token);
  latestVisualEl.src = `/api/jobs/${jobId}/artifact?path=${pathParam}&token=${tokenParam}`;
}
/**
 * Fetch the job list (up to 100 entries), store it and re-render it.
 *
 * When no job is selected yet, the first job in the response becomes the
 * selection.
 */
async function refreshJobs() {
  const response = await api("/api/jobs?limit=100");
  state.jobs = response.jobs || [];

  const nothingSelected = !state.selectedJobId;
  if (nothingSelected && state.jobs.length > 0) {
    state.selectedJobId = state.jobs[0].job_id;
  }
  renderJobs();
}
/** Fetch `/api/stats` and hand the response to the stats renderer. */
async function refreshStats() {
  renderStats(await api("/api/stats"));
}
/**
 * Reload the detail pane for the selected job.
 *
 * Fetches the job record, its most recent events (limit 120) and its replay
 * payload in parallel, then re-renders the JSON detail, the event list, the
 * latest screenshot and the replay viewer. Does nothing when no job is
 * selected, and discards the responses if the selection changed while the
 * requests were in flight.
 */
async function refreshJobDetail() {
  // Capture the id once so all three requests and the staleness check agree.
  const jobId = state.selectedJobId;
  if (!jobId) return;
  const [job, events, replay] = await Promise.all([
    api(`/api/jobs/${jobId}`),
    api(`/api/jobs/${jobId}/events?limit=120`),
    api(`/api/jobs/${jobId}/replay?limit=5000`)
  ]);
  // Rendering now would show the old job's data under the new selection.
  if (state.selectedJobId !== jobId) return;

  jobDetailEl.textContent = JSON.stringify(job, null, 2);
  eventsEl.innerHTML = "";
  const list = (events.events || []).slice().reverse();
  for (const ev of list) pushEventLine(ev);

  // Pick the newest visual_update by event id rather than by list position,
  // so the result does not depend on the ordering the API returns.
  // NOTE(review): assumes event ids grow over time; without ids the first
  // match in `list` is used, as before.
  let latestVisual = null;
  for (const ev of list) {
    if (!ev || ev.event_type !== "visual_update") continue;
    if (!latestVisual || Number(ev.id || 0) > Number(latestVisual.id || 0)) {
      latestVisual = ev;
    }
  }
  if (latestVisual) updateLatestVisualFromEvent(latestVisual);
  resetReplay(replay);
}
/**
 * Open the monitoring websocket unless one is already open or connecting.
 *
 * Requires a token. Each incoming event (other than the "connected" greeting)
 * is appended to the event list, may update the latest screenshot, and
 * triggers a refresh of the job detail (when relevant), job list and stats.
 * When the socket closes without having been closed deliberately, a reconnect
 * is scheduled.
 */
function connectWs() {
  if (!state.token) return;

  const current = state.ws;
  const liveStates = [WebSocket.OPEN, WebSocket.CONNECTING];
  if (current && liveStates.includes(current.readyState)) return;

  const scheme = location.protocol === "https:" ? "wss" : "ws";
  const tokenParam = encodeURIComponent(state.token);
  const socket = new WebSocket(`${scheme}://${location.host}/ws?token=${tokenParam}`);
  state.ws = socket;

  socket.onmessage = async (message) => {
    try {
      const payload = JSON.parse(message.data);
      if (!payload || payload.event_type === "connected") return;

      pushEventLine(payload);
      updateLatestVisualFromEvent(payload);

      const affectsDetail = !state.selectedJobId || payload.job_id === state.selectedJobId;
      if (affectsDetail) {
        await refreshJobDetail();
      }
      await refreshJobs();
      await refreshStats();
    } catch (err) {
      // Parsing or refresh failures are logged; the socket stays usable.
      console.error(err);
    }
  };

  socket.onclose = () => {
    // Only forget the socket if it is still the active one.
    if (state.ws === socket) state.ws = null;

    const closedOnPurpose = manuallyClosedSockets.has(socket);
    if (closedOnPurpose) {
      manuallyClosedSockets.delete(socket);
      return;
    }
    scheduleWsReconnect();
  };
}
/**
 * Refresh the job list, the stats and the selected job's detail, in that
 * order, each step waiting for the previous one.
 */
async function fullRefresh() {
  const steps = [refreshJobs, refreshStats, refreshJobDetail];
  for (const step of steps) {
    await step();
  }
}
/**
 * Apply the token from the input field and (re)establish the session.
 *
 * Persists the token to localStorage, deliberately closes any existing
 * websocket, cancels a pending reconnect, performs a full refresh and then
 * opens a new websocket.
 */
async function connect() {
  const token = tokenInput.value.trim();
  state.token = token;
  localStorage.setItem("screenjob_token", token);

  const previousSocket = state.ws;
  if (previousSocket) {
    // Mark as intentional so its close handler does not schedule a reconnect.
    manuallyClosedSockets.add(previousSocket);
    try {
      previousSocket.close();
    } catch (_) {}
    state.ws = null;
  }

  const pendingReconnect = state.wsReconnectTimer;
  if (pendingReconnect) {
    clearTimeout(pendingReconnect);
    state.wsReconnectTimer = null;
  }

  await fullRefresh();
  connectWs();
}
/** Reflect the stored events view mode in the toggle checkbox. */
function syncEventsViewToggle() {
  const isBeautiful = state.eventsViewMode === "beautiful";
  eventsViewToggle.checked = isBeautiful;
}
// --- Control wiring and startup -------------------------------------------

// Token and refresh buttons: surface failures to the user.
saveTokenBtn.addEventListener("click", () => connect().catch((err) => alert(err.message)));
refreshBtn.addEventListener("click", () => fullRefresh().catch((err) => alert(err.message)));

// Events view toggle: persist the choice and re-render the event list.
eventsViewToggle.addEventListener("change", () => {
  state.eventsViewMode = eventsViewToggle.checked ? "beautiful" : "raw";
  localStorage.setItem("screenjob_events_view_mode", state.eventsViewMode);
  refreshJobDetail().catch((err) => alert(err.message));
});

// Replay transport controls. Manual navigation always pauses playback first.
replayPlayBtn.addEventListener("click", () => toggleReplayPlay());
replayPrevBtn.addEventListener("click", () => {
  stopReplay();
  setReplayFrame(state.replay.frameIndex - 1);
});
replayNextBtn.addEventListener("click", () => {
  stopReplay();
  setReplayFrame(state.replay.frameIndex + 1);
});
replaySpeedEl.addEventListener("change", () => {
  const speed = Number(replaySpeedEl.value);
  // Fall back to normal speed for anything that is not a positive number.
  state.replay.speed = Number.isFinite(speed) && speed > 0 ? speed : 1;
  if (state.replay.isPlaying) {
    // Restart the tick so the new interval takes effect immediately.
    clearReplayTimer();
    advanceReplay();
  }
});
replaySeekEl.addEventListener("input", () => {
  stopReplay();
  setReplayFrame(Number(replaySeekEl.value || 0));
});

// Initial UI state, then auto-connect when a token is already known.
syncEventsViewToggle();
resetReplay(null);
// Startup failures must not raise an alert, but should not vanish either:
// log them the same way the websocket message handler does.
if (state.token) connect().catch((err) => console.error(err));

View File

@@ -33,6 +33,10 @@ class FakeJobManager:
self._counter += 1
job_id = f"job_fake_{self._counter:03d}"
selected_model = (model or self.config.default_model).strip()
artifacts_dir = (self.config.runs_dir / f"run_{job_id}").resolve()
artifacts_dir.mkdir(parents=True, exist_ok=True)
screenshot_path = artifacts_dir / "screen_step_001.png"
screenshot_path.write_bytes(b"not-a-real-png")
self.last_submit_payload = {
"objective": objective,
"model": selected_model,
@@ -61,7 +65,7 @@ class FakeJobManager:
"total_tokens": 14,
"estimated_cost_usd": 0.0001,
},
"artifacts_dir": str(self.config.runs_dir.resolve()),
"artifacts_dir": str(artifacts_dir),
}
self._events[job_id] = [
{
@@ -70,7 +74,47 @@ class FakeJobManager:
"ts": "2026-05-27T00:00:00Z",
"step": 1,
"event_type": "tool_called",
"payload": {"tool": "execute_command"},
"payload": {"tool": "click", "args": {"coordinate": {"x": 320, "y": 180}}},
},
{
"id": 2,
"job_id": job_id,
"ts": "2026-05-27T00:00:01Z",
"step": 1,
"event_type": "tool_result",
"payload": {"tool": "click", "result": {"ok": True, "clicked": {"x": 322, "y": 182}}},
},
{
"id": 3,
"job_id": job_id,
"ts": "2026-05-27T00:00:02Z",
"step": 1,
"event_type": "tool_called",
"payload": {"tool": "type", "args": {"text": "hello world"}},
},
{
"id": 4,
"job_id": job_id,
"ts": "2026-05-27T00:00:03Z",
"step": 1,
"event_type": "tool_result",
"payload": {"tool": "type", "result": {"ok": True, "typed_length": 11}},
},
{
"id": 5,
"job_id": job_id,
"ts": "2026-05-27T00:00:04Z",
"step": 1,
"event_type": "visual_update",
"payload": {
"kind": "see_screen",
"image_meta": {
"path": str(screenshot_path),
"width": 1920,
"height": 1080,
"grid": True,
},
},
}
]
return job_id
@@ -174,12 +218,67 @@ def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:
assert status_after["data"] is None
def test_replay_endpoint_builds_frames_and_overlays(tmp_path: Path, monkeypatch: Any) -> None:
    """Replay endpoint returns one frame whose overlays mention the click and typed actions."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    client = TestClient(app)
    auth = {"Authorization": "Bearer test_token"}

    created = client.post("/api/jobs", headers=auth, json={"job": "Replay test"})
    job_id = created.json()["job_id"]

    response = client.get(f"/api/jobs/{job_id}/replay?limit=200", headers=auth)
    assert response.status_code == 200

    body = response.json()
    assert body["job_id"] == job_id
    assert body["total_frames"] == 1

    first_frame = body["frames"][0]
    assert first_frame["kind"] == "see_screen"
    assert first_frame["is_fullscreen"] is True

    overlay_labels = [overlay.get("label", "") for overlay in first_frame["overlays"]]

    def mentions(fragment: str) -> bool:
        # Case-insensitive substring match across all overlay labels.
        return any(fragment in label.lower() for label in overlay_labels)

    assert mentions("click")
    assert mentions("typed")
def test_replay_endpoint_skips_visual_paths_outside_artifacts(tmp_path: Path, monkeypatch: Any) -> None:
    """A visual_update pointing at ``tmp_path / "outside.png"`` adds no replay frame."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    manager = app.state.manager
    client = TestClient(app)
    auth = {"Authorization": "Bearer test_token"}

    created = client.post("/api/jobs", headers=auth, json={"job": "Replay path check"})
    job_id = created.json()["job_id"]

    # Extra screenshot event whose image lives directly under tmp_path rather
    # than in the job's own artifacts directory.
    outside_image = (tmp_path / "outside.png").resolve()
    stray_event = {
        "id": 999,
        "job_id": job_id,
        "ts": "2026-05-27T00:01:00Z",
        "step": 2,
        "event_type": "visual_update",
        "payload": {
            "kind": "see_screen",
            "image_meta": {
                "path": str(outside_image),
                "width": 100,
                "height": 100,
                "grid": True,
            },
        },
    }
    manager._events[job_id].append(stray_event)

    response = client.get(f"/api/jobs/{job_id}/replay?limit=500", headers=auth)
    assert response.status_code == 200

    # Still exactly one frame: the stray event must not have produced another.
    body = response.json()
    assert body["total_frames"] == 1
def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
app_enabled, _ = _build_app(tmp_path / "enabled", monkeypatch, disable_ui=False)
client_enabled = TestClient(app_enabled)
root_enabled = client_enabled.get("/")
assert root_enabled.status_code == 200
assert "ScreenJob Monitor" in root_enabled.text
js_enabled = client_enabled.get("/ui/monitoring.js")
assert js_enabled.status_code == 200
assert "const tokenInput" in js_enabled.text
app_disabled, _ = _build_app(tmp_path / "disabled", monkeypatch, disable_ui=True)
client_disabled = TestClient(app_disabled)

View File

@@ -7,18 +7,17 @@
- [Bug] More consistent clicks and more uses of enhance images.
## P1
- [Idea] Move ui.py into a separate HTML file and JS file.
- [Idea] Think harder using effort "medium" by default.
- [Idea] Decay old screenshots after 3 to 5 steps to save (1) tokens and (2) brain fuck in the agents.
- [Bug] Validate `disabled_tools` against an allowlist and disallow disabling critical completion flow (`task_complete`) to avoid guaranteed step-limit failures.
- [Bug] Improve `execute_command` cancellation/timeout handling to terminate full process trees, not only the parent shell process.
- [Bug] Reduce API/UI token leakage risk by moving away from query-string token usage for websocket/artifact access where possible.
- [Idea] Add per-token rate limiting and request size limits (objective length + payload bounds) for API hardening.
## P2
- [Bug] Fix UI event style mapping mismatch (`tool_called` events are emitted, but UI color map expects `tool_call`).
- [Idea] Reduce monitoring UI backend load by throttling websocket-triggered refreshes and avoiding full job/event re-fetch on every event.
- [Idea] Add cursor-based pagination for jobs/events instead of large fixed limits.
- [Idea] Support offline/self-hosted UI assets (bundle Tailwind instead of CDN dependency).
- [Idea] Add retention controls/pruning for old runs, screenshots, and DB rows.
## P3
- [Idea] Add Replay Mode; Ability to replay a session by reconstructing the screen from screenshots and overlaying tool calls and click and type events.
- [x] Add Replay Mode; Ability to replay a session by reconstructing the screen from screenshots and overlaying tool calls and click and type events.
- [Idea] Add lightweight analytics dashboards (success rate by objective category, avg steps/cost over time).