feat: implement replay functionality with UI controls and backend support

This commit is contained in:
Space-Banane
2026-05-27 21:57:37 +02:00
parent 620fcc4aa6
commit 111a1e84af
4 changed files with 491 additions and 5 deletions

View File

@@ -30,6 +30,181 @@ class CreateJobRequest(BaseModel):
no_failsafe: bool = False
def _safe_int(value: Any) -> int | None:
try:
return int(value)
except Exception: # noqa: BLE001
return None
def _safe_text(value: Any, limit: int = 180) -> str:
text = str(value or "").strip()
if len(text) <= limit:
return text
return f"{text[:limit]}..."
def _resolve_artifact_path(artifacts_dir: Path | None, path_raw: Any) -> Path | None:
if artifacts_dir is None:
return None
text = str(path_raw or "").strip()
if not text:
return None
candidate = Path(text).resolve()
try:
candidate.relative_to(artifacts_dir)
except ValueError:
return None
return candidate
def _replay_call_action(
    base: dict[str, Any],
    payload: dict[str, Any],
    pending_tool_args: dict[tuple[int, str], list[dict[str, Any]]],
) -> dict[str, Any]:
    """Build the replay action for a ``tool_called`` event and queue its args."""
    tool = str(payload.get("tool") or "").strip()
    raw_args = payload.get("args")
    args: dict[str, Any] = raw_args if isinstance(raw_args, dict) else {}
    if tool:
        # Remember the arguments so the matching ``tool_result`` can show them.
        pending_tool_args.setdefault((base["step"], tool), []).append(args)

    action: dict[str, Any] = {
        **base,
        "kind": "tool_called",
        "tool": tool,
        "label": f"Call: {tool}" if tool else "Tool call",
    }
    if tool == "click":
        coord = args.get("coordinate")
        if isinstance(coord, dict):
            x = _safe_int(coord.get("x"))
            y = _safe_int(coord.get("y"))
            if x is not None and y is not None:
                action["requested_click"] = {"x": x, "y": y}
                action["label"] = f"Call: click ({x}, {y})"
    elif tool == "type":
        text = _safe_text(args.get("text"), 120)
        if text:
            action["text_preview"] = text
            action["label"] = f'Call: type "{text}"'
    return action


def _replay_result_action(
    base: dict[str, Any],
    payload: dict[str, Any],
    pending_tool_args: dict[tuple[int, str], list[dict[str, Any]]],
) -> dict[str, Any]:
    """Build the replay action for a ``tool_result`` event.

    The oldest queued arguments for the same ``(step, tool)`` pair are
    consumed so the result can be labelled with what was requested.
    """
    tool = str(payload.get("tool") or "").strip()
    raw_result = payload.get("result")
    result: dict[str, Any] = raw_result if isinstance(raw_result, dict) else {}
    ok = bool(result.get("ok"))

    matching_args: dict[str, Any] = {}
    key = (base["step"], tool)
    queued = pending_tool_args.get(key) or []
    if queued:
        matching_args = queued.pop(0)  # first call, first result
        if not queued:
            pending_tool_args.pop(key, None)

    action: dict[str, Any] = {
        **base,
        "kind": "tool_result",
        "tool": tool,
        "ok": ok,
        "label": f"Result: {tool}",
    }
    if tool == "click":
        raw_clicked = result.get("clicked")
        clicked = raw_clicked if isinstance(raw_clicked, dict) else {}
        x = _safe_int(clicked.get("x"))
        y = _safe_int(clicked.get("y"))
        if x is not None and y is not None:
            action["click"] = {"x": x, "y": y}
            action["label"] = f"Clicked ({x}, {y})" if ok else f"Click failed ({x}, {y})"
    elif tool == "type":
        text = _safe_text(matching_args.get("text"), 120)
        typed_length = _safe_int(result.get("typed_length"))
        if typed_length is not None:
            action["typed_length"] = typed_length
        if text:
            action["text_preview"] = text
            action["label"] = f'Typed "{text}"'
    elif tool == "press_key":
        key_name = _safe_text(result.get("key"), 80)
        if key_name:
            action["label"] = f"Pressed {key_name}"
    elif tool == "execute_command":
        command = _safe_text(matching_args.get("command"), 140)
        if command:
            action["command_preview"] = command
            action["label"] = f"Command: {command}"
    return action


def _extract_replay_action(
    event: dict[str, Any],
    pending_tool_args: dict[tuple[int, str], list[dict[str, Any]]],
) -> dict[str, Any] | None:
    """Translate one job event into a replay action, if it is a tool event.

    Args:
        event: Event mapping; the keys read are ``event_type``, ``payload``,
            ``step``, ``ts`` and ``id``.
        pending_tool_args: Mutable queue of call arguments keyed by
            ``(step, tool)``. ``tool_called`` events append to it and
            ``tool_result`` events consume from it, so the same mapping must
            be passed for every event of a job.

    Returns:
        A dict describing the action for ``tool_called`` / ``tool_result``
        events, or ``None`` for every other event type.
    """
    event_type = str(event.get("event_type") or "")
    if event_type == "tool_called":
        build = _replay_call_action
    elif event_type == "tool_result":
        build = _replay_result_action
    else:
        return None

    raw_payload = event.get("payload")
    payload: dict[str, Any] = raw_payload if isinstance(raw_payload, dict) else {}
    base: dict[str, Any] = {
        "ts": str(event.get("ts") or ""),
        # A malformed step/id falls back to 0 instead of raising, so one bad
        # event cannot abort the whole replay.
        "step": _safe_int(event.get("step")) or 0,
        "event_id": _safe_int(event.get("id")) or 0,
    }
    return build(base, payload, pending_tool_args)
def _build_replay_payload(job_id: str, job: dict[str, Any], events: list[dict[str, Any]]) -> dict[str, Any]:
    """Group a job's events into replay frames keyed on ``visual_update`` images.

    Tool actions are buffered until the next ``visual_update`` whose image
    resolves to an existing file inside the job's ``artifacts_dir``; they are
    then attached to that frame as ``overlays``. Actions left over after the
    last usable frame are returned as ``trailing_events``.

    Args:
        job_id: Identifier echoed back in the payload.
        job: Job mapping; only ``artifacts_dir`` is read.
        events: Job events, processed in the order given.
            NOTE(review): the call/result pairing and the overlay grouping
            assume chronological order - confirm what the event store returns.

    Returns:
        Dict with ``job_id``, ``total_events``, ``total_frames``, ``frames``
        and ``trailing_events``.
    """
    artifacts_dir_raw = str(job.get("artifacts_dir") or "").strip()
    artifacts_dir = Path(artifacts_dir_raw).resolve() if artifacts_dir_raw else None

    pending_tool_args: dict[tuple[int, str], list[dict[str, Any]]] = {}
    buffered_actions: list[dict[str, Any]] = []
    frames: list[dict[str, Any]] = []

    for event in events:
        action = _extract_replay_action(event, pending_tool_args)
        if action is not None:
            buffered_actions.append(action)
        if str(event.get("event_type") or "") != "visual_update":
            continue

        raw_payload = event.get("payload")
        payload = raw_payload if isinstance(raw_payload, dict) else {}
        raw_meta = payload.get("image_meta")
        image_meta = raw_meta if isinstance(raw_meta, dict) else {}

        resolved = _resolve_artifact_path(artifacts_dir, image_meta.get("path"))
        if resolved is None or not resolved.is_file():
            # No usable image: keep the buffered actions for the next frame.
            continue

        width = _safe_int(image_meta.get("width"))
        height = _safe_int(image_meta.get("height"))
        if width is None or height is None:
            # Fall back to the nested screen_size entry for both dimensions.
            raw_size = image_meta.get("screen_size")
            size = raw_size if isinstance(raw_size, dict) else {}
            width = _safe_int(size.get("width"))
            height = _safe_int(size.get("height"))

        is_fullscreen = (
            str(payload.get("kind") or "") == "see_screen"
            and bool(image_meta.get("grid"))
            and width is not None
            and height is not None
            and width > 0
            and height > 0
        )
        frames.append(
            {
                "frame_index": len(frames),
                # Malformed id/step values fall back to 0 rather than raising.
                "event_id": _safe_int(event.get("id")) or 0,
                "ts": str(event.get("ts") or ""),
                "step": _safe_int(event.get("step")) or 0,
                "kind": str(payload.get("kind") or "visual_update"),
                "image_path": str(resolved),
                "image_meta": image_meta,
                "screen_size": {"width": width, "height": height} if width and height else None,
                "is_fullscreen": is_fullscreen,
                "overlays": buffered_actions,
            }
        )
        # Start a fresh list; the previous one now belongs to the frame above.
        buffered_actions = []

    return {
        "job_id": job_id,
        "total_events": len(events),
        "total_frames": len(frames),
        "frames": frames,
        "trailing_events": buffered_actions,
    }
class _WebSocketHub:
def __init__(self) -> None:
self._connections: set[WebSocket] = set()
@@ -161,6 +336,18 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
raise HTTPException(status_code=404, detail="Job not found")
return {"events": manager.get_events(job_id, limit=limit)}
@app.get("/api/jobs/{job_id}/replay")
def get_job_replay(
    job_id: str,
    limit: int = Query(default=5000, ge=1, le=5000),
    _: None = Depends(require_token),
) -> dict[str, Any]:
    # Serve the replay payload for one job; unknown job ids answer with 404.
    job = manager.get_job(job_id)
    if job is not None:
        recorded = manager.get_events(job_id, limit=limit)
        return _build_replay_payload(job_id, job, recorded)
    raise HTTPException(status_code=404, detail="Job not found")
@app.post("/api/jobs/{job_id}/cancel")
def cancel_job(job_id: str, _: None = Depends(require_token)) -> dict[str, Any]:
job = manager.get_job(job_id)

207
src/ui.py
View File

@@ -43,6 +43,33 @@ def monitoring_page_html(device_hostname: str = "") -> str:
<div class="bg-slate-950 border border-slate-800 rounded p-2">
<img id="latestVisual" alt="Latest visual update" class="max-h-[24vh] w-full object-contain rounded" />
</div>
<div class="flex items-center justify-between">
<h3 class="font-semibold text-sm">Replay</h3>
<div id="replayStatus" class="text-[11px] text-slate-400">No replay loaded.</div>
</div>
<div class="flex flex-wrap items-center gap-2">
<button id="replayPlayBtn" class="text-xs bg-slate-800 px-2 py-1 rounded">Play</button>
<button id="replayPrevBtn" class="text-xs bg-slate-800 px-2 py-1 rounded">Prev</button>
<button id="replayNextBtn" class="text-xs bg-slate-800 px-2 py-1 rounded">Next</button>
<label class="text-xs text-slate-300 flex items-center gap-1">
Speed
<select id="replaySpeed" class="bg-slate-900 border border-slate-700 rounded px-1 py-0.5">
<option value="0.5">0.5x</option>
<option value="1" selected>1.0x</option>
<option value="1.5">1.5x</option>
<option value="2">2.0x</option>
</select>
</label>
</div>
<input id="replaySeek" type="range" min="0" max="0" value="0" class="w-full accent-cyan-400" />
<div class="bg-slate-950 border border-slate-800 rounded p-2">
<div class="relative w-full min-h-[180px] bg-black/40 rounded">
<img id="replayVisual" alt="Replay frame" class="max-h-[30vh] w-full object-contain rounded" />
<svg id="replayOverlay" class="absolute inset-0 w-full h-full pointer-events-none" preserveAspectRatio="xMidYMid meet"></svg>
</div>
<div id="replayFrameMeta" class="text-[11px] text-slate-400 mt-2"></div>
<div id="replayFrameEvents" class="mt-2 space-y-1"></div>
</div>
<div class="flex items-center justify-between">
<h3 class="font-semibold text-sm">Live Events</h3>
<label for="eventsViewToggle" class="flex items-center gap-2 text-xs text-slate-300 cursor-pointer select-none">
@@ -66,6 +93,16 @@ def monitoring_page_html(device_hostname: str = "") -> str:
const statsEl = document.getElementById("stats");
const latestVisualEl = document.getElementById("latestVisual");
const eventsViewToggle = document.getElementById("eventsViewToggle");
const replayVisualEl = document.getElementById("replayVisual");
const replayOverlayEl = document.getElementById("replayOverlay");
const replayFrameMetaEl = document.getElementById("replayFrameMeta");
const replayFrameEventsEl = document.getElementById("replayFrameEvents");
const replayStatusEl = document.getElementById("replayStatus");
const replayPlayBtn = document.getElementById("replayPlayBtn");
const replayPrevBtn = document.getElementById("replayPrevBtn");
const replayNextBtn = document.getElementById("replayNextBtn");
const replaySpeedEl = document.getElementById("replaySpeed");
const replaySeekEl = document.getElementById("replaySeek");
const state = {
token: localStorage.getItem("screenjob_token") || "",
@@ -73,7 +110,15 @@ def monitoring_page_html(device_hostname: str = "") -> str:
selectedJobId: null,
ws: null,
wsReconnectTimer: null,
eventsViewMode: localStorage.getItem("screenjob_events_view_mode") === "beautiful" ? "beautiful" : "raw"
eventsViewMode: localStorage.getItem("screenjob_events_view_mode") === "beautiful" ? "beautiful" : "raw",
replay: {
frames: [],
trailingEvents: [],
frameIndex: 0,
isPlaying: false,
speed: 1,
timer: null
}
};
const manuallyClosedSockets = new WeakSet();
tokenInput.value = state.token;
@@ -190,6 +235,138 @@ def monitoring_page_html(device_hostname: str = "") -> str:
}
}
function clearReplayTimer() {
  // Cancel the pending auto-advance timeout (if any) and forget its handle.
  const pending = state.replay.timer;
  if (!pending) return;
  clearTimeout(pending);
  state.replay.timer = null;
}
function stopReplay() {
  // Leave playing mode: drop the scheduled tick, clear the flag and
  // restore the button caption.
  clearReplayTimer();
  state.replay.isPlaying = false;
  replayPlayBtn.textContent = "Play";
}
function replayImageSrc(path) {
  // Build the artifact URL used as the <img> source for a replay frame.
  // Every interpolated component is percent-encoded, including the job id,
  // so a value containing "/", "?" or "#" cannot alter the URL structure.
  // NOTE(review): the token travels in the query string because an <img>
  // request cannot carry custom headers; it may show up in access logs.
  const jobId = encodeURIComponent(String(state.selectedJobId));
  const artifactPath = encodeURIComponent(path || "");
  const token = encodeURIComponent(state.token);
  return `/api/jobs/${jobId}/artifact?path=${artifactPath}&token=${token}`;
}
function renderReplayOverlay(frame) {
  // Redraw the SVG click markers for one frame. Markers are only drawn for
  // fullscreen frames with a known screen size; the viewBox is set to that
  // size so click coordinates can be used directly as SVG coordinates.
  const SVG_NS = "http://www.w3.org/2000/svg";
  const makeCircle = (attrs) => {
    const node = document.createElementNS(SVG_NS, "circle");
    for (const [name, value] of Object.entries(attrs)) {
      node.setAttribute(name, value);
    }
    return node;
  };

  replayOverlayEl.innerHTML = "";
  const size = frame && frame.screen_size;
  const drawable = Boolean(frame && frame.is_fullscreen && size && size.width && size.height);
  if (!drawable) {
    replayOverlayEl.removeAttribute("viewBox");
    return;
  }
  replayOverlayEl.setAttribute("viewBox", `0 0 ${size.width} ${size.height}`);

  const overlayEvents = Array.isArray(frame.overlays) ? frame.overlays : [];
  for (const ev of overlayEvents) {
    // Only completed click results carry a position worth marking.
    if (!ev || ev.kind !== "tool_result" || ev.tool !== "click" || !ev.click) continue;
    const x = Number(ev.click.x);
    const y = Number(ev.click.y);
    if (!Number.isFinite(x) || !Number.isFinite(y)) continue;
    const halo = makeCircle({
      cx: String(x),
      cy: String(y),
      r: "14",
      fill: "rgba(14, 165, 233, 0.22)",
      stroke: "#38bdf8",
      "stroke-width": "2",
    });
    const dot = makeCircle({
      cx: String(x),
      cy: String(y),
      r: "4",
      fill: "#38bdf8",
    });
    replayOverlayEl.appendChild(halo);
    replayOverlayEl.appendChild(dot);
  }
}
function renderReplayFrameEvents(frame) {
  // List the last eight overlay events of a frame below the replay image,
  // or a placeholder line when the frame has none.
  replayFrameEventsEl.innerHTML = "";
  if (!frame) return;

  const overlays = Array.isArray(frame.overlays) ? frame.overlays : [];
  const recent = overlays.slice(-8);
  for (const ev of recent) {
    const row = document.createElement("div");
    row.className = "text-[11px] rounded border border-slate-800 bg-slate-900/80 px-2 py-1";
    // Prefer the server-provided label; otherwise fall back to "kind tool".
    row.textContent = ev.label || `${ev.kind || "event"} ${ev.tool || ""}`.trim();
    replayFrameEventsEl.appendChild(row);
  }
  if (recent.length > 0) return;

  const placeholder = document.createElement("div");
  placeholder.className = "text-[11px] text-slate-500";
  placeholder.textContent = "No overlay events for this frame.";
  replayFrameEventsEl.appendChild(placeholder);
}
function setReplayFrame(index) {
  // Show the frame at `index` (clamped to the valid range) and sync the seek
  // bar, meta line, status text, click overlay and per-frame event list.
  const frames = state.replay.frames;
  if (!frames.length) {
    // Nothing to show: reset every replay widget, including the event list
    // and overlay viewBox, so nothing from a previously shown job remains.
    state.replay.frameIndex = 0;
    replayVisualEl.removeAttribute("src");
    replayOverlayEl.innerHTML = "";
    replayOverlayEl.removeAttribute("viewBox");
    replayFrameEventsEl.innerHTML = "";
    replayFrameMetaEl.textContent = "No replay frames.";
    replaySeekEl.value = "0";
    replaySeekEl.max = "0";
    replayStatusEl.textContent = "No replay loaded.";
    return;
  }
  // A non-finite or fractional index would select `undefined` below.
  const requested = Number.isFinite(index) ? Math.trunc(index) : 0;
  const bounded = Math.max(0, Math.min(requested, frames.length - 1));
  state.replay.frameIndex = bounded;
  const frame = frames[bounded];
  replayVisualEl.src = replayImageSrc(frame.image_path);
  replayFrameMetaEl.textContent = `Frame ${bounded + 1}/${frames.length} | step ${frame.step} | ${frame.kind} | ${frame.ts}`;
  replaySeekEl.max = String(Math.max(0, frames.length - 1));
  replaySeekEl.value = String(bounded);
  replayStatusEl.textContent = state.replay.isPlaying ? "Playing replay." : "Replay ready.";
  renderReplayOverlay(frame);
  renderReplayFrameEvents(frame);
}
function advanceReplay() {
  // One playback tick: move to the next frame and schedule the following
  // tick, or stop with a "finished" status once the last frame is showing.
  const replay = state.replay;
  const frames = replay.frames;
  if (!replay.isPlaying || !frames.length) return;

  const lastIndex = frames.length - 1;
  if (replay.frameIndex >= lastIndex) {
    stopReplay();
    setReplayFrame(lastIndex);
    // Written after setReplayFrame, which sets its own status text.
    replayStatusEl.textContent = "Replay finished.";
    return;
  }

  setReplayFrame(replay.frameIndex + 1);
  clearReplayTimer();
  // 700 ms per frame at 1x, scaled by speed, never faster than 120 ms.
  const speed = replay.speed || 1;
  const delayMs = Math.max(120, Math.round(700 / speed));
  replay.timer = setTimeout(advanceReplay, delayMs);
}
function toggleReplayPlay() {
  // Play/Pause button handler; does nothing when no frames are loaded.
  const replay = state.replay;
  if (!replay.frames.length) return;

  if (!replay.isPlaying) {
    replay.isPlaying = true;
    replayPlayBtn.textContent = "Pause";
    replayStatusEl.textContent = "Playing replay.";
    advanceReplay();
    return;
  }

  // Pausing: stop the timer, then re-render so the status text updates.
  stopReplay();
  setReplayFrame(replay.frameIndex);
}
function resetReplay(payload) {
  // Load a replay payload (or clear the replay when it is null/undefined):
  // stop playback, replace the stored frames and show the first frame.
  stopReplay();
  const source = payload || {};
  const listOrEmpty = (value) => (Array.isArray(value) ? value : []);
  const replay = state.replay;
  replay.frames = listOrEmpty(source.frames);
  replay.trailingEvents = listOrEmpty(source.trailing_events);
  replay.frameIndex = 0;
  setReplayFrame(0);
}
function scheduleWsReconnect() {
if (state.wsReconnectTimer || !state.token) return;
state.wsReconnectTimer = setTimeout(() => {
@@ -221,9 +398,10 @@ def monitoring_page_html(device_hostname: str = "") -> str:
async function refreshJobDetail() {
if (!state.selectedJobId) return;
const [job, events] = await Promise.all([
const [job, events, replay] = await Promise.all([
api(`/api/jobs/${state.selectedJobId}`),
api(`/api/jobs/${state.selectedJobId}/events?limit=120`)
api(`/api/jobs/${state.selectedJobId}/events?limit=120`),
api(`/api/jobs/${state.selectedJobId}/replay?limit=5000`)
]);
jobDetailEl.textContent = JSON.stringify(job, null, 2);
eventsEl.innerHTML = "";
@@ -231,6 +409,7 @@ def monitoring_page_html(device_hostname: str = "") -> str:
for (const ev of list) pushEventLine(ev);
const visual = list.find((ev) => ev.event_type === "visual_update");
if (visual) updateLatestVisualFromEvent(visual);
resetReplay(replay);
}
function connectWs() {
@@ -299,7 +478,29 @@ def monitoring_page_html(device_hostname: str = "") -> str:
localStorage.setItem("screenjob_events_view_mode", state.eventsViewMode);
refreshJobDetail().catch((err) => alert(err.message));
});
// Replay controls: play/pause toggle.
replayPlayBtn.addEventListener("click", () => {
  toggleReplayPlay();
});
// Prev / Next stop playback and step one frame back or forward.
for (const [button, delta] of [[replayPrevBtn, -1], [replayNextBtn, 1]]) {
  button.addEventListener("click", () => {
    stopReplay();
    setReplayFrame(state.replay.frameIndex + delta);
  });
}
// Speed selector: invalid values fall back to 1x; while playing, the pending
// tick is replaced by an immediate one that runs at the new speed.
replaySpeedEl.addEventListener("change", () => {
  const parsed = Number(replaySpeedEl.value);
  const usable = Number.isFinite(parsed) && parsed > 0;
  state.replay.speed = usable ? parsed : 1;
  if (!state.replay.isPlaying) return;
  clearReplayTimer();
  advanceReplay();
});
// Dragging the seek bar stops playback and jumps to the chosen frame.
replaySeekEl.addEventListener("input", () => {
  stopReplay();
  const target = Number(replaySeekEl.value || 0);
  setReplayFrame(target);
});
// Initial UI state, then connect if a token is already stored.
syncEventsViewToggle();
resetReplay(null);
if (state.token) connect().catch(() => {});
</script>
</body>