"""HTTP API tests for the server app, run against an in-memory fake job manager.

``server_module.JobManager`` is monkeypatched with :class:`FakeJobManager`, so
the tests exercise routing, auth, validation and response shaping without
running any real job.
"""

from __future__ import annotations

from pathlib import Path
from typing import Any

from fastapi.testclient import TestClient

import src.server as server_module
from src.config import AppConfig

# Statuses that count as "finished" when aggregating analytics.
_TERMINAL_STATUSES = {"completed", "failed", "cancelled"}

# Token configured in ``_build_app`` and the matching bearer header for requests.
_TEST_TOKEN = "test_token"
_AUTH_HEADERS = {"Authorization": f"Bearer {_TEST_TOKEN}"}

# Ordered (label, keywords) table: the FIRST category with any keyword that is
# a substring of the lower-cased objective wins, so the order is significant
# (e.g. "database" hits "data" in "Data / analysis" before "Development / ops").
_CATEGORY_KEYWORDS: tuple[tuple[str, tuple[str, ...]], ...] = (
    (
        "Browser / web",
        ("browser", "website", "amazon", "google", "login", "shopping", "checkout", "orders"),
    ),
    (
        "Files / terminal",
        (
            "file", "folder", "directory", "terminal", "shell", "command", "cli",
            "script", "git", "repo", "install", "pip", "npm",
        ),
    ),
    (
        "Writing / docs",
        ("write", "summary", "document", "docs", "report", "email", "message", "readme", "markdown"),
    ),
    (
        "Data / analysis",
        ("data", "analysis", "csv", "spreadsheet", "sheet", "table", "chart", "dashboard", "metric", "sql"),
    ),
    (
        "Development / ops",
        (
            "code", "bug", "fix", "test", "debug", "api", "backend", "frontend",
            "database", "deploy", "docker", "service", "build",
        ),
    ),
)


def _objective_category(objective: str) -> str:
    """Return the category label for *objective* by case-insensitive substring match.

    Categories are tried in the order of ``_CATEGORY_KEYWORDS``; ``"Other"`` is
    returned when no keyword occurs in the text.
    """
    text = objective.lower()
    for label, keywords in _CATEGORY_KEYWORDS:
        if any(keyword in text for keyword in keywords):
            return label
    return "Other"


def _new_bucket(label: str) -> dict[str, Any]:
    """Return a zeroed analytics accumulator labelled *label*."""
    return {
        "label": label,
        "total_jobs": 0,
        "finished_jobs": 0,
        "completed_jobs": 0,
        "failed_jobs": 0,
        "cancelled_jobs": 0,
        "steps_sum": 0,
        "steps_count": 0,
        "cost_sum": 0.0,
        "cost_count": 0,
    }


def _finalize_bucket(item: dict[str, Any]) -> dict[str, Any]:
    """Turn an accumulator from ``_new_bucket`` into its reported form.

    ``success_rate`` is a percentage of finished jobs (``0.0`` when none
    finished); the averages are ``None`` when nothing contributed to them.
    """
    finished = item["finished_jobs"]
    return {
        "label": item["label"],
        "total_jobs": item["total_jobs"],
        "finished_jobs": finished,
        "completed_jobs": item["completed_jobs"],
        "failed_jobs": item["failed_jobs"],
        "cancelled_jobs": item["cancelled_jobs"],
        "success_rate": round((item["completed_jobs"] / finished) * 100, 2) if finished else 0.0,
        "avg_steps": round(item["steps_sum"] / item["steps_count"], 2) if item["steps_count"] else None,
        "avg_cost_usd": round(item["cost_sum"] / item["cost_count"], 6) if item["cost_count"] else None,
    }


class FakeJobManager:
    """In-memory stand-in that the tests patch over ``server_module.JobManager``.

    Jobs and their events live in plain dicts; nothing is executed.  Tests reach
    into ``_jobs`` / ``_events`` directly to stage particular states, and read
    ``last_submit_payload`` to check what the API layer passed to ``submit_job``.
    """

    def __init__(self, *, config: AppConfig, db: Any, broadcast: Any = None) -> None:
        # ``db`` and ``broadcast`` are accepted but unused by the fake
        # (presumably they mirror the real manager's constructor -- confirm).
        self.config = config
        self._jobs: dict[str, dict[str, Any]] = {}
        self._events: dict[str, list[dict[str, Any]]] = {}
        self._counter = 0
        self.last_submit_payload: dict[str, Any] | None = None

    def submit_job(
        self,
        *,
        objective: str,
        model: str | None = None,
        max_steps: int = 60,
        command_timeout: int = 45,
        type_interval: float = 0.02,
        click_pause: float = 0.10,
        reasoning_effort: str = "medium",
        screen_context_decay_steps: int = 4,
        max_visual_context_images: int = 3,
        native_automation_mode: str = "prefer",
        dialog_timeout_seconds: float = 12.0,
        focus_timeout_seconds: float = 8.0,
        ui_element_timeout_seconds: float = 8.0,
        max_retries_per_surface: int = 3,
        pretty_logs: bool = False,
        disabled_tools: list[str] | None = None,
        safety_override: bool = False,
        no_failsafe: bool = False,
    ) -> str:
        """Register a fake job in the ``running`` state and return its id.

        Side effects: creates ``<runs_dir>/run_<job_id>/`` containing a
        placeholder ``screen_step_001.png``, records every argument in
        ``last_submit_payload``, and seeds five events (click call/result,
        type call/result, one ``visual_update`` pointing at the placeholder).
        """
        self._counter += 1
        job_id = f"job_fake_{self._counter:03d}"
        # Fall back to the configured default when the caller gave no model.
        selected_model = (model or self.config.default_model).strip()

        artifacts_dir = (self.config.runs_dir / f"run_{job_id}").resolve()
        artifacts_dir.mkdir(parents=True, exist_ok=True)
        screenshot_path = artifacts_dir / "screen_step_001.png"
        # Only the file's existence/location matters here, not its content.
        screenshot_path.write_bytes(b"not-a-real-png")

        created_at = f"2026-05-27T00:00:{self._counter:02d}Z"

        self.last_submit_payload = {
            "objective": objective,
            "model": selected_model,
            "disabled_tools": disabled_tools or [],
            "safety_override": safety_override,
            "max_steps": max_steps,
            "command_timeout": command_timeout,
            "type_interval": type_interval,
            "click_pause": click_pause,
            "reasoning_effort": reasoning_effort,
            "screen_context_decay_steps": screen_context_decay_steps,
            "max_visual_context_images": max_visual_context_images,
            "native_automation_mode": native_automation_mode,
            "dialog_timeout_seconds": dialog_timeout_seconds,
            "focus_timeout_seconds": focus_timeout_seconds,
            "ui_element_timeout_seconds": ui_element_timeout_seconds,
            "max_retries_per_surface": max_retries_per_surface,
            "pretty_logs": pretty_logs,
            "no_failsafe": no_failsafe,
        }

        self._jobs[job_id] = {
            "job_id": job_id,
            "objective": objective,
            "model": selected_model,
            "status": "running",
            "created_at": created_at,
            "started_at": created_at,
            "ended_at": None,
            "steps": 1,
            "result": "Running",
            "response": {"return": "Running", "data": None},
            "return": "Running",
            "data": None,
            "usage": {
                "input_tokens": 10,
                "cached_input_tokens": 2,
                "output_tokens": 4,
                "reasoning_tokens": 0,
                "total_tokens": 14,
                "estimated_cost_usd": 0.0001,
            },
            "artifacts_dir": str(artifacts_dir),
        }

        self._events[job_id] = [
            {
                "id": 1,
                "job_id": job_id,
                "ts": "2026-05-27T00:00:00Z",
                "step": 1,
                "event_type": "tool_called",
                "payload": {"tool": "click", "args": {"coordinate": {"x": 320, "y": 180}}},
            },
            {
                "id": 2,
                "job_id": job_id,
                "ts": "2026-05-27T00:00:01Z",
                "step": 1,
                "event_type": "tool_result",
                "payload": {"tool": "click", "result": {"ok": True, "clicked": {"x": 322, "y": 182}}},
            },
            {
                "id": 3,
                "job_id": job_id,
                "ts": "2026-05-27T00:00:02Z",
                "step": 1,
                "event_type": "tool_called",
                "payload": {"tool": "type", "args": {"text": "hello world"}},
            },
            {
                "id": 4,
                "job_id": job_id,
                "ts": "2026-05-27T00:00:03Z",
                "step": 1,
                "event_type": "tool_result",
                "payload": {"tool": "type", "result": {"ok": True, "typed_length": 11}},
            },
            {
                "id": 5,
                "job_id": job_id,
                "ts": "2026-05-27T00:00:04Z",
                "step": 1,
                "event_type": "visual_update",
                "payload": {
                    "kind": "see_screen",
                    "image_meta": {
                        "path": str(screenshot_path),
                        "width": 1920,
                        "height": 1080,
                        "grid": True,
                    },
                },
            },
        ]
        return job_id

    def list_jobs(self, limit: int = 100) -> list[dict[str, Any]]:
        """Return at most *limit* job records in insertion order."""
        return list(self._jobs.values())[:limit]

    def get_job(self, job_id: str) -> dict[str, Any] | None:
        """Return the stored record for *job_id*, or ``None`` if unknown."""
        return self._jobs.get(job_id)

    def get_events(self, job_id: str, limit: int = 500) -> list[dict[str, Any]]:
        """Return at most *limit* events for *job_id* (empty list if unknown)."""
        return self._events.get(job_id, [])[:limit]

    def cancel_job(self, job_id: str) -> bool:
        """Mark *job_id* as ``cancelling``; return ``False`` for an unknown id.

        The fake never advances the job to ``cancelled``.
        """
        if job_id not in self._jobs:
            return False
        self._jobs[job_id]["status"] = "cancelling"
        return True

    def stats(self) -> dict[str, Any]:
        """Return aggregate counters derived from the stored jobs.

        Per-status counts are computed from each job's ``status`` so they stay
        consistent with ``analytics()`` when a test rewrites a job's status.
        ``live_running_threads`` is always 0 because the fake runs nothing.
        """
        jobs = list(self._jobs.values())
        statuses = [job["status"] for job in jobs]
        return {
            "total_jobs": len(jobs),
            "running_jobs": statuses.count("running"),
            "completed_jobs": statuses.count("completed"),
            "failed_jobs": statuses.count("failed"),
            "cancelled_jobs": statuses.count("cancelled"),
            "total_estimated_cost": sum(
                float((job["usage"] or {}).get("estimated_cost_usd") or 0) for job in jobs
            ),
            "live_running_threads": 0,
        }

    def analytics(self) -> dict[str, Any]:
        """Aggregate job outcomes overall, per objective category and per day.

        Every job counts towards ``total_jobs``; only jobs whose status is in
        ``_TERMINAL_STATUSES`` contribute to finished/outcome counts and to the
        step and cost averages.  The day key is the first 10 characters of
        ``created_at`` (``"unknown"`` when empty).

        Returns the overall summary fields plus ``by_category`` (sorted by
        descending success rate, then label) and ``timeline`` (sorted by day).
        """
        overall = _new_bucket("overall")
        by_category: dict[str, dict[str, Any]] = {}
        by_day: dict[str, dict[str, Any]] = {}

        for job in self._jobs.values():
            status = str(job.get("status") or "")
            category = _objective_category(str(job.get("objective") or ""))
            day = str(job.get("created_at") or "")[:10] or "unknown"

            if category not in by_category:
                by_category[category] = _new_bucket(category)
            if day not in by_day:
                by_day[day] = _new_bucket(day)
            # The same increments apply to the overall summary and both groupings.
            targets = (overall, by_category[category], by_day[day])

            for item in targets:
                item["total_jobs"] += 1
            if status not in _TERMINAL_STATUSES:
                continue

            steps_raw = job.get("steps")
            steps = None if steps_raw is None else int(steps_raw)
            cost_raw = (job.get("usage") or {}).get("estimated_cost_usd")
            cost = None if cost_raw is None else float(cost_raw)

            for item in targets:
                item["finished_jobs"] += 1
                # status is one of completed/failed/cancelled at this point.
                item[f"{status}_jobs"] += 1
                if steps is not None:
                    item["steps_sum"] += steps
                    item["steps_count"] += 1
                if cost is not None:
                    item["cost_sum"] += cost
                    item["cost_count"] += 1

        summary = _finalize_bucket(overall)
        del summary["label"]  # the overall summary is reported without a label
        return {
            **summary,
            "by_category": sorted(
                (_finalize_bucket(item) for item in by_category.values()),
                key=lambda item: (-item["success_rate"], item["label"]),
            ),
            "timeline": sorted(
                (_finalize_bucket(item) for item in by_day.values()),
                key=lambda item: item["label"],
            ),
        }


def _build_app(tmp_path: Path, monkeypatch: Any, disable_ui: bool = False) -> tuple[Any, AppConfig]:
    """Create the server app with ``FakeJobManager`` patched in.

    Returns ``(app, config)``; ``runs_dir`` and ``db_path`` live under *tmp_path*.
    """
    monkeypatch.setattr(server_module, "JobManager", FakeJobManager)
    config = AppConfig(
        openai_api_key="test_key",
        screenjob_token=_TEST_TOKEN,
        disable_ui=disable_ui,
        default_model="gpt-5.4-mini",
        safety_model="gpt-5.4-mini",
        host="127.0.0.1",
        port=8787,
        runs_dir=tmp_path / "runs",
        db_path=tmp_path / "screenjob_test.db",
        prohibited_key_combos=("ctrl+shift+s",),
    )
    config.runs_dir.mkdir(parents=True, exist_ok=True)
    app = server_module.create_app(config)
    return app, config


def test_api_requires_auth(tmp_path: Path, monkeypatch: Any) -> None:
    """Requests without a bearer token are rejected with 401."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    client = TestClient(app)

    assert client.get("/api/jobs").status_code == 401
    assert client.post("/api/jobs", json={"job": "x"}).status_code == 401


def test_create_job_returns_only_job_id_and_defaults_model(tmp_path: Path, monkeypatch: Any) -> None:
    """Creating a job returns just ``job_id`` and submits the default settings."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    client = TestClient(app)

    response = client.post(
        "/api/jobs",
        headers=_AUTH_HEADERS,
        json={"job": "Open amazon.de", "disabled_tools": ["click"], "safety_override": True},
    )
    assert response.status_code == 200
    payload = response.json()
    assert list(payload.keys()) == ["job_id"]
    job_id = payload["job_id"]

    submitted = app.state.manager.last_submit_payload
    assert submitted["model"] == "gpt-5.4-mini"
    assert submitted["disabled_tools"] == ["click"]
    assert submitted["reasoning_effort"] == "medium"
    assert submitted["screen_context_decay_steps"] == 4
    assert submitted["max_visual_context_images"] == 3
    assert submitted["native_automation_mode"] == "prefer"
    assert submitted["dialog_timeout_seconds"] == 12.0
    assert submitted["focus_timeout_seconds"] == 8.0
    assert submitted["ui_element_timeout_seconds"] == 8.0
    assert submitted["max_retries_per_surface"] == 3
    assert submitted["pretty_logs"] is False

    status_res = client.get(f"/api/jobs/{job_id}/status", headers=_AUTH_HEADERS)
    assert status_res.status_code == 200
    status_body = status_res.json()
    assert status_body["job_id"] == job_id
    assert status_body["response"]["return"] == "Running"
    assert "data" in status_body["response"]


def test_create_job_rejects_invalid_disabled_tool_names(tmp_path: Path, monkeypatch: Any) -> None:
    """An unknown name in ``disabled_tools`` yields a 400."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    client = TestClient(app)

    response = client.post(
        "/api/jobs",
        headers=_AUTH_HEADERS,
        json={"job": "Open amazon.de", "disabled_tools": ["not_a_real_tool"], "safety_override": True},
    )
    assert response.status_code == 400
    assert "Unknown disabled tool" in response.json()["detail"]


def test_create_job_rejects_disabling_task_complete(tmp_path: Path, monkeypatch: Any) -> None:
    """Disabling the ``task_complete`` tool yields a 400."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    client = TestClient(app)

    response = client.post(
        "/api/jobs",
        headers=_AUTH_HEADERS,
        json={"job": "Open amazon.de", "disabled_tools": ["task_complete"], "safety_override": True},
    )
    assert response.status_code == 400
    assert "Cannot disable required tool" in response.json()["detail"]


def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:
    """Events are listable and cancel flips the job to ``cancelling``."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    client = TestClient(app)

    create = client.post("/api/jobs", headers=_AUTH_HEADERS, json={"job": "Test job"})
    job_id = create.json()["job_id"]

    events = client.get(f"/api/jobs/{job_id}/events?limit=20", headers=_AUTH_HEADERS)
    assert events.status_code == 200
    assert len(events.json()["events"]) >= 1

    cancel = client.post(f"/api/jobs/{job_id}/cancel", headers=_AUTH_HEADERS)
    assert cancel.status_code == 200
    assert cancel.json()["cancel_requested"] is True

    status_after = client.get(f"/api/jobs/{job_id}", headers=_AUTH_HEADERS).json()
    assert status_after["status"] == "cancelling"
    assert status_after["return"] == "Running"
    assert status_after["data"] is None


def test_replay_endpoint_builds_frames_and_overlays(tmp_path: Path, monkeypatch: Any) -> None:
    """The seeded events produce one frame with click and type overlays."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    client = TestClient(app)

    create = client.post("/api/jobs", headers=_AUTH_HEADERS, json={"job": "Replay test"})
    job_id = create.json()["job_id"]

    replay = client.get(f"/api/jobs/{job_id}/replay?limit=200", headers=_AUTH_HEADERS)
    assert replay.status_code == 200
    payload = replay.json()
    assert payload["job_id"] == job_id
    assert payload["total_frames"] == 1

    frame = payload["frames"][0]
    assert frame["kind"] == "see_screen"
    assert frame["is_fullscreen"] is True
    labels = [item.get("label", "") for item in frame["overlays"]]
    assert any("click" in text.lower() for text in labels)
    assert any("typed" in text.lower() for text in labels)


def test_replay_endpoint_skips_visual_paths_outside_artifacts(tmp_path: Path, monkeypatch: Any) -> None:
    """A visual event whose image path is outside the artifacts dir adds no frame."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    manager = app.state.manager
    client = TestClient(app)

    create = client.post("/api/jobs", headers=_AUTH_HEADERS, json={"job": "Replay path check"})
    job_id = create.json()["job_id"]

    # Inject an extra visual_update that points outside run_<job_id>/.
    manager._events[job_id].append(
        {
            "id": 999,
            "job_id": job_id,
            "ts": "2026-05-27T00:01:00Z",
            "step": 2,
            "event_type": "visual_update",
            "payload": {
                "kind": "see_screen",
                "image_meta": {
                    "path": str((tmp_path / "outside.png").resolve()),
                    "width": 100,
                    "height": 100,
                    "grid": True,
                },
            },
        }
    )

    replay = client.get(f"/api/jobs/{job_id}/replay?limit=500", headers=_AUTH_HEADERS)
    assert replay.status_code == 200
    payload = replay.json()
    # Still only the frame from the seeded in-artifacts screenshot.
    assert payload["total_frames"] == 1


def test_analytics_endpoint_groups_by_category_and_time(tmp_path: Path, monkeypatch: Any) -> None:
    """Analytics reports overall, per-category and per-day aggregates."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    manager = app.state.manager
    client = TestClient(app)

    def create(objective: str) -> str:
        return client.post("/api/jobs", headers=_AUTH_HEADERS, json={"job": objective}).json()["job_id"]

    browser_completed = create("Open amazon.de and checkout")
    browser_failed = create("Open website and login")
    terminal_completed = create("Run a shell command to inspect files")

    # Stage terminal states directly on the fake's records.
    manager._jobs[browser_completed].update(
        status="completed",
        ended_at="2026-05-27T00:10:00Z",
        steps=4,
        created_at="2026-05-27T00:00:01Z",
        usage={**manager._jobs[browser_completed]["usage"], "estimated_cost_usd": 0.12},
    )
    manager._jobs[browser_failed].update(
        status="failed",
        ended_at="2026-05-28T00:10:00Z",
        steps=6,
        created_at="2026-05-28T00:00:01Z",
        usage={**manager._jobs[browser_failed]["usage"], "estimated_cost_usd": 0.24},
    )
    manager._jobs[terminal_completed].update(
        status="completed",
        ended_at="2026-05-28T00:15:00Z",
        steps=10,
        created_at="2026-05-28T00:00:02Z",
        usage={**manager._jobs[terminal_completed]["usage"], "estimated_cost_usd": 0.05},
    )

    analytics = client.get("/api/analytics", headers=_AUTH_HEADERS)
    assert analytics.status_code == 200
    payload = analytics.json()

    assert payload["total_jobs"] == 3
    assert payload["finished_jobs"] == 3
    assert payload["completed_jobs"] == 2
    assert payload["failed_jobs"] == 1
    assert payload["success_rate"] == 66.67
    assert payload["avg_steps"] == 6.67
    assert payload["avg_cost_usd"] == 0.136667

    browser = next(row for row in payload["by_category"] if row["label"] == "Browser / web")
    terminal = next(row for row in payload["by_category"] if row["label"] == "Files / terminal")
    assert browser["finished_jobs"] == 2
    assert browser["success_rate"] == 50.0
    assert browser["avg_steps"] == 5.0
    assert terminal["success_rate"] == 100.0

    assert [row["label"] for row in payload["timeline"]] == ["2026-05-27", "2026-05-28"]


def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
    """``disable_ui`` switches ``/`` between the HTML monitor and a JSON stub."""
    app_enabled, _ = _build_app(tmp_path / "enabled", monkeypatch, disable_ui=False)
    client_enabled = TestClient(app_enabled)

    root_enabled = client_enabled.get("/")
    assert root_enabled.status_code == 200
    assert "ScreenJob Monitor" in root_enabled.text
    assert "Success by Objective Category" in root_enabled.text

    js_enabled = client_enabled.get("/ui/monitoring.js")
    assert js_enabled.status_code == 200
    assert "const tokenInput" in js_enabled.text

    app_disabled, _ = _build_app(tmp_path / "disabled", monkeypatch, disable_ui=True)
    client_disabled = TestClient(app_disabled)

    root_disabled = client_disabled.get("/")
    assert root_disabled.status_code == 200
    assert root_disabled.json()["ui_disabled"] is True