feat: finalize production cleanup with structured agent responses and project governance

2026-05-27 18:08:52 +02:00
parent a19b285232
commit c09f0ee9c0
17 changed files with 737 additions and 126 deletions
--- a/tests/test_agent_tools.py
+++ b/tests/test_agent_tools.py
@@ -0,0 +1,86 @@
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+
+from PIL import Image
+
+import src.agent as agent_module
+from src.models import RunArtifacts, RuntimeOptions
+
+
+class _DummyPyAutoGUI:  # In-memory stand-in that _build_agent patches over agent_module.pyautogui
+    FAILSAFE = True  # same attribute names as pyautogui's FAILSAFE / PAUSE settings
+    PAUSE = 0.0
+
+    def __init__(self) -> None:
+        self.last_move_to: tuple[int, int] | None = None  # most recent moveTo() target; None until called
+        self.last_click: tuple[int, int] | None = None  # most recent click() target; None until called
+
+    def screenshot(self) -> Image.Image:
+        return Image.new("RGB", (1280, 720), color=(24, 24, 24))  # fresh solid-colour image, same dimensions as size()
+
+    def size(self) -> tuple[int, int]:
+        return (1280, 720)  # fixed (width, height) pair matching screenshot()
+
+    def moveTo(self, x: int, y: int, duration: float = 0.0) -> None:  # noqa: N802
+        self.last_move_to = (x, y)  # `duration` is accepted but ignored; only the target is recorded
+
+    def click(self, x: int, y: int) -> None:
+        self.last_click = (x, y)  # no real click happens; only the target is recorded
+
+    def write(self, _: str, interval: float = 0.0) -> None:  # no-op: text and interval are discarded
+        return None
+
+    def press(self, _: str) -> None:  # no-op: the key name is discarded
+        return None
+
+
+def _build_agent(tmp_path: Path, monkeypatch) -> agent_module.ScreenJobAgent:  # Build a ScreenJobAgent with GUI and sleep stubbed out
+    dummy_gui = _DummyPyAutoGUI()
+    monkeypatch.setattr(agent_module, "pyautogui", dummy_gui)  # agent code looking up `pyautogui` now gets the dummy
+    monkeypatch.setattr(agent_module.time, "sleep", lambda _: None)  # sleep becomes a no-op on whatever agent_module.time is (presumably stdlib time)
+
+    run_dir = tmp_path / "run"
+    run_dir.mkdir(parents=True, exist_ok=True)  # only run_dir is created here; the sub-paths below are not
+    artifacts = RunArtifacts(
+        run_id="test_run",
+        root_dir=run_dir,
+        logs_dir=run_dir / "logs",
+        shots_dir=run_dir / "shots",
+        enhance_dir=run_dir / "enhance",
+        log_file=run_dir / "screenjob.log",
+    )
+    options = RuntimeOptions(model="gpt-5.4-mini")  # every other RuntimeOptions field keeps its default
+    logger = logging.getLogger("screenjob-test-agent")  # named logger; no handlers are configured in this helper
+    return agent_module.ScreenJobAgent(
+        client=object(),  # type: ignore[arg-type]
+        logger=logger,
+        artifacts=artifacts,
+        options=options,
+    )
+
+
+def test_task_complete_captures_return_and_data(tmp_path: Path, monkeypatch) -> None:  # task_complete echoes return/data and stores them on the agent
+    agent = _build_agent(tmp_path, monkeypatch)
+    result = agent._tool_task_complete({"return": "Task completed successfully", "data": "file1\nfile2"})  # calls the tool handler directly with a dict payload
+    assert result["ok"] is True
+    assert result["return"] == "Task completed successfully"  # tool result echoes both fields unchanged
+    assert result["data"] == "file1\nfile2"
+    assert agent.final_result == "Task completed successfully"  # the same values are also kept on the agent
+    assert agent.final_data == "file1\nfile2"
+
+
+def test_click_supports_directional_offsets(tmp_path: Path, monkeypatch) -> None:  # directional and x/y offsets are all applied to the click target
+    agent = _build_agent(tmp_path, monkeypatch)
+    click_result = agent._tool_click(
+        {
+            "coordinate": {"x": 100, "y": 100},  # base point before any offset
+            "offset_up": "2px",  # given as a string with a "px" suffix
+            "offset_right": 7,  # given as a plain int
+            "offset": {"x": 3, "y": 4},  # generic x/y offset supplied alongside the directional ones
+            "sleep_after_seconds": 0,
+        }
+    )
+    assert click_result["ok"] is True
+    assert click_result["clicked"] == {"x": 110, "y": 102}  # consistent with x = 100 + 7 + 3 and y = 100 - 2 + 4
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+import src.cli as cli_module
+from src.config import AppConfig
+from src.models import AgentResult, RunArtifacts, UsageSummary
+
+
+def test_cli_emits_structured_return_and_data(monkeypatch: Any, capsys, tmp_path: Path) -> None:  # CLI output carries return/data nested and top-level
+    config = AppConfig(
+        openai_api_key="test_key",
+        screenjob_token="test_token",
+        disable_ui=False,
+        default_model="gpt-5.4-mini",
+        safety_model="gpt-5.4-mini",
+        host="127.0.0.1",
+        port=8787,
+        runs_dir=tmp_path / "runs",  # both paths live under pytest's per-test tmp_path
+        db_path=tmp_path / "screenjob.db",
+    )
+    config.runs_dir.mkdir(parents=True, exist_ok=True)
+
+    def fake_load_app_config(_: Path) -> AppConfig:  # ignores the path argument and returns the config above
+        return config
+
+    def fake_assess_task_safety(*_args, **_kwargs):  # accepts any arguments and always returns the same tuple
+        return True, "safe", {"safe": True}
+
+    def fake_run_job(*_args, **_kwargs):  # returns a canned (AgentResult, RunArtifacts) pair
+        result = AgentResult(
+            completed=True,
+            result="Done",
+            return_message="Task completed successfully",  # asserted below under the "return" keys
+            data="file1.txt\nfile2.txt",  # asserted below under the "data" keys
+            steps=3,
+            started_at=10.0,
+            ended_at=12.5,
+            usage=UsageSummary(total_tokens=123),
+            error=None,
+            cancelled=False,
+        )
+        artifacts = RunArtifacts(
+            run_id="20260527_000001",
+            root_dir=config.runs_dir / "run_20260527_000001",  # paths are only constructed here, never created
+            logs_dir=config.runs_dir / "run_20260527_000001" / "logs",
+            shots_dir=config.runs_dir / "run_20260527_000001" / "shots",
+            enhance_dir=config.runs_dir / "run_20260527_000001" / "enhance",
+            log_file=config.runs_dir / "run_20260527_000001" / "screenjob.log",
+        )
+        return result, artifacts
+
+    monkeypatch.setattr(cli_module, "load_app_config", fake_load_app_config)  # all four names are replaced on cli_module itself
+    monkeypatch.setattr(cli_module, "assess_task_safety", fake_assess_task_safety)
+    monkeypatch.setattr(cli_module, "run_job", fake_run_job)
+    monkeypatch.setattr(cli_module, "create_openai_client", lambda *_args, **_kwargs: object())  # placeholder client object
+
+    code = cli_module.main(["Open amazon.de"])  # argv list with a single positional item (presumably the objective)
+    assert code == 0
+
+    out = capsys.readouterr().out
+    payload = json.loads(out)  # the whole of captured stdout must parse as one JSON document
+    assert payload["response"]["return"] == "Task completed successfully"  # nested under "response"
+    assert payload["response"]["data"] == "file1.txt\nfile2.txt"
+    assert payload["return"] == "Task completed successfully"  # same values repeated at the top level
+    assert payload["data"] == "file1.txt\nfile2.txt"
--- a/tests/test_server_api.py
+++ b/tests/test_server_api.py
@@ -49,6 +49,10 @@ class FakeJobManager:
            "objective": objective,
            "model": selected_model,
            "status": "running",
+            "result": "Running",
+            "response": {"return": "Running", "data": None},
+            "return": "Running",
+            "data": None,
            "usage": {
                "input_tokens": 10,
                "cached_input_tokens": 2,
@@ -145,6 +149,8 @@ def test_create_job_returns_only_job_id_and_defaults_model(tmp_path: Path, monke
    status_res = client.get(f"/api/jobs/{job_id}/status", headers=headers)
    assert status_res.status_code == 200
    assert status_res.json()["job_id"] == job_id
+    assert status_res.json()["response"]["return"] == "Running"
+    assert "data" in status_res.json()["response"]


 def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:
@@ -164,6 +170,8 @@ def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:

    status_after = client.get(f"/api/jobs/{job_id}", headers=headers).json()
    assert status_after["status"] == "cancelling"
+    assert status_after["return"] == "Running"
+    assert status_after["data"] is None


 def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
@@ -178,4 +186,3 @@ def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
    root_disabled = client_disabled.get("/")
    assert root_disabled.status_code == 200
    assert root_disabled.json()["ui_disabled"] is True
-
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+import json

 from src.storage import HistoryDB

@@ -26,6 +27,7 @@ def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
        status="completed",
        ended_at="2026-05-27T00:00:02Z",
        result="Done",
+        response_json=json.dumps({"return": "Done", "data": {"files": ["a.txt", "b.txt"]}}, ensure_ascii=False),
        steps=2,
        estimated_cost_usd=0.1234,
    )
@@ -35,6 +37,8 @@ def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
    assert job["status"] == "completed"
    assert job["model"] == "gpt-5.4-mini"
    assert job["disabled_tools"] == ["click"]
+    assert job["response"]["return"] == "Done"
+    assert job["response"]["data"]["files"] == ["a.txt", "b.txt"]
    assert job["usage"]["estimated_cost_usd"] == 0.1234

    events = db.get_job_events(job_id, limit=10)
@@ -51,3 +55,20 @@ def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
    assert stats["completed_jobs"] == 1
    assert abs(stats["total_estimated_cost"] - 0.1234) < 1e-9

+
+def test_storage_response_fallback_uses_result_when_json_missing(tmp_path: Path) -> None:  # without response_json, "response" is built from `result`
+    db = HistoryDB(tmp_path / "screenjob_test_fallback.db")  # database file under pytest's per-test tmp_path
+    job_id = "job_test_002"
+    db.create_job(
+        job_id=job_id,
+        objective="Fallback check",
+        model="gpt-5.4-mini",
+        created_at="2026-05-27T00:00:00Z",
+        safety_override=False,
+        disabled_tools=[],
+    )
+    db.update_job(job_id, status="completed", result="Legacy result string")  # deliberately passes no response_json
+    job = db.get_job(job_id)
+    assert job is not None
+    assert job["response"]["return"] == "Legacy result string"  # "return" falls back to the plain result string
+    assert job["response"]["data"] is None  # no structured data was stored for this job