feat: finalize production cleanup with structured agent responses and project governance

This commit is contained in:
Space-Banane
2026-05-27 18:08:52 +02:00
parent a19b285232
commit c09f0ee9c0
17 changed files with 737 additions and 126 deletions

86
tests/test_agent_tools.py Normal file
View File

@@ -0,0 +1,86 @@
from __future__ import annotations
import logging
from pathlib import Path
from PIL import Image
import src.agent as agent_module
from src.models import RunArtifacts, RuntimeOptions
class _DummyPyAutoGUI:
    """Headless replacement for the ``pyautogui`` module in agent tests.

    Pointer calls only record their target coordinates and keyboard calls
    are discarded, so no real display or input device is touched.
    """

    # NOTE(review): presumably mirrors pyautogui's module-level settings of
    # the same names — confirm against how the agent reads them.
    FAILSAFE = True
    PAUSE = 0.0

    def __init__(self) -> None:
        # Nothing has been moved or clicked yet.
        self.last_move_to: tuple[int, int] | None = None
        self.last_click: tuple[int, int] | None = None

    def screenshot(self) -> Image.Image:
        """Return a blank dark-grey 1280x720 RGB frame."""
        frame_size = (1280, 720)
        background = (24, 24, 24)
        return Image.new("RGB", frame_size, color=background)

    def size(self) -> tuple[int, int]:
        """Report a fixed screen size matching the fake screenshot."""
        return (1280, 720)

    def moveTo(self, x: int, y: int, duration: float = 0.0) -> None:  # noqa: N802
        """Remember the requested pointer position; ``duration`` is ignored."""
        self.last_move_to = (x, y)

    def click(self, x: int, y: int) -> None:
        """Remember the requested click position."""
        self.last_click = (x, y)

    def write(self, _: str, interval: float = 0.0) -> None:
        """Accept typed text and do nothing with it."""

    def press(self, _: str) -> None:
        """Accept a key press and do nothing with it."""
def _build_agent(tmp_path: Path, monkeypatch) -> agent_module.ScreenJobAgent:
    """Create a ``ScreenJobAgent`` wired to fakes and rooted under ``tmp_path``.

    Args:
        tmp_path: Per-test temporary directory; a ``run`` subdirectory is
            created inside it and used as the artifact root.
        monkeypatch: pytest fixture used to replace ``pyautogui`` and
            ``time.sleep`` on the agent module for the duration of the test.

    Returns:
        An agent whose client is a bare placeholder object.
    """
    # Swap the GUI driver for the recording fake and make sleeps instant.
    monkeypatch.setattr(agent_module, "pyautogui", _DummyPyAutoGUI())
    monkeypatch.setattr(agent_module.time, "sleep", lambda _: None)

    workspace = tmp_path / "run"
    workspace.mkdir(parents=True, exist_ok=True)

    # Only the run root is created here; the sub-paths are passed as-is.
    run_artifacts = RunArtifacts(
        run_id="test_run",
        root_dir=workspace,
        logs_dir=workspace / "logs",
        shots_dir=workspace / "shots",
        enhance_dir=workspace / "enhance",
        log_file=workspace / "screenjob.log",
    )

    return agent_module.ScreenJobAgent(
        client=object(),  # type: ignore[arg-type]
        logger=logging.getLogger("screenjob-test-agent"),
        artifacts=run_artifacts,
        options=RuntimeOptions(model="gpt-5.4-mini"),
    )
def test_task_complete_captures_return_and_data(tmp_path: Path, monkeypatch) -> None:
    """``_tool_task_complete`` echoes its payload and stores it on the agent."""
    agent = _build_agent(tmp_path, monkeypatch)
    message = "Task completed successfully"
    listing = "file1\nfile2"

    outcome = agent._tool_task_complete({"return": message, "data": listing})

    # The tool response carries the payload back to the caller...
    assert outcome["ok"] is True
    assert outcome["return"] == message
    assert outcome["data"] == listing
    # ...and the same values are kept on the agent as its final output.
    assert agent.final_result == message
    assert agent.final_data == listing
def test_click_supports_directional_offsets(tmp_path: Path, monkeypatch) -> None:
    """``_tool_click`` combines directional and x/y offsets with the base point.

    The expected point is consistent with x = 100 + 7 + 3 and
    y = 100 - 2 + 4, where ``"2px"`` is read as two pixels upward.
    """
    agent = _build_agent(tmp_path, monkeypatch)
    request = {
        "coordinate": {"x": 100, "y": 100},
        "offset_up": "2px",
        "offset_right": 7,
        "offset": {"x": 3, "y": 4},
        "sleep_after_seconds": 0,
    }

    outcome = agent._tool_click(request)

    assert outcome["ok"] is True
    assert outcome["clicked"] == {"x": 110, "y": 102}

68
tests/test_cli.py Normal file
View File

@@ -0,0 +1,68 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
import src.cli as cli_module
from src.config import AppConfig
from src.models import AgentResult, RunArtifacts, UsageSummary
def test_cli_emits_structured_return_and_data(monkeypatch: Any, capsys, tmp_path: Path) -> None:
    """The CLI prints JSON exposing ``return``/``data`` at both nesting levels.

    Config loading, the safety check, the job runner and the OpenAI client
    factory are all stubbed on the CLI module, so only the output shaping of
    ``main`` is exercised.
    """
    expected_return = "Task completed successfully"
    expected_data = "file1.txt\nfile2.txt"

    config = AppConfig(
        openai_api_key="test_key",
        screenjob_token="test_token",
        disable_ui=False,
        default_model="gpt-5.4-mini",
        safety_model="gpt-5.4-mini",
        host="127.0.0.1",
        port=8787,
        runs_dir=tmp_path / "runs",
        db_path=tmp_path / "screenjob.db",
    )
    config.runs_dir.mkdir(parents=True, exist_ok=True)

    def fake_run_job(*_args, **_kwargs):
        # A finished three-step run plus the artifact layout it would own.
        finished = AgentResult(
            completed=True,
            result="Done",
            return_message=expected_return,
            data=expected_data,
            steps=3,
            started_at=10.0,
            ended_at=12.5,
            usage=UsageSummary(total_tokens=123),
            error=None,
            cancelled=False,
        )
        run_root = config.runs_dir / "run_20260527_000001"
        layout = RunArtifacts(
            run_id="20260527_000001",
            root_dir=run_root,
            logs_dir=run_root / "logs",
            shots_dir=run_root / "shots",
            enhance_dir=run_root / "enhance",
            log_file=run_root / "screenjob.log",
        )
        return finished, layout

    monkeypatch.setattr(cli_module, "load_app_config", lambda _: config)
    monkeypatch.setattr(
        cli_module,
        "assess_task_safety",
        lambda *_args, **_kwargs: (True, "safe", {"safe": True}),
    )
    monkeypatch.setattr(cli_module, "run_job", fake_run_job)
    monkeypatch.setattr(cli_module, "create_openai_client", lambda *_args, **_kwargs: object())

    exit_code = cli_module.main(["Open amazon.de"])
    assert exit_code == 0

    # Everything written to stdout must parse as a single JSON document.
    payload = json.loads(capsys.readouterr().out)
    nested = payload["response"]
    assert nested["return"] == expected_return
    assert nested["data"] == expected_data
    assert payload["return"] == expected_return
    assert payload["data"] == expected_data

View File

@@ -49,6 +49,10 @@ class FakeJobManager:
"objective": objective,
"model": selected_model,
"status": "running",
"result": "Running",
"response": {"return": "Running", "data": None},
"return": "Running",
"data": None,
"usage": {
"input_tokens": 10,
"cached_input_tokens": 2,
@@ -145,6 +149,8 @@ def test_create_job_returns_only_job_id_and_defaults_model(tmp_path: Path, monke
status_res = client.get(f"/api/jobs/{job_id}/status", headers=headers)
assert status_res.status_code == 200
assert status_res.json()["job_id"] == job_id
assert status_res.json()["response"]["return"] == "Running"
assert "data" in status_res.json()["response"]
def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:
@@ -164,6 +170,8 @@ def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:
status_after = client.get(f"/api/jobs/{job_id}", headers=headers).json()
assert status_after["status"] == "cancelling"
assert status_after["return"] == "Running"
assert status_after["data"] is None
def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
@@ -178,4 +186,3 @@ def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
root_disabled = client_disabled.get("/")
assert root_disabled.status_code == 200
assert root_disabled.json()["ui_disabled"] is True

View File

@@ -1,4 +1,5 @@
from pathlib import Path
import json
from src.storage import HistoryDB
@@ -26,6 +27,7 @@ def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
status="completed",
ended_at="2026-05-27T00:00:02Z",
result="Done",
response_json=json.dumps({"return": "Done", "data": {"files": ["a.txt", "b.txt"]}}, ensure_ascii=False),
steps=2,
estimated_cost_usd=0.1234,
)
@@ -35,6 +37,8 @@ def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
assert job["status"] == "completed"
assert job["model"] == "gpt-5.4-mini"
assert job["disabled_tools"] == ["click"]
assert job["response"]["return"] == "Done"
assert job["response"]["data"]["files"] == ["a.txt", "b.txt"]
assert job["usage"]["estimated_cost_usd"] == 0.1234
events = db.get_job_events(job_id, limit=10)
@@ -51,3 +55,20 @@ def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
assert stats["completed_jobs"] == 1
assert abs(stats["total_estimated_cost"] - 0.1234) < 1e-9
def test_storage_response_fallback_uses_result_when_json_missing(tmp_path: Path) -> None:
    """A job updated without ``response_json`` still yields a ``response``.

    The plain ``result`` string is expected to appear as
    ``response["return"]`` with ``response["data"]`` left as ``None``.
    """
    job_id = "job_test_002"
    legacy_text = "Legacy result string"
    history = HistoryDB(tmp_path / "screenjob_test_fallback.db")

    history.create_job(
        job_id=job_id,
        objective="Fallback check",
        model="gpt-5.4-mini",
        created_at="2026-05-27T00:00:00Z",
        safety_override=False,
        disabled_tools=[],
    )
    # Deliberately omit response_json so only the result string is stored.
    history.update_job(job_id, status="completed", result=legacy_text)

    stored = history.get_job(job_id)
    assert stored is not None
    response = stored["response"]
    assert response["return"] == legacy_text
    assert response["data"] is None