feat: finalize production cleanup with structured agent responses and project governance
This commit is contained in:
86
tests/test_agent_tools.py
Normal file
86
tests/test_agent_tools.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from PIL import Image
|
||||
|
||||
import src.agent as agent_module
|
||||
from src.models import RunArtifacts, RuntimeOptions
|
||||
|
||||
|
||||
class _DummyPyAutoGUI:
    """Minimal in-memory stand-in for the ``pyautogui`` module.

    It reports a fixed 1280x720 screen, returns a blank image for
    screenshots, ignores keyboard input, and remembers the coordinates of
    the most recent ``moveTo`` and ``click`` calls.
    """

    # Same attribute names as pyautogui's module-level settings.
    FAILSAFE = True
    PAUSE = 0.0

    def __init__(self) -> None:
        # Both stay None until the corresponding pointer call happens.
        self.last_move_to: tuple[int, int] | None = None
        self.last_click: tuple[int, int] | None = None

    def screenshot(self) -> Image.Image:
        """Return a new dark-grey 1280x720 RGB image."""
        return Image.new("RGB", (1280, 720), color=(24, 24, 24))

    def size(self) -> tuple[int, int]:
        """Return the fixed screen size as ``(width, height)``."""
        return (1280, 720)

    def moveTo(self, x: int, y: int, duration: float = 0.0) -> None:  # noqa: N802
        """Record the target point; ``duration`` is accepted and ignored."""
        self.last_move_to = (x, y)

    def click(self, x: int, y: int) -> None:
        """Record the clicked point."""
        self.last_click = (x, y)

    def write(self, _: str, interval: float = 0.0) -> None:
        """Accept typed text and discard it."""
        return None

    def press(self, _: str) -> None:
        """Accept a key name and discard it."""
        return None
|
||||
|
||||
|
||||
def _build_agent(tmp_path: Path, monkeypatch) -> agent_module.ScreenJobAgent:
    """Build a ``ScreenJobAgent`` whose GUI and sleep calls are replaced by fakes.

    Args:
        tmp_path: Directory under which the ``run`` artifact root is created.
        monkeypatch: pytest's monkeypatch fixture, used to swap
            ``pyautogui`` and ``time.sleep`` on the agent module.

    Returns:
        An agent constructed with a placeholder client object, a named test
        logger, and artifact paths rooted at ``tmp_path / "run"``.
    """
    # Route every pyautogui call made through the agent module to the dummy.
    monkeypatch.setattr(agent_module, "pyautogui", _DummyPyAutoGUI())
    # NOTE(review): agent_module.time is presumably the stdlib ``time`` module,
    # in which case sleep is patched for the whole test, not only the agent.
    monkeypatch.setattr(agent_module.time, "sleep", lambda _: None)

    run_dir = tmp_path / "run"
    run_dir.mkdir(parents=True, exist_ok=True)

    # Only the run root is created here; the sub-directories and the log
    # file are passed as paths without being created.
    artifacts = RunArtifacts(
        run_id="test_run",
        root_dir=run_dir,
        logs_dir=run_dir / "logs",
        shots_dir=run_dir / "shots",
        enhance_dir=run_dir / "enhance",
        log_file=run_dir / "screenjob.log",
    )

    return agent_module.ScreenJobAgent(
        client=object(),  # type: ignore[arg-type]
        logger=logging.getLogger("screenjob-test-agent"),
        artifacts=artifacts,
        options=RuntimeOptions(model="gpt-5.4-mini"),
    )
|
||||
|
||||
|
||||
def test_task_complete_captures_return_and_data(tmp_path: Path, monkeypatch) -> None:
    """``_tool_task_complete`` echoes ``return``/``data`` and stores them on the agent."""
    agent = _build_agent(tmp_path, monkeypatch)
    payload = {"return": "Task completed successfully", "data": "file1\nfile2"}

    result = agent._tool_task_complete(payload)

    # The tool result mirrors the payload ...
    assert result["ok"] is True
    assert result["return"] == "Task completed successfully"
    assert result["data"] == "file1\nfile2"
    # ... and the same values are kept on the agent as its final outcome.
    assert agent.final_result == "Task completed successfully"
    assert agent.final_data == "file1\nfile2"
|
||||
|
||||
|
||||
def test_click_supports_directional_offsets(tmp_path: Path, monkeypatch) -> None:
    """``_tool_click`` combines directional offsets with the ``offset`` mapping."""
    agent = _build_agent(tmp_path, monkeypatch)

    click_result = agent._tool_click(
        {
            "coordinate": {"x": 100, "y": 100},
            "offset_up": "2px",  # string with a unit suffix
            "offset_right": 7,  # plain integer
            "offset": {"x": 3, "y": 4},
            "sleep_after_seconds": 0,
        }
    )

    assert click_result["ok"] is True
    # Expected point: x = 100 + 7 + 3 = 110, y = 100 - 2 + 4 = 102.
    assert click_result["clicked"] == {"x": 110, "y": 102}
|
||||
68
tests/test_cli.py
Normal file
68
tests/test_cli.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import src.cli as cli_module
|
||||
from src.config import AppConfig
|
||||
from src.models import AgentResult, RunArtifacts, UsageSummary
|
||||
|
||||
|
||||
def test_cli_emits_structured_return_and_data(monkeypatch: Any, capsys, tmp_path: Path) -> None:
    """The CLI prints JSON carrying ``return``/``data`` both nested and at top level.

    Config loading, the safety check, the job runner and the OpenAI client
    factory are all replaced on ``src.cli`` so ``main`` runs without touching
    the network or a real desktop.
    """
    config = AppConfig(
        openai_api_key="test_key",
        screenjob_token="test_token",
        disable_ui=False,
        default_model="gpt-5.4-mini",
        safety_model="gpt-5.4-mini",
        host="127.0.0.1",
        port=8787,
        runs_dir=tmp_path / "runs",
        db_path=tmp_path / "screenjob.db",
    )
    config.runs_dir.mkdir(parents=True, exist_ok=True)

    def fake_load_app_config(_: Path) -> AppConfig:
        return config

    def fake_assess_task_safety(*_args, **_kwargs):
        return True, "safe", {"safe": True}

    def fake_run_job(*_args, **_kwargs):
        result = AgentResult(
            completed=True,
            result="Done",
            return_message="Task completed successfully",
            data="file1.txt\nfile2.txt",
            steps=3,
            started_at=10.0,
            ended_at=12.5,
            usage=UsageSummary(total_tokens=123),
            error=None,
            cancelled=False,
        )
        # Every artifact path hangs off the same run directory.
        run_root = config.runs_dir / "run_20260527_000001"
        artifacts = RunArtifacts(
            run_id="20260527_000001",
            root_dir=run_root,
            logs_dir=run_root / "logs",
            shots_dir=run_root / "shots",
            enhance_dir=run_root / "enhance",
            log_file=run_root / "screenjob.log",
        )
        return result, artifacts

    monkeypatch.setattr(cli_module, "load_app_config", fake_load_app_config)
    monkeypatch.setattr(cli_module, "assess_task_safety", fake_assess_task_safety)
    monkeypatch.setattr(cli_module, "run_job", fake_run_job)
    monkeypatch.setattr(cli_module, "create_openai_client", lambda *_args, **_kwargs: object())

    code = cli_module.main(["Open amazon.de"])
    assert code == 0

    # Captured stdout must be a single JSON document for json.loads to accept it.
    out = capsys.readouterr().out
    payload = json.loads(out)
    assert payload["response"]["return"] == "Task completed successfully"
    assert payload["response"]["data"] == "file1.txt\nfile2.txt"
    # The same two values are also expected at the top level of the payload.
    assert payload["return"] == "Task completed successfully"
    assert payload["data"] == "file1.txt\nfile2.txt"
|
||||
@@ -49,6 +49,10 @@ class FakeJobManager:
|
||||
"objective": objective,
|
||||
"model": selected_model,
|
||||
"status": "running",
|
||||
"result": "Running",
|
||||
"response": {"return": "Running", "data": None},
|
||||
"return": "Running",
|
||||
"data": None,
|
||||
"usage": {
|
||||
"input_tokens": 10,
|
||||
"cached_input_tokens": 2,
|
||||
@@ -145,6 +149,8 @@ def test_create_job_returns_only_job_id_and_defaults_model(tmp_path: Path, monke
|
||||
status_res = client.get(f"/api/jobs/{job_id}/status", headers=headers)
|
||||
assert status_res.status_code == 200
|
||||
assert status_res.json()["job_id"] == job_id
|
||||
assert status_res.json()["response"]["return"] == "Running"
|
||||
assert "data" in status_res.json()["response"]
|
||||
|
||||
|
||||
def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:
|
||||
@@ -164,6 +170,8 @@ def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:
|
||||
|
||||
status_after = client.get(f"/api/jobs/{job_id}", headers=headers).json()
|
||||
assert status_after["status"] == "cancelling"
|
||||
assert status_after["return"] == "Running"
|
||||
assert status_after["data"] is None
|
||||
|
||||
|
||||
def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
|
||||
@@ -178,4 +186,3 @@ def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
|
||||
root_disabled = client_disabled.get("/")
|
||||
assert root_disabled.status_code == 200
|
||||
assert root_disabled.json()["ui_disabled"] is True
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from pathlib import Path
|
||||
import json
|
||||
|
||||
from src.storage import HistoryDB
|
||||
|
||||
@@ -26,6 +27,7 @@ def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
|
||||
status="completed",
|
||||
ended_at="2026-05-27T00:00:02Z",
|
||||
result="Done",
|
||||
response_json=json.dumps({"return": "Done", "data": {"files": ["a.txt", "b.txt"]}}, ensure_ascii=False),
|
||||
steps=2,
|
||||
estimated_cost_usd=0.1234,
|
||||
)
|
||||
@@ -35,6 +37,8 @@ def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
|
||||
assert job["status"] == "completed"
|
||||
assert job["model"] == "gpt-5.4-mini"
|
||||
assert job["disabled_tools"] == ["click"]
|
||||
assert job["response"]["return"] == "Done"
|
||||
assert job["response"]["data"]["files"] == ["a.txt", "b.txt"]
|
||||
assert job["usage"]["estimated_cost_usd"] == 0.1234
|
||||
|
||||
events = db.get_job_events(job_id, limit=10)
|
||||
@@ -51,3 +55,20 @@ def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
|
||||
assert stats["completed_jobs"] == 1
|
||||
assert abs(stats["total_estimated_cost"] - 0.1234) < 1e-9
|
||||
|
||||
|
||||
def test_storage_response_fallback_uses_result_when_json_missing(tmp_path: Path) -> None:
    """Without ``response_json``, ``response`` is built from the ``result`` string."""
    db = HistoryDB(tmp_path / "screenjob_test_fallback.db")
    job_id = "job_test_002"
    db.create_job(
        job_id=job_id,
        objective="Fallback check",
        model="gpt-5.4-mini",
        created_at="2026-05-27T00:00:00Z",
        safety_override=False,
        disabled_tools=[],
    )

    # Deliberately update without response_json so only ``result`` is set.
    db.update_job(job_id, status="completed", result="Legacy result string")

    job = db.get_job(job_id)
    assert job is not None
    assert job["response"]["return"] == "Legacy result string"
    assert job["response"]["data"] is None
|
||||
|
||||
Reference in New Issue
Block a user