Files
screenjob/tests/test_agent_tools.py
Space-Banane cceed18cf1
All checks were successful
CI / test (push) Successful in 7s
feat: (literally) "enhance" functionality with new parameters and improved image processing
2026-05-27 22:14:32 +02:00

154 lines
5.1 KiB
Python

from __future__ import annotations
import logging
from pathlib import Path
from PIL import Image
import src.agent as agent_module
from src.models import RunArtifacts, RuntimeOptions
class _DummyPyAutoGUI:
FAILSAFE = True
PAUSE = 0.0
def __init__(self) -> None:
self.last_move_to: tuple[int, int] | None = None
self.last_click: tuple[int, int] | None = None
self.last_hotkey: tuple[str, ...] | None = None
def screenshot(self) -> Image.Image:
return Image.new("RGB", (1280, 720), color=(24, 24, 24))
def size(self) -> tuple[int, int]:
return (1280, 720)
def moveTo(self, x: int, y: int, duration: float = 0.0) -> None: # noqa: N802
self.last_move_to = (x, y)
def click(self, x: int, y: int) -> None:
self.last_click = (x, y)
def write(self, _: str, interval: float = 0.0) -> None:
return None
def press(self, _: str) -> None:
return None
def hotkey(self, *keys: str) -> None:
self.last_hotkey = tuple(keys)
def _build_agent(tmp_path: Path, monkeypatch) -> agent_module.ScreenJobAgent:
    """Create a ``ScreenJobAgent`` wired to fakes for use in a single test.

    The agent module's ``pyautogui`` attribute is replaced with a fresh
    ``_DummyPyAutoGUI`` and its ``time.sleep`` with a no-op, both through
    ``monkeypatch``. Artifact paths all live under ``tmp_path / "run"``.

    Args:
        tmp_path: Per-test temporary directory.
        monkeypatch: Patching fixture providing ``setattr``.

    Returns:
        The constructed agent, given a bare ``object()`` as its client.
    """
    monkeypatch.setattr(agent_module, "pyautogui", _DummyPyAutoGUI())
    monkeypatch.setattr(agent_module.time, "sleep", lambda _: None)

    # Only the run root is created here; the sub-paths are handed over as-is.
    workspace = tmp_path / "run"
    workspace.mkdir(parents=True, exist_ok=True)

    return agent_module.ScreenJobAgent(
        client=object(),  # type: ignore[arg-type]
        logger=logging.getLogger("screenjob-test-agent"),
        artifacts=RunArtifacts(
            run_id="test_run",
            root_dir=workspace,
            logs_dir=workspace / "logs",
            shots_dir=workspace / "shots",
            enhance_dir=workspace / "enhance",
            log_file=workspace / "screenjob.log",
        ),
        options=RuntimeOptions(model="gpt-5.4-mini"),
    )
def test_task_complete_captures_return_and_data(tmp_path: Path, monkeypatch) -> None:
    """``_tool_task_complete`` echoes ``return``/``data`` and stores them on the agent.

    Also checks that the result carries no ``verification`` key.
    """
    summary = "Task completed successfully"
    payload = "file1\nfile2"
    agent = _build_agent(tmp_path, monkeypatch)

    outcome = agent._tool_task_complete({"return": summary, "data": payload})

    # Values reported back to the caller.
    assert outcome["ok"] is True
    assert outcome["return"] == summary
    assert outcome["data"] == payload
    assert "verification" not in outcome

    # Values retained on the agent itself.
    assert agent.final_result == summary
    assert agent.final_data == payload
def test_click_supports_directional_offsets(tmp_path: Path, monkeypatch) -> None:
    """``_tool_click`` combines a base coordinate with directional and x/y offsets."""
    agent = _build_agent(tmp_path, monkeypatch)
    click_args = {
        "coordinate": {"x": 100, "y": 100},
        "offset_up": "2px",
        "offset_right": 7,
        "offset": {"x": 3, "y": 4},
        "sleep_after_seconds": 0,
    }

    outcome = agent._tool_click(click_args)

    assert outcome["ok"] is True
    # Expected point is consistent with x = 100 + 7 + 3 and y = 100 - 2 + 4.
    assert outcome["clicked"] == {"x": 110, "y": 102}
def test_enhance_defaults_to_small_ui_preset(tmp_path: Path, monkeypatch) -> None:
    """With only a coordinate, ``_tool_enhance`` reports region ``small``, mode ``ui``, scale 4.

    Also checks that the reported output path exists and that the reported
    target pixel is non-negative on both axes.
    """
    agent = _build_agent(tmp_path, monkeypatch)

    outcome = agent._tool_enhance({"coordinate": {"x": 100, "y": 120}})

    assert outcome["ok"] is True
    details = outcome["meta"]
    expected_defaults = {"region": "small", "mode": "ui", "scale": 4}
    for field, expected in expected_defaults.items():
        assert details[field] == expected
    assert Path(details["path"]).exists()
    for axis in ("x", "y"):
        assert details["target_pixel"][axis] >= 0
def test_enhance_supports_text_mode_and_scale_clamp(tmp_path: Path, monkeypatch) -> None:
    """``_tool_enhance`` honours region/mode and limits out-of-range inputs.

    A requested scale of 99 is expected back as 6, and the off-screen
    coordinate ``(-99, 9999)`` is expected to be reported unchanged as
    ``requested_coord`` but as ``(0, 719)`` in ``source_coord``.
    """
    agent = _build_agent(tmp_path, monkeypatch)
    requested = {"x": -99, "y": 9999}
    enhance_args = {
        "coordinate": requested,
        "region": "medium",
        "mode": "text",
        "scale": 99,
    }

    outcome = agent._tool_enhance(enhance_args)

    assert outcome["ok"] is True
    details = outcome["meta"]
    expected_fields = {
        "region": "medium",
        "mode": "text",
        "scale": 6,
        "requested_coord": {"x": -99, "y": 9999},
        "source_coord": {"x": 0, "y": 719},
    }
    for field, expected in expected_fields.items():
        assert details[field] == expected
    assert Path(details["path"]).exists()
def test_press_key_supports_hotkey_combo(tmp_path: Path, monkeypatch) -> None:
    """``_tool_press_key`` with ``meta+r`` reports ``win+r`` and issues a ``("win", "r")`` hotkey."""
    agent = _build_agent(tmp_path, monkeypatch)

    outcome = agent._tool_press_key({"key": "meta+r"})

    assert outcome["ok"] is True
    assert outcome["key"] == "win+r"
    assert outcome["message"] == "Key combo executed."

    # The patched-in fake records the last hotkey it was asked to send.
    fake_gui = agent_module.pyautogui
    assert fake_gui.last_hotkey == ("win", "r")
def test_context_compaction_trigger_and_payload(tmp_path: Path, monkeypatch) -> None:
    """Context compaction triggers for the seeded state and yields a two-item payload.

    The first payload item's text must mention both the compaction notice
    and the agent's objective.
    """
    agent = _build_agent(tmp_path, monkeypatch)

    # Seed the agent state that the two compaction methods are exercised with.
    seeded_state = {
        "objective": "Open settings app",
        "previous_response_id": "resp_123",
        "step": 4,
        "last_context_compact_step": 0,
        "recent_tool_summaries": ["step=1 tool=see_screen status=ok"],
        "last_screen_data_url": "data:image/png;base64,abc",
        "last_screen_meta": {"width": 1280, "height": 720, "path": "C:/tmp/frame.png"},
    }
    for attribute, value in seeded_state.items():
        setattr(agent, attribute, value)
    agent.options.screen_context_decay_steps = 4

    assert agent._should_compact_context() is True

    compacted = agent._build_compacted_pending_input()
    assert len(compacted) == 2
    leading_text = compacted[0]["content"][0]["text"]
    assert "Context compaction activated" in leading_text
    assert "Open settings app" in leading_text