# screenjob/tests/test_agent_tools.py

from __future__ import annotations

import logging
from pathlib import Path

from PIL import Image

import src.agent as agent_module
from src.models import RunArtifacts, RuntimeOptions


class _DummyPyAutoGUI:
    """In-memory stand-in for the ``pyautogui`` module used by these tests.

    It reports a fixed 1280x720 screen, returns a solid-colour screenshot of
    that size, and remembers the arguments of the most recent ``moveTo``,
    ``click`` and ``hotkey`` calls so tests can inspect them.
    """

    FAILSAFE = True
    PAUSE = 0.0

    # Single source for the fake screen dimensions shared by size()/screenshot().
    _SCREEN_SIZE = (1280, 720)

    def __init__(self) -> None:
        """Start with no recorded pointer or keyboard activity."""
        self.last_move_to: tuple[int, int] | None = None
        self.last_click: tuple[int, int] | None = None
        self.last_hotkey: tuple[str, ...] | None = None

    def screenshot(self) -> Image.Image:
        """Return a dark-grey RGB image with the fake screen's dimensions."""
        return Image.new("RGB", self._SCREEN_SIZE, color=(24, 24, 24))

    def size(self) -> tuple[int, int]:
        """Return the fake screen size as ``(width, height)``."""
        return self._SCREEN_SIZE

    def moveTo(self, x: int, y: int, duration: float = 0.0) -> None:  # noqa: N802
        """Record the requested pointer position; ``duration`` is ignored."""
        self.last_move_to = (x, y)

    def click(self, x: int, y: int) -> None:
        """Record the requested click position."""
        self.last_click = (x, y)

    def write(self, _: str, interval: float = 0.0) -> None:
        """Accept typed text and discard it."""

    def press(self, _: str) -> None:
        """Accept a single key press and discard it."""

    def hotkey(self, *keys: str) -> None:
        """Record the key combination as a tuple, in the order given."""
        # *keys is already a tuple, so it can be stored as-is.
        self.last_hotkey = keys


def _build_agent(tmp_path: Path, monkeypatch) -> agent_module.ScreenJobAgent:
    """Build a ``ScreenJobAgent`` backed by a fake GUI and a temporary run directory.

    Args:
        tmp_path: Per-test temporary directory; the run root is ``tmp_path / "run"``.
        monkeypatch: Fixture used to replace ``agent_module.pyautogui`` with a
            ``_DummyPyAutoGUI`` instance and ``agent_module.time.sleep`` with a
            function that returns immediately.

    Returns:
        An agent constructed with a placeholder client, a named test logger,
        artifacts rooted in the temporary run directory and default-style
        runtime options for the ``gpt-5.4-mini`` model.
    """
    # Patch before constructing the agent so it never sees the real backend.
    monkeypatch.setattr(agent_module, "pyautogui", _DummyPyAutoGUI())
    monkeypatch.setattr(agent_module.time, "sleep", lambda _: None)

    run_root = tmp_path / "run"
    run_root.mkdir(parents=True, exist_ok=True)

    # logs_dir / shots_dir / enhance_dir each live directly under the run root.
    artifact_dirs = {
        f"{name}_dir": run_root / name for name in ("logs", "shots", "enhance")
    }
    artifacts = RunArtifacts(
        run_id="test_run",
        root_dir=run_root,
        log_file=run_root / "screenjob.log",
        **artifact_dirs,
    )

    return agent_module.ScreenJobAgent(
        client=object(),  # type: ignore[arg-type]
        logger=logging.getLogger("screenjob-test-agent"),
        artifacts=artifacts,
        options=RuntimeOptions(model="gpt-5.4-mini"),
    )


def test_task_complete_captures_return_and_data(tmp_path: Path, monkeypatch) -> None:
    """``_tool_task_complete`` echoes ``return``/``data`` and stores them on the agent."""
    summary = "Task completed successfully"
    payload = "file1\nfile2"
    agent = _build_agent(tmp_path, monkeypatch)

    outcome = agent._tool_task_complete({"return": summary, "data": payload})

    # The tool result mirrors the inputs and carries no verification entry.
    assert outcome["ok"] is True
    assert outcome["return"] == summary
    assert outcome["data"] == payload
    assert "verification" not in outcome

    # The same values are kept on the agent as its final result and data.
    assert agent.final_result == summary
    assert agent.final_data == payload


def test_click_supports_directional_offsets(tmp_path: Path, monkeypatch) -> None:
    """``_tool_click`` accepts directional offsets together with an ``offset`` dict.

    The expected point (110, 102) matches x = 100 + 7 + 3 and y = 100 - 2 + 4.
    """
    agent = _build_agent(tmp_path, monkeypatch)
    request = {
        "coordinate": {"x": 100, "y": 100},
        "offset_up": "2px",  # string with a unit suffix
        "offset_right": 7,  # plain integer
        "offset": {"x": 3, "y": 4},
        "sleep_after_seconds": 0,
    }

    outcome = agent._tool_click(request)

    assert outcome["ok"] is True
    assert outcome["clicked"] == {"x": 110, "y": 102}


def test_enhance_defaults_to_small_ui_preset(tmp_path: Path, monkeypatch) -> None:
    """With only a coordinate given, ``_tool_enhance`` reports small/ui/scale 4."""
    agent = _build_agent(tmp_path, monkeypatch)

    outcome = agent._tool_enhance({"coordinate": {"x": 100, "y": 120}})
    assert outcome["ok"] is True

    details = outcome["meta"]
    expected_defaults = {"region": "small", "mode": "ui", "scale": 4}
    for field, wanted in expected_defaults.items():
        assert details[field] == wanted

    # The reported image file must exist on disk.
    assert Path(details["path"]).exists()

    # The reported target pixel must have non-negative coordinates.
    for axis in ("x", "y"):
        assert details["target_pixel"][axis] >= 0


def test_enhance_supports_text_mode_and_scale_clamp(tmp_path: Path, monkeypatch) -> None:
    """``_tool_enhance`` honours region/mode and clamps scale and coordinates.

    A requested scale of 99 is expected back as 6, and the off-screen point
    (-99, 9999) is expected to map to (0, 719) on the 1280x720 fake screen
    while the original request is still reported unchanged.
    """
    agent = _build_agent(tmp_path, monkeypatch)
    request = {
        "coordinate": {"x": -99, "y": 9999},
        "region": "medium",
        "mode": "text",
        "scale": 99,
    }

    outcome = agent._tool_enhance(request)
    assert outcome["ok"] is True

    details = outcome["meta"]
    expected = {
        "region": "medium",
        "mode": "text",
        "scale": 6,
        "requested_coord": {"x": -99, "y": 9999},
        "source_coord": {"x": 0, "y": 719},
    }
    for field, wanted in expected.items():
        assert details[field] == wanted

    # The reported image file must exist on disk.
    assert Path(details["path"]).exists()


def test_press_key_supports_hotkey_combo(tmp_path: Path, monkeypatch) -> None:
    """``_tool_press_key`` reports ``"meta+r"`` as ``"win+r"`` and sends it as a hotkey."""
    agent = _build_agent(tmp_path, monkeypatch)

    outcome = agent._tool_press_key({"key": "meta+r"})

    assert outcome["ok"] is True
    assert outcome["key"] == "win+r"
    assert outcome["message"] == "Key combo executed."

    # _build_agent swapped agent_module.pyautogui for the recording double.
    fake_gui = agent_module.pyautogui
    assert fake_gui.last_hotkey == ("win", "r")


def test_context_compaction_trigger_and_payload(tmp_path: Path, monkeypatch) -> None:
    """Compaction triggers for the seeded state and its payload names the objective."""
    agent = _build_agent(tmp_path, monkeypatch)

    # Seed the agent: step 4, last compaction at step 0, decay window of 4 steps.
    agent.options.screen_context_decay_steps = 4
    seeded_state = {
        "objective": "Open settings app",
        "previous_response_id": "resp_123",
        "step": 4,
        "last_context_compact_step": 0,
        "recent_tool_summaries": ["step=1 tool=see_screen status=ok"],
        "last_screen_data_url": "data:image/png;base64,abc",
        "last_screen_meta": {"width": 1280, "height": 720, "path": "C:/tmp/frame.png"},
    }
    for attr_name, attr_value in seeded_state.items():
        setattr(agent, attr_name, attr_value)

    assert agent._should_compact_context() is True

    compacted = agent._build_compacted_pending_input()
    assert len(compacted) == 2

    # The first entry's leading text part carries the notice and the objective.
    first_text = compacted[0]["content"][0]["text"]
    assert "Context compaction activated" in first_text
    assert "Open settings app" in first_text