from typing import Any, Dict

from skill.agent_runner import ClickthroughAgentRunner
from skill.clickthrough_skill import ActionPlan, ClickthroughSkill


class DummySkill(ClickthroughSkill):
    """Stub skill that answers every call with fixed, in-memory data.

    None of the methods look at the screenshot or do any I/O; they return
    hard-coded dictionaries so the agent runner can be driven in tests.
    """

    def __init__(self):
        # The base-class initializer is not invoked here; the stub only
        # tracks the most recent plan handed to ``plan_action``.
        self.last_plan: ActionPlan | None = None

    def describe_grid(
        self,
        screenshot_base64: str,
        width: int,
        height: int,
        rows: int = 4,
        columns: int = 4,
    ) -> Dict[str, Any]:
        """Return a fixed two-cell grid; all arguments are ignored."""
        button_cell = {
            "cell_id": "dummy-grid-1",
            "label": "button",
            "bounds": [0, 0, 100, 100],
        }
        target_cell = {
            "cell_id": "dummy-grid-2",
            "label": "target",
            "bounds": [100, 0, 200, 100],
        }
        return {"grid_id": "dummy-grid", "cells": [button_cell, target_cell]}

    def plan_with_planner(
        self,
        grid_id: str,
        preferred_label: str | None = None,
        action: str = "click",
        text: str | None = None,
        comment: str | None = None,
    ) -> Dict[str, Any]:
        """Build a canned planner response for ``grid_id``.

        The chosen cell is "dummy-grid-2" when ``preferred_label`` equals
        "target"; otherwise it is the middle entry of the stub's cell list.
        ``action``, ``text`` and ``comment`` are echoed back in the plan.
        """
        cell_ids = ["dummy-grid-1", "dummy-grid-2"]
        # With two cells the middle index is 1, so the fallback also lands
        # on "dummy-grid-2".
        middle_cell = cell_ids[len(cell_ids) // 2]
        chosen = "dummy-grid-2" if preferred_label == "target" else middle_cell
        return {
            "plan": {
                "grid_id": grid_id,
                "target_cell": chosen,
                "action": action,
                "text": text,
                "comment": comment,
            },
            "result": {"success": True, "detail": "preview"},
            "descriptor": {"grid_id": grid_id},
        }

    def plan_action(self, plan: ActionPlan) -> Dict[str, Any]:
        """Remember ``plan`` on the instance and report success for its target cell."""
        self.last_plan = plan
        return {"success": True, "target_cell": plan.target_cell}

    def grid_summary(self, grid_id: str) -> Dict[str, Any]:
        """Return a fixed "ok" summary tagged with ``grid_id``."""
        return dict(grid_id=grid_id, summary="ok")

    def grid_history(self, grid_id: str) -> Dict[str, Any]:
        """Return an empty history tagged with ``grid_id``."""
        return dict(grid_id=grid_id, history=[])


def test_agent_runner_prefers_label():
    """With preferred_label="target", the run targets dummy-grid-2 and carries the stub summary."""
    agent = ClickthroughAgentRunner(DummySkill())
    outcome = agent.run_once(
        screenshot_base64="AA==",
        width=120,
        height=80,
        preferred_label="target",
    )
    assert outcome.action["target_cell"] == "dummy-grid-2"
    assert outcome.summary["summary"] == "ok"


def test_agent_runner_defaults_to_center():
    """Without a preferred label, the run still targets dummy-grid-2."""
    agent = ClickthroughAgentRunner(DummySkill())
    outcome = agent.run_once(screenshot_base64="AA==", width=120, height=80)
    assert outcome.action["target_cell"] == "dummy-grid-2"