from dataclasses import dataclass
from typing import Any, Dict, Sequence

from .clickthrough_skill import ActionPlan, ClickthroughSkill


@dataclass
class AgentRunResult:
    """Values collected during one ``ClickthroughAgentRunner.run_once`` call."""

    # Return value of ``skill.grid_summary`` for the grid used in the run.
    summary: Dict[str, Any]
    # Return value of ``skill.plan_action`` for the submitted plan.
    action: Dict[str, Any]
    # Return value of ``skill.grid_history`` for the grid used in the run.
    history: Dict[str, Any]
    # Return value of ``skill.describe_grid`` (read here for its "grid_id"
    # and "cells" entries).
    grid: Dict[str, Any]


class ClickthroughAgentRunner:
    """Drive a ``ClickthroughSkill`` through a single describe/plan cycle."""

    def __init__(self, skill: ClickthroughSkill) -> None:
        """Store the skill instance that every run delegates to."""
        self.skill = skill

    def run_once(
        self,
        screenshot_base64: str,
        width: int,
        height: int,
        rows: int = 4,
        columns: int = 4,
        preferred_label: str | None = None,
        action: str = "click",
        text: str | None = None,
    ) -> AgentRunResult:
        """Describe a grid, pick one cell, and submit a single action plan.

        Args:
            screenshot_base64: Forwarded unchanged to ``skill.describe_grid``
                (presumably a base64-encoded screenshot; verify against the
                skill).
            width: Forwarded to ``skill.describe_grid``.
            height: Forwarded to ``skill.describe_grid``.
            rows: Forwarded to ``skill.describe_grid``.
            columns: Forwarded to ``skill.describe_grid``.
            preferred_label: Optional text; the first cell whose label
                contains it (case-insensitively) becomes the target.
            action: Stored on the ``ActionPlan`` as ``action``.
            text: Stored on the ``ActionPlan`` as ``text``.

        Returns:
            An ``AgentRunResult`` holding the grid description, the
            ``plan_action`` result, and the grid's summary and history.

        Raises:
            ValueError: If the described grid has no cells.
            KeyError: If the grid description has no ``"grid_id"`` entry.
        """
        grid = self.skill.describe_grid(
            screenshot_base64=screenshot_base64,
            width=width,
            height=height,
            rows=rows,
            columns=columns,
        )

        # A missing or None "cells" entry is treated the same as an empty list.
        cells = grid.get("cells") or []
        target_cell = self._choose_cell(cells, preferred_label)

        # Read the id once, after cell selection, so an empty grid is still
        # reported as ValueError before a missing id surfaces as KeyError.
        grid_id = grid["grid_id"]

        plan = ActionPlan(
            grid_id=grid_id,
            target_cell=target_cell,
            action=action,
            text=text,
        )
        action_result = self.skill.plan_action(plan)

        # Summary and history are fetched after the action has been planned.
        summary = self.skill.grid_summary(grid_id)
        history = self.skill.grid_history(grid_id)

        return AgentRunResult(
            summary=summary,
            action=action_result,
            history=history,
            grid=grid,
        )

    def _choose_cell(
        self, cells: Sequence[dict[str, Any]], preferred_label: str | None
    ) -> str:
        """Return the ``cell_id`` of the cell to act on.

        The first cell whose ``"label"`` contains ``preferred_label``
        (case-insensitive substring match) wins. When no label is requested
        or nothing matches, the cell at index ``len(cells) // 2`` is used.

        Args:
            cells: Cell dicts; each must provide ``"cell_id"`` and may
                provide ``"label"``.
            preferred_label: Optional text to look for inside cell labels.

        Returns:
            The ``"cell_id"`` value of the chosen cell.

        Raises:
            ValueError: If ``cells`` is empty.
        """
        if not cells:
            raise ValueError("Grid contains no cells")

        if preferred_label:
            search = preferred_label.lower()
            for cell in cells:
                label_value = cell.get("label")
                # Labels are typed as Any: skip missing, empty, or non-string
                # values instead of failing on ``.lower()``.
                if not label_value or not isinstance(label_value, str):
                    continue
                if search in label_value.lower():
                    return cell["cell_id"]

        # Fallback: the middle element of the sequence (upper middle when the
        # number of cells is even).
        center_index = len(cells) // 2
        return cells[center_index]["cell_id"]