from dataclasses import dataclass
from typing import Any, Dict

from .clickthrough_skill import ActionPlan, ClickthroughSkill


@dataclass
class AgentRunResult:
    """Container for the values collected by one ``run_once`` invocation."""

    # Value returned by ``skill.grid_summary`` for the grid used in the run.
    summary: Dict[str, Any]
    # Value returned by ``skill.plan_action`` for the constructed plan.
    action: Dict[str, Any]
    # Value returned by ``skill.grid_history`` for the grid used in the run.
    history: Dict[str, Any]
    # Value returned by ``skill.describe_grid``; read via its "grid_id" key.
    grid: Dict[str, Any]
    # Full, unmodified response returned by ``skill.plan_with_planner``.
    plan_preview: Dict[str, Any]


class ClickthroughAgentRunner:
    """Drive a ``ClickthroughSkill`` through a single describe/plan/act pass."""

    def __init__(self, skill: ClickthroughSkill) -> None:
        """Store the skill instance that every run delegates to."""
        self.skill = skill

    def run_once(
        self,
        screenshot_base64: str,
        width: int,
        height: int,
        rows: int = 4,
        columns: int = 4,
        preferred_label: str | None = None,
        action: str = "click",
        text: str | None = None,
    ) -> AgentRunResult:
        """Describe a grid, request a plan for it, execute the plan, and report.

        The skill is called in this order: ``describe_grid``,
        ``plan_with_planner``, ``plan_action``, ``grid_summary``,
        ``grid_history``.

        Args:
            screenshot_base64: Forwarded unchanged to ``describe_grid``.
            width: Forwarded unchanged to ``describe_grid``.
            height: Forwarded unchanged to ``describe_grid``.
            rows: Forwarded unchanged to ``describe_grid``.
            columns: Forwarded unchanged to ``describe_grid``.
            preferred_label: Forwarded unchanged to ``plan_with_planner``.
            action: Forwarded unchanged to ``plan_with_planner``.
            text: Forwarded unchanged to ``plan_with_planner``.

        Returns:
            An ``AgentRunResult`` holding the summary, the action result, the
            history, the grid description, and the raw planner response.

        Raises:
            KeyError: If the grid description has no "grid_id" key, the planner
                response has no "plan" key, or the plan has no "grid_id" or
                "action" key.
        """
        skill = self.skill

        grid_info = skill.describe_grid(
            screenshot_base64=screenshot_base64,
            width=width,
            height=height,
            rows=rows,
            columns=columns,
        )
        # Every later skill call is keyed on this identifier.
        grid_id = grid_info["grid_id"]

        planner_reply = skill.plan_with_planner(
            grid_id=grid_id,
            preferred_label=preferred_label,
            action=action,
            text=text,
        )
        proposed = planner_reply["plan"]

        # "grid_id" and "action" are required keys; "target_cell" and "text"
        # fall back to None when the planner leaves them out.
        plan_fields = {
            "grid_id": proposed["grid_id"],
            "target_cell": proposed.get("target_cell"),
            "action": proposed["action"],
            "text": proposed.get("text"),
        }
        executed = skill.plan_action(ActionPlan(**plan_fields))

        # Keyword arguments evaluate left to right, so the summary is fetched
        # before the history, and both are fetched after the action has run.
        return AgentRunResult(
            summary=skill.grid_summary(grid_id),
            action=executed,
            history=skill.grid_history(grid_id),
            grid=grid_info,
            plan_preview=planner_reply,
        )