61 lines
1.7 KiB
Python
61 lines
1.7 KiB
Python
from dataclasses import dataclass
|
|
from typing import Any, Dict
|
|
|
|
from .clickthrough_skill import ActionPlan, ClickthroughSkill
|
|
|
|
|
|
@dataclass
class AgentRunResult:
    """Bundle of everything produced by one ``ClickthroughAgentRunner.run_once`` call.

    Every field is a string-keyed mapping. The inner schemas are defined by
    the skill that produced them and are not validated here.
    """

    # Value returned by the skill's ``grid_summary`` for the grid used in the run.
    summary: Dict[str, Any]
    # Value returned by the skill's ``plan_action`` for the executed plan.
    action: Dict[str, Any]
    # Value returned by the skill's ``grid_history`` for the same grid.
    history: Dict[str, Any]
    # Grid description from ``describe_grid``; the runner reads its "grid_id" key.
    grid: Dict[str, Any]
    # Full planner response from ``plan_with_planner``; the runner reads its "plan" key.
    plan_preview: Dict[str, Any]
|
|
|
|
|
|
class ClickthroughAgentRunner:
    """Run a single describe -> plan -> act cycle via a ``ClickthroughSkill``."""

    def __init__(self, skill: ClickthroughSkill) -> None:
        """Keep a reference to the skill that every run delegates to."""
        self.skill = skill

    def run_once(
        self,
        screenshot_base64: str,
        width: int,
        height: int,
        rows: int = 4,
        columns: int = 4,
        preferred_label: str | None = None,
        action: str = "click",
        text: str | None = None,
    ) -> AgentRunResult:
        """Describe a grid, ask the planner for an action, and execute it.

        Args:
            screenshot_base64: Screenshot payload, forwarded to ``describe_grid``.
            width: Forwarded to ``describe_grid``.
            height: Forwarded to ``describe_grid``.
            rows: Forwarded to ``describe_grid``.
            columns: Forwarded to ``describe_grid``.
            preferred_label: Forwarded to ``plan_with_planner``.
            action: Requested action name, forwarded to ``plan_with_planner``.
            text: Optional text, forwarded to ``plan_with_planner``.

        Returns:
            An ``AgentRunResult`` holding the grid description, the raw planner
            response, the ``plan_action`` result, and the grid's summary and
            history as fetched after the action ran.

        Raises:
            KeyError: If the grid description lacks "grid_id", the planner
                response lacks "plan", or the plan lacks "grid_id"/"action"
                (assuming the skill returns plain dicts -- TODO confirm).
        """
        skill = self.skill

        # Step 1: register the screenshot as a grid and remember its id.
        grid_info = skill.describe_grid(
            screenshot_base64=screenshot_base64,
            width=width,
            height=height,
            rows=rows,
            columns=columns,
        )
        grid_id = grid_info["grid_id"]

        # Step 2: let the planner propose what to do on that grid.
        planner_reply = skill.plan_with_planner(
            grid_id=grid_id,
            preferred_label=preferred_label,
            action=action,
            text=text,
        )
        proposed = planner_reply["plan"]

        # Step 3: execute the proposal. "target_cell" and "text" are optional
        # in the payload; "grid_id" and "action" are required.
        outcome = skill.plan_action(
            ActionPlan(
                grid_id=proposed["grid_id"],
                target_cell=proposed.get("target_cell"),
                action=proposed["action"],
                text=proposed.get("text"),
            )
        )

        # Step 4: summary and history are read only after the action has run.
        grid_summary = skill.grid_summary(grid_id)
        grid_history = skill.grid_history(grid_id)

        return AgentRunResult(
            summary=grid_summary,
            action=outcome,
            history=grid_history,
            grid=grid_info,
            plan_preview=planner_reply,
        )
|