63 lines
1.9 KiB
Python
63 lines
1.9 KiB
Python
from dataclasses import dataclass
|
|
from typing import Any, Dict, Sequence
|
|
|
|
from .clickthrough_skill import ActionPlan, ClickthroughSkill
|
|
|
|
|
|
@dataclass
class AgentRunResult:
    """Bundle of the four payloads produced by one agent run.

    Field order is part of the positional constructor signature, so it
    must stay ``summary, action, history, grid``.
    """

    # Summary payload for the grid that was acted upon.
    summary: Dict[str, Any]
    # Result payload of the planned action.
    action: Dict[str, Any]
    # History payload for the same grid.
    history: Dict[str, Any]
    # The grid description the run started from.
    grid: Dict[str, Any]
|
|
|
|
|
|
class ClickthroughAgentRunner:
|
|
def __init__(self, skill: ClickthroughSkill) -> None:
|
|
self.skill = skill
|
|
|
|
def run_once(
|
|
self,
|
|
screenshot_base64: str,
|
|
width: int,
|
|
height: int,
|
|
rows: int = 4,
|
|
columns: int = 4,
|
|
preferred_label: str | None = None,
|
|
action: str = "click",
|
|
text: str | None = None,
|
|
) -> AgentRunResult:
|
|
grid = self.skill.describe_grid(
|
|
screenshot_base64=screenshot_base64,
|
|
width=width,
|
|
height=height,
|
|
rows=rows,
|
|
columns=columns,
|
|
)
|
|
cells = grid.get("cells") or []
|
|
target_cell = self._choose_cell(cells, preferred_label)
|
|
plan = ActionPlan(
|
|
grid_id=grid["grid_id"],
|
|
target_cell=target_cell,
|
|
action=action,
|
|
text=text,
|
|
)
|
|
action_result = self.skill.plan_action(plan)
|
|
summary = self.skill.grid_summary(grid["grid_id"])
|
|
history = self.skill.grid_history(grid["grid_id"])
|
|
return AgentRunResult(summary=summary, action=action_result, history=history, grid=grid)
|
|
|
|
def _choose_cell(
|
|
self, cells: Sequence[dict[str, Any]], preferred_label: str | None
|
|
) -> str:
|
|
if not cells:
|
|
raise ValueError("Grid contains no cells")
|
|
if preferred_label:
|
|
search = preferred_label.lower()
|
|
for cell in cells:
|
|
label_value = cell.get("label")
|
|
if label_value and search in label_value.lower():
|
|
return cell["cell_id"]
|
|
center_index = len(cells) // 2
|
|
return cells[center_index]["cell_id"]
|