Files
clickthrough/skill/clickthrough_skill.py
Luna 1b0b9cfdef
Some checks failed
CI / test (push) Failing after 45s
Add planner previews and streaming
2026-04-05 19:33:24 +02:00

99 lines
3.1 KiB
Python

from dataclasses import dataclass
from typing import Any, Dict
import httpx
@dataclass
class ActionPlan:
grid_id: str
target_cell: str | None
action: str
text: str | None = None
class ClickthroughSkill:
"""Lightweight wrapper around the Clickthrough HTTP API."""
def __init__(self, server_url: str = "http://localhost:8000") -> None:
self._client = httpx.Client(base_url=server_url, timeout=10)
def describe_grid(
self,
screenshot_base64: str,
width: int,
height: int,
rows: int = 4,
columns: int = 4,
) -> Dict[str, Any]:
payload = {
"width": width,
"height": height,
"rows": rows,
"columns": columns,
"screenshot_base64": screenshot_base64,
"memo": "agent-powered grid",
}
response = self._client.post("/grid/init", json=payload)
response.raise_for_status()
return response.json()
def plan_action(self, plan: ActionPlan) -> Dict[str, Any]:
payload = {
"grid_id": plan.grid_id,
"action": plan.action,
"target_cell": plan.target_cell,
"text": plan.text,
"comment": "skill-generated plan",
}
response = self._client.post("/grid/action", json=payload)
response.raise_for_status()
return response.json()
def grid_summary(self, grid_id: str) -> Dict[str, Any]:
response = self._client.get(f"/grid/{grid_id}/summary")
response.raise_for_status()
return response.json()
def grid_history(self, grid_id: str) -> Dict[str, Any]:
response = self._client.get(f"/grid/{grid_id}/history")
response.raise_for_status()
return response.json()
def plan_with_planner(
self,
grid_id: str,
preferred_label: str | None = None,
action: str = "click",
text: str | None = None,
comment: str | None = None,
) -> Dict[str, Any]:
payload = {
"preferred_label": preferred_label,
"action": action,
"text": text,
"comment": comment or "planner-generated",
}
response = self._client.post(f"/grid/{grid_id}/plan", json=payload)
response.raise_for_status()
return response.json()
def refresh_grid(self, grid_id: str, screenshot_base64: str, memo: str | None = None) -> Dict[str, Any]:
payload = {"screenshot_base64": screenshot_base64, "memo": memo}
response = self._client.post(f"/grid/{grid_id}/refresh", json=payload)
response.raise_for_status()
return response.json()
if __name__ == "__main__":
    # Manual smoke test: registers a grid for a placeholder screenshot on the
    # default server URL and, if the reply lists any cells, clicks the first.
    import base64

    # Not a real image: just base64 of a fixed byte string used as a stand-in.
    dummy = base64.b64encode(b"fake-screenshot").decode()
    skill = ClickthroughSkill()
    grid = skill.describe_grid(dummy, width=800, height=600)

    # Read "cells" once; a missing or null value is treated as no cells.
    cells = grid.get("cells") or []
    print("Grid cells:", len(cells))
    if cells:
        first_cell = cells[0]["cell_id"]
        plan = ActionPlan(
            grid_id=grid["grid_id"],
            target_cell=first_cell,
            action="click",
        )
        result = skill.plan_action(plan)
        print("Action result:", result)