Add grid planner, CI, and tests
Some checks failed
CI / test (push) Failing after 1m12s

This commit is contained in:
2026-04-05 19:27:55 +02:00
parent a2ef50401b
commit b1d2b6b321
16 changed files with 383 additions and 19 deletions

View File

@@ -1,6 +1,7 @@
from pathlib import Path
from pydantic import BaseSettings
from pydantic import ConfigDict
from pydantic_settings import BaseSettings
class ServerSettings(BaseSettings):
@@ -10,6 +11,4 @@ class ServerSettings(BaseSettings):
storage_dir: Path = Path("data/screenshots")
default_timeout: int = 10
class Config:
env_prefix = "CLICKTHROUGH_"
env_file = ".env"
model_config = ConfigDict(env_prefix="CLICKTHROUGH_", env_file=".env")

View File

@@ -1,7 +1,7 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, Tuple
from typing import Dict, List, Tuple, Any
import uuid
from .actions import ActionEngine
@@ -31,6 +31,7 @@ class VisionGrid:
self.width = request.width
self.height = request.height
self.cells: Dict[str, _StoredCell] = {}
self._action_history: List[dict[str, Any]] = []
self._engine = ActionEngine(self)
self._build_cells()
@@ -75,7 +76,22 @@ class VisionGrid:
return cell.center
def apply_action(self, payload: ActionPayload) -> ActionResult:
    """Plan *payload* with the action engine and record the outcome.

    The result returned by ``self._engine.plan`` is handed back to the
    caller unchanged; a ``model_dump()`` snapshot of it is appended to the
    grid's action history first.
    """
    # Plan exactly once. An earlier bare ``return self._engine.plan(payload)``
    # ahead of these lines made the history bookkeeping unreachable.
    result = self._engine.plan(payload)
    self._action_history.append(result.model_dump())
    return result
@property
def action_history(self) -> List[dict[str, Any]]:
    """Return a new list holding the recorded action entries.

    Only the outer list is copied, so adding or removing items on the
    returned list does not touch the grid's own history; the entry dicts
    themselves are the same objects.
    """
    return [*self._action_history]
def summary(self) -> str:
    """Return a one-line description of the grid and its latest action.

    The text names the grid id, its ``rows x columns`` shape and the number
    of stored cells, followed by the ``detail`` value of the most recent
    history entry, or a placeholder when there is no (non-empty) entry.
    """
    history = self._action_history
    latest = history[-1] if history else None
    if latest:
        last_summary = f"Last action: {latest.get('detail')}"
    else:
        last_summary = "No actions recorded yet"
    return f"Grid {self.grid_id} ({self.rows}x{self.columns}) with {len(self.cells)} cells. {last_summary}."
class GridManager:
@@ -100,3 +116,9 @@ class GridManager:
return self._grids[grid_id]
except KeyError as exc:
raise KeyError(f"Grid {grid_id} not found") from exc
def get_history(self, grid_id: str) -> List[dict[str, Any]]:
    """Return the action history of the grid registered under *grid_id*.

    The lookup goes through ``self.get_grid``; any exception it raises for
    an unknown id propagates to the caller.
    """
    grid = self.get_grid(grid_id)
    return grid.action_history
def clear(self) -> None:
    """Remove every registered grid from this manager.

    The existing ``_grids`` mapping is emptied in place rather than
    replaced.
    """
    registry = self._grids
    registry.clear()

View File

@@ -3,15 +3,17 @@ from fastapi import FastAPI, HTTPException
from .config import ServerSettings
from .grid import GridManager
from .models import ActionPayload, GridDescriptor, GridInitRequest
from .planner import GridPlanner
# Module-level objects shared by the request handlers in this module.
settings = ServerSettings()
manager = GridManager(settings)
planner = GridPlanner()

app = FastAPI(
    title="Clickthrough",
    description="Grid-aware surface that lets an agent plan clicks, drags, and typing on a fake screenshot",
    # A keyword argument may be passed only once; the superseded "0.1.0"
    # value is dropped so the call is valid.
    version="0.2.0",
)
@@ -33,3 +35,27 @@ def apply_action(payload: ActionPayload):
except KeyError as exc:
raise HTTPException(status_code=404, detail=str(exc)) from exc
return grid.apply_action(payload)
@app.get("/grid/{grid_id}/summary")
def grid_summary(grid_id: str):
    """Return the planner summary, grid details and descriptor for a grid.

    Responds with HTTP 404 (carrying the lookup error's message) when
    ``manager.get_grid`` raises ``KeyError`` for *grid_id*.
    """
    try:
        grid = manager.get_grid(grid_id)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc

    descriptor = grid.describe()
    # Same evaluation order as the response keys: planner text first, then
    # the grid's own summary.
    headline = planner.describe(descriptor)
    details = grid.summary()
    return {
        "grid_id": grid_id,
        "summary": headline,
        "details": details,
        "descriptor": descriptor,
    }
@app.get("/grid/{grid_id}/history")
def grid_history(grid_id: str):
    """Return the recorded action history for a grid.

    Responds with HTTP 404 (carrying the lookup error's message) when
    ``manager.get_history`` raises ``KeyError`` for *grid_id*.
    """
    try:
        entries = manager.get_history(grid_id)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    else:
        return {"grid_id": grid_id, "history": entries}

53
server/planner.py Normal file
View File

@@ -0,0 +1,53 @@
from __future__ import annotations
from math import hypot
from typing import Sequence
from .models import GridCellModel, GridDescriptor
class GridPlanner:
    """Helper that picks a grid cell using simple heuristics."""

    def select_cell(
        self, descriptor: GridDescriptor, preferred_label: str | None = None
    ) -> GridCellModel | None:
        """Pick one cell from *descriptor*.

        A cell whose label matches *preferred_label* wins (see
        ``_match_label``). Without a label, or when nothing matches, the cell
        whose centre lies closest to the centre of the grid is returned; ties
        go to the earliest cell in ``descriptor.cells``.

        Returns ``None`` when the descriptor has no cells.
        """
        cells = descriptor.cells
        if not cells:
            return None

        if preferred_label:
            match = self._match_label(cells, preferred_label)
            if match is not None:
                return match

        center_point = self._grid_center(descriptor)
        return min(
            cells,
            key=lambda cell: self._distance(self._cell_center(cell), center_point),
        )

    def describe(self, descriptor: GridDescriptor) -> str:
        """Return a one-sentence description of the grid's id, shape and cell count."""
        cell_count = len(descriptor.cells)
        return (
            f"Grid {descriptor.grid_id} is {descriptor.rows}x{descriptor.columns} with {cell_count} cells."
        )

    def _grid_center(self, descriptor: GridDescriptor) -> tuple[float, float]:
        """Return the centre point of the grid.

        ``metadata["width"]`` / ``metadata["height"]`` are used when present.
        An axis whose size is missing (or zero) is derived from the extent of
        the cell bounds instead, so the centre does not silently collapse to
        the origin and bias selection towards the top-left cell.
        """
        metadata = descriptor.metadata or {}
        width = metadata.get("width")
        height = metadata.get("height")
        if width and height:
            return (width / 2, height / 2)

        cells = descriptor.cells
        if not cells:
            # Nothing to measure; keep the historical "missing means 0" result.
            return ((width or 0) / 2, (height or 0) / 2)

        lefts, tops, rights, bottoms = zip(*(cell.bounds for cell in cells))
        center_x = width / 2 if width else (min(lefts) + max(rights)) / 2
        center_y = height / 2 if height else (min(tops) + max(bottoms)) / 2
        return (center_x, center_y)

    def _cell_center(self, cell: GridCellModel) -> tuple[float, float]:
        """Return the midpoint of ``cell.bounds`` (left, top, right, bottom)."""
        left, top, right, bottom = cell.bounds
        return ((left + right) / 2, (top + bottom) / 2)

    def _distance(
        self, first: tuple[float, float], second: tuple[float, float]
    ) -> float:
        """Return the Euclidean distance between two points."""
        return hypot(first[0] - second[0], first[1] - second[1])

    def _match_label(
        self, cells: Sequence[GridCellModel], label: str
    ) -> GridCellModel | None:
        """Return the cell whose label best matches *label*, ignoring case.

        An exact match is preferred; otherwise the first cell whose label
        contains *label* as a substring is returned. Cells without a label
        are skipped. Returns ``None`` when nothing matches.
        """
        lowered = label.lower()
        partial: GridCellModel | None = None
        for cell in cells:
            if not cell.label:
                continue
            candidate = cell.label.lower()
            if candidate == lowered:
                return cell
            # Remember only the first substring hit; a later exact match
            # still takes priority over it.
            if partial is None and lowered in candidate:
                partial = cell
        return partial