This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import BaseSettings
|
||||
from pydantic import ConfigDict
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class ServerSettings(BaseSettings):
|
||||
@@ -10,6 +11,4 @@ class ServerSettings(BaseSettings):
|
||||
storage_dir: Path = Path("data/screenshots")
|
||||
default_timeout: int = 10
|
||||
|
||||
class Config:
|
||||
env_prefix = "CLICKTHROUGH_"
|
||||
env_file = ".env"
|
||||
model_config = ConfigDict(env_prefix="CLICKTHROUGH_", env_file=".env")
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, Tuple
|
||||
from typing import Dict, List, Tuple, Any
|
||||
import uuid
|
||||
|
||||
from .actions import ActionEngine
|
||||
@@ -31,6 +31,7 @@ class VisionGrid:
|
||||
self.width = request.width
|
||||
self.height = request.height
|
||||
self.cells: Dict[str, _StoredCell] = {}
|
||||
self._action_history: List[dict[str, Any]] = []
|
||||
self._engine = ActionEngine(self)
|
||||
self._build_cells()
|
||||
|
||||
@@ -75,7 +76,22 @@ class VisionGrid:
|
||||
return cell.center
|
||||
|
||||
def apply_action(self, payload: ActionPayload) -> ActionResult:
|
||||
return self._engine.plan(payload)
|
||||
result = self._engine.plan(payload)
|
||||
self._action_history.append(result.model_dump())
|
||||
return result
|
||||
|
||||
@property
def action_history(self) -> List[dict[str, Any]]:
    """Return an independent snapshot of the recorded action results.

    The snapshot is a deep copy: neither appending to the returned list nor
    mutating one of its (possibly nested) entry dicts affects the history
    stored on this grid.

    Returns:
        A new list holding copies of every recorded entry, oldest first.
    """
    # Local import keeps this block self-contained; a shallow ``list(...)``
    # copy would still share the entry dicts with the internal history.
    from copy import deepcopy

    return deepcopy(self._action_history)
|
||||
|
||||
def summary(self) -> str:
    """Describe the grid and its most recent action in one sentence.

    Returns:
        A string naming the grid id, its rows x columns, the number of
        cells, and either the ``detail`` of the latest history entry or a
        note that nothing has been recorded yet.
    """
    latest = self._action_history[-1] if self._action_history else None
    if not latest:
        # Covers both an empty history and an empty (falsy) latest entry.
        last_summary = "No actions recorded yet"
    else:
        last_summary = f"Last action: {latest.get('detail')}"
    return f"Grid {self.grid_id} ({self.rows}x{self.columns}) with {len(self.cells)} cells. {last_summary}."
|
||||
|
||||
|
||||
class GridManager:
|
||||
@@ -100,3 +116,9 @@ class GridManager:
|
||||
return self._grids[grid_id]
|
||||
except KeyError as exc:
|
||||
raise KeyError(f"Grid {grid_id} not found") from exc
|
||||
|
||||
def get_history(self, grid_id: str) -> List[dict[str, Any]]:
    """Return the action history of the grid registered under *grid_id*.

    The lookup goes through ``get_grid``; whatever that call raises for an
    unknown id propagates to the caller unchanged.
    """
    grid = self.get_grid(grid_id)
    return grid.action_history
|
||||
|
||||
def clear(self) -> None:
    """Forget every registered grid by emptying the internal registry in place."""
    registry = self._grids
    registry.clear()
|
||||
|
||||
@@ -3,15 +3,17 @@ from fastapi import FastAPI, HTTPException
|
||||
from .config import ServerSettings
|
||||
from .grid import GridManager
|
||||
from .models import ActionPayload, GridDescriptor, GridInitRequest
|
||||
from .planner import GridPlanner
|
||||
|
||||
|
||||
settings = ServerSettings()
|
||||
manager = GridManager(settings)
|
||||
planner = GridPlanner()
|
||||
|
||||
app = FastAPI(
|
||||
title="Clickthrough",
|
||||
description="Grid-aware surface that lets an agent plan clicks, drags, and typing on a fake screenshot",
|
||||
version="0.1.0",
|
||||
version="0.2.0",
|
||||
)
|
||||
|
||||
|
||||
@@ -33,3 +35,27 @@ def apply_action(payload: ActionPayload):
|
||||
except KeyError as exc:
|
||||
raise HTTPException(status_code=404, detail=str(exc)) from exc
|
||||
return grid.apply_action(payload)
|
||||
|
||||
|
||||
@app.get("/grid/{grid_id}/summary")
def grid_summary(grid_id: str):
    """Return a summary payload for the grid identified by *grid_id*.

    The response carries the grid id, the planner's description of the grid
    descriptor, the grid's own summary string, and the descriptor itself.

    Raises:
        HTTPException: 404 when the manager raises ``KeyError`` for the id.
    """
    try:
        target = manager.get_grid(grid_id)
    except KeyError as missing:
        # NOTE(review): str() of a KeyError keeps the repr quotes around the
        # message, so the 404 detail is delivered quoted.
        raise HTTPException(status_code=404, detail=str(missing)) from missing

    layout = target.describe()
    return dict(
        grid_id=grid_id,
        summary=planner.describe(layout),
        details=target.summary(),
        descriptor=layout,
    )
|
||||
|
||||
|
||||
@app.get("/grid/{grid_id}/history")
def grid_history(grid_id: str):
    """Return the recorded action history for the grid identified by *grid_id*.

    Raises:
        HTTPException: 404 when the manager raises ``KeyError`` for the id.
    """
    try:
        entries = manager.get_history(grid_id)
    except KeyError as missing:
        # NOTE(review): str() of a KeyError keeps the repr quotes around the
        # message, so the 404 detail is delivered quoted.
        raise HTTPException(status_code=404, detail=str(missing)) from missing
    return dict(grid_id=grid_id, history=entries)
|
||||
|
||||
53
server/planner.py
Normal file
53
server/planner.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from math import hypot
|
||||
from typing import Sequence
|
||||
|
||||
from .models import GridCellModel, GridDescriptor
|
||||
|
||||
|
||||
class GridPlanner:
    """Helper that picks a grid cell using simple heuristics.

    A cell is chosen by label when a preferred label is supplied and some
    cell matches it; otherwise the cell whose center lies closest to the
    center of the grid is chosen.
    """

    def select_cell(
        self, descriptor: GridDescriptor, preferred_label: str | None = None
    ) -> GridCellModel | None:
        """Pick one cell from *descriptor*.

        Args:
            descriptor: Grid description exposing ``cells`` and ``metadata``.
            preferred_label: Optional label text to look for first
                (case-insensitive).

        Returns:
            The label match when there is one, otherwise the cell nearest to
            the grid center, or ``None`` when the descriptor has no cells.
        """
        if not descriptor.cells:
            return None

        if preferred_label:
            match = self._match_label(descriptor.cells, preferred_label)
            if match is not None:
                return match

        # Computed once so the key function does not redo it per cell.
        center_point = self._grid_center(descriptor)
        return min(
            descriptor.cells,
            key=lambda cell: self._distance(self._cell_center(cell), center_point),
        )

    def describe(self, descriptor: GridDescriptor) -> str:
        """Return a one-sentence description of the grid's id, shape and cell count."""
        cell_count = len(descriptor.cells)
        return (
            f"Grid {descriptor.grid_id} is {descriptor.rows}x{descriptor.columns} with {cell_count} cells."
        )

    def _grid_center(self, descriptor: GridDescriptor) -> tuple[float, float]:
        """Return the center point used to rank cells.

        The ``width`` and ``height`` entries of ``descriptor.metadata`` are
        used when present. For an axis whose dimension is missing (or zero),
        the midpoint of the cells' bounding box on that axis is used instead,
        so the result does not silently collapse to the top-left corner.
        """
        metadata = descriptor.metadata or {}
        width = metadata.get("width")
        height = metadata.get("height")
        if width and height:
            return (width / 2, height / 2)

        if not descriptor.cells:
            # Nothing to derive a bounding box from; treat missing as zero.
            return ((width or 0) / 2, (height or 0) / 2)

        lefts, tops, rights, bottoms = zip(
            *(cell.bounds for cell in descriptor.cells)
        )
        center_x = width / 2 if width else (min(lefts) + max(rights)) / 2
        center_y = height / 2 if height else (min(tops) + max(bottoms)) / 2
        return (center_x, center_y)

    def _cell_center(self, cell: GridCellModel) -> tuple[float, float]:
        """Return the midpoint of the cell's ``(left, top, right, bottom)`` bounds."""
        left, top, right, bottom = cell.bounds
        return ((left + right) / 2, (top + bottom) / 2)

    def _distance(
        self, first: tuple[float, float], second: tuple[float, float]
    ) -> float:
        """Return the Euclidean distance between two points."""
        return hypot(first[0] - second[0], first[1] - second[1])

    def _match_label(
        self, cells: Sequence[GridCellModel], label: str
    ) -> GridCellModel | None:
        """Find the cell whose label best matches *label*, ignoring case.

        An exact match wins over a substring match, so a cell labelled
        "OK" is preferred to an earlier "Cancel OK" when looking for "ok".
        Among substring matches the first one in *cells* order is returned.
        Cells without a label are ignored.

        Returns:
            The matching cell, or ``None`` when no label contains *label*.
        """
        lowered = label.lower()
        first_partial = None
        for cell in cells:
            if not cell.label:
                continue
            candidate = cell.label.lower()
            if candidate == lowered:
                return cell
            if first_partial is None and lowered in candidate:
                first_partial = cell
        return first_partial
|
||||
Reference in New Issue
Block a user