from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Dict, List, Tuple
import uuid

from .actions import ActionEngine
from .config import ServerSettings
from .models import (
    ActionPayload,
    ActionResult,
    GridCellModel,
    GridDescriptor,
    GridInitRequest,
)


@dataclass
class _StoredCell:
    """Internal record pairing a cell model with its precomputed center."""

    # Cell description handed back to callers via ``VisionGrid.describe``.
    model: GridCellModel
    # Midpoint of the cell bounds as ``(x, y)``.
    center: Tuple[int, int]


class VisionGrid:
    """A rows x columns grid of cells laid over a screenshot.

    The grid keeps the screenshot payload and memo from the init request,
    builds one cell per (row, column) position, and records the result of
    every action applied through :meth:`apply_action`.
    """

    def __init__(self, request: GridInitRequest, grid_id: str, rows: int, columns: int):
        """Build the grid and all of its cells.

        Args:
            request: Init request supplying ``screenshot_base64``, ``memo``,
                ``width`` and ``height``.
            grid_id: Identifier used as the prefix of every cell id.
            rows: Number of grid rows; must be at least 1.
            columns: Number of grid columns; must be at least 1.

        Raises:
            ValueError: If ``rows`` or ``columns`` is smaller than 1.
        """
        # Fail early with a clear message instead of a ZeroDivisionError
        # (zero) or a silently empty grid (negative) in _build_cells.
        if rows < 1 or columns < 1:
            raise ValueError(
                f"rows and columns must be positive integers, got rows={rows}, columns={columns}"
            )
        self.grid_id = grid_id
        self.screenshot = request.screenshot_base64
        self.memo = request.memo
        self.rows = rows
        self.columns = columns
        self.width = request.width
        self.height = request.height
        self.cells: Dict[str, _StoredCell] = {}
        self._action_history: List[Dict[str, Any]] = []
        self._engine = ActionEngine(self)
        self._build_cells()

    def _build_cells(self, margin: int = 4) -> None:
        """Populate ``self.cells`` with one entry per grid position.

        Each cell spans ``width // columns`` by ``height // rows`` pixels
        (at least 1 in each direction) and is inset by ``margin`` on every
        side. Cell ids have the form ``"{grid_id}-{row}-{col}"``.

        Args:
            margin: Requested inset in pixels. It is reduced per axis when a
                cell is too small to hold it, so that a cell narrower than
                ``2 * margin`` does not end up with ``left > right`` (or
                ``top > bottom``).
        """
        cell_width = max(1, self.width // self.columns)
        cell_height = max(1, self.height // self.rows)

        # Clamp the inset so at least one pixel remains between the two
        # edges of a cell; for cells of 2 * margin + 1 pixels or more this
        # is exactly ``margin``.
        margin_x = min(margin, (cell_width - 1) // 2)
        margin_y = min(margin, (cell_height - 1) // 2)

        for row in range(self.rows):
            # Vertical bounds depend only on the row, so compute them once.
            top = row * cell_height + margin_y
            bottom = min(self.height - margin_y, (row + 1) * cell_height - margin_y)
            for col in range(self.columns):
                left = col * cell_width + margin_x
                right = min(self.width - margin_x, (col + 1) * cell_width - margin_x)

                cell_id = f"{self.grid_id}-{row}-{col}"
                bounds = (left, top, right, bottom)
                center = ((left + right) // 2, (top + bottom) // 2)
                cell = GridCellModel(
                    cell_id=cell_id,
                    row=row,
                    column=col,
                    bounds=bounds,
                )
                self.cells[cell_id] = _StoredCell(model=cell, center=center)

    def describe(self) -> GridDescriptor:
        """Return a descriptor with the grid id, shape, cell models and metadata."""
        return GridDescriptor(
            grid_id=self.grid_id,
            rows=self.rows,
            columns=self.columns,
            cells=[cell.model for cell in self.cells.values()],
            metadata=self.metadata,
        )

    @property
    def metadata(self) -> Dict[str, Any]:
        """Memo (empty string when unset) plus the grid's pixel width and height."""
        return {
            "memo": self.memo or "",
            "width": self.width,
            "height": self.height,
        }

    def resolve_cell_center(self, cell_id: str) -> Tuple[int, int]:
        """Return the ``(x, y)`` center of the cell with the given id.

        Raises:
            KeyError: If no cell with ``cell_id`` exists in this grid.
        """
        cell = self.cells.get(cell_id)
        if cell is None:
            raise KeyError(f"Unknown cell {cell_id}")
        return cell.center

    def preview_action(self, payload: ActionPayload) -> ActionResult:
        """Plan ``payload`` with the engine without recording it in the history."""
        return self._engine.plan(payload)

    def apply_action(self, payload: ActionPayload) -> ActionResult:
        """Plan ``payload`` with the engine and append the dumped result to the history."""
        result = self._engine.plan(payload)
        self._action_history.append(result.model_dump())
        return result

    def update_screenshot(self, screenshot_base64: str, memo: str | None = None) -> None:
        """Replace the stored screenshot and, optionally, the memo.

        Args:
            screenshot_base64: New screenshot payload.
            memo: New memo. A falsy value (``None`` or ``""``) leaves the
                current memo untouched.
        """
        self.screenshot = screenshot_base64
        if memo:
            self.memo = memo

    @property
    def action_history(self) -> List[Dict[str, Any]]:
        """A new list holding the recorded action results, oldest first."""
        return list(self._action_history)

    def summary(self) -> str:
        """Return a one-line description of the grid and its most recent action."""
        last_action = self._action_history[-1] if self._action_history else None
        last_summary = (
            f"Last action: {last_action.get('detail')}"
            if last_action
            else "No actions recorded yet"
        )
        return (
            f"Grid {self.grid_id} ({self.rows}x{self.columns}) with {len(self.cells)} cells. {last_summary}."
        )


class GridManager:
    """In-memory registry of :class:`VisionGrid` instances keyed by grid id."""

    def __init__(self, settings: ServerSettings):
        """Store the server settings and start with no grids."""
        self.settings = settings
        self._grids: Dict[str, VisionGrid] = {}

    @property
    def grid_count(self) -> int:
        """Number of grids currently registered."""
        return len(self._grids)

    def create_grid(self, request: GridInitRequest) -> VisionGrid:
        """Create, register and return a new grid for ``request``.

        Falsy ``request.rows`` / ``request.columns`` fall back to
        ``settings.grid_rows`` / ``settings.grid_cols``. The grid id is a
        fresh ``uuid4`` hex string.
        """
        rows = request.rows or self.settings.grid_rows
        columns = request.columns or self.settings.grid_cols
        grid_id = uuid.uuid4().hex
        grid = VisionGrid(request, grid_id, rows, columns)
        self._grids[grid_id] = grid
        return grid

    def get_grid(self, grid_id: str) -> VisionGrid:
        """Return the grid registered under ``grid_id``.

        Raises:
            KeyError: If no such grid exists.
        """
        try:
            return self._grids[grid_id]
        except KeyError as exc:
            raise KeyError(f"Grid {grid_id} not found") from exc

    def get_history(self, grid_id: str) -> List[Dict[str, Any]]:
        """Return the action history of the grid registered under ``grid_id``.

        Raises:
            KeyError: If no such grid exists.
        """
        return self.get_grid(grid_id).action_history

    def clear(self) -> None:
        """Drop every registered grid."""
        self._grids.clear()