init
This commit is contained in:
1
server/__init__.py
Normal file
1
server/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from .main import app # noqa: F401
|
||||
34
server/actions.py
Normal file
34
server/actions.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
from .models import ActionPayload, ActionResult
|
||||
|
||||
|
||||
class ActionEngine:
    """Translate an action payload into a planned result for one grid.

    The engine only plans: it resolves the targeted cell to coordinates and
    composes a human-readable summary. Nothing in this class performs the
    action itself.
    """

    def __init__(self, grid) -> None:
        # ``grid`` must expose ``resolve_cell_center(cell_id)``.
        self.grid = grid

    def plan(self, payload: ActionPayload) -> ActionResult:
        """Build the result describing how ``payload`` would be carried out.

        Any error raised by the grid while resolving ``payload.target_cell``
        propagates to the caller unchanged.
        """
        center = self._resolve_coords(payload.target_cell)
        summary = self._describe(payload, center)
        # Echo the free-form fields back, normalising missing values to "".
        echoed = {"comment": payload.comment or "", "text": payload.text or ""}
        return ActionResult(
            success=True,
            detail=summary,
            coordinates=center,
            payload=echoed,
        )

    def _resolve_coords(self, target_cell: str | None) -> Tuple[int, int] | None:
        """Return the centre of ``target_cell``, or ``None`` when no cell is given."""
        if target_cell:
            return self.grid.resolve_cell_center(target_cell)
        return None

    def _describe(
        self, payload: ActionPayload, coords: Tuple[int, int] | None
    ) -> str:
        """Compose the one-line summary stored in the result's ``detail``."""
        target_name = payload.target_cell or "free space"
        if coords:
            where = f"@{target_name}"
        else:
            where = "(no target)"
        verb = payload.action.value
        # Only mention the text when there is some to mention.
        suffix = f" text='{payload.text}'" if payload.text else ""
        return f"Plan {verb} {where}{suffix}"
|
||||
15
server/config.py
Normal file
15
server/config.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import BaseSettings
|
||||
|
||||
|
||||
class ServerSettings(BaseSettings):
    """Server configuration with defaults that the environment can override.

    NOTE(review): ``BaseSettings`` is imported from ``pydantic`` in this file;
    pydantic v2 moved it to the separate ``pydantic-settings`` package, so
    confirm which major version the project pins.
    """

    # Grid dimensions used when a grid request does not supply its own.
    grid_rows: int = 4
    grid_cols: int = 4

    # Margin for grid cells, in pixels (per the field name).
    cell_margin_px: int = 4

    # NOTE(review): no reader of the two fields below is visible in this
    # commit view; confirm where they are consumed.
    storage_dir: Path = Path("data/screenshots")
    default_timeout: int = 10

    class Config:
        # Values come from CLICKTHROUGH_-prefixed environment variables and
        # from a local ".env" file.
        env_prefix = "CLICKTHROUGH_"
        env_file = ".env"
|
||||
102
server/grid.py
Normal file
102
server/grid.py
Normal file
@@ -0,0 +1,102 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, Tuple
|
||||
import uuid
|
||||
|
||||
from .actions import ActionEngine
|
||||
from .config import ServerSettings
|
||||
from .models import (
|
||||
ActionPayload,
|
||||
ActionResult,
|
||||
GridCellModel,
|
||||
GridDescriptor,
|
||||
GridInitRequest,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class _StoredCell:
    """Internal record pairing a cell's public model with its centre point."""

    # Public description of the cell (id, row, column, bounds).
    model: GridCellModel
    # Integer (x, y) midpoint of the cell's bounds.
    center: Tuple[int, int]
|
||||
|
||||
|
||||
class VisionGrid:
    """One screenshot divided into a ``rows`` x ``columns`` lattice of cells.

    Cells are keyed by ``"<grid_id>-<row>-<col>"``. Each one stores its inset
    bounds ``(left, top, right, bottom)`` together with the integer centre of
    those bounds, which is what an action targeting the cell resolves to.
    """

    def __init__(
        self,
        request: GridInitRequest,
        grid_id: str,
        rows: int,
        columns: int,
        margin: int = 4,
    ):
        """Store the request data and build every cell.

        Args:
            request: Source of the screenshot, its pixel size and the memo.
            grid_id: Identifier used as the prefix of every cell id.
            rows: Number of cell rows.
            columns: Number of cell columns.
            margin: Requested inset, in pixels, applied on every side of a
                cell. Defaults to 4, the value previously hard-coded.
        """
        self.grid_id = grid_id
        self.screenshot = request.screenshot_base64
        self.memo = request.memo
        self.rows = rows
        self.columns = columns
        self.width = request.width
        self.height = request.height
        self.cells: Dict[str, _StoredCell] = {}
        self._engine = ActionEngine(self)
        self._build_cells(margin)

    @staticmethod
    def _axis_span(index: int, size: int, extent: int, margin: int) -> Tuple[int, int]:
        """Return the inset ``(start, end)`` of cell ``index`` along one axis.

        Args:
            index: Zero-based row or column number.
            size: Un-inset cell size along this axis, in pixels.
            extent: Full image size along this axis, in pixels.
            margin: Requested inset; it is capped at half the cell size so
                the two edges can never cross. Margins that already fit are
                used unchanged.

        Returns:
            A pair with ``start <= end``.
        """
        inset = min(margin, size // 2)
        start = index * size + inset
        end = min(extent - inset, (index + 1) * size - inset)
        # When there are more cells than pixels the cap above can still fall
        # before ``start``; collapse to an empty span rather than invert it.
        return start, max(start, end)

    def _build_cells(self, margin: int = 4) -> None:
        """Populate ``self.cells`` with one entry per (row, column) pair."""
        # Integer division drops any remainder; never go below one pixel.
        cell_width = max(1, self.width // self.columns)
        cell_height = max(1, self.height // self.rows)

        for row in range(self.rows):
            top, bottom = self._axis_span(row, cell_height, self.height, margin)
            for col in range(self.columns):
                left, right = self._axis_span(col, cell_width, self.width, margin)
                cell_id = f"{self.grid_id}-{row}-{col}"
                cell = GridCellModel(
                    cell_id=cell_id,
                    row=row,
                    column=col,
                    bounds=(left, top, right, bottom),
                )
                center = ((left + right) // 2, (top + bottom) // 2)
                self.cells[cell_id] = _StoredCell(model=cell, center=center)

    def describe(self) -> GridDescriptor:
        """Return the public description of this grid and all of its cells."""
        return GridDescriptor(
            grid_id=self.grid_id,
            rows=self.rows,
            columns=self.columns,
            cells=[stored.model for stored in self.cells.values()],
            metadata={
                "memo": self.memo or "",
                "width": self.width,
                "height": self.height,
            },
        )

    def resolve_cell_center(self, cell_id: str) -> Tuple[int, int]:
        """Return the centre of ``cell_id``.

        Raises:
            KeyError: If the grid has no cell with that id.
        """
        stored = self.cells.get(cell_id)
        if stored is None:
            raise KeyError(f"Unknown cell {cell_id}")
        return stored.center

    def apply_action(self, payload: ActionPayload) -> ActionResult:
        """Plan ``payload`` against this grid via the action engine."""
        return self._engine.plan(payload)


class GridManager:
    """In-memory registry of grids, keyed by grid id.

    Grids stay registered for the lifetime of the manager; nothing in this
    class removes them.
    """

    def __init__(self, settings: ServerSettings):
        self.settings = settings
        self._grids: Dict[str, VisionGrid] = {}

    @property
    def grid_count(self) -> int:
        """Number of grids currently registered."""
        return len(self._grids)

    def create_grid(self, request: GridInitRequest) -> VisionGrid:
        """Create, register and return a grid for ``request``.

        Row and column counts that are missing (or zero) fall back to the
        configured defaults, and the configured cell margin is applied.
        """
        rows = request.rows or self.settings.grid_rows
        columns = request.columns or self.settings.grid_cols
        grid_id = uuid.uuid4().hex
        grid = VisionGrid(
            request,
            grid_id,
            rows,
            columns,
            margin=self.settings.cell_margin_px,
        )
        self._grids[grid_id] = grid
        return grid

    def get_grid(self, grid_id: str) -> VisionGrid:
        """Return the grid registered under ``grid_id``.

        Raises:
            KeyError: If no such grid exists.
        """
        try:
            return self._grids[grid_id]
        except KeyError as exc:
            raise KeyError(f"Grid {grid_id} not found") from exc
|
||||
35
server/main.py
Normal file
35
server/main.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from fastapi import FastAPI, HTTPException
|
||||
|
||||
from .config import ServerSettings
|
||||
from .grid import GridManager
|
||||
from .models import ActionPayload, GridDescriptor, GridInitRequest
|
||||
|
||||
|
||||
# Process-wide singletons, created at import time. The manager is built from
# the settings, so the order of these two statements matters.
settings = ServerSettings()
manager = GridManager(settings)

# ASGI application object; the route handlers below register themselves on it.
app = FastAPI(
    title="Clickthrough",
    description="Grid-aware surface that lets an agent plan clicks, drags, and typing on a fake screenshot",
    version="0.1.0",
)
|
||||
|
||||
|
||||
@app.get("/health")
def health_check() -> dict[str, str]:
    """Liveness probe that also reports how many grids are registered."""
    # Stringified so every value in the response is a string.
    registered = str(manager.grid_count)
    return {"status": "ok", "grid_count": registered}
|
||||
|
||||
|
||||
@app.post("/grid/init", response_model=GridDescriptor)
def init_grid(request: GridInitRequest) -> GridDescriptor:
    """Register a new grid for the submitted screenshot and describe it."""
    return manager.create_grid(request).describe()
|
||||
|
||||
|
||||
@app.post("/grid/action")
def apply_action(payload: ActionPayload):
    """Plan an action against a previously created grid.

    Responds with HTTP 404 when either the grid id or the targeted cell id
    is unknown.
    """
    try:
        grid = manager.get_grid(payload.grid_id)
        # Resolving an unknown target cell also raises KeyError; keep it in
        # the same guard so it becomes a 404 and not an unhandled 500.
        return grid.apply_action(payload)
    except KeyError as exc:
        # str(KeyError("msg")) yields the repr of the message, quotes
        # included, so read the raw argument to keep the detail clean.
        message = exc.args[0] if exc.args else str(exc)
        raise HTTPException(status_code=404, detail=str(message)) from exc
|
||||
55
server/models.py
Normal file
55
server/models.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ActionType(str, Enum):
    """Kinds of interaction a client can ask the server to plan.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    CLICK = "click"
    DOUBLE_CLICK = "double_click"
    DRAG = "drag"
    TYPE = "type"
    SCROLL = "scroll"
|
||||
|
||||
|
||||
class GridInitRequest(BaseModel):
    """Request body for creating a grid over a screenshot."""

    # Screenshot size in pixels. A non-positive size cannot be divided into
    # cells, so it is rejected at validation time.
    width: int = Field(..., gt=0)
    height: int = Field(..., gt=0)

    # Presumably the base64-encoded image; nothing in the visible code
    # decodes it. TODO confirm.
    screenshot_base64: str

    # Optional grid dimensions. Omitting them (or sending 0) selects the
    # server defaults; negative counts are rejected because they would
    # produce a grid with no cells.
    rows: Optional[int] = Field(default=None, ge=0)
    columns: Optional[int] = Field(default=None, ge=0)

    # Free-form note that is returned in the grid's metadata.
    memo: Optional[str] = None
|
||||
|
||||
|
||||
class GridCellModel(BaseModel):
    """Public description of a single grid cell."""

    cell_id: str
    # Zero-based position of the cell within the grid.
    row: int
    column: int
    # Pixel rectangle as (left, top, right, bottom).
    bounds: Tuple[int, int, int, int]
    # Optional label; the visible code never sets it.
    label: Optional[str] = None
|
||||
|
||||
|
||||
class GridDescriptor(BaseModel):
    """Response body describing a grid and every cell it contains."""

    grid_id: str
    rows: int
    columns: int
    cells: List[GridCellModel]
    # Extra information about the grid; a fresh dict per instance.
    metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class ActionPayload(BaseModel):
    """Request body describing one action to plan against a grid."""

    # Grid the action applies to.
    grid_id: str
    action: ActionType
    # Cell to act on; when absent the action has no target coordinates.
    target_cell: Optional[str] = None
    # Optional text and comment, both echoed back in the action result.
    text: Optional[str] = None
    comment: Optional[str] = None
    # Additional parameters; a fresh dict per instance. The visible action
    # engine does not read this field.
    data: Dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class ActionResult(BaseModel):
    """Outcome of planning an action."""

    success: bool
    # Human-readable summary of the planned action.
    detail: str
    # Target point as a pair of integers, or None when no cell was targeted.
    coordinates: Optional[Tuple[int, int]] = None
    # Data echoed back to the caller; a fresh dict per instance.
    payload: Dict[str, Any] = Field(default_factory=dict)
|
||||
Reference in New Issue
Block a user