Add pytesseract OCR, click_text interact action, and interact verify endpoint
All checks were successful
python-syntax / syntax-check (push) Successful in 6s

This commit is contained in:
2026-05-03 20:57:34 +02:00
parent 1c03cab457
commit 9e816e0417
8 changed files with 559 additions and 11 deletions

View File

@@ -8,13 +8,15 @@ from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from .config import SETTINGS
from .models import ExecRequest, InteractRequest, LaunchRequest, SeeRequest, SeeZoomRequest, WindowActionRequest, WindowQuery
from .models import ExecRequest, InteractRequest, InteractVerifyRequest, LaunchRequest, SeeRequest, SeeZoomRequest, WindowActionRequest, WindowQuery
from .services import (
apply_window_action,
capture_region_image,
capture_screen,
draw_grid,
encode_image,
execute_and_verify,
extract_ocr_items,
exec_action,
exec_command as run_exec_command,
get_displays,
@@ -65,7 +67,8 @@ async def _http_exception_handler(_: Request, exc: HTTPException):
detail = exc.detail
if isinstance(detail, dict):
message = str(detail.get("message", "request failed"))
return _err("http_error", message, exc.status_code, detail)
code = str(detail.get("code", "http_error"))
return _err(code, message, exc.status_code, detail.get("details"))
return _err("http_error", str(detail), exc.status_code)
@@ -99,6 +102,8 @@ def see(req: SeeRequest, _: None = Depends(_auth)):
if req.with_grid:
out_img, grid_meta = draw_grid(image, region["x"], region["y"], req.grid_rows, req.grid_cols, req.include_labels)
meta.update(grid_meta)
if req.ocr:
meta["ocr"] = extract_ocr_items(image, region["x"], region["y"], req.ocr_min_confidence, req.ocr_lang, req.ocr_psm)
return _ok(
{
"image": {
@@ -154,6 +159,11 @@ def interact(req: InteractRequest, _: None = Depends(_auth)):
return _ok(exec_action(req.action, req.screen))
@app.post("/interact/verify")
def interact_verify(req: InteractVerifyRequest, _: None = Depends(_auth)):
    """Handle ``POST /interact/verify``.

    Forwards the request body to ``execute_and_verify`` and wraps whatever it
    returns with ``_ok``. ``_auth`` is declared as a dependency; its result is
    bound to ``_`` and otherwise unused here.
    """
    outcome = execute_and_verify(req)
    return _ok(outcome)
@app.get("/health")
def health(_: None = Depends(_auth)):
return _ok(