feat: migrate to v2-only API and unified response envelope
All checks were successful
python-syntax / syntax-check (push) Successful in 7s
All checks were successful
python-syntax / syntax-check (push) Successful in 7s
This commit is contained in:
691
server/app.py
691
server/app.py
@@ -8,10 +8,12 @@ import subprocess
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from typing import Literal, Optional
|
||||
from typing import Any, Literal, Optional
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import Depends, FastAPI, Header, HTTPException, Response
|
||||
from fastapi import Depends, FastAPI, Header, HTTPException, Request
|
||||
from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.responses import JSONResponse
|
||||
from PIL import ImageChops, ImageStat
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
@@ -21,6 +23,55 @@ load_dotenv(dotenv_path=".env", override=False)
|
||||
app = FastAPI(title="clickthrough", version="0.1.0")
|
||||
|
||||
|
||||
def _ok(data: Any, status_code: int = 200):
    """Build the success variant of the unified response envelope.

    Args:
        data: Payload placed under the ``data`` key; it is handed to
            ``JSONResponse`` as-is, so it has to be JSON-encodable.
        status_code: HTTP status for the response (200 unless overridden).

    Returns:
        A ``JSONResponse`` whose body carries ``ok=True``, the values of
        ``_request_id()`` and ``_now_ms()``, the payload, and ``error=None``.
    """
    envelope = {
        "ok": True,
        "request_id": _request_id(),
        "time_ms": _now_ms(),
        "data": data,
        "error": None,
    }
    return JSONResponse(status_code=status_code, content=envelope)
|
||||
|
||||
|
||||
def _err(code: str, message: str, status_code: int, details: Any = None):
    """Build the failure variant of the unified response envelope.

    Args:
        code: Short machine-readable error identifier (e.g. ``"http_error"``).
        message: Human-readable description of the failure.
        status_code: HTTP status for the response.
        details: Optional extra context placed under ``error.details``.

    Returns:
        A ``JSONResponse`` whose body carries ``ok=False``, the values of
        ``_request_id()`` and ``_now_ms()``, ``data=None``, and the error
        object assembled from the arguments.
    """
    envelope = {
        "ok": False,
        "request_id": _request_id(),
        "time_ms": _now_ms(),
        "data": None,
        "error": {"code": code, "message": message, "details": details},
    }
    return JSONResponse(status_code=status_code, content=envelope)
|
||||
|
||||
|
||||
@app.exception_handler(HTTPException)
async def _http_exception_handler(_: Request, exc: HTTPException):
    """Render an ``HTTPException`` using the unified error envelope.

    When ``exc.detail`` is a dict, its ``"message"`` entry (or
    ``"request failed"`` if absent) becomes the error message and the whole
    dict is passed through as ``details``. Any other detail value is
    stringified and used as the message with no details.

    Headers attached to the exception (for example ``WWW-Authenticate`` on a
    401) are copied onto the response so they are not lost when the default
    handler is replaced.

    Args:
        _: The incoming request (unused).
        exc: The exception raised by a route or dependency.

    Returns:
        The ``JSONResponse`` produced by ``_err`` with ``exc.status_code``.
    """
    detail = exc.detail
    if isinstance(detail, dict):
        message = str(detail.get("message", "request failed"))
        response = _err("http_error", message, exc.status_code, detail)
    else:
        response = _err("http_error", str(detail), exc.status_code)

    # getattr keeps this safe for exception objects without a headers attribute.
    extra_headers = getattr(exc, "headers", None)
    if extra_headers:
        for header_name, header_value in extra_headers.items():
            response.headers[header_name] = header_value
    return response
|
||||
|
||||
|
||||
@app.exception_handler(Exception)
async def _unhandled_exception_handler(_: Request, exc: Exception):
    """Convert any otherwise-unhandled exception into a 500 error envelope.

    Only the exception's class name is exposed (under ``details.type``); the
    exception message itself is not included in the response.

    Args:
        _: The incoming request (unused).
        exc: The exception that escaped the route.

    Returns:
        The ``JSONResponse`` produced by ``_err`` with status 500.
    """
    exception_kind = type(exc).__name__
    return _err(
        "internal_error",
        "internal server error",
        500,
        {"type": exception_kind},
    )
|
||||
|
||||
|
||||
@app.exception_handler(RequestValidationError)
async def _validation_exception_handler(_: Request, exc: RequestValidationError):
    """Render request-validation failures as a 422 error envelope.

    The error list from ``exc.errors()`` is passed through
    ``jsonable_encoder`` before being embedded in the response: error entries
    can carry values that are not JSON-native (for example the exception
    object raised inside a model validator), and handing those directly to
    ``JSONResponse`` would fail during serialization instead of returning
    the 422.

    Args:
        _: The incoming request (unused).
        exc: The validation error raised while parsing the request.

    Returns:
        The ``JSONResponse`` produced by ``_err`` with status 422 and the
        encoded error list as ``details``.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    from fastapi.encoders import jsonable_encoder

    return _err(
        "validation_error",
        "request validation failed",
        422,
        jsonable_encoder(exc.errors()),
    )
|
||||
|
||||
|
||||
def _env_bool(name: str, default: bool) -> bool:
|
||||
raw = os.getenv(name)
|
||||
if raw is None:
|
||||
@@ -288,6 +339,144 @@ class VerifyActionRequest(BaseModel):
|
||||
stop_on_action_error: bool = True
|
||||
|
||||
|
||||
class ObserveRequestV2(BaseModel):
    """Request body for ``POST /v2/observe``.

    Describes what to capture (whole screen or a sub-region), whether and how
    to return the image, and whether OCR should be run on the capture.
    """

    # Capture selection. The region_* fields are only required for mode="region".
    mode: Literal["screen", "region"] = "screen"
    region_x: Optional[int] = Field(default=None, ge=0)
    region_y: Optional[int] = Field(default=None, ge=0)
    region_width: Optional[int] = Field(default=None, gt=0)
    region_height: Optional[int] = Field(default=None, gt=0)

    # Image payload options.
    include_image: bool = True
    image_format: Literal["png", "jpeg"] = "jpeg"
    jpeg_quality: int = Field(default=75, ge=1, le=100)

    # OCR options. max_ocr_area_px=None disables the area guard in the endpoint.
    ocr_mode: Literal["none", "region", "screen"] = "none"
    language_hint: Optional[str] = Field(default=None, min_length=1, max_length=64)
    min_confidence: float = Field(default=0.4, ge=0.0, le=1.0)
    max_ocr_area_px: Optional[int] = Field(default=1_500_000, ge=1000)
    group_lines: bool = True

    @model_validator(mode="after")
    def _validate_region(self):
        """Require the full region rectangle whenever ``mode`` is ``"region"``."""
        if self.mode != "region":
            return self
        rectangle = (self.region_x, self.region_y, self.region_width, self.region_height)
        if any(part is None for part in rectangle):
            raise ValueError("region_x, region_y, region_width, region_height are required for mode=region")
        return self
|
||||
|
||||
|
||||
class ImageToolPoint(BaseModel):
    """A non-negative pixel coordinate inside an observation image.

    ``localize_v2`` adds the observation region's origin to this point to
    obtain the coordinate it stores for the resolved target.
    """

    # Horizontal offset from the image's left edge.
    x: int = Field(ge=0)
    # Vertical offset from the image's top edge.
    y: int = Field(ge=0)
|
||||
|
||||
|
||||
class LocalizeRequestV2(BaseModel):
    """Request body for ``POST /v2/localize``.

    Selects a target inside a previously stored observation, either by text
    (matched against the observation's OCR lines) or by a point in the
    observation image. Exactly one of the two selectors must be supplied.
    """

    observation_id: str = Field(min_length=1, max_length=128)
    # Text selector: query string and how it is compared.
    text_query: Optional[str] = Field(default=None, max_length=512)
    text_match: Literal["contains", "exact", "regex"] = "contains"
    # Point selector, relative to the observation image.
    image_tool_point: Optional[ImageToolPoint] = None
    # Which match to pick when the text selector yields several candidates.
    candidate_index: int = Field(default=0, ge=0)

    @model_validator(mode="after")
    def _validate_selector(self):
        """Reject requests that give both selectors or neither.

        A ``text_query`` that is empty or whitespace-only counts as absent.
        """
        text_given = self.text_query is not None and self.text_query.strip() != ""
        point_given = self.image_tool_point is not None
        exactly_one = text_given != point_given
        if not exactly_one:
            raise ValueError("provide exactly one of text_query or image_tool_point")
        return self
|
||||
|
||||
|
||||
class ActionTargetV2(BaseModel):
    """Where a v2 action should be applied.

    Either references a target previously produced by ``/v2/localize``
    (``resolved_target_id``) or gives an explicit pixel coordinate
    (``pixel_x`` and ``pixel_y`` together) - never both, never neither.
    """

    resolved_target_id: Optional[str] = Field(default=None, max_length=128)
    pixel_x: Optional[int] = None
    pixel_y: Optional[int] = None

    @model_validator(mode="after")
    def _validate_shape(self):
        """Enforce the either/or rule and require a complete pixel pair."""
        uses_id = bool(self.resolved_target_id)
        coordinate_missing = [value is None for value in (self.pixel_x, self.pixel_y)]
        # "Uses pixel" means at least one coordinate was supplied.
        uses_pixel = not all(coordinate_missing)

        if uses_id == uses_pixel:
            raise ValueError("provide either resolved_target_id or pixel_x/pixel_y")
        if uses_pixel and any(coordinate_missing):
            raise ValueError("pixel_x and pixel_y are both required")
        return self
|
||||
|
||||
|
||||
class ActionRequestV2(BaseModel):
    """A single input action in the v2 API.

    ``_resolve_v2_action`` copies every field except ``target`` unchanged
    into the legacy ``ActionRequest``; ``target`` is first converted into a
    pixel target.
    """

    # Kind of input event to perform.
    action: Literal["move", "click", "right_click", "double_click", "middle_click", "scroll", "type", "hotkey"]
    # Optional location for the action; see ActionTargetV2.
    target: Optional[ActionTargetV2] = None
    duration_ms: int = Field(default=0, ge=0, le=20000)
    button: Literal["left", "right", "middle"] = "left"
    clicks: int = Field(default=1, ge=1, le=10)
    scroll_amount: int = 0
    text: str = ""
    keys: list[str] = Field(default_factory=list)
    interval_ms: int = Field(default=20, ge=0, le=5000)
    dry_run: bool = False
|
||||
|
||||
|
||||
class ActRequestV2(BaseModel):
    """Request body for ``POST /v2/act``: a wrapper around one action."""

    # The action to execute.
    action: ActionRequestV2
|
||||
|
||||
|
||||
class ActVerifyRequestV2(BaseModel):
    """Request body for ``POST /v2/act-verify``.

    Pairs an action with a condition to verify afterwards. The four tuning
    fields default to ``None``; the endpoint replaces a ``None`` with the
    value ``_risk_defaults`` returns for ``risk_level``.
    """

    action: ActionRequestV2
    condition: WaitTextCondition | WaitWindowCondition | WaitVisualCondition
    risk_level: Literal["low", "high"] = "low"

    # Tuning overrides; None means "use the risk-level default".
    retries: Optional[int] = Field(default=None, ge=0, le=10)
    timeout_ms: Optional[int] = Field(default=None, ge=0, le=120000)
    poll_interval_ms: Optional[int] = Field(default=None, ge=50, le=10000)
    retry_delay_ms: Optional[int] = Field(default=None, ge=0, le=60000)

    stop_on_action_error: bool = True
|
||||
|
||||
|
||||
# In-process store of observations, keyed by observation id. Written by
# observe_v2 and read through _get_observation.
# NOTE(review): nothing in the visible code removes entries - confirm whether
# eviction happens elsewhere or unbounded growth is acceptable.
OBSERVATIONS: dict[str, dict[str, Any]] = {}

# In-process store of localized targets, keyed by resolved target id. Written
# by localize_v2 and read by _resolve_v2_action.
RESOLVED_TARGETS: dict[str, dict[str, Any]] = {}
|
||||
|
||||
|
||||
def _get_observation(observation_id: str) -> dict[str, Any]:
    """Look up a stored observation by id.

    Args:
        observation_id: Key into the module-level ``OBSERVATIONS`` store.

    Returns:
        The stored observation record.

    Raises:
        HTTPException: 404 when no record exists for ``observation_id``.
    """
    record = OBSERVATIONS.get(observation_id)
    if record is not None:
        return record
    raise HTTPException(status_code=404, detail="observation_id not found")
|
||||
|
||||
|
||||
def _resolve_v2_action(req: ActionRequestV2) -> ActionRequest:
    """Translate a v2 action into the legacy ``ActionRequest``.

    The v2 target, if present, is converted into a ``PixelTarget`` with zero
    offsets: a ``resolved_target_id`` is looked up in ``RESOLVED_TARGETS``,
    otherwise the explicit pixel coordinates are used. All remaining fields
    are copied across unchanged.

    Args:
        req: The v2 action to convert.

    Returns:
        The equivalent legacy request; ``target`` is ``None`` when the v2
        action had no target.

    Raises:
        HTTPException: 404 when ``resolved_target_id`` is not in the store.
    """
    spec = req.target
    legacy_target: Target | None = None

    if spec is not None:
        if spec.resolved_target_id:
            stored = RESOLVED_TARGETS.get(spec.resolved_target_id)
            if stored is None:
                raise HTTPException(status_code=404, detail="resolved_target_id not found")
            px, py = stored["x"], stored["y"]
        else:
            # A missing coordinate falls back to 0 here.
            px, py = spec.pixel_x or 0, spec.pixel_y or 0
        legacy_target = PixelTarget(mode="pixel", x=px, y=py, dx=0, dy=0)

    passthrough_fields = (
        "duration_ms",
        "button",
        "clicks",
        "scroll_amount",
        "text",
        "keys",
        "interval_ms",
        "dry_run",
    )
    copied = {field_name: getattr(req, field_name) for field_name in passthrough_fields}
    return ActionRequest(action=req.action, target=legacy_target, **copied)
|
||||
|
||||
|
||||
def _risk_defaults(risk_level: str) -> dict[str, int]:
|
||||
if risk_level == "high":
|
||||
return {"retries": 1, "timeout_ms": 6000, "poll_interval_ms": 250, "retry_delay_ms": 300}
|
||||
return {"retries": 0, "timeout_ms": 2500, "poll_interval_ms": 200, "retry_delay_ms": 150}
|
||||
|
||||
|
||||
def _auth(x_clickthrough_token: Optional[str] = Header(default=None)):
|
||||
token = SETTINGS["token"]
|
||||
@@ -1377,154 +1566,225 @@ def _exec_action(req: ActionRequest, screen: int = 0) -> dict:
|
||||
}
|
||||
|
||||
|
||||
def _localization_confidence(source: str, confidence: float | None = None) -> str:
|
||||
if source == "image_tool_point":
|
||||
return "high"
|
||||
if source == "ocr" and confidence is not None:
|
||||
if confidence >= 0.8:
|
||||
return "high"
|
||||
if confidence >= 0.55:
|
||||
return "medium"
|
||||
return "low"
|
||||
|
||||
|
||||
@app.post("/v2/observe")
def observe_v2(req: ObserveRequestV2, screen: int = 0, _: None = Depends(_auth)):
    """Capture the screen or a region, optionally OCR it, and store the result.

    Steps, in order: capture the requested area, optionally encode the image,
    optionally run OCR (on a fresh full-screen capture for
    ``ocr_mode="screen"``, otherwise on the captured image), record the
    observation in ``OBSERVATIONS``, and return it in the success envelope.

    Args:
        req: Capture, image and OCR options.
        screen: Screen index forwarded to ``_capture_region_image``.
        _: Auth dependency result (unused).

    Returns:
        The ``_ok`` envelope with the observation id, geometry, image data,
        OCR results and timing.

    Raises:
        HTTPException: 400 when the OCR area exceeds ``max_ocr_area_px``.
    """
    region_mode = req.mode == "region"

    capture_clock = time.perf_counter()
    image, region, mon, displays, screen_selection = _capture_region_image(
        screen,
        req.region_x if region_mode else None,
        req.region_y if region_mode else None,
        req.region_width if region_mode else None,
        req.region_height if region_mode else None,
    )
    capture_ms = int((time.perf_counter() - capture_clock) * 1000)
    width, height = image.size[0], image.size[1]

    encoded = _encode_image(image, req.image_format, req.jpeg_quality) if req.include_image else None

    wants_ocr = req.ocr_mode != "none"
    blocks: list[dict] = []
    grouped_lines: list[dict] = []
    ocr_ms = 0
    if wants_ocr:
        ocr_clock = time.perf_counter()
        if req.ocr_mode == "screen":
            # Full-screen OCR uses its own capture, independent of the region image.
            ocr_image, ocr_region, _mon, _displays, _selection = _capture_region_image(
                screen, None, None, None, None
            )
        else:
            ocr_image, ocr_region = image, region

        area = ocr_region["width"] * ocr_region["height"]
        if req.max_ocr_area_px is not None and area > req.max_ocr_area_px:
            raise HTTPException(
                status_code=400,
                detail=f"ocr area {area} exceeds max_ocr_area_px {req.max_ocr_area_px}",
            )

        blocks = _run_ocr(
            ocr_image,
            req.language_hint,
            req.min_confidence,
            ocr_region["x"],
            ocr_region["y"],
        )
        if req.group_lines:
            grouped_lines = _group_ocr_lines(blocks)
        ocr_ms = int((time.perf_counter() - ocr_clock) * 1000)

    observation_id = _request_id()
    OBSERVATIONS[observation_id] = {
        "id": observation_id,
        "region": region,
        "screen": screen_selection,
        "display": mon,
        "image_width": width,
        "image_height": height,
        "ocr_blocks": blocks,
        "ocr_lines": grouped_lines,
        "created_at_ms": _now_ms(),
    }

    image_section = {
        "included": req.include_image,
        "format": req.image_format if req.include_image else None,
        "base64": encoded,
        "width": width,
        "height": height,
    }
    ocr_section = {
        # Equals "none" exactly when OCR was skipped.
        "mode": req.ocr_mode,
        "min_confidence": req.min_confidence,
        "language_hint": req.language_hint,
        "block_count": len(blocks),
        "line_count": len(grouped_lines),
        "blocks": blocks,
        "lines": grouped_lines,
    }
    timing_section = {
        "capture_ms": capture_ms,
        "ocr_ms": ocr_ms,
        "total_ms": capture_ms + ocr_ms,
    }
    return _ok(
        {
            "observation_id": observation_id,
            "region": region,
            "screen": screen_selection,
            "display": mon,
            "image": image_section,
            "ocr": ocr_section,
            "timing_ms": timing_section,
        }
    )
|
||||
|
||||
|
||||
@app.post("/v2/localize")
def localize_v2(req: LocalizeRequestV2, _: None = Depends(_auth)):
    """Resolve a selector against a stored observation into a pixel target.

    With ``image_tool_point`` the point is bounds-checked against the
    observation image and offset by the observation region's origin. With
    ``text_query`` the observation's OCR lines are searched and the centre of
    the chosen match's bounding box is used. In both cases the coordinate is
    passed to ``_enforce_allowed_region`` and then stored in
    ``RESOLVED_TARGETS`` under a new id.

    Args:
        req: Observation id plus exactly one selector.
        _: Auth dependency result (unused).

    Returns:
        The ``_ok`` envelope describing the resolved target, or a 404
        ``not_found`` error envelope when the text query has no matches.

    Raises:
        HTTPException: 404 for an unknown observation; 400 when the point is
            outside the image or ``candidate_index`` exceeds the match list.
    """
    observation = _get_observation(req.observation_id)
    region = observation["region"]
    image_width = observation["image_width"]
    image_height = observation["image_height"]

    point = req.image_tool_point
    if point is not None:
        inside_image = point.x < image_width and point.y < image_height
        if not inside_image:
            raise HTTPException(status_code=400, detail="image_tool_point outside observation image bounds")

        x = region["x"] + point.x
        y = region["y"] + point.y
        _enforce_allowed_region(x, y)

        resolved_target_id = _request_id()
        RESOLVED_TARGETS[resolved_target_id] = {
            "id": resolved_target_id,
            "observation_id": req.observation_id,
            "x": x,
            "y": y,
            "source": "image_tool_point",
        }
        return _ok(
            {
                "resolved_target_id": resolved_target_id,
                "source": "image_tool_point",
                "localization_confidence": _localization_confidence("image_tool_point"),
                "pixel": {"x": x, "y": y},
                "observation_region": region,
                "image_bounds": {"width": image_width, "height": image_height},
            }
        )

    # Text selector: use stored lines, grouping the stored blocks if no lines exist.
    lines = observation.get("ocr_lines") or _group_ocr_lines(observation.get("ocr_blocks", []))
    matches = _find_text_matches(lines, req.text_query or "", req.text_match, False, 200)
    if not matches:
        return _err("not_found", "no localization candidates found", 404, {"found": False, "matches": []})
    if req.candidate_index >= len(matches):
        raise HTTPException(status_code=400, detail="candidate_index is outside match results")

    chosen = matches[req.candidate_index]
    bbox = chosen["bbox"]
    # Aim at the box centre, but always at least one pixel inside the box.
    x = bbox["x"] + max(1, bbox["width"] // 2)
    y = bbox["y"] + max(1, bbox["height"] // 2)
    _enforce_allowed_region(x, y)

    resolved_target_id = _request_id()
    RESOLVED_TARGETS[resolved_target_id] = {
        "id": resolved_target_id,
        "observation_id": req.observation_id,
        "x": x,
        "y": y,
        "source": "ocr",
        "match": chosen,
    }
    return _ok(
        {
            "resolved_target_id": resolved_target_id,
            "source": "ocr",
            "localization_confidence": _localization_confidence("ocr", chosen.get("confidence")),
            "pixel": {"x": x, "y": y},
            "selected_match": chosen,
            "match_count": len(matches),
        }
    )
|
||||
|
||||
|
||||
@app.post("/v2/act")
def act_v2(req: ActRequestV2, screen: int = 0, _: None = Depends(_auth)):
    """Execute one v2 action and return its result in the success envelope.

    Args:
        req: Wrapper holding the action to perform.
        screen: Screen index forwarded to ``_exec_action``.
        _: Auth dependency result (unused).

    Returns:
        The ``_ok`` envelope around whatever ``_exec_action`` returns.
    """
    outcome = _exec_action(_resolve_v2_action(req.action), screen)
    return _ok(outcome)
|
||||
|
||||
|
||||
@app.post("/v2/act-verify")
def act_verify_v2(req: ActVerifyRequestV2, screen: int = 0, _: None = Depends(_auth)):
    """Run an action and verify a follow-up condition.

    Tuning values left as ``None`` in the request are filled from
    ``_risk_defaults(req.risk_level)``. The response payload contains the
    risk level, the defaults that were looked up, and every key of the
    verification result (result keys win on a name clash).

    Args:
        req: Action, condition and optional tuning overrides.
        screen: Screen index forwarded to ``_run_verified_action``.
        _: Auth dependency result (unused).

    Returns:
        The ``_ok`` envelope when the result reports ``success``; otherwise a
        409 ``verification_failed`` error envelope carrying the same payload.
    """
    defaults = _risk_defaults(req.risk_level)
    legacy_action = _resolve_v2_action(req.action)

    def _effective(override, fallback_key):
        # An explicit request value (including 0) beats the risk-level default.
        return defaults[fallback_key] if override is None else override

    verify_req = VerifyActionRequest(
        action=legacy_action,
        condition=req.condition,
        retries=_effective(req.retries, "retries"),
        timeout_ms=_effective(req.timeout_ms, "timeout_ms"),
        poll_interval_ms=_effective(req.poll_interval_ms, "poll_interval_ms"),
        retry_delay_ms=_effective(req.retry_delay_ms, "retry_delay_ms"),
        stop_on_action_error=req.stop_on_action_error,
    )
    result = _run_verified_action(verify_req, screen)

    payload = {"risk_level": req.risk_level, "defaults_applied": defaults, **result}
    if not result.get("success", False):
        return _err("verification_failed", "action verification did not satisfy condition", 409, payload)
    return _ok(payload)
|
||||
|
||||
|
||||
@app.get("/health")
def health(_: None = Depends(_auth)):
    """Report service identity and selected runtime settings.

    The legacy flat-dict ``return`` that preceded the envelope ``return`` has
    been removed: it made the unified-envelope response unreachable and left
    this endpoint inconsistent with the other handlers that answer through
    ``_ok``.

    Args:
        _: Auth dependency result (unused).

    Returns:
        The ``_ok`` envelope whose data holds the service name, app version,
        dry-run flag, allowed region and the exec-related settings. Only
        whether an exec secret is configured is reported, never its value.
    """
    exec_settings = {
        "enabled": SETTINGS["exec_enabled"],
        "secret_configured": bool(SETTINGS["exec_secret"]),
        "default_shell": SETTINGS["exec_default_shell"],
        "default_timeout_s": SETTINGS["exec_default_timeout_s"],
        "max_timeout_s": SETTINGS["exec_max_timeout_s"],
    }
    return _ok(
        {
            "service": "clickthrough",
            "version": app.version,
            "dry_run": SETTINGS["dry_run"],
            "allowed_region": SETTINGS["allowed_region"],
            "exec": exec_settings,
        }
    )
|
||||
|
||||
|
||||
@app.get("/displays")
|
||||
def displays(_: None = Depends(_auth)):
|
||||
detected = _get_displays()
|
||||
return {
|
||||
"ok": True,
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"displays": detected,
|
||||
"default_screen": 0,
|
||||
}
|
||||
|
||||
|
||||
@app.get("/screen")
|
||||
def screen(
|
||||
with_grid: bool = True,
|
||||
grid_rows: int = SETTINGS["default_grid_rows"],
|
||||
grid_cols: int = SETTINGS["default_grid_cols"],
|
||||
include_labels: bool = True,
|
||||
image_format: Literal["png", "jpeg"] = "png",
|
||||
jpeg_quality: int = 85,
|
||||
asImage: bool = False,
|
||||
screen: int = 0,
|
||||
_: None = Depends(_auth),
|
||||
):
|
||||
req = ScreenRequest(
|
||||
with_grid=with_grid,
|
||||
grid_rows=grid_rows,
|
||||
grid_cols=grid_cols,
|
||||
include_labels=include_labels,
|
||||
image_format=image_format,
|
||||
jpeg_quality=jpeg_quality,
|
||||
)
|
||||
|
||||
base_img, mon, displays, screen_selection = _capture_screen(screen)
|
||||
meta = {"region": mon, "screen": screen_selection, "displays": displays}
|
||||
out_img = base_img
|
||||
|
||||
if req.with_grid:
|
||||
out_img, grid_meta = _draw_grid(base_img, mon["x"], mon["y"], req.grid_rows, req.grid_cols, req.include_labels)
|
||||
meta.update(grid_meta)
|
||||
|
||||
if asImage:
|
||||
image_bytes = _serialize_image(out_img, req.image_format, req.jpeg_quality)
|
||||
media_type = "image/jpeg" if req.image_format == "jpeg" else "image/png"
|
||||
return Response(content=image_bytes, media_type=media_type)
|
||||
|
||||
encoded = _encode_image(out_img, req.image_format, req.jpeg_quality)
|
||||
return {
|
||||
"ok": True,
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"image": {
|
||||
"format": req.image_format,
|
||||
"base64": encoded,
|
||||
"width": out_img.size[0],
|
||||
"height": out_img.size[1],
|
||||
},
|
||||
"meta": meta,
|
||||
}
|
||||
|
||||
|
||||
@app.post("/zoom")
|
||||
def zoom(req: ZoomRequest, asImage: bool = False, screen: int = 0, _: None = Depends(_auth)):
|
||||
base_img, mon, displays, screen_selection = _capture_screen(screen)
|
||||
|
||||
cx = req.center_x - mon["x"]
|
||||
cy = req.center_y - mon["y"]
|
||||
|
||||
half_w = req.width // 2
|
||||
half_h = req.height // 2
|
||||
|
||||
left = max(0, cx - half_w)
|
||||
top = max(0, cy - half_h)
|
||||
right = min(base_img.size[0], left + req.width)
|
||||
bottom = min(base_img.size[1], top + req.height)
|
||||
|
||||
crop = base_img.crop((left, top, right, bottom))
|
||||
|
||||
region_x = mon["x"] + left
|
||||
region_y = mon["y"] + top
|
||||
|
||||
meta = {
|
||||
"source_monitor": mon,
|
||||
"screen": screen_selection,
|
||||
"displays": displays,
|
||||
"region": {
|
||||
"x": region_x,
|
||||
"y": region_y,
|
||||
"width": crop.size[0],
|
||||
"height": crop.size[1],
|
||||
},
|
||||
}
|
||||
|
||||
out_img = crop
|
||||
if req.with_grid:
|
||||
out_img, grid_meta = _draw_grid(crop, region_x, region_y, req.grid_rows, req.grid_cols, req.include_labels)
|
||||
meta.update(grid_meta)
|
||||
|
||||
if asImage:
|
||||
image_bytes = _serialize_image(out_img, req.image_format, req.jpeg_quality)
|
||||
media_type = "image/jpeg" if req.image_format == "jpeg" else "image/png"
|
||||
return Response(content=image_bytes, media_type=media_type)
|
||||
|
||||
encoded = _encode_image(out_img, req.image_format, req.jpeg_quality)
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"image": {
|
||||
"format": req.image_format,
|
||||
"base64": encoded,
|
||||
"width": out_img.size[0],
|
||||
"height": out_img.size[1],
|
||||
},
|
||||
"meta": meta,
|
||||
}
|
||||
|
||||
|
||||
@app.post("/action")
|
||||
def action(req: ActionRequest, screen: int = 0, _: None = Depends(_auth)):
|
||||
result = _exec_action(req, screen)
|
||||
return {
|
||||
"ok": True,
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"result": result,
|
||||
}
|
||||
return _ok({"displays": detected, "default_screen": 0})
|
||||
|
||||
|
||||
@app.post("/exec")
|
||||
@@ -1540,12 +1800,7 @@ def exec_command(
|
||||
raise HTTPException(status_code=401, detail="invalid exec secret")
|
||||
|
||||
result = _exec_command(req)
|
||||
return {
|
||||
"ok": True,
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"result": result,
|
||||
}
|
||||
return _ok(result)
|
||||
|
||||
|
||||
@app.get("/windows")
|
||||
@@ -1565,151 +1820,19 @@ def windows(
|
||||
visible_only=visible_only,
|
||||
)
|
||||
matches = _list_windows(query)
|
||||
return {
|
||||
"ok": True,
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"windows": matches,
|
||||
"count": len(matches),
|
||||
}
|
||||
return _ok({"windows": matches, "count": len(matches)})
|
||||
|
||||
|
||||
@app.post("/windows/action")
def window_action(req: WindowActionRequest, _: None = Depends(_auth)):
    """Apply a window action and return its result in the success envelope.

    The legacy flat-dict ``return`` that preceded the envelope ``return`` has
    been removed: it made ``return _ok(result)`` unreachable and left this
    endpoint inconsistent with the other handlers that answer through ``_ok``.

    Args:
        req: The window action to apply.
        _: Auth dependency result (unused).

    Returns:
        The ``_ok`` envelope around whatever ``_apply_window_action`` returns.
    """
    result = _apply_window_action(req)
    return _ok(result)
|
||||
|
||||
|
||||
@app.post("/launch")
|
||||
def launch(req: LaunchRequest, _: None = Depends(_auth)):
|
||||
result = _launch_app(req)
|
||||
return {
|
||||
"ok": True,
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"result": result,
|
||||
}
|
||||
|
||||
|
||||
@app.post("/wait")
|
||||
def wait(req: WaitRequest, screen: int = 0, _: None = Depends(_auth)):
|
||||
result = _wait_for_condition(req, screen)
|
||||
return {
|
||||
"ok": result.get("satisfied", False),
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"result": result,
|
||||
}
|
||||
|
||||
|
||||
@app.post("/vision/diff")
|
||||
def vision_diff(req: VisionDiffRequest, screen: int = 0, _: None = Depends(_auth)):
|
||||
result = _compute_visual_diff(req, screen)
|
||||
return {
|
||||
"ok": True,
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"result": result,
|
||||
}
|
||||
|
||||
|
||||
@app.post("/vision/stability")
|
||||
def vision_stability(req: VisionStabilityRequest, screen: int = 0, _: None = Depends(_auth)):
|
||||
result = _measure_stability(req, screen)
|
||||
return {
|
||||
"ok": True,
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"result": result,
|
||||
}
|
||||
|
||||
|
||||
@app.post("/action/verify")
|
||||
def action_verify(req: VerifyActionRequest, screen: int = 0, _: None = Depends(_auth)):
|
||||
result = _run_verified_action(req, screen)
|
||||
return {
|
||||
"ok": result.get("success", False),
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"result": result,
|
||||
}
|
||||
|
||||
|
||||
@app.post("/ocr")
|
||||
def ocr(req: OCRRequest, screen: int = 0, _: None = Depends(_auth)):
|
||||
image, region, mon, displays, screen_selection, source = _capture_ocr_source(req, screen)
|
||||
offset_x = region["x"] if source != "image" else 0
|
||||
offset_y = region["y"] if source != "image" else 0
|
||||
blocks = _run_ocr(image, req.language_hint, req.min_confidence, offset_x, offset_y)
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"result": {
|
||||
"mode": source,
|
||||
"screen": screen_selection if source != "image" else None,
|
||||
"display": mon if source != "image" else None,
|
||||
"language_hint": req.language_hint,
|
||||
"min_confidence": req.min_confidence,
|
||||
"region": region,
|
||||
"blocks": blocks,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@app.post("/ocr/find")
|
||||
def ocr_find(req: OCRFindRequest, screen: int = 0, _: None = Depends(_auth)):
|
||||
image, region, mon, displays, screen_selection, source = _capture_ocr_source(req, screen)
|
||||
offset_x = region["x"] if source != "image" else 0
|
||||
offset_y = region["y"] if source != "image" else 0
|
||||
blocks = _run_ocr(image, req.language_hint, req.min_confidence, offset_x, offset_y)
|
||||
matches = _find_text_matches(blocks, req.query, req.match, req.group_lines, req.max_results)
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"result": {
|
||||
"mode": source,
|
||||
"screen": screen_selection if source != "image" else None,
|
||||
"display": mon if source != "image" else None,
|
||||
"language_hint": req.language_hint,
|
||||
"min_confidence": req.min_confidence,
|
||||
"query": req.query,
|
||||
"match": req.match,
|
||||
"group_lines": req.group_lines,
|
||||
"region": region,
|
||||
"matches": matches,
|
||||
"match_count": len(matches),
|
||||
"blocks_considered": len(blocks),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@app.post("/batch")
|
||||
def batch(req: BatchRequest, screen: int = 0, _: None = Depends(_auth)):
|
||||
results = []
|
||||
for index, item in enumerate(req.actions):
|
||||
try:
|
||||
item_result = _exec_action(item, screen)
|
||||
results.append({"index": index, "ok": True, "result": item_result})
|
||||
except Exception as exc:
|
||||
results.append({"index": index, "ok": False, "error": str(exc)})
|
||||
if req.stop_on_error:
|
||||
break
|
||||
|
||||
return {
|
||||
"ok": all(r["ok"] for r in results),
|
||||
"request_id": _request_id(),
|
||||
"time_ms": _now_ms(),
|
||||
"results": results,
|
||||
}
|
||||
return _ok(result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user