Add pytesseract OCR, click_text interact action, and interact verify endpoint
All checks were successful
python-syntax / syntax-check (push) Successful in 6s

This commit is contained in:
2026-05-03 20:57:34 +02:00
parent 1c03cab457
commit 9e816e0417
8 changed files with 559 additions and 11 deletions

View File

@@ -48,6 +48,7 @@ class ActionRequest(BaseModel):
"scroll",
"type",
"hotkey",
"click_text",
]
target: Optional[Target] = None
duration_ms: int = Field(default=0, ge=0, le=20000)
@@ -58,6 +59,13 @@ class ActionRequest(BaseModel):
keys: list[str] = Field(default_factory=list)
interval_ms: int = Field(default=20, ge=0, le=5000)
dry_run: bool = False
click_text: "ClickTextAction | None" = None
@model_validator(mode="after")
def _validate_click_text(self):
if self.action == "click_text" and self.click_text is None:
raise ValueError("click_text payload is required when action=click_text")
return self
class ExecRequest(BaseModel):
@@ -103,6 +111,10 @@ class SeeRequest(BaseModel):
include_labels: bool = True
image_format: Literal["png", "jpeg"] = "png"
jpeg_quality: int = Field(default=85, ge=1, le=100)
ocr: bool = False
ocr_min_confidence: float = Field(default=0.0, ge=0.0, le=100.0)
ocr_lang: str = Field(default="eng", min_length=1, max_length=64)
ocr_psm: int | None = Field(default=None, ge=0, le=13)
class SeeZoomRequest(BaseModel):
@@ -122,3 +134,55 @@ class SeeZoomRequest(BaseModel):
# Request body pairing a single ActionRequest with the screen it applies to.
class InteractRequest(BaseModel):
    # Screen index; defaults to 0 and is not range-checked at this level.
    screen: int = 0
    # The action to carry out (validated by ActionRequest itself).
    action: ActionRequest
# Axis-aligned rectangle used to restrict an OCR search.
# Units are presumably screen pixels — confirm against the OCR helper.
class OCRRegion(BaseModel):
    # Origin of the rectangle; both coordinates must be non-negative.
    x: int = Field(ge=0)
    y: int = Field(ge=0)
    # Extent of the rectangle; zero-sized regions are rejected (strictly > 0).
    width: int = Field(gt=0)
    height: int = Field(gt=0)
# Payload for the "click_text" action: which text to look for and how to pick
# one candidate when several match.
class ClickTextAction(BaseModel):
    # Text to search for; interpreted as a regular expression when
    # match == "regex" (validated below).
    text: str = Field(min_length=1, max_length=1000)
    # Comparison mode applied to `text`.
    match: Literal["contains", "exact", "regex"] = "contains"
    # Optional rectangle limiting the search; None means no restriction is
    # supplied by the caller.
    region: OCRRegion | None = None
    # Optional screen index; None leaves the choice to the consumer
    # (presumably the enclosing request's screen — confirm in the handler).
    screen: int | None = None
    case_sensitive: bool = False
    # Minimum acceptable OCR confidence, on a 0-100 scale.
    min_confidence: float = Field(default=0.0, ge=0.0, le=100.0)
    # Which match to use; "nth" requires `nth` to be set (see _validate_nth).
    occurrence: Literal["first", "best", "nth"] = "first"
    # 1-based index of the desired match; only valid with occurrence == "nth".
    nth: int | None = Field(default=None, ge=1, le=10000)
    # OCR language code(s); presumably passed through to Tesseract.
    ocr_lang: str = Field(default="eng", min_length=1, max_length=64)
    # Presumably Tesseract's page segmentation mode (0-13); None = engine default.
    ocr_psm: int | None = Field(default=None, ge=0, le=13)

    @model_validator(mode="after")
    def _validate_nth(self):
        """Ensure `nth` is present exactly when occurrence == "nth".

        Raises:
            ValueError: if `nth` is missing for occurrence="nth", or supplied
                for any other occurrence mode.
        """
        wants_nth = self.occurrence == "nth"
        if wants_nth and self.nth is None:
            raise ValueError("nth is required when occurrence=nth")
        if not wants_nth and self.nth is not None:
            raise ValueError("nth is only allowed when occurrence=nth")
        return self

    @model_validator(mode="after")
    def _validate_regex(self):
        """Reject a `text` that is not a compilable pattern when match == "regex".

        Without this check an invalid pattern passes request validation and
        can only fail later, when the pattern is actually used.

        Raises:
            ValueError: if match == "regex" and `text` does not compile.
        """
        if self.match == "regex":
            # Function-scope import: the module's import list is not visible
            # from this block, and `re` is only needed on this path.
            import re

            try:
                re.compile(self.text)
            except re.error as exc:
                raise ValueError(
                    f"text is not a valid regular expression: {exc}"
                ) from exc
        return self
# Verification spec: look for `text` via OCR around the point (x, y).
class VerifyOCRTextNearPoint(BaseModel):
    # Discriminator; the only accepted value is "ocr_text_near_point".
    type: Literal["ocr_text_near_point"]
    # Text to look for; interpreted as a regular expression when
    # match == "regex" (validated below).
    text: str = Field(min_length=1, max_length=1000)
    # Centre point of the search; both coordinates must be non-negative.
    x: int = Field(ge=0)
    y: int = Field(ge=0)
    # Search distance around (x, y), 1-1000; units presumably pixels — confirm.
    radius: int = Field(default=80, ge=1, le=1000)
    screen: int = 0
    # Comparison mode applied to `text`.
    match: Literal["contains", "exact", "regex"] = "contains"
    case_sensitive: bool = False
    # Minimum acceptable OCR confidence, on a 0-100 scale.
    min_confidence: float = Field(default=0.0, ge=0.0, le=100.0)
    # OCR language code(s); presumably passed through to Tesseract.
    ocr_lang: str = Field(default="eng", min_length=1, max_length=64)
    # Presumably Tesseract's page segmentation mode (0-13); None = engine default.
    ocr_psm: int | None = Field(default=None, ge=0, le=13)

    @model_validator(mode="after")
    def _validate_regex(self):
        """Reject a `text` that is not a compilable pattern when match == "regex".

        Without this check an invalid pattern passes request validation and
        can only fail later, when the pattern is actually used.

        Raises:
            ValueError: if match == "regex" and `text` does not compile.
        """
        if self.match == "regex":
            # Function-scope import: the module's import list is not visible
            # from this block, and `re` is only needed on this path.
            import re

            try:
                re.compile(self.text)
            except re.error as exc:
                raise ValueError(
                    f"text is not a valid regular expression: {exc}"
                ) from exc
        return self
# Request body combining an interaction with an OCR-based verification spec.
# Presumably the handler performs `action` and then polls `verify` until it
# passes or the timeout elapses — the endpoint itself is not visible here.
class InteractVerifyRequest(BaseModel):
    action: InteractRequest
    verify: VerifyOCRTextNearPoint
    # Polling interval in milliseconds (50-5000).
    check_interval_ms: int = Field(default=250, ge=50, le=5000)
    # Overall time budget in milliseconds (100-60000).
    # NOTE(review): the bounds allow check_interval_ms > timeout_ms
    # (e.g. 5000 vs 100); confirm the handler copes with that combination.
    timeout_ms: int = Field(default=3000, ge=100, le=60000)
# ActionRequest annotates `click_text` with the string "ClickTextAction | None"
# because ClickTextAction is defined further down in this module. Rebuild the
# model now that the name exists so the forward reference can be resolved.
ActionRequest.model_rebuild()