Add pytesseract OCR, click_text interact action, and interact verify endpoint
All checks were successful
python-syntax / syntax-check (push) Successful in 6s
All checks were successful
python-syntax / syntax-check (push) Successful in 6s
This commit is contained in:
@@ -48,6 +48,7 @@ class ActionRequest(BaseModel):
|
||||
"scroll",
|
||||
"type",
|
||||
"hotkey",
|
||||
"click_text",
|
||||
]
|
||||
target: Optional[Target] = None
|
||||
duration_ms: int = Field(default=0, ge=0, le=20000)
|
||||
@@ -58,6 +59,13 @@ class ActionRequest(BaseModel):
|
||||
keys: list[str] = Field(default_factory=list)
|
||||
interval_ms: int = Field(default=20, ge=0, le=5000)
|
||||
dry_run: bool = False
|
||||
click_text: "ClickTextAction | None" = None
|
||||
|
||||
@model_validator(mode="after")
def _validate_click_text(self):
    """Reject a ``click_text`` action that arrives without its payload.

    Runs after field validation. Returns the model unchanged when the
    combination is acceptable.

    Raises:
        ValueError: if ``action`` is ``"click_text"`` but ``click_text``
            was left as ``None``.
    """
    payload_missing = self.click_text is None
    wants_click_text = self.action == "click_text"
    if wants_click_text and payload_missing:
        raise ValueError("click_text payload is required when action=click_text")
    return self
|
||||
|
||||
|
||||
class ExecRequest(BaseModel):
|
||||
@@ -103,6 +111,10 @@ class SeeRequest(BaseModel):
|
||||
include_labels: bool = True
|
||||
image_format: Literal["png", "jpeg"] = "png"
|
||||
jpeg_quality: int = Field(default=85, ge=1, le=100)
|
||||
ocr: bool = False
|
||||
ocr_min_confidence: float = Field(default=0.0, ge=0.0, le=100.0)
|
||||
ocr_lang: str = Field(default="eng", min_length=1, max_length=64)
|
||||
ocr_psm: int | None = Field(default=None, ge=0, le=13)
|
||||
|
||||
|
||||
class SeeZoomRequest(BaseModel):
|
||||
@@ -122,3 +134,55 @@ class SeeZoomRequest(BaseModel):
|
||||
class InteractRequest(BaseModel):
    """Model pairing a screen number with the action to perform."""

    # Defaults to 0; presumably a zero-based screen/monitor index — confirm.
    screen: int = 0
    # Required; validated against the ActionRequest model.
    action: ActionRequest
|
||||
|
||||
|
||||
class OCRRegion(BaseModel):
    """Rectangle given as an origin plus a size, all integers.

    All four fields are required. ``x`` and ``y`` must be non-negative;
    ``width`` and ``height`` must be strictly positive.

    NOTE(review): presumably pixel coordinates of the area handed to OCR —
    confirm against the code that consumes this model.
    """

    x: int = Field(ge=0)
    y: int = Field(ge=0)
    width: int = Field(gt=0)
    height: int = Field(gt=0)
|
||||
|
||||
|
||||
class ClickTextAction(BaseModel):
    """Parameters of a ``click_text`` action: what text to find and how.

    Only the shape and ranges of the parameters are enforced here; the
    lookup and the click are performed by whatever consumes this model.
    """

    # Text to look for; between 1 and 1000 characters.
    text: str = Field(min_length=1, max_length=1000)
    # Comparison mode applied to ``text``.
    match: Literal["contains", "exact", "regex"] = "contains"
    # Optional rectangle; presumably limits the searched area — confirm.
    region: OCRRegion | None = None
    # Optional screen number; the meaning of None is up to the consumer.
    screen: int | None = None
    case_sensitive: bool = False
    # Threshold constrained to the 0-100 range.
    min_confidence: float = Field(default=0.0, ge=0.0, le=100.0)
    # Selection strategy; "nth" must be accompanied by ``nth``.
    occurrence: Literal["first", "best", "nth"] = "first"
    # 1-based position (minimum 1); only valid with occurrence="nth".
    nth: int | None = Field(default=None, ge=1, le=10000)
    # presumably a Tesseract language code such as "eng" — confirm.
    ocr_lang: str = Field(default="eng", min_length=1, max_length=64)
    # presumably a Tesseract page segmentation mode (0-13) — confirm.
    ocr_psm: int | None = Field(default=None, ge=0, le=13)

    @model_validator(mode="after")
    def _validate_nth(self):
        """Require ``nth`` exactly when ``occurrence`` is ``"nth"``.

        Raises:
            ValueError: if ``occurrence`` is ``"nth"`` without ``nth``, or
                ``nth`` is supplied for any other ``occurrence``.
        """
        nth_mode = self.occurrence == "nth"
        nth_given = self.nth is not None
        if nth_mode and not nth_given:
            raise ValueError("nth is required when occurrence=nth")
        if nth_given and not nth_mode:
            raise ValueError("nth is only allowed when occurrence=nth")
        return self
|
||||
|
||||
|
||||
class VerifyOCRTextNearPoint(BaseModel):
    """Verification spec of type ``ocr_text_near_point``.

    Describes a piece of text, a point, and a radius around that point,
    together with matching and OCR options. Evaluation of the condition
    happens outside this model.
    """

    # Discriminating tag; the only accepted value is "ocr_text_near_point".
    type: Literal["ocr_text_near_point"]
    # Text to look for; between 1 and 1000 characters.
    text: str = Field(min_length=1, max_length=1000)
    # Point coordinates; both must be non-negative.
    x: int = Field(ge=0)
    y: int = Field(ge=0)
    # Distance around the point, 1-1000 (presumably pixels — confirm).
    radius: int = Field(default=80, ge=1, le=1000)
    screen: int = 0
    # Comparison mode applied to ``text``.
    match: Literal["contains", "exact", "regex"] = "contains"
    case_sensitive: bool = False
    # Threshold constrained to the 0-100 range.
    min_confidence: float = Field(default=0.0, ge=0.0, le=100.0)
    # presumably a Tesseract language code such as "eng" — confirm.
    ocr_lang: str = Field(default="eng", min_length=1, max_length=64)
    # presumably a Tesseract page segmentation mode (0-13) — confirm.
    ocr_psm: int | None = Field(default=None, ge=0, le=13)
|
||||
|
||||
|
||||
class InteractVerifyRequest(BaseModel):
    """An interaction bundled with a verification spec and timing limits."""

    # The interaction to carry out (screen + action).
    action: InteractRequest
    # The condition to verify.
    verify: VerifyOCRTextNearPoint
    # Milliseconds, 50-5000; presumably the delay between checks — confirm.
    check_interval_ms: int = Field(default=250, ge=50, le=5000)
    # Milliseconds, 100-60000; presumably the overall deadline — confirm.
    timeout_ms: int = Field(default=3000, ge=100, le=60000)
|
||||
|
||||
|
||||
# ActionRequest.click_text is annotated with the string
# "ClickTextAction | None" because ClickTextAction is declared later in
# this module; rebuild the model now that the name exists.
ActionRequest.model_rebuild()
|
||||
|
||||
Reference in New Issue
Block a user