"""Tests for OCR item extraction, text-match resolution, and the /interact and /see endpoints."""

import sys

from PIL import Image
from fastapi.testclient import TestClient

from server import services
from server.app import app
from server.config import SETTINGS
from server.models import ClickTextAction


def _auth_headers() -> dict:
    """Build the request headers carrying the configured token.

    Returns an empty dict when ``SETTINGS`` holds no (or a falsy) ``"token"``
    value; otherwise a one-entry dict keyed by ``"x-clickthrough-token"``.
    """
    configured = SETTINGS.get("token", "")
    return {"x-clickthrough-token": configured} if configured else {}


def test_extract_ocr_items_normalization(monkeypatch):
    """Check ``services.extract_ocr_items`` against a stubbed ``pytesseract``.

    The stub yields three rows; the test expects only "hello" and "world" to
    come back (the whitespace-only row with confidence -1 must be absent), and
    pins the first item's bbox x (110) and center y (225) — values consistent
    with the raw box being shifted by ``origin_x``/``origin_y``.
    """

    class StubOutput:
        DICT = "DICT"

    class StubTesseract:
        Output = StubOutput

        @staticmethod
        def image_to_data(_image, lang, config, output_type):
            # The stub doubles as a check on what the service passes through.
            assert lang == "eng"
            assert output_type == "DICT"
            rows = {}
            rows["text"] = ["hello", " ", "world"]
            rows["conf"] = ["95.0", "-1", "62.5"]
            rows["left"] = [10, 12, 40]
            rows["top"] = [20, 25, 60]
            rows["width"] = [30, 10, 50]
            rows["height"] = [10, 10, 12]
            return rows

    # Registered under the real module name so an import of "pytesseract"
    # resolves to the stub for the duration of this test.
    monkeypatch.setitem(sys.modules, "pytesseract", StubTesseract)

    blank_image = Image.new("RGB", (100, 100))
    found = services.extract_ocr_items(
        blank_image,
        origin_x=100,
        origin_y=200,
        min_confidence=60,
        lang="eng",
        psm=None,
    )

    assert len(found) == 2
    first = found[0]
    assert first["text"] == "hello"
    assert first["bbox"]["x"] == 110
    assert first["center"]["y"] == 225
    assert found[1]["text"] == "world"


def test_resolve_text_match_contains_exact_regex_and_nth():
    """Exercise ``services._resolve_text_match`` across match/occurrence modes.

    Covers: ``contains`` + ``first``, ``contains`` + ``best``, case-sensitive
    ``exact``, and ``regex`` + ``nth`` (nth=2), all against the same three
    candidate items.
    """
    candidates = [
        {"text": "Save", "confidence": 70},
        {"text": "Save as", "confidence": 96},
        {"text": "SAVE", "confidence": 88},
    ]

    def resolve(**action_fields):
        # Build the action from keyword fields and resolve it on the shared list.
        return services._resolve_text_match(ClickTextAction(**action_fields), candidates)

    first_contains = resolve(text="save", match="contains", occurrence="first")
    assert first_contains["text"] == "Save"

    best_contains = resolve(text="save", match="contains", occurrence="best")
    assert best_contains["text"] == "Save as"

    exact_cased = resolve(text="SAVE", match="exact", case_sensitive=True, occurrence="first")
    assert exact_cased["text"] == "SAVE"

    second_regex = resolve(text="^Save", match="regex", occurrence="nth", nth=2)
    assert second_regex["text"] == "Save as"


def test_interact_click_text_region_optional(monkeypatch):
    """POST a dry-run ``click_text`` action to ``/interact`` without region fields.

    Display selection, region capture and OCR extraction on ``services`` are
    replaced with fakes; the response is expected to be 200, resolve the
    target x to 20 and report "Apply" as the matched text.
    """

    def fake_select_display(screen):
        selection = {"requested": screen, "selected": screen, "fallback": False}
        return ({"screen": screen}, [], selection)

    def fake_capture_region_image(screen, x, y, w, h):
        # Falsy coordinates/sizes fall back to 0 / 20, mirroring a 20x20 capture.
        region = {"x": x or 0, "y": y or 0, "width": w or 20, "height": h or 20}
        return (Image.new("RGB", (20, 20)), region, {}, [], {})

    def fake_extract_ocr_items(*_args, **_kwargs):
        return [
            {
                "text": "Apply",
                "confidence": 93.0,
                "bbox": {"x": 10, "y": 20, "width": 20, "height": 10},
                "center": {"x": 20, "y": 25},
                "region_relative_bbox": {"x": 10, "y": 20, "width": 20, "height": 10},
            }
        ]

    monkeypatch.setattr(services, "select_display", fake_select_display)
    monkeypatch.setattr(services, "capture_region_image", fake_capture_region_image)
    monkeypatch.setattr(services, "extract_ocr_items", fake_extract_ocr_items)

    payload = {
        "screen": 0,
        "action": {
            "action": "click_text",
            "dry_run": True,
            "click_text": {"text": "Apply", "match": "contains"},
        },
    }

    client = TestClient(app)
    response = client.post("/interact", json=payload, headers=_auth_headers())

    assert response.status_code == 200
    data = response.json()["data"]
    assert data["resolved_target"]["x"] == 20
    assert data["click_text_match"]["matched"]["text"] == "Apply"


def test_see_ocr_off_on_contract(monkeypatch):
    """Verify ``/see`` omits ``meta["ocr"]`` when ``ocr`` is False and includes it when True.

    Capture, image encoding and OCR extraction are patched on ``server.app``
    so the endpoint runs against fixed fake data.
    """

    def fake_capture_region_image(*_args, **_kwargs):
        region = {"x": 0, "y": 0, "width": 10, "height": 10}
        return (Image.new("RGB", (10, 10)), region, {"screen": 0}, [], {})

    def fake_encode_image(*_args, **_kwargs):
        return "abc"

    def fake_extract_ocr_items(*_args, **_kwargs):
        return [{"text": "x"}]

    monkeypatch.setattr("server.app.capture_region_image", fake_capture_region_image)
    monkeypatch.setattr("server.app.encode_image", fake_encode_image)
    monkeypatch.setattr("server.app.extract_ocr_items", fake_extract_ocr_items)

    client = TestClient(app)

    without_ocr = client.post("/see", json={"ocr": False, "with_grid": False}, headers=_auth_headers())
    assert without_ocr.status_code == 200
    meta_off = without_ocr.json()["data"]["meta"]
    assert "ocr" not in meta_off

    with_ocr = client.post("/see", json={"ocr": True, "with_grid": False}, headers=_auth_headers())
    assert with_ocr.status_code == 200
    meta_on = with_ocr.json()["data"]["meta"]
    assert meta_on["ocr"][0]["text"] == "x"