diff --git a/.env.example b/.env.example index db26eed..c12085a 100644 --- a/.env.example +++ b/.env.example @@ -14,3 +14,4 @@ CLICKTHROUGH_EXEC_DEFAULT_SHELL=powershell CLICKTHROUGH_EXEC_TIMEOUT_S=30 CLICKTHROUGH_EXEC_MAX_TIMEOUT_S=120 CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS=20000 +# CLICKTHROUGH_TESSERACT_CMD=/usr/bin/tesseract diff --git a/README.md b/README.md index 7f502ef..8714d73 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ CLICKTHROUGH_TOKEN=change-me python -m server.app Server defaults to `127.0.0.1:8123`. -For OCR support, install the native `tesseract` binary on the host (in addition to Python deps). +For OCR support, install the native `tesseract` binary on the host (in addition to Python deps), or point `CLICKTHROUGH_TESSERACT_CMD` at the executable if it is installed in a non-standard location. `python-dotenv` is enabled, so values from a repo-root `.env` file are loaded automatically. @@ -58,6 +58,7 @@ Environment variables: - `CLICKTHROUGH_EXEC_TIMEOUT_S` (default `30`) - `CLICKTHROUGH_EXEC_MAX_TIMEOUT_S` (default `120`) - `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS` (default `20000`) +- `CLICKTHROUGH_TESSERACT_CMD` (optional path to the `tesseract` executable) ## Gitea CI diff --git a/docs/API.md b/docs/API.md index 663b8af..dbfb58e 100644 --- a/docs/API.md +++ b/docs/API.md @@ -213,6 +213,7 @@ Notes: - Output is deterministic JSON (stable ordering by top-to-bottom, then left-to-right). - `bbox` coordinates are in global screen space for `screen`/`region`, and image-local for `image`. - Requires `tesseract` executable plus Python package `pytesseract`. +- If `tesseract` is not on `PATH`, set `CLICKTHROUGH_TESSERACT_CMD` to the full executable path. 
## `POST /exec` diff --git a/server/app.py b/server/app.py index 3fb91e3..5726da2 100644 --- a/server/app.py +++ b/server/app.py @@ -51,6 +51,7 @@ SETTINGS = { "exec_max_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_MAX_TIMEOUT_S", "120")), "exec_max_output_chars": int(os.getenv("CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS", "20000")), "exec_secret": os.getenv("CLICKTHROUGH_EXEC_SECRET", "").strip(), + "tesseract_cmd": os.getenv("CLICKTHROUGH_TESSERACT_CMD", "").strip(), } @@ -301,6 +302,10 @@ def _import_ocr_libs(): import pytesseract from pytesseract import Output + tesseract_cmd = SETTINGS["tesseract_cmd"] + if tesseract_cmd: + pytesseract.pytesseract.tesseract_cmd = tesseract_cmd + return pytesseract, Output except Exception as exc: raise HTTPException(status_code=500, detail=f"ocr backend unavailable: {exc}") from exc