From 930cdd2887fa537b5623030f497abcb1ddf67bbe Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 5 Apr 2026 20:18:07 +0200 Subject: [PATCH 1/2] feat(exec): add shell command execution endpoint --- .env.example | 6 +++ README.md | 6 +++ TODO.md | 7 ++- docs/API.md | 24 ++++++++- server/app.py | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 176 insertions(+), 3 deletions(-) diff --git a/.env.example b/.env.example index b18441f..2ebd111 100644 --- a/.env.example +++ b/.env.example @@ -7,3 +7,9 @@ CLICKTHROUGH_DRY_RUN=false CLICKTHROUGH_GRID_ROWS=12 CLICKTHROUGH_GRID_COLS=12 # CLICKTHROUGH_ALLOWED_REGION=0,0,1920,1080 + +CLICKTHROUGH_EXEC_ENABLED=true +CLICKTHROUGH_EXEC_DEFAULT_SHELL=powershell +CLICKTHROUGH_EXEC_TIMEOUT_S=30 +CLICKTHROUGH_EXEC_MAX_TIMEOUT_S=120 +CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS=20000 diff --git a/README.md b/README.md index 6184330..cfbd57e 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ Let an Agent interact with your computer over HTTP, with grid-aware screenshots - **Visual endpoints**: full-screen capture with optional grid overlay and labeled cells (`asImage=true` can return raw image bytes) - **Zoom endpoint**: crop around a point with denser grid for fine targeting (`asImage=true` supported) - **Action endpoints**: move/click/right-click/double-click/middle-click/scroll/type/hotkey +- **Command execution endpoint**: run PowerShell/Bash/CMD commands via `POST /exec` - **Coordinate transform metadata** in visual responses so agents can map grid cells to real pixels - **Safety knobs**: token auth, dry-run mode, optional allowed-region restriction @@ -48,6 +49,11 @@ Environment variables: - `CLICKTHROUGH_GRID_ROWS` (default `12`) - `CLICKTHROUGH_GRID_COLS` (default `12`) - `CLICKTHROUGH_ALLOWED_REGION` (optional `x,y,width,height`) +- `CLICKTHROUGH_EXEC_ENABLED` (default `true`) +- `CLICKTHROUGH_EXEC_DEFAULT_SHELL` (default `powershell`; one of `powershell`, `bash`, `cmd`) +- `CLICKTHROUGH_EXEC_TIMEOUT_S` (default 
`30`) +- `CLICKTHROUGH_EXEC_MAX_TIMEOUT_S` (default `120`) +- `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS` (default `20000`) ## Gitea CI diff --git a/TODO.md b/TODO.md index 37240d6..4a326c7 100644 --- a/TODO.md +++ b/TODO.md @@ -17,5 +17,8 @@ - CI workflow runs syntax checks on push + PR ## Next -- Manual runtime test on a desktop session (capture + click loop) -- Optional: add monitor selection and OCR helper endpoint +- [x] Add `POST /exec` endpoint (PowerShell/Bash/CMD) with timeout + stdout/stderr +- [x] Add exec configuration via env (`CLICKTHROUGH_EXEC_*`) +- [x] Document exec API + config +- [x] Create backlog issues for OCR/find/window/input/session-state improvements +- [ ] Open PR for exec feature branch and review/merge diff --git a/docs/API.md b/docs/API.md index 4a266b6..1c9bf7d 100644 --- a/docs/API.md +++ b/docs/API.md @@ -10,7 +10,7 @@ x-clickthrough-token: ## `GET /health` -Returns status and runtime safety flags. +Returns status and runtime safety flags, including `exec` capability config. ## `GET /screen` @@ -143,6 +143,28 @@ Hotkey: } ``` +## `POST /exec` + +Execute a shell command on the host running Clickthrough. + +```json +{ + "command": "Get-Process | Select-Object -First 5", + "shell": "powershell", + "timeout_s": 20, + "cwd": "C:/Users/Paul", + "dry_run": false +} +``` + +Notes: +- `shell` supports `powershell`, `bash`, `cmd` +- if `shell` is omitted, server uses `CLICKTHROUGH_EXEC_DEFAULT_SHELL` +- output is truncated based on `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS` +- endpoint can be disabled with `CLICKTHROUGH_EXEC_ENABLED=false` + +Response includes `stdout`, `stderr`, `exit_code`, timeout state, and execution metadata. + ## `POST /batch` Runs multiple `action` payloads sequentially. 
diff --git a/server/app.py b/server/app.py index 25ec60e..ab30f76 100644 --- a/server/app.py +++ b/server/app.py @@ -1,6 +1,7 @@ import base64 import io import os +import subprocess import time import uuid from typing import Literal, Optional @@ -43,6 +44,11 @@ SETTINGS = { "default_grid_rows": int(os.getenv("CLICKTHROUGH_GRID_ROWS", "12")), "default_grid_cols": int(os.getenv("CLICKTHROUGH_GRID_COLS", "12")), "allowed_region": _parse_allowed_region(), + "exec_enabled": _env_bool("CLICKTHROUGH_EXEC_ENABLED", True), + "exec_default_shell": os.getenv("CLICKTHROUGH_EXEC_DEFAULT_SHELL", "powershell").strip().lower(), + "exec_default_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_TIMEOUT_S", "30")), + "exec_max_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_MAX_TIMEOUT_S", "120")), + "exec_max_output_chars": int(os.getenv("CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS", "20000")), } @@ -130,6 +136,14 @@ class BatchRequest(BaseModel): stop_on_error: bool = True +class ExecRequest(BaseModel): + command: str = Field(min_length=1, max_length=10000) + shell: Literal["powershell", "bash", "cmd"] | None = None + timeout_s: int | None = Field(default=None, ge=1, le=600) + cwd: str | None = None + dry_run: bool = False + + def _auth(x_clickthrough_token: Optional[str] = Header(default=None)): token = SETTINGS["token"] if token and x_clickthrough_token != token: @@ -259,6 +273,111 @@ def _import_input_lib(): raise HTTPException(status_code=500, detail=f"input backend unavailable: {exc}") from exc +def _pick_shell(explicit_shell: str | None) -> str: + shell_name = (explicit_shell or SETTINGS["exec_default_shell"] or "powershell").lower().strip() + if shell_name not in {"powershell", "bash", "cmd"}: + raise HTTPException(status_code=400, detail="unsupported shell") + return shell_name + + +def _truncate_text(text: str, limit: int) -> tuple[str, bool]: + if len(text) <= limit: + return text, False + return text[:limit], True + + +def _resolve_exec_program(shell_name: str, command: str) -> 
list[str]: + if shell_name == "powershell": + return ["powershell", "-NoProfile", "-NonInteractive", "-ExecutionPolicy", "Bypass", "-Command", command] + if shell_name == "bash": + return ["bash", "-lc", command] + if shell_name == "cmd": + return ["cmd", "/c", command] + raise HTTPException(status_code=400, detail="unsupported shell") + + +def _exec_command(req: ExecRequest) -> dict: + if not SETTINGS["exec_enabled"]: + raise HTTPException(status_code=403, detail="exec endpoint disabled") + + run_dry = SETTINGS["dry_run"] or req.dry_run + shell_name = _pick_shell(req.shell) + + timeout_s = req.timeout_s if req.timeout_s is not None else SETTINGS["exec_default_timeout_s"] + timeout_s = min(timeout_s, SETTINGS["exec_max_timeout_s"]) + + cwd = None + if req.cwd: + cwd = os.path.abspath(req.cwd) + if not os.path.isdir(cwd): + raise HTTPException(status_code=400, detail="cwd does not exist or is not a directory") + + argv = _resolve_exec_program(shell_name, req.command) + + if run_dry: + return { + "executed": False, + "dry_run": True, + "shell": shell_name, + "command": req.command, + "argv": argv, + "timeout_s": timeout_s, + "cwd": cwd, + } + + start = time.time() + try: + completed = subprocess.run( + argv, + cwd=cwd, + capture_output=True, + text=True, + timeout=timeout_s, + check=False, + ) + except subprocess.TimeoutExpired as exc: + stdout = exc.stdout or "" + stderr = exc.stderr or "" + stdout, stdout_truncated = _truncate_text(str(stdout), SETTINGS["exec_max_output_chars"]) + stderr, stderr_truncated = _truncate_text(str(stderr), SETTINGS["exec_max_output_chars"]) + return { + "executed": True, + "timed_out": True, + "shell": shell_name, + "command": req.command, + "argv": argv, + "timeout_s": timeout_s, + "cwd": cwd, + "duration_ms": int((time.time() - start) * 1000), + "exit_code": None, + "stdout": stdout, + "stderr": stderr, + "stdout_truncated": stdout_truncated, + "stderr_truncated": stderr_truncated, + } + except FileNotFoundError as exc: + raise 
HTTPException(status_code=400, detail=f"shell executable not found: {exc}") from exc + + stdout, stdout_truncated = _truncate_text(completed.stdout or "", SETTINGS["exec_max_output_chars"]) + stderr, stderr_truncated = _truncate_text(completed.stderr or "", SETTINGS["exec_max_output_chars"]) + + return { + "executed": True, + "timed_out": False, + "shell": shell_name, + "command": req.command, + "argv": argv, + "timeout_s": timeout_s, + "cwd": cwd, + "duration_ms": int((time.time() - start) * 1000), + "exit_code": completed.returncode, + "stdout": stdout, + "stderr": stderr, + "stdout_truncated": stdout_truncated, + "stderr_truncated": stderr_truncated, + } + + def _exec_action(req: ActionRequest) -> dict: run_dry = SETTINGS["dry_run"] or req.dry_run @@ -331,6 +450,12 @@ def health(_: None = Depends(_auth)): "request_id": _request_id(), "dry_run": SETTINGS["dry_run"], "allowed_region": SETTINGS["allowed_region"], + "exec": { + "enabled": SETTINGS["exec_enabled"], + "default_shell": SETTINGS["exec_default_shell"], + "default_timeout_s": SETTINGS["exec_default_timeout_s"], + "max_timeout_s": SETTINGS["exec_max_timeout_s"], + }, } @@ -449,6 +574,17 @@ def action(req: ActionRequest, _: None = Depends(_auth)): } +@app.post("/exec") +def exec_command(req: ExecRequest, _: None = Depends(_auth)): + result = _exec_command(req) + return { + "ok": True, + "request_id": _request_id(), + "time_ms": _now_ms(), + "result": result, + } + + @app.post("/batch") def batch(req: BatchRequest, _: None = Depends(_auth)): results = [] -- 2.39.5 From 38c1127347614d8a09a433ce667cc37011da4864 Mon Sep 17 00:00:00 2001 From: Luna Date: Sun, 5 Apr 2026 20:22:18 +0200 Subject: [PATCH 2/2] feat(exec): require configured secret and header auth for /exec --- .env.example | 1 + README.md | 1 + TODO.md | 1 + docs/API.md | 5 +++++ server/app.py | 17 ++++++++++++++++- 5 files changed, 24 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 2ebd111..db26eed 100644 --- 
a/.env.example +++ b/.env.example @@ -9,6 +9,7 @@ CLICKTHROUGH_GRID_COLS=12 # CLICKTHROUGH_ALLOWED_REGION=0,0,1920,1080 CLICKTHROUGH_EXEC_ENABLED=true +CLICKTHROUGH_EXEC_SECRET=replace-with-a-strong-random-secret CLICKTHROUGH_EXEC_DEFAULT_SHELL=powershell CLICKTHROUGH_EXEC_TIMEOUT_S=30 CLICKTHROUGH_EXEC_MAX_TIMEOUT_S=120 diff --git a/README.md b/README.md index cfbd57e..b57fc1c 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ Environment variables: - `CLICKTHROUGH_GRID_COLS` (default `12`) - `CLICKTHROUGH_ALLOWED_REGION` (optional `x,y,width,height`) - `CLICKTHROUGH_EXEC_ENABLED` (default `true`) +- `CLICKTHROUGH_EXEC_SECRET` (**required for `/exec` to run**) - `CLICKTHROUGH_EXEC_DEFAULT_SHELL` (default `powershell`; one of `powershell`, `bash`, `cmd`) - `CLICKTHROUGH_EXEC_TIMEOUT_S` (default `30`) - `CLICKTHROUGH_EXEC_MAX_TIMEOUT_S` (default `120`) diff --git a/TODO.md b/TODO.md index 4a326c7..a5e56aa 100644 --- a/TODO.md +++ b/TODO.md @@ -22,3 +22,4 @@ - [x] Document exec API + config - [x] Create backlog issues for OCR/find/window/input/session-state improvements - [ ] Open PR for exec feature branch and review/merge +- [x] Require configured exec secret + per-request exec secret header diff --git a/docs/API.md b/docs/API.md index 1c9bf7d..26b6237 100644 --- a/docs/API.md +++ b/docs/API.md @@ -147,6 +147,10 @@ Hotkey: Execute a shell command on the host running Clickthrough. +Requirements: +- `CLICKTHROUGH_EXEC_SECRET` must be configured on the server +- send header `x-clickthrough-exec-secret: <secret>` (a missing or wrong value is rejected with `401`) + ```json { "command": "Get-Process | Select-Object -First 5", @@ -162,6 +166,7 @@ Notes: - if `shell` is omitted, server uses `CLICKTHROUGH_EXEC_DEFAULT_SHELL` - output is truncated based on `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS` - endpoint can be disabled with `CLICKTHROUGH_EXEC_ENABLED=false` +- if `CLICKTHROUGH_EXEC_SECRET` is missing, `/exec` is blocked (`403`) Response includes `stdout`, `stderr`, `exit_code`, timeout state, and execution metadata. 
diff --git a/server/app.py b/server/app.py index ab30f76..602fd6c 100644 --- a/server/app.py +++ b/server/app.py @@ -1,4 +1,5 @@ import base64 +import hmac import io import os import subprocess @@ -49,6 +50,7 @@ SETTINGS = { "exec_default_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_TIMEOUT_S", "30")), "exec_max_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_MAX_TIMEOUT_S", "120")), "exec_max_output_chars": int(os.getenv("CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS", "20000")), + "exec_secret": os.getenv("CLICKTHROUGH_EXEC_SECRET", "").strip(), } @@ -299,6 +301,8 @@ def _resolve_exec_program(shell_name: str, command: str) -> list[str]: def _exec_command(req: ExecRequest) -> dict: if not SETTINGS["exec_enabled"]: raise HTTPException(status_code=403, detail="exec endpoint disabled") + if not SETTINGS["exec_secret"]: + raise HTTPException(status_code=403, detail="exec secret not configured") run_dry = SETTINGS["dry_run"] or req.dry_run shell_name = _pick_shell(req.shell) @@ -452,6 +456,7 @@ def health(_: None = Depends(_auth)): "allowed_region": SETTINGS["allowed_region"], "exec": { "enabled": SETTINGS["exec_enabled"], + "secret_configured": bool(SETTINGS["exec_secret"]), "default_shell": SETTINGS["exec_default_shell"], "default_timeout_s": SETTINGS["exec_default_timeout_s"], "max_timeout_s": SETTINGS["exec_max_timeout_s"], @@ -575,7 +580,17 @@ def action(req: ActionRequest, _: None = Depends(_auth)): @app.post("/exec") -def exec_command(req: ExecRequest, _: None = Depends(_auth)): +def exec_command( + req: ExecRequest, + x_clickthrough_exec_secret: Optional[str] = Header(default=None), + _: None = Depends(_auth), +): + expected = SETTINGS["exec_secret"] + if not expected: + raise HTTPException(status_code=403, detail="exec secret not configured") + if not x_clickthrough_exec_secret or not hmac.compare_digest(x_clickthrough_exec_secret, expected): + raise HTTPException(status_code=401, detail="invalid exec secret") + result = _exec_command(req) return { "ok": True, -- 2.39.5