feat(exec): add low-friction shell execution endpoint
All checks were successful
python-syntax / syntax-check (push) Successful in 10s

This commit was merged in pull request #6.
This commit is contained in:
2026-04-05 20:23:48 +02:00
5 changed files with 199 additions and 3 deletions

View File

@@ -7,3 +7,10 @@ CLICKTHROUGH_DRY_RUN=false
CLICKTHROUGH_GRID_ROWS=12
CLICKTHROUGH_GRID_COLS=12
# CLICKTHROUGH_ALLOWED_REGION=0,0,1920,1080
CLICKTHROUGH_EXEC_ENABLED=true
CLICKTHROUGH_EXEC_SECRET=replace-with-a-strong-random-secret
CLICKTHROUGH_EXEC_DEFAULT_SHELL=powershell
CLICKTHROUGH_EXEC_TIMEOUT_S=30
CLICKTHROUGH_EXEC_MAX_TIMEOUT_S=120
CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS=20000

View File

@@ -7,6 +7,7 @@ Let an Agent interact with your computer over HTTP, with grid-aware screenshots
- **Visual endpoints**: full-screen capture with optional grid overlay and labeled cells (`asImage=true` can return raw image bytes)
- **Zoom endpoint**: crop around a point with denser grid for fine targeting (`asImage=true` supported)
- **Action endpoints**: move/click/right-click/double-click/middle-click/scroll/type/hotkey
- **Command execution endpoint**: run PowerShell/Bash/CMD commands via `POST /exec`
- **Coordinate transform metadata** in visual responses so agents can map grid cells to real pixels
- **Safety knobs**: token auth, dry-run mode, optional allowed-region restriction
@@ -48,6 +49,12 @@ Environment variables:
- `CLICKTHROUGH_GRID_ROWS` (default `12`)
- `CLICKTHROUGH_GRID_COLS` (default `12`)
- `CLICKTHROUGH_ALLOWED_REGION` (optional `x,y,width,height`)
- `CLICKTHROUGH_EXEC_ENABLED` (default `true`)
- `CLICKTHROUGH_EXEC_SECRET` (**required for `/exec` to run**)
- `CLICKTHROUGH_EXEC_DEFAULT_SHELL` (default `powershell`; one of `powershell`, `bash`, `cmd`)
- `CLICKTHROUGH_EXEC_TIMEOUT_S` (default `30`)
- `CLICKTHROUGH_EXEC_MAX_TIMEOUT_S` (default `120`)
- `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS` (default `20000`)
## Gitea CI

View File

@@ -17,5 +17,9 @@
- CI workflow runs syntax checks on push + PR
## Next
- Manual runtime test on a desktop session (capture + click loop)
- Optional: add monitor selection and OCR helper endpoint
- [x] Add `POST /exec` endpoint (PowerShell/Bash/CMD) with timeout + stdout/stderr
- [x] Add exec configuration via env (`CLICKTHROUGH_EXEC_*`)
- [x] Document exec API + config
- [x] Create backlog issues for OCR/find/window/input/session-state improvements
- [ ] Open PR for exec feature branch and review/merge
- [x] Require configured exec secret + per-request exec secret header

View File

@@ -10,7 +10,7 @@ x-clickthrough-token: <token>
## `GET /health`
Returns status and runtime safety flags.
Returns status and runtime safety flags, including `exec` capability config.
## `GET /screen`
@@ -143,6 +143,33 @@ Hotkey:
}
```
## `POST /exec`
Execute a shell command on the host running Clickthrough.
Requirements:
- `CLICKTHROUGH_EXEC_SECRET` must be configured on the server
- send header `x-clickthrough-exec-secret: <secret>`
```json
{
"command": "Get-Process | Select-Object -First 5",
"shell": "powershell",
"timeout_s": 20,
"cwd": "C:/Users/Paul",
"dry_run": false
}
```
Notes:
- `shell` supports `powershell`, `bash`, `cmd`
- if `shell` is omitted, server uses `CLICKTHROUGH_EXEC_DEFAULT_SHELL`
- output is truncated based on `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS`
- endpoint can be disabled with `CLICKTHROUGH_EXEC_ENABLED=false`
- if `CLICKTHROUGH_EXEC_SECRET` is missing, `/exec` is blocked (`403`)
Response includes `stdout`, `stderr`, `exit_code`, timeout state, and execution metadata.
## `POST /batch`
Runs multiple `action` payloads sequentially.

View File

@@ -1,6 +1,8 @@
import base64
import hmac
import io
import os
import subprocess
import time
import uuid
from typing import Literal, Optional
@@ -43,6 +45,12 @@ SETTINGS = {
"default_grid_rows": int(os.getenv("CLICKTHROUGH_GRID_ROWS", "12")),
"default_grid_cols": int(os.getenv("CLICKTHROUGH_GRID_COLS", "12")),
"allowed_region": _parse_allowed_region(),
"exec_enabled": _env_bool("CLICKTHROUGH_EXEC_ENABLED", True),
"exec_default_shell": os.getenv("CLICKTHROUGH_EXEC_DEFAULT_SHELL", "powershell").strip().lower(),
"exec_default_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_TIMEOUT_S", "30")),
"exec_max_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_MAX_TIMEOUT_S", "120")),
"exec_max_output_chars": int(os.getenv("CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS", "20000")),
"exec_secret": os.getenv("CLICKTHROUGH_EXEC_SECRET", "").strip(),
}
@@ -130,6 +138,14 @@ class BatchRequest(BaseModel):
stop_on_error: bool = True
class ExecRequest(BaseModel):
command: str = Field(min_length=1, max_length=10000)
shell: Literal["powershell", "bash", "cmd"] | None = None
timeout_s: int | None = Field(default=None, ge=1, le=600)
cwd: str | None = None
dry_run: bool = False
def _auth(x_clickthrough_token: Optional[str] = Header(default=None)):
token = SETTINGS["token"]
if token and x_clickthrough_token != token:
@@ -259,6 +275,113 @@ def _import_input_lib():
raise HTTPException(status_code=500, detail=f"input backend unavailable: {exc}") from exc
def _pick_shell(explicit_shell: str | None) -> str:
shell_name = (explicit_shell or SETTINGS["exec_default_shell"] or "powershell").lower().strip()
if shell_name not in {"powershell", "bash", "cmd"}:
raise HTTPException(status_code=400, detail="unsupported shell")
return shell_name
def _truncate_text(text: str, limit: int) -> tuple[str, bool]:
if len(text) <= limit:
return text, False
return text[:limit], True
def _resolve_exec_program(shell_name: str, command: str) -> list[str]:
if shell_name == "powershell":
return ["powershell", "-NoProfile", "-NonInteractive", "-ExecutionPolicy", "Bypass", "-Command", command]
if shell_name == "bash":
return ["bash", "-lc", command]
if shell_name == "cmd":
return ["cmd", "/c", command]
raise HTTPException(status_code=400, detail="unsupported shell")
def _exec_command(req: ExecRequest) -> dict:
if not SETTINGS["exec_enabled"]:
raise HTTPException(status_code=403, detail="exec endpoint disabled")
if not SETTINGS["exec_secret"]:
raise HTTPException(status_code=403, detail="exec secret not configured")
run_dry = SETTINGS["dry_run"] or req.dry_run
shell_name = _pick_shell(req.shell)
timeout_s = req.timeout_s if req.timeout_s is not None else SETTINGS["exec_default_timeout_s"]
timeout_s = min(timeout_s, SETTINGS["exec_max_timeout_s"])
cwd = None
if req.cwd:
cwd = os.path.abspath(req.cwd)
if not os.path.isdir(cwd):
raise HTTPException(status_code=400, detail="cwd does not exist or is not a directory")
argv = _resolve_exec_program(shell_name, req.command)
if run_dry:
return {
"executed": False,
"dry_run": True,
"shell": shell_name,
"command": req.command,
"argv": argv,
"timeout_s": timeout_s,
"cwd": cwd,
}
start = time.time()
try:
completed = subprocess.run(
argv,
cwd=cwd,
capture_output=True,
text=True,
timeout=timeout_s,
check=False,
)
except subprocess.TimeoutExpired as exc:
stdout = exc.stdout or ""
stderr = exc.stderr or ""
stdout, stdout_truncated = _truncate_text(str(stdout), SETTINGS["exec_max_output_chars"])
stderr, stderr_truncated = _truncate_text(str(stderr), SETTINGS["exec_max_output_chars"])
return {
"executed": True,
"timed_out": True,
"shell": shell_name,
"command": req.command,
"argv": argv,
"timeout_s": timeout_s,
"cwd": cwd,
"duration_ms": int((time.time() - start) * 1000),
"exit_code": None,
"stdout": stdout,
"stderr": stderr,
"stdout_truncated": stdout_truncated,
"stderr_truncated": stderr_truncated,
}
except FileNotFoundError as exc:
raise HTTPException(status_code=400, detail=f"shell executable not found: {exc}") from exc
stdout, stdout_truncated = _truncate_text(completed.stdout or "", SETTINGS["exec_max_output_chars"])
stderr, stderr_truncated = _truncate_text(completed.stderr or "", SETTINGS["exec_max_output_chars"])
return {
"executed": True,
"timed_out": False,
"shell": shell_name,
"command": req.command,
"argv": argv,
"timeout_s": timeout_s,
"cwd": cwd,
"duration_ms": int((time.time() - start) * 1000),
"exit_code": completed.returncode,
"stdout": stdout,
"stderr": stderr,
"stdout_truncated": stdout_truncated,
"stderr_truncated": stderr_truncated,
}
def _exec_action(req: ActionRequest) -> dict:
run_dry = SETTINGS["dry_run"] or req.dry_run
@@ -331,6 +454,13 @@ def health(_: None = Depends(_auth)):
"request_id": _request_id(),
"dry_run": SETTINGS["dry_run"],
"allowed_region": SETTINGS["allowed_region"],
"exec": {
"enabled": SETTINGS["exec_enabled"],
"secret_configured": bool(SETTINGS["exec_secret"]),
"default_shell": SETTINGS["exec_default_shell"],
"default_timeout_s": SETTINGS["exec_default_timeout_s"],
"max_timeout_s": SETTINGS["exec_max_timeout_s"],
},
}
@@ -449,6 +579,27 @@ def action(req: ActionRequest, _: None = Depends(_auth)):
}
@app.post("/exec")
def exec_command(
req: ExecRequest,
x_clickthrough_exec_secret: Optional[str] = Header(default=None),
_: None = Depends(_auth),
):
expected = SETTINGS["exec_secret"]
if not expected:
raise HTTPException(status_code=403, detail="exec secret not configured")
if not x_clickthrough_exec_secret or not hmac.compare_digest(x_clickthrough_exec_secret, expected):
raise HTTPException(status_code=401, detail="invalid exec secret")
result = _exec_command(req)
return {
"ok": True,
"request_id": _request_id(),
"time_ms": _now_ms(),
"result": result,
}
@app.post("/batch")
def batch(req: BatchRequest, _: None = Depends(_auth)):
results = []