2026-04-05 20:23:48 +02:00
5 changed files with 176 additions and 3 deletions
--- a/.env.example
+++ b/.env.example
@@ -7,3 +7,9 @@ CLICKTHROUGH_DRY_RUN=false
 CLICKTHROUGH_GRID_ROWS=12
 CLICKTHROUGH_GRID_COLS=12
 # CLICKTHROUGH_ALLOWED_REGION=0,0,1920,1080
 CLICKTHROUGH_EXEC_ENABLED=true
 CLICKTHROUGH_EXEC_DEFAULT_SHELL=powershell
 CLICKTHROUGH_EXEC_TIMEOUT_S=30
 CLICKTHROUGH_EXEC_MAX_TIMEOUT_S=120
 CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS=20000
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@ Let an Agent interact with your computer over HTTP, with grid-aware screenshots
 - **Visual endpoints**: full-screen capture with optional grid overlay and labeled cells (`asImage=true` can return raw image bytes)
 - **Zoom endpoint**: crop around a point with denser grid for fine targeting (`asImage=true` supported)
 - **Action endpoints**: move/click/right-click/double-click/middle-click/scroll/type/hotkey
 - **Command execution endpoint**: run PowerShell/Bash/CMD commands via `POST /exec`
 - **Coordinate transform metadata** in visual responses so agents can map grid cells to real pixels
 - **Safety knobs**: token auth, dry-run mode, optional allowed-region restriction
@@ -48,6 +49,11 @@ Environment variables:
 - `CLICKTHROUGH_GRID_ROWS` (default `12`)
 - `CLICKTHROUGH_GRID_COLS` (default `12`)
 - `CLICKTHROUGH_ALLOWED_REGION` (optional `x,y,width,height`)
 - `CLICKTHROUGH_EXEC_ENABLED` (default `true`)
 - `CLICKTHROUGH_EXEC_DEFAULT_SHELL` (default `powershell`; one of `powershell`, `bash`, `cmd`)
 - `CLICKTHROUGH_EXEC_TIMEOUT_S` (default `30`)
 - `CLICKTHROUGH_EXEC_MAX_TIMEOUT_S` (default `120`)
 - `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS` (default `20000`)
 ## Gitea CI
--- a/TODO.md
+++ b/TODO.md
@@ -17,5 +17,8 @@
 - CI workflow runs syntax checks on push + PR
 ## Next
- Manual runtime test on a desktop session (capture + click loop)
+- [x] Add `POST /exec` endpoint (PowerShell/Bash/CMD) with timeout + stdout/stderr
- Optional: add monitor selection and OCR helper endpoint
+- [x] Add exec configuration via env (`CLICKTHROUGH_EXEC_*`)
 - [x] Document exec API + config
 - [x] Create backlog issues for OCR/find/window/input/session-state improvements
 - [ ] Open PR for exec feature branch and review/merge
--- a/docs/API.md
+++ b/docs/API.md
@@ -10,7 +10,7 @@ x-clickthrough-token: <token>
 ## `GET /health`
-Returns status and runtime safety flags.
+Returns status and runtime safety flags, including `exec` capability config.
 ## `GET /screen`
@@ -143,6 +143,28 @@ Hotkey:
 }
 ```
 ## `POST /exec`
 Execute a shell command on the host running Clickthrough.
 ```json
 {
  "command": "Get-Process | Select-Object -First 5",
  "shell": "powershell",
  "timeout_s": 20,
  "cwd": "C:/Users/Paul",
  "dry_run": false
 }
 ```
 Notes:
 - `shell` supports `powershell`, `bash`, `cmd`
 - if `shell` is omitted, the server uses the shell named by `CLICKTHROUGH_EXEC_DEFAULT_SHELL`
 - output is truncated based on `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS`
 - endpoint can be disabled with `CLICKTHROUGH_EXEC_ENABLED=false`
 The response's `result` object includes `stdout`, `stderr`, `exit_code`, a `timed_out` flag, and execution metadata (`shell`, `argv`, `cwd`, `timeout_s`, `duration_ms`).
 ## `POST /batch`
 Runs multiple `action` payloads sequentially.
--- a/server/app.py
+++ b/server/app.py
@@ -1,6 +1,7 @@
 import base64
 import io
 import os
 import subprocess
 import time
 import uuid
 from typing import Literal, Optional
@@ -43,6 +44,11 @@ SETTINGS = {
    "default_grid_rows": int(os.getenv("CLICKTHROUGH_GRID_ROWS", "12")),
    "default_grid_cols": int(os.getenv("CLICKTHROUGH_GRID_COLS", "12")),
    "allowed_region": _parse_allowed_region(),
    "exec_enabled": _env_bool("CLICKTHROUGH_EXEC_ENABLED", True),
    "exec_default_shell": os.getenv("CLICKTHROUGH_EXEC_DEFAULT_SHELL", "powershell").strip().lower(),
    "exec_default_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_TIMEOUT_S", "30")),
    "exec_max_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_MAX_TIMEOUT_S", "120")),
    "exec_max_output_chars": int(os.getenv("CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS", "20000")),
 }
@@ -130,6 +136,14 @@ class BatchRequest(BaseModel):
    stop_on_error: bool = True
 class ExecRequest(BaseModel):
    """Request body accepted by ``POST /exec``."""

    # Command text handed to the selected shell; 1 to 10000 characters.
    command: str = Field(min_length=1, max_length=10000)
    # Shell to run the command with; None means no explicit choice was made.
    shell: Optional[Literal["powershell", "bash", "cmd"]] = None
    # Requested timeout in seconds (1-600); None means no explicit request.
    timeout_s: Optional[int] = Field(default=None, ge=1, le=600)
    # Working directory for the command; None means none was requested.
    cwd: Optional[str] = None
    # When true, the caller asks for a dry run instead of real execution.
    dry_run: bool = False
 def _auth(x_clickthrough_token: Optional[str] = Header(default=None)):
    token = SETTINGS["token"]
    if token and x_clickthrough_token != token:
@@ -259,6 +273,111 @@ def _import_input_lib():
        raise HTTPException(status_code=500, detail=f"input backend unavailable: {exc}") from exc
 def _pick_shell(explicit_shell: str | None) -> str:
    shell_name = (explicit_shell or SETTINGS["exec_default_shell"] or "powershell").lower().strip()
    if shell_name not in {"powershell", "bash", "cmd"}:
        raise HTTPException(status_code=400, detail="unsupported shell")
    return shell_name
 def _truncate_text(text: str, limit: int) -> tuple[str, bool]:
    if len(text) <= limit:
        return text, False
    return text[:limit], True
 def _resolve_exec_program(shell_name: str, command: str) -> list[str]:
    if shell_name == "powershell":
        return ["powershell", "-NoProfile", "-NonInteractive", "-ExecutionPolicy", "Bypass", "-Command", command]
    if shell_name == "bash":
        return ["bash", "-lc", command]
    if shell_name == "cmd":
        return ["cmd", "/c", command]
    raise HTTPException(status_code=400, detail="unsupported shell")
 def _timeout_output_text(raw: str | bytes | None) -> str:
    """Convert output attached to ``subprocess.TimeoutExpired`` into text.

    On a timeout the captured output can be ``None``, ``str`` or raw
    ``bytes`` even though the process was started in text mode. Bytes are
    decoded here instead of being passed through ``str()``, which would
    produce a ``"b'...'"`` repr rather than the actual output.
    """
    if raw is None:
        return ""
    if isinstance(raw, (bytes, bytearray)):
        import locale

        # Mirror text-mode handling: locale encoding plus newline
        # normalization; undecodable bytes are replaced, not raised on.
        decoded = bytes(raw).decode(locale.getpreferredencoding(False), errors="replace")
        return decoded.replace("\r\n", "\n").replace("\r", "\n")
    return str(raw)


 def _exec_command(req: ExecRequest) -> dict:
    """Run (or dry-run) the shell command described by ``req``.

    Args:
        req: Validated exec request (command, optional shell, timeout,
            working directory and dry-run flag).

    Returns:
        For a dry run: ``executed=False``, ``dry_run=True`` plus the resolved
        ``shell``, ``command``, ``argv``, ``timeout_s`` and ``cwd``.
        Otherwise: ``executed=True``, ``timed_out``, the same resolved
        fields, ``duration_ms``, ``exit_code`` (``None`` on timeout) and the
        possibly truncated ``stdout``/``stderr`` with their truncation flags.

    Raises:
        HTTPException: 403 when exec is disabled; 400 when the shell is
            unsupported, ``cwd`` is not an existing directory, or the shell
            executable cannot be found.
    """
    if not SETTINGS["exec_enabled"]:
        raise HTTPException(status_code=403, detail="exec endpoint disabled")

    # A global dry-run setting overrides the per-request flag.
    run_dry = SETTINGS["dry_run"] or req.dry_run
    shell_name = _pick_shell(req.shell)

    timeout_s = req.timeout_s if req.timeout_s is not None else SETTINGS["exec_default_timeout_s"]
    # Never exceed the configured ceiling, whatever the client asked for.
    timeout_s = min(timeout_s, SETTINGS["exec_max_timeout_s"])

    cwd = None
    if req.cwd:
        cwd = os.path.abspath(req.cwd)
        if not os.path.isdir(cwd):
            raise HTTPException(status_code=400, detail="cwd does not exist or is not a directory")

    argv = _resolve_exec_program(shell_name, req.command)

    # Fields common to every response shape, kept in their reporting order.
    request_info = {
        "shell": shell_name,
        "command": req.command,
        "argv": argv,
        "timeout_s": timeout_s,
        "cwd": cwd,
    }

    if run_dry:
        return {"executed": False, "dry_run": True, **request_info}

    # Monotonic clock: the duration must not be skewed by wall-clock changes.
    start = time.monotonic()
    timed_out = False
    try:
        completed = subprocess.run(
            argv,
            cwd=cwd,
            capture_output=True,
            text=True,
            # Undecodable child output must not fail the whole request.
            errors="replace",
            timeout=timeout_s,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        timed_out = True
        exit_code = None
        raw_stdout = _timeout_output_text(exc.stdout)
        raw_stderr = _timeout_output_text(exc.stderr)
    except FileNotFoundError as exc:
        raise HTTPException(status_code=400, detail=f"shell executable not found: {exc}") from exc
    else:
        exit_code = completed.returncode
        raw_stdout = completed.stdout or ""
        raw_stderr = completed.stderr or ""

    duration_ms = int((time.monotonic() - start) * 1000)
    max_chars = SETTINGS["exec_max_output_chars"]
    stdout, stdout_truncated = _truncate_text(raw_stdout, max_chars)
    stderr, stderr_truncated = _truncate_text(raw_stderr, max_chars)

    return {
        "executed": True,
        "timed_out": timed_out,
        **request_info,
        "duration_ms": duration_ms,
        "exit_code": exit_code,
        "stdout": stdout,
        "stderr": stderr,
        "stdout_truncated": stdout_truncated,
        "stderr_truncated": stderr_truncated,
    }
 def _exec_action(req: ActionRequest) -> dict:
    run_dry = SETTINGS["dry_run"] or req.dry_run
@@ -331,6 +450,12 @@ def health(_: None = Depends(_auth)):
        "request_id": _request_id(),
        "dry_run": SETTINGS["dry_run"],
        "allowed_region": SETTINGS["allowed_region"],
        "exec": {
            "enabled": SETTINGS["exec_enabled"],
            "default_shell": SETTINGS["exec_default_shell"],
            "default_timeout_s": SETTINGS["exec_default_timeout_s"],
            "max_timeout_s": SETTINGS["exec_max_timeout_s"],
        },
    }
@@ -449,6 +574,17 @@ def action(req: ActionRequest, _: None = Depends(_auth)):
    }
@app.post("/exec")
def exec_command(req: ExecRequest, _: None = Depends(_auth)):
    """Handle ``POST /exec``.

    Passes the request to ``_exec_command`` and wraps its result in the
    response envelope (``ok``, ``request_id``, ``time_ms``, ``result``).
    """
    outcome = _exec_command(req)
    envelope = {"ok": True}
    envelope["request_id"] = _request_id()
    envelope["time_ms"] = _now_ms()
    envelope["result"] = outcome
    return envelope
@app.post("/batch")
 def batch(req: BatchRequest, _: None = Depends(_auth)):
    results = []