From 930cdd2887fa537b5623030f497abcb1ddf67bbe Mon Sep 17 00:00:00 2001
From: Luna <clawy@reversed.dev>
Date: Sun, 5 Apr 2026 20:18:07 +0200
Subject: [PATCH 1/2] feat(exec): add shell command execution endpoint

---
 .env.example  |   6 +++
 README.md     |   6 +++
 TODO.md       |   7 ++-
 docs/API.md   |  24 ++++++++-
 server/app.py | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 176 insertions(+), 3 deletions(-)
diff --git a/.env.example b/.env.example
index b18441f..2ebd111 100644
--- a/.env.example
+++ b/.env.example
@@ -7,3 +7,9 @@ CLICKTHROUGH_DRY_RUN=false
 CLICKTHROUGH_GRID_ROWS=12
 CLICKTHROUGH_GRID_COLS=12
 # CLICKTHROUGH_ALLOWED_REGION=0,0,1920,1080
+
+CLICKTHROUGH_EXEC_ENABLED=true
+CLICKTHROUGH_EXEC_DEFAULT_SHELL=powershell
+CLICKTHROUGH_EXEC_TIMEOUT_S=30
+CLICKTHROUGH_EXEC_MAX_TIMEOUT_S=120
+CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS=20000
diff --git a/README.md b/README.md
index 6184330..cfbd57e 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@ Let an Agent interact with your computer over HTTP, with grid-aware screenshots
 - **Visual endpoints**: full-screen capture with optional grid overlay and labeled cells (`asImage=true` can return raw image bytes)
 - **Zoom endpoint**: crop around a point with denser grid for fine targeting (`asImage=true` supported)
 - **Action endpoints**: move/click/right-click/double-click/middle-click/scroll/type/hotkey
+- **Command execution endpoint**: run PowerShell/Bash/CMD commands via `POST /exec`
 - **Coordinate transform metadata** in visual responses so agents can map grid cells to real pixels
 - **Safety knobs**: token auth, dry-run mode, optional allowed-region restriction
 
@@ -48,6 +49,11 @@ Environment variables:
 - `CLICKTHROUGH_GRID_ROWS` (default `12`)
 - `CLICKTHROUGH_GRID_COLS` (default `12`)
 - `CLICKTHROUGH_ALLOWED_REGION` (optional `x,y,width,height`)
+- `CLICKTHROUGH_EXEC_ENABLED` (default `true`)
+- `CLICKTHROUGH_EXEC_DEFAULT_SHELL` (default `powershell`; one of `powershell`, `bash`, `cmd`)
+- `CLICKTHROUGH_EXEC_TIMEOUT_S` (default `30`)
+- `CLICKTHROUGH_EXEC_MAX_TIMEOUT_S` (default `120`)
+- `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS` (default `20000`)
 
 ## Gitea CI
 
diff --git a/TODO.md b/TODO.md
index 37240d6..4a326c7 100644
--- a/TODO.md
+++ b/TODO.md
@@ -17,5 +17,8 @@
 - CI workflow runs syntax checks on push + PR
 
 ## Next
-- Manual runtime test on a desktop session (capture + click loop)
-- Optional: add monitor selection and OCR helper endpoint
+- [x] Add `POST /exec` endpoint (PowerShell/Bash/CMD) with timeout + stdout/stderr
+- [x] Add exec configuration via env (`CLICKTHROUGH_EXEC_*`)
+- [x] Document exec API + config
+- [x] Create backlog issues for OCR/find/window/input/session-state improvements
+- [ ] Open PR for exec feature branch and review/merge
diff --git a/docs/API.md b/docs/API.md
index 4a266b6..1c9bf7d 100644
--- a/docs/API.md
+++ b/docs/API.md
@@ -10,7 +10,7 @@ x-clickthrough-token: <token>
 
 ## `GET /health`
 
-Returns status and runtime safety flags.
+Returns status and runtime safety flags, including `exec` capability config.
 
 ## `GET /screen`
 
@@ -143,6 +143,28 @@ Hotkey:
 }
 ```
 
+## `POST /exec`
+
+Execute a shell command on the host running Clickthrough.
+
+```json
+{
+  "command": "Get-Process | Select-Object -First 5",
+  "shell": "powershell",
+  "timeout_s": 20,
+  "cwd": "C:/Users/Paul",
+  "dry_run": false
+}
+```
+
+Notes:
+- `shell` supports `powershell`, `bash`, `cmd`
+- if `shell` is omitted, server uses `CLICKTHROUGH_EXEC_DEFAULT_SHELL`
+- output is truncated based on `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS`
+- endpoint can be disabled with `CLICKTHROUGH_EXEC_ENABLED=false`
+
+Response includes `stdout`, `stderr`, `exit_code`, timeout state, and execution metadata.
+
 ## `POST /batch`
 
 Runs multiple `action` payloads sequentially.
diff --git a/server/app.py b/server/app.py
index 25ec60e..ab30f76 100644
--- a/server/app.py
+++ b/server/app.py
@@ -1,6 +1,7 @@
 import base64
 import io
 import os
+import subprocess
 import time
 import uuid
 from typing import Literal, Optional
@@ -43,6 +44,11 @@ SETTINGS = {
     "default_grid_rows": int(os.getenv("CLICKTHROUGH_GRID_ROWS", "12")),
     "default_grid_cols": int(os.getenv("CLICKTHROUGH_GRID_COLS", "12")),
     "allowed_region": _parse_allowed_region(),
+    "exec_enabled": _env_bool("CLICKTHROUGH_EXEC_ENABLED", True),
+    "exec_default_shell": os.getenv("CLICKTHROUGH_EXEC_DEFAULT_SHELL", "powershell").strip().lower(),
+    "exec_default_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_TIMEOUT_S", "30")),
+    "exec_max_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_MAX_TIMEOUT_S", "120")),
+    "exec_max_output_chars": int(os.getenv("CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS", "20000")),
 }
 
 
@@ -130,6 +136,14 @@ class BatchRequest(BaseModel):
     stop_on_error: bool = True
 
 
+class ExecRequest(BaseModel):  # request body for POST /exec
+    command: str = Field(min_length=1, max_length=10000)  # command text handed to the selected shell
+    shell: Literal["powershell", "bash", "cmd"] | None = None  # None -> server default shell
+    timeout_s: int | None = Field(default=None, ge=1, le=600)  # seconds; None -> server default; server also caps it at exec_max_timeout_s
+    cwd: str | None = None  # working directory; rejected with 400 unless it is an existing directory
+    dry_run: bool = False  # True -> return the resolved argv without running anything
+
+
 def _auth(x_clickthrough_token: Optional[str] = Header(default=None)):
     token = SETTINGS["token"]
     if token and x_clickthrough_token != token:
@@ -259,6 +273,111 @@ def _import_input_lib():
         raise HTTPException(status_code=500, detail=f"input backend unavailable: {exc}") from exc
 
 
+def _pick_shell(explicit_shell: str | None) -> str:  # normalised shell name, or HTTP 400 if unsupported
+    shell_name = (explicit_shell or SETTINGS["exec_default_shell"] or "powershell").lower().strip()  # request value, else configured default, else "powershell"
+    if shell_name not in {"powershell", "bash", "cmd"}:  # also catches an unsupported configured default
+        raise HTTPException(status_code=400, detail="unsupported shell")
+    return shell_name
+
+
+def _truncate_text(text: str, limit: int) -> tuple[str, bool]:  # returns (text cut to `limit` chars, whether anything was cut)
+    if len(text) <= limit:
+        return text, False
+    return text[:limit], True
+
+
+def _resolve_exec_program(shell_name: str, command: str) -> list[str]:  # argv list: shell launcher + flags + the command as one argument
+    if shell_name == "powershell":
+        return ["powershell", "-NoProfile", "-NonInteractive", "-ExecutionPolicy", "Bypass", "-Command", command]
+    if shell_name == "bash":
+        return ["bash", "-lc", command]
+    if shell_name == "cmd":
+        return ["cmd", "/c", command]
+    raise HTTPException(status_code=400, detail="unsupported shell")  # any name outside the three branches above
+
+
+def _exec_command(req: ExecRequest) -> dict:
+    if not SETTINGS["exec_enabled"]:
+        raise HTTPException(status_code=403, detail="exec endpoint disabled")
+
+    run_dry = SETTINGS["dry_run"] or req.dry_run
+    shell_name = _pick_shell(req.shell)
+
+    timeout_s = req.timeout_s if req.timeout_s is not None else SETTINGS["exec_default_timeout_s"]
+    timeout_s = min(timeout_s, SETTINGS["exec_max_timeout_s"])  # values above the server cap are clamped, not rejected
+
+    cwd = None
+    if req.cwd:
+        cwd = os.path.abspath(req.cwd)
+        if not os.path.isdir(cwd):
+            raise HTTPException(status_code=400, detail="cwd does not exist or is not a directory")
+
+    argv = _resolve_exec_program(shell_name, req.command)
+
+    if run_dry:  # report what would run, without spawning a process
+        return {
+            "executed": False,
+            "dry_run": True,
+            "shell": shell_name,
+            "command": req.command,
+            "argv": argv,
+            "timeout_s": timeout_s,
+            "cwd": cwd,
+        }
+
+    start = time.monotonic()  # monotonic: duration must not be skewed by wall-clock adjustments
+    try:
+        completed = subprocess.run(
+            argv,
+            cwd=cwd,
+            capture_output=True,
+            text=True, errors="replace",  # undecodable output bytes must not raise UnicodeDecodeError
+            timeout=timeout_s,
+            check=False,
+        )
+    except subprocess.TimeoutExpired as exc:
+        stdout = exc.stdout.decode(errors="replace") if isinstance(exc.stdout, bytes) else (exc.stdout or "")  # partial output may be bytes despite text=True
+        stderr = exc.stderr.decode(errors="replace") if isinstance(exc.stderr, bytes) else (exc.stderr or "")
+        stdout, stdout_truncated = _truncate_text(stdout, SETTINGS["exec_max_output_chars"])
+        stderr, stderr_truncated = _truncate_text(stderr, SETTINGS["exec_max_output_chars"])
+        return {
+            "executed": True,
+            "timed_out": True,
+            "shell": shell_name,
+            "command": req.command,
+            "argv": argv,
+            "timeout_s": timeout_s,
+            "cwd": cwd,
+            "duration_ms": int((time.monotonic() - start) * 1000),
+            "exit_code": None,
+            "stdout": stdout,
+            "stderr": stderr,
+            "stdout_truncated": stdout_truncated,
+            "stderr_truncated": stderr_truncated,
+        }
+    except FileNotFoundError as exc:
+        raise HTTPException(status_code=400, detail=f"shell executable not found: {exc}") from exc
+
+    stdout, stdout_truncated = _truncate_text(completed.stdout or "", SETTINGS["exec_max_output_chars"])
+    stderr, stderr_truncated = _truncate_text(completed.stderr or "", SETTINGS["exec_max_output_chars"])
+
+    return {
+        "executed": True,
+        "timed_out": False,
+        "shell": shell_name,
+        "command": req.command,
+        "argv": argv,
+        "timeout_s": timeout_s,
+        "cwd": cwd,
+        "duration_ms": int((time.monotonic() - start) * 1000),
+        "exit_code": completed.returncode,
+        "stdout": stdout,
+        "stderr": stderr,
+        "stdout_truncated": stdout_truncated,
+        "stderr_truncated": stderr_truncated,
+    }
+
+
 def _exec_action(req: ActionRequest) -> dict:
     run_dry = SETTINGS["dry_run"] or req.dry_run
 
@@ -331,6 +450,12 @@ def health(_: None = Depends(_auth)):
         "request_id": _request_id(),
         "dry_run": SETTINGS["dry_run"],
         "allowed_region": SETTINGS["allowed_region"],
+        "exec": {
+            "enabled": SETTINGS["exec_enabled"],
+            "default_shell": SETTINGS["exec_default_shell"],
+            "default_timeout_s": SETTINGS["exec_default_timeout_s"],
+            "max_timeout_s": SETTINGS["exec_max_timeout_s"],
+        },
     }
 
 
@@ -449,6 +574,17 @@ def action(req: ActionRequest, _: None = Depends(_auth)):
     }
 
 
+@app.post("/exec")
+def exec_command(req: ExecRequest, _: None = Depends(_auth)):
+    result = _exec_command(req)
+    return {
+        "ok": True,
+        "request_id": _request_id(),
+        "time_ms": _now_ms(),
+        "result": result,
+    }
+
+
 @app.post("/batch")
 def batch(req: BatchRequest, _: None = Depends(_auth)):
     results = []
-- 
2.39.5


From 38c1127347614d8a09a433ce667cc37011da4864 Mon Sep 17 00:00:00 2001
From: Luna <clawy@reversed.dev>
Date: Sun, 5 Apr 2026 20:22:18 +0200
Subject: [PATCH 2/2] feat(exec): require configured secret and header auth for
 /exec

---
 .env.example  |  1 +
 README.md     |  1 +
 TODO.md       |  1 +
 docs/API.md   |  5 +++++
 server/app.py | 17 ++++++++++++++++-
 5 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/.env.example b/.env.example
index 2ebd111..db26eed 100644
--- a/.env.example
+++ b/.env.example
@@ -9,6 +9,7 @@ CLICKTHROUGH_GRID_COLS=12
 # CLICKTHROUGH_ALLOWED_REGION=0,0,1920,1080
 
 CLICKTHROUGH_EXEC_ENABLED=true
+CLICKTHROUGH_EXEC_SECRET=replace-with-a-strong-random-secret
 CLICKTHROUGH_EXEC_DEFAULT_SHELL=powershell
 CLICKTHROUGH_EXEC_TIMEOUT_S=30
 CLICKTHROUGH_EXEC_MAX_TIMEOUT_S=120
diff --git a/README.md b/README.md
index cfbd57e..b57fc1c 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,7 @@ Environment variables:
 - `CLICKTHROUGH_GRID_COLS` (default `12`)
 - `CLICKTHROUGH_ALLOWED_REGION` (optional `x,y,width,height`)
 - `CLICKTHROUGH_EXEC_ENABLED` (default `true`)
+- `CLICKTHROUGH_EXEC_SECRET` (**required for `/exec` to run**)
 - `CLICKTHROUGH_EXEC_DEFAULT_SHELL` (default `powershell`; one of `powershell`, `bash`, `cmd`)
 - `CLICKTHROUGH_EXEC_TIMEOUT_S` (default `30`)
 - `CLICKTHROUGH_EXEC_MAX_TIMEOUT_S` (default `120`)
diff --git a/TODO.md b/TODO.md
index 4a326c7..a5e56aa 100644
--- a/TODO.md
+++ b/TODO.md
@@ -22,3 +22,4 @@
 - [x] Document exec API + config
 - [x] Create backlog issues for OCR/find/window/input/session-state improvements
 - [ ] Open PR for exec feature branch and review/merge
+- [x] Require configured exec secret + per-request exec secret header
diff --git a/docs/API.md b/docs/API.md
index 1c9bf7d..26b6237 100644
--- a/docs/API.md
+++ b/docs/API.md
@@ -147,6 +147,10 @@ Hotkey:
 
 Execute a shell command on the host running Clickthrough.
 
+Requirements:
+- `CLICKTHROUGH_EXEC_SECRET` must be configured on the server
+- send header `x-clickthrough-exec-secret: <secret>` with every `/exec` request (in addition to the regular `x-clickthrough-token` auth)
+
 ```json
 {
   "command": "Get-Process | Select-Object -First 5",
@@ -162,6 +166,7 @@ Notes:
 - if `shell` is omitted, server uses `CLICKTHROUGH_EXEC_DEFAULT_SHELL`
 - output is truncated based on `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS`
 - endpoint can be disabled with `CLICKTHROUGH_EXEC_ENABLED=false`
+- if `CLICKTHROUGH_EXEC_SECRET` is not configured, `/exec` is blocked (`403`); a missing or wrong `x-clickthrough-exec-secret` header returns `401`
 
 Response includes `stdout`, `stderr`, `exit_code`, timeout state, and execution metadata.
 
diff --git a/server/app.py b/server/app.py
index ab30f76..602fd6c 100644
--- a/server/app.py
+++ b/server/app.py
@@ -1,4 +1,5 @@
 import base64
+import hmac
 import io
 import os
 import subprocess
@@ -49,6 +50,7 @@ SETTINGS = {
     "exec_default_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_TIMEOUT_S", "30")),
     "exec_max_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_MAX_TIMEOUT_S", "120")),
     "exec_max_output_chars": int(os.getenv("CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS", "20000")),
+    "exec_secret": os.getenv("CLICKTHROUGH_EXEC_SECRET", "").strip(),
 }
 
 
@@ -299,6 +301,8 @@ def _resolve_exec_program(shell_name: str, command: str) -> list[str]:
 def _exec_command(req: ExecRequest) -> dict:
     if not SETTINGS["exec_enabled"]:
         raise HTTPException(status_code=403, detail="exec endpoint disabled")
+    if not SETTINGS["exec_secret"]:
+        raise HTTPException(status_code=403, detail="exec secret not configured")
 
     run_dry = SETTINGS["dry_run"] or req.dry_run
     shell_name = _pick_shell(req.shell)
@@ -452,6 +456,7 @@ def health(_: None = Depends(_auth)):
         "allowed_region": SETTINGS["allowed_region"],
         "exec": {
             "enabled": SETTINGS["exec_enabled"],
+            "secret_configured": bool(SETTINGS["exec_secret"]),
             "default_shell": SETTINGS["exec_default_shell"],
             "default_timeout_s": SETTINGS["exec_default_timeout_s"],
             "max_timeout_s": SETTINGS["exec_max_timeout_s"],
@@ -575,7 +580,17 @@ def action(req: ActionRequest, _: None = Depends(_auth)):
 
 
 @app.post("/exec")
-def exec_command(req: ExecRequest, _: None = Depends(_auth)):
+def exec_command(
+    req: ExecRequest,
+    x_clickthrough_exec_secret: Optional[str] = Header(default=None),
+    _: None = Depends(_auth),
+):
+    expected = SETTINGS["exec_secret"]
+    if not expected:
+        raise HTTPException(status_code=403, detail="exec secret not configured")
+    # Compare as bytes: compare_digest raises TypeError on non-ASCII str, which would surface as a 500.
+    if not hmac.compare_digest((x_clickthrough_exec_secret or "").encode("utf-8"), expected.encode("utf-8")):
+        raise HTTPException(status_code=401, detail="invalid exec secret")
     result = _exec_command(req)
     return {
         "ok": True,
-- 
2.39.5