feat(exec): add shell command execution endpoint
This commit is contained in:
@@ -7,3 +7,9 @@ CLICKTHROUGH_DRY_RUN=false
|
||||
CLICKTHROUGH_GRID_ROWS=12
|
||||
CLICKTHROUGH_GRID_COLS=12
|
||||
# CLICKTHROUGH_ALLOWED_REGION=0,0,1920,1080
|
||||
|
||||
CLICKTHROUGH_EXEC_ENABLED=true
|
||||
CLICKTHROUGH_EXEC_DEFAULT_SHELL=powershell
|
||||
CLICKTHROUGH_EXEC_TIMEOUT_S=30
|
||||
CLICKTHROUGH_EXEC_MAX_TIMEOUT_S=120
|
||||
CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS=20000
|
||||
|
||||
@@ -7,6 +7,7 @@ Let an Agent interact with your computer over HTTP, with grid-aware screenshots
|
||||
- **Visual endpoints**: full-screen capture with optional grid overlay and labeled cells (`asImage=true` can return raw image bytes)
|
||||
- **Zoom endpoint**: crop around a point with denser grid for fine targeting (`asImage=true` supported)
|
||||
- **Action endpoints**: move/click/right-click/double-click/middle-click/scroll/type/hotkey
|
||||
- **Command execution endpoint**: run PowerShell/Bash/CMD commands via `POST /exec`
|
||||
- **Coordinate transform metadata** in visual responses so agents can map grid cells to real pixels
|
||||
- **Safety knobs**: token auth, dry-run mode, optional allowed-region restriction
|
||||
|
||||
@@ -48,6 +49,11 @@ Environment variables:
|
||||
- `CLICKTHROUGH_GRID_ROWS` (default `12`)
|
||||
- `CLICKTHROUGH_GRID_COLS` (default `12`)
|
||||
- `CLICKTHROUGH_ALLOWED_REGION` (optional `x,y,width,height`)
|
||||
- `CLICKTHROUGH_EXEC_ENABLED` (default `true`)
|
||||
- `CLICKTHROUGH_EXEC_DEFAULT_SHELL` (default `powershell`; one of `powershell`, `bash`, `cmd`)
|
||||
- `CLICKTHROUGH_EXEC_TIMEOUT_S` (default `30`)
|
||||
- `CLICKTHROUGH_EXEC_MAX_TIMEOUT_S` (default `120`; upper bound applied to any `timeout_s` requested via `POST /exec`)
|
||||
- `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS` (default `20000`)
|
||||
|
||||
## Gitea CI
|
||||
|
||||
|
||||
7
TODO.md
7
TODO.md
@@ -17,5 +17,8 @@
|
||||
- CI workflow runs syntax checks on push + PR
|
||||
|
||||
## Next
|
||||
- Manual runtime test on a desktop session (capture + click loop)
|
||||
- Optional: add monitor selection and OCR helper endpoint
|
||||
- [x] Add `POST /exec` endpoint (PowerShell/Bash/CMD) with timeout + stdout/stderr
|
||||
- [x] Add exec configuration via env (`CLICKTHROUGH_EXEC_*`)
|
||||
- [x] Document exec API + config
|
||||
- [x] Create backlog issues for OCR/find/window/input/session-state improvements
|
||||
- [ ] Open PR for exec feature branch and review/merge
|
||||
|
||||
24
docs/API.md
24
docs/API.md
@@ -10,7 +10,7 @@ x-clickthrough-token: <token>
|
||||
|
||||
## `GET /health`
|
||||
|
||||
Returns status and runtime safety flags.
|
||||
Returns status and runtime safety flags, including `exec` capability config.
|
||||
|
||||
## `GET /screen`
|
||||
|
||||
@@ -143,6 +143,28 @@ Hotkey:
|
||||
}
|
||||
```
|
||||
|
||||
## `POST /exec`
|
||||
|
||||
Execute a shell command on the host running Clickthrough.
|
||||
|
||||
```json
|
||||
{
|
||||
"command": "Get-Process | Select-Object -First 5",
|
||||
"shell": "powershell",
|
||||
"timeout_s": 20,
|
||||
"cwd": "C:/Users/Paul",
|
||||
"dry_run": false
|
||||
}
|
||||
```
|
||||
|
||||
Notes:
|
||||
- `shell` supports `powershell`, `bash`, `cmd`
|
||||
- if `shell` is omitted, server uses `CLICKTHROUGH_EXEC_DEFAULT_SHELL`
|
||||
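- `stdout` and `stderr` are each truncated to at most `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS` characters; the response reports this via `stdout_truncated` / `stderr_truncated`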
|
||||
- endpoint can be disabled with `CLICKTHROUGH_EXEC_ENABLED=false`
|
||||
|
||||
Response includes `stdout`, `stderr`, `exit_code`, timeout state, and execution metadata.
|
||||
|
||||
## `POST /batch`
|
||||
|
||||
Runs multiple `action` payloads sequentially.
|
||||
|
||||
136
server/app.py
136
server/app.py
@@ -1,6 +1,7 @@
|
||||
import base64
|
||||
import io
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
import uuid
|
||||
from typing import Literal, Optional
|
||||
@@ -43,6 +44,11 @@ SETTINGS = {
|
||||
"default_grid_rows": int(os.getenv("CLICKTHROUGH_GRID_ROWS", "12")),
|
||||
"default_grid_cols": int(os.getenv("CLICKTHROUGH_GRID_COLS", "12")),
|
||||
"allowed_region": _parse_allowed_region(),
|
||||
"exec_enabled": _env_bool("CLICKTHROUGH_EXEC_ENABLED", True),
|
||||
"exec_default_shell": os.getenv("CLICKTHROUGH_EXEC_DEFAULT_SHELL", "powershell").strip().lower(),
|
||||
"exec_default_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_TIMEOUT_S", "30")),
|
||||
"exec_max_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_MAX_TIMEOUT_S", "120")),
|
||||
"exec_max_output_chars": int(os.getenv("CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS", "20000")),
|
||||
}
|
||||
|
||||
|
||||
@@ -130,6 +136,14 @@ class BatchRequest(BaseModel):
|
||||
stop_on_error: bool = True
|
||||
|
||||
|
||||
class ExecRequest(BaseModel):
    """Request body for ``POST /exec``: one shell command plus execution options."""

    # Command text handed to the selected shell; must be 1 to 10000 characters.
    command: str = Field(min_length=1, max_length=10000)
    # Shell used to run the command. When None, the server falls back to its
    # configured default shell (see _pick_shell).
    shell: Literal["powershell", "bash", "cmd"] | None = None
    # Per-request timeout in seconds (1 to 600). When None, the server default is
    # used; _exec_command additionally caps the value at the configured maximum.
    timeout_s: int | None = Field(default=None, ge=1, le=600)
    # Working directory for the command. When given, _exec_command requires it
    # to be an existing directory.
    cwd: str | None = None
    # When true, the command is resolved and reported back but not executed.
    dry_run: bool = False
|
||||
|
||||
|
||||
def _auth(x_clickthrough_token: Optional[str] = Header(default=None)):
|
||||
token = SETTINGS["token"]
|
||||
if token and x_clickthrough_token != token:
|
||||
@@ -259,6 +273,111 @@ def _import_input_lib():
|
||||
raise HTTPException(status_code=500, detail=f"input backend unavailable: {exc}") from exc
|
||||
|
||||
|
||||
def _pick_shell(explicit_shell: str | None) -> str:
|
||||
shell_name = (explicit_shell or SETTINGS["exec_default_shell"] or "powershell").lower().strip()
|
||||
if shell_name not in {"powershell", "bash", "cmd"}:
|
||||
raise HTTPException(status_code=400, detail="unsupported shell")
|
||||
return shell_name
|
||||
|
||||
|
||||
def _truncate_text(text: str, limit: int) -> tuple[str, bool]:
    """Cap *text* at *limit* characters.

    Args:
        text: The string to cap.
        limit: Maximum number of characters to keep. Negative values are
            treated as ``0``; without that clamp ``text[:limit]`` would slice
            from the end and return almost the whole string.

    Returns:
        A ``(text, truncated)`` pair where ``truncated`` is ``True`` only when
        characters were actually dropped.
    """
    limit = max(limit, 0)
    if len(text) <= limit:
        return text, False
    return text[:limit], True
|
||||
|
||||
|
||||
def _resolve_exec_program(shell_name: str, command: str) -> list[str]:
    """Build the argv list that runs *command* under *shell_name*.

    The command is always passed as a single trailing argument to the shell
    executable, after that shell's fixed option prefix.

    Raises:
        HTTPException: 400 when *shell_name* is not ``powershell``, ``bash``
            or ``cmd``.
    """
    # Fixed executable + option prefix per supported shell; built per call so
    # every caller receives a fresh list.
    launchers = {
        "powershell": [
            "powershell",
            "-NoProfile",
            "-NonInteractive",
            "-ExecutionPolicy",
            "Bypass",
            "-Command",
        ],
        "bash": ["bash", "-lc"],
        "cmd": ["cmd", "/c"],
    }
    prefix = launchers.get(shell_name)
    if prefix is None:
        raise HTTPException(status_code=400, detail="unsupported shell")
    return [*prefix, command]
|
||||
|
||||
|
||||
def _exec_output_text(raw) -> str:
    """Return captured process output as ``str``; ``None`` becomes ``""``.

    ``subprocess.TimeoutExpired`` may carry ``bytes`` even when the process
    was started with ``text=True``. Calling ``str()`` on those would yield the
    ``"b'...'"`` repr, so bytes are decoded with the locale encoding instead.
    """
    if raw is None:
        return ""
    if isinstance(raw, (bytes, bytearray)):
        import locale

        # errors="replace": a timeout can cut a multi-byte sequence in half.
        return bytes(raw).decode(locale.getpreferredencoding(False), errors="replace")
    return str(raw)


def _exec_command(req: ExecRequest) -> dict:
    """Run (or dry-run) the shell command described by *req*.

    The effective timeout is the request's ``timeout_s`` (or the configured
    default when omitted), capped at the configured maximum. ``stdout`` and
    ``stderr`` are each truncated to the configured maximum output length.

    Returns:
        A dict describing the run. Dry runs contain ``executed=False`` and
        ``dry_run=True`` plus the resolved ``shell``, ``command``, ``argv``,
        ``timeout_s`` and ``cwd``. Real runs contain ``executed=True``,
        ``timed_out``, the same resolved fields, ``duration_ms``,
        ``exit_code`` (``None`` on timeout), ``stdout``, ``stderr`` and the
        two ``*_truncated`` flags.

    Raises:
        HTTPException: 403 when exec is disabled; 400 for an unsupported
            shell, a ``cwd`` that is not an existing directory, or a missing
            shell executable.
    """
    if not SETTINGS["exec_enabled"]:
        raise HTTPException(status_code=403, detail="exec endpoint disabled")

    run_dry = SETTINGS["dry_run"] or req.dry_run
    shell_name = _pick_shell(req.shell)

    timeout_s = req.timeout_s if req.timeout_s is not None else SETTINGS["exec_default_timeout_s"]
    timeout_s = min(timeout_s, SETTINGS["exec_max_timeout_s"])

    cwd = None
    if req.cwd:
        cwd = os.path.abspath(req.cwd)
        if not os.path.isdir(cwd):
            raise HTTPException(status_code=400, detail="cwd does not exist or is not a directory")

    argv = _resolve_exec_program(shell_name, req.command)

    # Fields shared by every response shape, in the order clients already see.
    resolved = {
        "shell": shell_name,
        "command": req.command,
        "argv": argv,
        "timeout_s": timeout_s,
        "cwd": cwd,
    }

    if run_dry:
        return {"executed": False, "dry_run": True, **resolved}

    # Monotonic clock: the reported duration must not jump if the wall clock
    # is adjusted while the command runs.
    started = time.monotonic()
    try:
        completed = subprocess.run(
            argv,
            cwd=cwd,
            capture_output=True,
            text=True,
            # Undecodable bytes in the child's output become U+FFFD instead of
            # raising UnicodeDecodeError and failing the whole request.
            errors="replace",
            timeout=timeout_s,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        timed_out = True
        exit_code = None
        raw_stdout, raw_stderr = exc.stdout, exc.stderr
    except FileNotFoundError as exc:
        raise HTTPException(status_code=400, detail=f"shell executable not found: {exc}") from exc
    else:
        timed_out = False
        exit_code = completed.returncode
        raw_stdout, raw_stderr = completed.stdout, completed.stderr
    duration_ms = int((time.monotonic() - started) * 1000)

    max_chars = SETTINGS["exec_max_output_chars"]
    stdout, stdout_truncated = _truncate_text(_exec_output_text(raw_stdout), max_chars)
    stderr, stderr_truncated = _truncate_text(_exec_output_text(raw_stderr), max_chars)

    return {
        "executed": True,
        "timed_out": timed_out,
        **resolved,
        "duration_ms": duration_ms,
        "exit_code": exit_code,
        "stdout": stdout,
        "stderr": stderr,
        "stdout_truncated": stdout_truncated,
        "stderr_truncated": stderr_truncated,
    }
|
||||
|
||||
|
||||
def _exec_action(req: ActionRequest) -> dict:
|
||||
run_dry = SETTINGS["dry_run"] or req.dry_run
|
||||
|
||||
@@ -331,6 +450,12 @@ def health(_: None = Depends(_auth)):
|
||||
"request_id": _request_id(),
|
||||
"dry_run": SETTINGS["dry_run"],
|
||||
"allowed_region": SETTINGS["allowed_region"],
|
||||
"exec": {
|
||||
"enabled": SETTINGS["exec_enabled"],
|
||||
"default_shell": SETTINGS["exec_default_shell"],
|
||||
"default_timeout_s": SETTINGS["exec_default_timeout_s"],
|
||||
"max_timeout_s": SETTINGS["exec_max_timeout_s"],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -449,6 +574,17 @@ def action(req: ActionRequest, _: None = Depends(_auth)):
|
||||
}
|
||||
|
||||
|
||||
@app.post("/exec")
def exec_command(req: ExecRequest, _: None = Depends(_auth)):
    """Handle ``POST /exec``: run the requested command and wrap its result.

    The command outcome produced by ``_exec_command`` is returned under
    ``result`` alongside ``ok``, a ``request_id`` and a ``time_ms`` stamp.
    """
    # Run the command first so the id and timestamp are taken after it ends.
    outcome = _exec_command(req)
    envelope = {"ok": True}
    envelope["request_id"] = _request_id()
    envelope["time_ms"] = _now_ms()
    envelope["result"] = outcome
    return envelope
|
||||
|
||||
|
||||
@app.post("/batch")
|
||||
def batch(req: BatchRequest, _: None = Depends(_auth)):
|
||||
results = []
|
||||
|
||||
Reference in New Issue
Block a user