feat(exec): add low-friction shell execution endpoint #6
@@ -7,3 +7,9 @@ CLICKTHROUGH_DRY_RUN=false
|
|||||||
CLICKTHROUGH_GRID_ROWS=12
|
CLICKTHROUGH_GRID_ROWS=12
|
||||||
CLICKTHROUGH_GRID_COLS=12
|
CLICKTHROUGH_GRID_COLS=12
|
||||||
# CLICKTHROUGH_ALLOWED_REGION=0,0,1920,1080
|
# CLICKTHROUGH_ALLOWED_REGION=0,0,1920,1080
|
||||||
|
|
||||||
|
CLICKTHROUGH_EXEC_ENABLED=true
|
||||||
|
CLICKTHROUGH_EXEC_DEFAULT_SHELL=powershell
|
||||||
|
CLICKTHROUGH_EXEC_TIMEOUT_S=30
|
||||||
|
CLICKTHROUGH_EXEC_MAX_TIMEOUT_S=120
|
||||||
|
CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS=20000
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ Let an Agent interact with your computer over HTTP, with grid-aware screenshots
|
|||||||
- **Visual endpoints**: full-screen capture with optional grid overlay and labeled cells (`asImage=true` can return raw image bytes)
|
- **Visual endpoints**: full-screen capture with optional grid overlay and labeled cells (`asImage=true` can return raw image bytes)
|
||||||
- **Zoom endpoint**: crop around a point with denser grid for fine targeting (`asImage=true` supported)
|
- **Zoom endpoint**: crop around a point with denser grid for fine targeting (`asImage=true` supported)
|
||||||
- **Action endpoints**: move/click/right-click/double-click/middle-click/scroll/type/hotkey
|
- **Action endpoints**: move/click/right-click/double-click/middle-click/scroll/type/hotkey
|
||||||
|
- **Command execution endpoint**: run PowerShell/Bash/CMD commands via `POST /exec`
|
||||||
- **Coordinate transform metadata** in visual responses so agents can map grid cells to real pixels
|
- **Coordinate transform metadata** in visual responses so agents can map grid cells to real pixels
|
||||||
- **Safety knobs**: token auth, dry-run mode, optional allowed-region restriction
|
- **Safety knobs**: token auth, dry-run mode, optional allowed-region restriction
|
||||||
|
|
||||||
@@ -48,6 +49,11 @@ Environment variables:
|
|||||||
- `CLICKTHROUGH_GRID_ROWS` (default `12`)
|
- `CLICKTHROUGH_GRID_ROWS` (default `12`)
|
||||||
- `CLICKTHROUGH_GRID_COLS` (default `12`)
|
- `CLICKTHROUGH_GRID_COLS` (default `12`)
|
||||||
- `CLICKTHROUGH_ALLOWED_REGION` (optional `x,y,width,height`)
|
- `CLICKTHROUGH_ALLOWED_REGION` (optional `x,y,width,height`)
|
||||||
|
- `CLICKTHROUGH_EXEC_ENABLED` (default `true`)
|
||||||
|
- `CLICKTHROUGH_EXEC_DEFAULT_SHELL` (default `powershell`; one of `powershell`, `bash`, `cmd`)
|
||||||
|
- `CLICKTHROUGH_EXEC_TIMEOUT_S` (default `30`)
|
||||||
|
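- `CLICKTHROUGH_EXEC_MAX_TIMEOUT_S` (default `120`; upper bound applied to the effective timeout, whether it comes from the request's `timeout_s` or from `CLICKTHROUGH_EXEC_TIMEOUT_S`)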
|
||||||
|
- `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS` (default `20000`)
|
||||||
|
|
||||||
## Gitea CI
|
## Gitea CI
|
||||||
|
|
||||||
|
|||||||
7
TODO.md
7
TODO.md
@@ -17,5 +17,8 @@
|
|||||||
- CI workflow runs syntax checks on push + PR
|
- CI workflow runs syntax checks on push + PR
|
||||||
|
|
||||||
## Next
|
## Next
|
||||||
- Manual runtime test on a desktop session (capture + click loop)
|
- [x] Add `POST /exec` endpoint (PowerShell/Bash/CMD) with timeout + stdout/stderr
|
||||||
- Optional: add monitor selection and OCR helper endpoint
|
- [x] Add exec configuration via env (`CLICKTHROUGH_EXEC_*`)
|
||||||
|
- [x] Document exec API + config
|
||||||
|
- [x] Create backlog issues for OCR/find/window/input/session-state improvements
|
||||||
|
- [ ] Open PR for exec feature branch and review/merge
|
||||||
|
|||||||
24
docs/API.md
24
docs/API.md
@@ -10,7 +10,7 @@ x-clickthrough-token: <token>
|
|||||||
|
|
||||||
## `GET /health`
|
## `GET /health`
|
||||||
|
|
||||||
Returns status and runtime safety flags.
|
Returns status and runtime safety flags, including `exec` capability config.
|
||||||
|
|
||||||
## `GET /screen`
|
## `GET /screen`
|
||||||
|
|
||||||
@@ -143,6 +143,28 @@ Hotkey:
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## `POST /exec`
|
||||||
|
|
||||||
|
Execute a shell command on the host running Clickthrough.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"command": "Get-Process | Select-Object -First 5",
|
||||||
|
"shell": "powershell",
|
||||||
|
"timeout_s": 20,
|
||||||
|
"cwd": "C:/Users/Paul",
|
||||||
|
"dry_run": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- `shell` supports `powershell`, `bash`, `cmd`
|
||||||
|
- if `shell` is omitted, server uses `CLICKTHROUGH_EXEC_DEFAULT_SHELL`
|
||||||
|
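- `stdout` and `stderr` are each truncated to at most `CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS` characters; `stdout_truncated` / `stderr_truncated` report whether truncation happened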
|
||||||
|
- endpoint can be disabled with `CLICKTHROUGH_EXEC_ENABLED=false`
|
||||||
|
|
||||||
|
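The `result` object in the response includes `stdout`, `stderr`, `exit_code` (`null` when the command timed out), the timeout state (`timed_out`), and execution metadata (`shell`, `command`, `argv`, `cwd`, `timeout_s`, `duration_ms`).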
|
||||||
|
|
||||||
## `POST /batch`
|
## `POST /batch`
|
||||||
|
|
||||||
Runs multiple `action` payloads sequentially.
|
Runs multiple `action` payloads sequentially.
|
||||||
|
|||||||
136
server/app.py
136
server/app.py
@@ -1,6 +1,7 @@
|
|||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
|
import subprocess
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Literal, Optional
|
from typing import Literal, Optional
|
||||||
@@ -43,6 +44,11 @@ SETTINGS = {
|
|||||||
"default_grid_rows": int(os.getenv("CLICKTHROUGH_GRID_ROWS", "12")),
|
"default_grid_rows": int(os.getenv("CLICKTHROUGH_GRID_ROWS", "12")),
|
||||||
"default_grid_cols": int(os.getenv("CLICKTHROUGH_GRID_COLS", "12")),
|
"default_grid_cols": int(os.getenv("CLICKTHROUGH_GRID_COLS", "12")),
|
||||||
"allowed_region": _parse_allowed_region(),
|
"allowed_region": _parse_allowed_region(),
|
||||||
|
"exec_enabled": _env_bool("CLICKTHROUGH_EXEC_ENABLED", True),
|
||||||
|
"exec_default_shell": os.getenv("CLICKTHROUGH_EXEC_DEFAULT_SHELL", "powershell").strip().lower(),
|
||||||
|
"exec_default_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_TIMEOUT_S", "30")),
|
||||||
|
"exec_max_timeout_s": int(os.getenv("CLICKTHROUGH_EXEC_MAX_TIMEOUT_S", "120")),
|
||||||
|
"exec_max_output_chars": int(os.getenv("CLICKTHROUGH_EXEC_MAX_OUTPUT_CHARS", "20000")),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -130,6 +136,14 @@ class BatchRequest(BaseModel):
|
|||||||
stop_on_error: bool = True
|
stop_on_error: bool = True
|
||||||
|
|
||||||
|
|
||||||
|
class ExecRequest(BaseModel):
    """Request body accepted by the ``POST /exec`` endpoint."""

    # Command text handed to the chosen shell; between 1 and 10000 characters.
    command: str = Field(min_length=1, max_length=10000)
    # Shell to run the command with; when omitted the server-side default is used.
    shell: Optional[Literal["powershell", "bash", "cmd"]] = None
    # Per-request timeout in seconds (1..600); when omitted the configured
    # default applies. The server additionally caps the effective value.
    timeout_s: Optional[int] = Field(default=None, ge=1, le=600)
    # Working directory for the child process; when omitted none is passed.
    cwd: Optional[str] = None
    # When true the command is resolved and reported but not executed.
    dry_run: bool = False
|
||||||
|
|
||||||
|
|
||||||
def _auth(x_clickthrough_token: Optional[str] = Header(default=None)):
|
def _auth(x_clickthrough_token: Optional[str] = Header(default=None)):
|
||||||
token = SETTINGS["token"]
|
token = SETTINGS["token"]
|
||||||
if token and x_clickthrough_token != token:
|
if token and x_clickthrough_token != token:
|
||||||
@@ -259,6 +273,111 @@ def _import_input_lib():
|
|||||||
raise HTTPException(status_code=500, detail=f"input backend unavailable: {exc}") from exc
|
raise HTTPException(status_code=500, detail=f"input backend unavailable: {exc}") from exc
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_shell(explicit_shell: str | None) -> str:
    """Return the normalized shell name to use for an exec request.

    The explicit request value wins; otherwise the configured
    ``exec_default_shell`` setting is used, with ``"powershell"`` as the last
    resort when both are empty.

    Raises:
        HTTPException: 400 when the resulting name is not one of
            ``powershell``, ``bash`` or ``cmd``.
    """
    candidate = explicit_shell or SETTINGS["exec_default_shell"] or "powershell"
    normalized = candidate.lower().strip()
    if normalized in ("powershell", "bash", "cmd"):
        return normalized
    raise HTTPException(status_code=400, detail="unsupported shell")
|
||||||
|
|
||||||
|
|
||||||
|
def _truncate_text(text: str, limit: int) -> tuple[str, bool]:
|
||||||
|
if len(text) <= limit:
|
||||||
|
return text, False
|
||||||
|
return text[:limit], True
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_exec_program(shell_name: str, command: str) -> list[str]:
|
||||||
|
if shell_name == "powershell":
|
||||||
|
return ["powershell", "-NoProfile", "-NonInteractive", "-ExecutionPolicy", "Bypass", "-Command", command]
|
||||||
|
if shell_name == "bash":
|
||||||
|
return ["bash", "-lc", command]
|
||||||
|
if shell_name == "cmd":
|
||||||
|
return ["cmd", "/c", command]
|
||||||
|
raise HTTPException(status_code=400, detail="unsupported shell")
|
||||||
|
|
||||||
|
|
||||||
|
def _captured_text(raw) -> str:
    """Normalize captured process output (``str``, ``bytes`` or ``None``) to ``str``."""
    if raw is None:
        return ""
    if isinstance(raw, str):
        return raw
    # subprocess.TimeoutExpired carries raw bytes on some platforms even when
    # the process was started in text mode. Decode the same way text mode
    # would (locale encoding, universal newlines) instead of returning the
    # "b'...'" repr that str(bytes) produces.
    import locale

    decoded = bytes(raw).decode(locale.getpreferredencoding(False), errors="replace")
    return decoded.replace("\r\n", "\n").replace("\r", "\n")


def _exec_command(req: ExecRequest) -> dict:
    """Run (or dry-run) the shell command described by ``req``.

    Steps: reject when exec is disabled, pick the shell, compute the effective
    timeout (request value or configured default, capped by the configured
    maximum), validate the optional working directory, then either report the
    resolved invocation (dry run) or execute it and collect its output.

    NOTE(review): this executes caller-supplied commands on the host; access
    control is whatever the route's auth dependency enforces.

    Args:
        req: The validated exec request.

    Returns:
        A dict describing the invocation. A dry run yields ``executed=False``
        and ``dry_run=True`` plus ``shell``, ``command``, ``argv``,
        ``timeout_s`` and ``cwd``. A real run yields ``executed=True``,
        ``timed_out``, those same invocation fields, ``duration_ms``,
        ``exit_code`` (``None`` on timeout), ``stdout``/``stderr`` capped at
        the configured output limit, and the matching ``*_truncated`` flags.

    Raises:
        HTTPException: 403 when exec is disabled; 400 for an unsupported
            shell, a missing/non-directory ``cwd``, or a shell executable
            that cannot be found.
    """
    if not SETTINGS["exec_enabled"]:
        raise HTTPException(status_code=403, detail="exec endpoint disabled")

    run_dry = SETTINGS["dry_run"] or req.dry_run
    shell_name = _pick_shell(req.shell)

    timeout_s = req.timeout_s if req.timeout_s is not None else SETTINGS["exec_default_timeout_s"]
    timeout_s = min(timeout_s, SETTINGS["exec_max_timeout_s"])

    cwd = None
    if req.cwd:
        cwd = os.path.abspath(req.cwd)
        if not os.path.isdir(cwd):
            raise HTTPException(status_code=400, detail="cwd does not exist or is not a directory")

    argv = _resolve_exec_program(shell_name, req.command)

    # Fields shared by every response shape, in the order clients already see.
    invocation = {
        "shell": shell_name,
        "command": req.command,
        "argv": argv,
        "timeout_s": timeout_s,
        "cwd": cwd,
    }

    if run_dry:
        return {"executed": False, "dry_run": True, **invocation}

    # Monotonic clock: the measured duration must not jump if the wall clock
    # is adjusted while the command runs.
    start = time.monotonic()
    try:
        completed = subprocess.run(
            argv,
            cwd=cwd,
            capture_output=True,
            text=True,
            # Undecodable bytes in the child's output should not fail the
            # whole request; substitute replacement characters instead.
            errors="replace",
            timeout=timeout_s,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        timed_out = True
        exit_code = None
        raw_stdout, raw_stderr = exc.stdout, exc.stderr
    except FileNotFoundError as exc:
        raise HTTPException(status_code=400, detail=f"shell executable not found: {exc}") from exc
    else:
        timed_out = False
        exit_code = completed.returncode
        raw_stdout, raw_stderr = completed.stdout, completed.stderr

    duration_ms = int((time.monotonic() - start) * 1000)
    max_chars = SETTINGS["exec_max_output_chars"]
    stdout, stdout_truncated = _truncate_text(_captured_text(raw_stdout), max_chars)
    stderr, stderr_truncated = _truncate_text(_captured_text(raw_stderr), max_chars)

    return {
        "executed": True,
        "timed_out": timed_out,
        **invocation,
        "duration_ms": duration_ms,
        "exit_code": exit_code,
        "stdout": stdout,
        "stderr": stderr,
        "stdout_truncated": stdout_truncated,
        "stderr_truncated": stderr_truncated,
    }
|
||||||
|
|
||||||
|
|
||||||
def _exec_action(req: ActionRequest) -> dict:
|
def _exec_action(req: ActionRequest) -> dict:
|
||||||
run_dry = SETTINGS["dry_run"] or req.dry_run
|
run_dry = SETTINGS["dry_run"] or req.dry_run
|
||||||
|
|
||||||
@@ -331,6 +450,12 @@ def health(_: None = Depends(_auth)):
|
|||||||
"request_id": _request_id(),
|
"request_id": _request_id(),
|
||||||
"dry_run": SETTINGS["dry_run"],
|
"dry_run": SETTINGS["dry_run"],
|
||||||
"allowed_region": SETTINGS["allowed_region"],
|
"allowed_region": SETTINGS["allowed_region"],
|
||||||
|
"exec": {
|
||||||
|
"enabled": SETTINGS["exec_enabled"],
|
||||||
|
"default_shell": SETTINGS["exec_default_shell"],
|
||||||
|
"default_timeout_s": SETTINGS["exec_default_timeout_s"],
|
||||||
|
"max_timeout_s": SETTINGS["exec_max_timeout_s"],
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -449,6 +574,17 @@ def action(req: ActionRequest, _: None = Depends(_auth)):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/exec")
def exec_command(req: ExecRequest, _: None = Depends(_auth)):
    """Handle ``POST /exec``: run the request and wrap the outcome in the response envelope."""
    # Run the command first so the request id and timestamp are taken after it finishes.
    outcome = _exec_command(req)
    envelope = {
        "ok": True,
        "request_id": _request_id(),
        "time_ms": _now_ms(),
    }
    envelope["result"] = outcome
    return envelope
|
||||||
|
|
||||||
|
|
||||||
@app.post("/batch")
|
@app.post("/batch")
|
||||||
def batch(req: BatchRequest, _: None = Depends(_auth)):
|
def batch(req: BatchRequest, _: None = Depends(_auth)):
|
||||||
results = []
|
results = []
|
||||||
|
|||||||
Reference in New Issue
Block a user