Commit remaining workspace updates
Some checks failed
CI / test (push) Failing after 8s

This commit is contained in:
Space-Banane
2026-05-31 20:43:25 +02:00
parent 79c9e98842
commit 4123765aba
11 changed files with 4498 additions and 131 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -30,6 +30,7 @@ def main(argv: list[str] | None = None) -> int:
print(" OPENAI_API_KEY=...")
print(" SCREENJOB_TOKEN=...")
print(" DISABLE_UI=true|false (optional)")
print(" SCREENJOB_PROHIBITED_KEY_COMBOS=ctrl+shift+s,alt+f4 (optional)")
return 0
server.main()
return 0

View File

@@ -5,6 +5,7 @@ import json
import sys
from pathlib import Path
from .agent import normalize_disabled_tools
from .config import load_app_config
from .models import RuntimeOptions
from .runtime import create_openai_client, run_job
@@ -40,8 +41,55 @@ def build_parser() -> argparse.ArgumentParser:
default=4,
help="Compact model context every N steps to decay old screenshots (0 disables).",
)
parser.add_argument(
"--max-visual-context-images",
type=int,
default=3,
help="Maximum screenshots/enhanced images retained in model-visible context during rebases.",
)
parser.add_argument(
"--native-automation-mode",
choices=["off", "prefer", "require_fallback"],
default="prefer",
help="How strongly the agent should prefer Windows-native automation helpers over pixel fallback.",
)
parser.add_argument(
"--dialog-timeout-seconds",
type=float,
default=12.0,
help="Timeout for dialog-oriented waits and retries.",
)
parser.add_argument(
"--focus-timeout-seconds",
type=float,
default=8.0,
help="Timeout for focus-change waits and verification.",
)
parser.add_argument(
"--ui-element-timeout-seconds",
type=float,
default=8.0,
help="Timeout for native UI element lookup waits.",
)
parser.add_argument(
"--max-retries-per-surface",
type=int,
default=3,
help="Maximum repeated retries on the same classified window/dialog surface before the agent must pivot.",
)
parser.add_argument(
"--pretty-logs",
action="store_true",
help="Emit expanded multi-line tool call/result logs for easier debugging.",
)
parser.add_argument("--disable-tool", action="append", default=[], help="Disable a tool by name.")
parser.add_argument("--skip-safety-check", action="store_true", help="Bypass pre-flight safety check.")
parser.add_argument(
"--skip-safety-check",
"--skip-safety-chec",
dest="skip_safety_check",
action="store_true",
help="Bypass pre-flight safety check.",
)
parser.add_argument("--no-failsafe", action="store_true", help="Disable PyAutoGUI fail-safe.")
return parser
@@ -57,7 +105,10 @@ def main(argv: list[str] | None = None) -> int:
return 2
model = args.model or config.default_model
disabled_tools = sorted({str(x).strip() for x in args.disable_tool if str(x).strip()})
try:
disabled_tools = normalize_disabled_tools(args.disable_tool)
except ValueError as exc:
parser.error(str(exc))
if not args.skip_safety_check:
safety_client = create_openai_client(config.openai_api_key)
@@ -92,7 +143,15 @@ def main(argv: list[str] | None = None) -> int:
click_pause=args.click_pause,
reasoning_effort=args.reasoning_effort,
screen_context_decay_steps=max(0, int(args.screen_context_decay_steps)),
max_visual_context_images=max(0, int(args.max_visual_context_images)),
native_automation_mode=args.native_automation_mode,
dialog_timeout_seconds=max(0.5, float(args.dialog_timeout_seconds)),
focus_timeout_seconds=max(0.5, float(args.focus_timeout_seconds)),
ui_element_timeout_seconds=max(0.5, float(args.ui_element_timeout_seconds)),
max_retries_per_surface=max(1, int(args.max_retries_per_surface)),
pretty_logs=bool(args.pretty_logs),
disable_tools=set(disabled_tools),
prohibited_key_combos=set(config.prohibited_key_combos),
)
try:
result, artifacts = run_job(

View File

@@ -14,6 +14,13 @@ def _env_bool(name: str, default: bool = False) -> bool:
return raw.strip().lower() in {"1", "true", "yes", "on"}
def _env_csv(name: str) -> list[str]:
    """Read environment variable *name* as a comma-separated list.

    Each comma-separated piece is stripped of surrounding whitespace and
    empty pieces are dropped; the remaining pieces keep their original order.
    An unset variable yields an empty list.
    """
    value = os.getenv(name)
    if value is None:
        return []
    entries: list[str] = []
    for piece in value.split(","):
        trimmed = piece.strip()
        if trimmed:
            entries.append(trimmed)
    return entries
@dataclass(frozen=True)
class AppConfig:
openai_api_key: str
@@ -25,6 +32,7 @@ class AppConfig:
port: int
runs_dir: Path
db_path: Path
prohibited_key_combos: tuple[str, ...] = ()
def load_app_config(cwd: Path) -> AppConfig:
@@ -38,6 +46,7 @@ def load_app_config(cwd: Path) -> AppConfig:
runs_dir = cwd / "screenjob_runs"
db_path = cwd / "screenjob.db"
disable_ui = _env_bool("DISABLE_UI", default=False)
prohibited_key_combos = tuple(_env_csv("SCREENJOB_PROHIBITED_KEY_COMBOS"))
return AppConfig(
openai_api_key=openai_api_key,
screenjob_token=screenjob_token,
@@ -48,5 +57,5 @@ def load_app_config(cwd: Path) -> AppConfig:
port=port,
runs_dir=runs_dir,
db_path=db_path,
prohibited_key_combos=prohibited_key_combos,
)

View File

@@ -60,4 +60,12 @@ class RuntimeOptions:
click_pause: float = 0.10
reasoning_effort: str = "medium"
screen_context_decay_steps: int = 4
max_visual_context_images: int = 3
native_automation_mode: str = "prefer"
dialog_timeout_seconds: float = 12.0
focus_timeout_seconds: float = 8.0
ui_element_timeout_seconds: float = 8.0
max_retries_per_surface: int = 3
pretty_logs: bool = False
disable_tools: set[str] | None = None
prohibited_key_combos: set[str] | None = None

View File

@@ -12,6 +12,7 @@ from fastapi.responses import FileResponse
from fastapi.responses import HTMLResponse, JSONResponse
from pydantic import BaseModel, Field
from .agent import normalize_disabled_tools
from .config import AppConfig, load_app_config
from .storage import HistoryDB
from .task_manager import JobManager
@@ -28,6 +29,13 @@ class CreateJobRequest(BaseModel):
click_pause: float = Field(0.10, ge=0.0, le=2.0)
reasoning_effort: str = Field("medium", pattern="^(low|medium|high)$")
screen_context_decay_steps: int = Field(4, ge=0, le=50)
max_visual_context_images: int = Field(3, ge=0, le=12)
native_automation_mode: str = Field("prefer", pattern="^(off|prefer|require_fallback)$")
dialog_timeout_seconds: float = Field(12.0, ge=0.5, le=120.0)
focus_timeout_seconds: float = Field(8.0, ge=0.5, le=120.0)
ui_element_timeout_seconds: float = Field(8.0, ge=0.5, le=120.0)
max_retries_per_surface: int = Field(3, ge=1, le=10)
pretty_logs: bool = False
disabled_tools: list[str] = Field(default_factory=list)
safety_override: bool = False
no_failsafe: bool = False
@@ -297,19 +305,30 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
@app.post("/api/jobs")
def create_job(payload: CreateJobRequest, _: None = Depends(require_token)) -> dict[str, str]:
    # Queue a new job from the request payload and return its id as
    # {"job_id": ...}.
    #
    # The job is submitted exactly once, inside the try block, so that a
    # ValueError raised while normalizing the disabled-tool list or by
    # manager.submit_job is reported to the client as HTTP 400 (with the
    # error text as the detail) instead of escaping the handler.
    try:
        disabled_tools = normalize_disabled_tools(payload.disabled_tools)
        job_id = manager.submit_job(
            objective=payload.job,
            model=payload.model,
            max_steps=payload.max_steps,
            command_timeout=payload.command_timeout,
            type_interval=payload.type_interval,
            click_pause=payload.click_pause,
            reasoning_effort=payload.reasoning_effort,
            screen_context_decay_steps=payload.screen_context_decay_steps,
            max_visual_context_images=payload.max_visual_context_images,
            native_automation_mode=payload.native_automation_mode,
            dialog_timeout_seconds=payload.dialog_timeout_seconds,
            focus_timeout_seconds=payload.focus_timeout_seconds,
            ui_element_timeout_seconds=payload.ui_element_timeout_seconds,
            max_retries_per_surface=payload.max_retries_per_surface,
            pretty_logs=payload.pretty_logs,
            disabled_tools=disabled_tools,
            safety_override=payload.safety_override,
            no_failsafe=payload.no_failsafe,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return {"job_id": job_id}
@app.get("/api/jobs")

View File

@@ -8,7 +8,9 @@ from dataclasses import dataclass
from pathlib import Path
from typing import Any, Callable
from .agent import normalize_disabled_tools
from .config import AppConfig
from .desktop_overlay import DesktopOverlayManager, get_desktop_overlay_manager
from .models import RuntimeOptions
from .runtime import create_openai_client, run_job
from .safety import assess_task_safety
@@ -32,10 +34,12 @@ class JobManager:
config: AppConfig,
db: HistoryDB,
broadcast: Callable[[dict[str, Any]], None] | None = None,
overlay_manager: DesktopOverlayManager | None = None,
) -> None:
self.config = config
self.db = db
self.broadcast = broadcast
self.overlay_manager = overlay_manager or get_desktop_overlay_manager()
self._running: dict[str, _RunningJob] = {}
self._lock = threading.Lock()
@@ -50,6 +54,13 @@ class JobManager:
click_pause: float = 0.10,
reasoning_effort: str = "medium",
screen_context_decay_steps: int = 4,
max_visual_context_images: int = 3,
native_automation_mode: str = "prefer",
dialog_timeout_seconds: float = 12.0,
focus_timeout_seconds: float = 8.0,
ui_element_timeout_seconds: float = 8.0,
max_retries_per_surface: int = 3,
pretty_logs: bool = False,
disabled_tools: list[str] | None = None,
safety_override: bool = False,
no_failsafe: bool = False,
@@ -57,7 +68,7 @@ class JobManager:
job_id = f"job_{int(time.time())}_{uuid.uuid4().hex[:8]}"
created_at = utc_now_iso()
selected_model = (model or self.config.default_model).strip() or self.config.default_model
disabled = sorted({tool.strip() for tool in (disabled_tools or []) if tool.strip()})
disabled = normalize_disabled_tools(disabled_tools)
self.db.create_job(
job_id=job_id,
objective=objective,
@@ -97,6 +108,13 @@ class JobManager:
"click_pause": click_pause,
"reasoning_effort": reasoning_effort,
"screen_context_decay_steps": screen_context_decay_steps,
"max_visual_context_images": max_visual_context_images,
"native_automation_mode": native_automation_mode,
"dialog_timeout_seconds": dialog_timeout_seconds,
"focus_timeout_seconds": focus_timeout_seconds,
"ui_element_timeout_seconds": ui_element_timeout_seconds,
"max_retries_per_surface": max_retries_per_surface,
"pretty_logs": pretty_logs,
"no_failsafe": no_failsafe,
"cancel_event": cancel_event,
},
@@ -127,6 +145,13 @@ class JobManager:
click_pause: float,
reasoning_effort: str,
screen_context_decay_steps: int,
max_visual_context_images: int,
native_automation_mode: str,
dialog_timeout_seconds: float,
focus_timeout_seconds: float,
ui_element_timeout_seconds: float,
max_retries_per_surface: int,
pretty_logs: bool,
no_failsafe: bool,
cancel_event: threading.Event,
) -> None:
@@ -226,7 +251,15 @@ class JobManager:
click_pause=click_pause,
reasoning_effort=reasoning_effort,
screen_context_decay_steps=max(0, int(screen_context_decay_steps)),
max_visual_context_images=max(0, int(max_visual_context_images)),
native_automation_mode=str(native_automation_mode or "prefer").strip().lower() or "prefer",
dialog_timeout_seconds=max(0.5, float(dialog_timeout_seconds)),
focus_timeout_seconds=max(0.5, float(focus_timeout_seconds)),
ui_element_timeout_seconds=max(0.5, float(ui_element_timeout_seconds)),
max_retries_per_surface=max(1, int(max_retries_per_surface)),
pretty_logs=bool(pretty_logs),
disable_tools=set(disabled_tools),
prohibited_key_combos=set(self.config.prohibited_key_combos),
)
try:
result, artifacts = run_job(
@@ -297,6 +330,14 @@ class JobManager:
},
},
)
if status == "completed":
self.overlay_manager.show_completion(
job_id=job_id,
objective=objective,
return_message=result.return_message,
steps=result.steps,
elapsed_seconds=max(0.0, float(result.ended_at - result.started_at)),
)
with self._lock:
self._running.pop(job_id, None)