This commit is contained in:
3589
src/agent.py
3589
src/agent.py
File diff suppressed because it is too large
Load Diff
@@ -30,6 +30,7 @@ def main(argv: list[str] | None = None) -> int:
|
||||
print(" OPENAI_API_KEY=...")
|
||||
print(" SCREENJOB_TOKEN=...")
|
||||
print(" DISABLE_UI=true|false (optional)")
|
||||
print(" SCREENJOB_PROHIBITED_KEY_COMBOS=ctrl+shift+s,alt+f4 (optional)")
|
||||
return 0
|
||||
server.main()
|
||||
return 0
|
||||
|
||||
63
src/cli.py
63
src/cli.py
@@ -5,6 +5,7 @@ import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from .agent import normalize_disabled_tools
|
||||
from .config import load_app_config
|
||||
from .models import RuntimeOptions
|
||||
from .runtime import create_openai_client, run_job
|
||||
@@ -40,8 +41,55 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
default=4,
|
||||
help="Compact model context every N steps to decay old screenshots (0 disables).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-visual-context-images",
|
||||
type=int,
|
||||
default=3,
|
||||
help="Maximum screenshots/enhanced images retained in model-visible context during rebases.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--native-automation-mode",
|
||||
choices=["off", "prefer", "require_fallback"],
|
||||
default="prefer",
|
||||
help="How strongly the agent should prefer Windows-native automation helpers over pixel fallback.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dialog-timeout-seconds",
|
||||
type=float,
|
||||
default=12.0,
|
||||
help="Timeout for dialog-oriented waits and retries.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--focus-timeout-seconds",
|
||||
type=float,
|
||||
default=8.0,
|
||||
help="Timeout for focus-change waits and verification.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ui-element-timeout-seconds",
|
||||
type=float,
|
||||
default=8.0,
|
||||
help="Timeout for native UI element lookup waits.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-retries-per-surface",
|
||||
type=int,
|
||||
default=3,
|
||||
help="Maximum repeated retries on the same classified window/dialog surface before the agent must pivot.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pretty-logs",
|
||||
action="store_true",
|
||||
help="Emit expanded multi-line tool call/result logs for easier debugging.",
|
||||
)
|
||||
parser.add_argument("--disable-tool", action="append", default=[], help="Disable a tool by name.")
|
||||
parser.add_argument("--skip-safety-check", action="store_true", help="Bypass pre-flight safety check.")
|
||||
parser.add_argument(
|
||||
"--skip-safety-check",
|
||||
"--skip-safety-chec",
|
||||
dest="skip_safety_check",
|
||||
action="store_true",
|
||||
help="Bypass pre-flight safety check.",
|
||||
)
|
||||
parser.add_argument("--no-failsafe", action="store_true", help="Disable PyAutoGUI fail-safe.")
|
||||
return parser
|
||||
|
||||
@@ -57,7 +105,10 @@ def main(argv: list[str] | None = None) -> int:
|
||||
return 2
|
||||
|
||||
model = args.model or config.default_model
|
||||
disabled_tools = sorted({str(x).strip() for x in args.disable_tool if str(x).strip()})
|
||||
try:
|
||||
disabled_tools = normalize_disabled_tools(args.disable_tool)
|
||||
except ValueError as exc:
|
||||
parser.error(str(exc))
|
||||
|
||||
if not args.skip_safety_check:
|
||||
safety_client = create_openai_client(config.openai_api_key)
|
||||
@@ -92,7 +143,15 @@ def main(argv: list[str] | None = None) -> int:
|
||||
click_pause=args.click_pause,
|
||||
reasoning_effort=args.reasoning_effort,
|
||||
screen_context_decay_steps=max(0, int(args.screen_context_decay_steps)),
|
||||
max_visual_context_images=max(0, int(args.max_visual_context_images)),
|
||||
native_automation_mode=args.native_automation_mode,
|
||||
dialog_timeout_seconds=max(0.5, float(args.dialog_timeout_seconds)),
|
||||
focus_timeout_seconds=max(0.5, float(args.focus_timeout_seconds)),
|
||||
ui_element_timeout_seconds=max(0.5, float(args.ui_element_timeout_seconds)),
|
||||
max_retries_per_surface=max(1, int(args.max_retries_per_surface)),
|
||||
pretty_logs=bool(args.pretty_logs),
|
||||
disable_tools=set(disabled_tools),
|
||||
prohibited_key_combos=set(config.prohibited_key_combos),
|
||||
)
|
||||
try:
|
||||
result, artifacts = run_job(
|
||||
|
||||
@@ -14,6 +14,13 @@ def _env_bool(name: str, default: bool = False) -> bool:
|
||||
return raw.strip().lower() in {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def _env_csv(name: str) -> list[str]:
    """Read a comma-separated environment variable as a list of strings.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The comma-separated items with surrounding whitespace stripped and
        empty items dropped, in their original order. An unset variable
        yields an empty list.
    """
    raw = os.getenv(name)
    if raw is None:
        return []
    # Strip each item once, then drop entries that were empty or whitespace-only.
    return [item for item in map(str.strip, raw.split(",")) if item]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AppConfig:
|
||||
openai_api_key: str
|
||||
@@ -25,6 +32,7 @@ class AppConfig:
|
||||
port: int
|
||||
runs_dir: Path
|
||||
db_path: Path
|
||||
prohibited_key_combos: tuple[str, ...] = ()
|
||||
|
||||
|
||||
def load_app_config(cwd: Path) -> AppConfig:
|
||||
@@ -38,6 +46,7 @@ def load_app_config(cwd: Path) -> AppConfig:
|
||||
runs_dir = cwd / "screenjob_runs"
|
||||
db_path = cwd / "screenjob.db"
|
||||
disable_ui = _env_bool("DISABLE_UI", default=False)
|
||||
prohibited_key_combos = tuple(_env_csv("SCREENJOB_PROHIBITED_KEY_COMBOS"))
|
||||
return AppConfig(
|
||||
openai_api_key=openai_api_key,
|
||||
screenjob_token=screenjob_token,
|
||||
@@ -48,5 +57,5 @@ def load_app_config(cwd: Path) -> AppConfig:
|
||||
port=port,
|
||||
runs_dir=runs_dir,
|
||||
db_path=db_path,
|
||||
prohibited_key_combos=prohibited_key_combos,
|
||||
)
|
||||
|
||||
|
||||
@@ -60,4 +60,12 @@ class RuntimeOptions:
|
||||
click_pause: float = 0.10
|
||||
reasoning_effort: str = "medium"
|
||||
screen_context_decay_steps: int = 4
|
||||
max_visual_context_images: int = 3
|
||||
native_automation_mode: str = "prefer"
|
||||
dialog_timeout_seconds: float = 12.0
|
||||
focus_timeout_seconds: float = 8.0
|
||||
ui_element_timeout_seconds: float = 8.0
|
||||
max_retries_per_surface: int = 3
|
||||
pretty_logs: bool = False
|
||||
disable_tools: set[str] | None = None
|
||||
prohibited_key_combos: set[str] | None = None
|
||||
|
||||
@@ -12,6 +12,7 @@ from fastapi.responses import FileResponse
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .agent import normalize_disabled_tools
|
||||
from .config import AppConfig, load_app_config
|
||||
from .storage import HistoryDB
|
||||
from .task_manager import JobManager
|
||||
@@ -28,6 +29,13 @@ class CreateJobRequest(BaseModel):
|
||||
click_pause: float = Field(0.10, ge=0.0, le=2.0)
|
||||
reasoning_effort: str = Field("medium", pattern="^(low|medium|high)$")
|
||||
screen_context_decay_steps: int = Field(4, ge=0, le=50)
|
||||
max_visual_context_images: int = Field(3, ge=0, le=12)
|
||||
native_automation_mode: str = Field("prefer", pattern="^(off|prefer|require_fallback)$")
|
||||
dialog_timeout_seconds: float = Field(12.0, ge=0.5, le=120.0)
|
||||
focus_timeout_seconds: float = Field(8.0, ge=0.5, le=120.0)
|
||||
ui_element_timeout_seconds: float = Field(8.0, ge=0.5, le=120.0)
|
||||
max_retries_per_surface: int = Field(3, ge=1, le=10)
|
||||
pretty_logs: bool = False
|
||||
disabled_tools: list[str] = Field(default_factory=list)
|
||||
safety_override: bool = False
|
||||
no_failsafe: bool = False
|
||||
@@ -297,19 +305,30 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
|
||||
|
||||
@app.post("/api/jobs")
|
||||
def create_job(payload: CreateJobRequest, _: None = Depends(require_token)) -> dict[str, str]:
|
||||
job_id = manager.submit_job(
|
||||
objective=payload.job,
|
||||
model=payload.model,
|
||||
max_steps=payload.max_steps,
|
||||
command_timeout=payload.command_timeout,
|
||||
type_interval=payload.type_interval,
|
||||
click_pause=payload.click_pause,
|
||||
reasoning_effort=payload.reasoning_effort,
|
||||
screen_context_decay_steps=payload.screen_context_decay_steps,
|
||||
disabled_tools=payload.disabled_tools,
|
||||
safety_override=payload.safety_override,
|
||||
no_failsafe=payload.no_failsafe,
|
||||
)
|
||||
try:
|
||||
disabled_tools = normalize_disabled_tools(payload.disabled_tools)
|
||||
job_id = manager.submit_job(
|
||||
objective=payload.job,
|
||||
model=payload.model,
|
||||
max_steps=payload.max_steps,
|
||||
command_timeout=payload.command_timeout,
|
||||
type_interval=payload.type_interval,
|
||||
click_pause=payload.click_pause,
|
||||
reasoning_effort=payload.reasoning_effort,
|
||||
screen_context_decay_steps=payload.screen_context_decay_steps,
|
||||
max_visual_context_images=payload.max_visual_context_images,
|
||||
native_automation_mode=payload.native_automation_mode,
|
||||
dialog_timeout_seconds=payload.dialog_timeout_seconds,
|
||||
focus_timeout_seconds=payload.focus_timeout_seconds,
|
||||
ui_element_timeout_seconds=payload.ui_element_timeout_seconds,
|
||||
max_retries_per_surface=payload.max_retries_per_surface,
|
||||
pretty_logs=payload.pretty_logs,
|
||||
disabled_tools=disabled_tools,
|
||||
safety_override=payload.safety_override,
|
||||
no_failsafe=payload.no_failsafe,
|
||||
)
|
||||
except ValueError as exc:
|
||||
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||
return {"job_id": job_id}
|
||||
|
||||
@app.get("/api/jobs")
|
||||
|
||||
@@ -8,7 +8,9 @@ from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
from .agent import normalize_disabled_tools
|
||||
from .config import AppConfig
|
||||
from .desktop_overlay import DesktopOverlayManager, get_desktop_overlay_manager
|
||||
from .models import RuntimeOptions
|
||||
from .runtime import create_openai_client, run_job
|
||||
from .safety import assess_task_safety
|
||||
@@ -32,10 +34,12 @@ class JobManager:
|
||||
config: AppConfig,
|
||||
db: HistoryDB,
|
||||
broadcast: Callable[[dict[str, Any]], None] | None = None,
|
||||
overlay_manager: DesktopOverlayManager | None = None,
|
||||
) -> None:
|
||||
self.config = config
|
||||
self.db = db
|
||||
self.broadcast = broadcast
|
||||
self.overlay_manager = overlay_manager or get_desktop_overlay_manager()
|
||||
self._running: dict[str, _RunningJob] = {}
|
||||
self._lock = threading.Lock()
|
||||
|
||||
@@ -50,6 +54,13 @@ class JobManager:
|
||||
click_pause: float = 0.10,
|
||||
reasoning_effort: str = "medium",
|
||||
screen_context_decay_steps: int = 4,
|
||||
max_visual_context_images: int = 3,
|
||||
native_automation_mode: str = "prefer",
|
||||
dialog_timeout_seconds: float = 12.0,
|
||||
focus_timeout_seconds: float = 8.0,
|
||||
ui_element_timeout_seconds: float = 8.0,
|
||||
max_retries_per_surface: int = 3,
|
||||
pretty_logs: bool = False,
|
||||
disabled_tools: list[str] | None = None,
|
||||
safety_override: bool = False,
|
||||
no_failsafe: bool = False,
|
||||
@@ -57,7 +68,7 @@ class JobManager:
|
||||
job_id = f"job_{int(time.time())}_{uuid.uuid4().hex[:8]}"
|
||||
created_at = utc_now_iso()
|
||||
selected_model = (model or self.config.default_model).strip() or self.config.default_model
|
||||
disabled = sorted({tool.strip() for tool in (disabled_tools or []) if tool.strip()})
|
||||
disabled = normalize_disabled_tools(disabled_tools)
|
||||
self.db.create_job(
|
||||
job_id=job_id,
|
||||
objective=objective,
|
||||
@@ -97,6 +108,13 @@ class JobManager:
|
||||
"click_pause": click_pause,
|
||||
"reasoning_effort": reasoning_effort,
|
||||
"screen_context_decay_steps": screen_context_decay_steps,
|
||||
"max_visual_context_images": max_visual_context_images,
|
||||
"native_automation_mode": native_automation_mode,
|
||||
"dialog_timeout_seconds": dialog_timeout_seconds,
|
||||
"focus_timeout_seconds": focus_timeout_seconds,
|
||||
"ui_element_timeout_seconds": ui_element_timeout_seconds,
|
||||
"max_retries_per_surface": max_retries_per_surface,
|
||||
"pretty_logs": pretty_logs,
|
||||
"no_failsafe": no_failsafe,
|
||||
"cancel_event": cancel_event,
|
||||
},
|
||||
@@ -127,6 +145,13 @@ class JobManager:
|
||||
click_pause: float,
|
||||
reasoning_effort: str,
|
||||
screen_context_decay_steps: int,
|
||||
max_visual_context_images: int,
|
||||
native_automation_mode: str,
|
||||
dialog_timeout_seconds: float,
|
||||
focus_timeout_seconds: float,
|
||||
ui_element_timeout_seconds: float,
|
||||
max_retries_per_surface: int,
|
||||
pretty_logs: bool,
|
||||
no_failsafe: bool,
|
||||
cancel_event: threading.Event,
|
||||
) -> None:
|
||||
@@ -226,7 +251,15 @@ class JobManager:
|
||||
click_pause=click_pause,
|
||||
reasoning_effort=reasoning_effort,
|
||||
screen_context_decay_steps=max(0, int(screen_context_decay_steps)),
|
||||
max_visual_context_images=max(0, int(max_visual_context_images)),
|
||||
native_automation_mode=str(native_automation_mode or "prefer").strip().lower() or "prefer",
|
||||
dialog_timeout_seconds=max(0.5, float(dialog_timeout_seconds)),
|
||||
focus_timeout_seconds=max(0.5, float(focus_timeout_seconds)),
|
||||
ui_element_timeout_seconds=max(0.5, float(ui_element_timeout_seconds)),
|
||||
max_retries_per_surface=max(1, int(max_retries_per_surface)),
|
||||
pretty_logs=bool(pretty_logs),
|
||||
disable_tools=set(disabled_tools),
|
||||
prohibited_key_combos=set(self.config.prohibited_key_combos),
|
||||
)
|
||||
try:
|
||||
result, artifacts = run_job(
|
||||
@@ -297,6 +330,14 @@ class JobManager:
|
||||
},
|
||||
},
|
||||
)
|
||||
if status == "completed":
|
||||
self.overlay_manager.show_completion(
|
||||
job_id=job_id,
|
||||
objective=objective,
|
||||
return_message=result.return_message,
|
||||
steps=result.steps,
|
||||
elapsed_seconds=max(0.0, float(result.ended_at - result.started_at)),
|
||||
)
|
||||
with self._lock:
|
||||
self._running.pop(job_id, None)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user