diff --git a/README.md b/README.md index d6e865f..a00506c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # ScreenJob ScreenJob is an autonomous desktop-and-terminal execution service. -It lets an LLM use controlled local tools (screen, click, type, shell) to complete GUI-heavy tasks on a real computer. +It lets an LLM use controlled local tools (screen, mouse, keyboard, clipboard, shell) to complete GUI-heavy tasks on a real computer. ## What It Solves @@ -15,7 +15,8 @@ It lets an LLM use controlled local tools (screen, click, type, shell) to comple ## Core Features -- Tool-based agent loop (`execute_command`, `see_screen`, `enhance`, `click`, `type`, `press_key`, `sleep`, `task_complete`) +- Hybrid control model: screenshot grounding plus Windows-native window, dialog, and UI-element helpers when available +- Tool-based agent loop (`execute_command`, `see_screen`, `enhance`, `list_windows`, `find_window`, `focus_window`, `close_window`, `wait_for_window`, `wait_for_focus_change`, `detect_dialog`, `dialog_action`, `dialog_set_filename`, `wait_for_dialog_close`, `list_ui_elements`, `invoke_ui_element`, `set_ui_element_value`, `select_ui_element`, `wait_for_ui_element`, `click`, `scroll`, `drag`, `move_mouse`, `type`, `press_key`, `clipboard_get`, `clipboard_set`, `get_cursor_position`, `get_active_window`, `sleep`, `task_complete`) - Safety pre-check with override support - Per-job tool disable list - Live/final usage and cost estimates @@ -109,43 +110,45 @@ Or use the PowerShell launcher: .\start_backend.ps1 ``` -### Windows Service +### Backend Startup -Run these from an elevated PowerShell session (Run as Administrator): -Requires .NET SDK 10+ (installer publishes a native service host executable). +For screenshot-driven automation, start the backend in the logged-in user session. +That gives `pyautogui` access to the interactive desktop, which Windows services do not. 
+If you previously installed the legacy service, remove it once from an elevated PowerShell session with `.\uninstall_backend_service.ps1`. -Install and start at boot: +Install a sign-in launcher for the current user: ```powershell -.\install_backend_service.ps1 -ForceReinstall -StartAfterInstall -DelayedAutoStart +.\install_backend_service.ps1 ``` -Check status: +Install it for all users: ```powershell -Get-Service -Name ScreenJobBackend +.\install_backend_service.ps1 -AllUsers ``` -Stop/start manually: +Start it immediately after installing: ```powershell -Stop-Service -Name ScreenJobBackend -Start-Service -Name ScreenJobBackend +.\install_backend_service.ps1 -StartNow ``` -Uninstall: +Remove the launcher: ```powershell .\uninstall_backend_service.ps1 ``` -Service logs are written to: +The launcher runs `start_backend.ps1` hidden via `start_backend_hidden.vbs`. +If you need to start the backend manually, run: -```text -screenjob_runs/service/backend-service.stdout.log -screenjob_runs/service/backend-service.stderr.log +```powershell +.\start_backend.ps1 ``` +The legacy Windows service host remains in the tree for reference, but it is not the recommended path for GUI tasks. + ### System Tray Icon (Windows) Start tray icon now: @@ -174,6 +177,7 @@ Remove startup shortcut: Tray menu actions: +- The service controls are for the legacy Windows service host. 
- Refresh service status - Start/Stop/Restart service (prompts for admin/UAC) - Open dashboard URL from `.env` `SCREENJOB_HOST` / `SCREENJOB_PORT` @@ -194,6 +198,11 @@ Auth for all API routes: { "job": "run \"ls -a\" in C:/Users/username/Documents and return output", "model": "gpt-5.4-mini", + "native_automation_mode": "prefer", + "dialog_timeout_seconds": 12, + "focus_timeout_seconds": 8, + "ui_element_timeout_seconds": 8, + "max_retries_per_surface": 3, "disabled_tools": [], "safety_override": false } @@ -238,17 +247,28 @@ Each job payload includes: ## Agent Instructions (Practical) - Prefer `execute_command` for deterministic actions (opening URLs, filesystem checks). +- First classify the current Windows surface, then choose the control channel. +- Prefer native window/dialog/element tools for focus changes, file pickers, modal confirmations, and browser-owned dialogs when available. - Use `see_screen` before UI interaction. - Use `enhance` before clicking small/ambiguous targets; prefer `region="small"` for compact controls. - Use `enhance` `mode="text"` for tiny labels/text, or `mode="ui"` for general UI. - Optionally set `enhance` `scale` (2-6) for tighter zoom control. +- Use `list_windows`, `find_window`, `focus_window`, and `wait_for_focus_change` instead of blind Alt+Tab retries. +- Use `detect_dialog`, `dialog_set_filename`, `dialog_action`, and `wait_for_dialog_close` for native open/save/confirm flows. +- Use `list_ui_elements`, `invoke_ui_element`, `set_ui_element_value`, `select_ui_element`, and `wait_for_ui_element` when controls are exposed natively. - Use `press_key` for non-text keys (Enter, Tab, arrows, Escape). - For shortcuts, use one `press_key` call with combo syntax (example: `win+r`). -- Use `click` offsets via `offset_up/down/left/right` and optional `sleep_after_seconds`. +- Use `click` offsets via `offset_up/down/left/right`; set `button` and `click_count` there instead of inventing one-off click tools. 
<#
.SYNOPSIS
    Installs or removes a sign-in startup shortcut that launches the ScreenJob backend.

.DESCRIPTION
    Creates "ScreenJob Backend.lnk" in the Startup folder (current user) or the
    CommonStartup folder (-AllUsers). The shortcut runs wscript.exe with
    start_backend_hidden.vbs as its argument and uses the script directory as the
    working directory.

    If a legacy "ScreenJobBackend" Windows service is still registered, it is stopped
    and deleted when the session is elevated; otherwise a warning is printed and the
    script continues.

.PARAMETER Remove
    Delete the startup shortcut instead of creating it, then exit.

.PARAMETER AllUsers
    Target the CommonStartup folder instead of the current user's Startup folder.

.PARAMETER StartNow
    After creating the shortcut, start the hidden launcher immediately.
#>
[CmdletBinding(SupportsShouldProcess = $true)]
param(
    [switch]$Remove,
    [switch]$AllUsers,
    [switch]$StartNow
)

Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

$scriptDir = Split-Path -Parent $PSCommandPath
$backendScript = Join-Path $scriptDir "start_backend.ps1"
$vbsLauncher = Join-Path $scriptDir "start_backend_hidden.vbs"
$shortcutName = "ScreenJob Backend.lnk"

# The launcher files are only needed when a shortcut is created or started.
# Removing an existing shortcut must keep working even if they are missing.
if (-not $Remove) {
    if (-not (Test-Path -LiteralPath $backendScript)) {
        throw "Backend launcher script not found: $backendScript"
    }

    if (-not (Test-Path -LiteralPath $vbsLauncher)) {
        throw "Hidden backend launcher file not found: $vbsLauncher"
    }
}

# Returns $true when the current Windows identity is in the built-in Administrator role.
function Test-IsAdministrator {
    $identity = [Security.Principal.WindowsIdentity]::GetCurrent()
    $principal = New-Object Security.Principal.WindowsPrincipal($identity)
    return $principal.IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)
}

# Clean up the legacy Windows service when possible; never block the launcher install on it.
$legacyService = Get-Service -Name "ScreenJobBackend" -ErrorAction SilentlyContinue
if ($null -ne $legacyService) {
    if (Test-IsAdministrator) {
        if ($PSCmdlet.ShouldProcess("ScreenJobBackend", "Remove legacy Windows service")) {
            if ($legacyService.Status -ne "Stopped") {
                Stop-Service -Name "ScreenJobBackend" -Force -ErrorAction Stop
            }

            & sc.exe delete ScreenJobBackend | Out-Null
            if ($LASTEXITCODE -ne 0) {
                throw "Failed to delete legacy service 'ScreenJobBackend' (sc.exe exit code $LASTEXITCODE)."
            }

            Write-Host "Removed legacy Windows service: ScreenJobBackend"
        }
    } else {
        Write-Warning "Legacy Windows service 'ScreenJobBackend' is still installed. Run uninstall_backend_service.ps1 from an elevated PowerShell session once to remove it."
    }
}

$startupFolder = if ($AllUsers) {
    [Environment]::GetFolderPath("CommonStartup")
} else {
    [Environment]::GetFolderPath("Startup")
}

$shortcutPath = Join-Path $startupFolder $shortcutName

if ($Remove) {
    if (Test-Path -LiteralPath $shortcutPath) {
        if ($PSCmdlet.ShouldProcess($shortcutPath, "Remove backend startup shortcut")) {
            Remove-Item -LiteralPath $shortcutPath -Force
            Write-Host "Removed backend startup shortcut: $shortcutPath"
        }
    } else {
        Write-Host "No backend startup shortcut found at: $shortcutPath"
    }
    return
}

# Quote the launcher path once so the shortcut and -StartNow pass wscript.exe the same
# single argument, even when the checkout path contains spaces.
$quotedLauncher = '"' + $vbsLauncher + '"'

if ($PSCmdlet.ShouldProcess($shortcutPath, "Create backend startup shortcut")) {
    $shell = New-Object -ComObject WScript.Shell
    $shortcut = $shell.CreateShortcut($shortcutPath)
    $shortcut.TargetPath = "$env:SystemRoot\System32\wscript.exe"
    $shortcut.Arguments = $quotedLauncher
    $shortcut.WorkingDirectory = $scriptDir
    $shortcut.Description = "Launch ScreenJob backend at sign-in in the current user session."
    $shortcut.Save()
    Write-Host "Created backend startup shortcut: $shortcutPath"
}

if ($StartNow) {
    # Start-Process joins -ArgumentList values with spaces and adds no quoting of its own,
    # so the embedded quotes are required for paths that contain spaces.
    Start-Process -FilePath "$env:SystemRoot\System32\wscript.exe" -ArgumentList $quotedLauncher -WorkingDirectory $scriptDir | Out-Null
    Write-Host "Started backend launcher now."
}
@dataclass(frozen=True)
class CompletionOverlayPayload:
    """Immutable snapshot of a finished job, queued for the overlay UI thread."""

    # Identifier rendered on the card as "Job <job_id>".
    job_id: str
    # Job objective; rendered as the card title (shortened to 72 characters).
    objective: str
    # Final job message; rendered as the card body (shortened to 180 characters).
    return_message: str
    # Step count shown in the card footer.
    steps: int
    # Run duration in seconds, shown in the card footer.
    elapsed_seconds: float


class DesktopOverlayManager:
    """Shows "Completed" cards in the top-right screen corner from a tkinter thread.

    Payloads are handed to a daemon UI thread through a queue. The manager disables
    itself (and logs one warning) when it is not running on Windows, when tkinter
    cannot be imported, or when the Tk root cannot be created.
    """

    # Suffix appended by ``_shorten`` when text is clipped.
    _ELLIPSIS = "..."

    def __init__(self, logger: logging.Logger | None = None, *, auto_dismiss_seconds: float = 10.0) -> None:
        """Create an idle manager; the UI thread is started lazily.

        Args:
            logger: Logger for warnings; defaults to ``screenjob.overlay``.
            auto_dismiss_seconds: Seconds before a card closes itself. Values
                that round to 0 ms or less disable auto-dismiss.
        """
        self.logger = logger or logging.getLogger("screenjob.overlay")
        self._queue: queue.Queue[CompletionOverlayPayload] = queue.Queue()
        self._thread: threading.Thread | None = None
        self._lock = threading.Lock()
        self._ready = threading.Event()
        self._disabled = False
        self._warned = False
        self._auto_dismiss_ms = max(0, int(round(float(auto_dismiss_seconds) * 1000)))

    def show_completion(
        self,
        *,
        job_id: str,
        objective: str,
        return_message: str,
        steps: int,
        elapsed_seconds: float,
    ) -> None:
        """Queue a completion card; silently does nothing when the overlay is disabled."""
        if os.name != "nt":
            self._disable_once("Desktop completion HUD is only enabled on Windows.")
            return
        if not self._ensure_thread():
            return
        self._queue.put(
            CompletionOverlayPayload(
                job_id=job_id,
                objective=objective,
                return_message=return_message,
                steps=max(0, int(steps)),
                elapsed_seconds=max(0.0, float(elapsed_seconds)),
            )
        )

    def _ensure_thread(self) -> bool:
        """Start the UI thread if needed and report whether the overlay is usable.

        Returns:
            ``False`` when the overlay is (or just became) disabled, else ``True``.
        """
        with self._lock:
            if self._disabled:
                return False
            if self._thread is None or not self._thread.is_alive():
                self._ready.clear()
                self._thread = threading.Thread(target=self._ui_main, name="screenjob-overlay", daemon=True)
                self._thread.start()
        # Wait outside the lock: a failing UI thread calls _disable_once(), which needs
        # the same lock. Waiting while holding it stalled for the full timeout and then
        # reported success for a thread that was about to die.
        self._ready.wait(timeout=2.0)
        with self._lock:
            return not self._disabled

    def _disable_once(self, reason: str) -> None:
        """Mark the overlay disabled, wake any waiter, and log ``reason`` only once."""
        with self._lock:
            self._disabled = True
            already_warned = self._warned
            self._warned = True
        self._ready.set()
        if not already_warned:
            self.logger.warning("%s Overlay notifications disabled.", reason)

    def _format_elapsed(self, elapsed_seconds: float) -> str:
        """Format a duration as ``"Xh Ym Zs"``, ``"Ym Zs"`` or ``"Zs"`` (rounded, never negative)."""
        total_seconds = max(0, int(round(elapsed_seconds)))
        minutes, seconds = divmod(total_seconds, 60)
        hours, minutes = divmod(minutes, 60)
        if hours:
            return f"{hours}h {minutes}m {seconds}s"
        if minutes:
            return f"{minutes}m {seconds}s"
        return f"{seconds}s"

    def _shorten(self, text: str, limit: int) -> str:
        """Collapse whitespace in ``text`` and clip it to at most ``limit`` characters.

        Clipped text ends with ``"..."``; the suffix counts toward ``limit``.
        """
        raw = " ".join(str(text or "").split())
        if len(raw) <= limit:
            return raw
        suffix = self._ELLIPSIS
        if limit <= len(suffix):
            # Not enough room for any text; return as much of the suffix as fits.
            return suffix[: max(0, limit)]
        return raw[: limit - len(suffix)].rstrip() + suffix

    def _ui_main(self) -> None:
        """UI thread body: create the hidden Tk root, then poll the queue until the loop ends."""
        try:
            import tkinter as tk
        except Exception as exc:  # noqa: BLE001
            self._disable_once(f"tkinter is unavailable ({type(exc).__name__}: {exc}).")
            return

        try:
            root = tk.Tk()
            root.withdraw()
            root.update_idletasks()
        except Exception as exc:  # noqa: BLE001
            self._disable_once(f"Desktop overlay could not initialize ({type(exc).__name__}: {exc}).")
            return

        # Visible cards, newest first; each entry holds the window and its auto-dismiss timer id.
        cards: list[dict[str, Any]] = []
        self._ready.set()

        def reposition() -> None:
            """Stack the live cards downward from the top-right corner."""
            screen_width = root.winfo_screenwidth()
            top = 24
            for entry in cards:
                window = entry["window"]
                if not bool(window.winfo_exists()):
                    continue
                window.update_idletasks()
                width = max(320, int(window.winfo_width() or 360))
                height = max(120, int(window.winfo_height() or 160))
                left = max(12, screen_width - width - 24)
                window.geometry(f"{width}x{height}+{left}+{top}")
                top += height + 16

        def dismiss(window: Any) -> None:
            """Cancel the card's timer, destroy it, and re-stack the remaining cards."""
            for index, entry in enumerate(list(cards)):
                if entry["window"] is window:
                    after_id = entry.get("after_id")
                    if after_id is not None:
                        try:
                            window.after_cancel(after_id)
                        except Exception:  # noqa: BLE001
                            pass
                    cards.pop(index)
                    break
            try:
                if bool(window.winfo_exists()):
                    window.destroy()
            except Exception:  # noqa: BLE001
                pass
            if cards:
                reposition()

        def add_card(payload: CompletionOverlayPayload) -> None:
            """Build a borderless always-on-top card for ``payload`` and show it."""
            card = tk.Toplevel(root)
            card.withdraw()
            card.overrideredirect(True)
            card.attributes("-topmost", True)
            card.configure(bg="#0f172a")

            frame = tk.Frame(card, bg="#0f172a", highlightthickness=1, highlightbackground="#22c55e", bd=0)
            frame.pack(fill="both", expand=True)

            close_button = tk.Button(
                frame,
                text="×",
                command=lambda win=card: dismiss(win),
                bg="#0f172a",
                fg="#cbd5e1",
                activebackground="#111827",
                activeforeground="#ffffff",
                relief="flat",
                borderwidth=0,
                font=("Segoe UI", 14, "bold"),
                padx=6,
                pady=0,
            )
            close_button.place(relx=1.0, x=-8, y=6, anchor="ne")

            header = tk.Label(
                frame,
                text="Completed",
                bg="#0f172a",
                fg="#86efac",
                font=("Segoe UI", 10, "bold"),
                anchor="w",
            )
            header.pack(fill="x", padx=14, pady=(12, 2))

            title = tk.Label(
                frame,
                text=self._shorten(payload.objective, 72) or "Job complete",
                bg="#0f172a",
                fg="#f8fafc",
                font=("Segoe UI", 11, "bold"),
                justify="left",
                wraplength=320,
                anchor="w",
            )
            title.pack(fill="x", padx=14)

            job_row = tk.Label(
                frame,
                text=f"Job {payload.job_id}",
                bg="#0f172a",
                fg="#94a3b8",
                font=("Segoe UI", 9),
                justify="left",
                anchor="w",
            )
            job_row.pack(fill="x", padx=14, pady=(2, 8))

            message = tk.Label(
                frame,
                text=self._shorten(payload.return_message, 180) or "Task completed.",
                bg="#0f172a",
                fg="#e2e8f0",
                font=("Segoe UI", 9),
                justify="left",
                wraplength=320,
                anchor="w",
            )
            message.pack(fill="x", padx=14)

            footer = tk.Label(
                frame,
                text=f"{payload.steps} step(s) | {self._format_elapsed(payload.elapsed_seconds)}",
                bg="#0f172a",
                fg="#94a3b8",
                font=("Segoe UI", 9),
                justify="left",
                anchor="w",
            )
            footer.pack(fill="x", padx=14, pady=(10, 12))

            after_id = None
            if self._auto_dismiss_ms > 0:
                after_id = card.after(self._auto_dismiss_ms, lambda win=card: dismiss(win))

            cards.insert(0, {"window": card, "after_id": after_id})
            # Keep at most three cards on screen; drop the oldest ones.
            while len(cards) > 3:
                stale = cards.pop()
                try:
                    stale_after_id = stale.get("after_id")
                    if stale_after_id is not None:
                        stale["window"].after_cancel(stale_after_id)
                    stale["window"].destroy()
                except Exception:  # noqa: BLE001
                    pass

            card.update_idletasks()
            reposition()
            card.deiconify()

        def pump_queue() -> None:
            """Drain pending payloads into cards, then reschedule itself in 120 ms."""
            while True:
                try:
                    payload = self._queue.get_nowait()
                except queue.Empty:
                    break
                try:
                    add_card(payload)
                except Exception as exc:  # noqa: BLE001
                    # One broken card must not stop the pump: an exception here would
                    # skip the reschedule below and leave the overlay dead.
                    self.logger.warning(
                        "Desktop overlay could not render card for job %s (%s: %s).",
                        payload.job_id,
                        type(exc).__name__,
                        exc,
                    )
            try:
                root.after(120, pump_queue)
            except Exception:  # noqa: BLE001
                self._disable_once("Desktop overlay event loop stopped unexpectedly.")

        pump_queue()
        try:
            root.mainloop()
        except Exception as exc:  # noqa: BLE001
            self._disable_once(f"Desktop overlay main loop failed ({type(exc).__name__}: {exc}).")


# Process-wide manager instance, created on first use and guarded by _overlay_lock.
_overlay_singleton: DesktopOverlayManager | None = None
_overlay_lock = threading.Lock()


def get_desktop_overlay_manager(logger: logging.Logger | None = None) -> DesktopOverlayManager:
    """Return the shared ``DesktopOverlayManager``, creating it on first call.

    When ``logger`` is given and the singleton already exists, its logger is replaced.
    """
    global _overlay_singleton
    with _overlay_lock:
        if _overlay_singleton is None:
            _overlay_singleton = DesktopOverlayManager(logger=logger)
        elif logger is not None:
            _overlay_singleton.logger = logger
        return _overlay_singleton
None + + def pack(self, *_args: Any, **_kwargs: Any) -> None: + return None + + def place(self, *_args: Any, **_kwargs: Any) -> None: + return None + + def update_idletasks(self) -> None: + return None + + def winfo_width(self) -> int: + return self._width + + def winfo_height(self) -> int: + return self._height + + def winfo_exists(self) -> bool: + return self._exists + + def geometry(self, *_args: Any, **_kwargs: Any) -> None: + return None + + def deiconify(self) -> None: + return None + + def destroy(self) -> None: + self._exists = False + + def after(self, delay_ms: int, callback: Any) -> str: + after_id = self._root._schedule(delay_ms, callback) + self._after_ids[after_id] = (delay_ms, callback) + return after_id + + def after_cancel(self, after_id: str) -> None: + self._after_ids.pop(after_id, None) + self._root._cancel(after_id) + + +class _FakeButton(_FakeWidget): + def __init__(self, root: "_FakeTk", command: Any | None = None, **_kwargs: Any) -> None: + super().__init__(root) + self.command = command + + +class _FakeTk(_FakeWidget): + def __init__(self) -> None: + super().__init__(self) + self._events: deque[tuple[str, int, Any]] = deque() + self._event_seq = 0 + self.scheduled_delays: list[int] = [] + self.cards: list[_FakeWidget] = [] + + def withdraw(self) -> None: + return None + + def winfo_screenwidth(self) -> int: + return 1920 + + def _schedule(self, delay_ms: int, callback: Any) -> str: + after_id = f"after-{self._event_seq}" + self._event_seq += 1 + self.scheduled_delays.append(delay_ms) + self._events.append((after_id, delay_ms, callback)) + return after_id + + def _cancel(self, after_id: str) -> None: + self._events = deque(event for event in self._events if event[0] != after_id) + + def mainloop(self) -> None: + iterations = 0 + while self._events and iterations < 20: + after_id, _delay_ms, callback = self._events.popleft() + iterations += 1 + callback() + if any(not card.winfo_exists() for card in self.cards): + return + + +class 
_FakeTkModule(types.SimpleNamespace): + def __init__(self, root: _FakeTk) -> None: + super().__init__() + self._root = root + + def Tk(self) -> _FakeTk: + return self._root + + def Toplevel(self, _root: _FakeTk) -> _FakeWidget: + card = _FakeWidget(self._root) + self._root.cards.append(card) + return card + + def Frame(self, root: _FakeWidget, **_kwargs: Any) -> _FakeWidget: + return _FakeWidget(root._root) + + def Label(self, root: _FakeWidget, **_kwargs: Any) -> _FakeWidget: + return _FakeWidget(root._root) + + def Button(self, root: _FakeWidget, command: Any | None = None, **_kwargs: Any) -> _FakeButton: + return _FakeButton(root._root, command=command) + + +def test_completion_overlay_auto_dismisses(monkeypatch: Any) -> None: + root = _FakeTk() + fake_tk = _FakeTkModule(root) + monkeypatch.setitem(__import__("sys").modules, "tkinter", fake_tk) + + manager = DesktopOverlayManager(auto_dismiss_seconds=0.01) + manager._queue.put( + CompletionOverlayPayload( + job_id="job-123", + objective="Write a report", + return_message="Finished", + steps=5, + elapsed_seconds=12.4, + ) + ) + + manager._ui_main() + + assert any(delay == 10 for delay in root.scheduled_delays) + assert root.cards[0]._exists is False diff --git a/tests/test_task_manager.py b/tests/test_task_manager.py new file mode 100644 index 0000000..902bdcf --- /dev/null +++ b/tests/test_task_manager.py @@ -0,0 +1,238 @@ +from __future__ import annotations + +import threading +from pathlib import Path +from typing import Any + +import src.task_manager as task_manager_module +from src.config import AppConfig +from src.models import AgentResult, RunArtifacts, UsageSummary +from src.storage import HistoryDB +from src.task_manager import JobManager + + +class _OverlayRecorder: + def __init__(self) -> None: + self.calls: list[dict[str, Any]] = [] + + def show_completion(self, **kwargs: Any) -> None: + self.calls.append(kwargs) + + +def _build_manager(tmp_path: Path, overlay_manager: _OverlayRecorder) -> 
class _OverlayRecorder:
    """Overlay double that records every ``show_completion`` call's keyword arguments."""

    def __init__(self) -> None:
        self.calls: list[dict[str, Any]] = []

    def show_completion(self, **kwargs: Any) -> None:
        self.calls.append(kwargs)


def _build_manager(tmp_path: Path, overlay_manager: _OverlayRecorder) -> tuple[JobManager, HistoryDB, AppConfig]:
    """Create a ``JobManager`` backed by a database and runs directory under ``tmp_path``."""
    config = AppConfig(
        openai_api_key="test-key",
        screenjob_token="test-token",
        disable_ui=False,
        default_model="gpt-5.4-mini",
        safety_model="gpt-5.4-mini",
        host="127.0.0.1",
        port=8787,
        runs_dir=tmp_path / "runs",
        db_path=tmp_path / "screenjob.db",
    )
    db = HistoryDB(config.db_path)
    manager = JobManager(config=config, db=db, overlay_manager=overlay_manager)
    return manager, db, config


def _artifacts(tmp_path: Path) -> RunArtifacts:
    """Return a ``RunArtifacts`` value whose paths all live under ``tmp_path``."""
    root = tmp_path / "run_artifacts"
    return RunArtifacts(
        run_id="test_run",
        root_dir=root,
        logs_dir=root / "logs",
        shots_dir=root / "shots",
        enhance_dir=root / "enhanced",
        log_file=root / "logs" / "screenjob.log",
    )


def _create_job(db: HistoryDB, job_id: str, objective: str) -> None:
    """Insert a job row with fixed model and timestamp values."""
    db.create_job(
        job_id=job_id,
        objective=objective,
        model="gpt-5.4-mini",
        created_at="2026-05-30T12:00:00+00:00",
        safety_override=True,
        disabled_tools=[],
    )


def _execute(manager: JobManager, *, job_id: str, objective: str, safety_override: bool = True) -> None:
    """Call ``manager._execute_job`` with the settings shared by every test in this file."""
    manager._execute_job(
        job_id=job_id,
        objective=objective,
        model="gpt-5.4-mini",
        disabled_tools=[],
        safety_override=safety_override,
        max_steps=60,
        command_timeout=45,
        type_interval=0.02,
        click_pause=0.10,
        reasoning_effort="medium",
        screen_context_decay_steps=4,
        max_visual_context_images=3,
        native_automation_mode="prefer",
        dialog_timeout_seconds=12.0,
        focus_timeout_seconds=8.0,
        ui_element_timeout_seconds=8.0,
        max_retries_per_surface=3,
        pretty_logs=False,
        no_failsafe=False,
        cancel_event=threading.Event(),
    )


def _stub_run_job(monkeypatch: Any, tmp_path: Path, result: AgentResult) -> None:
    """Replace ``task_manager.run_job`` with a stub that returns ``result`` and fake artifacts."""
    monkeypatch.setattr(task_manager_module, "run_job", lambda **_kwargs: (result, _artifacts(tmp_path)))


def test_completed_job_triggers_desktop_overlay(tmp_path: Path, monkeypatch) -> None:
    overlay = _OverlayRecorder()
    manager, db, _config = _build_manager(tmp_path, overlay)
    job_id = "job_overlay_complete"
    objective = "Save todo-demo.txt in Documents"
    _create_job(db, job_id, objective)

    result = AgentResult(
        completed=True,
        result="Saved todo-demo.txt",
        return_message="Saved todo-demo.txt",
        data={"observed_result": "todo-demo.txt - Notepad is visible"},
        steps=11,
        started_at=100.0,
        ended_at=112.6,
        usage=UsageSummary(),
    )
    _stub_run_job(monkeypatch, tmp_path, result)

    _execute(manager, job_id=job_id, objective=objective)

    assert overlay.calls == [
        {
            "job_id": job_id,
            "objective": objective,
            "return_message": "Saved todo-demo.txt",
            "steps": 11,
            # 112.6 - 100.0 in binary floating point.
            "elapsed_seconds": 12.599999999999994,
        }
    ]
    assert db.get_job(job_id)["status"] == "completed"


def test_non_completed_jobs_do_not_trigger_desktop_overlay(tmp_path: Path, monkeypatch) -> None:
    overlay = _OverlayRecorder()
    manager, db, _config = _build_manager(tmp_path, overlay)

    failed_job_id = "job_overlay_failed"
    _create_job(db, failed_job_id, "Fail intentionally")
    failed_result = AgentResult(
        completed=False,
        result="Failure",
        return_message="Failure",
        data=None,
        steps=7,
        started_at=10.0,
        ended_at=18.0,
        usage=UsageSummary(),
        error="Failure",
    )
    _stub_run_job(monkeypatch, tmp_path, failed_result)
    _execute(manager, job_id=failed_job_id, objective="Fail intentionally")

    cancelled_job_id = "job_overlay_cancelled"
    _create_job(db, cancelled_job_id, "Cancel intentionally")
    cancelled_result = AgentResult(
        completed=False,
        result="Cancelled",
        return_message="Cancelled",
        data=None,
        steps=4,
        started_at=20.0,
        ended_at=23.0,
        usage=UsageSummary(),
        error="Cancelled",
        cancelled=True,
    )
    _stub_run_job(monkeypatch, tmp_path, cancelled_result)
    _execute(manager, job_id=cancelled_job_id, objective="Cancel intentionally")

    assert overlay.calls == []


def test_rejected_job_does_not_trigger_desktop_overlay(tmp_path: Path, monkeypatch) -> None:
    overlay = _OverlayRecorder()
    manager, db, _config = _build_manager(tmp_path, overlay)
    job_id = "job_overlay_rejected"
    _create_job(db, job_id, "Do something unsafe")

    # Force the safety pre-check to reject the job without contacting any service.
    monkeypatch.setattr(task_manager_module, "create_openai_client", lambda *_args, **_kwargs: object())
    monkeypatch.setattr(
        task_manager_module,
        "assess_task_safety",
        lambda *_args, **_kwargs: (False, "Unsafe request", {"decision": "blocked"}),
    )

    _execute(manager, job_id=job_id, objective="Do something unsafe", safety_override=False)

    assert overlay.calls == []
    events = db.get_job_events(job_id)
    assert events[-1]["event_type"] == "job_rejected"
<#
.SYNOPSIS
    Removes the ScreenJob backend startup shortcut and, when possible, the legacy Windows service.

.DESCRIPTION
    Deletes "ScreenJob Backend.lnk" from the Startup folder (current user) or the
    CommonStartup folder (-AllUsers). If a service named by -ServiceName is still
    registered, it is stopped and deleted when the session is elevated; otherwise a
    warning is printed and the shortcut is still removed.

.PARAMETER AllUsers
    Target the CommonStartup folder instead of the current user's Startup folder.

.PARAMETER ServiceName
    Name of the legacy Windows service to remove. Defaults to "ScreenJobBackend".
#>
[CmdletBinding(SupportsShouldProcess = $true)]
param(
    [switch]$AllUsers,
    [string]$ServiceName = "ScreenJobBackend"
)

Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

# Returns $true when the current Windows identity is in the built-in Administrator role.
function Test-IsAdministrator {
    $identity = [Security.Principal.WindowsIdentity]::GetCurrent()
    $principal = New-Object Security.Principal.WindowsPrincipal($identity)
    return $principal.IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)
}

$shortcutName = "ScreenJob Backend.lnk"
$startupFolder = if ($AllUsers) {
    [Environment]::GetFolderPath("CommonStartup")
} else {
    [Environment]::GetFolderPath("Startup")
}

$shortcutPath = Join-Path $startupFolder $shortcutName

$service = Get-Service -Name $ServiceName -ErrorAction SilentlyContinue
if ($null -ne $service) {
    if (Test-IsAdministrator) {
        if ($PSCmdlet.ShouldProcess($ServiceName, "Remove legacy Windows service")) {
            if ($service.Status -ne "Stopped") {
                Stop-Service -Name $ServiceName -Force -ErrorAction Stop
            }

            & sc.exe delete $ServiceName | Out-Null
            if ($LASTEXITCODE -ne 0) {
                throw "Failed to delete service '$ServiceName' (sc.exe exit code $LASTEXITCODE)."
            }

            Write-Host "Removed legacy Windows service: $ServiceName"
        }
    } else {
        # Mirror install_backend_service.ps1: without elevation the service cannot be
        # removed, but that must not stop the shortcut below from being removed.
        Write-Warning "Legacy Windows service '$ServiceName' is still installed. Re-run this script from an elevated PowerShell session to remove it."
    }
}

if (Test-Path -LiteralPath $shortcutPath) {
    if ($PSCmdlet.ShouldProcess($shortcutPath, "Remove backend startup shortcut")) {
        Remove-Item -LiteralPath $shortcutPath -Force
        Write-Host "Removed backend startup shortcut: $shortcutPath"
    }
} else {
    Write-Host "No backend startup shortcut found at: $shortcutPath"
}

Write-Host "Backend launcher uninstalled successfully." -ForegroundColor Green