Switch backend startup to interactive session
This commit is contained in:
149
tests/test_desktop_overlay.py
Normal file
149
tests/test_desktop_overlay.py
Normal file
@@ -0,0 +1,149 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import types
|
||||
from collections import deque
|
||||
from typing import Any
|
||||
|
||||
from src.desktop_overlay import CompletionOverlayPayload, DesktopOverlayManager
|
||||
|
||||
|
||||
class _FakeWidget:
    """In-memory stand-in for a tkinter widget.

    Window-management and layout calls are accepted and ignored; only the
    reported size, the "exists" flag and timer bookkeeping carry state.
    Timers are delegated to the owning root's ``_schedule`` / ``_cancel``.
    """

    def __init__(self, root: "_FakeTk", *, width: int = 360, height: int = 160) -> None:
        """Bind the widget to ``root`` and record the size it should report."""
        self._root = root
        self._width = width
        self._height = height
        self._exists = True
        # Maps each timer id issued through this widget to (delay_ms, callback).
        self._after_ids: dict[str, tuple[int, Any]] = {}

    # --- calls that are accepted and ignored ------------------------------

    def withdraw(self) -> None:
        """Accept the call and do nothing."""

    def overrideredirect(self, *_args: Any, **_kwargs: Any) -> None:
        """Accept any arguments and do nothing."""

    def attributes(self, *_args: Any, **_kwargs: Any) -> None:
        """Accept any arguments and do nothing."""

    def configure(self, *_args: Any, **_kwargs: Any) -> None:
        """Accept any arguments and do nothing."""

    def pack(self, *_args: Any, **_kwargs: Any) -> None:
        """Accept any arguments and do nothing."""

    def place(self, *_args: Any, **_kwargs: Any) -> None:
        """Accept any arguments and do nothing."""

    def update_idletasks(self) -> None:
        """Accept the call and do nothing."""

    def geometry(self, *_args: Any, **_kwargs: Any) -> None:
        """Accept any arguments and do nothing."""

    def deiconify(self) -> None:
        """Accept the call and do nothing."""

    # --- state queries ----------------------------------------------------

    def winfo_width(self) -> int:
        """Return the width given at construction time."""
        return self._width

    def winfo_height(self) -> int:
        """Return the height given at construction time."""
        return self._height

    def winfo_exists(self) -> bool:
        """Return ``True`` until :meth:`destroy` has been called."""
        return self._exists

    def destroy(self) -> None:
        """Mark the widget as no longer existing."""
        self._exists = False

    # --- timers -----------------------------------------------------------

    def after(self, delay_ms: int, callback: Any) -> str:
        """Schedule ``callback`` on the root and return the id the root issued."""
        token = self._root._schedule(delay_ms, callback)
        self._after_ids[token] = (delay_ms, callback)
        return token

    def after_cancel(self, after_id: str) -> None:
        """Forget ``after_id`` locally (if known) and cancel it on the root."""
        self._after_ids.pop(after_id, None)
        self._root._cancel(after_id)
|
||||
|
||||
|
||||
class _FakeButton(_FakeWidget):
    """Fake button widget that remembers the callback it was created with."""

    def __init__(self, root: "_FakeTk", command: Any | None = None, **_kwargs: Any) -> None:
        """Initialise the base widget on ``root`` and keep ``command``.

        Any further keyword options are accepted and discarded.
        """
        super().__init__(root)
        # Exposed as a plain attribute so it can be read or invoked later.
        self.command = command
|
||||
|
||||
|
||||
class _FakeTk(_FakeWidget):
    """Fake root window that owns the timer queue and a bounded event loop.

    It is its own root, so timers requested on any widget bound to it end up
    in ``_events`` and are fired in FIFO order by :meth:`mainloop`.
    """

    def __init__(self) -> None:
        """Create an empty event queue and the lists exposed for inspection."""
        super().__init__(self)
        # Pending timers as (after_id, delay_ms, callback), oldest first.
        self._events: deque[tuple[str, int, Any]] = deque()
        # Counter used to mint unique "after-N" ids.
        self._event_seq = 0
        # Every delay ever requested, in request order.
        self.scheduled_delays: list[int] = []
        # Widgets appended by whoever creates top-level cards for this root.
        self.cards: list[_FakeWidget] = []

    def withdraw(self) -> None:
        """Accept the call and do nothing."""
        return None

    def winfo_screenwidth(self) -> int:
        """Return a fixed screen width of 1920."""
        return 1920

    def _schedule(self, delay_ms: int, callback: Any) -> str:
        """Queue ``callback`` and return a freshly minted ``after-N`` id."""
        token = f"after-{self._event_seq}"
        self._event_seq += 1
        self.scheduled_delays.append(delay_ms)
        self._events.append((token, delay_ms, callback))
        return token

    def _cancel(self, after_id: str) -> None:
        """Drop every queued event whose id equals ``after_id``."""
        survivors = [entry for entry in self._events if entry[0] != after_id]
        self._events = deque(survivors)

    def mainloop(self) -> None:
        """Fire queued callbacks in order, ignoring their delays.

        Stops when the queue is empty, after 20 callbacks, or as soon as any
        widget in ``cards`` reports that it no longer exists.
        """
        for _ in range(20):
            if not self._events:
                break
            _after_id, _delay_ms, fire = self._events.popleft()
            fire()
            if not all(card.winfo_exists() for card in self.cards):
                return
|
||||
|
||||
|
||||
class _FakeTkModule(types.SimpleNamespace):
    """Namespace exposing the widget factories of a module-like ``tkinter`` fake.

    Every factory hands out fakes bound to the single root supplied at
    construction time.
    """

    def __init__(self, root: _FakeTk) -> None:
        """Remember the root that :meth:`Tk` returns."""
        super().__init__()
        self._root = root

    def Tk(self) -> _FakeTk:
        """Return the pre-built root instead of creating a new one."""
        return self._root

    def Toplevel(self, _root: _FakeTk) -> _FakeWidget:
        """Create a card widget and register it in the root's ``cards`` list.

        The argument is ignored; the card is always bound to the stored root.
        """
        new_card = _FakeWidget(self._root)
        self._root.cards.append(new_card)
        return new_card

    def Frame(self, root: _FakeWidget, **_kwargs: Any) -> _FakeWidget:
        """Return a fake widget bound to the parent's root; options are ignored."""
        owner = root._root
        return _FakeWidget(owner)

    def Label(self, root: _FakeWidget, **_kwargs: Any) -> _FakeWidget:
        """Return a fake widget bound to the parent's root; options are ignored."""
        owner = root._root
        return _FakeWidget(owner)

    def Button(self, root: _FakeWidget, command: Any | None = None, **_kwargs: Any) -> _FakeButton:
        """Return a fake button bound to the parent's root that keeps ``command``."""
        owner = root._root
        return _FakeButton(owner, command=command)
|
||||
|
||||
|
||||
def test_completion_overlay_auto_dismisses(monkeypatch: Any) -> None:
    """Check that a queued completion payload produces a card that dismisses itself.

    ``tkinter`` is replaced in ``sys.modules`` by a fake module, one payload
    is placed on the manager's queue, and the UI entry point is run directly.
    The test then expects a 10 ms timer to have been scheduled (matching
    ``auto_dismiss_seconds=0.01``) and the first created card to be destroyed.
    """
    import sys  # local import: the module is needed only for the patch below

    root = _FakeTk()
    fake_tk = _FakeTkModule(root)
    # Presumably the manager imports tkinter lazily inside ``_ui_main``, so
    # patching ``sys.modules`` makes that import resolve to the fake.
    monkeypatch.setitem(sys.modules, "tkinter", fake_tk)

    manager = DesktopOverlayManager(auto_dismiss_seconds=0.01)
    manager._queue.put(
        CompletionOverlayPayload(
            job_id="job-123",
            objective="Write a report",
            return_message="Finished",
            steps=5,
            elapsed_seconds=12.4,
        )
    )

    manager._ui_main()

    assert 10 in root.scheduled_delays
    # Fail with a readable message instead of an IndexError when nothing was shown.
    assert root.cards, "overlay did not create any card"
    assert root.cards[0].winfo_exists() is False
|
||||
238
tests/test_task_manager.py
Normal file
238
tests/test_task_manager.py
Normal file
@@ -0,0 +1,238 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import src.task_manager as task_manager_module
|
||||
from src.config import AppConfig
|
||||
from src.models import AgentResult, RunArtifacts, UsageSummary
|
||||
from src.storage import HistoryDB
|
||||
from src.task_manager import JobManager
|
||||
|
||||
|
||||
class _OverlayRecorder:
    """Overlay stand-in that records every ``show_completion`` call."""

    def __init__(self) -> None:
        """Start with an empty call log."""
        # One dict of keyword arguments per call, in call order.
        self.calls: list[dict[str, Any]] = []

    def show_completion(self, **kwargs: Any) -> None:
        """Append the keyword arguments of this call to ``calls``."""
        received = kwargs
        self.calls.append(received)
|
||||
|
||||
|
||||
def _build_manager(tmp_path: Path, overlay_manager: _OverlayRecorder) -> tuple[JobManager, HistoryDB, AppConfig]:
    """Build a ``JobManager`` backed by files under ``tmp_path``.

    The configuration uses fixed test credentials and places both the runs
    directory and the database file inside ``tmp_path``.

    Returns:
        ``(manager, db, config)``: the manager plus the database and
        configuration objects it was constructed with.
    """
    settings = {
        "openai_api_key": "test-key",
        "screenjob_token": "test-token",
        "disable_ui": False,
        "default_model": "gpt-5.4-mini",
        "safety_model": "gpt-5.4-mini",
        "host": "127.0.0.1",
        "port": 8787,
        "runs_dir": tmp_path / "runs",
        "db_path": tmp_path / "screenjob.db",
    }
    config = AppConfig(**settings)
    history = HistoryDB(config.db_path)
    job_manager = JobManager(config=config, db=history, overlay_manager=overlay_manager)
    return job_manager, history, config
|
||||
|
||||
|
||||
def _artifacts(tmp_path: Path) -> RunArtifacts:
    """Return a ``RunArtifacts`` whose paths all live under ``tmp_path/run_artifacts``.

    Only path objects are built here; nothing is created on disk.
    """
    base = tmp_path / "run_artifacts"
    logs = base / "logs"
    return RunArtifacts(
        run_id="test_run",
        root_dir=base,
        logs_dir=logs,
        shots_dir=base / "shots",
        enhance_dir=base / "enhanced",
        log_file=logs / "screenjob.log",
    )
|
||||
|
||||
|
||||
def _create_job(db: HistoryDB, job_id: str, objective: str) -> None:
    """Insert a job row with fixed model, timestamp and safety settings.

    Only ``job_id`` and ``objective`` vary between callers; every other
    field passed to ``db.create_job`` is a constant.
    """
    job_fields = {
        "job_id": job_id,
        "objective": objective,
        "model": "gpt-5.4-mini",
        "created_at": "2026-05-30T12:00:00+00:00",
        "safety_override": True,
        "disabled_tools": [],
    }
    db.create_job(**job_fields)
|
||||
|
||||
|
||||
def test_completed_job_triggers_desktop_overlay(tmp_path: Path, monkeypatch) -> None:
    """Check that a completed run reports exactly one completion to the overlay.

    ``run_job`` is replaced by a stub returning a completed ``AgentResult``
    that started at 100.0 and ended at 112.6. The overlay must receive the
    job id, objective, return message, step count and an elapsed time of
    about 12.6 seconds, and the stored job status must be ``"completed"``.
    """
    import math  # local import: only this test needs a tolerant float comparison

    overlay = _OverlayRecorder()
    manager, db, _config = _build_manager(tmp_path, overlay)
    job_id = "job_overlay_complete"
    objective = "Save todo-demo.txt in Documents"
    _create_job(db, job_id, objective)

    result = AgentResult(
        completed=True,
        result="Saved todo-demo.txt",
        return_message="Saved todo-demo.txt",
        data={"observed_result": "todo-demo.txt - Notepad is visible"},
        steps=11,
        started_at=100.0,
        ended_at=112.6,
        usage=UsageSummary(),
    )
    monkeypatch.setattr(task_manager_module, "run_job", lambda **_kwargs: (result, _artifacts(tmp_path)))

    manager._execute_job(
        job_id=job_id,
        objective=objective,
        model="gpt-5.4-mini",
        disabled_tools=[],
        safety_override=True,
        max_steps=60,
        command_timeout=45,
        type_interval=0.02,
        click_pause=0.10,
        reasoning_effort="medium",
        screen_context_decay_steps=4,
        max_visual_context_images=3,
        native_automation_mode="prefer",
        dialog_timeout_seconds=12.0,
        focus_timeout_seconds=8.0,
        ui_element_timeout_seconds=8.0,
        max_retries_per_surface=3,
        pretty_logs=False,
        no_failsafe=False,
        cancel_event=threading.Event(),
    )

    assert len(overlay.calls) == 1
    # Compare the float separately: 112.6 - 100.0 is not exactly 12.6 in
    # binary floating point, so exact equality would pin an arithmetic detail.
    reported = dict(overlay.calls[0])
    elapsed_seconds = reported.pop("elapsed_seconds")
    assert reported == {
        "job_id": job_id,
        "objective": objective,
        "return_message": "Saved todo-demo.txt",
        "steps": 11,
    }
    assert math.isclose(elapsed_seconds, 12.6, rel_tol=1e-9)
    assert db.get_job(job_id)["status"] == "completed"
|
||||
|
||||
|
||||
def test_non_completed_jobs_do_not_trigger_desktop_overlay(tmp_path: Path, monkeypatch) -> None:
    """Check that failed and cancelled runs never reach the overlay.

    Two jobs are executed one after the other with ``run_job`` stubbed to
    return a non-completed result (first a failure, then a cancellation).
    The overlay recorder must stay empty afterwards.
    """
    overlay = _OverlayRecorder()
    manager, db, _config = _build_manager(tmp_path, overlay)

    # Execution settings shared by both runs; a fresh cancel event is
    # supplied per call below.
    execution_settings = {
        "model": "gpt-5.4-mini",
        "disabled_tools": [],
        "safety_override": True,
        "max_steps": 60,
        "command_timeout": 45,
        "type_interval": 0.02,
        "click_pause": 0.10,
        "reasoning_effort": "medium",
        "screen_context_decay_steps": 4,
        "max_visual_context_images": 3,
        "native_automation_mode": "prefer",
        "dialog_timeout_seconds": 12.0,
        "focus_timeout_seconds": 8.0,
        "ui_element_timeout_seconds": 8.0,
        "max_retries_per_surface": 3,
        "pretty_logs": False,
        "no_failsafe": False,
    }

    def execute_with(outcome: AgentResult, job_id: str, objective: str) -> None:
        """Stub ``run_job`` to yield ``outcome`` and execute the given job."""
        monkeypatch.setattr(task_manager_module, "run_job", lambda **_kwargs: (outcome, _artifacts(tmp_path)))
        manager._execute_job(
            job_id=job_id,
            objective=objective,
            **execution_settings,
            cancel_event=threading.Event(),
        )

    failed_job_id = "job_overlay_failed"
    _create_job(db, failed_job_id, "Fail intentionally")
    failed_result = AgentResult(
        completed=False,
        result="Failure",
        return_message="Failure",
        data=None,
        steps=7,
        started_at=10.0,
        ended_at=18.0,
        usage=UsageSummary(),
        error="Failure",
    )
    execute_with(failed_result, failed_job_id, "Fail intentionally")

    cancelled_job_id = "job_overlay_cancelled"
    _create_job(db, cancelled_job_id, "Cancel intentionally")
    cancelled_result = AgentResult(
        completed=False,
        result="Cancelled",
        return_message="Cancelled",
        data=None,
        steps=4,
        started_at=20.0,
        ended_at=23.0,
        usage=UsageSummary(),
        error="Cancelled",
        cancelled=True,
    )
    execute_with(cancelled_result, cancelled_job_id, "Cancel intentionally")

    assert overlay.calls == []
|
||||
|
||||
|
||||
def test_rejected_job_does_not_trigger_desktop_overlay(tmp_path: Path, monkeypatch) -> None:
    """Check that a job blocked by the safety assessment never reaches the overlay.

    The OpenAI client factory and ``assess_task_safety`` are stubbed, the
    latter returning ``(False, "Unsafe request", {"decision": "blocked"})``.
    The job runs with ``safety_override=False``; afterwards the overlay must
    be untouched and the last recorded job event must be ``"job_rejected"``.
    """
    overlay = _OverlayRecorder()
    manager, db, _config = _build_manager(tmp_path, overlay)
    job_id = "job_overlay_rejected"
    objective = "Do something unsafe"
    _create_job(db, job_id, objective)

    def _stub_client(*_args, **_kwargs):
        """Return a placeholder object in place of a real client."""
        return object()

    def _blocking_assessment(*_args, **_kwargs):
        """Return a fixed verdict; presumably the first item is the 'allowed' flag."""
        return (False, "Unsafe request", {"decision": "blocked"})

    monkeypatch.setattr(task_manager_module, "create_openai_client", _stub_client)
    monkeypatch.setattr(task_manager_module, "assess_task_safety", _blocking_assessment)

    manager._execute_job(
        job_id=job_id,
        objective=objective,
        model="gpt-5.4-mini",
        disabled_tools=[],
        safety_override=False,
        max_steps=60,
        command_timeout=45,
        type_interval=0.02,
        click_pause=0.10,
        reasoning_effort="medium",
        screen_context_decay_steps=4,
        max_visual_context_images=3,
        native_automation_mode="prefer",
        dialog_timeout_seconds=12.0,
        focus_timeout_seconds=8.0,
        ui_element_timeout_seconds=8.0,
        max_retries_per_surface=3,
        pretty_logs=False,
        no_failsafe=False,
        cancel_event=threading.Event(),
    )

    assert overlay.calls == []
    recorded_events = db.get_job_events(job_id)
    assert recorded_events[-1]["event_type"] == "job_rejected"
|
||||
Reference in New Issue
Block a user