test: add pytest verification suite and gitea ci workflow

2026-05-27 17:55:34 +02:00
parent 8fe6ad2d75
commit a19b285232
9 changed files with 360 additions and 13 deletions
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@@ -0,0 +1,35 @@
+# Gitea Actions workflow: byte-compiles the Python sources and runs the pytest suite.
+name: CI
+
+on:
+  # Run for pushes to any branch and for every pull request.
+  push:
+    branches:
+      - "**"
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      # NOTE(review): dependencies are listed inline and unpinned. pyautogui is
+      # not installed here; presumably the suite relies on src/agent.py and
+      # src/runtime.py importing cleanly without it -- confirm.
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install openai pillow python-dotenv fastapi uvicorn pytest httpx
+
+      # Byte-compile every source and test module so syntax errors surface
+      # before pytest starts collecting.
+      - name: Compile check
+        run: |
+          python -m py_compile main.py screenjob.py src/*.py tests/*.py
+
+      # Placeholder credentials only; presumably read by the app's config
+      # loader -- verify against src/config.py.
+      - name: Run tests
+        env:
+          OPENAI_API_KEY: test_key
+          SCREENJOB_TOKEN: test_token
+        run: |
+          pytest -q
--- a/README.md
+++ b/README.md
@@ -157,4 +157,25 @@ src/
  storage.py
  task_manager.py
  ui.py
+tests/
+  conftest.py
+  test_pricing.py
+  test_server_api.py
+  test_storage.py
+.gitea/
+  workflows/
+    ci.yml
 ```
+
+## Verification
+
+Run local verification:
+
+```powershell
+pytest -q
+```
+
+Gitea CI pipeline:
+
+- File: `.gitea/workflows/ci.yml`
+- Runs `py_compile` compile checks and `pytest` on every push and pull request.
--- a/src/agent.py
+++ b/src/agent.py
@@ -18,9 +18,10 @@ from .utils import clamp, draw_global_grid, image_to_data_url, utc_now_iso
 try:
     import pyautogui
 except Exception as import_exc:
-    raise RuntimeError(
-        "pyautogui is required. Install dependencies with: pip install pyautogui pillow"
-    ) from import_exc
+    # Defer the failure: keep this module importable without pyautogui and
+    # remember the cause so it can be reported when the agent is constructed.
+    pyautogui = None  # type: ignore[assignment]
+    _PYAUTOGUI_IMPORT_ERROR = import_exc
+else:
+    _PYAUTOGUI_IMPORT_ERROR = None


 SYSTEM_PROMPT = """
@@ -52,6 +53,12 @@ class ScreenJobAgent:
        cancel_event: threading.Event | None = None,
        event_callback: Callable[[dict[str, Any]], None] | None = None,
    ) -> None:
+        if pyautogui is None:
+            raise RuntimeError(
+                "pyautogui is required for agent execution. "
+                "Install dependencies and ensure GUI access. "
+                f"Import error: {_PYAUTOGUI_IMPORT_ERROR}"
+            )
        self.client = client
        self.logger = logger
        self.artifacts = artifacts
@@ -784,4 +791,3 @@ class ScreenJobAgent:
            usage=self.usage,
            error=result_text,
        )
-
--- a/src/runtime.py
+++ b/src/runtime.py
@@ -14,9 +14,10 @@ from .utils import setup_artifacts, setup_logger
 try:
     import pyautogui
 except Exception as import_exc:
-    raise RuntimeError(
-        "pyautogui is required. Install dependencies with: pip install pyautogui pillow"
-    ) from import_exc
+    # Defer the failure: keep this module importable without pyautogui and
+    # remember the cause so run_job can report it when it is actually called.
+    pyautogui = None  # type: ignore[assignment]
+    _PYAUTOGUI_IMPORT_ERROR = import_exc
+else:
+    _PYAUTOGUI_IMPORT_ERROR = None


 def create_openai_client(api_key: str) -> OpenAI:
@@ -34,6 +35,12 @@ def run_job(
    event_callback: Callable[[dict[str, Any]], None] | None = None,
    logger: logging.Logger | None = None,
 ) -> tuple[AgentResult, RunArtifacts]:
+    if pyautogui is None:
+        raise RuntimeError(
+            "pyautogui is required for runtime execution. "
+            "Install dependencies and ensure GUI access. "
+            f"Import error: {_PYAUTOGUI_IMPORT_ERROR}"
+        )
    pyautogui.FAILSAFE = not no_failsafe
    pyautogui.PAUSE = 0.05

@@ -54,4 +61,3 @@ def run_job(
    result = agent.run(objective)
    active_logger.info("Run finished. completed=%s elapsed=%.2fs", result.completed, result.ended_at - result.started_at)
    return result, artifacts
-
--- a/src/server.py
+++ b/src/server.py
@@ -2,6 +2,7 @@ from __future__ import annotations

 import asyncio
 import secrets
+from contextlib import asynccontextmanager
 from pathlib import Path
 from typing import Any

@@ -73,20 +74,22 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
    if not app_config.screenjob_token:
        raise RuntimeError("SCREENJOB_TOKEN is required in environment or .env.")

-    app = FastAPI(title="ScreenJob API", version="1.0.0")
    db = HistoryDB(app_config.db_path)
    ws_hub = _WebSocketHub()
    manager = JobManager(config=app_config, db=db, broadcast=ws_hub.broadcast_from_thread)

+    @asynccontextmanager
+    async def lifespan(_: FastAPI):
+        """Give the WebSocket hub the running event loop at startup; no shutdown work."""
+        ws_hub.set_loop(asyncio.get_running_loop())
+        yield
+
+    app = FastAPI(title="ScreenJob API", version="1.0.0", lifespan=lifespan)
+
    app.state.config = app_config
    app.state.db = db
    app.state.ws_hub = ws_hub
    app.state.manager = manager

-    @app.on_event("startup")
-    async def _on_startup() -> None:
-        ws_hub.set_loop(asyncio.get_running_loop())
-
    def _extract_token(
        authorization: str | None,
        x_screenjob_token: str | None,
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -0,0 +1,10 @@
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+
+# Repository root (the parent of tests/). Put it on sys.path so the test
+# modules can resolve imports such as `from src.models import ...`.
+ROOT = Path(__file__).resolve().parents[1]
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
--- a/tests/test_pricing.py
+++ b/tests/test_pricing.py
@@ -0,0 +1,32 @@
+from src.models import UsageSummary
+from src.pricing import estimate_cost_usd, normalize_model_for_pricing
+
+
+def test_normalize_model_for_pricing() -> None:
+    """A dated snapshot name maps to its base model; other names pass through unchanged."""
+    expected_by_input = {
+        "gpt-5.4-mini": "gpt-5.4-mini",
+        "gpt-5.4-mini-2026-05-01": "gpt-5.4-mini",
+        "unknown-model": "unknown-model",
+    }
+    for raw_name, normalized in expected_by_input.items():
+        assert normalize_model_for_pricing(raw_name) == normalized
+
+
+def test_estimate_cost_with_cached_tokens() -> None:
+    """Cached input tokens are priced at the cached rate, the remainder at the full input rate."""
+    token_usage = UsageSummary(
+        input_tokens=100_000,
+        cached_input_tokens=20_000,
+        output_tokens=50_000,
+        total_tokens=150_000,
+    )
+    estimate = estimate_cost_usd("gpt-5.4-mini", token_usage)
+    cost, model = estimate
+    assert model == "gpt-5.4-mini"
+    assert cost is not None
+    # Expected breakdown:
+    #   non-cached input  80k at $0.75/M  = 0.06
+    #   cached input      20k at $0.075/M = 0.0015
+    #   output            50k at $4.50/M  = 0.225
+    expected_cost = 0.2865
+    deviation = abs(cost - expected_cost)
+    assert deviation < 1e-9
+
+
+def test_estimate_cost_unknown_model_returns_none() -> None:
+    """A model with no pricing entry yields no cost, and its name is echoed back."""
+    tiny_usage = UsageSummary(input_tokens=10, output_tokens=10)
+    estimate = estimate_cost_usd("my-new-model", tiny_usage)
+    cost, model = estimate
+    assert model == "my-new-model"
+    assert cost is None
+
--- a/tests/test_server_api.py
+++ b/tests/test_server_api.py
@@ -0,0 +1,181 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from fastapi.testclient import TestClient
+
+import src.server as server_module
+from src.config import AppConfig
+
+
+class FakeJobManager:
+    """In-memory stand-in for the server's JobManager; no job is ever executed.
+
+    Each submitted job is stored with status "running", canned usage numbers
+    and a single canned event, so the HTTP layer can be exercised on its own.
+    """
+
+    def __init__(self, *, config: AppConfig, db: Any, broadcast: Any = None) -> None:
+        # `db` and `broadcast` are accepted so the fake can be constructed with
+        # the same keyword arguments as the real manager; both are ignored.
+        self.config = config
+        self._counter = 0
+        self._jobs: dict[str, dict[str, Any]] = {}
+        self._events: dict[str, list[dict[str, Any]]] = {}
+        self.last_submit_payload: dict[str, Any] | None = None
+
+    def submit_job(
+        self,
+        *,
+        objective: str,
+        model: str | None = None,
+        max_steps: int = 60,
+        command_timeout: int = 45,
+        type_interval: float = 0.02,
+        click_pause: float = 0.10,
+        disabled_tools: list[str] | None = None,
+        safety_override: bool = False,
+        no_failsafe: bool = False,
+    ) -> str:
+        """Record a fake job, remember the arguments it was given, and return its id."""
+        self._counter += 1
+        job_id = f"job_fake_{self._counter:03d}"
+        # Fall back to the configured default when no model was requested.
+        selected_model = (model or self.config.default_model).strip()
+
+        # Kept so tests can inspect exactly what the API layer passed in.
+        self.last_submit_payload = {
+            "objective": objective,
+            "model": selected_model,
+            "disabled_tools": disabled_tools or [],
+            "safety_override": safety_override,
+            "max_steps": max_steps,
+            "command_timeout": command_timeout,
+            "type_interval": type_interval,
+            "click_pause": click_pause,
+            "no_failsafe": no_failsafe,
+        }
+
+        canned_usage = {
+            "input_tokens": 10,
+            "cached_input_tokens": 2,
+            "output_tokens": 4,
+            "reasoning_tokens": 0,
+            "total_tokens": 14,
+            "estimated_cost_usd": 0.0001,
+        }
+        artifacts_dir = str(self.config.runs_dir.resolve())
+        self._jobs[job_id] = {
+            "job_id": job_id,
+            "objective": objective,
+            "model": selected_model,
+            "status": "running",
+            "usage": canned_usage,
+            "artifacts_dir": artifacts_dir,
+        }
+
+        first_event = {
+            "id": 1,
+            "job_id": job_id,
+            "ts": "2026-05-27T00:00:00Z",
+            "step": 1,
+            "event_type": "tool_called",
+            "payload": {"tool": "execute_command"},
+        }
+        self._events[job_id] = [first_event]
+        return job_id
+
+    def list_jobs(self, limit: int = 100) -> list[dict[str, Any]]:
+        """Return up to `limit` job records in submission order."""
+        every_job = list(self._jobs.values())
+        return every_job[:limit]
+
+    def get_job(self, job_id: str) -> dict[str, Any] | None:
+        """Return the stored record for `job_id`, or None when it is unknown."""
+        return self._jobs.get(job_id)
+
+    def get_events(self, job_id: str, limit: int = 500) -> list[dict[str, Any]]:
+        """Return up to `limit` events for `job_id`; an unknown id yields an empty list."""
+        recorded = self._events.get(job_id, [])
+        return recorded[:limit]
+
+    def cancel_job(self, job_id: str) -> bool:
+        """Flag a known job as "cancelling"; return False when the id is unknown."""
+        job = self._jobs.get(job_id)
+        if job is None:
+            return False
+        job["status"] = "cancelling"
+        return True
+
+    def stats(self) -> dict[str, Any]:
+        """Return aggregate counters; only totals, running count and cost are computed."""
+        jobs = list(self._jobs.values())
+        running = [job for job in jobs if job["status"] == "running"]
+        total_cost = sum(
+            float((job["usage"] or {}).get("estimated_cost_usd") or 0) for job in jobs
+        )
+        return {
+            "total_jobs": len(jobs),
+            "running_jobs": len(running),
+            "completed_jobs": 0,
+            "failed_jobs": 0,
+            "cancelled_jobs": 0,
+            "total_estimated_cost": total_cost,
+            "live_running_threads": 0,
+        }
+
+
+def _build_app(tmp_path: Path, monkeypatch: Any, disable_ui: bool = False):
+    """Build the app with FakeJobManager patched in and all paths under `tmp_path`.
+
+    Returns the `(app, config)` pair.
+    """
+    # Must happen before create_app so the app is wired to the fake manager.
+    monkeypatch.setattr(server_module, "JobManager", FakeJobManager)
+    settings: dict[str, Any] = {
+        "openai_api_key": "test_key",
+        "screenjob_token": "test_token",
+        "disable_ui": disable_ui,
+        "default_model": "gpt-5.4-mini",
+        "safety_model": "gpt-5.4-mini",
+        "host": "127.0.0.1",
+        "port": 8787,
+        "runs_dir": tmp_path / "runs",
+        "db_path": tmp_path / "screenjob_test.db",
+    }
+    config = AppConfig(**settings)
+    # Also creates tmp_path itself when a not-yet-existing subdirectory is passed in.
+    config.runs_dir.mkdir(parents=True, exist_ok=True)
+    return server_module.create_app(config), config
+
+
+def test_api_requires_auth(tmp_path: Path, monkeypatch: Any) -> None:
+    """Listing and creating jobs without credentials are both rejected with 401."""
+    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
+    # Use the client as a context manager so the app's lifespan handler runs
+    # and the client is shut down cleanly when the test ends.
+    with TestClient(app) as client:
+        assert client.get("/api/jobs").status_code == 401
+        assert client.post("/api/jobs", json={"job": "x"}).status_code == 401
+
+
+def test_create_job_returns_only_job_id_and_defaults_model(tmp_path: Path, monkeypatch: Any) -> None:
+    """Creating a job returns just its id and falls back to the configured default model."""
+    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
+    headers = {"Authorization": "Bearer test_token"}
+
+    # Use the client as a context manager so the app's lifespan handler runs
+    # and the client is shut down cleanly when the test ends.
+    with TestClient(app) as client:
+        response = client.post(
+            "/api/jobs",
+            headers=headers,
+            json={"job": "Open amazon.de", "disabled_tools": ["click"], "safety_override": True},
+        )
+        assert response.status_code == 200
+        payload = response.json()
+        # The response must expose the job id and nothing else.
+        assert list(payload.keys()) == ["job_id"]
+        job_id = payload["job_id"]
+
+        # No model was sent, so the manager must have received the default one.
+        manager = app.state.manager
+        assert manager.last_submit_payload["model"] == "gpt-5.4-mini"
+        assert manager.last_submit_payload["disabled_tools"] == ["click"]
+
+        status_res = client.get(f"/api/jobs/{job_id}/status", headers=headers)
+        assert status_res.status_code == 200
+        assert status_res.json()["job_id"] == job_id
+
+
+def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:
+    """A created job exposes its events and can be moved to "cancelling" via the API."""
+    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
+    headers = {"Authorization": "Bearer test_token"}
+
+    # Use the client as a context manager so the app's lifespan handler runs
+    # and the client is shut down cleanly when the test ends.
+    with TestClient(app) as client:
+        create = client.post("/api/jobs", headers=headers, json={"job": "Test job"})
+        # Fail here with a clear status mismatch rather than a KeyError below.
+        assert create.status_code == 200
+        job_id = create.json()["job_id"]
+
+        events = client.get(f"/api/jobs/{job_id}/events?limit=20", headers=headers)
+        assert events.status_code == 200
+        assert len(events.json()["events"]) >= 1
+
+        cancel = client.post(f"/api/jobs/{job_id}/cancel", headers=headers)
+        assert cancel.status_code == 200
+        assert cancel.json()["cancel_requested"] is True
+
+        status_after = client.get(f"/api/jobs/{job_id}", headers=headers).json()
+        assert status_after["status"] == "cancelling"
+
+
+def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
+    """The root route serves the monitor page unless `disable_ui` is set, then it returns JSON."""
+    # Each client is used as a context manager so the app's lifespan handler
+    # runs and the client is shut down before the next app is built.
+    app_enabled, _ = _build_app(tmp_path / "enabled", monkeypatch, disable_ui=False)
+    with TestClient(app_enabled) as client_enabled:
+        root_enabled = client_enabled.get("/")
+        assert root_enabled.status_code == 200
+        assert "ScreenJob Monitor" in root_enabled.text
+
+    app_disabled, _ = _build_app(tmp_path / "disabled", monkeypatch, disable_ui=True)
+    with TestClient(app_disabled) as client_disabled:
+        root_disabled = client_disabled.get("/")
+        assert root_disabled.status_code == 200
+        assert root_disabled.json()["ui_disabled"] is True
+
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -0,0 +1,53 @@
+from pathlib import Path
+
+from src.storage import HistoryDB
+
+
+def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
+    """A job written via create/update plus one event reads back through get, list and stats."""
+    job_id = "job_test_001"
+    db_file = tmp_path / "screenjob_test.db"
+    db = HistoryDB(db_file)
+
+    # Write side: create the job, attach one event, then mark it completed.
+    db.create_job(
+        job_id=job_id,
+        objective="Open example.com",
+        model="gpt-5.4-mini",
+        created_at="2026-05-27T00:00:00Z",
+        safety_override=False,
+        disabled_tools=["click"],
+    )
+    db.add_event(
+        job_id=job_id,
+        ts="2026-05-27T00:00:01Z",
+        step=1,
+        event_type="tool_called",
+        payload={"tool": "see_screen"},
+    )
+    db.update_job(
+        job_id,
+        status="completed",
+        ended_at="2026-05-27T00:00:02Z",
+        result="Done",
+        steps=2,
+        estimated_cost_usd=0.1234,
+    )
+
+    # Read side: the single-job lookup reflects both the create and the update.
+    stored = db.get_job(job_id)
+    assert stored is not None
+    assert stored["status"] == "completed"
+    assert stored["model"] == "gpt-5.4-mini"
+    assert stored["disabled_tools"] == ["click"]
+    assert stored["usage"]["estimated_cost_usd"] == 0.1234
+
+    stored_events = db.get_job_events(job_id, limit=10)
+    assert len(stored_events) == 1
+    first_event = stored_events[0]
+    assert first_event["event_type"] == "tool_called"
+    assert first_event["payload"]["tool"] == "see_screen"
+
+    listing = db.list_jobs(limit=10)
+    assert len(listing) == 1
+    assert listing[0]["job_id"] == job_id
+
+    totals = db.stats()
+    assert totals["total_jobs"] == 1
+    assert totals["completed_jobs"] == 1
+    assert abs(totals["total_estimated_cost"] - 0.1234) < 1e-9
+