From a19b2852324ee960f228640e82753c7ea11e63dd Mon Sep 17 00:00:00 2001 From: Space-Banane Date: Wed, 27 May 2026 17:55:34 +0200 Subject: [PATCH] test: add pytest verification suite and gitea ci workflow --- .gitea/workflows/ci.yml | 35 ++++++++ README.md | 21 +++++ src/agent.py | 14 ++- src/runtime.py | 14 ++- src/server.py | 13 +-- tests/conftest.py | 10 +++ tests/test_pricing.py | 32 +++++++ tests/test_server_api.py | 181 +++++++++++++++++++++++++++++++++++++++ tests/test_storage.py | 53 ++++++++++++ 9 files changed, 360 insertions(+), 13 deletions(-) create mode 100644 .gitea/workflows/ci.yml create mode 100644 tests/conftest.py create mode 100644 tests/test_pricing.py create mode 100644 tests/test_server_api.py create mode 100644 tests/test_storage.py diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml new file mode 100644 index 0000000..5b8eca8 --- /dev/null +++ b/.gitea/workflows/ci.yml @@ -0,0 +1,35 @@ +name: CI + +on: + push: + branches: + - "**" + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install openai pillow python-dotenv fastapi uvicorn pytest httpx + + - name: Compile check + run: | + python -m py_compile main.py screenjob.py src/*.py tests/*.py + + - name: Run tests + env: + OPENAI_API_KEY: test_key + SCREENJOB_TOKEN: test_token + run: | + pytest -q diff --git a/README.md b/README.md index 0016d65..cee3f59 100644 --- a/README.md +++ b/README.md @@ -157,4 +157,25 @@ src/ storage.py task_manager.py ui.py +tests/ + conftest.py + test_pricing.py + test_server_api.py + test_storage.py +.gitea/ + workflows/ + ci.yml ``` + +## Verification + +Run local verification: + +```powershell +pytest -q +``` + +Gitea CI pipeline: + +- File: `.gitea/workflows/ci.yml` +- Runs compile checks + pytest on push and PR. diff --git a/src/agent.py b/src/agent.py index feb3c85..da1739c 100644 --- a/src/agent.py +++ b/src/agent.py @@ -18,9 +18,10 @@ from .utils import clamp, draw_global_grid, image_to_data_url, utc_now_iso try: import pyautogui except Exception as import_exc: - raise RuntimeError( - "pyautogui is required. Install dependencies with: pip install pyautogui pillow" - ) from import_exc + pyautogui = None # type: ignore[assignment] + _PYAUTOGUI_IMPORT_ERROR = import_exc +else: + _PYAUTOGUI_IMPORT_ERROR = None SYSTEM_PROMPT = """ @@ -52,6 +53,12 @@ class ScreenJobAgent: cancel_event: threading.Event | None = None, event_callback: Callable[[dict[str, Any]], None] | None = None, ) -> None: + if pyautogui is None: + raise RuntimeError( + "pyautogui is required for agent execution. " + "Install dependencies and ensure GUI access. " + f"Import error: {_PYAUTOGUI_IMPORT_ERROR}" + ) self.client = client self.logger = logger self.artifacts = artifacts @@ -784,4 +791,3 @@ class ScreenJobAgent: usage=self.usage, error=result_text, ) - diff --git a/src/runtime.py b/src/runtime.py index a5f35bd..580256e 100644 --- a/src/runtime.py +++ b/src/runtime.py @@ -14,9 +14,10 @@ from .utils import setup_artifacts, setup_logger try: import pyautogui except Exception as import_exc: - raise RuntimeError( - "pyautogui is required. Install dependencies with: pip install pyautogui pillow" - ) from import_exc + pyautogui = None # type: ignore[assignment] + _PYAUTOGUI_IMPORT_ERROR = import_exc +else: + _PYAUTOGUI_IMPORT_ERROR = None def create_openai_client(api_key: str) -> OpenAI: @@ -34,6 +35,12 @@ def run_job( event_callback: Callable[[dict[str, Any]], None] | None = None, logger: logging.Logger | None = None, ) -> tuple[AgentResult, RunArtifacts]: + if pyautogui is None: + raise RuntimeError( + "pyautogui is required for runtime execution. " + "Install dependencies and ensure GUI access. " + f"Import error: {_PYAUTOGUI_IMPORT_ERROR}" + ) pyautogui.FAILSAFE = not no_failsafe pyautogui.PAUSE = 0.05 @@ -54,4 +61,3 @@ def run_job( result = agent.run(objective) active_logger.info("Run finished. completed=%s elapsed=%.2fs", result.completed, result.ended_at - result.started_at) return result, artifacts - diff --git a/src/server.py b/src/server.py index 588079a..dda0d22 100644 --- a/src/server.py +++ b/src/server.py @@ -2,6 +2,7 @@ from __future__ import annotations import asyncio import secrets +from contextlib import asynccontextmanager from pathlib import Path from typing import Any @@ -73,20 +74,22 @@ def create_app(config: AppConfig | None = None) -> FastAPI: if not app_config.screenjob_token: raise RuntimeError("SCREENJOB_TOKEN is required in environment or .env.") - app = FastAPI(title="ScreenJob API", version="1.0.0") db = HistoryDB(app_config.db_path) ws_hub = _WebSocketHub() manager = JobManager(config=app_config, db=db, broadcast=ws_hub.broadcast_from_thread) + @asynccontextmanager + async def lifespan(_: FastAPI): + ws_hub.set_loop(asyncio.get_running_loop()) + yield + + app = FastAPI(title="ScreenJob API", version="1.0.0", lifespan=lifespan) + app.state.config = app_config app.state.db = db app.state.ws_hub = ws_hub app.state.manager = manager - @app.on_event("startup") - async def _on_startup() -> None: - ws_hub.set_loop(asyncio.get_running_loop()) - def _extract_token( authorization: str | None, x_screenjob_token: str | None, diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..e4370f1 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + diff --git a/tests/test_pricing.py b/tests/test_pricing.py new file mode 100644 index 0000000..907350e --- /dev/null +++ b/tests/test_pricing.py @@ -0,0 +1,32 @@ +from src.models import UsageSummary +from src.pricing import estimate_cost_usd, normalize_model_for_pricing + + +def test_normalize_model_for_pricing() -> None: + assert normalize_model_for_pricing("gpt-5.4-mini") == "gpt-5.4-mini" + assert normalize_model_for_pricing("gpt-5.4-mini-2026-05-01") == "gpt-5.4-mini" + assert normalize_model_for_pricing("unknown-model") == "unknown-model" + + +def test_estimate_cost_with_cached_tokens() -> None: + usage = UsageSummary( + input_tokens=100_000, + cached_input_tokens=20_000, + output_tokens=50_000, + total_tokens=150_000, + ) + cost, model = estimate_cost_usd("gpt-5.4-mini", usage) + assert model == "gpt-5.4-mini" + assert cost is not None + # Non-cached input: 80k at $0.75/M = 0.06 + # Cached input: 20k at $0.075/M = 0.0015 + # Output: 50k at $4.50/M = 0.225 + assert abs(cost - 0.2865) < 1e-9 + + +def test_estimate_cost_unknown_model_returns_none() -> None: + usage = UsageSummary(input_tokens=10, output_tokens=10) + cost, model = estimate_cost_usd("my-new-model", usage) + assert model == "my-new-model" + assert cost is None + diff --git a/tests/test_server_api.py b/tests/test_server_api.py new file mode 100644 index 0000000..88afbf7 --- /dev/null +++ b/tests/test_server_api.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from fastapi.testclient import TestClient + +import src.server as server_module +from src.config import AppConfig + + +class FakeJobManager: + def __init__(self, *, config: AppConfig, db: Any, broadcast: Any = None) -> None: + self.config = config + self._jobs: dict[str, dict[str, Any]] = {} + self._events: dict[str, list[dict[str, Any]]] = {} + self._counter = 0 + self.last_submit_payload: dict[str, Any] | None = None + + def submit_job( + self, + *, + objective: str, + model: str | None = None, + max_steps: int = 60, + command_timeout: int = 45, + type_interval: float = 0.02, + click_pause: float = 0.10, + disabled_tools: list[str] | None = None, + safety_override: bool = False, + no_failsafe: bool = False, + ) -> str: + self._counter += 1 + job_id = f"job_fake_{self._counter:03d}" + selected_model = (model or self.config.default_model).strip() + self.last_submit_payload = { + "objective": objective, + "model": selected_model, + "disabled_tools": disabled_tools or [], + "safety_override": safety_override, + "max_steps": max_steps, + "command_timeout": command_timeout, + "type_interval": type_interval, + "click_pause": click_pause, + "no_failsafe": no_failsafe, + } + self._jobs[job_id] = { + "job_id": job_id, + "objective": objective, + "model": selected_model, + "status": "running", + "usage": { + "input_tokens": 10, + "cached_input_tokens": 2, + "output_tokens": 4, + "reasoning_tokens": 0, + "total_tokens": 14, + "estimated_cost_usd": 0.0001, + }, + "artifacts_dir": str(self.config.runs_dir.resolve()), + } + self._events[job_id] = [ + { + "id": 1, + "job_id": job_id, + "ts": "2026-05-27T00:00:00Z", + "step": 1, + "event_type": "tool_called", + "payload": {"tool": "execute_command"}, + } + ] + return job_id + + def list_jobs(self, limit: int = 100) -> list[dict[str, Any]]: + return list(self._jobs.values())[:limit] + + def get_job(self, job_id: str) -> dict[str, Any] | None: + return self._jobs.get(job_id) + + def get_events(self, job_id: str, limit: int = 500) -> list[dict[str, Any]]: + return self._events.get(job_id, [])[:limit] + + def cancel_job(self, job_id: str) -> bool: + if job_id not in self._jobs: + return False + self._jobs[job_id]["status"] = "cancelling" + return True + + def stats(self) -> dict[str, Any]: + return { + "total_jobs": len(self._jobs), + "running_jobs": sum(1 for x in self._jobs.values() if x["status"] == "running"), + "completed_jobs": 0, + "failed_jobs": 0, + "cancelled_jobs": 0, + "total_estimated_cost": sum(float((x["usage"] or {}).get("estimated_cost_usd") or 0) for x in self._jobs.values()), + "live_running_threads": 0, + } + + +def _build_app(tmp_path: Path, monkeypatch: Any, disable_ui: bool = False): + monkeypatch.setattr(server_module, "JobManager", FakeJobManager) + config = AppConfig( + openai_api_key="test_key", + screenjob_token="test_token", + disable_ui=disable_ui, + default_model="gpt-5.4-mini", + safety_model="gpt-5.4-mini", + host="127.0.0.1", + port=8787, + runs_dir=tmp_path / "runs", + db_path=tmp_path / "screenjob_test.db", + ) + config.runs_dir.mkdir(parents=True, exist_ok=True) + app = server_module.create_app(config) + return app, config + + +def test_api_requires_auth(tmp_path: Path, monkeypatch: Any) -> None: + app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False) + client = TestClient(app) + assert client.get("/api/jobs").status_code == 401 + assert client.post("/api/jobs", json={"job": "x"}).status_code == 401 + + +def test_create_job_returns_only_job_id_and_defaults_model(tmp_path: Path, monkeypatch: Any) -> None: + app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False) + client = TestClient(app) + headers = {"Authorization": "Bearer test_token"} + + response = client.post( + "/api/jobs", + headers=headers, + json={"job": "Open amazon.de", "disabled_tools": ["click"], "safety_override": True}, + ) + assert response.status_code == 200 + payload = response.json() + assert list(payload.keys()) == ["job_id"] + job_id = payload["job_id"] + + manager = app.state.manager + assert manager.last_submit_payload["model"] == "gpt-5.4-mini" + assert manager.last_submit_payload["disabled_tools"] == ["click"] + + status_res = client.get(f"/api/jobs/{job_id}/status", headers=headers) + assert status_res.status_code == 200 + assert status_res.json()["job_id"] == job_id + + +def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None: + app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False) + client = TestClient(app) + headers = {"Authorization": "Bearer test_token"} + create = client.post("/api/jobs", headers=headers, json={"job": "Test job"}) + job_id = create.json()["job_id"] + + events = client.get(f"/api/jobs/{job_id}/events?limit=20", headers=headers) + assert events.status_code == 200 + assert len(events.json()["events"]) >= 1 + + cancel = client.post(f"/api/jobs/{job_id}/cancel", headers=headers) + assert cancel.status_code == 200 + assert cancel.json()["cancel_requested"] is True + + status_after = client.get(f"/api/jobs/{job_id}", headers=headers).json() + assert status_after["status"] == "cancelling" + + +def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None: + app_enabled, _ = _build_app(tmp_path / "enabled", monkeypatch, disable_ui=False) + client_enabled = TestClient(app_enabled) + root_enabled = client_enabled.get("/") + assert root_enabled.status_code == 200 + assert "ScreenJob Monitor" in root_enabled.text + + app_disabled, _ = _build_app(tmp_path / "disabled", monkeypatch, disable_ui=True) + client_disabled = TestClient(app_disabled) + root_disabled = client_disabled.get("/") + assert root_disabled.status_code == 200 + assert root_disabled.json()["ui_disabled"] is True + diff --git a/tests/test_storage.py b/tests/test_storage.py new file mode 100644 index 0000000..40d38f5 --- /dev/null +++ b/tests/test_storage.py @@ -0,0 +1,53 @@ +from pathlib import Path + +from src.storage import HistoryDB + + +def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None: + db = HistoryDB(tmp_path / "screenjob_test.db") + job_id = "job_test_001" + db.create_job( + job_id=job_id, + objective="Open example.com", + model="gpt-5.4-mini", + created_at="2026-05-27T00:00:00Z", + safety_override=False, + disabled_tools=["click"], + ) + db.add_event( + job_id=job_id, + ts="2026-05-27T00:00:01Z", + step=1, + event_type="tool_called", + payload={"tool": "see_screen"}, + ) + db.update_job( + job_id, + status="completed", + ended_at="2026-05-27T00:00:02Z", + result="Done", + steps=2, + estimated_cost_usd=0.1234, + ) + + job = db.get_job(job_id) + assert job is not None + assert job["status"] == "completed" + assert job["model"] == "gpt-5.4-mini" + assert job["disabled_tools"] == ["click"] + assert job["usage"]["estimated_cost_usd"] == 0.1234 + + events = db.get_job_events(job_id, limit=10) + assert len(events) == 1 + assert events[0]["event_type"] == "tool_called" + assert events[0]["payload"]["tool"] == "see_screen" + + jobs = db.list_jobs(limit=10) + assert len(jobs) == 1 + assert jobs[0]["job_id"] == job_id + + stats = db.stats() + assert stats["total_jobs"] == 1 + assert stats["completed_jobs"] == 1 + assert abs(stats["total_estimated_cost"] - 0.1234) < 1e-9 +