test: add pytest verification suite and gitea ci workflow
All checks were successful
CI / test (push) Successful in 48s

This commit is contained in:
Space-Banane
2026-05-27 17:55:34 +02:00
parent 8fe6ad2d75
commit a19b285232
9 changed files with 360 additions and 13 deletions

35
.gitea/workflows/ci.yml Normal file
View File

@@ -0,0 +1,35 @@
# Gitea Actions workflow: byte-compile the sources, then run the pytest suite.
name: CI

on:
  push:
    branches: ["**"]  # "**" matches every branch name
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install openai pillow python-dotenv fastapi uvicorn pytest httpx

      # Catches syntax errors in every module before any test is collected.
      - name: Compile check
        run: |
          python -m py_compile main.py screenjob.py src/*.py tests/*.py

      - name: Run tests
        env:
          # Placeholder values only; these are not real credentials.
          OPENAI_API_KEY: test_key
          SCREENJOB_TOKEN: test_token
        run: |
          pytest -q

View File

@@ -157,4 +157,25 @@ src/
storage.py
task_manager.py
ui.py
tests/
conftest.py
test_pricing.py
test_server_api.py
test_storage.py
.gitea/
workflows/
ci.yml
```
## Verification
Run local verification:
```powershell
pytest -q
```
Gitea CI pipeline:
- File: `.gitea/workflows/ci.yml`
- Runs the compile check and then pytest on every push and on every pull request.

View File

@@ -18,9 +18,10 @@ from .utils import clamp, draw_global_grid, image_to_data_url, utc_now_iso
try: try:
import pyautogui import pyautogui
except Exception as import_exc: except Exception as import_exc:
raise RuntimeError( pyautogui = None # type: ignore[assignment]
"pyautogui is required. Install dependencies with: pip install pyautogui pillow" _PYAUTOGUI_IMPORT_ERROR = import_exc
) from import_exc else:
_PYAUTOGUI_IMPORT_ERROR = None
SYSTEM_PROMPT = """ SYSTEM_PROMPT = """
@@ -52,6 +53,12 @@ class ScreenJobAgent:
cancel_event: threading.Event | None = None, cancel_event: threading.Event | None = None,
event_callback: Callable[[dict[str, Any]], None] | None = None, event_callback: Callable[[dict[str, Any]], None] | None = None,
) -> None: ) -> None:
if pyautogui is None:
raise RuntimeError(
"pyautogui is required for agent execution. "
"Install dependencies and ensure GUI access. "
f"Import error: {_PYAUTOGUI_IMPORT_ERROR}"
)
self.client = client self.client = client
self.logger = logger self.logger = logger
self.artifacts = artifacts self.artifacts = artifacts
@@ -784,4 +791,3 @@ class ScreenJobAgent:
usage=self.usage, usage=self.usage,
error=result_text, error=result_text,
) )

View File

@@ -14,9 +14,10 @@ from .utils import setup_artifacts, setup_logger
try: try:
import pyautogui import pyautogui
except Exception as import_exc: except Exception as import_exc:
raise RuntimeError( pyautogui = None # type: ignore[assignment]
"pyautogui is required. Install dependencies with: pip install pyautogui pillow" _PYAUTOGUI_IMPORT_ERROR = import_exc
) from import_exc else:
_PYAUTOGUI_IMPORT_ERROR = None
def create_openai_client(api_key: str) -> OpenAI: def create_openai_client(api_key: str) -> OpenAI:
@@ -34,6 +35,12 @@ def run_job(
event_callback: Callable[[dict[str, Any]], None] | None = None, event_callback: Callable[[dict[str, Any]], None] | None = None,
logger: logging.Logger | None = None, logger: logging.Logger | None = None,
) -> tuple[AgentResult, RunArtifacts]: ) -> tuple[AgentResult, RunArtifacts]:
if pyautogui is None:
raise RuntimeError(
"pyautogui is required for runtime execution. "
"Install dependencies and ensure GUI access. "
f"Import error: {_PYAUTOGUI_IMPORT_ERROR}"
)
pyautogui.FAILSAFE = not no_failsafe pyautogui.FAILSAFE = not no_failsafe
pyautogui.PAUSE = 0.05 pyautogui.PAUSE = 0.05
@@ -54,4 +61,3 @@ def run_job(
result = agent.run(objective) result = agent.run(objective)
active_logger.info("Run finished. completed=%s elapsed=%.2fs", result.completed, result.ended_at - result.started_at) active_logger.info("Run finished. completed=%s elapsed=%.2fs", result.completed, result.ended_at - result.started_at)
return result, artifacts return result, artifacts

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import asyncio import asyncio
import secrets import secrets
from contextlib import asynccontextmanager
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@@ -73,20 +74,22 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
if not app_config.screenjob_token: if not app_config.screenjob_token:
raise RuntimeError("SCREENJOB_TOKEN is required in environment or .env.") raise RuntimeError("SCREENJOB_TOKEN is required in environment or .env.")
app = FastAPI(title="ScreenJob API", version="1.0.0")
db = HistoryDB(app_config.db_path) db = HistoryDB(app_config.db_path)
ws_hub = _WebSocketHub() ws_hub = _WebSocketHub()
manager = JobManager(config=app_config, db=db, broadcast=ws_hub.broadcast_from_thread) manager = JobManager(config=app_config, db=db, broadcast=ws_hub.broadcast_from_thread)
@asynccontextmanager
async def lifespan(_: FastAPI):
ws_hub.set_loop(asyncio.get_running_loop())
yield
app = FastAPI(title="ScreenJob API", version="1.0.0", lifespan=lifespan)
app.state.config = app_config app.state.config = app_config
app.state.db = db app.state.db = db
app.state.ws_hub = ws_hub app.state.ws_hub = ws_hub
app.state.manager = manager app.state.manager = manager
@app.on_event("startup")
async def _on_startup() -> None:
ws_hub.set_loop(asyncio.get_running_loop())
def _extract_token( def _extract_token(
authorization: str | None, authorization: str | None,
x_screenjob_token: str | None, x_screenjob_token: str | None,

10
tests/conftest.py Normal file
View File

@@ -0,0 +1,10 @@
from __future__ import annotations
import sys
from pathlib import Path
# Directory one level above the folder that contains this file.
ROOT = Path(__file__).resolve().parent.parent

# Put that directory at the front of the import path, unless it is already listed.
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))

32
tests/test_pricing.py Normal file
View File

@@ -0,0 +1,32 @@
from src.models import UsageSummary
from src.pricing import estimate_cost_usd, normalize_model_for_pricing
def test_normalize_model_for_pricing() -> None:
    """Check name normalization for a base, a date-suffixed and an unknown model."""
    expected_by_input = {
        "gpt-5.4-mini": "gpt-5.4-mini",
        # A dated snapshot name collapses to its base model name.
        "gpt-5.4-mini-2026-05-01": "gpt-5.4-mini",
        # A name with no known base is returned unchanged.
        "unknown-model": "unknown-model",
    }
    for raw_name, normalized in expected_by_input.items():
        assert normalize_model_for_pricing(raw_name) == normalized
def test_estimate_cost_with_cached_tokens() -> None:
    """Check the gpt-5.4-mini estimate when part of the input was cached."""
    token_counts = UsageSummary(
        input_tokens=100_000,
        cached_input_tokens=20_000,
        output_tokens=50_000,
        total_tokens=150_000,
    )

    estimate, priced_model = estimate_cost_usd("gpt-5.4-mini", token_counts)

    assert priced_model == "gpt-5.4-mini"
    assert estimate is not None

    # Expected breakdown (prices per million tokens):
    #   non-cached input  80k at $0.75   = 0.06
    #   cached input      20k at $0.075  = 0.0015
    #   output            50k at $4.50   = 0.225
    expected_total = 0.2865
    assert abs(estimate - expected_total) < 1e-9
def test_estimate_cost_unknown_model_returns_none() -> None:
    """An unrecognized model yields no cost, and its name is echoed back unchanged."""
    tiny_usage = UsageSummary(input_tokens=10, output_tokens=10)

    estimate, reported_model = estimate_cost_usd("my-new-model", tiny_usage)

    assert reported_model == "my-new-model"
    assert estimate is None

181
tests/test_server_api.py Normal file
View File

@@ -0,0 +1,181 @@
from __future__ import annotations
from pathlib import Path
from typing import Any
from fastapi.testclient import TestClient
import src.server as server_module
from src.config import AppConfig
class FakeJobManager:
    """In-memory test double for the job manager used by the server module.

    Jobs and their events live in plain dictionaries, nothing is executed,
    and the arguments of the most recent ``submit_job`` call are kept in
    ``last_submit_payload`` so tests can inspect them.
    """

    def __init__(self, *, config: AppConfig, db: Any, broadcast: Any = None) -> None:
        """Store ``config``; ``db`` and ``broadcast`` are accepted but unused."""
        self.config = config
        self._jobs: dict[str, dict[str, Any]] = {}
        self._events: dict[str, list[dict[str, Any]]] = {}
        self._counter = 0
        self.last_submit_payload: dict[str, Any] | None = None

    def submit_job(
        self,
        *,
        objective: str,
        model: str | None = None,
        max_steps: int = 60,
        command_timeout: int = 45,
        type_interval: float = 0.02,
        click_pause: float = 0.10,
        disabled_tools: list[str] | None = None,
        safety_override: bool = False,
        no_failsafe: bool = False,
    ) -> str:
        """Register a fake running job with one canned event and return its id.

        When ``model`` is falsy the configured default model is used; the
        chosen name is stripped of surrounding whitespace.
        """
        self._counter += 1
        new_id = f"job_fake_{self._counter:03d}"
        resolved_model = (model or self.config.default_model).strip()

        # Snapshot of the call arguments for later inspection by tests.
        self.last_submit_payload = {
            "objective": objective,
            "model": resolved_model,
            "disabled_tools": disabled_tools or [],
            "safety_override": safety_override,
            "max_steps": max_steps,
            "command_timeout": command_timeout,
            "type_interval": type_interval,
            "click_pause": click_pause,
            "no_failsafe": no_failsafe,
        }

        canned_usage = {
            "input_tokens": 10,
            "cached_input_tokens": 2,
            "output_tokens": 4,
            "reasoning_tokens": 0,
            "total_tokens": 14,
            "estimated_cost_usd": 0.0001,
        }
        self._jobs[new_id] = {
            "job_id": new_id,
            "objective": objective,
            "model": resolved_model,
            "status": "running",
            "usage": canned_usage,
            "artifacts_dir": str(self.config.runs_dir.resolve()),
        }

        canned_event = {
            "id": 1,
            "job_id": new_id,
            "ts": "2026-05-27T00:00:00Z",
            "step": 1,
            "event_type": "tool_called",
            "payload": {"tool": "execute_command"},
        }
        self._events[new_id] = [canned_event]
        return new_id

    def list_jobs(self, limit: int = 100) -> list[dict[str, Any]]:
        """Return at most ``limit`` job records in insertion order."""
        records = [*self._jobs.values()]
        return records[:limit]

    def get_job(self, job_id: str) -> dict[str, Any] | None:
        """Return the record for ``job_id``, or ``None`` when it is unknown."""
        return self._jobs.get(job_id)

    def get_events(self, job_id: str, limit: int = 500) -> list[dict[str, Any]]:
        """Return at most ``limit`` events for ``job_id`` (empty when unknown)."""
        recorded = self._events.get(job_id, [])
        return recorded[:limit]

    def cancel_job(self, job_id: str) -> bool:
        """Mark a known job as ``"cancelling"``; return ``False`` for unknown ids."""
        record = self._jobs.get(job_id)
        if record is None:
            return False
        record["status"] = "cancelling"
        return True

    def stats(self) -> dict[str, Any]:
        """Return aggregate counters derived from the in-memory job records."""
        running_count = 0
        cost_total = 0
        for record in self._jobs.values():
            if record["status"] == "running":
                running_count += 1
            # Missing usage or a missing/zero cost both contribute nothing.
            cost_total += float((record["usage"] or {}).get("estimated_cost_usd") or 0)

        return {
            "total_jobs": len(self._jobs),
            "running_jobs": running_count,
            "completed_jobs": 0,
            "failed_jobs": 0,
            "cancelled_jobs": 0,
            "total_estimated_cost": cost_total,
            "live_running_threads": 0,
        }
def _build_app(tmp_path: Path, monkeypatch: Any, disable_ui: bool = False):
    """Create the app with ``FakeJobManager`` patched in and a config under ``tmp_path``.

    Returns:
        An ``(app, config)`` tuple: the object returned by
        ``server_module.create_app`` and the ``AppConfig`` it was built from.
    """
    # Swap the manager class before create_app looks it up on the module.
    monkeypatch.setattr(server_module, "JobManager", FakeJobManager)

    settings = {
        "openai_api_key": "test_key",
        "screenjob_token": "test_token",
        "disable_ui": disable_ui,
        "default_model": "gpt-5.4-mini",
        "safety_model": "gpt-5.4-mini",
        "host": "127.0.0.1",
        "port": 8787,
        "runs_dir": tmp_path / "runs",
        "db_path": tmp_path / "screenjob_test.db",
    }
    config = AppConfig(**settings)

    # Create the runs directory (and any missing parents) before building the app.
    config.runs_dir.mkdir(parents=True, exist_ok=True)

    return server_module.create_app(config), config
def test_api_requires_auth(tmp_path: Path, monkeypatch: Any) -> None:
    """Requests to /api/jobs without any token are rejected with 401."""
    app, _config = _build_app(tmp_path, monkeypatch, disable_ui=False)
    anonymous = TestClient(app)

    listing = anonymous.get("/api/jobs")
    assert listing.status_code == 401

    creation = anonymous.post("/api/jobs", json={"job": "x"})
    assert creation.status_code == 401
def test_create_job_returns_only_job_id_and_defaults_model(tmp_path: Path, monkeypatch: Any) -> None:
    """Creating a job returns just ``job_id`` and falls back to the default model."""
    app, _config = _build_app(tmp_path, monkeypatch, disable_ui=False)
    client = TestClient(app)
    auth = {"Authorization": "Bearer test_token"}
    request_body = {
        "job": "Open amazon.de",
        "disabled_tools": ["click"],
        "safety_override": True,
    }

    created = client.post("/api/jobs", headers=auth, json=request_body)
    assert created.status_code == 200

    body = created.json()
    # The response must expose exactly one key.
    assert list(body) == ["job_id"]
    new_job_id = body["job_id"]

    # No model was sent, so the fake manager must have seen the configured default.
    submitted = app.state.manager.last_submit_payload
    assert submitted["model"] == "gpt-5.4-mini"
    assert submitted["disabled_tools"] == ["click"]

    status_response = client.get(f"/api/jobs/{new_job_id}/status", headers=auth)
    assert status_response.status_code == 200
    assert status_response.json()["job_id"] == new_job_id
def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:
    """A created job exposes events and can be cancelled through the API.

    Every response is checked for its status code before its JSON body is
    read, so a failing request is reported as a clear assertion failure
    instead of a ``KeyError`` on a missing field.
    """
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    client = TestClient(app)
    headers = {"Authorization": "Bearer test_token"}

    create = client.post("/api/jobs", headers=headers, json={"job": "Test job"})
    assert create.status_code == 200
    job_id = create.json()["job_id"]

    events = client.get(f"/api/jobs/{job_id}/events?limit=20", headers=headers)
    assert events.status_code == 200
    assert len(events.json()["events"]) >= 1

    cancel = client.post(f"/api/jobs/{job_id}/cancel", headers=headers)
    assert cancel.status_code == 200
    assert cancel.json()["cancel_requested"] is True

    # The fake manager flips the status to "cancelling" when cancel succeeds.
    status_after = client.get(f"/api/jobs/{job_id}", headers=headers)
    assert status_after.status_code == 200
    assert status_after.json()["status"] == "cancelling"
def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
    """The root route returns the monitor page, or a JSON notice when the UI is disabled."""
    # UI enabled: "/" responds with text containing the monitor title.
    ui_app, _ui_config = _build_app(tmp_path / "enabled", monkeypatch, disable_ui=False)
    ui_response = TestClient(ui_app).get("/")
    assert ui_response.status_code == 200
    assert "ScreenJob Monitor" in ui_response.text

    # UI disabled: "/" still answers 200, with a JSON flag in the body.
    headless_app, _headless_config = _build_app(tmp_path / "disabled", monkeypatch, disable_ui=True)
    headless_response = TestClient(headless_app).get("/")
    assert headless_response.status_code == 200
    assert headless_response.json()["ui_disabled"] is True

53
tests/test_storage.py Normal file
View File

@@ -0,0 +1,53 @@
from pathlib import Path
from src.storage import HistoryDB
def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
    """Write one job with one event, update it, and read everything back."""
    history = HistoryDB(tmp_path / "screenjob_test.db")
    job_id = "job_test_001"

    # --- write phase -----------------------------------------------------
    history.create_job(
        job_id=job_id,
        objective="Open example.com",
        model="gpt-5.4-mini",
        created_at="2026-05-27T00:00:00Z",
        safety_override=False,
        disabled_tools=["click"],
    )
    history.add_event(
        job_id=job_id,
        ts="2026-05-27T00:00:01Z",
        step=1,
        event_type="tool_called",
        payload={"tool": "see_screen"},
    )
    history.update_job(
        job_id,
        status="completed",
        ended_at="2026-05-27T00:00:02Z",
        result="Done",
        steps=2,
        estimated_cost_usd=0.1234,
    )

    # --- single job lookup -------------------------------------------------
    stored = history.get_job(job_id)
    assert stored is not None
    assert stored["status"] == "completed"
    assert stored["model"] == "gpt-5.4-mini"
    assert stored["disabled_tools"] == ["click"]
    assert stored["usage"]["estimated_cost_usd"] == 0.1234

    # --- events --------------------------------------------------------------
    stored_events = history.get_job_events(job_id, limit=10)
    assert len(stored_events) == 1
    only_event = stored_events[0]
    assert only_event["event_type"] == "tool_called"
    assert only_event["payload"]["tool"] == "see_screen"

    # --- listing -------------------------------------------------------------
    listed = history.list_jobs(limit=10)
    assert len(listed) == 1
    assert listed[0]["job_id"] == job_id

    # --- aggregate statistics ------------------------------------------------
    totals = history.stats()
    assert totals["total_jobs"] == 1
    assert totals["completed_jobs"] == 1
    assert abs(totals["total_estimated_cost"] - 0.1234) < 1e-9