test: add pytest verification suite and gitea ci workflow
All checks were successful
CI / test (push) Successful in 48s
All checks were successful
CI / test (push) Successful in 48s
This commit is contained in:
35
.gitea/workflows/ci.yml
Normal file
35
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Continuous-integration workflow: byte-compiles the sources and runs pytest.
name: CI

# Trigger on pushes to every branch ("**" matches any branch name) and on pull requests.
on:
  push:
    branches:
      - "**"
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      # NOTE(review): pyautogui is not in this list; presumably the CI runner
      # has no GUI and the code is expected to import without it — confirm.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install openai pillow python-dotenv fastapi uvicorn pytest httpx

      # Fail fast on syntax errors before any test is collected.
      - name: Compile check
        run: |
          python -m py_compile main.py screenjob.py src/*.py tests/*.py

      # Placeholder credentials only, not real secrets.
      - name: Run tests
        env:
          OPENAI_API_KEY: test_key
          SCREENJOB_TOKEN: test_token
        run: |
          pytest -q
|
||||||
21
README.md
21
README.md
@@ -157,4 +157,25 @@ src/
|
|||||||
storage.py
|
storage.py
|
||||||
task_manager.py
|
task_manager.py
|
||||||
ui.py
|
ui.py
|
||||||
|
tests/
|
||||||
|
conftest.py
|
||||||
|
test_pricing.py
|
||||||
|
test_server_api.py
|
||||||
|
test_storage.py
|
||||||
|
.gitea/
|
||||||
|
workflows/
|
||||||
|
ci.yml
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
Run local verification:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
pytest -q
|
||||||
|
```
|
||||||
|
|
||||||
|
Gitea CI pipeline:
|
||||||
|
|
||||||
|
- File: `.gitea/workflows/ci.yml`
|
||||||
|
- Runs a `python -m py_compile` syntax check and `pytest -q` on every push and pull request.
|
||||||
|
|||||||
14
src/agent.py
14
src/agent.py
@@ -18,9 +18,10 @@ from .utils import clamp, draw_global_grid, image_to_data_url, utc_now_iso
|
|||||||
try:
|
try:
|
||||||
import pyautogui
|
import pyautogui
|
||||||
except Exception as import_exc:
|
except Exception as import_exc:
|
||||||
raise RuntimeError(
|
pyautogui = None # type: ignore[assignment]
|
||||||
"pyautogui is required. Install dependencies with: pip install pyautogui pillow"
|
_PYAUTOGUI_IMPORT_ERROR = import_exc
|
||||||
) from import_exc
|
else:
|
||||||
|
_PYAUTOGUI_IMPORT_ERROR = None
|
||||||
|
|
||||||
|
|
||||||
SYSTEM_PROMPT = """
|
SYSTEM_PROMPT = """
|
||||||
@@ -52,6 +53,12 @@ class ScreenJobAgent:
|
|||||||
cancel_event: threading.Event | None = None,
|
cancel_event: threading.Event | None = None,
|
||||||
event_callback: Callable[[dict[str, Any]], None] | None = None,
|
event_callback: Callable[[dict[str, Any]], None] | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
if pyautogui is None:
|
||||||
|
raise RuntimeError(
|
||||||
|
"pyautogui is required for agent execution. "
|
||||||
|
"Install dependencies and ensure GUI access. "
|
||||||
|
f"Import error: {_PYAUTOGUI_IMPORT_ERROR}"
|
||||||
|
)
|
||||||
self.client = client
|
self.client = client
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.artifacts = artifacts
|
self.artifacts = artifacts
|
||||||
@@ -784,4 +791,3 @@ class ScreenJobAgent:
|
|||||||
usage=self.usage,
|
usage=self.usage,
|
||||||
error=result_text,
|
error=result_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -14,9 +14,10 @@ from .utils import setup_artifacts, setup_logger
|
|||||||
try:
|
try:
|
||||||
import pyautogui
|
import pyautogui
|
||||||
except Exception as import_exc:
|
except Exception as import_exc:
|
||||||
raise RuntimeError(
|
pyautogui = None # type: ignore[assignment]
|
||||||
"pyautogui is required. Install dependencies with: pip install pyautogui pillow"
|
_PYAUTOGUI_IMPORT_ERROR = import_exc
|
||||||
) from import_exc
|
else:
|
||||||
|
_PYAUTOGUI_IMPORT_ERROR = None
|
||||||
|
|
||||||
|
|
||||||
def create_openai_client(api_key: str) -> OpenAI:
|
def create_openai_client(api_key: str) -> OpenAI:
|
||||||
@@ -34,6 +35,12 @@ def run_job(
|
|||||||
event_callback: Callable[[dict[str, Any]], None] | None = None,
|
event_callback: Callable[[dict[str, Any]], None] | None = None,
|
||||||
logger: logging.Logger | None = None,
|
logger: logging.Logger | None = None,
|
||||||
) -> tuple[AgentResult, RunArtifacts]:
|
) -> tuple[AgentResult, RunArtifacts]:
|
||||||
|
if pyautogui is None:
|
||||||
|
raise RuntimeError(
|
||||||
|
"pyautogui is required for runtime execution. "
|
||||||
|
"Install dependencies and ensure GUI access. "
|
||||||
|
f"Import error: {_PYAUTOGUI_IMPORT_ERROR}"
|
||||||
|
)
|
||||||
pyautogui.FAILSAFE = not no_failsafe
|
pyautogui.FAILSAFE = not no_failsafe
|
||||||
pyautogui.PAUSE = 0.05
|
pyautogui.PAUSE = 0.05
|
||||||
|
|
||||||
@@ -54,4 +61,3 @@ def run_job(
|
|||||||
result = agent.run(objective)
|
result = agent.run(objective)
|
||||||
active_logger.info("Run finished. completed=%s elapsed=%.2fs", result.completed, result.ended_at - result.started_at)
|
active_logger.info("Run finished. completed=%s elapsed=%.2fs", result.completed, result.ended_at - result.started_at)
|
||||||
return result, artifacts
|
return result, artifacts
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import secrets
|
import secrets
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
@@ -73,20 +74,22 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
|
|||||||
if not app_config.screenjob_token:
|
if not app_config.screenjob_token:
|
||||||
raise RuntimeError("SCREENJOB_TOKEN is required in environment or .env.")
|
raise RuntimeError("SCREENJOB_TOKEN is required in environment or .env.")
|
||||||
|
|
||||||
app = FastAPI(title="ScreenJob API", version="1.0.0")
|
|
||||||
db = HistoryDB(app_config.db_path)
|
db = HistoryDB(app_config.db_path)
|
||||||
ws_hub = _WebSocketHub()
|
ws_hub = _WebSocketHub()
|
||||||
manager = JobManager(config=app_config, db=db, broadcast=ws_hub.broadcast_from_thread)
|
manager = JobManager(config=app_config, db=db, broadcast=ws_hub.broadcast_from_thread)
|
||||||
|
|
||||||
|
@asynccontextmanager
|
||||||
|
async def lifespan(_: FastAPI):
|
||||||
|
ws_hub.set_loop(asyncio.get_running_loop())
|
||||||
|
yield
|
||||||
|
|
||||||
|
app = FastAPI(title="ScreenJob API", version="1.0.0", lifespan=lifespan)
|
||||||
|
|
||||||
app.state.config = app_config
|
app.state.config = app_config
|
||||||
app.state.db = db
|
app.state.db = db
|
||||||
app.state.ws_hub = ws_hub
|
app.state.ws_hub = ws_hub
|
||||||
app.state.manager = manager
|
app.state.manager = manager
|
||||||
|
|
||||||
@app.on_event("startup")
|
|
||||||
async def _on_startup() -> None:
|
|
||||||
ws_hub.set_loop(asyncio.get_running_loop())
|
|
||||||
|
|
||||||
def _extract_token(
|
def _extract_token(
|
||||||
authorization: str | None,
|
authorization: str | None,
|
||||||
x_screenjob_token: str | None,
|
x_screenjob_token: str | None,
|
||||||
|
|||||||
10
tests/conftest.py
Normal file
10
tests/conftest.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# Put the repository root (the parent of this tests/ directory) at the front of
# sys.path so the test modules can import the ``src`` package directly.
ROOT = Path(__file__).resolve().parent.parent
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))
|
||||||
|
|
||||||
32
tests/test_pricing.py
Normal file
32
tests/test_pricing.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
from src.models import UsageSummary
|
||||||
|
from src.pricing import estimate_cost_usd, normalize_model_for_pricing
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_model_for_pricing() -> None:
    """Check that a dated snapshot name maps to its base model and other names pass through."""
    expectations = {
        "gpt-5.4-mini": "gpt-5.4-mini",
        "gpt-5.4-mini-2026-05-01": "gpt-5.4-mini",
        "unknown-model": "unknown-model",
    }
    for raw_name, normalized in expectations.items():
        assert normalize_model_for_pricing(raw_name) == normalized
|
||||||
|
|
||||||
|
|
||||||
|
def test_estimate_cost_with_cached_tokens() -> None:
    """Check the estimate when part of the input is billed at the cached-input rate."""
    token_counts = {
        "input_tokens": 100_000,
        "cached_input_tokens": 20_000,
        "output_tokens": 50_000,
        "total_tokens": 150_000,
    }
    usage = UsageSummary(**token_counts)

    cost, model = estimate_cost_usd("gpt-5.4-mini", usage)

    assert model == "gpt-5.4-mini"
    assert cost is not None
    # Expected total, broken down by rate:
    #   non-cached input  80k at $0.75/M  = 0.06
    #   cached input      20k at $0.075/M = 0.0015
    #   output            50k at $4.50/M  = 0.225
    expected_total = 0.2865
    assert abs(cost - expected_total) < 1e-9
|
||||||
|
|
||||||
|
|
||||||
|
def test_estimate_cost_unknown_model_returns_none() -> None:
    """Check that an unpriced model yields no cost while its name is echoed back unchanged."""
    unpriced_model = "my-new-model"
    estimate = estimate_cost_usd(unpriced_model, UsageSummary(input_tokens=10, output_tokens=10))
    cost, model = estimate
    assert model == "my-new-model"
    assert cost is None
|
||||||
|
|
||||||
181
tests/test_server_api.py
Normal file
181
tests/test_server_api.py
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
import src.server as server_module
|
||||||
|
from src.config import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class FakeJobManager:
    """In-memory stand-in for the server's ``JobManager`` used by the API tests.

    Jobs and their events are kept in plain dicts and nothing is executed.
    The arguments of the most recent ``submit_job`` call are stored in
    ``last_submit_payload`` so tests can inspect what the API layer forwarded.
    """

    def __init__(self, *, config: AppConfig, db: Any, broadcast: Any = None) -> None:
        # ``db`` and ``broadcast`` are accepted only so the constructor can be
        # called like the real manager; they are not stored or used.
        self.config = config
        self._counter = 0
        self._jobs: dict[str, dict[str, Any]] = {}
        self._events: dict[str, list[dict[str, Any]]] = {}
        self.last_submit_payload: dict[str, Any] | None = None

    def submit_job(
        self,
        *,
        objective: str,
        model: str | None = None,
        max_steps: int = 60,
        command_timeout: int = 45,
        type_interval: float = 0.02,
        click_pause: float = 0.10,
        disabled_tools: list[str] | None = None,
        safety_override: bool = False,
        no_failsafe: bool = False,
    ) -> str:
        """Record a fake running job with one canned event and return its id.

        Ids are sequential (``job_fake_001``, ``job_fake_002``, ...). When
        *model* is falsy the configured default model is used; the chosen
        name is stripped of surrounding whitespace.
        """
        self._counter += 1
        job_id = f"job_fake_{self._counter:03d}"
        fallback_model = model or self.config.default_model
        selected_model = fallback_model.strip()

        # Snapshot of the call arguments for later inspection by tests.
        self.last_submit_payload = {
            "objective": objective,
            "model": selected_model,
            "disabled_tools": disabled_tools or [],
            "safety_override": safety_override,
            "max_steps": max_steps,
            "command_timeout": command_timeout,
            "type_interval": type_interval,
            "click_pause": click_pause,
            "no_failsafe": no_failsafe,
        }

        canned_usage = {
            "input_tokens": 10,
            "cached_input_tokens": 2,
            "output_tokens": 4,
            "reasoning_tokens": 0,
            "total_tokens": 14,
            "estimated_cost_usd": 0.0001,
        }
        self._jobs[job_id] = {
            "job_id": job_id,
            "objective": objective,
            "model": selected_model,
            "status": "running",
            "usage": canned_usage,
            "artifacts_dir": str(self.config.runs_dir.resolve()),
        }

        canned_event = {
            "id": 1,
            "job_id": job_id,
            "ts": "2026-05-27T00:00:00Z",
            "step": 1,
            "event_type": "tool_called",
            "payload": {"tool": "execute_command"},
        }
        self._events[job_id] = [canned_event]
        return job_id

    def list_jobs(self, limit: int = 100) -> list[dict[str, Any]]:
        """Return up to *limit* job records in submission order."""
        records = list(self._jobs.values())
        return records[:limit]

    def get_job(self, job_id: str) -> dict[str, Any] | None:
        """Return the record for *job_id*, or ``None`` when it is unknown."""
        return self._jobs.get(job_id)

    def get_events(self, job_id: str, limit: int = 500) -> list[dict[str, Any]]:
        """Return up to *limit* events for *job_id* (empty list when unknown)."""
        recorded = self._events.get(job_id, [])
        return recorded[:limit]

    def cancel_job(self, job_id: str) -> bool:
        """Mark a known job as ``cancelling``; return whether the job existed."""
        record = self._jobs.get(job_id)
        if record is None:
            return False
        record["status"] = "cancelling"
        return True

    def stats(self) -> dict[str, Any]:
        """Return aggregate counters derived from the in-memory jobs.

        Only the total, the running count and the summed estimated cost are
        computed; the remaining counters are fixed at zero.
        """
        records = list(self._jobs.values())
        running = sum(1 for record in records if record["status"] == "running")
        estimated_total = sum(
            float((record["usage"] or {}).get("estimated_cost_usd") or 0) for record in records
        )
        return {
            "total_jobs": len(self._jobs),
            "running_jobs": running,
            "completed_jobs": 0,
            "failed_jobs": 0,
            "cancelled_jobs": 0,
            "total_estimated_cost": estimated_total,
            "live_running_threads": 0,
        }
|
||||||
|
|
||||||
|
|
||||||
|
def _build_app(tmp_path: Path, monkeypatch: Any, disable_ui: bool = False):
    """Build an app backed by ``FakeJobManager`` with all storage under *tmp_path*.

    Returns the ``(app, config)`` pair.
    """
    # Swap the manager class on the server module before create_app() builds one.
    monkeypatch.setattr(server_module, "JobManager", FakeJobManager)

    settings = {
        "openai_api_key": "test_key",
        "screenjob_token": "test_token",
        "disable_ui": disable_ui,
        "default_model": "gpt-5.4-mini",
        "safety_model": "gpt-5.4-mini",
        "host": "127.0.0.1",
        "port": 8787,
        "runs_dir": tmp_path / "runs",
        "db_path": tmp_path / "screenjob_test.db",
    }
    config = AppConfig(**settings)
    config.runs_dir.mkdir(parents=True, exist_ok=True)
    return server_module.create_app(config), config
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_requires_auth(tmp_path: Path, monkeypatch: Any) -> None:
    """Requests without a token are rejected with 401 on both list and create."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    # Using the client as a context manager runs the app's lifespan handler
    # (startup and shutdown) and closes the client when the block exits.
    with TestClient(app) as client:
        assert client.get("/api/jobs").status_code == 401
        assert client.post("/api/jobs", json={"job": "x"}).status_code == 401
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_job_returns_only_job_id_and_defaults_model(tmp_path: Path, monkeypatch: Any) -> None:
    """Creating a job returns only ``job_id`` and falls back to the configured default model."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    headers = {"Authorization": "Bearer test_token"}

    # Using the client as a context manager runs the app's lifespan handler
    # (startup and shutdown) and closes the client when the block exits.
    with TestClient(app) as client:
        response = client.post(
            "/api/jobs",
            headers=headers,
            json={"job": "Open amazon.de", "disabled_tools": ["click"], "safety_override": True},
        )
        assert response.status_code == 200
        payload = response.json()
        # The response body must expose the job id and nothing else.
        assert list(payload.keys()) == ["job_id"]
        job_id = payload["job_id"]

        # No model was sent, so the manager must have received the default.
        manager = app.state.manager
        assert manager.last_submit_payload["model"] == "gpt-5.4-mini"
        assert manager.last_submit_payload["disabled_tools"] == ["click"]

        status_res = client.get(f"/api/jobs/{job_id}/status", headers=headers)
        assert status_res.status_code == 200
        assert status_res.json()["job_id"] == job_id
|
||||||
|
|
||||||
|
|
||||||
|
def test_cancel_endpoint_and_events(tmp_path: Path, monkeypatch: Any) -> None:
    """A created job exposes its events and moves to ``cancelling`` after a cancel request."""
    app, _ = _build_app(tmp_path, monkeypatch, disable_ui=False)
    headers = {"Authorization": "Bearer test_token"}

    # Using the client as a context manager runs the app's lifespan handler
    # (startup and shutdown) and closes the client when the block exits.
    with TestClient(app) as client:
        create = client.post("/api/jobs", headers=headers, json={"job": "Test job"})
        job_id = create.json()["job_id"]

        events = client.get(f"/api/jobs/{job_id}/events?limit=20", headers=headers)
        assert events.status_code == 200
        assert len(events.json()["events"]) >= 1

        cancel = client.post(f"/api/jobs/{job_id}/cancel", headers=headers)
        assert cancel.status_code == 200
        assert cancel.json()["cancel_requested"] is True

        status_after = client.get(f"/api/jobs/{job_id}", headers=headers).json()
        assert status_after["status"] == "cancelling"
|
||||||
|
|
||||||
|
|
||||||
|
def test_ui_toggle(tmp_path: Path, monkeypatch: Any) -> None:
    """The root route serves the monitor page when the UI is on and a JSON notice when it is off."""
    # Each app gets its own sub-directory so the two configurations do not share files.
    app_enabled, _ = _build_app(tmp_path / "enabled", monkeypatch, disable_ui=False)
    # Using each client as a context manager runs that app's lifespan handler
    # (startup and shutdown) and closes the client when the block exits.
    with TestClient(app_enabled) as client_enabled:
        root_enabled = client_enabled.get("/")
        assert root_enabled.status_code == 200
        assert "ScreenJob Monitor" in root_enabled.text

    app_disabled, _ = _build_app(tmp_path / "disabled", monkeypatch, disable_ui=True)
    with TestClient(app_disabled) as client_disabled:
        root_disabled = client_disabled.get("/")
        assert root_disabled.status_code == 200
        assert root_disabled.json()["ui_disabled"] is True
|
||||||
|
|
||||||
53
tests/test_storage.py
Normal file
53
tests/test_storage.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from src.storage import HistoryDB
|
||||||
|
|
||||||
|
|
||||||
|
def test_history_db_job_and_events_roundtrip(tmp_path: Path) -> None:
    """Store one job with one event, mark it completed, and read everything back."""
    history = HistoryDB(tmp_path / "screenjob_test.db")
    job_id = "job_test_001"

    # Write phase: create the job, attach an event, then finish the job.
    history.create_job(
        job_id=job_id,
        objective="Open example.com",
        model="gpt-5.4-mini",
        created_at="2026-05-27T00:00:00Z",
        safety_override=False,
        disabled_tools=["click"],
    )
    history.add_event(
        job_id=job_id,
        ts="2026-05-27T00:00:01Z",
        step=1,
        event_type="tool_called",
        payload={"tool": "see_screen"},
    )
    history.update_job(
        job_id,
        status="completed",
        ended_at="2026-05-27T00:00:02Z",
        result="Done",
        steps=2,
        estimated_cost_usd=0.1234,
    )

    # Read phase: the single job record.
    stored_job = history.get_job(job_id)
    assert stored_job is not None
    assert stored_job["status"] == "completed"
    assert stored_job["model"] == "gpt-5.4-mini"
    assert stored_job["disabled_tools"] == ["click"]
    assert stored_job["usage"]["estimated_cost_usd"] == 0.1234

    # Its events.
    stored_events = history.get_job_events(job_id, limit=10)
    assert len(stored_events) == 1
    first_event = stored_events[0]
    assert first_event["event_type"] == "tool_called"
    assert first_event["payload"]["tool"] == "see_screen"

    # The job listing.
    listed = history.list_jobs(limit=10)
    assert len(listed) == 1
    assert listed[0]["job_id"] == job_id

    # The aggregate statistics.
    totals = history.stats()
    assert totals["total_jobs"] == 1
    assert totals["completed_jobs"] == 1
    assert abs(totals["total_estimated_cost"] - 0.1234) < 1e-9
|
||||||
|
|
||||||
Reference in New Issue
Block a user