feat: add shared runtime with FastAPI job server and safety pipeline
This commit is contained in:
260
src/agent.py
260
src/agent.py
@@ -3,14 +3,16 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
from typing import Any
|
||||
from typing import Any, Callable
|
||||
|
||||
from openai import OpenAI
|
||||
from PIL import Image, ImageEnhance, ImageFilter, ImageOps
|
||||
|
||||
from .models import AgentResult, RunArtifacts
|
||||
from .models import AgentResult, RunArtifacts, RuntimeOptions, UsageSummary
|
||||
from .pricing import estimate_cost_usd
|
||||
from .utils import clamp, draw_global_grid, image_to_data_url, utc_now_iso
|
||||
|
||||
try:
|
||||
@@ -46,32 +48,78 @@ class ScreenJobAgent:
|
||||
client: OpenAI,
|
||||
logger: logging.Logger,
|
||||
artifacts: RunArtifacts,
|
||||
model: str,
|
||||
max_steps: int,
|
||||
command_timeout: int,
|
||||
type_interval: float,
|
||||
click_pause: float,
|
||||
options: RuntimeOptions,
|
||||
cancel_event: threading.Event | None = None,
|
||||
event_callback: Callable[[dict[str, Any]], None] | None = None,
|
||||
) -> None:
|
||||
self.client = client
|
||||
self.logger = logger
|
||||
self.artifacts = artifacts
|
||||
self.model = model
|
||||
self.max_steps = max_steps
|
||||
self.command_timeout = command_timeout
|
||||
self.type_interval = type_interval
|
||||
self.click_pause = click_pause
|
||||
self.options = options
|
||||
self.cancel_event = cancel_event or threading.Event()
|
||||
self.event_callback = event_callback
|
||||
|
||||
self.step = 0
|
||||
self.completed = False
|
||||
self.final_result = ""
|
||||
self.previous_response_id: str | None = None
|
||||
self.usage = UsageSummary()
|
||||
|
||||
self.last_screen_data_url: str | None = None
|
||||
self.last_screen_meta: dict[str, Any] | None = None
|
||||
self.click_history: list[tuple[int, int, float]] = []
|
||||
self.disabled_tools = {tool.strip() for tool in (options.disable_tools or set()) if tool.strip()}
|
||||
|
||||
def _emit(self, event_type: str, payload: dict[str, Any]) -> None:
    """Deliver one structured event to the optional event callback.

    Does nothing when no callback was supplied. Otherwise the payload is
    wrapped in an envelope carrying a timestamp, the run id, the current
    step number and the event type, and the envelope is handed to the
    callback.

    Args:
        event_type: Short label stored under ``"event_type"``.
        payload: Event-specific data stored under ``"payload"``.
    """
    callback = self.event_callback
    if callback is None:
        return

    envelope = dict(
        ts=utc_now_iso(),
        job_run_id=self.artifacts.run_id,
        step=self.step,
        event_type=event_type,
        payload=payload,
    )

    try:
        callback(envelope)
    except Exception:  # noqa: BLE001
        # Best effort: a failing listener is logged at debug level and must
        # never interrupt the agent run.
        self.logger.debug("Event callback failed.", exc_info=True)
|
||||
|
||||
def _is_cancelled(self) -> bool:
    """Report whether the run's ``cancel_event`` has been set."""
    cancel_requested = self.cancel_event.is_set()
    return True if cancel_requested else False
|
||||
|
||||
def _register_usage(self, response: Any) -> None:
    """Fold the token usage of one model response into the running totals.

    Reads ``response.usage`` (returning immediately when it is absent),
    adds its counters to ``self.usage``, refreshes the cost estimate and
    emits a ``"usage_update"`` event with the new totals and the response
    id.

    Args:
        response: Model response object; only its ``usage`` and ``id``
            attributes are read, both via ``getattr`` with a fallback.
    """
    usage_obj = getattr(response, "usage", None)
    if usage_obj is None:
        return

    # ``or 0`` also covers attributes that are present but set to None.
    input_tokens = int(getattr(usage_obj, "input_tokens", 0) or 0)
    output_tokens = int(getattr(usage_obj, "output_tokens", 0) or 0)
    # Fall back to input + output when the total is missing *or* reported
    # as None/0; previously a present-but-empty total collapsed to 0 even
    # though the two parts were known.
    total_tokens = int(getattr(usage_obj, "total_tokens", None) or (input_tokens + output_tokens))

    input_details = getattr(usage_obj, "input_tokens_details", None)
    cached_tokens = int(getattr(input_details, "cached_tokens", 0) or 0) if input_details else 0
    output_details = getattr(usage_obj, "output_tokens_details", None)
    reasoning_tokens = int(getattr(output_details, "reasoning_tokens", 0) or 0) if output_details else 0

    self.usage.input_tokens += input_tokens
    self.usage.cached_input_tokens += cached_tokens
    self.usage.output_tokens += output_tokens
    self.usage.reasoning_tokens += reasoning_tokens
    self.usage.total_tokens += total_tokens

    # The estimate is recomputed from the accumulated totals, not from
    # this single response.
    estimated_cost, model_for_pricing = estimate_cost_usd(self.options.model, self.usage)
    self.usage.estimated_cost_usd = estimated_cost
    self.usage.model_for_pricing = model_for_pricing

    self._emit(
        "usage_update",
        {
            "usage": self.usage.to_dict(),
            "response_id": getattr(response, "id", None),
        },
    )
|
||||
|
||||
def _tool_schemas(self) -> list[dict[str, Any]]:
|
||||
return [
|
||||
all_tools: list[dict[str, Any]] = [
|
||||
{
|
||||
"type": "function",
|
||||
"name": "task_complete",
|
||||
@@ -213,6 +261,7 @@ class ScreenJobAgent:
|
||||
},
|
||||
},
|
||||
]
|
||||
return [tool for tool in all_tools if tool["name"] not in self.disabled_tools]
|
||||
|
||||
def _capture_screen(self, with_grid: bool = True) -> tuple[Image.Image, dict[str, Any]]:
|
||||
screenshot = pyautogui.screenshot().convert("RGB")
|
||||
@@ -378,13 +427,16 @@ class ScreenJobAgent:
|
||||
"recent_similar_clicks": len(near_same),
|
||||
}
|
||||
|
||||
pyautogui.moveTo(x, y, duration=self.click_pause)
|
||||
pyautogui.moveTo(x, y, duration=self.options.click_pause)
|
||||
pyautogui.click(x=x, y=y)
|
||||
sleep_after = self._parse_seconds(args.get("sleep_after_seconds", 0), default=0.0, max_seconds=30.0)
|
||||
if sleep_after > 0:
|
||||
time.sleep(sleep_after)
|
||||
else:
|
||||
time.sleep(0.15)
|
||||
wait_remaining = sleep_after if sleep_after > 0 else 0.15
|
||||
while wait_remaining > 0:
|
||||
if self._is_cancelled():
|
||||
break
|
||||
interval = min(0.05, wait_remaining)
|
||||
time.sleep(interval)
|
||||
wait_remaining -= interval
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
@@ -401,7 +453,11 @@ class ScreenJobAgent:
|
||||
|
||||
def _tool_type(self, args: dict[str, Any]) -> dict[str, Any]:
    """Type ``args["text"]`` one character at a time, honouring cancellation.

    Characters are sent individually so a cancellation request is noticed
    between keystrokes; ``self.options.type_interval`` seconds are slept
    after each character.

    Args:
        args: Tool arguments; ``"text"`` is coerced with ``str`` and
            defaults to the empty string.

    Returns:
        ``{"ok": True, "typed_length": ..., "message": "Text typed."}`` when
        all characters were sent, or ``{"ok": False, "cancelled": True,
        "typed_length": ...}`` with the number of characters actually typed
        before the cancellation was observed.
    """
    text = str(args.get("text", ""))
    # The enumerate index equals the number of characters already typed, so
    # a cancelled run reports real progress instead of a hard-coded 0.
    for typed_so_far, char in enumerate(text):
        if self._is_cancelled():
            return {"ok": False, "cancelled": True, "typed_length": typed_so_far}
        pyautogui.write(char, interval=0)
        time.sleep(self.options.type_interval)
    return {"ok": True, "typed_length": len(text), "message": "Text typed."}
|
||||
|
||||
def _tool_press_key(self, args: dict[str, Any]) -> dict[str, Any]:
|
||||
@@ -410,15 +466,25 @@ class ScreenJobAgent:
|
||||
if not key:
|
||||
return {"ok": False, "error": "Missing key."}
|
||||
repeats = min(repeats, 50)
|
||||
pressed = 0
|
||||
for _ in range(repeats):
|
||||
if self._is_cancelled():
|
||||
break
|
||||
pyautogui.press(key)
|
||||
pressed += 1
|
||||
time.sleep(0.03)
|
||||
return {"ok": True, "key": key, "repeats": repeats, "message": "Key press executed."}
|
||||
return {"ok": True, "key": key, "repeats": pressed, "message": "Key press executed."}
|
||||
|
||||
def _tool_sleep(self, args: dict[str, Any]) -> dict[str, Any]:
    """Pause for the requested number of seconds, waking early on cancellation.

    The wait is split into slices of at most 0.1 s so a cancellation
    request is noticed promptly. Progress is measured against a monotonic
    deadline rather than by summing nominal slice lengths, so the total
    wait does not drift when individual ``time.sleep`` calls overshoot.

    Args:
        args: Tool arguments; ``"seconds"`` is normalised by
            ``self._parse_seconds`` with a default of 0 and a cap of 60.

    Returns:
        ``{"ok": True, "slept_seconds": ..., "message": "Sleep completed."}``
        after a full wait, or ``{"ok": False, "cancelled": True,
        "slept_seconds": ...}`` with the time actually waited when the run
        was cancelled.
    """
    seconds = self._parse_seconds(args.get("seconds"), default=0.0, max_seconds=60.0)
    started = time.monotonic()
    deadline = started + seconds
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        if self._is_cancelled():
            waited = time.monotonic() - started
            return {"ok": False, "cancelled": True, "slept_seconds": round(waited, 3)}
        time.sleep(min(0.1, remaining))
    return {"ok": True, "slept_seconds": round(seconds, 3), "message": "Sleep completed."}
|
||||
|
||||
def _tool_execute_command(self, args: dict[str, Any]) -> dict[str, Any]:
    """Run a shell command with cooperative cancellation and a timeout.

    The command is started with ``subprocess.Popen`` and its output is
    collected with short ``communicate`` calls. Draining the pipes while
    waiting keeps a chatty child from blocking on a full pipe buffer, which
    a bare ``poll()`` loop would only surface as a spurious timeout.

    Args:
        args: Tool arguments; ``"command"`` is coerced with ``str`` and
            stripped.

    Returns:
        A result dict. On normal exit: ``ok=True`` with ``exit_code``,
        ``stdout``/``stderr`` (last 12000 characters each) and
        ``elapsed_ms``. On cancellation: ``ok=False, cancelled=True``. On
        timeout: ``ok=False`` with ``error="Command timed out."`` and the
        captured output. Any other failure is reported as ``ok=False`` with
        an ``error`` string; exceptions are not propagated.
    """
    command = str(args.get("command", "")).strip()
    if not command:
        return {"ok": False, "error": "Empty command."}

    started = time.time()
    timeout_seconds = self.options.command_timeout
    process: subprocess.Popen[str] | None = None

    def elapsed_ms() -> int:
        return int((time.time() - started) * 1000)

    try:
        # NOTE(review): the command string is executed through the shell
        # (shell=True) exactly as supplied in the tool arguments — confirm
        # that upstream policy checks cover this before trusting the input.
        process = subprocess.Popen(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        while True:
            if self._is_cancelled():
                process.terminate()
                # Reap the child and close its pipes so a cancelled command
                # does not linger as a zombie; escalate if it ignores the
                # terminate request.
                try:
                    process.communicate(timeout=2)
                except subprocess.TimeoutExpired:
                    process.kill()
                return {
                    "ok": False,
                    "cancelled": True,
                    "command": command,
                    "elapsed_ms": elapsed_ms(),
                }
            if (time.time() - started) > timeout_seconds:
                process.kill()
                stdout, stderr = process.communicate(timeout=2)
                return {
                    "ok": False,
                    "command": command,
                    "error": "Command timed out.",
                    "elapsed_ms": elapsed_ms(),
                    "timeout_seconds": timeout_seconds,
                    "stdout": (stdout or "")[-12000:],
                    "stderr": (stderr or "")[-12000:],
                }
            try:
                # A timed-out communicate() may be retried without losing
                # output, so this both drains the pipes and serves as the
                # polling delay.
                stdout, stderr = process.communicate(timeout=0.05)
            except subprocess.TimeoutExpired:
                continue
            break

        return {
            "ok": True,
            "command": command,
            "exit_code": process.returncode,
            "stdout": (stdout or "")[-12000:],
            "stderr": (stderr or "")[-12000:],
            "elapsed_ms": elapsed_ms(),
        }
    except Exception as exc:  # noqa: BLE001
        if process is not None and process.poll() is None:
            try:
                process.kill()
            except Exception:  # noqa: BLE001
                # Best effort: the failure is already being reported below.
                self.logger.debug("Failed to kill command process.", exc_info=True)
        return {"ok": False, "command": command, "error": f"{type(exc).__name__}: {exc}"}
|
||||
|
||||
def _tool_task_complete(self, args: dict[str, Any]) -> dict[str, Any]:
|
||||
@@ -465,6 +550,8 @@ class ScreenJobAgent:
|
||||
return {"ok": True, "result": result}
|
||||
|
||||
def _dispatch_tool(self, name: str, args: dict[str, Any]) -> dict[str, Any]:
|
||||
if name in self.disabled_tools:
|
||||
return {"ok": False, "error": f"Tool '{name}' is disabled for this job."}
|
||||
handlers = {
|
||||
"see_screen": self._tool_see_screen,
|
||||
"enhance": self._tool_enhance,
|
||||
@@ -491,7 +578,7 @@ class ScreenJobAgent:
|
||||
|
||||
def _call_model(self, input_items: list[dict[str, Any]]) -> Any:
|
||||
return self.client.responses.create(
|
||||
model=self.model,
|
||||
model=self.options.model,
|
||||
instructions=SYSTEM_PROMPT,
|
||||
tools=self._tool_schemas(),
|
||||
input=input_items,
|
||||
@@ -502,8 +589,18 @@ class ScreenJobAgent:
|
||||
|
||||
def run(self, job: str) -> AgentResult:
|
||||
started_at = time.time()
|
||||
self.logger.info("Starting run_id=%s model=%s", self.artifacts.run_id, self.model)
|
||||
self.logger.info("Starting run_id=%s model=%s", self.artifacts.run_id, self.options.model)
|
||||
self.logger.info("Job: %s", job)
|
||||
self.logger.info("Disabled tools: %s", sorted(self.disabled_tools))
|
||||
self._emit(
|
||||
"job_started",
|
||||
{
|
||||
"run_id": self.artifacts.run_id,
|
||||
"model": self.options.model,
|
||||
"objective": job,
|
||||
"disabled_tools": sorted(self.disabled_tools),
|
||||
},
|
||||
)
|
||||
|
||||
self._tool_see_screen({})
|
||||
init_input: list[dict[str, Any]] = [
|
||||
@@ -528,25 +625,37 @@ class ScreenJobAgent:
|
||||
)
|
||||
|
||||
pending_input = init_input
|
||||
error_text: str | None = None
|
||||
cancelled = False
|
||||
|
||||
while self.step < self.options.max_steps and not self.completed:
|
||||
if self._is_cancelled():
|
||||
cancelled = True
|
||||
error_text = "Cancelled by user request."
|
||||
break
|
||||
|
||||
while self.step < self.max_steps and not self.completed:
|
||||
self.step += 1
|
||||
self.logger.info("---- Agent step %d/%d ----", self.step, self.max_steps)
|
||||
self.logger.info("---- Agent step %d/%d ----", self.step, self.options.max_steps)
|
||||
self._emit("step_started", {"step": self.step, "max_steps": self.options.max_steps})
|
||||
try:
|
||||
response = self._call_model(pending_input)
|
||||
self._register_usage(response)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
self.logger.exception("OpenAI API call failed on step %d", self.step)
|
||||
raise RuntimeError(f"OpenAI API call failed: {type(exc).__name__}: {exc}") from exc
|
||||
error_text = f"OpenAI API call failed: {type(exc).__name__}: {exc}"
|
||||
break
|
||||
|
||||
self.previous_response_id = response.id
|
||||
output_items = list(response.output or [])
|
||||
text_preview = getattr(response, "output_text", "") or ""
|
||||
if text_preview.strip():
|
||||
self.logger.info("Model text: %s", text_preview.strip()[:500])
|
||||
self._emit("model_text", {"step": self.step, "text": text_preview.strip()[:2000]})
|
||||
|
||||
tool_calls = [item for item in output_items if getattr(item, "type", None) == "function_call"]
|
||||
if not tool_calls:
|
||||
self.logger.warning("No tool calls returned; nudging model to continue with tools.")
|
||||
self._emit("step_warning", {"step": self.step, "message": "No tool calls; nudged model."})
|
||||
pending_input = [
|
||||
{
|
||||
"role": "user",
|
||||
@@ -566,12 +675,21 @@ class ScreenJobAgent:
|
||||
|
||||
next_input: list[dict[str, Any]] = []
|
||||
for tool_call in tool_calls:
|
||||
if self._is_cancelled():
|
||||
cancelled = True
|
||||
error_text = "Cancelled by user request."
|
||||
break
|
||||
|
||||
name = str(getattr(tool_call, "name", ""))
|
||||
call_id = str(getattr(tool_call, "call_id", ""))
|
||||
args_raw = getattr(tool_call, "arguments", "{}")
|
||||
args = self._safe_parse_args(args_raw)
|
||||
|
||||
self.logger.info("Tool call: %s args=%s", name, json.dumps(args, ensure_ascii=False))
|
||||
self._emit(
|
||||
"tool_called",
|
||||
{"step": self.step, "tool": name, "args": args},
|
||||
)
|
||||
try:
|
||||
result = self._dispatch_tool(name, args)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
@@ -587,6 +705,7 @@ class ScreenJobAgent:
|
||||
name,
|
||||
json.dumps(result, ensure_ascii=False)[:2500],
|
||||
)
|
||||
self._emit("tool_result", {"step": self.step, "tool": name, "result": result})
|
||||
next_input.append(
|
||||
{
|
||||
"type": "function_call_output",
|
||||
@@ -600,26 +719,69 @@ class ScreenJobAgent:
|
||||
next_input.append(
|
||||
self._build_visual_message(title, self.last_screen_data_url, self.last_screen_meta)
|
||||
)
|
||||
self._emit(
|
||||
"visual_update",
|
||||
{
|
||||
"step": self.step,
|
||||
"kind": name,
|
||||
"image_meta": self.last_screen_meta,
|
||||
},
|
||||
)
|
||||
|
||||
if cancelled:
|
||||
break
|
||||
pending_input = next_input
|
||||
|
||||
ended_at = time.time()
|
||||
if self.completed:
|
||||
self.logger.info("Task completed in %d step(s).", self.step)
|
||||
self._emit("job_completed", {"result": self.final_result, "steps": self.step, "usage": self.usage.to_dict()})
|
||||
return AgentResult(
|
||||
completed=True,
|
||||
result=self.final_result,
|
||||
steps=self.step,
|
||||
started_at=started_at,
|
||||
ended_at=ended_at,
|
||||
usage=self.usage,
|
||||
)
|
||||
|
||||
self.logger.warning("Stopped due to step limit (%d).", self.max_steps)
|
||||
if cancelled:
|
||||
self.logger.warning("Run cancelled by user after %d step(s).", self.step)
|
||||
self._emit("job_cancelled", {"steps": self.step, "usage": self.usage.to_dict()})
|
||||
return AgentResult(
|
||||
completed=False,
|
||||
result="Cancelled by user request.",
|
||||
steps=self.step,
|
||||
started_at=started_at,
|
||||
ended_at=ended_at,
|
||||
usage=self.usage,
|
||||
error=error_text,
|
||||
cancelled=True,
|
||||
)
|
||||
|
||||
if error_text:
|
||||
self.logger.error("Run failed: %s", error_text)
|
||||
self._emit("job_failed", {"steps": self.step, "error": error_text, "usage": self.usage.to_dict()})
|
||||
return AgentResult(
|
||||
completed=False,
|
||||
result=error_text,
|
||||
steps=self.step,
|
||||
started_at=started_at,
|
||||
ended_at=ended_at,
|
||||
usage=self.usage,
|
||||
error=error_text,
|
||||
)
|
||||
|
||||
self.logger.warning("Stopped due to step limit (%d).", self.options.max_steps)
|
||||
result_text = f"Stopped after max steps ({self.options.max_steps}) without task_complete."
|
||||
self._emit("job_stopped", {"steps": self.step, "reason": "max_steps", "usage": self.usage.to_dict()})
|
||||
return AgentResult(
|
||||
completed=False,
|
||||
result=f"Stopped after max steps ({self.max_steps}) without task_complete.",
|
||||
result=result_text,
|
||||
steps=self.step,
|
||||
started_at=started_at,
|
||||
ended_at=ended_at,
|
||||
usage=self.usage,
|
||||
error=result_text,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user