feat: finalize production cleanup with structured agent responses and project governance

This commit is contained in:
Space-Banane
2026-05-27 18:08:52 +02:00
parent a19b285232
commit c09f0ee9c0
17 changed files with 737 additions and 126 deletions

View File

@@ -39,7 +39,8 @@ Rules:
7) You may call multiple tools in one step. If needed, do click then sleep.
8) Never spam repeated clicks on the same coordinate; switch strategy.
9) Keep tool arguments valid JSON and concise.
10) When objective is fully complete, call task_complete(result="...").
10) When objective is fully complete, call task_complete(return="...", data=...).
11) The "data" field should contain structured output useful for the requester (for example command output text).
"""
@@ -69,6 +70,7 @@ class ScreenJobAgent:
self.step = 0
self.completed = False
self.final_result = ""
self.final_data: Any | None = None
self.previous_response_id: str | None = None
self.usage = UsageSummary()
@@ -134,9 +136,11 @@ class ScreenJobAgent:
"parameters": {
"type": "object",
"properties": {
"return": {"type": "string"},
"result": {"type": "string"},
"data": {},
},
"required": ["result"],
"required": [],
"additionalProperties": False,
},
},
@@ -551,10 +555,17 @@ class ScreenJobAgent:
return {"ok": False, "command": command, "error": f"{type(exc).__name__}: {exc}"}
def _tool_task_complete(self, args: dict[str, Any]) -> dict[str, Any]:
result = str(args.get("result", "")).strip() or "Task completed."
return_text = str(args.get("return", "")).strip()
if not return_text:
return_text = str(args.get("result", "")).strip()
if not return_text:
return_text = "Task completed."
data = args.get("data")
self.completed = True
self.final_result = result
return {"ok": True, "result": result}
self.final_result = return_text
self.final_data = data
return {"ok": True, "return": return_text, "data": data}
def _dispatch_tool(self, name: str, args: dict[str, Any]) -> dict[str, Any]:
if name in self.disabled_tools:
@@ -620,7 +631,8 @@ class ScreenJobAgent:
f"JOB: {job}\n"
"You are in an action loop. Prefer execute_command for deterministic actions. "
"You can return multiple tool calls in one step (example: click then sleep). "
"Call task_complete(result=...) only when truly done."
"When done call task_complete(return=..., data=...). "
"Include useful structured output in data."
),
}
],
@@ -672,7 +684,7 @@ class ScreenJobAgent:
"text": (
"No function call was returned. Continue by using tools. "
"You may call multiple tools in one step. "
"When complete, call task_complete(result=...)."
"When complete, call task_complete(return=..., data=...)."
),
}
],
@@ -746,6 +758,8 @@ class ScreenJobAgent:
return AgentResult(
completed=True,
result=self.final_result,
return_message=self.final_result,
data=self.final_data,
steps=self.step,
started_at=started_at,
ended_at=ended_at,
@@ -758,6 +772,8 @@ class ScreenJobAgent:
return AgentResult(
completed=False,
result="Cancelled by user request.",
return_message="Cancelled by user request.",
data=None,
steps=self.step,
started_at=started_at,
ended_at=ended_at,
@@ -772,6 +788,8 @@ class ScreenJobAgent:
return AgentResult(
completed=False,
result=error_text,
return_message=error_text,
data=None,
steps=self.step,
started_at=started_at,
ended_at=ended_at,
@@ -785,6 +803,8 @@ class ScreenJobAgent:
return AgentResult(
completed=False,
result=result_text,
return_message=result_text,
data=None,
steps=self.step,
started_at=started_at,
ended_at=ended_at,

View File

@@ -61,6 +61,9 @@ def main(argv: list[str] | None = None) -> int:
{
"completed": False,
"result": f"Blocked by safety check: {reason}",
"response": {"return": f"Blocked by safety check: {reason}", "data": parsed},
"return": f"Blocked by safety check: {reason}",
"data": parsed,
"safety": parsed,
},
ensure_ascii=False,
@@ -101,7 +104,10 @@ def main(argv: list[str] | None = None) -> int:
payload = {
"completed": result.completed,
"result": result.result,
"result": result.return_message,
"response": {"return": result.return_message, "data": result.data},
"return": result.return_message,
"data": result.data,
"steps": result.steps,
"elapsed_seconds": round(result.ended_at - result.started_at, 3),
"artifacts_dir": str(artifacts.root_dir.resolve()),
@@ -111,4 +117,3 @@ def main(argv: list[str] | None = None) -> int:
}
print(json.dumps(payload, ensure_ascii=False, indent=2))
return 0 if result.completed else 1

View File

@@ -19,6 +19,8 @@ class RunArtifacts:
class AgentResult:
completed: bool
result: str
return_message: str
data: Any | None
steps: int
started_at: float
ended_at: float

View File

@@ -31,6 +31,7 @@ class HistoryDB:
started_at TEXT,
ended_at TEXT,
result TEXT,
response_json TEXT,
error TEXT,
steps INTEGER DEFAULT 0,
cancelled INTEGER DEFAULT 0,
@@ -65,6 +66,9 @@ class HistoryDB:
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_job_events_job_id_id ON job_events(job_id, id)"
)
columns = {row[1] for row in conn.execute("PRAGMA table_info(jobs)").fetchall()}
if "response_json" not in columns:
conn.execute("ALTER TABLE jobs ADD COLUMN response_json TEXT")
conn.commit()
def create_job(
@@ -195,6 +199,7 @@ class HistoryDB:
"started_at": row["started_at"],
"ended_at": row["ended_at"],
"result": row["result"],
"response": self._parse_response_payload(row["response_json"], row["result"]),
"error": row["error"],
"steps": row["steps"],
"cancelled": bool(row["cancelled"]),
@@ -214,3 +219,17 @@ class HistoryDB:
},
}
def _parse_response_payload(self, response_json: str | None, result: str | None) -> dict[str, Any]:
fallback_return = str(result or "").strip()
if not response_json:
return {"return": fallback_return, "data": None}
try:
payload = json.loads(response_json)
if isinstance(payload, dict):
return {
"return": str(payload.get("return") or fallback_return),
"data": payload.get("data"),
}
except Exception:
pass
return {"return": fallback_return, "data": None}

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import json
import threading
import time
import uuid
@@ -159,6 +160,7 @@ class JobManager:
ended_at=ended_at,
error=error_text,
result=error_text,
response_json=json.dumps({"return": error_text, "data": None}, ensure_ascii=False),
)
self._publish(
job_id,
@@ -237,6 +239,7 @@ class JobManager:
ended_at=ended_at,
error=err,
result=err,
response_json=json.dumps({"return": err, "data": None}, ensure_ascii=False),
)
self._publish(job_id, {"ts": ended_at, "step": 0, "event_type": "job_failed", "payload": {"error": err}})
with self._lock:
@@ -251,7 +254,14 @@ class JobManager:
job_id,
status=status,
ended_at=ended_at,
result=result.result,
result=result.return_message,
response_json=json.dumps(
{
"return": result.return_message,
"data": result.data,
},
ensure_ascii=False,
),
error=result.error,
steps=result.steps,
cancelled=1 if result.cancelled else 0,
@@ -271,7 +281,8 @@ class JobManager:
"event_type": "job_finished",
"payload": {
"status": status,
"result": result.result,
"result": result.return_message,
"response": {"return": result.return_message, "data": result.data},
"error": result.error,
"cancelled": result.cancelled,
"usage": result.usage.to_dict(),
@@ -318,10 +329,10 @@ class JobManager:
job["is_running_thread"] = live.thread.is_alive()
else:
job["is_running_thread"] = False
return job
return self._normalize_job_payload(job)
def list_jobs(self, limit: int = 100) -> list[dict[str, Any]]:
    """Return stored jobs, each passed through ``_normalize_job_payload``.

    Args:
        limit: Forwarded to ``self.db.list_jobs`` as the ``limit`` keyword.

    Returns:
        The job dicts produced by ``self.db.list_jobs``, in the same order,
        after normalization.
    """
    return [self._normalize_job_payload(job) for job in self.db.list_jobs(limit=limit)]
def get_events(self, job_id: str, limit: int = 500) -> list[dict[str, Any]]:
    """Fetch the recorded events for ``job_id``.

    Delegates to ``self.db.get_job_events`` and hands back its result
    unchanged; ``limit`` is forwarded as a keyword argument.
    """
    events = self.db.get_job_events(job_id, limit=limit)
    return events
@@ -331,3 +342,12 @@ class JobManager:
with self._lock:
stats["live_running_threads"] = sum(1 for job in self._running.values() if job.thread.is_alive())
return stats
def _normalize_job_payload(self, job: dict[str, Any]) -> dict[str, Any]:
response = job.get("response")
if not isinstance(response, dict):
response = {"return": str(job.get("result") or ""), "data": None}
job["response"] = response
job["return"] = str(response.get("return") or "")
job["data"] = response.get("data")
return job