refactor: remove verification field from responses and update related documentation
All checks were successful
CI / test (push) Successful in 7s

This commit is contained in:
Space-Banane
2026-05-27 21:23:40 +02:00
parent 48a145d147
commit 595375e1a7
9 changed files with 26 additions and 64 deletions

View File

@@ -42,7 +42,8 @@ Rules:
9) Keep tool arguments valid JSON and concise.
10) When objective is fully complete, call task_complete(return="...", data=...).
11) The "data" field should contain structured output useful for the requester (for example command output text).
12) Before finishing, always verify outcome with a final screen capture.
12) Before finishing, verify actual screen content matches the expected outcome.
13) For verification, call see_screen (and enhance if needed), then include a concise observed_result in data.
"""
@@ -80,7 +81,6 @@ class ScreenJobAgent:
self.last_screen_meta: dict[str, Any] | None = None
self.click_history: list[tuple[int, int, float]] = []
self.disabled_tools = {tool.strip() for tool in (options.disable_tools or set()) if tool.strip()}
self.final_verification: dict[str, Any] | None = None
def _emit(self, event_type: str, payload: dict[str, Any]) -> None:
if self.event_callback is None:
@@ -367,16 +367,6 @@ class ScreenJobAgent:
"message": "Screen captured with coordinate grid.",
}
def _capture_final_verification(self) -> dict[str, Any]:
# Capture the screen one more time (with with_grid=True), save it as a PNG
# under the artifacts shots directory, and make it the agent's "last screen".
# Returns a dict with "ok": True, the resolved path of the saved file, and the
# capture metadata extended with that path plus a "final_verification" flag.
# NOTE(review): leading indentation is missing in this view; the statements
# below are the method body.
image, meta = self._capture_screen(with_grid=True)
# The file name embeds the current step counter, zero-padded to three digits.
out_path = self.artifacts.shots_dir / f"screen_final_verification_step_{self.step:03d}.png"
self._save_image(image, out_path)
data_url = image_to_data_url(image, "PNG")
# Builds a new mapping rather than mutating `meta`; on a key clash the
# right-hand values win (assumes `meta` is a dict -- TODO confirm).
verification_meta = meta | {"path": str(out_path.resolve()), "final_verification": True}
# Overwrite the last-screen state so it refers to this capture.
self.last_screen_data_url = data_url
self.last_screen_meta = verification_meta
return {"ok": True, "path": str(out_path.resolve()), "meta": verification_meta}
def _tool_enhance(self, args: dict[str, Any]) -> dict[str, Any]:
coord = args.get("coordinate") or {}
x = int(coord.get("x", 0))
@@ -608,12 +598,7 @@ class ScreenJobAgent:
self.completed = True
self.final_result = return_text
self.final_data = data
try:
verification = self._capture_final_verification()
except Exception as exc: # noqa: BLE001
verification = {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
self.final_verification = verification
return {"ok": True, "return": return_text, "data": data, "verification": verification}
return {"ok": True, "return": return_text, "data": data}
def _dispatch_tool(self, name: str, args: dict[str, Any]) -> dict[str, Any]:
if name in self.disabled_tools:
@@ -681,8 +666,9 @@ class ScreenJobAgent:
"For modifier shortcuts, use a single press_key combo (example: win+r). "
"You can return multiple tool calls in one step (example: click then sleep). "
"When done call task_complete(return=..., data=...). "
"Include useful structured output in data. "
"Always finish with a final screen verification."
"Before task_complete, verify the screen content is what was expected "
"using see_screen/enhance and include observed_result in data. "
"Include useful structured output in data."
),
}
],
@@ -735,8 +721,9 @@ class ScreenJobAgent:
"No function call was returned. Continue by using tools. "
"Use one press_key call for key combos like win+r. "
"You may call multiple tools in one step. "
"When complete, call task_complete(return=..., data=...). "
"Always finish with a final screen verification."
"Before task_complete, verify expected screen content with see_screen/enhance "
"and include observed_result in data. "
"When complete, call task_complete(return=..., data=...)."
),
}
],
@@ -812,7 +799,6 @@ class ScreenJobAgent:
result=self.final_result,
return_message=self.final_result,
data=self.final_data,
verification=self.final_verification,
steps=self.step,
started_at=started_at,
ended_at=ended_at,
@@ -827,7 +813,6 @@ class ScreenJobAgent:
result="Cancelled by user request.",
return_message="Cancelled by user request.",
data=None,
verification=self.final_verification,
steps=self.step,
started_at=started_at,
ended_at=ended_at,
@@ -844,7 +829,6 @@ class ScreenJobAgent:
result=error_text,
return_message=error_text,
data=None,
verification=self.final_verification,
steps=self.step,
started_at=started_at,
ended_at=ended_at,
@@ -860,7 +844,6 @@ class ScreenJobAgent:
result=result_text,
return_message=result_text,
data=None,
verification=self.final_verification,
steps=self.step,
started_at=started_at,
ended_at=ended_at,

View File

@@ -108,7 +108,6 @@ def main(argv: list[str] | None = None) -> int:
"response": {"return": result.return_message, "data": result.data},
"return": result.return_message,
"data": result.data,
"verification": result.verification,
"steps": result.steps,
"elapsed_seconds": round(result.ended_at - result.started_at, 3),
"artifacts_dir": str(artifacts.root_dir.resolve()),

View File

@@ -21,7 +21,6 @@ class AgentResult:
result: str
return_message: str
data: Any | None
verification: dict[str, Any] | None
steps: int
started_at: float
ended_at: float

View File

@@ -222,15 +222,14 @@ class HistoryDB:
def _parse_response_payload(self, response_json: str | None, result: str | None) -> dict[str, Any]:
fallback_return = str(result or "").strip()
if not response_json:
return {"return": fallback_return, "data": None, "verification": None}
return {"return": fallback_return, "data": None}
try:
payload = json.loads(response_json)
if isinstance(payload, dict):
return {
"return": str(payload.get("return") or fallback_return),
"data": payload.get("data"),
"verification": payload.get("verification"),
}
except Exception:
pass
return {"return": fallback_return, "data": None, "verification": None}
return {"return": fallback_return, "data": None}

View File

@@ -160,7 +160,7 @@ class JobManager:
ended_at=ended_at,
error=error_text,
result=error_text,
response_json=json.dumps({"return": error_text, "data": None, "verification": None}, ensure_ascii=False),
response_json=json.dumps({"return": error_text, "data": None}, ensure_ascii=False),
)
self._publish(
job_id,
@@ -239,7 +239,7 @@ class JobManager:
ended_at=ended_at,
error=err,
result=err,
response_json=json.dumps({"return": err, "data": None, "verification": None}, ensure_ascii=False),
response_json=json.dumps({"return": err, "data": None}, ensure_ascii=False),
)
self._publish(job_id, {"ts": ended_at, "step": 0, "event_type": "job_failed", "payload": {"error": err}})
with self._lock:
@@ -259,7 +259,6 @@ class JobManager:
{
"return": result.return_message,
"data": result.data,
"verification": result.verification,
},
ensure_ascii=False,
),
@@ -284,7 +283,6 @@ class JobManager:
"status": status,
"result": result.return_message,
"response": {"return": result.return_message, "data": result.data},
"verification": result.verification,
"error": result.error,
"cancelled": result.cancelled,
"usage": result.usage.to_dict(),
@@ -348,9 +346,8 @@ class JobManager:
def _normalize_job_payload(self, job: dict[str, Any]) -> dict[str, Any]:
response = job.get("response")
if not isinstance(response, dict):
response = {"return": str(job.get("result") or ""), "data": None, "verification": None}
response = {"return": str(job.get("result") or ""), "data": None}
job["response"] = response
job["return"] = str(response.get("return") or "")
job["data"] = response.get("data")
job["verification"] = response.get("verification")
return job