feat: add shared runtime with FastAPI job server and safety pipeline

2026-05-27 17:43:51 +02:00
parent 84b0df520c
commit 10355bf11a
14 changed files with 1516 additions and 157 deletions
--- a/src/safety.py
+++ b/src/safety.py
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+import json
+from typing import Any
+
+from openai import OpenAI
+
+
+# Instructions for the safety-gate model call: assess_task_safety() passes
+# this string as `instructions` to client.responses.create().
+# It requests a JSON-only verdict. Of the keys it asks for, the code in this
+# module reads only "safe" and "reason"; "risk_level" and
+# "recommended_disabled_tools" are handed back to the caller unexamined as
+# part of the parsed dict.
+SAFETY_PROMPT = """
+You are a safety gate for a desktop automation agent that can execute terminal commands, click/type on screen, and browse websites.
+Classify whether the user objective can be executed safely and legally.
+
+Return strict JSON only:
+{
+  "safe": true|false,
+  "reason": "short reason",
+  "risk_level": "low|medium|high",
+  "recommended_disabled_tools": ["optional_tool_names"]
+}
+"""
+
+
+def _extract_json(text: str) -> dict[str, Any]:
+    """Best-effort extraction of a single JSON object from model output.
+
+    The stripped text is parsed as-is first. Only if that parse raises is
+    the span from the first ``{`` to the last ``}`` tried, which tolerates
+    prose or code fences wrapped around the object.
+
+    Returns:
+        The decoded value when it is a ``dict``; otherwise ``{}``. That
+        covers text with no parseable JSON as well as JSON that decodes to
+        a non-dict (list, string, number, ...).
+    """
+    body = text.strip()
+
+    def _candidates():
+        # Lazy on purpose: the brace search runs only after the whole-text
+        # parse has failed, exactly as a sequential try/fallback would.
+        yield body
+        lo, hi = body.find("{"), body.rfind("}")
+        if 0 <= lo < hi:
+            yield body[lo : hi + 1]
+
+    for candidate in _candidates():
+        try:
+            decoded = json.loads(candidate)
+        except Exception:
+            # Any parse failure just means "try the next candidate".
+            continue
+        # The first candidate that parses decides the result, dict or not.
+        return decoded if isinstance(decoded, dict) else {}
+    return {}
+
+
+def assess_task_safety(
+    client: OpenAI,
+    *,
+    model: str,
+    objective: str,
+    disabled_tools: list[str],
+) -> tuple[bool, str, dict[str, Any]]:
+    """Ask the model whether *objective* may be handed to the agent.
+
+    Sends ``SAFETY_PROMPT`` as instructions together with the objective and
+    the list of disabled tools, then parses the JSON verdict from the
+    response's ``output_text``.
+
+    The gate fails closed. The task is reported unsafe when the API call
+    raises, when no JSON object can be extracted from the reply, and when
+    the ``"safe"`` field is anything other than a literal JSON ``true``.
+
+    Args:
+        client: Client whose ``responses.create`` method is called.
+        model: Model name forwarded to the API call.
+        objective: The user objective to classify.
+        disabled_tools: Tool names included in the request as JSON.
+
+    Returns:
+        ``(safe, reason, parsed)``. ``parsed`` is the decoded verdict
+        object, ``{}`` when the reply was unparseable, or
+        ``{"safe": False}`` when the API call itself failed.
+    """
+    try:
+        response = client.responses.create(
+            model=model,
+            instructions=SAFETY_PROMPT,
+            input=[
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "input_text",
+                            "text": (
+                                "Objective:\n"
+                                f"{objective}\n\n"
+                                f"Disabled tools: {json.dumps(disabled_tools, ensure_ascii=False)}\n"
+                                "Answer with strict JSON only."
+                            ),
+                        }
+                    ],
+                }
+            ],
+        )
+    except Exception as exc:  # noqa: BLE001
+        # Deliberately broad: any failure to obtain a verdict blocks the task.
+        return False, f"Safety check failed: {type(exc).__name__}: {exc}", {"safe": False}
+
+    raw_text = getattr(response, "output_text", "") or ""
+    parsed = _extract_json(raw_text)
+    if not parsed:
+        return False, "Safety check returned unparseable response.", parsed
+
+    # Accept only a real JSON `true`. bool() would treat "false", "no" or any
+    # non-zero number as approval, turning a malformed verdict into a pass.
+    safe = parsed.get("safe") is True
+    # `or ""` keeps a JSON null reason from surfacing as the string "None".
+    reason = str(parsed.get("reason") or "").strip() or "No reason provided by safety check."
+    return safe, reason, parsed
+