feat: add shared runtime with FastAPI job server and safety pipeline
This commit is contained in:
80
src/safety.py
Normal file
80
src/safety.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
|
||||
# Instructions passed to the model for the safety classification. The model is
# asked to reply with one JSON object carrying "safe", "reason", "risk_level"
# and "recommended_disabled_tools".
SAFETY_PROMPT = """
You are a safety gate for a desktop automation agent that can execute terminal commands, click/type on screen, and browse websites.
Classify whether the user objective can be executed safely and legally.

Return strict JSON only:
{
"safe": true|false,
"reason": "short reason",
"risk_level": "low|medium|high",
"recommended_disabled_tools": ["optional_tool_names"]
}
"""
|
||||
|
||||
|
||||
def _extract_json(text: str) -> dict[str, Any]:
|
||||
text = text.strip()
|
||||
try:
|
||||
parsed = json.loads(text)
|
||||
return parsed if isinstance(parsed, dict) else {}
|
||||
except Exception:
|
||||
pass
|
||||
start = text.find("{")
|
||||
end = text.rfind("}")
|
||||
if start >= 0 and end > start:
|
||||
try:
|
||||
parsed = json.loads(text[start : end + 1])
|
||||
return parsed if isinstance(parsed, dict) else {}
|
||||
except Exception:
|
||||
return {}
|
||||
return {}
|
||||
|
||||
|
||||
def assess_task_safety(
    client: OpenAI,
    *,
    model: str,
    objective: str,
    disabled_tools: list[str],
) -> tuple[bool, str, dict[str, Any]]:
    """Ask the model whether ``objective`` may be executed, failing closed.

    Sends ``SAFETY_PROMPT`` as instructions, together with the objective and
    the JSON-encoded ``disabled_tools`` list, through
    ``client.responses.create`` and interprets the reply as a JSON verdict.

    Args:
        client: OpenAI client used for the single ``responses.create`` call.
        model: Model name passed through to the API.
        objective: User objective to classify; embedded verbatim in the request.
        disabled_tools: Tool names reported to the model as disabled.

    Returns:
        ``(safe, reason, details)``. ``safe`` is ``True`` only when the reply
        parses to a JSON object whose ``"safe"`` value is the boolean ``true``.
        ``details`` is the parsed reply, ``{}`` when the reply could not be
        parsed, or ``{"safe": False}`` when building or sending the request
        raised.
    """
    try:
        # Built inside the try so that a non-serialisable ``disabled_tools``
        # is reported as a failed check instead of propagating.
        user_text = (
            "Objective:\n"
            f"{objective}\n\n"
            f"Disabled tools: {json.dumps(disabled_tools, ensure_ascii=False)}\n"
            "Answer with strict JSON only."
        )
        response = client.responses.create(
            model=model,
            instructions=SAFETY_PROMPT,
            input=[
                {
                    "role": "user",
                    "content": [{"type": "input_text", "text": user_text}],
                }
            ],
        )
    except Exception as exc:  # noqa: BLE001
        return False, f"Safety check failed: {type(exc).__name__}: {exc}", {"safe": False}

    raw_text = getattr(response, "output_text", "")
    if not isinstance(raw_text, str):
        # Anything other than text is treated like an empty reply.
        raw_text = ""

    parsed = _extract_json(raw_text)
    if not parsed:
        return False, "Safety check returned unparseable response.", parsed

    # Only a real JSON ``true`` counts as approval. Truthy stand-ins such as
    # the string "false" or the number 1 must not open the gate.
    is_safe = parsed.get("safe") is True

    reason_value = parsed.get("reason")
    # A JSON null would otherwise be rendered as the literal text "None".
    reason = "" if reason_value is None else str(reason_value).strip()
    return is_safe, reason or "No reason provided by safety check.", parsed
|
||||
|
||||
Reference in New Issue
Block a user