Default model reasoning effort to medium
This commit is contained in:
@@ -628,6 +628,9 @@ class ScreenJobAgent:
|
|||||||
return {"_raw": raw}
|
return {"_raw": raw}
|
||||||
|
|
||||||
def _call_model(self, input_items: list[dict[str, Any]]) -> Any:
|
def _call_model(self, input_items: list[dict[str, Any]]) -> Any:
|
||||||
|
effort = str(self.options.reasoning_effort or "medium").strip().lower()
|
||||||
|
if effort not in {"low", "medium", "high"}:
|
||||||
|
effort = "medium"
|
||||||
return self.client.responses.create(
|
return self.client.responses.create(
|
||||||
model=self.options.model,
|
model=self.options.model,
|
||||||
instructions=SYSTEM_PROMPT,
|
instructions=SYSTEM_PROMPT,
|
||||||
@@ -636,6 +639,7 @@ class ScreenJobAgent:
|
|||||||
previous_response_id=self.previous_response_id,
|
previous_response_id=self.previous_response_id,
|
||||||
parallel_tool_calls=True,
|
parallel_tool_calls=True,
|
||||||
max_tool_calls=8,
|
max_tool_calls=8,
|
||||||
|
reasoning={"effort": effort},
|
||||||
)
|
)
|
||||||
|
|
||||||
def run(self, job: str) -> AgentResult:
|
def run(self, job: str) -> AgentResult:
|
||||||
@@ -648,6 +652,7 @@ class ScreenJobAgent:
|
|||||||
{
|
{
|
||||||
"run_id": self.artifacts.run_id,
|
"run_id": self.artifacts.run_id,
|
||||||
"model": self.options.model,
|
"model": self.options.model,
|
||||||
|
"reasoning_effort": self.options.reasoning_effort,
|
||||||
"objective": job,
|
"objective": job,
|
||||||
"disabled_tools": sorted(self.disabled_tools),
|
"disabled_tools": sorted(self.disabled_tools),
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -28,6 +28,12 @@ def build_parser() -> argparse.ArgumentParser:
|
|||||||
parser.add_argument("--command-timeout", type=int, default=45, help="Timeout in seconds for execute_command.")
|
parser.add_argument("--command-timeout", type=int, default=45, help="Timeout in seconds for execute_command.")
|
||||||
parser.add_argument("--type-interval", type=float, default=0.02, help="Seconds between typed characters.")
|
parser.add_argument("--type-interval", type=float, default=0.02, help="Seconds between typed characters.")
|
||||||
parser.add_argument("--click-pause", type=float, default=0.10, help="Mouse move duration before click.")
|
parser.add_argument("--click-pause", type=float, default=0.10, help="Mouse move duration before click.")
|
||||||
|
parser.add_argument(
|
||||||
|
"--reasoning-effort",
|
||||||
|
choices=["low", "medium", "high"],
|
||||||
|
default="medium",
|
||||||
|
help="Reasoning effort passed to the model.",
|
||||||
|
)
|
||||||
parser.add_argument("--disable-tool", action="append", default=[], help="Disable a tool by name.")
|
parser.add_argument("--disable-tool", action="append", default=[], help="Disable a tool by name.")
|
||||||
parser.add_argument("--skip-safety-check", action="store_true", help="Bypass pre-flight safety check.")
|
parser.add_argument("--skip-safety-check", action="store_true", help="Bypass pre-flight safety check.")
|
||||||
parser.add_argument("--no-failsafe", action="store_true", help="Disable PyAutoGUI fail-safe.")
|
parser.add_argument("--no-failsafe", action="store_true", help="Disable PyAutoGUI fail-safe.")
|
||||||
@@ -78,6 +84,7 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
command_timeout=args.command_timeout,
|
command_timeout=args.command_timeout,
|
||||||
type_interval=args.type_interval,
|
type_interval=args.type_interval,
|
||||||
click_pause=args.click_pause,
|
click_pause=args.click_pause,
|
||||||
|
reasoning_effort=args.reasoning_effort,
|
||||||
disable_tools=set(disabled_tools),
|
disable_tools=set(disabled_tools),
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -58,4 +58,5 @@ class RuntimeOptions:
|
|||||||
command_timeout: int = 45
|
command_timeout: int = 45
|
||||||
type_interval: float = 0.02
|
type_interval: float = 0.02
|
||||||
click_pause: float = 0.10
|
click_pause: float = 0.10
|
||||||
|
reasoning_effort: str = "medium"
|
||||||
disable_tools: set[str] | None = None
|
disable_tools: set[str] | None = None
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ class CreateJobRequest(BaseModel):
|
|||||||
command_timeout: int = Field(45, ge=1, le=600)
|
command_timeout: int = Field(45, ge=1, le=600)
|
||||||
type_interval: float = Field(0.02, ge=0.0, le=1.0)
|
type_interval: float = Field(0.02, ge=0.0, le=1.0)
|
||||||
click_pause: float = Field(0.10, ge=0.0, le=2.0)
|
click_pause: float = Field(0.10, ge=0.0, le=2.0)
|
||||||
|
reasoning_effort: str = Field("medium", pattern="^(low|medium|high)$")
|
||||||
disabled_tools: list[str] = Field(default_factory=list)
|
disabled_tools: list[str] = Field(default_factory=list)
|
||||||
safety_override: bool = False
|
safety_override: bool = False
|
||||||
no_failsafe: bool = False
|
no_failsafe: bool = False
|
||||||
@@ -301,6 +302,7 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
|
|||||||
command_timeout=payload.command_timeout,
|
command_timeout=payload.command_timeout,
|
||||||
type_interval=payload.type_interval,
|
type_interval=payload.type_interval,
|
||||||
click_pause=payload.click_pause,
|
click_pause=payload.click_pause,
|
||||||
|
reasoning_effort=payload.reasoning_effort,
|
||||||
disabled_tools=payload.disabled_tools,
|
disabled_tools=payload.disabled_tools,
|
||||||
safety_override=payload.safety_override,
|
safety_override=payload.safety_override,
|
||||||
no_failsafe=payload.no_failsafe,
|
no_failsafe=payload.no_failsafe,
|
||||||
|
|||||||
@@ -48,6 +48,7 @@ class JobManager:
|
|||||||
command_timeout: int = 45,
|
command_timeout: int = 45,
|
||||||
type_interval: float = 0.02,
|
type_interval: float = 0.02,
|
||||||
click_pause: float = 0.10,
|
click_pause: float = 0.10,
|
||||||
|
reasoning_effort: str = "medium",
|
||||||
disabled_tools: list[str] | None = None,
|
disabled_tools: list[str] | None = None,
|
||||||
safety_override: bool = False,
|
safety_override: bool = False,
|
||||||
no_failsafe: bool = False,
|
no_failsafe: bool = False,
|
||||||
@@ -93,6 +94,7 @@ class JobManager:
|
|||||||
"command_timeout": command_timeout,
|
"command_timeout": command_timeout,
|
||||||
"type_interval": type_interval,
|
"type_interval": type_interval,
|
||||||
"click_pause": click_pause,
|
"click_pause": click_pause,
|
||||||
|
"reasoning_effort": reasoning_effort,
|
||||||
"no_failsafe": no_failsafe,
|
"no_failsafe": no_failsafe,
|
||||||
"cancel_event": cancel_event,
|
"cancel_event": cancel_event,
|
||||||
},
|
},
|
||||||
@@ -121,6 +123,7 @@ class JobManager:
|
|||||||
command_timeout: int,
|
command_timeout: int,
|
||||||
type_interval: float,
|
type_interval: float,
|
||||||
click_pause: float,
|
click_pause: float,
|
||||||
|
reasoning_effort: str,
|
||||||
no_failsafe: bool,
|
no_failsafe: bool,
|
||||||
cancel_event: threading.Event,
|
cancel_event: threading.Event,
|
||||||
) -> None:
|
) -> None:
|
||||||
@@ -218,6 +221,7 @@ class JobManager:
|
|||||||
command_timeout=command_timeout,
|
command_timeout=command_timeout,
|
||||||
type_interval=type_interval,
|
type_interval=type_interval,
|
||||||
click_pause=click_pause,
|
click_pause=click_pause,
|
||||||
|
reasoning_effort=reasoning_effort,
|
||||||
disable_tools=set(disabled_tools),
|
disable_tools=set(disabled_tools),
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -29,7 +29,10 @@ def test_cli_emits_structured_return_and_data(monkeypatch: Any, capsys, tmp_path
|
|||||||
def fake_assess_task_safety(*_args, **_kwargs):
|
def fake_assess_task_safety(*_args, **_kwargs):
|
||||||
return True, "safe", {"safe": True}
|
return True, "safe", {"safe": True}
|
||||||
|
|
||||||
|
captured_kwargs: dict[str, Any] = {}
|
||||||
|
|
||||||
def fake_run_job(*_args, **_kwargs):
|
def fake_run_job(*_args, **_kwargs):
|
||||||
|
captured_kwargs.update(_kwargs)
|
||||||
result = AgentResult(
|
result = AgentResult(
|
||||||
completed=True,
|
completed=True,
|
||||||
result="Done",
|
result="Done",
|
||||||
@@ -66,3 +69,4 @@ def test_cli_emits_structured_return_and_data(monkeypatch: Any, capsys, tmp_path
|
|||||||
assert payload["response"]["data"] == "file1.txt\nfile2.txt"
|
assert payload["response"]["data"] == "file1.txt\nfile2.txt"
|
||||||
assert payload["return"] == "Task completed successfully"
|
assert payload["return"] == "Task completed successfully"
|
||||||
assert payload["data"] == "file1.txt\nfile2.txt"
|
assert payload["data"] == "file1.txt\nfile2.txt"
|
||||||
|
assert captured_kwargs["options"].reasoning_effort == "medium"
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ class FakeJobManager:
|
|||||||
command_timeout: int = 45,
|
command_timeout: int = 45,
|
||||||
type_interval: float = 0.02,
|
type_interval: float = 0.02,
|
||||||
click_pause: float = 0.10,
|
click_pause: float = 0.10,
|
||||||
|
reasoning_effort: str = "medium",
|
||||||
disabled_tools: list[str] | None = None,
|
disabled_tools: list[str] | None = None,
|
||||||
safety_override: bool = False,
|
safety_override: bool = False,
|
||||||
no_failsafe: bool = False,
|
no_failsafe: bool = False,
|
||||||
@@ -46,6 +47,7 @@ class FakeJobManager:
|
|||||||
"command_timeout": command_timeout,
|
"command_timeout": command_timeout,
|
||||||
"type_interval": type_interval,
|
"type_interval": type_interval,
|
||||||
"click_pause": click_pause,
|
"click_pause": click_pause,
|
||||||
|
"reasoning_effort": reasoning_effort,
|
||||||
"no_failsafe": no_failsafe,
|
"no_failsafe": no_failsafe,
|
||||||
}
|
}
|
||||||
self._jobs[job_id] = {
|
self._jobs[job_id] = {
|
||||||
@@ -189,6 +191,7 @@ def test_create_job_returns_only_job_id_and_defaults_model(tmp_path: Path, monke
|
|||||||
manager = app.state.manager
|
manager = app.state.manager
|
||||||
assert manager.last_submit_payload["model"] == "gpt-5.4-mini"
|
assert manager.last_submit_payload["model"] == "gpt-5.4-mini"
|
||||||
assert manager.last_submit_payload["disabled_tools"] == ["click"]
|
assert manager.last_submit_payload["disabled_tools"] == ["click"]
|
||||||
|
assert manager.last_submit_payload["reasoning_effort"] == "medium"
|
||||||
|
|
||||||
status_res = client.get(f"/api/jobs/{job_id}/status", headers=headers)
|
status_res = client.get(f"/api/jobs/{job_id}/status", headers=headers)
|
||||||
assert status_res.status_code == 200
|
assert status_res.status_code == 200
|
||||||
|
|||||||
Reference in New Issue
Block a user