From 0c019474af500074412b1fcced65eb71fc3f4a52 Mon Sep 17 00:00:00 2001 From: Space-Banane Date: Wed, 27 May 2026 22:02:20 +0200 Subject: [PATCH] Default model reasoning effort to medium --- src/agent.py | 5 +++++ src/cli.py | 7 +++++++ src/models.py | 1 + src/server.py | 2 ++ src/task_manager.py | 4 ++++ tests/test_cli.py | 4 ++++ tests/test_server_api.py | 3 +++ 7 files changed, 26 insertions(+) diff --git a/src/agent.py b/src/agent.py index 2922ff6..59a3124 100644 --- a/src/agent.py +++ b/src/agent.py @@ -628,6 +628,9 @@ class ScreenJobAgent: return {"_raw": raw} def _call_model(self, input_items: list[dict[str, Any]]) -> Any: + effort = str(self.options.reasoning_effort or "medium").strip().lower() + if effort not in {"low", "medium", "high"}: + effort = "medium" return self.client.responses.create( model=self.options.model, instructions=SYSTEM_PROMPT, @@ -636,6 +639,7 @@ class ScreenJobAgent: previous_response_id=self.previous_response_id, parallel_tool_calls=True, max_tool_calls=8, + reasoning={"effort": effort}, ) def run(self, job: str) -> AgentResult: @@ -648,6 +652,7 @@ class ScreenJobAgent: { "run_id": self.artifacts.run_id, "model": self.options.model, + "reasoning_effort": self.options.reasoning_effort, "objective": job, "disabled_tools": sorted(self.disabled_tools), }, diff --git a/src/cli.py b/src/cli.py index bf379ea..53b4426 100644 --- a/src/cli.py +++ b/src/cli.py @@ -28,6 +28,12 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument("--command-timeout", type=int, default=45, help="Timeout in seconds for execute_command.") parser.add_argument("--type-interval", type=float, default=0.02, help="Seconds between typed characters.") parser.add_argument("--click-pause", type=float, default=0.10, help="Mouse move duration before click.") + parser.add_argument( + "--reasoning-effort", + choices=["low", "medium", "high"], + default="medium", + help="Reasoning effort passed to the model.", + ) parser.add_argument("--disable-tool", action="append", default=[], help="Disable a tool by name.") parser.add_argument("--skip-safety-check", action="store_true", help="Bypass pre-flight safety check.") parser.add_argument("--no-failsafe", action="store_true", help="Disable PyAutoGUI fail-safe.") @@ -78,6 +84,7 @@ def main(argv: list[str] | None = None) -> int: command_timeout=args.command_timeout, type_interval=args.type_interval, click_pause=args.click_pause, + reasoning_effort=args.reasoning_effort, disable_tools=set(disabled_tools), ) try: diff --git a/src/models.py b/src/models.py index 7a55fca..098d323 100644 --- a/src/models.py +++ b/src/models.py @@ -58,4 +58,5 @@ class RuntimeOptions: command_timeout: int = 45 type_interval: float = 0.02 click_pause: float = 0.10 + reasoning_effort: str = "medium" disable_tools: set[str] | None = None diff --git a/src/server.py b/src/server.py index 1e7cde0..16a97f3 100644 --- a/src/server.py +++ b/src/server.py @@ -25,6 +25,7 @@ class CreateJobRequest(BaseModel): command_timeout: int = Field(45, ge=1, le=600) type_interval: float = Field(0.02, ge=0.0, le=1.0) click_pause: float = Field(0.10, ge=0.0, le=2.0) + reasoning_effort: str = Field("medium", pattern="^(low|medium|high)$") disabled_tools: list[str] = Field(default_factory=list) safety_override: bool = False no_failsafe: bool = False @@ -301,6 +302,7 @@ def create_app(config: AppConfig | None = None) -> FastAPI: command_timeout=payload.command_timeout, type_interval=payload.type_interval, click_pause=payload.click_pause, + reasoning_effort=payload.reasoning_effort, disabled_tools=payload.disabled_tools, safety_override=payload.safety_override, no_failsafe=payload.no_failsafe, diff --git a/src/task_manager.py b/src/task_manager.py index 3d8cc42..114adee 100644 --- a/src/task_manager.py +++ b/src/task_manager.py @@ -48,6 +48,7 @@ class JobManager: command_timeout: int = 45, type_interval: float = 0.02, click_pause: float = 0.10, + reasoning_effort: str = "medium", disabled_tools: list[str] | None = None, safety_override: bool = False, no_failsafe: bool = False, @@ -93,6 +94,7 @@ class JobManager: "command_timeout": command_timeout, "type_interval": type_interval, "click_pause": click_pause, + "reasoning_effort": reasoning_effort, "no_failsafe": no_failsafe, "cancel_event": cancel_event, }, @@ -121,6 +123,7 @@ class JobManager: command_timeout: int, type_interval: float, click_pause: float, + reasoning_effort: str, no_failsafe: bool, cancel_event: threading.Event, ) -> None: @@ -218,6 +221,7 @@ class JobManager: command_timeout=command_timeout, type_interval=type_interval, click_pause=click_pause, + reasoning_effort=reasoning_effort, disable_tools=set(disabled_tools), ) try: diff --git a/tests/test_cli.py b/tests/test_cli.py index f058a97..37cc3de 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -29,7 +29,10 @@ def test_cli_emits_structured_return_and_data(monkeypatch: Any, capsys, tmp_path def fake_assess_task_safety(*_args, **_kwargs): return True, "safe", {"safe": True} + captured_kwargs: dict[str, Any] = {} + def fake_run_job(*_args, **_kwargs): + captured_kwargs.update(_kwargs) result = AgentResult( completed=True, result="Done", @@ -66,3 +69,4 @@ def test_cli_emits_structured_return_and_data(monkeypatch: Any, capsys, tmp_path assert payload["response"]["data"] == "file1.txt\nfile2.txt" assert payload["return"] == "Task completed successfully" assert payload["data"] == "file1.txt\nfile2.txt" + assert captured_kwargs["options"].reasoning_effort == "medium" diff --git a/tests/test_server_api.py b/tests/test_server_api.py index 49efdf7..676bc3a 100644 --- a/tests/test_server_api.py +++ b/tests/test_server_api.py @@ -26,6 +26,7 @@ class FakeJobManager: command_timeout: int = 45, type_interval: float = 0.02, click_pause: float = 0.10, + reasoning_effort: str = "medium", disabled_tools: list[str] | None = None, safety_override: bool = False, no_failsafe: bool = False, @@ -46,6 +47,7 @@ class FakeJobManager: "command_timeout": command_timeout, "type_interval": type_interval, "click_pause": click_pause, + "reasoning_effort": reasoning_effort, "no_failsafe": no_failsafe, } self._jobs[job_id] = { @@ -189,6 +191,7 @@ def test_create_job_returns_only_job_id_and_defaults_model(tmp_path: Path, monke manager = app.state.manager assert manager.last_submit_payload["model"] == "gpt-5.4-mini" assert manager.last_submit_payload["disabled_tools"] == ["click"] + assert manager.last_submit_payload["reasoning_effort"] == "medium" status_res = client.get(f"/api/jobs/{job_id}/status", headers=headers) assert status_res.status_code == 200