Resolve Runner issue, runner refused to launch

2026-05-23 13:53:26 +02:00
parent d6a9397914
commit c3925f37e1
9 changed files with 17 additions and 112 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -120,7 +120,6 @@ Common optional:
 - `OPENAI_API_KEY` (required when `CODEX_AUTH_MODE=api_key`)
 - `OPENAI_PROJECT_ID`, `OPENAI_ORG_ID`
 - `OPENAI_REVIEW_MODEL`
- `OPENAI_REASONING_EFFORT`
 - `CODEX_AUTH_MODE` (`api_key` default, `chatgpt` supported)
 - `CODEX_AUTH_JSON_PATH` (custom path to `auth.json` for `chatgpt` mode)
 - `WORKDIR`, `MAX_DIFF_BYTES`, `MAX_REVIEW_MINUTES`, `CONCURRENCY`
@@ -176,4 +175,4 @@ Treat these as high-sensitivity areas when modifying worker/runner paths.
 If you are confident that your changes are ready to be committed, please follow the commit message format below:

 ```[type]. Short description (max 50 chars)```
-Push after commiting. Ask the user once if you have permission to commit and from then on commit without asking.
+Push after commiting. Ask the user once if you have permission to commit and from then on commit without asking.
--- a/TODO.md
+++ b/TODO.md
@@ -15,7 +15,7 @@
 - [x] `BUG`: Log webhook events rejected because repo is not listed in `ALLOWED_REPOS`.
 - [ ] `FEATURE`: Full control UI to update the bots settings. Password in env variable protected login page. No more env variables.
 - [ ] `FEATURE`: Automatic Trigger on new PRs and or commits on PRs with context that its a change that needs review not the whole PR again. GITEA_ALLOW_PR_AUTO_REVIEW=true would be needed
- [ ] `BUG`: Container runner hardcodes `codex exec --json -m gpt-5`; use `OPENAI_REVIEW_MODEL` and `OPENAI_REASONING_EFFORT` consistently across runner paths.
+- [x] `BUG`: Container runner now uses configured `OPENAI_REVIEW_MODEL` and no longer configures reasoning-effort flags.
 - [ ] `BUG`: Preserve command arguments losslessly (quoted args are currently flattened by `" ".join(...)` + `.split()` roundtrip).
 - [ ] `BUG`: `parse_command` only matches when `@codex` is at the start of the comment; support inline command usage in normal review-discussion comments.
 - [ ] `BUG`: Add max comment length handling/chunking before posting to Gitea to avoid failures on large review outputs.
--- a/src/gitea_codex_bot/config.py
+++ b/src/gitea_codex_bot/config.py
@@ -20,7 +20,6 @@ class Settings(BaseSettings):
    openai_project_id: str | None = Field(default=None, alias="OPENAI_PROJECT_ID")
    openai_org_id: str | None = Field(default=None, alias="OPENAI_ORG_ID")
    openai_review_model: str = Field(default="gpt-5.3-codex", alias="OPENAI_REVIEW_MODEL")
-    openai_reasoning_effort: Literal["none", "low", "medium", "high"] = Field(default="high", alias="OPENAI_REASONING_EFFORT")
    codex_auth_mode: Literal["api_key", "chatgpt"] = Field(default="api_key", alias="CODEX_AUTH_MODE")
    codex_auth_json_path: str | None = Field(default=None, alias="CODEX_AUTH_JSON_PATH")

--- a/src/gitea_codex_bot/services/review_format.py
+++ b/src/gitea_codex_bot/services/review_format.py
@@ -105,4 +105,4 @@ def _format_usage_note(result: dict) -> str:
 def _format_missing_config_note(repo_configured: bool) -> str:
    if repo_configured:
        return ""
-    return "ℹ️.codex-review.yml is not configured"
+    return "> ℹ️.codex-review.yml is not configured"
--- a/src/gitea_codex_bot/services/reviewer.py
+++ b/src/gitea_codex_bot/services/reviewer.py
@@ -164,7 +164,6 @@ def _call_openai_review(settings: Settings, prompt: str) -> dict[str, Any]:
        "model": settings.openai_review_model,
        "input": prompt,
        "text": {"format": {"type": "json_object"}},
-        "reasoning": {"effort": settings.openai_reasoning_effort},
    }
    with httpx.Client(timeout=120.0) as client:
        response = client.post("https://api.openai.com/v1/responses", headers=headers, json=body)
@@ -236,18 +235,6 @@ def _fallback_review(diff_context: dict[str, Any], *, failure_reason: str | None
            }
        )

-    if "TODO" in diff_context["diff"]:
-        findings.append(
-            {
-                "severity": "low",
-                "file": "unknown",
-                "line_start": 1,
-                "line_end": 1,
-                "title": "TODO marker in diff",
-                "body": "The change introduces TODO markers that may indicate incomplete behavior.",
-                "suggestion": "Resolve or track TODOs before merging.",
-            }
-        )
    return {
        "verdict": "correct" if not findings else "has_issues",
        "confidence": 0.4 if not findings else 0.6,
--- a/src/gitea_codex_bot/workers/container_runner.py
+++ b/src/gitea_codex_bot/workers/container_runner.py
@@ -63,8 +63,6 @@ def run_review_ephemeral(
    gitea = GiteaClient(settings)
    pr = gitea.get_pull_request(repo, pr_number)
    repo_cfg = _load_repo_review_config_from_gitea(gitea, repo, pr.head_sha)
-    _apply_repo_default_review_mode(command, repo_cfg)
-    prompt = _build_review_instructions(command, repo_cfg)
    container_name = f"codex-review-{uuid.uuid4().hex[:12]}"
    extra_env: dict[str, str] = {
        "GITEA_TOKEN": settings.gitea_token.get_secret_value(),
@@ -83,24 +81,9 @@ def run_review_ephemeral(
            settings,
            pr=pr,
            container_name=container_name,
-            prompt=prompt,
            extra_env=extra_env,
-            include_reasoning_effort=True,
        )
-        if _needs_reasoning_effort_compat_retry(completed):
-            logger.info("Ephemeral runner does not support --reasoning-effort; retrying without it.")
-            completed = _run_ephemeral_container(
-                settings,
-                pr=pr,
-                container_name=container_name,
-                prompt=prompt,
-                extra_env=extra_env,
-                include_reasoning_effort=False,
-            )
        if completed.returncode != 0:
-            compat_failure = _summarize_review_prompt_compat_failure(completed)
-            if compat_failure:
-                raise RuntimeError(compat_failure)
            raise RuntimeError(_format_runner_failure(completed))
        parsed = _parse_review_result_from_stdout_artifact(completed.stdout)
        parsed["_meta"] = _extract_result_meta_from_codex_stdout(completed.stdout, settings)
@@ -115,15 +98,12 @@ def _run_ephemeral_container(
    *,
    pr: PullRequestContext,
    container_name: str,
-    prompt: str,
    extra_env: dict[str, str],
-    include_reasoning_effort: bool,
 ) -> subprocess.CompletedProcess[str]:
-    install_and_run = _build_install_and_run_command(settings, pr=pr, include_reasoning_effort=include_reasoning_effort)
+    install_and_run = _build_install_and_run_command(settings, pr=pr)
    cmd = _build_docker_command(settings, container_name=container_name, install_and_run=install_and_run)
    return subprocess.run(
        cmd,
-        input=prompt,
        text=True,
        check=False,
        capture_output=True,
@@ -136,7 +116,6 @@ def _build_install_and_run_command(
    settings: Settings,
    *,
    pr: PullRequestContext,
-    include_reasoning_effort: bool = True,
 ) -> str:
    steps = ["set -euo pipefail"]
    if settings.codex_auth_mode != "chatgpt":
@@ -162,7 +141,8 @@ def _build_install_and_run_command(
    steps.extend(
        [
            "apt-get update >/tmp/apt-update.log 2>&1 && apt-get install -y --no-install-recommends ca-certificates git >/tmp/apt-install.log 2>&1 || { rc=$?; echo 'ca-certificates/git install failed'; tail -n 80 /tmp/apt-update.log || true; tail -n 80 /tmp/apt-install.log || true; exit $rc; }",
-            "npm install -g @openai/codex >/tmp/codex-install.log 2>&1 || { rc=$?; echo 'codex install failed'; tail -n 200 /tmp/codex-install.log || true; exit $rc; }",
+            "npm install -g @openai/codex@latest >/tmp/codex-install.log 2>&1 || { rc=$?; echo 'codex install failed'; tail -n 200 /tmp/codex-install.log || true; exit $rc; }",
+            "codex --version >/tmp/codex-version.log 2>&1 || { rc=$?; echo 'codex version check failed'; tail -n 40 /tmp/codex-version.log || true; exit $rc; }",
        ]
    )
    schema_json = json.dumps(REVIEW_RESULT_SCHEMA, separators=(",", ":"))
@@ -181,7 +161,6 @@ def _build_install_and_run_command(
        ]
    )
    model = settings.openai_review_model.strip()
-    reasoning_effort = settings.openai_reasoning_effort.strip()
    codex_exec_parts = [
        "codex exec review",
        f"--base {shlex.quote(pr.base_sha)}",
@@ -193,9 +172,6 @@ def _build_install_and_run_command(
    ]
    if model:
        codex_exec_parts.append(f"-m {shlex.quote(model)}")
-    if include_reasoning_effort and reasoning_effort:
-        codex_exec_parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}")
-    codex_exec_parts.append("-")
    steps.extend(
        [
            " ".join(codex_exec_parts),
@@ -207,13 +183,6 @@ def _build_install_and_run_command(
    return "\n".join(steps)


-def _needs_reasoning_effort_compat_retry(completed: subprocess.CompletedProcess[str]) -> bool:
-    if completed.returncode == 0:
-        return False
-    stderr_text = completed.stderr or ""
-    return "unexpected argument '--reasoning-effort' found" in stderr_text
-
-
 def _build_docker_command(settings: Settings, *, container_name: str, install_and_run: str) -> list[str]:
    cmd = [
        "docker",
@@ -263,7 +232,7 @@ def _ephemeral_runner_failure_result(exc: Exception, auth_mode: str) -> dict[str
    summary = f"{mode_label} runner failed before review execution. Error: {message}"
    return {
        "verdict": "has_issues",
-        "confidence": 0.6,
+        "confidence": 0.67,
        "summary": summary,
        "findings": [
            {
@@ -328,32 +297,6 @@ def _load_repo_review_config_from_gitea(gitea: GiteaClient, repo: str, head_sha:
    return parse_repo_review_config_text(content, configured=True)


-def _apply_repo_default_review_mode(command: ParsedCommand, repo_cfg: RepoReviewConfig) -> None:
-    if command.name != "review" or command.mode_explicit:
-        return
-    configured_mode = repo_cfg.default_mode
-    command.mode = configured_mode if configured_mode in {"summary", "security", "performance", "tests", "full"} else "summary"
-
-
-def _build_review_instructions(command: ParsedCommand, repo_cfg: RepoReviewConfig) -> str:
-    focus = ", ".join(repo_cfg.focus) if repo_cfg.focus else "correctness, security, maintainability"
-    ignore = ", ".join(repo_cfg.ignore) if repo_cfg.ignore else "(none)"
-    lines = [
-        "Review this pull request using local git data in this checkout only.",
-        "Focus on issues introduced by this PR.",
-        "Prioritize correctness, security, data loss, broken behavior, bad migrations, and missing tests.",
-        "Avoid style-only nitpicks.",
-        f"Requested mode: {command.mode}",
-        f"Command: {command.raw}",
-        f"Focus areas: {focus}",
-        f"Ignore patterns: {ignore}",
-        f"Repository include_tests setting: {repo_cfg.include_tests}",
-        f"Full-content review requested: {command.full}",
-        "Return strict JSON matching the provided output schema.",
-    ]
-    return "\n".join(lines)
-
-
 def _parse_review_result_from_stdout_artifact(stdout: str) -> dict[str, Any]:
    start = stdout.find(RESULT_START_MARKER)
    end = stdout.find(RESULT_END_MARKER)
@@ -371,19 +314,6 @@ def _parse_review_result_from_stdout_artifact(stdout: str) -> dict[str, Any]:
    return payload


-def _summarize_review_prompt_compat_failure(completed: subprocess.CompletedProcess[str]) -> str | None:
-    text = " ".join([(completed.stdout or "").strip(), (completed.stderr or "").strip()]).lower()
-    has_prompt_conflict = "prompt" in text and (
-        "cannot be used with" in text or "can't be used with" in text or "incompatible" in text
-    )
-    if "--base" not in text or not has_prompt_conflict:
-        return None
-    return (
-        "Installed Codex CLI rejected `codex exec review --base ...` with custom instructions. "
-        "This runner is configured to fail fast on that compatibility issue."
-    )
-
-
 def _extract_result_meta_from_codex_stdout(stdout: str, settings: Settings) -> dict[str, Any]:
    model = settings.openai_review_model
    usage: dict[str, int] = {}
--- a/tests/test_container_runner.py
+++ b/tests/test_container_runner.py
@@ -91,15 +91,18 @@ def test_build_install_command_chatgpt_mode_sets_git_checkout_and_review(monkeyp
    assert f"codex exec review --base {pr.base_sha}" in command
    assert "--output-schema /tmp/codex-review-schema.json" in command
    assert "-o /tmp/codex-review-result.json" in command
+    assert "npm install -g @openai/codex@latest" in command
+    assert "codex --version >/tmp/codex-version.log" in command
+    assert " - " not in command
    assert f'echo "{RESULT_START_MARKER}"' in command
    assert f'echo "{RESULT_END_MARKER}"' in command


-def test_build_install_command_can_disable_reasoning_effort_flag() -> None:
+def test_build_install_command_does_not_include_reasoning_effort_flag() -> None:
    settings = get_settings()
    pr = _sample_pr()

-    command = _build_install_and_run_command(settings, pr=pr, include_reasoning_effort=False)
+    command = _build_install_and_run_command(settings, pr=pr)

    assert "--reasoning-effort" not in command

@@ -220,7 +223,7 @@ def test_run_review_ephemeral_api_key_mode_does_not_fallback_to_host(monkeypatch
    assert "API-key auth runner failed" in result["summary"]


-def test_run_review_ephemeral_retries_without_reasoning_effort_when_unsupported(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_run_review_ephemeral_single_attempt_success(monkeypatch: pytest.MonkeyPatch) -> None:
    get_settings.cache_clear()
    settings = get_settings()

@@ -239,16 +242,6 @@ def test_run_review_ephemeral_retries_without_reasoning_effort_when_unsupported(

    def _fake_run(cmd, *args, **kwargs):
        calls.append(cmd)
-        if len(calls) == 1:
-            return type(
-                "Completed",
-                (),
-                {
-                    "returncode": 2,
-                    "stdout": "",
-                    "stderr": "error: unexpected argument '--reasoning-effort' found",
-                },
-            )()
        return type(
            "Completed",
            (),
@@ -274,11 +267,9 @@ def test_run_review_ephemeral_retries_without_reasoning_effort_when_unsupported(
    )

    assert result["verdict"] == "correct"
-    assert len(calls) == 2
+    assert len(calls) == 1
    first_shell = calls[0][-1]
-    second_shell = calls[1][-1]
-    assert "--reasoning-effort" in first_shell
-    assert "--reasoning-effort" not in second_shell
+    assert "--reasoning-effort" not in first_shell


 def test_parse_review_result_from_stdout_artifact() -> None:
--- a/tests/test_review_format.py
+++ b/tests/test_review_format.py
@@ -68,7 +68,7 @@ def test_format_result_comment_appends_missing_config_note_for_system_layout() -
        },
        repo_configured=False,
    )
-    assert body.endswith("ℹ️.codex-review.yml is not configured")
+    assert body.endswith("> ℹ️.codex-review.yml is not configured")


 def test_format_result_comment_does_not_append_missing_config_note_to_agent_markdown() -> None:
@@ -79,4 +79,4 @@ def test_format_result_comment_does_not_append_missing_config_note_to_agent_mark
        },
        repo_configured=False,
    )
-    assert "ℹ️.codex-review.yml is not configured" not in body
+    assert "> ℹ️.codex-review.yml is not configured" not in body
--- a/tests/test_reviewer_fallback.py
+++ b/tests/test_reviewer_fallback.py
@@ -36,7 +36,6 @@ def test_run_review_for_pr_uses_openai_http_error_in_fallback(monkeypatch) -> No
    assert result["summary"].startswith("OpenAI review failed. Error: OpenAI API HTTP 429:")
    assert result["findings"][0]["title"] == "OpenAI review request failed"
    assert "rate_limited" in result["findings"][0]["body"]
-    assert any(finding["title"] == "TODO marker in diff" for finding in result["findings"])


 def test_build_prompt_includes_trigger_message() -> None: