Resolve Runner issue, runner refused to launch
All checks were successful
ci / test (pull_request) Successful in 35s
ci / publish (pull_request) Has been skipped

This commit is contained in:
Space-Banane
2026-05-23 13:53:26 +02:00
parent d6a9397914
commit c3925f37e1
9 changed files with 17 additions and 112 deletions

View File

@@ -120,7 +120,6 @@ Common optional:
- `OPENAI_API_KEY` (required when `CODEX_AUTH_MODE=api_key`)
- `OPENAI_PROJECT_ID`, `OPENAI_ORG_ID`
- `OPENAI_REVIEW_MODEL`
- `OPENAI_REASONING_EFFORT`
- `CODEX_AUTH_MODE` (`api_key` default, `chatgpt` supported)
- `CODEX_AUTH_JSON_PATH` (custom path to `auth.json` for `chatgpt` mode)
- `WORKDIR`, `MAX_DIFF_BYTES`, `MAX_REVIEW_MINUTES`, `CONCURRENCY`

View File

@@ -15,7 +15,7 @@
- [x] `BUG`: Log webhook events rejected because repo is not listed in `ALLOWED_REPOS`.
- [ ] `FEATURE`: Full control UI to update the bots settings. Password in env variable protected login page. No more env variables.
- [ ] `FEATURE`: Automatic Trigger on new PRs and or commits on PRs with context that its a change that needs review not the whole PR again. GITEA_ALLOW_PR_AUTO_REVIEW=true would be needed
- [ ] `BUG`: Container runner hardcodes `codex exec --json -m gpt-5`; use `OPENAI_REVIEW_MODEL` and `OPENAI_REASONING_EFFORT` consistently across runner paths.
- [x] `BUG`: Container runner now uses configured `OPENAI_REVIEW_MODEL` and no longer configures reasoning-effort flags.
- [ ] `BUG`: Preserve command arguments losslessly (quoted args are currently flattened by `" ".join(...)` + `.split()` roundtrip).
- [ ] `BUG`: `parse_command` only matches when `@codex` is at the start of the comment; support inline command usage in normal review-discussion comments.
- [ ] `BUG`: Add max comment length handling/chunking before posting to Gitea to avoid failures on large review outputs.

View File

@@ -20,7 +20,6 @@ class Settings(BaseSettings):
openai_project_id: str | None = Field(default=None, alias="OPENAI_PROJECT_ID")
openai_org_id: str | None = Field(default=None, alias="OPENAI_ORG_ID")
openai_review_model: str = Field(default="gpt-5.3-codex", alias="OPENAI_REVIEW_MODEL")
openai_reasoning_effort: Literal["none", "low", "medium", "high"] = Field(default="high", alias="OPENAI_REASONING_EFFORT")
codex_auth_mode: Literal["api_key", "chatgpt"] = Field(default="api_key", alias="CODEX_AUTH_MODE")
codex_auth_json_path: str | None = Field(default=None, alias="CODEX_AUTH_JSON_PATH")

View File

@@ -105,4 +105,4 @@ def _format_usage_note(result: dict) -> str:
def _format_missing_config_note(repo_configured: bool) -> str:
if repo_configured:
return ""
return ".codex-review.yml is not configured"
return "> .codex-review.yml is not configured"

View File

@@ -164,7 +164,6 @@ def _call_openai_review(settings: Settings, prompt: str) -> dict[str, Any]:
"model": settings.openai_review_model,
"input": prompt,
"text": {"format": {"type": "json_object"}},
"reasoning": {"effort": settings.openai_reasoning_effort},
}
with httpx.Client(timeout=120.0) as client:
response = client.post("https://api.openai.com/v1/responses", headers=headers, json=body)
@@ -236,18 +235,6 @@ def _fallback_review(diff_context: dict[str, Any], *, failure_reason: str | None
}
)
if "TODO" in diff_context["diff"]:
findings.append(
{
"severity": "low",
"file": "unknown",
"line_start": 1,
"line_end": 1,
"title": "TODO marker in diff",
"body": "The change introduces TODO markers that may indicate incomplete behavior.",
"suggestion": "Resolve or track TODOs before merging.",
}
)
return {
"verdict": "correct" if not findings else "has_issues",
"confidence": 0.4 if not findings else 0.6,

View File

@@ -63,8 +63,6 @@ def run_review_ephemeral(
gitea = GiteaClient(settings)
pr = gitea.get_pull_request(repo, pr_number)
repo_cfg = _load_repo_review_config_from_gitea(gitea, repo, pr.head_sha)
_apply_repo_default_review_mode(command, repo_cfg)
prompt = _build_review_instructions(command, repo_cfg)
container_name = f"codex-review-{uuid.uuid4().hex[:12]}"
extra_env: dict[str, str] = {
"GITEA_TOKEN": settings.gitea_token.get_secret_value(),
@@ -83,24 +81,9 @@ def run_review_ephemeral(
settings,
pr=pr,
container_name=container_name,
prompt=prompt,
extra_env=extra_env,
include_reasoning_effort=True,
)
if _needs_reasoning_effort_compat_retry(completed):
logger.info("Ephemeral runner does not support --reasoning-effort; retrying without it.")
completed = _run_ephemeral_container(
settings,
pr=pr,
container_name=container_name,
prompt=prompt,
extra_env=extra_env,
include_reasoning_effort=False,
)
if completed.returncode != 0:
compat_failure = _summarize_review_prompt_compat_failure(completed)
if compat_failure:
raise RuntimeError(compat_failure)
raise RuntimeError(_format_runner_failure(completed))
parsed = _parse_review_result_from_stdout_artifact(completed.stdout)
parsed["_meta"] = _extract_result_meta_from_codex_stdout(completed.stdout, settings)
@@ -115,15 +98,12 @@ def _run_ephemeral_container(
*,
pr: PullRequestContext,
container_name: str,
prompt: str,
extra_env: dict[str, str],
include_reasoning_effort: bool,
) -> subprocess.CompletedProcess[str]:
install_and_run = _build_install_and_run_command(settings, pr=pr, include_reasoning_effort=include_reasoning_effort)
install_and_run = _build_install_and_run_command(settings, pr=pr)
cmd = _build_docker_command(settings, container_name=container_name, install_and_run=install_and_run)
return subprocess.run(
cmd,
input=prompt,
text=True,
check=False,
capture_output=True,
@@ -136,7 +116,6 @@ def _build_install_and_run_command(
settings: Settings,
*,
pr: PullRequestContext,
include_reasoning_effort: bool = True,
) -> str:
steps = ["set -euo pipefail"]
if settings.codex_auth_mode != "chatgpt":
@@ -162,7 +141,8 @@ def _build_install_and_run_command(
steps.extend(
[
"apt-get update >/tmp/apt-update.log 2>&1 && apt-get install -y --no-install-recommends ca-certificates git >/tmp/apt-install.log 2>&1 || { rc=$?; echo 'ca-certificates/git install failed'; tail -n 80 /tmp/apt-update.log || true; tail -n 80 /tmp/apt-install.log || true; exit $rc; }",
"npm install -g @openai/codex >/tmp/codex-install.log 2>&1 || { rc=$?; echo 'codex install failed'; tail -n 200 /tmp/codex-install.log || true; exit $rc; }",
"npm install -g @openai/codex@latest >/tmp/codex-install.log 2>&1 || { rc=$?; echo 'codex install failed'; tail -n 200 /tmp/codex-install.log || true; exit $rc; }",
"codex --version >/tmp/codex-version.log 2>&1 || { rc=$?; echo 'codex version check failed'; tail -n 40 /tmp/codex-version.log || true; exit $rc; }",
]
)
schema_json = json.dumps(REVIEW_RESULT_SCHEMA, separators=(",", ":"))
@@ -181,7 +161,6 @@ def _build_install_and_run_command(
]
)
model = settings.openai_review_model.strip()
reasoning_effort = settings.openai_reasoning_effort.strip()
codex_exec_parts = [
"codex exec review",
f"--base {shlex.quote(pr.base_sha)}",
@@ -193,9 +172,6 @@ def _build_install_and_run_command(
]
if model:
codex_exec_parts.append(f"-m {shlex.quote(model)}")
if include_reasoning_effort and reasoning_effort:
codex_exec_parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}")
codex_exec_parts.append("-")
steps.extend(
[
" ".join(codex_exec_parts),
@@ -207,13 +183,6 @@ def _build_install_and_run_command(
return "\n".join(steps)
def _needs_reasoning_effort_compat_retry(completed: subprocess.CompletedProcess[str]) -> bool:
if completed.returncode == 0:
return False
stderr_text = completed.stderr or ""
return "unexpected argument '--reasoning-effort' found" in stderr_text
def _build_docker_command(settings: Settings, *, container_name: str, install_and_run: str) -> list[str]:
cmd = [
"docker",
@@ -263,7 +232,7 @@ def _ephemeral_runner_failure_result(exc: Exception, auth_mode: str) -> dict[str
summary = f"{mode_label} runner failed before review execution. Error: {message}"
return {
"verdict": "has_issues",
"confidence": 0.6,
"confidence": 0.67,
"summary": summary,
"findings": [
{
@@ -328,32 +297,6 @@ def _load_repo_review_config_from_gitea(gitea: GiteaClient, repo: str, head_sha:
return parse_repo_review_config_text(content, configured=True)
def _apply_repo_default_review_mode(command: ParsedCommand, repo_cfg: RepoReviewConfig) -> None:
if command.name != "review" or command.mode_explicit:
return
configured_mode = repo_cfg.default_mode
command.mode = configured_mode if configured_mode in {"summary", "security", "performance", "tests", "full"} else "summary"
def _build_review_instructions(command: ParsedCommand, repo_cfg: RepoReviewConfig) -> str:
focus = ", ".join(repo_cfg.focus) if repo_cfg.focus else "correctness, security, maintainability"
ignore = ", ".join(repo_cfg.ignore) if repo_cfg.ignore else "(none)"
lines = [
"Review this pull request using local git data in this checkout only.",
"Focus on issues introduced by this PR.",
"Prioritize correctness, security, data loss, broken behavior, bad migrations, and missing tests.",
"Avoid style-only nitpicks.",
f"Requested mode: {command.mode}",
f"Command: {command.raw}",
f"Focus areas: {focus}",
f"Ignore patterns: {ignore}",
f"Repository include_tests setting: {repo_cfg.include_tests}",
f"Full-content review requested: {command.full}",
"Return strict JSON matching the provided output schema.",
]
return "\n".join(lines)
def _parse_review_result_from_stdout_artifact(stdout: str) -> dict[str, Any]:
start = stdout.find(RESULT_START_MARKER)
end = stdout.find(RESULT_END_MARKER)
@@ -371,19 +314,6 @@ def _parse_review_result_from_stdout_artifact(stdout: str) -> dict[str, Any]:
return payload
def _summarize_review_prompt_compat_failure(completed: subprocess.CompletedProcess[str]) -> str | None:
text = " ".join([(completed.stdout or "").strip(), (completed.stderr or "").strip()]).lower()
has_prompt_conflict = "prompt" in text and (
"cannot be used with" in text or "can't be used with" in text or "incompatible" in text
)
if "--base" not in text or not has_prompt_conflict:
return None
return (
"Installed Codex CLI rejected `codex exec review --base ...` with custom instructions. "
"This runner is configured to fail fast on that compatibility issue."
)
def _extract_result_meta_from_codex_stdout(stdout: str, settings: Settings) -> dict[str, Any]:
model = settings.openai_review_model
usage: dict[str, int] = {}

View File

@@ -91,15 +91,18 @@ def test_build_install_command_chatgpt_mode_sets_git_checkout_and_review(monkeyp
assert f"codex exec review --base {pr.base_sha}" in command
assert "--output-schema /tmp/codex-review-schema.json" in command
assert "-o /tmp/codex-review-result.json" in command
assert "npm install -g @openai/codex@latest" in command
assert "codex --version >/tmp/codex-version.log" in command
assert " - " not in command
assert f'echo "{RESULT_START_MARKER}"' in command
assert f'echo "{RESULT_END_MARKER}"' in command
def test_build_install_command_can_disable_reasoning_effort_flag() -> None:
def test_build_install_command_does_not_include_reasoning_effort_flag() -> None:
settings = get_settings()
pr = _sample_pr()
command = _build_install_and_run_command(settings, pr=pr, include_reasoning_effort=False)
command = _build_install_and_run_command(settings, pr=pr)
assert "--reasoning-effort" not in command
@@ -220,7 +223,7 @@ def test_run_review_ephemeral_api_key_mode_does_not_fallback_to_host(monkeypatch
assert "API-key auth runner failed" in result["summary"]
def test_run_review_ephemeral_retries_without_reasoning_effort_when_unsupported(monkeypatch: pytest.MonkeyPatch) -> None:
def test_run_review_ephemeral_single_attempt_success(monkeypatch: pytest.MonkeyPatch) -> None:
get_settings.cache_clear()
settings = get_settings()
@@ -239,16 +242,6 @@ def test_run_review_ephemeral_retries_without_reasoning_effort_when_unsupported(
def _fake_run(cmd, *args, **kwargs):
calls.append(cmd)
if len(calls) == 1:
return type(
"Completed",
(),
{
"returncode": 2,
"stdout": "",
"stderr": "error: unexpected argument '--reasoning-effort' found",
},
)()
return type(
"Completed",
(),
@@ -274,11 +267,9 @@ def test_run_review_ephemeral_retries_without_reasoning_effort_when_unsupported(
)
assert result["verdict"] == "correct"
assert len(calls) == 2
assert len(calls) == 1
first_shell = calls[0][-1]
second_shell = calls[1][-1]
assert "--reasoning-effort" in first_shell
assert "--reasoning-effort" not in second_shell
assert "--reasoning-effort" not in first_shell
def test_parse_review_result_from_stdout_artifact() -> None:

View File

@@ -68,7 +68,7 @@ def test_format_result_comment_appends_missing_config_note_for_system_layout() -
},
repo_configured=False,
)
assert body.endswith(".codex-review.yml is not configured")
assert body.endswith("> .codex-review.yml is not configured")
def test_format_result_comment_does_not_append_missing_config_note_to_agent_markdown() -> None:
@@ -79,4 +79,4 @@ def test_format_result_comment_does_not_append_missing_config_note_to_agent_mark
},
repo_configured=False,
)
assert ".codex-review.yml is not configured" not in body
assert "> .codex-review.yml is not configured" not in body

View File

@@ -36,7 +36,6 @@ def test_run_review_for_pr_uses_openai_http_error_in_fallback(monkeypatch) -> No
assert result["summary"].startswith("OpenAI review failed. Error: OpenAI API HTTP 429:")
assert result["findings"][0]["title"] == "OpenAI review request failed"
assert "rate_limited" in result["findings"][0]["body"]
assert any(finding["title"] == "TODO marker in diff" for finding in result["findings"])
def test_build_prompt_includes_trigger_message() -> None: