diff --git a/AGENTS.md b/AGENTS.md index 75fd79f..ba4e550 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -120,7 +120,6 @@ Common optional: - `OPENAI_API_KEY` (required when `CODEX_AUTH_MODE=api_key`) - `OPENAI_PROJECT_ID`, `OPENAI_ORG_ID` - `OPENAI_REVIEW_MODEL` -- `OPENAI_REASONING_EFFORT` - `CODEX_AUTH_MODE` (`api_key` default, `chatgpt` supported) - `CODEX_AUTH_JSON_PATH` (custom path to `auth.json` for `chatgpt` mode) - `WORKDIR`, `MAX_DIFF_BYTES`, `MAX_REVIEW_MINUTES`, `CONCURRENCY` @@ -176,4 +175,4 @@ Treat these as high-sensitivity areas when modifying worker/runner paths. If you are confident that your changes are ready to be committed, please follow the commit message format below: ```[type]. Short description (max 50 chars)``` -Push after commiting. Ask the user once if you have permission to commit and from then on commit without asking. \ No newline at end of file +Push after commiting. Ask the user once if you have permission to commit and from then on commit without asking. diff --git a/TODO.md b/TODO.md index 3944a14..02b7809 100644 --- a/TODO.md +++ b/TODO.md @@ -15,7 +15,7 @@ - [x] `BUG`: Log webhook events rejected because repo is not listed in `ALLOWED_REPOS`. - [ ] `FEATURE`: Full control UI to update the bots settings. Password in env variable protected login page. No more env variables. - [ ] `FEATURE`: Automatic Trigger on new PRs and or commits on PRs with context that its a change that needs review not the whole PR again. GITEA_ALLOW_PR_AUTO_REVIEW=true would be needed -- [ ] `BUG`: Container runner hardcodes `codex exec --json -m gpt-5`; use `OPENAI_REVIEW_MODEL` and `OPENAI_REASONING_EFFORT` consistently across runner paths. +- [x] `BUG`: Container runner now uses configured `OPENAI_REVIEW_MODEL` and no longer configures reasoning-effort flags. - [ ] `BUG`: Preserve command arguments losslessly (quoted args are currently flattened by `" ".join(...)` + `.split()` roundtrip). - [ ] `BUG`: `parse_command` only matches when `@codex` is at the start of the comment; support inline command usage in normal review-discussion comments. - [ ] `BUG`: Add max comment length handling/chunking before posting to Gitea to avoid failures on large review outputs. diff --git a/src/gitea_codex_bot/config.py b/src/gitea_codex_bot/config.py index 3f4fd5a..ceb1060 100644 --- a/src/gitea_codex_bot/config.py +++ b/src/gitea_codex_bot/config.py @@ -20,7 +20,6 @@ class Settings(BaseSettings): openai_project_id: str | None = Field(default=None, alias="OPENAI_PROJECT_ID") openai_org_id: str | None = Field(default=None, alias="OPENAI_ORG_ID") openai_review_model: str = Field(default="gpt-5.3-codex", alias="OPENAI_REVIEW_MODEL") - openai_reasoning_effort: Literal["none", "low", "medium", "high"] = Field(default="high", alias="OPENAI_REASONING_EFFORT") codex_auth_mode: Literal["api_key", "chatgpt"] = Field(default="api_key", alias="CODEX_AUTH_MODE") codex_auth_json_path: str | None = Field(default=None, alias="CODEX_AUTH_JSON_PATH") diff --git a/src/gitea_codex_bot/services/review_format.py b/src/gitea_codex_bot/services/review_format.py index ffe49ce..bc8d3aa 100644 --- a/src/gitea_codex_bot/services/review_format.py +++ b/src/gitea_codex_bot/services/review_format.py @@ -105,4 +105,4 @@ def _format_usage_note(result: dict) -> str: def _format_missing_config_note(repo_configured: bool) -> str: if repo_configured: return "" - return "ℹ️.codex-review.yml is not configured" + return "> ℹ️.codex-review.yml is not configured" diff --git a/src/gitea_codex_bot/services/reviewer.py b/src/gitea_codex_bot/services/reviewer.py index 7ea47dd..30c0d9f 100644 --- a/src/gitea_codex_bot/services/reviewer.py +++ b/src/gitea_codex_bot/services/reviewer.py @@ -164,7 +164,6 @@ def _call_openai_review(settings: Settings, prompt: str) -> dict[str, Any]: "model": settings.openai_review_model, "input": prompt, "text": {"format": {"type": "json_object"}}, - "reasoning": {"effort": settings.openai_reasoning_effort}, } with httpx.Client(timeout=120.0) as client: response = client.post("https://api.openai.com/v1/responses", headers=headers, json=body) @@ -236,18 +235,6 @@ def _fallback_review(diff_context: dict[str, Any], *, failure_reason: str | None } ) - if "TODO" in diff_context["diff"]: - findings.append( - { - "severity": "low", - "file": "unknown", - "line_start": 1, - "line_end": 1, - "title": "TODO marker in diff", - "body": "The change introduces TODO markers that may indicate incomplete behavior.", - "suggestion": "Resolve or track TODOs before merging.", - } - ) return { "verdict": "correct" if not findings else "has_issues", "confidence": 0.4 if not findings else 0.6, diff --git a/src/gitea_codex_bot/workers/container_runner.py b/src/gitea_codex_bot/workers/container_runner.py index 0f848a3..a02d28c 100644 --- a/src/gitea_codex_bot/workers/container_runner.py +++ b/src/gitea_codex_bot/workers/container_runner.py @@ -63,8 +63,6 @@ def run_review_ephemeral( gitea = GiteaClient(settings) pr = gitea.get_pull_request(repo, pr_number) repo_cfg = _load_repo_review_config_from_gitea(gitea, repo, pr.head_sha) - _apply_repo_default_review_mode(command, repo_cfg) - prompt = _build_review_instructions(command, repo_cfg) container_name = f"codex-review-{uuid.uuid4().hex[:12]}" extra_env: dict[str, str] = { "GITEA_TOKEN": settings.gitea_token.get_secret_value(), @@ -83,24 +81,9 @@ def run_review_ephemeral( settings, pr=pr, container_name=container_name, - prompt=prompt, extra_env=extra_env, - include_reasoning_effort=True, ) - if _needs_reasoning_effort_compat_retry(completed): - logger.info("Ephemeral runner does not support --reasoning-effort; retrying without it.") - completed = _run_ephemeral_container( - settings, - pr=pr, - container_name=container_name, - prompt=prompt, - extra_env=extra_env, - include_reasoning_effort=False, - ) if completed.returncode != 0: - compat_failure = _summarize_review_prompt_compat_failure(completed) - if compat_failure: - raise RuntimeError(compat_failure) raise RuntimeError(_format_runner_failure(completed)) parsed = _parse_review_result_from_stdout_artifact(completed.stdout) parsed["_meta"] = _extract_result_meta_from_codex_stdout(completed.stdout, settings) @@ -115,15 +98,12 @@ def _run_ephemeral_container( *, pr: PullRequestContext, container_name: str, - prompt: str, extra_env: dict[str, str], - include_reasoning_effort: bool, ) -> subprocess.CompletedProcess[str]: - install_and_run = _build_install_and_run_command(settings, pr=pr, include_reasoning_effort=include_reasoning_effort) + install_and_run = _build_install_and_run_command(settings, pr=pr) cmd = _build_docker_command(settings, container_name=container_name, install_and_run=install_and_run) return subprocess.run( cmd, - input=prompt, text=True, check=False, capture_output=True, @@ -136,7 +116,6 @@ def _build_install_and_run_command( settings: Settings, *, pr: PullRequestContext, - include_reasoning_effort: bool = True, ) -> str: steps = ["set -euo pipefail"] if settings.codex_auth_mode != "chatgpt": @@ -162,7 +141,8 @@ def _build_install_and_run_command( steps.extend( [ "apt-get update >/tmp/apt-update.log 2>&1 && apt-get install -y --no-install-recommends ca-certificates git >/tmp/apt-install.log 2>&1 || { rc=$?; echo 'ca-certificates/git install failed'; tail -n 80 /tmp/apt-update.log || true; tail -n 80 /tmp/apt-install.log || true; exit $rc; }", - "npm install -g @openai/codex >/tmp/codex-install.log 2>&1 || { rc=$?; echo 'codex install failed'; tail -n 200 /tmp/codex-install.log || true; exit $rc; }", + "npm install -g @openai/codex@latest >/tmp/codex-install.log 2>&1 || { rc=$?; echo 'codex install failed'; tail -n 200 /tmp/codex-install.log || true; exit $rc; }", + "codex --version >/tmp/codex-version.log 2>&1 || { rc=$?; echo 'codex version check failed'; tail -n 40 /tmp/codex-version.log || true; exit $rc; }", ] ) schema_json = json.dumps(REVIEW_RESULT_SCHEMA, separators=(",", ":")) @@ -181,7 +161,6 @@ def _build_install_and_run_command( ] ) model = settings.openai_review_model.strip() - reasoning_effort = settings.openai_reasoning_effort.strip() codex_exec_parts = [ "codex exec review", f"--base {shlex.quote(pr.base_sha)}", @@ -193,9 +172,6 @@ def _build_install_and_run_command( ] if model: codex_exec_parts.append(f"-m {shlex.quote(model)}") - if include_reasoning_effort and reasoning_effort: - codex_exec_parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}") - codex_exec_parts.append("-") steps.extend( [ " ".join(codex_exec_parts), @@ -207,13 +183,6 @@ def _build_install_and_run_command( return "\n".join(steps) -def _needs_reasoning_effort_compat_retry(completed: subprocess.CompletedProcess[str]) -> bool: - if completed.returncode == 0: - return False - stderr_text = completed.stderr or "" - return "unexpected argument '--reasoning-effort' found" in stderr_text - - def _build_docker_command(settings: Settings, *, container_name: str, install_and_run: str) -> list[str]: cmd = [ "docker", @@ -263,7 +232,7 @@ def _ephemeral_runner_failure_result(exc: Exception, auth_mode: str) -> dict[str summary = f"{mode_label} runner failed before review execution. Error: {message}" return { "verdict": "has_issues", - "confidence": 0.6, + "confidence": 0.67, "summary": summary, "findings": [ { @@ -328,32 +297,6 @@ def _load_repo_review_config_from_gitea(gitea: GiteaClient, repo: str, head_sha: return parse_repo_review_config_text(content, configured=True) -def _apply_repo_default_review_mode(command: ParsedCommand, repo_cfg: RepoReviewConfig) -> None: - if command.name != "review" or command.mode_explicit: - return - configured_mode = repo_cfg.default_mode - command.mode = configured_mode if configured_mode in {"summary", "security", "performance", "tests", "full"} else "summary" - - -def _build_review_instructions(command: ParsedCommand, repo_cfg: RepoReviewConfig) -> str: - focus = ", ".join(repo_cfg.focus) if repo_cfg.focus else "correctness, security, maintainability" - ignore = ", ".join(repo_cfg.ignore) if repo_cfg.ignore else "(none)" - lines = [ - "Review this pull request using local git data in this checkout only.", - "Focus on issues introduced by this PR.", - "Prioritize correctness, security, data loss, broken behavior, bad migrations, and missing tests.", - "Avoid style-only nitpicks.", - f"Requested mode: {command.mode}", - f"Command: {command.raw}", - f"Focus areas: {focus}", - f"Ignore patterns: {ignore}", - f"Repository include_tests setting: {repo_cfg.include_tests}", - f"Full-content review requested: {command.full}", - "Return strict JSON matching the provided output schema.", - ] - return "\n".join(lines) - - def _parse_review_result_from_stdout_artifact(stdout: str) -> dict[str, Any]: start = stdout.find(RESULT_START_MARKER) end = stdout.find(RESULT_END_MARKER) @@ -371,19 +314,6 @@ def _parse_review_result_from_stdout_artifact(stdout: str) -> dict[str, Any]: return payload -def _summarize_review_prompt_compat_failure(completed: subprocess.CompletedProcess[str]) -> str | None: - text = " ".join([(completed.stdout or "").strip(), (completed.stderr or "").strip()]).lower() - has_prompt_conflict = "prompt" in text and ( - "cannot be used with" in text or "can't be used with" in text or "incompatible" in text - ) - if "--base" not in text or not has_prompt_conflict: - return None - return ( - "Installed Codex CLI rejected `codex exec review --base ...` with custom instructions. " - "This runner is configured to fail fast on that compatibility issue." - ) - - def _extract_result_meta_from_codex_stdout(stdout: str, settings: Settings) -> dict[str, Any]: model = settings.openai_review_model usage: dict[str, int] = {} diff --git a/tests/test_container_runner.py b/tests/test_container_runner.py index e2b26b3..57c437a 100644 --- a/tests/test_container_runner.py +++ b/tests/test_container_runner.py @@ -91,15 +91,18 @@ def test_build_install_command_chatgpt_mode_sets_git_checkout_and_review(monkeyp assert f"codex exec review --base {pr.base_sha}" in command assert "--output-schema /tmp/codex-review-schema.json" in command assert "-o /tmp/codex-review-result.json" in command + assert "npm install -g @openai/codex@latest" in command + assert "codex --version >/tmp/codex-version.log" in command + assert " - " not in command assert f'echo "{RESULT_START_MARKER}"' in command assert f'echo "{RESULT_END_MARKER}"' in command -def test_build_install_command_can_disable_reasoning_effort_flag() -> None: +def test_build_install_command_does_not_include_reasoning_effort_flag() -> None: settings = get_settings() pr = _sample_pr() - command = _build_install_and_run_command(settings, pr=pr, include_reasoning_effort=False) + command = _build_install_and_run_command(settings, pr=pr) assert "--reasoning-effort" not in command @@ -220,7 +223,7 @@ def test_run_review_ephemeral_api_key_mode_does_not_fallback_to_host(monkeypatch assert "API-key auth runner failed" in result["summary"] -def test_run_review_ephemeral_retries_without_reasoning_effort_when_unsupported(monkeypatch: pytest.MonkeyPatch) -> None: +def test_run_review_ephemeral_single_attempt_success(monkeypatch: pytest.MonkeyPatch) -> None: get_settings.cache_clear() settings = get_settings() @@ -239,16 +242,6 @@ def test_run_review_ephemeral_retries_without_reasoning_effort_when_unsupported( def _fake_run(cmd, *args, **kwargs): calls.append(cmd) - if len(calls) == 1: - return type( - "Completed", - (), - { - "returncode": 2, - "stdout": "", - "stderr": "error: unexpected argument '--reasoning-effort' found", - }, - )() return type( "Completed", (), @@ -274,11 +267,9 @@ def test_run_review_ephemeral_retries_without_reasoning_effort_when_unsupported( ) assert result["verdict"] == "correct" - assert len(calls) == 2 + assert len(calls) == 1 first_shell = calls[0][-1] - second_shell = calls[1][-1] - assert "--reasoning-effort" in first_shell - assert "--reasoning-effort" not in second_shell + assert "--reasoning-effort" not in first_shell def test_parse_review_result_from_stdout_artifact() -> None: diff --git a/tests/test_review_format.py b/tests/test_review_format.py index 6ad96c1..711148a 100644 --- a/tests/test_review_format.py +++ b/tests/test_review_format.py @@ -68,7 +68,7 @@ def test_format_result_comment_appends_missing_config_note_for_system_layout() - }, repo_configured=False, ) - assert body.endswith("ℹ️.codex-review.yml is not configured") + assert body.endswith("> ℹ️.codex-review.yml is not configured") def test_format_result_comment_does_not_append_missing_config_note_to_agent_markdown() -> None: @@ -79,4 +79,4 @@ def test_format_result_comment_does_not_append_missing_config_note_to_agent_mark }, repo_configured=False, ) - assert "ℹ️.codex-review.yml is not configured" not in body + assert "> ℹ️.codex-review.yml is not configured" not in body diff --git a/tests/test_reviewer_fallback.py b/tests/test_reviewer_fallback.py index 79b20cc..8fbacd6 100644 --- a/tests/test_reviewer_fallback.py +++ b/tests/test_reviewer_fallback.py @@ -36,7 +36,6 @@ def test_run_review_for_pr_uses_openai_http_error_in_fallback(monkeypatch) -> No assert result["summary"].startswith("OpenAI review failed. Error: OpenAI API HTTP 429:") assert result["findings"][0]["title"] == "OpenAI review request failed" assert "rate_limited" in result["findings"][0]["body"] - assert any(finding["title"] == "TODO marker in diff" for finding in result["findings"]) def test_build_prompt_includes_trigger_message() -> None: