feat: Enhance review prompt with detailed instructions and placeholders for empty sections

2026-05-23 13:37:09 +02:00
parent c3bc3501ca
commit d6a9397914
4 changed files with 323 additions and 138 deletions
--- a/src/gitea_codex_bot/services/reviewer.py
+++ b/src/gitea_codex_bot/services/reviewer.py
@@ -114,11 +114,19 @@ def _build_prompt(
    test_output: str | None,
 ) -> str:
    mode = command.mode if command.name in {"review", "rerun"} else "summary"
    changed_files = diff_context.get("changed_files") or []
    changed_files_section = os.linesep.join(changed_files) if changed_files else "(none)"
    unified_diff = str(diff_context.get("diff", ""))
    unified_diff_section = unified_diff if unified_diff.strip() else "(empty)"
    return (
        "You are reviewing a Gitea pull request.\n\n"
        "Focus only on issues introduced by this PR.\n"
        "Prioritize correctness, security, data loss, broken behavior, bad migrations, and missing tests.\n"
        "Avoid style nitpicks.\n\n"
        "You do not have internet/network access. Do not try to fetch URLs.\n"
        "Use only the PR metadata, changed files, diff, and optional file/test content included below.\n"
        "Never claim that PR content is inaccessible or missing if these sections are present.\n"
        "If the changed-file list is `(none)` and unified diff is `(empty)`, treat this as a no-op PR and explain that no code changes were detected.\n\n"
        "Return JSON only with schema:\n"
        "{\n"
        '  "verdict": "correct" | "has_issues",\n'
@@ -132,8 +140,8 @@ def _build_prompt(
        f"Trigger message: {command.raw}\n"
        f"Repo focus: {', '.join(repo_cfg.focus)}\n"
        f"Diff truncated: {diff_context['truncated']}\n"
-        f"Changed files:\n{os.linesep.join(diff_context['changed_files'])}\n\n"
+        f"Changed files:\n{changed_files_section}\n\n"
-        f"Unified diff:\n{diff_context['diff']}\n\n"
+        f"Unified diff:\n{unified_diff_section}\n\n"
        f"Changed file content (optional):\n{changed_file_contents or '(not included)'}\n\n"
        f"Test output (optional):\n{test_output or '(not included)'}\n"
    )
--- a/src/gitea_codex_bot/workers/container_runner.py
+++ b/src/gitea_codex_bot/workers/container_runner.py
@@ -4,7 +4,6 @@ import base64
 import json
 import logging
 import os
 import re
 import shlex
 import subprocess
 import uuid
@@ -12,14 +11,47 @@ from pathlib import Path
 from typing import Any
 from gitea_codex_bot.config import Settings
-from gitea_codex_bot.services.gitea import GiteaClient
+from gitea_codex_bot.services.gitea import GiteaClient, PullRequestContext
-from gitea_codex_bot.services.repo_config import RepoReviewConfig
+from gitea_codex_bot.services.repo_config import RepoReviewConfig, parse_repo_review_config_text
-from gitea_codex_bot.services.reviewer import normalize_review_result, prepare_review_prompt
+from gitea_codex_bot.services.reviewer import normalize_review_result
 from gitea_codex_bot.types import ParsedCommand
 CONTAINER_CODEX_HOME = "/root/.codex"
 REVIEW_OUTPUT_FILE = "/tmp/codex-review-result.json"
 REVIEW_SCHEMA_FILE = "/tmp/codex-review-schema.json"
 RESULT_START_MARKER = "__CODEX_REVIEW_RESULT_BEGIN__"
 RESULT_END_MARKER = "__CODEX_REVIEW_RESULT_END__"
 logger = logging.getLogger(__name__)
 REVIEW_RESULT_SCHEMA: dict[str, Any] = {
    "type": "object",
    "additionalProperties": True,
    "required": ["verdict", "confidence", "summary", "findings", "markdown_comment"],
    "properties": {
        "verdict": {"type": "string", "enum": ["correct", "has_issues"]},
        "confidence": {"type": "number"},
        "summary": {"type": "string"},
        "markdown_comment": {"type": "string"},
        "findings": {
            "type": "array",
            "items": {
                "type": "object",
                "additionalProperties": True,
                "required": ["severity", "file", "line_start", "line_end", "title", "body"],
                "properties": {
                    "severity": {"type": "string", "enum": ["low", "medium", "high", "critical"]},
                    "file": {"type": "string"},
                    "line_start": {"type": "integer"},
                    "line_end": {"type": "integer"},
                    "title": {"type": "string"},
                    "body": {"type": "string"},
                    "suggestion": {"type": "string"},
                },
            },
        },
    },
 }
 def run_review_ephemeral(
    settings: Settings,
@@ -29,14 +61,27 @@ def run_review_ephemeral(
    command: ParsedCommand,
 ) -> tuple[dict[str, Any], RepoReviewConfig]:
    gitea = GiteaClient(settings)
-    prompt, _diff_context, repo_cfg = prepare_review_prompt(settings, gitea, repo, pr_number, command)
+    pr = gitea.get_pull_request(repo, pr_number)
    repo_cfg = _load_repo_review_config_from_gitea(gitea, repo, pr.head_sha)
    _apply_repo_default_review_mode(command, repo_cfg)
    prompt = _build_review_instructions(command, repo_cfg)
    container_name = f"codex-review-{uuid.uuid4().hex[:12]}"
-    extra_env: dict[str, str] = {}
+    extra_env: dict[str, str] = {
        "GITEA_TOKEN": settings.gitea_token.get_secret_value(),
        "GITEA_GIT_USERNAME": settings.gitea_bot_username,
    }
    if settings.openai_api_key:
        extra_env["OPENAI_API_KEY"] = settings.openai_api_key.get_secret_value()
    if settings.openai_org_id:
        extra_env["OPENAI_ORG_ID"] = settings.openai_org_id
    if settings.openai_project_id:
        extra_env["OPENAI_PROJECT_ID"] = settings.openai_project_id
    if settings.codex_auth_mode == "chatgpt":
        extra_env["CODEX_AUTH_JSON_B64"] = _load_codex_auth_json_b64(settings)
    try:
        completed = _run_ephemeral_container(
            settings,
            pr=pr,
            container_name=container_name,
            prompt=prompt,
            extra_env=extra_env,
@@ -46,14 +91,18 @@ def run_review_ephemeral(
            logger.info("Ephemeral runner does not support --reasoning-effort; retrying without it.")
            completed = _run_ephemeral_container(
                settings,
                pr=pr,
                container_name=container_name,
                prompt=prompt,
                extra_env=extra_env,
                include_reasoning_effort=False,
            )
        if completed.returncode != 0:
            compat_failure = _summarize_review_prompt_compat_failure(completed)
            if compat_failure:
                raise RuntimeError(compat_failure)
            raise RuntimeError(_format_runner_failure(completed))
-        parsed = _parse_codex_exec_stdout(completed.stdout)
+        parsed = _parse_review_result_from_stdout_artifact(completed.stdout)
        parsed["_meta"] = _extract_result_meta_from_codex_stdout(completed.stdout, settings)
        return normalize_review_result(parsed), repo_cfg
    except Exception as exc:
@@ -64,12 +113,13 @@ def run_review_ephemeral(
 def _run_ephemeral_container(
    settings: Settings,
    *,
    pr: PullRequestContext,
    container_name: str,
    prompt: str,
    extra_env: dict[str, str],
    include_reasoning_effort: bool,
 ) -> subprocess.CompletedProcess[str]:
-    install_and_run = _build_install_and_run_command(settings, include_reasoning_effort=include_reasoning_effort)
+    install_and_run = _build_install_and_run_command(settings, pr=pr, include_reasoning_effort=include_reasoning_effort)
    cmd = _build_docker_command(settings, container_name=container_name, install_and_run=install_and_run)
    return subprocess.run(
        cmd,
@@ -82,8 +132,25 @@ def _run_ephemeral_container(
    )
-def _build_install_and_run_command(settings: Settings, *, include_reasoning_effort: bool = True) -> str:
+def _build_install_and_run_command(
    settings: Settings,
    *,
    pr: PullRequestContext,
    include_reasoning_effort: bool = True,
 ) -> str:
    steps = ["set -euo pipefail"]
    if settings.codex_auth_mode != "chatgpt":
        steps.extend(
            [
                'if [ -z "${OPENAI_API_KEY:-}" ]; then echo "OPENAI_API_KEY missing in runner env" >&2; exit 8; fi',
            ]
        )
    steps.extend(
        [
            'if [ -z "${GITEA_TOKEN:-}" ]; then echo "GITEA_TOKEN missing in runner env" >&2; exit 8; fi',
            'if [ -z "${GITEA_GIT_USERNAME:-}" ]; then echo "GITEA_GIT_USERNAME missing in runner env" >&2; exit 8; fi',
        ]
    )
    if settings.codex_auth_mode == "chatgpt":
        steps.extend(
            [
@@ -94,19 +161,50 @@ def _build_install_and_run_command(settings: Settings, *, include_reasoning_effo
        )
    steps.extend(
        [
-            "apt-get update >/tmp/apt-update.log 2>&1 && apt-get install -y --no-install-recommends ca-certificates >/tmp/apt-install.log 2>&1 || { rc=$?; echo 'ca-certificates install failed'; tail -n 80 /tmp/apt-update.log || true; tail -n 80 /tmp/apt-install.log || true; exit $rc; }",
+            "apt-get update >/tmp/apt-update.log 2>&1 && apt-get install -y --no-install-recommends ca-certificates git >/tmp/apt-install.log 2>&1 || { rc=$?; echo 'ca-certificates/git install failed'; tail -n 80 /tmp/apt-update.log || true; tail -n 80 /tmp/apt-install.log || true; exit $rc; }",
            "npm install -g @openai/codex >/tmp/codex-install.log 2>&1 || { rc=$?; echo 'codex install failed'; tail -n 200 /tmp/codex-install.log || true; exit $rc; }",
        ]
    )
    schema_json = json.dumps(REVIEW_RESULT_SCHEMA, separators=(",", ":"))
    steps.extend(
        [
            f"cat > {REVIEW_SCHEMA_FILE} <<'JSON'\n{schema_json}\nJSON",
            'auth_b64="$(printf "%s" "${GITEA_GIT_USERNAME}:${GITEA_TOKEN}" | base64 | tr -d \'\\n\')"',
            f'git -c http.extraHeader="Authorization: Basic $auth_b64" clone --no-tags --depth 80 {shlex.quote(pr.clone_url)} /work/repo',
            "cd /work/repo",
            f'git -c http.extraHeader="Authorization: Basic $auth_b64" fetch --no-tags origin {shlex.quote(pr.base_ref)} {shlex.quote(pr.head_ref)}',
            f"git checkout --detach {shlex.quote(pr.head_sha)}",
            'resolved_head="$(git rev-parse HEAD)"',
            f'if [ "$resolved_head" != {shlex.quote(pr.head_sha)} ]; then echo "Checked out SHA mismatch: expected {pr.head_sha}, got $resolved_head" >&2; exit 9; fi',
            "unset GITEA_TOKEN auth_b64",
            "git config --global --unset-all http.extraHeader >/dev/null 2>&1 || true",
        ]
    )
    model = settings.openai_review_model.strip()
    reasoning_effort = settings.openai_reasoning_effort.strip()
-    codex_exec_parts = ["codex exec --skip-git-repo-check --json"]
+    codex_exec_parts = [
        "codex exec review",
        f"--base {shlex.quote(pr.base_sha)}",
        "--json",
        "--output-schema",
        shlex.quote(REVIEW_SCHEMA_FILE),
        "-o",
        shlex.quote(REVIEW_OUTPUT_FILE),
    ]
    if model:
        codex_exec_parts.append(f"-m {shlex.quote(model)}")
    if include_reasoning_effort and reasoning_effort:
        codex_exec_parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}")
-    steps.append(" ".join(codex_exec_parts))
+    codex_exec_parts.append("-")
-    return "; ".join(steps)
+    steps.extend(
        [
            " ".join(codex_exec_parts),
            f'echo "{RESULT_START_MARKER}"',
            f"cat {shlex.quote(REVIEW_OUTPUT_FILE)}",
            f'echo "{RESULT_END_MARKER}"',
        ]
    )
    return "\n".join(steps)
 def _needs_reasoning_effort_compat_retry(completed: subprocess.CompletedProcess[str]) -> bool:
@@ -147,6 +245,14 @@ def _build_docker_command(settings: Settings, *, container_name: str, install_an
                "OPENAI_PROJECT_ID",
            ]
        )
    cmd.extend(
        [
            "-e",
            "GITEA_TOKEN",
            "-e",
            "GITEA_GIT_USERNAME",
        ]
    )
    cmd.extend([settings.review_runner_image, "bash", "-lc", install_and_run])
    return cmd
@@ -215,27 +321,67 @@ def ensure_workdir(path: str) -> Path:
    return target
-def _parse_codex_exec_stdout(stdout: str) -> dict[str, Any]:
+def _load_repo_review_config_from_gitea(gitea: GiteaClient, repo: str, head_sha: str) -> RepoReviewConfig:
-    last_text: str | None = None
+    content = gitea.get_file_content(repo, ".codex-review.yml", ref=head_sha)
-    for line in stdout.splitlines():
+    if content is None:
-        line = line.strip()
+        return RepoReviewConfig(configured=False)
-        if not line:
+    return parse_repo_review_config_text(content, configured=True)
-            continue
+
 def _apply_repo_default_review_mode(command: ParsedCommand, repo_cfg: RepoReviewConfig) -> None:
    if command.name != "review" or command.mode_explicit:
        return
    configured_mode = repo_cfg.default_mode
    command.mode = configured_mode if configured_mode in {"summary", "security", "performance", "tests", "full"} else "summary"
 def _build_review_instructions(command: ParsedCommand, repo_cfg: RepoReviewConfig) -> str:
    focus = ", ".join(repo_cfg.focus) if repo_cfg.focus else "correctness, security, maintainability"
    ignore = ", ".join(repo_cfg.ignore) if repo_cfg.ignore else "(none)"
    lines = [
        "Review this pull request using local git data in this checkout only.",
        "Focus on issues introduced by this PR.",
        "Prioritize correctness, security, data loss, broken behavior, bad migrations, and missing tests.",
        "Avoid style-only nitpicks.",
        f"Requested mode: {command.mode}",
        f"Command: {command.raw}",
        f"Focus areas: {focus}",
        f"Ignore patterns: {ignore}",
        f"Repository include_tests setting: {repo_cfg.include_tests}",
        f"Full-content review requested: {command.full}",
        "Return strict JSON matching the provided output schema.",
    ]
    return "\n".join(lines)
 def _parse_review_result_from_stdout_artifact(stdout: str) -> dict[str, Any]:
    start = stdout.find(RESULT_START_MARKER)
    end = stdout.find(RESULT_END_MARKER)
    if start == -1 or end == -1 or end <= start:
        raise RuntimeError("Runner output did not include final review artifact markers.")
    artifact = stdout[start + len(RESULT_START_MARKER) : end].strip()
    if not artifact:
        raise RuntimeError("Runner output contained empty final review artifact.")
    try:
-            payload = json.loads(line)
+        payload = json.loads(artifact)
-        except json.JSONDecodeError:
+    except json.JSONDecodeError as exc:
-            continue
+        raise RuntimeError(f"Final review artifact was not valid JSON: {exc}") from exc
-        if isinstance(payload, dict) and {"verdict", "summary", "findings"}.issubset(payload.keys()):
+    if not isinstance(payload, dict):
        raise RuntimeError(f"Final review artifact JSON must be an object, got {type(payload)!r}.")
    return payload
-        extracted = _extract_text(payload)
+
-        if extracted:
+
-            last_text = extracted
+def _summarize_review_prompt_compat_failure(completed: subprocess.CompletedProcess[str]) -> str | None:
-            parsed = _parse_review_json_from_text(extracted)
+    text = " ".join([(completed.stdout or "").strip(), (completed.stderr or "").strip()]).lower()
-            if parsed:
+    has_prompt_conflict = "prompt" in text and (
-                return parsed
+        "cannot be used with" in text or "can't be used with" in text or "incompatible" in text
-    if not last_text:
+    )
-        raise RuntimeError("codex exec output did not include parseable review payload text")
+    if "--base" not in text or not has_prompt_conflict:
-    raise RuntimeError(f"codex exec output text did not contain review JSON; text_tail={_tail_text(last_text, 400)}")
+        return None
    return (
        "Installed Codex CLI rejected `codex exec review --base ...` with custom instructions. "
        "This runner is configured to fail fast on that compatibility issue."
    )
 def _extract_result_meta_from_codex_stdout(stdout: str, settings: Settings) -> dict[str, Any]:
@@ -297,49 +443,3 @@ def _find_first_dict_for_key(payload: Any, key: str) -> dict[str, Any] | None:
            if found:
                return found
    return None
 def _parse_review_json_from_text(text: str) -> dict[str, Any] | None:
    candidates: list[str] = [text.strip()]
    fenced = re.search(r"```(?:json)?\s*(\{.*\})\s*```", text, flags=re.DOTALL | re.IGNORECASE)
    if fenced:
        candidates.append(fenced.group(1).strip())
    start = text.find("{")
    end = text.rfind("}")
    if start != -1 and end != -1 and end > start:
        candidates.append(text[start : end + 1].strip())
    seen: set[str] = set()
    for candidate in candidates:
        if not candidate or candidate in seen:
            continue
        seen.add(candidate)
        try:
            payload = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(payload, dict) and {"verdict", "summary", "findings"}.issubset(payload.keys()):
            return payload
    return None
 def _extract_text(payload: Any) -> str | None:
    if isinstance(payload, str):
        return payload
    if isinstance(payload, dict):
        for key in ("text", "message", "content", "output"):
            value = payload.get(key)
            text = _extract_text(value)
            if text:
                return text
        for value in payload.values():
            if not isinstance(value, (dict, list)):
                continue
            text = _extract_text(value)
            if text:
                return text
    if isinstance(payload, list):
        for item in payload:
            text = _extract_text(item)
            if text:
                return text
    return None
--- a/tests/test_container_runner.py
+++ b/tests/test_container_runner.py
@@ -5,18 +5,37 @@ from pathlib import Path
 import pytest
 from gitea_codex_bot.config import get_settings
 from gitea_codex_bot.services.gitea import PullRequestContext
 from gitea_codex_bot.types import ParsedCommand
 from gitea_codex_bot.workers.container_runner import (
    CONTAINER_CODEX_HOME,
    RESULT_END_MARKER,
    RESULT_START_MARKER,
    _build_docker_command,
    _build_install_and_run_command,
    _extract_result_meta_from_codex_stdout,
    _load_codex_auth_json_b64,
-    _parse_codex_exec_stdout,
+    _load_repo_review_config_from_gitea,
    _parse_review_result_from_stdout_artifact,
    _resolve_codex_auth_json_path,
    run_review_ephemeral,
 )
 def _sample_pr() -> PullRequestContext:
    return PullRequestContext(
        repo="acme/repo",
        pr_number=1,
        base_ref="main",
        base_sha="b" * 40,
        head_ref="feature",
        head_sha="a" * 40,
        clone_url="https://gitea.test/acme/repo.git",
        html_url="https://gitea.test/acme/repo/pulls/1",
        is_fork=False,
    )
 def test_build_docker_command_api_key_mode_uses_openai_env() -> None:
    settings = get_settings()
@@ -25,6 +44,8 @@ def test_build_docker_command_api_key_mode_uses_openai_env() -> None:
    assert "OPENAI_API_KEY" in cmd
    assert "OPENAI_ORG_ID" in cmd
    assert "OPENAI_PROJECT_ID" in cmd
    assert "GITEA_TOKEN" in cmd
    assert "GITEA_GIT_USERNAME" in cmd
    assert "--mount" not in cmd
@@ -45,37 +66,40 @@ def test_build_docker_command_chatgpt_mode_mounts_auth_json(
    assert "OPENAI_API_KEY" not in cmd
    assert f"CODEX_HOME={CONTAINER_CODEX_HOME}" in env_items
    assert "CODEX_AUTH_JSON_B64" in env_items
    assert "GITEA_TOKEN" in env_items
    assert "GITEA_GIT_USERNAME" in env_items
-def test_build_install_command_chatgpt_mode_copies_auth_json(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+def test_build_install_command_chatgpt_mode_sets_git_checkout_and_review(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    auth_file = tmp_path / "auth.json"
    auth_file.write_text("{}", encoding="utf-8")
    monkeypatch.setenv("CODEX_AUTH_MODE", "chatgpt")
    monkeypatch.setenv("CODEX_AUTH_JSON_PATH", str(auth_file))
    get_settings.cache_clear()
    settings = get_settings()
    pr = _sample_pr()
-    command = _build_install_and_run_command(settings)
+    command = _build_install_and_run_command(settings, pr=pr)
    assert 'printf "%s" "$CODEX_AUTH_JSON_B64" | base64 -d > /root/.codex/auth.json' in command
-    assert "codex exec --skip-git-repo-check --json -m gpt-5.3-codex" in command
+    assert "git -c http.extraHeader=" in command
-    assert f"--reasoning-effort {settings.openai_reasoning_effort}" in command
+    assert f"clone --no-tags --depth 80 {pr.clone_url} /work/repo" in command
-
+    assert f"fetch --no-tags origin {pr.base_ref} {pr.head_ref}" in command
-
+    assert f"git checkout --detach {pr.head_sha}" in command
-def test_build_install_command_includes_configured_reasoning_effort(monkeypatch: pytest.MonkeyPatch) -> None:
+    assert "resolved_head=\"$(git rev-parse HEAD)\"" in command
-    monkeypatch.setenv("OPENAI_REASONING_EFFORT", "medium")
+    assert "unset GITEA_TOKEN auth_b64" in command
-    get_settings.cache_clear()
+    assert f"codex exec review --base {pr.base_sha}" in command
-    settings = get_settings()
+    assert "--output-schema /tmp/codex-review-schema.json" in command
-
+    assert "-o /tmp/codex-review-result.json" in command
-    command = _build_install_and_run_command(settings)
+    assert f'echo "{RESULT_START_MARKER}"' in command
-
+    assert f'echo "{RESULT_END_MARKER}"' in command
    assert "--reasoning-effort medium" in command
 def test_build_install_command_can_disable_reasoning_effort_flag() -> None:
    settings = get_settings()
    pr = _sample_pr()
-    command = _build_install_and_run_command(settings, include_reasoning_effort=False)
+    command = _build_install_and_run_command(settings, pr=pr, include_reasoning_effort=False)
    assert "--reasoning-effort" not in command
@@ -104,6 +128,29 @@ def test_load_codex_auth_json_b64_roundtrip(monkeypatch: pytest.MonkeyPatch, tmp
    assert encoded
 def test_load_repo_review_config_from_gitea_when_missing() -> None:
    class _Gitea:
        def get_file_content(self, *_args, **_kwargs):
            return None
    cfg = _load_repo_review_config_from_gitea(_Gitea(), "acme/repo", "a" * 40)
    assert cfg.configured is False
    assert cfg.enabled is True
 def test_load_repo_review_config_from_gitea_when_present() -> None:
    class _Gitea:
        def get_file_content(self, *_args, **_kwargs):
            return "enabled: false\nreview:\n  default_mode: tests\n"
    cfg = _load_repo_review_config_from_gitea(_Gitea(), "acme/repo", "a" * 40)
    assert cfg.configured is True
    assert cfg.enabled is False
    assert cfg.default_mode == "tests"
 def test_run_review_ephemeral_chatgpt_does_not_fallback_to_api_key_path(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
@@ -115,18 +162,22 @@ def test_run_review_ephemeral_chatgpt_does_not_fallback_to_api_key_path(
    get_settings.cache_clear()
    settings = get_settings()
-    monkeypatch.setattr(
+    class _FakeGiteaClient:
-        "gitea_codex_bot.workers.container_runner.prepare_review_prompt",
+        def __init__(self, _settings) -> None:
-        lambda *_args, **_kwargs: ("prompt", {"diff": ""}, object()),
+            pass
-    )
+
-    monkeypatch.setattr("gitea_codex_bot.workers.container_runner.GiteaClient", lambda _settings: object())
+        def get_pull_request(self, *_args, **_kwargs):
            return _sample_pr()
        def get_file_content(self, *_args, **_kwargs):
            return None
    monkeypatch.setattr("gitea_codex_bot.workers.container_runner.GiteaClient", _FakeGiteaClient)
    monkeypatch.setattr(
        "gitea_codex_bot.workers.container_runner.subprocess.run",
        lambda *_args, **_kwargs: (_ for _ in ()).throw(RuntimeError("docker unavailable")),
    )
    from gitea_codex_bot.types import ParsedCommand
    result, _repo_cfg = run_review_ephemeral(
        settings,
        repo="acme/repo",
@@ -142,18 +193,22 @@ def test_run_review_ephemeral_api_key_mode_does_not_fallback_to_host(monkeypatch
    get_settings.cache_clear()
    settings = get_settings()
-    monkeypatch.setattr(
+    class _FakeGiteaClient:
-        "gitea_codex_bot.workers.container_runner.prepare_review_prompt",
+        def __init__(self, _settings) -> None:
-        lambda *_args, **_kwargs: ("prompt", {"diff": ""}, object()),
+            pass
-    )
+
-    monkeypatch.setattr("gitea_codex_bot.workers.container_runner.GiteaClient", lambda _settings: object())
+        def get_pull_request(self, *_args, **_kwargs):
            return _sample_pr()
        def get_file_content(self, *_args, **_kwargs):
            return None
    monkeypatch.setattr("gitea_codex_bot.workers.container_runner.GiteaClient", _FakeGiteaClient)
    monkeypatch.setattr(
        "gitea_codex_bot.workers.container_runner.subprocess.run",
        lambda *_args, **_kwargs: (_ for _ in ()).throw(RuntimeError("docker unavailable")),
    )
    from gitea_codex_bot.types import ParsedCommand
    result, _repo_cfg = run_review_ephemeral(
        settings,
        repo="acme/repo",
@@ -169,12 +224,17 @@ def test_run_review_ephemeral_retries_without_reasoning_effort_when_unsupported(
    get_settings.cache_clear()
    settings = get_settings()
-    monkeypatch.setattr(
+    class _FakeGiteaClient:
-        "gitea_codex_bot.workers.container_runner.prepare_review_prompt",
+        def __init__(self, _settings) -> None:
-        lambda *_args, **_kwargs: ("prompt", {"diff": ""}, object()),
+            pass
    )
    monkeypatch.setattr("gitea_codex_bot.workers.container_runner.GiteaClient", lambda _settings: object())
        def get_pull_request(self, *_args, **_kwargs):
            return _sample_pr()
        def get_file_content(self, *_args, **_kwargs):
            return None
    monkeypatch.setattr("gitea_codex_bot.workers.container_runner.GiteaClient", _FakeGiteaClient)
    calls: list[list[str]] = []
    def _fake_run(cmd, *args, **kwargs):
@@ -194,15 +254,18 @@ def test_run_review_ephemeral_retries_without_reasoning_effort_when_unsupported(
            (),
            {
                "returncode": 0,
-                "stdout": '{"verdict":"correct","confidence":0.9,"summary":"ok","findings":[]}\n',
+                "stdout": (
                    '{"type":"response.started","model":"gpt-5.3-codex"}\n'
                    f"{RESULT_START_MARKER}\n"
                    '{"verdict":"correct","confidence":0.9,"summary":"ok","findings":[],"markdown_comment":"ok"}\n'
                    f"{RESULT_END_MARKER}\n"
                ),
                "stderr": "",
            },
        )()
    monkeypatch.setattr("gitea_codex_bot.workers.container_runner.subprocess.run", _fake_run)
    from gitea_codex_bot.types import ParsedCommand
    result, _repo_cfg = run_review_ephemeral(
        settings,
        repo="acme/repo",
@@ -218,33 +281,26 @@ def test_run_review_ephemeral_retries_without_reasoning_effort_when_unsupported(
    assert "--reasoning-effort" not in second_shell
-def test_parse_codex_exec_stdout_from_stream_item_text_json() -> None:
+def test_parse_review_result_from_stdout_artifact() -> None:
-    stdout = '\n'.join(
+    stdout = (
-        [
+        "noise\n"
-            '{"type":"thread.started","thread_id":"abc"}',
+        f"{RESULT_START_MARKER}\n"
-            '{"type":"item.completed","item":{"type":"agent_message","text":"{\\"verdict\\":\\"correct\\",\\"confidence\\":0.9,\\"summary\\":\\"ok\\",\\"findings\\":[]}"}}',
+        '{"verdict":"correct","confidence":0.9,"summary":"ok","findings":[],"markdown_comment":"ok"}\n'
-        ]
+        f"{RESULT_END_MARKER}\n"
    )
-    parsed = _parse_codex_exec_stdout(stdout)
+    parsed = _parse_review_result_from_stdout_artifact(stdout)
    assert parsed["verdict"] == "correct"
    assert parsed["summary"] == "ok"
-def test_parse_codex_exec_stdout_from_fenced_json_text() -> None:
+def test_parse_review_result_from_stdout_artifact_fails_without_markers() -> None:
-    stdout = '\n'.join(
+    with pytest.raises(RuntimeError):
-        [
+        _parse_review_result_from_stdout_artifact("no markers here")
            '{"type":"thread.started","thread_id":"abc"}',
            '{"type":"item.completed","item":{"type":"agent_message","text":"Here is the result:\\n```json\\n{\\"verdict\\":\\"has_issues\\",\\"confidence\\":0.8,\\"summary\\":\\"x\\",\\"findings\\":[]}\\n```"}}',
        ]
    )
    parsed = _parse_codex_exec_stdout(stdout)
    assert parsed["verdict"] == "has_issues"
    assert parsed["summary"] == "x"
 def test_extract_result_meta_from_codex_stdout_collects_model_and_usage() -> None:
    settings = get_settings()
-    stdout = '\n'.join(
+    stdout = "\n".join(
        [
            '{"type":"response.started","model":"gpt-5.3-codex"}',
            '{"type":"response.completed","response":{"usage":{"input_tokens":101,"output_tokens":22,"total_tokens":123}}}',
--- a/tests/test_reviewer_fallback.py
+++ b/tests/test_reviewer_fallback.py
@@ -57,6 +57,27 @@ def test_build_prompt_includes_trigger_message() -> None:
    assert "Trigger message: @codex review security\nPlease focus auth." in prompt
 def test_build_prompt_uses_empty_placeholders_and_no_network_instruction() -> None:
    pr = type("PR", (), {"html_url": "https://gitea.example/pr/1"})()
    command = ParsedCommand(name="review", raw="@codex review")
    diff_context = {"truncated": False, "changed_files": [], "diff": ""}
    repo_cfg = RepoReviewConfig()
    prompt = _build_prompt(
        pr,
        command,
        diff_context,
        repo_cfg,
        changed_file_contents="",
        test_output=None,
    )
    assert "You do not have internet/network access. Do not try to fetch URLs." in prompt
    assert "Never claim that PR content is inaccessible or missing if these sections are present." in prompt
    assert "Changed files:\n(none)" in prompt
    assert "Unified diff:\n(empty)" in prompt
 def test_prepare_review_prompt_applies_repo_default_mode_when_command_mode_not_explicit(monkeypatch, tmp_path) -> None:
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir(parents=True, exist_ok=True)