fix. default full review without test execution

2026-05-24 14:33:19 +02:00
parent f4fd190148
commit 2482c9911f
4 changed files with 116 additions and 26 deletions
--- a/src/gitea_codex_bot/services/repo_config.py
+++ b/src/gitea_codex_bot/services/repo_config.py
@@ -10,9 +10,9 @@ import yaml
 class RepoReviewConfig:
    configured: bool = True
    enabled: bool = True
-    default_mode: str = "summary"
+    default_mode: str = "full"
    max_diff_bytes: int = 200000
-    include_tests: bool = True
+    include_tests: bool = False
    focus: list[str] = field(default_factory=lambda: ["correctness", "security", "maintainability"])
    ignore: list[str] = field(default_factory=list)
@@ -27,13 +27,13 @@ def load_repo_review_config(repo_root: Path) -> RepoReviewConfig:
 def parse_repo_review_config_text(text: str, *, configured: bool) -> RepoReviewConfig:
    raw = yaml.safe_load(text) or {}
    review = raw.get("review", {}) or {}
-    default_mode = str(review.get("default_mode", "summary")).strip().lower() or "summary"
+    default_mode = str(review.get("default_mode", "full")).strip().lower() or "full"
    return RepoReviewConfig(
        configured=configured,
        enabled=bool(raw.get("enabled", True)),
        default_mode=default_mode,
        max_diff_bytes=int(review.get("max_diff_bytes", 200000)),
-        include_tests=bool(review.get("include_tests", True)),
+        include_tests=bool(review.get("include_tests", False)),
        focus=list(review.get("focus", ["correctness", "security", "maintainability"])),
        ignore=list(raw.get("ignore", [])),
    )
--- a/src/gitea_codex_bot/workers/container_runner.py
+++ b/src/gitea_codex_bot/workers/container_runner.py
@@ -20,6 +20,7 @@ from gitea_codex_bot.types import ParsedCommand
 CONTAINER_CODEX_HOME = "/root/.codex"
 REVIEW_OUTPUT_FILE = "/tmp/codex-review-result.json"
 REVIEW_SCHEMA_FILE = "/tmp/codex-review-schema.json"
 REVIEW_EMITTED_FILE = "/tmp/codex-review-emitted.flag"
 RESULT_START_MARKER = "__CODEX_REVIEW_RESULT_BEGIN__"
 RESULT_END_MARKER = "__CODEX_REVIEW_RESULT_END__"
 logger = logging.getLogger(__name__)
@@ -38,7 +39,7 @@ REVIEW_RESULT_SCHEMA: dict[str, Any] = {
            "items": {
                "type": "object",
                "additionalProperties": False,
-                "required": ["severity", "file", "line_start", "line_end", "title", "body"],
+                "required": ["severity", "file", "line_start", "line_end", "title", "body", "suggestion"],
                "properties": {
                    "severity": {"type": "string", "enum": ["low", "medium", "high", "critical"]},
                    "file": {"type": "string"},
@@ -46,7 +47,7 @@ REVIEW_RESULT_SCHEMA: dict[str, Any] = {
                    "line_end": {"type": "integer"},
                    "title": {"type": "string"},
                    "body": {"type": "string"},
-                    "suggestion": {"type": "string"},
+                    "suggestion": {"type": ["string", "null"]},
                },
            },
        },
@@ -142,7 +143,39 @@ def _build_install_and_run_command(
    result_start_marker: str,
    result_end_marker: str,
 ) -> str:
-    steps = ["set -euo pipefail"]
+    runner_fallback_json = json.dumps(
        {
            "verdict": "has_issues",
            "confidence": 0.67,
            "summary": "Ephemeral codex execution failed before producing a review result.",
            "markdown_comment": "Ephemeral codex execution failed before producing a review result.",
            "findings": [
                {
                    "severity": "high",
                    "file": "runner",
                    "line_start": 1,
                    "line_end": 1,
                    "title": "Ephemeral review runner failed",
                    "body": "codex exec failed before emitting a valid structured artifact.",
                    "suggestion": "Check ephemeral runner logs for auth/model/network issues and rerun @codex review.",
                }
            ],
        },
        separators=(",", ":"),
    )
    steps = [
        "set -euo pipefail",
        f"rm -f {shlex.quote(REVIEW_EMITTED_FILE)}",
        "emit_review_artifact() { "
        "rc=\"$1\"; "
        f"if [ ! -s {shlex.quote(REVIEW_OUTPUT_FILE)} ]; then "
        f"cat > {shlex.quote(REVIEW_OUTPUT_FILE)} <<'JSON'\n{runner_fallback_json}\nJSON\n"
        "fi; "
        f'if [ ! -f {shlex.quote(REVIEW_EMITTED_FILE)} ]; then echo "{result_start_marker}"; cat {shlex.quote(REVIEW_OUTPUT_FILE)}; echo "{result_end_marker}"; touch {shlex.quote(REVIEW_EMITTED_FILE)}; fi; '
        "return \"$rc\"; "
        "}",
        "trap 'rc=$?; set +e; emit_review_artifact \"$rc\"; exit \"$rc\"' EXIT",
    ]
    if settings.codex_auth_mode != "chatgpt":
        steps.extend(
            [
@@ -198,6 +231,8 @@ def _build_install_and_run_command(
    model = settings.openai_review_model.strip()
    codex_exec_parts = [
        "codex exec",
        "--sandbox",
        "danger-full-access",
        "--json",
        "--output-schema",
        shlex.quote(REVIEW_SCHEMA_FILE),
@@ -209,10 +244,12 @@ def _build_install_and_run_command(
    codex_exec_parts.append(shlex.quote(review_prompt))
    steps.extend(
        [
-            " ".join(codex_exec_parts),
+            "set +e",
-            f'echo "{result_start_marker}"',
+            "codex_rc=0",
-            f"cat {shlex.quote(REVIEW_OUTPUT_FILE)}",
+            " ".join(codex_exec_parts) + ' || codex_rc="$?"',
-            f'echo "{result_end_marker}"',
+            "set -e",
            f'if [ "$codex_rc" -ne 0 ] || [ ! -s {shlex.quote(REVIEW_OUTPUT_FILE)} ]; then cat > {REVIEW_OUTPUT_FILE} <<\'JSON\'\n{runner_fallback_json}\nJSON\nfi',
            "emit_review_artifact 0",
        ]
    )
    return "\n".join(steps)
@@ -235,6 +272,12 @@ def _build_exec_review_prompt(command: ParsedCommand, repo_cfg: RepoReviewConfig
    focus = ", ".join(repo_cfg.focus) if repo_cfg.focus else "correctness, security, maintainability"
    ignore = ", ".join(repo_cfg.ignore) if repo_cfg.ignore else "(none)"
    mode = command.mode if command.name in {"review", "rerun"} else "summary"
    allow_test_execution = command.mode == "tests" or repo_cfg.include_tests
    tests_policy = (
        "Tests may be executed for this run because tests mode/include_tests is explicitly enabled."
        if allow_test_execution
        else "Do not run tests, benchmarks, or other executables. Review changes statically unless explicitly asked."
    )
    return "\n".join(
        [
            f"review: {intent}",
@@ -245,6 +288,7 @@ def _build_exec_review_prompt(command: ParsedCommand, repo_cfg: RepoReviewConfig
            f"Focus areas: {focus}.",
            f"Ignore patterns: {ignore}.",
            f"Include tests setting: {repo_cfg.include_tests}.",
            tests_policy,
            f"Full review requested: {command.full}.",
            "Return strict JSON matching the provided output schema.",
        ]
@@ -261,6 +305,8 @@ def _build_docker_command(settings: Settings, *, container_name: str, install_an
        container_name,
        "-e",
        "CODEX_DISABLE_TELEMETRY=1",
        "-e",
        "CODEX_SANDBOX_MODE=danger-full-access",
    ]
    if settings.codex_auth_mode == "chatgpt":
        cmd.extend(
@@ -371,21 +417,17 @@ def _parse_review_result_from_stdout_artifact(
    result_start_marker: str,
    result_end_marker: str,
 ) -> dict[str, Any]:
-    lines = stdout.splitlines()
+    start_pos = stdout.find(result_start_marker)
-    start_idx = -1
+    if start_pos == -1:
    end_idx = -1
    for idx, line in enumerate(lines):
        if line.strip() == result_start_marker:
            start_idx = idx
            break
    if start_idx != -1:
        for idx in range(start_idx + 1, len(lines)):
            if lines[idx].strip() == result_end_marker:
                end_idx = idx
                break
    if start_idx == -1 or end_idx == -1 or end_idx <= start_idx:
        raise RuntimeError("Runner output did not include final review artifact markers.")
-    artifact = "\n".join(lines[start_idx + 1 : end_idx]).strip()
+    artifact_start = start_pos + len(result_start_marker)
    # Prefer the last end marker so marker-like text inside JSON does not
    # truncate the payload when earlier incidental matches exist.
    end_pos = stdout.rfind(result_end_marker)
    if end_pos == -1 or end_pos <= artifact_start:
        raise RuntimeError("Runner output did not include final review artifact markers.")
    artifact = stdout[artifact_start:end_pos].strip()
    if not artifact:
        raise RuntimeError("Runner output contained empty final review artifact.")
    try:
--- a/tests/test_container_runner.py
+++ b/tests/test_container_runner.py
@@ -116,7 +116,10 @@ def test_build_install_command_chatgpt_mode_sets_git_checkout_and_review(monkeyp
    assert f"git checkout --detach {pr.head_sha}" in command
    assert "resolved_head=\"$(git rev-parse HEAD)\"" in command
    assert "unset GITEA_TOKEN auth_b64" in command
-    assert "codex exec --json --output-schema /tmp/codex-review-schema.json -o /tmp/codex-review-result.json" in command
+    assert (
        "codex exec --sandbox danger-full-access --json --output-schema /tmp/codex-review-schema.json "
        "-o /tmp/codex-review-result.json"
    ) in command
    assert "review: security --full" in command
    assert "--output-schema /tmp/codex-review-schema.json" in command
    assert "-o /tmp/codex-review-result.json" in command
@@ -369,6 +372,27 @@ def test_build_exec_review_prompt_falls_back_when_no_extra_text() -> None:
    assert prompt.startswith("review: review this pull request and report introduced issues.")
 def test_build_exec_review_prompt_disables_test_execution_by_default() -> None:
    prompt = _build_exec_review_prompt(ParsedCommand(name="review", raw="@codex review"), RepoReviewConfig(), _sample_pr())
    assert "Do not run tests, benchmarks, or other executables." in prompt
 def test_build_exec_review_prompt_allows_test_execution_for_tests_mode() -> None:
    prompt = _build_exec_review_prompt(
        ParsedCommand(name="review", raw="@codex review tests", mode="tests", mode_explicit=True),
        RepoReviewConfig(),
        _sample_pr(),
    )
    assert "Tests may be executed for this run" in prompt
 def test_apply_repo_default_review_mode_uses_full_when_not_configured() -> None:
    command = ParsedCommand(name="review", raw="@codex review")
    cfg = RepoReviewConfig()
    _apply_repo_default_review_mode(command, cfg)
    assert command.mode == "full"
 def test_apply_repo_default_review_mode_for_review_command() -> None:
    command = ParsedCommand(name="review", raw="@codex review")
    cfg = RepoReviewConfig(default_mode="tests")
@@ -390,6 +414,22 @@ def test_parse_review_result_from_stdout_artifact_uses_end_marker_after_start()
    assert parsed["verdict"] == "correct"
 def test_parse_review_result_from_stdout_artifact_handles_inline_end_marker() -> None:
    stdout = (
        "noise\n"
        f"{RESULT_START_MARKER}_a\n"
        '{"verdict":"correct","confidence":0.9,"summary":"ok","findings":[],"markdown_comment":"ok"}'
        f"{RESULT_END_MARKER}_a\n"
    )
    parsed = _parse_review_result_from_stdout_artifact(
        stdout,
        result_start_marker=f"{RESULT_START_MARKER}_a",
        result_end_marker=f"{RESULT_END_MARKER}_a",
    )
    assert parsed["verdict"] == "correct"
    assert parsed["summary"] == "ok"
 def test_extract_result_meta_from_codex_stdout_collects_model_and_usage() -> None:
    settings = get_settings()
    stdout = "\n".join(
--- a/tests/test_repo_config.py
+++ b/tests/test_repo_config.py
@@ -0,0 +1,8 @@
 from gitea_codex_bot.services.repo_config import parse_repo_review_config_text
 def test_parse_repo_review_config_defaults_to_full_and_no_tests() -> None:
    cfg = parse_repo_review_config_text("enabled: true\n", configured=True)
    assert cfg.default_mode == "full"
    assert cfg.include_tests is False