fix: default full review without test execution

2026-05-24 14:33:19 +02:00
parent f4fd190148
commit 2482c9911f
4 changed files with 116 additions and 26 deletions
--- a/src/gitea_codex_bot/workers/container_runner.py
+++ b/src/gitea_codex_bot/workers/container_runner.py
@@ -20,6 +20,7 @@ from gitea_codex_bot.types import ParsedCommand
 CONTAINER_CODEX_HOME = "/root/.codex"
 REVIEW_OUTPUT_FILE = "/tmp/codex-review-result.json"
 REVIEW_SCHEMA_FILE = "/tmp/codex-review-schema.json"
+REVIEW_EMITTED_FILE = "/tmp/codex-review-emitted.flag"
 RESULT_START_MARKER = "__CODEX_REVIEW_RESULT_BEGIN__"
 RESULT_END_MARKER = "__CODEX_REVIEW_RESULT_END__"
 logger = logging.getLogger(__name__)
@@ -38,7 +39,7 @@ REVIEW_RESULT_SCHEMA: dict[str, Any] = {
            "items": {
                "type": "object",
                "additionalProperties": False,
-                "required": ["severity", "file", "line_start", "line_end", "title", "body"],
+                "required": ["severity", "file", "line_start", "line_end", "title", "body", "suggestion"],
                "properties": {
                    "severity": {"type": "string", "enum": ["low", "medium", "high", "critical"]},
                    "file": {"type": "string"},
@@ -46,7 +47,7 @@ REVIEW_RESULT_SCHEMA: dict[str, Any] = {
                    "line_end": {"type": "integer"},
                    "title": {"type": "string"},
                    "body": {"type": "string"},
-                    "suggestion": {"type": "string"},
+                    "suggestion": {"type": ["string", "null"]},
                },
            },
        },
@@ -142,7 +143,39 @@ def _build_install_and_run_command(
    result_start_marker: str,
    result_end_marker: str,
 ) -> str:
-    steps = ["set -euo pipefail"]
+    runner_fallback_json = json.dumps(
+        {
+            "verdict": "has_issues",
+            "confidence": 0.67,
+            "summary": "Ephemeral codex execution failed before producing a review result.",
+            "markdown_comment": "Ephemeral codex execution failed before producing a review result.",
+            "findings": [
+                {
+                    "severity": "high",
+                    "file": "runner",
+                    "line_start": 1,
+                    "line_end": 1,
+                    "title": "Ephemeral review runner failed",
+                    "body": "codex exec failed before emitting a valid structured artifact.",
+                    "suggestion": "Check ephemeral runner logs for auth/model/network issues and rerun @codex review.",
+                }
+            ],
+        },
+        separators=(",", ":"),
+    )
+    steps = [
+        "set -euo pipefail",
+        f"rm -f {shlex.quote(REVIEW_EMITTED_FILE)}",
+        "emit_review_artifact() { "
+        "rc=\"$1\"; "
+        f"if [ ! -s {shlex.quote(REVIEW_OUTPUT_FILE)} ]; then "
+        f"cat > {shlex.quote(REVIEW_OUTPUT_FILE)} <<'JSON'\n{runner_fallback_json}\nJSON\n"
+        "fi; "
+        f'if [ ! -f {shlex.quote(REVIEW_EMITTED_FILE)} ]; then echo "{result_start_marker}"; cat {shlex.quote(REVIEW_OUTPUT_FILE)}; echo "{result_end_marker}"; touch {shlex.quote(REVIEW_EMITTED_FILE)}; fi; '
+        "return \"$rc\"; "
+        "}",
+        "trap 'rc=$?; set +e; emit_review_artifact \"$rc\"; exit \"$rc\"' EXIT",
+    ]
    if settings.codex_auth_mode != "chatgpt":
        steps.extend(
            [
@@ -198,6 +231,8 @@ def _build_install_and_run_command(
    model = settings.openai_review_model.strip()
    codex_exec_parts = [
        "codex exec",
+        "--sandbox",
+        "danger-full-access",
        "--json",
        "--output-schema",
        shlex.quote(REVIEW_SCHEMA_FILE),
@@ -209,10 +244,12 @@ def _build_install_and_run_command(
    codex_exec_parts.append(shlex.quote(review_prompt))
    steps.extend(
        [
-            " ".join(codex_exec_parts),
-            f'echo "{result_start_marker}"',
-            f"cat {shlex.quote(REVIEW_OUTPUT_FILE)}",
-            f'echo "{result_end_marker}"',
+            "set +e",
+            "codex_rc=0",
+            " ".join(codex_exec_parts) + ' || codex_rc="$?"',
+            "set -e",
+            f'if [ "$codex_rc" -ne 0 ] || [ ! -s {shlex.quote(REVIEW_OUTPUT_FILE)} ]; then cat > {REVIEW_OUTPUT_FILE} <<\'JSON\'\n{runner_fallback_json}\nJSON\nfi',
+            "emit_review_artifact 0",
        ]
    )
    return "\n".join(steps)
@@ -235,6 +272,12 @@ def _build_exec_review_prompt(command: ParsedCommand, repo_cfg: RepoReviewConfig
    focus = ", ".join(repo_cfg.focus) if repo_cfg.focus else "correctness, security, maintainability"
    ignore = ", ".join(repo_cfg.ignore) if repo_cfg.ignore else "(none)"
    mode = command.mode if command.name in {"review", "rerun"} else "summary"
+    allow_test_execution = command.mode == "tests" or repo_cfg.include_tests
+    tests_policy = (
+        "Tests may be executed for this run because tests mode/include_tests is explicitly enabled."
+        if allow_test_execution
+        else "Do not run tests, benchmarks, or other executables. Review changes statically unless explicitly asked."
+    )
    return "\n".join(
        [
            f"review: {intent}",
@@ -245,6 +288,7 @@ def _build_exec_review_prompt(command: ParsedCommand, repo_cfg: RepoReviewConfig
            f"Focus areas: {focus}.",
            f"Ignore patterns: {ignore}.",
            f"Include tests setting: {repo_cfg.include_tests}.",
+            tests_policy,
            f"Full review requested: {command.full}.",
            "Return strict JSON matching the provided output schema.",
        ]
@@ -261,6 +305,8 @@ def _build_docker_command(settings: Settings, *, container_name: str, install_an
        container_name,
        "-e",
        "CODEX_DISABLE_TELEMETRY=1",
+        "-e",
+        "CODEX_SANDBOX_MODE=danger-full-access",
    ]
    if settings.codex_auth_mode == "chatgpt":
        cmd.extend(
@@ -371,21 +417,17 @@ def _parse_review_result_from_stdout_artifact(
    result_start_marker: str,
    result_end_marker: str,
 ) -> dict[str, Any]:
-    lines = stdout.splitlines()
-    start_idx = -1
-    end_idx = -1
-    for idx, line in enumerate(lines):
-        if line.strip() == result_start_marker:
-            start_idx = idx
-            break
-    if start_idx != -1:
-        for idx in range(start_idx + 1, len(lines)):
-            if lines[idx].strip() == result_end_marker:
-                end_idx = idx
-                break
-    if start_idx == -1 or end_idx == -1 or end_idx <= start_idx:
+    start_pos = stdout.find(result_start_marker)
+    if start_pos == -1:
        raise RuntimeError("Runner output did not include final review artifact markers.")
-    artifact = "\n".join(lines[start_idx + 1 : end_idx]).strip()
+    artifact_start = start_pos + len(result_start_marker)
+
+    # Prefer the last end marker so marker-like text inside JSON does not
+    # truncate the payload when earlier incidental matches exist.
+    end_pos = stdout.rfind(result_end_marker)
+    if end_pos == -1 or end_pos <= artifact_start:
+        raise RuntimeError("Runner output did not include final review artifact markers.")
+    artifact = stdout[artifact_start:end_pos].strip()
    if not artifact:
        raise RuntimeError("Runner output contained empty final review artifact.")
    try: