diff --git a/src/gitea_codex_bot/services/repo_config.py b/src/gitea_codex_bot/services/repo_config.py index 6ea0ed6..a6980fb 100644 --- a/src/gitea_codex_bot/services/repo_config.py +++ b/src/gitea_codex_bot/services/repo_config.py @@ -10,9 +10,9 @@ import yaml class RepoReviewConfig: configured: bool = True enabled: bool = True - default_mode: str = "summary" + default_mode: str = "full" max_diff_bytes: int = 200000 - include_tests: bool = True + include_tests: bool = False focus: list[str] = field(default_factory=lambda: ["correctness", "security", "maintainability"]) ignore: list[str] = field(default_factory=list) @@ -27,13 +27,13 @@ def load_repo_review_config(repo_root: Path) -> RepoReviewConfig: def parse_repo_review_config_text(text: str, *, configured: bool) -> RepoReviewConfig: raw = yaml.safe_load(text) or {} review = raw.get("review", {}) or {} - default_mode = str(review.get("default_mode", "summary")).strip().lower() or "summary" + default_mode = str(review.get("default_mode", "full")).strip().lower() or "full" return RepoReviewConfig( configured=configured, enabled=bool(raw.get("enabled", True)), default_mode=default_mode, max_diff_bytes=int(review.get("max_diff_bytes", 200000)), - include_tests=bool(review.get("include_tests", True)), + include_tests=bool(review.get("include_tests", False)), focus=list(review.get("focus", ["correctness", "security", "maintainability"])), ignore=list(raw.get("ignore", [])), ) diff --git a/src/gitea_codex_bot/workers/container_runner.py b/src/gitea_codex_bot/workers/container_runner.py index 3fce7b2..210c56e 100644 --- a/src/gitea_codex_bot/workers/container_runner.py +++ b/src/gitea_codex_bot/workers/container_runner.py @@ -20,6 +20,7 @@ from gitea_codex_bot.types import ParsedCommand CONTAINER_CODEX_HOME = "/root/.codex" REVIEW_OUTPUT_FILE = "/tmp/codex-review-result.json" REVIEW_SCHEMA_FILE = "/tmp/codex-review-schema.json" +REVIEW_EMITTED_FILE = "/tmp/codex-review-emitted.flag" RESULT_START_MARKER = "__CODEX_REVIEW_RESULT_BEGIN__" RESULT_END_MARKER = "__CODEX_REVIEW_RESULT_END__" logger = logging.getLogger(__name__) @@ -38,7 +39,7 @@ REVIEW_RESULT_SCHEMA: dict[str, Any] = { "items": { "type": "object", "additionalProperties": False, - "required": ["severity", "file", "line_start", "line_end", "title", "body"], + "required": ["severity", "file", "line_start", "line_end", "title", "body", "suggestion"], "properties": { "severity": {"type": "string", "enum": ["low", "medium", "high", "critical"]}, "file": {"type": "string"}, @@ -46,7 +47,7 @@ REVIEW_RESULT_SCHEMA: dict[str, Any] = { "line_end": {"type": "integer"}, "title": {"type": "string"}, "body": {"type": "string"}, - "suggestion": {"type": "string"}, + "suggestion": {"type": ["string", "null"]}, }, }, }, @@ -142,7 +143,39 @@ def _build_install_and_run_command( result_start_marker: str, result_end_marker: str, ) -> str: - steps = ["set -euo pipefail"] + runner_fallback_json = json.dumps( + { + "verdict": "has_issues", + "confidence": 0.67, + "summary": "Ephemeral codex execution failed before producing a review result.", + "markdown_comment": "Ephemeral codex execution failed before producing a review result.", + "findings": [ + { + "severity": "high", + "file": "runner", + "line_start": 1, + "line_end": 1, + "title": "Ephemeral review runner failed", + "body": "codex exec failed before emitting a valid structured artifact.", + "suggestion": "Check ephemeral runner logs for auth/model/network issues and rerun @codex review.", + } + ], + }, + separators=(",", ":"), + ) + steps = [ + "set -euo pipefail", + f"rm -f {shlex.quote(REVIEW_EMITTED_FILE)}", + "emit_review_artifact() { " + "rc=\"$1\"; " + f"if [ ! -s {shlex.quote(REVIEW_OUTPUT_FILE)} ]; then " + f"cat > {shlex.quote(REVIEW_OUTPUT_FILE)} <<'JSON'\n{runner_fallback_json}\nJSON\n" + "fi; " + f'if [ ! -f {shlex.quote(REVIEW_EMITTED_FILE)} ]; then echo "{result_start_marker}"; cat {shlex.quote(REVIEW_OUTPUT_FILE)}; echo "{result_end_marker}"; touch {shlex.quote(REVIEW_EMITTED_FILE)}; fi; ' + "return \"$rc\"; " + "}", + "trap 'rc=$?; set +e; emit_review_artifact \"$rc\"; exit \"$rc\"' EXIT", + ] if settings.codex_auth_mode != "chatgpt": steps.extend( [ @@ -198,6 +231,8 @@ def _build_install_and_run_command( model = settings.openai_review_model.strip() codex_exec_parts = [ "codex exec", + "--sandbox", + "danger-full-access", "--json", "--output-schema", shlex.quote(REVIEW_SCHEMA_FILE), @@ -209,10 +244,12 @@ def _build_install_and_run_command( codex_exec_parts.append(shlex.quote(review_prompt)) steps.extend( [ - " ".join(codex_exec_parts), - f'echo "{result_start_marker}"', - f"cat {shlex.quote(REVIEW_OUTPUT_FILE)}", - f'echo "{result_end_marker}"', + "set +e", + "codex_rc=0", + " ".join(codex_exec_parts) + ' || codex_rc="$?"', + "set -e", + f'if [ "$codex_rc" -ne 0 ] || [ ! -s {shlex.quote(REVIEW_OUTPUT_FILE)} ]; then cat > {REVIEW_OUTPUT_FILE} <<\'JSON\'\n{runner_fallback_json}\nJSON\nfi', + "emit_review_artifact 0", ] ) return "\n".join(steps) @@ -235,6 +272,12 @@ def _build_exec_review_prompt(command: ParsedCommand, repo_cfg: RepoReviewConfig focus = ", ".join(repo_cfg.focus) if repo_cfg.focus else "correctness, security, maintainability" ignore = ", ".join(repo_cfg.ignore) if repo_cfg.ignore else "(none)" mode = command.mode if command.name in {"review", "rerun"} else "summary" + allow_test_execution = command.mode == "tests" or repo_cfg.include_tests + tests_policy = ( + "Tests may be executed for this run because tests mode/include_tests is explicitly enabled." + if allow_test_execution + else "Do not run tests, benchmarks, or other executables. Review changes statically unless explicitly asked." + ) return "\n".join( [ f"review: {intent}", @@ -245,6 +288,7 @@ def _build_exec_review_prompt(command: ParsedCommand, repo_cfg: RepoReviewConfig f"Focus areas: {focus}.", f"Ignore patterns: {ignore}.", f"Include tests setting: {repo_cfg.include_tests}.", + tests_policy, f"Full review requested: {command.full}.", "Return strict JSON matching the provided output schema.", ] @@ -261,6 +305,8 @@ def _build_docker_command(settings: Settings, *, container_name: str, install_an container_name, "-e", "CODEX_DISABLE_TELEMETRY=1", + "-e", + "CODEX_SANDBOX_MODE=danger-full-access", ] if settings.codex_auth_mode == "chatgpt": cmd.extend( @@ -371,21 +417,17 @@ def _parse_review_result_from_stdout_artifact( result_start_marker: str, result_end_marker: str, ) -> dict[str, Any]: - lines = stdout.splitlines() - start_idx = -1 - end_idx = -1 - for idx, line in enumerate(lines): - if line.strip() == result_start_marker: - start_idx = idx - break - if start_idx != -1: - for idx in range(start_idx + 1, len(lines)): - if lines[idx].strip() == result_end_marker: - end_idx = idx - break - if start_idx == -1 or end_idx == -1 or end_idx <= start_idx: + start_pos = stdout.find(result_start_marker) + if start_pos == -1: raise RuntimeError("Runner output did not include final review artifact markers.") - artifact = "\n".join(lines[start_idx + 1 : end_idx]).strip() + artifact_start = start_pos + len(result_start_marker) + + # Prefer the last end marker so marker-like text inside JSON does not + # truncate the payload when earlier incidental matches exist. + end_pos = stdout.rfind(result_end_marker) + if end_pos == -1 or end_pos <= artifact_start: + raise RuntimeError("Runner output did not include final review artifact markers.") + artifact = stdout[artifact_start:end_pos].strip() if not artifact: raise RuntimeError("Runner output contained empty final review artifact.") try: diff --git a/tests/test_container_runner.py b/tests/test_container_runner.py index 8d76bdb..d2b618a 100644 --- a/tests/test_container_runner.py +++ b/tests/test_container_runner.py @@ -116,7 +116,10 @@ def test_build_install_command_chatgpt_mode_sets_git_checkout_and_review(monkeyp assert f"git checkout --detach {pr.head_sha}" in command assert "resolved_head=\"$(git rev-parse HEAD)\"" in command assert "unset GITEA_TOKEN auth_b64" in command - assert "codex exec --json --output-schema /tmp/codex-review-schema.json -o /tmp/codex-review-result.json" in command + assert ( + "codex exec --sandbox danger-full-access --json --output-schema /tmp/codex-review-schema.json " + "-o /tmp/codex-review-result.json" + ) in command assert "review: security --full" in command assert "--output-schema /tmp/codex-review-schema.json" in command assert "-o /tmp/codex-review-result.json" in command @@ -369,6 +372,27 @@ def test_build_exec_review_prompt_falls_back_when_no_extra_text() -> None: assert prompt.startswith("review: review this pull request and report introduced issues.") +def test_build_exec_review_prompt_disables_test_execution_by_default() -> None: + prompt = _build_exec_review_prompt(ParsedCommand(name="review", raw="@codex review"), RepoReviewConfig(), _sample_pr()) + assert "Do not run tests, benchmarks, or other executables." in prompt + + +def test_build_exec_review_prompt_allows_test_execution_for_tests_mode() -> None: + prompt = _build_exec_review_prompt( + ParsedCommand(name="review", raw="@codex review tests", mode="tests", mode_explicit=True), + RepoReviewConfig(), + _sample_pr(), + ) + assert "Tests may be executed for this run" in prompt + + +def test_apply_repo_default_review_mode_uses_full_when_not_configured() -> None: + command = ParsedCommand(name="review", raw="@codex review") + cfg = RepoReviewConfig() + _apply_repo_default_review_mode(command, cfg) + assert command.mode == "full" + + def test_apply_repo_default_review_mode_for_review_command() -> None: command = ParsedCommand(name="review", raw="@codex review") cfg = RepoReviewConfig(default_mode="tests") @@ -390,6 +414,22 @@ def test_parse_review_result_from_stdout_artifact_uses_end_marker_after_start() assert parsed["verdict"] == "correct" +def test_parse_review_result_from_stdout_artifact_handles_inline_end_marker() -> None: + stdout = ( + "noise\n" + f"{RESULT_START_MARKER}_a\n" + '{"verdict":"correct","confidence":0.9,"summary":"ok","findings":[],"markdown_comment":"ok"}' + f"{RESULT_END_MARKER}_a\n" + ) + parsed = _parse_review_result_from_stdout_artifact( + stdout, + result_start_marker=f"{RESULT_START_MARKER}_a", + result_end_marker=f"{RESULT_END_MARKER}_a", + ) + assert parsed["verdict"] == "correct" + assert parsed["summary"] == "ok" + + def test_extract_result_meta_from_codex_stdout_collects_model_and_usage() -> None: settings = get_settings() stdout = "\n".join( diff --git a/tests/test_repo_config.py b/tests/test_repo_config.py new file mode 100644 index 0000000..58a4fd2 --- /dev/null +++ b/tests/test_repo_config.py @@ -0,0 +1,8 @@ +from gitea_codex_bot.services.repo_config import parse_repo_review_config_text + + +def test_parse_repo_review_config_defaults_to_full_and_no_tests() -> None: + cfg = parse_repo_review_config_text("enabled: true\n", configured=True) + assert cfg.default_mode == "full" + assert cfg.include_tests is False +