fix. default full review without test execution
All checks were successful
ci / test (push) Successful in 39s
ci / publish (push) Successful in 1m7s

This commit is contained in:
Space-Banane
2026-05-24 14:33:19 +02:00
parent f4fd190148
commit 2482c9911f
4 changed files with 116 additions and 26 deletions

View File

@@ -10,9 +10,9 @@ import yaml
class RepoReviewConfig: class RepoReviewConfig:
configured: bool = True configured: bool = True
enabled: bool = True enabled: bool = True
default_mode: str = "summary" default_mode: str = "full"
max_diff_bytes: int = 200000 max_diff_bytes: int = 200000
include_tests: bool = True include_tests: bool = False
focus: list[str] = field(default_factory=lambda: ["correctness", "security", "maintainability"]) focus: list[str] = field(default_factory=lambda: ["correctness", "security", "maintainability"])
ignore: list[str] = field(default_factory=list) ignore: list[str] = field(default_factory=list)
@@ -27,13 +27,13 @@ def load_repo_review_config(repo_root: Path) -> RepoReviewConfig:
def parse_repo_review_config_text(text: str, *, configured: bool) -> RepoReviewConfig: def parse_repo_review_config_text(text: str, *, configured: bool) -> RepoReviewConfig:
raw = yaml.safe_load(text) or {} raw = yaml.safe_load(text) or {}
review = raw.get("review", {}) or {} review = raw.get("review", {}) or {}
default_mode = str(review.get("default_mode", "summary")).strip().lower() or "summary" default_mode = str(review.get("default_mode", "full")).strip().lower() or "full"
return RepoReviewConfig( return RepoReviewConfig(
configured=configured, configured=configured,
enabled=bool(raw.get("enabled", True)), enabled=bool(raw.get("enabled", True)),
default_mode=default_mode, default_mode=default_mode,
max_diff_bytes=int(review.get("max_diff_bytes", 200000)), max_diff_bytes=int(review.get("max_diff_bytes", 200000)),
include_tests=bool(review.get("include_tests", True)), include_tests=bool(review.get("include_tests", False)),
focus=list(review.get("focus", ["correctness", "security", "maintainability"])), focus=list(review.get("focus", ["correctness", "security", "maintainability"])),
ignore=list(raw.get("ignore", [])), ignore=list(raw.get("ignore", [])),
) )

View File

@@ -20,6 +20,7 @@ from gitea_codex_bot.types import ParsedCommand
CONTAINER_CODEX_HOME = "/root/.codex" CONTAINER_CODEX_HOME = "/root/.codex"
REVIEW_OUTPUT_FILE = "/tmp/codex-review-result.json" REVIEW_OUTPUT_FILE = "/tmp/codex-review-result.json"
REVIEW_SCHEMA_FILE = "/tmp/codex-review-schema.json" REVIEW_SCHEMA_FILE = "/tmp/codex-review-schema.json"
REVIEW_EMITTED_FILE = "/tmp/codex-review-emitted.flag"
RESULT_START_MARKER = "__CODEX_REVIEW_RESULT_BEGIN__" RESULT_START_MARKER = "__CODEX_REVIEW_RESULT_BEGIN__"
RESULT_END_MARKER = "__CODEX_REVIEW_RESULT_END__" RESULT_END_MARKER = "__CODEX_REVIEW_RESULT_END__"
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -38,7 +39,7 @@ REVIEW_RESULT_SCHEMA: dict[str, Any] = {
"items": { "items": {
"type": "object", "type": "object",
"additionalProperties": False, "additionalProperties": False,
"required": ["severity", "file", "line_start", "line_end", "title", "body"], "required": ["severity", "file", "line_start", "line_end", "title", "body", "suggestion"],
"properties": { "properties": {
"severity": {"type": "string", "enum": ["low", "medium", "high", "critical"]}, "severity": {"type": "string", "enum": ["low", "medium", "high", "critical"]},
"file": {"type": "string"}, "file": {"type": "string"},
@@ -46,7 +47,7 @@ REVIEW_RESULT_SCHEMA: dict[str, Any] = {
"line_end": {"type": "integer"}, "line_end": {"type": "integer"},
"title": {"type": "string"}, "title": {"type": "string"},
"body": {"type": "string"}, "body": {"type": "string"},
"suggestion": {"type": "string"}, "suggestion": {"type": ["string", "null"]},
}, },
}, },
}, },
@@ -142,7 +143,39 @@ def _build_install_and_run_command(
result_start_marker: str, result_start_marker: str,
result_end_marker: str, result_end_marker: str,
) -> str: ) -> str:
steps = ["set -euo pipefail"] runner_fallback_json = json.dumps(
{
"verdict": "has_issues",
"confidence": 0.67,
"summary": "Ephemeral codex execution failed before producing a review result.",
"markdown_comment": "Ephemeral codex execution failed before producing a review result.",
"findings": [
{
"severity": "high",
"file": "runner",
"line_start": 1,
"line_end": 1,
"title": "Ephemeral review runner failed",
"body": "codex exec failed before emitting a valid structured artifact.",
"suggestion": "Check ephemeral runner logs for auth/model/network issues and rerun @codex review.",
}
],
},
separators=(",", ":"),
)
steps = [
"set -euo pipefail",
f"rm -f {shlex.quote(REVIEW_EMITTED_FILE)}",
"emit_review_artifact() { "
"rc=\"$1\"; "
f"if [ ! -s {shlex.quote(REVIEW_OUTPUT_FILE)} ]; then "
f"cat > {shlex.quote(REVIEW_OUTPUT_FILE)} <<'JSON'\n{runner_fallback_json}\nJSON\n"
"fi; "
f'if [ ! -f {shlex.quote(REVIEW_EMITTED_FILE)} ]; then echo "{result_start_marker}"; cat {shlex.quote(REVIEW_OUTPUT_FILE)}; echo "{result_end_marker}"; touch {shlex.quote(REVIEW_EMITTED_FILE)}; fi; '
"return \"$rc\"; "
"}",
"trap 'rc=$?; set +e; emit_review_artifact \"$rc\"; exit \"$rc\"' EXIT",
]
if settings.codex_auth_mode != "chatgpt": if settings.codex_auth_mode != "chatgpt":
steps.extend( steps.extend(
[ [
@@ -198,6 +231,8 @@ def _build_install_and_run_command(
model = settings.openai_review_model.strip() model = settings.openai_review_model.strip()
codex_exec_parts = [ codex_exec_parts = [
"codex exec", "codex exec",
"--sandbox",
"danger-full-access",
"--json", "--json",
"--output-schema", "--output-schema",
shlex.quote(REVIEW_SCHEMA_FILE), shlex.quote(REVIEW_SCHEMA_FILE),
@@ -209,10 +244,12 @@ def _build_install_and_run_command(
codex_exec_parts.append(shlex.quote(review_prompt)) codex_exec_parts.append(shlex.quote(review_prompt))
steps.extend( steps.extend(
[ [
" ".join(codex_exec_parts), "set +e",
f'echo "{result_start_marker}"', "codex_rc=0",
f"cat {shlex.quote(REVIEW_OUTPUT_FILE)}", " ".join(codex_exec_parts) + ' || codex_rc="$?"',
f'echo "{result_end_marker}"', "set -e",
f'if [ "$codex_rc" -ne 0 ] || [ ! -s {shlex.quote(REVIEW_OUTPUT_FILE)} ]; then cat > {REVIEW_OUTPUT_FILE} <<\'JSON\'\n{runner_fallback_json}\nJSON\nfi',
"emit_review_artifact 0",
] ]
) )
return "\n".join(steps) return "\n".join(steps)
@@ -235,6 +272,12 @@ def _build_exec_review_prompt(command: ParsedCommand, repo_cfg: RepoReviewConfig
focus = ", ".join(repo_cfg.focus) if repo_cfg.focus else "correctness, security, maintainability" focus = ", ".join(repo_cfg.focus) if repo_cfg.focus else "correctness, security, maintainability"
ignore = ", ".join(repo_cfg.ignore) if repo_cfg.ignore else "(none)" ignore = ", ".join(repo_cfg.ignore) if repo_cfg.ignore else "(none)"
mode = command.mode if command.name in {"review", "rerun"} else "summary" mode = command.mode if command.name in {"review", "rerun"} else "summary"
allow_test_execution = command.mode == "tests" or repo_cfg.include_tests
tests_policy = (
"Tests may be executed for this run because tests mode/include_tests is explicitly enabled."
if allow_test_execution
else "Do not run tests, benchmarks, or other executables. Review changes statically unless explicitly asked."
)
return "\n".join( return "\n".join(
[ [
f"review: {intent}", f"review: {intent}",
@@ -245,6 +288,7 @@ def _build_exec_review_prompt(command: ParsedCommand, repo_cfg: RepoReviewConfig
f"Focus areas: {focus}.", f"Focus areas: {focus}.",
f"Ignore patterns: {ignore}.", f"Ignore patterns: {ignore}.",
f"Include tests setting: {repo_cfg.include_tests}.", f"Include tests setting: {repo_cfg.include_tests}.",
tests_policy,
f"Full review requested: {command.full}.", f"Full review requested: {command.full}.",
"Return strict JSON matching the provided output schema.", "Return strict JSON matching the provided output schema.",
] ]
@@ -261,6 +305,8 @@ def _build_docker_command(settings: Settings, *, container_name: str, install_an
container_name, container_name,
"-e", "-e",
"CODEX_DISABLE_TELEMETRY=1", "CODEX_DISABLE_TELEMETRY=1",
"-e",
"CODEX_SANDBOX_MODE=danger-full-access",
] ]
if settings.codex_auth_mode == "chatgpt": if settings.codex_auth_mode == "chatgpt":
cmd.extend( cmd.extend(
@@ -371,21 +417,17 @@ def _parse_review_result_from_stdout_artifact(
result_start_marker: str, result_start_marker: str,
result_end_marker: str, result_end_marker: str,
) -> dict[str, Any]: ) -> dict[str, Any]:
lines = stdout.splitlines() start_pos = stdout.find(result_start_marker)
start_idx = -1 if start_pos == -1:
end_idx = -1
for idx, line in enumerate(lines):
if line.strip() == result_start_marker:
start_idx = idx
break
if start_idx != -1:
for idx in range(start_idx + 1, len(lines)):
if lines[idx].strip() == result_end_marker:
end_idx = idx
break
if start_idx == -1 or end_idx == -1 or end_idx <= start_idx:
raise RuntimeError("Runner output did not include final review artifact markers.") raise RuntimeError("Runner output did not include final review artifact markers.")
artifact = "\n".join(lines[start_idx + 1 : end_idx]).strip() artifact_start = start_pos + len(result_start_marker)
# Prefer the last end marker so marker-like text inside JSON does not
# truncate the payload when earlier incidental matches exist.
end_pos = stdout.rfind(result_end_marker)
if end_pos == -1 or end_pos <= artifact_start:
raise RuntimeError("Runner output did not include final review artifact markers.")
artifact = stdout[artifact_start:end_pos].strip()
if not artifact: if not artifact:
raise RuntimeError("Runner output contained empty final review artifact.") raise RuntimeError("Runner output contained empty final review artifact.")
try: try:

View File

@@ -116,7 +116,10 @@ def test_build_install_command_chatgpt_mode_sets_git_checkout_and_review(monkeyp
assert f"git checkout --detach {pr.head_sha}" in command assert f"git checkout --detach {pr.head_sha}" in command
assert "resolved_head=\"$(git rev-parse HEAD)\"" in command assert "resolved_head=\"$(git rev-parse HEAD)\"" in command
assert "unset GITEA_TOKEN auth_b64" in command assert "unset GITEA_TOKEN auth_b64" in command
assert "codex exec --json --output-schema /tmp/codex-review-schema.json -o /tmp/codex-review-result.json" in command assert (
"codex exec --sandbox danger-full-access --json --output-schema /tmp/codex-review-schema.json "
"-o /tmp/codex-review-result.json"
) in command
assert "review: security --full" in command assert "review: security --full" in command
assert "--output-schema /tmp/codex-review-schema.json" in command assert "--output-schema /tmp/codex-review-schema.json" in command
assert "-o /tmp/codex-review-result.json" in command assert "-o /tmp/codex-review-result.json" in command
@@ -369,6 +372,27 @@ def test_build_exec_review_prompt_falls_back_when_no_extra_text() -> None:
assert prompt.startswith("review: review this pull request and report introduced issues.") assert prompt.startswith("review: review this pull request and report introduced issues.")
def test_build_exec_review_prompt_disables_test_execution_by_default() -> None:
prompt = _build_exec_review_prompt(ParsedCommand(name="review", raw="@codex review"), RepoReviewConfig(), _sample_pr())
assert "Do not run tests, benchmarks, or other executables." in prompt
def test_build_exec_review_prompt_allows_test_execution_for_tests_mode() -> None:
prompt = _build_exec_review_prompt(
ParsedCommand(name="review", raw="@codex review tests", mode="tests", mode_explicit=True),
RepoReviewConfig(),
_sample_pr(),
)
assert "Tests may be executed for this run" in prompt
def test_apply_repo_default_review_mode_uses_full_when_not_configured() -> None:
command = ParsedCommand(name="review", raw="@codex review")
cfg = RepoReviewConfig()
_apply_repo_default_review_mode(command, cfg)
assert command.mode == "full"
def test_apply_repo_default_review_mode_for_review_command() -> None: def test_apply_repo_default_review_mode_for_review_command() -> None:
command = ParsedCommand(name="review", raw="@codex review") command = ParsedCommand(name="review", raw="@codex review")
cfg = RepoReviewConfig(default_mode="tests") cfg = RepoReviewConfig(default_mode="tests")
@@ -390,6 +414,22 @@ def test_parse_review_result_from_stdout_artifact_uses_end_marker_after_start()
assert parsed["verdict"] == "correct" assert parsed["verdict"] == "correct"
def test_parse_review_result_from_stdout_artifact_handles_inline_end_marker() -> None:
stdout = (
"noise\n"
f"{RESULT_START_MARKER}_a\n"
'{"verdict":"correct","confidence":0.9,"summary":"ok","findings":[],"markdown_comment":"ok"}'
f"{RESULT_END_MARKER}_a\n"
)
parsed = _parse_review_result_from_stdout_artifact(
stdout,
result_start_marker=f"{RESULT_START_MARKER}_a",
result_end_marker=f"{RESULT_END_MARKER}_a",
)
assert parsed["verdict"] == "correct"
assert parsed["summary"] == "ok"
def test_extract_result_meta_from_codex_stdout_collects_model_and_usage() -> None: def test_extract_result_meta_from_codex_stdout_collects_model_and_usage() -> None:
settings = get_settings() settings = get_settings()
stdout = "\n".join( stdout = "\n".join(

View File

@@ -0,0 +1,8 @@
from gitea_codex_bot.services.repo_config import parse_repo_review_config_text
def test_parse_repo_review_config_defaults_to_full_and_no_tests() -> None:
cfg = parse_repo_review_config_text("enabled: true\n", configured=True)
assert cfg.default_mode == "full"
assert cfg.include_tests is False