fix. default full review without test execution
This commit is contained in:
@@ -10,9 +10,9 @@ import yaml
|
|||||||
class RepoReviewConfig:
|
class RepoReviewConfig:
|
||||||
configured: bool = True
|
configured: bool = True
|
||||||
enabled: bool = True
|
enabled: bool = True
|
||||||
default_mode: str = "summary"
|
default_mode: str = "full"
|
||||||
max_diff_bytes: int = 200000
|
max_diff_bytes: int = 200000
|
||||||
include_tests: bool = True
|
include_tests: bool = False
|
||||||
focus: list[str] = field(default_factory=lambda: ["correctness", "security", "maintainability"])
|
focus: list[str] = field(default_factory=lambda: ["correctness", "security", "maintainability"])
|
||||||
ignore: list[str] = field(default_factory=list)
|
ignore: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
@@ -27,13 +27,13 @@ def load_repo_review_config(repo_root: Path) -> RepoReviewConfig:
|
|||||||
def parse_repo_review_config_text(text: str, *, configured: bool) -> RepoReviewConfig:
|
def parse_repo_review_config_text(text: str, *, configured: bool) -> RepoReviewConfig:
|
||||||
raw = yaml.safe_load(text) or {}
|
raw = yaml.safe_load(text) or {}
|
||||||
review = raw.get("review", {}) or {}
|
review = raw.get("review", {}) or {}
|
||||||
default_mode = str(review.get("default_mode", "summary")).strip().lower() or "summary"
|
default_mode = str(review.get("default_mode", "full")).strip().lower() or "full"
|
||||||
return RepoReviewConfig(
|
return RepoReviewConfig(
|
||||||
configured=configured,
|
configured=configured,
|
||||||
enabled=bool(raw.get("enabled", True)),
|
enabled=bool(raw.get("enabled", True)),
|
||||||
default_mode=default_mode,
|
default_mode=default_mode,
|
||||||
max_diff_bytes=int(review.get("max_diff_bytes", 200000)),
|
max_diff_bytes=int(review.get("max_diff_bytes", 200000)),
|
||||||
include_tests=bool(review.get("include_tests", True)),
|
include_tests=bool(review.get("include_tests", False)),
|
||||||
focus=list(review.get("focus", ["correctness", "security", "maintainability"])),
|
focus=list(review.get("focus", ["correctness", "security", "maintainability"])),
|
||||||
ignore=list(raw.get("ignore", [])),
|
ignore=list(raw.get("ignore", [])),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ from gitea_codex_bot.types import ParsedCommand
|
|||||||
CONTAINER_CODEX_HOME = "/root/.codex"
|
CONTAINER_CODEX_HOME = "/root/.codex"
|
||||||
REVIEW_OUTPUT_FILE = "/tmp/codex-review-result.json"
|
REVIEW_OUTPUT_FILE = "/tmp/codex-review-result.json"
|
||||||
REVIEW_SCHEMA_FILE = "/tmp/codex-review-schema.json"
|
REVIEW_SCHEMA_FILE = "/tmp/codex-review-schema.json"
|
||||||
|
REVIEW_EMITTED_FILE = "/tmp/codex-review-emitted.flag"
|
||||||
RESULT_START_MARKER = "__CODEX_REVIEW_RESULT_BEGIN__"
|
RESULT_START_MARKER = "__CODEX_REVIEW_RESULT_BEGIN__"
|
||||||
RESULT_END_MARKER = "__CODEX_REVIEW_RESULT_END__"
|
RESULT_END_MARKER = "__CODEX_REVIEW_RESULT_END__"
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -38,7 +39,7 @@ REVIEW_RESULT_SCHEMA: dict[str, Any] = {
|
|||||||
"items": {
|
"items": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"additionalProperties": False,
|
"additionalProperties": False,
|
||||||
"required": ["severity", "file", "line_start", "line_end", "title", "body"],
|
"required": ["severity", "file", "line_start", "line_end", "title", "body", "suggestion"],
|
||||||
"properties": {
|
"properties": {
|
||||||
"severity": {"type": "string", "enum": ["low", "medium", "high", "critical"]},
|
"severity": {"type": "string", "enum": ["low", "medium", "high", "critical"]},
|
||||||
"file": {"type": "string"},
|
"file": {"type": "string"},
|
||||||
@@ -46,7 +47,7 @@ REVIEW_RESULT_SCHEMA: dict[str, Any] = {
|
|||||||
"line_end": {"type": "integer"},
|
"line_end": {"type": "integer"},
|
||||||
"title": {"type": "string"},
|
"title": {"type": "string"},
|
||||||
"body": {"type": "string"},
|
"body": {"type": "string"},
|
||||||
"suggestion": {"type": "string"},
|
"suggestion": {"type": ["string", "null"]},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -142,7 +143,39 @@ def _build_install_and_run_command(
|
|||||||
result_start_marker: str,
|
result_start_marker: str,
|
||||||
result_end_marker: str,
|
result_end_marker: str,
|
||||||
) -> str:
|
) -> str:
|
||||||
steps = ["set -euo pipefail"]
|
runner_fallback_json = json.dumps(
|
||||||
|
{
|
||||||
|
"verdict": "has_issues",
|
||||||
|
"confidence": 0.67,
|
||||||
|
"summary": "Ephemeral codex execution failed before producing a review result.",
|
||||||
|
"markdown_comment": "Ephemeral codex execution failed before producing a review result.",
|
||||||
|
"findings": [
|
||||||
|
{
|
||||||
|
"severity": "high",
|
||||||
|
"file": "runner",
|
||||||
|
"line_start": 1,
|
||||||
|
"line_end": 1,
|
||||||
|
"title": "Ephemeral review runner failed",
|
||||||
|
"body": "codex exec failed before emitting a valid structured artifact.",
|
||||||
|
"suggestion": "Check ephemeral runner logs for auth/model/network issues and rerun @codex review.",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
},
|
||||||
|
separators=(",", ":"),
|
||||||
|
)
|
||||||
|
steps = [
|
||||||
|
"set -euo pipefail",
|
||||||
|
f"rm -f {shlex.quote(REVIEW_EMITTED_FILE)}",
|
||||||
|
"emit_review_artifact() { "
|
||||||
|
"rc=\"$1\"; "
|
||||||
|
f"if [ ! -s {shlex.quote(REVIEW_OUTPUT_FILE)} ]; then "
|
||||||
|
f"cat > {shlex.quote(REVIEW_OUTPUT_FILE)} <<'JSON'\n{runner_fallback_json}\nJSON\n"
|
||||||
|
"fi; "
|
||||||
|
f'if [ ! -f {shlex.quote(REVIEW_EMITTED_FILE)} ]; then echo "{result_start_marker}"; cat {shlex.quote(REVIEW_OUTPUT_FILE)}; echo "{result_end_marker}"; touch {shlex.quote(REVIEW_EMITTED_FILE)}; fi; '
|
||||||
|
"return \"$rc\"; "
|
||||||
|
"}",
|
||||||
|
"trap 'rc=$?; set +e; emit_review_artifact \"$rc\"; exit \"$rc\"' EXIT",
|
||||||
|
]
|
||||||
if settings.codex_auth_mode != "chatgpt":
|
if settings.codex_auth_mode != "chatgpt":
|
||||||
steps.extend(
|
steps.extend(
|
||||||
[
|
[
|
||||||
@@ -198,6 +231,8 @@ def _build_install_and_run_command(
|
|||||||
model = settings.openai_review_model.strip()
|
model = settings.openai_review_model.strip()
|
||||||
codex_exec_parts = [
|
codex_exec_parts = [
|
||||||
"codex exec",
|
"codex exec",
|
||||||
|
"--sandbox",
|
||||||
|
"danger-full-access",
|
||||||
"--json",
|
"--json",
|
||||||
"--output-schema",
|
"--output-schema",
|
||||||
shlex.quote(REVIEW_SCHEMA_FILE),
|
shlex.quote(REVIEW_SCHEMA_FILE),
|
||||||
@@ -209,10 +244,12 @@ def _build_install_and_run_command(
|
|||||||
codex_exec_parts.append(shlex.quote(review_prompt))
|
codex_exec_parts.append(shlex.quote(review_prompt))
|
||||||
steps.extend(
|
steps.extend(
|
||||||
[
|
[
|
||||||
" ".join(codex_exec_parts),
|
"set +e",
|
||||||
f'echo "{result_start_marker}"',
|
"codex_rc=0",
|
||||||
f"cat {shlex.quote(REVIEW_OUTPUT_FILE)}",
|
" ".join(codex_exec_parts) + ' || codex_rc="$?"',
|
||||||
f'echo "{result_end_marker}"',
|
"set -e",
|
||||||
|
f'if [ "$codex_rc" -ne 0 ] || [ ! -s {shlex.quote(REVIEW_OUTPUT_FILE)} ]; then cat > {REVIEW_OUTPUT_FILE} <<\'JSON\'\n{runner_fallback_json}\nJSON\nfi',
|
||||||
|
"emit_review_artifact 0",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
return "\n".join(steps)
|
return "\n".join(steps)
|
||||||
@@ -235,6 +272,12 @@ def _build_exec_review_prompt(command: ParsedCommand, repo_cfg: RepoReviewConfig
|
|||||||
focus = ", ".join(repo_cfg.focus) if repo_cfg.focus else "correctness, security, maintainability"
|
focus = ", ".join(repo_cfg.focus) if repo_cfg.focus else "correctness, security, maintainability"
|
||||||
ignore = ", ".join(repo_cfg.ignore) if repo_cfg.ignore else "(none)"
|
ignore = ", ".join(repo_cfg.ignore) if repo_cfg.ignore else "(none)"
|
||||||
mode = command.mode if command.name in {"review", "rerun"} else "summary"
|
mode = command.mode if command.name in {"review", "rerun"} else "summary"
|
||||||
|
allow_test_execution = command.mode == "tests" or repo_cfg.include_tests
|
||||||
|
tests_policy = (
|
||||||
|
"Tests may be executed for this run because tests mode/include_tests is explicitly enabled."
|
||||||
|
if allow_test_execution
|
||||||
|
else "Do not run tests, benchmarks, or other executables. Review changes statically unless explicitly asked."
|
||||||
|
)
|
||||||
return "\n".join(
|
return "\n".join(
|
||||||
[
|
[
|
||||||
f"review: {intent}",
|
f"review: {intent}",
|
||||||
@@ -245,6 +288,7 @@ def _build_exec_review_prompt(command: ParsedCommand, repo_cfg: RepoReviewConfig
|
|||||||
f"Focus areas: {focus}.",
|
f"Focus areas: {focus}.",
|
||||||
f"Ignore patterns: {ignore}.",
|
f"Ignore patterns: {ignore}.",
|
||||||
f"Include tests setting: {repo_cfg.include_tests}.",
|
f"Include tests setting: {repo_cfg.include_tests}.",
|
||||||
|
tests_policy,
|
||||||
f"Full review requested: {command.full}.",
|
f"Full review requested: {command.full}.",
|
||||||
"Return strict JSON matching the provided output schema.",
|
"Return strict JSON matching the provided output schema.",
|
||||||
]
|
]
|
||||||
@@ -261,6 +305,8 @@ def _build_docker_command(settings: Settings, *, container_name: str, install_an
|
|||||||
container_name,
|
container_name,
|
||||||
"-e",
|
"-e",
|
||||||
"CODEX_DISABLE_TELEMETRY=1",
|
"CODEX_DISABLE_TELEMETRY=1",
|
||||||
|
"-e",
|
||||||
|
"CODEX_SANDBOX_MODE=danger-full-access",
|
||||||
]
|
]
|
||||||
if settings.codex_auth_mode == "chatgpt":
|
if settings.codex_auth_mode == "chatgpt":
|
||||||
cmd.extend(
|
cmd.extend(
|
||||||
@@ -371,21 +417,17 @@ def _parse_review_result_from_stdout_artifact(
|
|||||||
result_start_marker: str,
|
result_start_marker: str,
|
||||||
result_end_marker: str,
|
result_end_marker: str,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
lines = stdout.splitlines()
|
start_pos = stdout.find(result_start_marker)
|
||||||
start_idx = -1
|
if start_pos == -1:
|
||||||
end_idx = -1
|
|
||||||
for idx, line in enumerate(lines):
|
|
||||||
if line.strip() == result_start_marker:
|
|
||||||
start_idx = idx
|
|
||||||
break
|
|
||||||
if start_idx != -1:
|
|
||||||
for idx in range(start_idx + 1, len(lines)):
|
|
||||||
if lines[idx].strip() == result_end_marker:
|
|
||||||
end_idx = idx
|
|
||||||
break
|
|
||||||
if start_idx == -1 or end_idx == -1 or end_idx <= start_idx:
|
|
||||||
raise RuntimeError("Runner output did not include final review artifact markers.")
|
raise RuntimeError("Runner output did not include final review artifact markers.")
|
||||||
artifact = "\n".join(lines[start_idx + 1 : end_idx]).strip()
|
artifact_start = start_pos + len(result_start_marker)
|
||||||
|
|
||||||
|
# Prefer the last end marker so marker-like text inside JSON does not
|
||||||
|
# truncate the payload when earlier incidental matches exist.
|
||||||
|
end_pos = stdout.rfind(result_end_marker)
|
||||||
|
if end_pos == -1 or end_pos <= artifact_start:
|
||||||
|
raise RuntimeError("Runner output did not include final review artifact markers.")
|
||||||
|
artifact = stdout[artifact_start:end_pos].strip()
|
||||||
if not artifact:
|
if not artifact:
|
||||||
raise RuntimeError("Runner output contained empty final review artifact.")
|
raise RuntimeError("Runner output contained empty final review artifact.")
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -116,7 +116,10 @@ def test_build_install_command_chatgpt_mode_sets_git_checkout_and_review(monkeyp
|
|||||||
assert f"git checkout --detach {pr.head_sha}" in command
|
assert f"git checkout --detach {pr.head_sha}" in command
|
||||||
assert "resolved_head=\"$(git rev-parse HEAD)\"" in command
|
assert "resolved_head=\"$(git rev-parse HEAD)\"" in command
|
||||||
assert "unset GITEA_TOKEN auth_b64" in command
|
assert "unset GITEA_TOKEN auth_b64" in command
|
||||||
assert "codex exec --json --output-schema /tmp/codex-review-schema.json -o /tmp/codex-review-result.json" in command
|
assert (
|
||||||
|
"codex exec --sandbox danger-full-access --json --output-schema /tmp/codex-review-schema.json "
|
||||||
|
"-o /tmp/codex-review-result.json"
|
||||||
|
) in command
|
||||||
assert "review: security --full" in command
|
assert "review: security --full" in command
|
||||||
assert "--output-schema /tmp/codex-review-schema.json" in command
|
assert "--output-schema /tmp/codex-review-schema.json" in command
|
||||||
assert "-o /tmp/codex-review-result.json" in command
|
assert "-o /tmp/codex-review-result.json" in command
|
||||||
@@ -369,6 +372,27 @@ def test_build_exec_review_prompt_falls_back_when_no_extra_text() -> None:
|
|||||||
assert prompt.startswith("review: review this pull request and report introduced issues.")
|
assert prompt.startswith("review: review this pull request and report introduced issues.")
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_exec_review_prompt_disables_test_execution_by_default() -> None:
|
||||||
|
prompt = _build_exec_review_prompt(ParsedCommand(name="review", raw="@codex review"), RepoReviewConfig(), _sample_pr())
|
||||||
|
assert "Do not run tests, benchmarks, or other executables." in prompt
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_exec_review_prompt_allows_test_execution_for_tests_mode() -> None:
|
||||||
|
prompt = _build_exec_review_prompt(
|
||||||
|
ParsedCommand(name="review", raw="@codex review tests", mode="tests", mode_explicit=True),
|
||||||
|
RepoReviewConfig(),
|
||||||
|
_sample_pr(),
|
||||||
|
)
|
||||||
|
assert "Tests may be executed for this run" in prompt
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_repo_default_review_mode_uses_full_when_not_configured() -> None:
|
||||||
|
command = ParsedCommand(name="review", raw="@codex review")
|
||||||
|
cfg = RepoReviewConfig()
|
||||||
|
_apply_repo_default_review_mode(command, cfg)
|
||||||
|
assert command.mode == "full"
|
||||||
|
|
||||||
|
|
||||||
def test_apply_repo_default_review_mode_for_review_command() -> None:
|
def test_apply_repo_default_review_mode_for_review_command() -> None:
|
||||||
command = ParsedCommand(name="review", raw="@codex review")
|
command = ParsedCommand(name="review", raw="@codex review")
|
||||||
cfg = RepoReviewConfig(default_mode="tests")
|
cfg = RepoReviewConfig(default_mode="tests")
|
||||||
@@ -390,6 +414,22 @@ def test_parse_review_result_from_stdout_artifact_uses_end_marker_after_start()
|
|||||||
assert parsed["verdict"] == "correct"
|
assert parsed["verdict"] == "correct"
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_review_result_from_stdout_artifact_handles_inline_end_marker() -> None:
|
||||||
|
stdout = (
|
||||||
|
"noise\n"
|
||||||
|
f"{RESULT_START_MARKER}_a\n"
|
||||||
|
'{"verdict":"correct","confidence":0.9,"summary":"ok","findings":[],"markdown_comment":"ok"}'
|
||||||
|
f"{RESULT_END_MARKER}_a\n"
|
||||||
|
)
|
||||||
|
parsed = _parse_review_result_from_stdout_artifact(
|
||||||
|
stdout,
|
||||||
|
result_start_marker=f"{RESULT_START_MARKER}_a",
|
||||||
|
result_end_marker=f"{RESULT_END_MARKER}_a",
|
||||||
|
)
|
||||||
|
assert parsed["verdict"] == "correct"
|
||||||
|
assert parsed["summary"] == "ok"
|
||||||
|
|
||||||
|
|
||||||
def test_extract_result_meta_from_codex_stdout_collects_model_and_usage() -> None:
|
def test_extract_result_meta_from_codex_stdout_collects_model_and_usage() -> None:
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
stdout = "\n".join(
|
stdout = "\n".join(
|
||||||
|
|||||||
8
tests/test_repo_config.py
Normal file
8
tests/test_repo_config.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
from gitea_codex_bot.services.repo_config import parse_repo_review_config_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_repo_review_config_defaults_to_full_and_no_tests() -> None:
|
||||||
|
cfg = parse_repo_review_config_text("enabled: true\n", configured=True)
|
||||||
|
assert cfg.default_mode == "full"
|
||||||
|
assert cfg.include_tests is False
|
||||||
|
|
||||||
Reference in New Issue
Block a user