improvements: Big Reviewer Update #4

Merged
space merged 7 commits from improv/big-reviewer-update into main 2026-05-23 14:22:33 +02:00
5 changed files with 45 additions and 385 deletions
Showing only changes of commit 30aa737516 - Show all commits

View File

@@ -1,19 +1,12 @@
from __future__ import annotations
import json
import os
import shlex
import subprocess
from fnmatch import fnmatch
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any
import httpx
from gitea_codex_bot.config import Settings
from gitea_codex_bot.services.gitea import GiteaClient, PullRequestContext
from gitea_codex_bot.services.repo_config import RepoReviewConfig, load_repo_review_config
from gitea_codex_bot.services.gitea import PullRequestContext
from gitea_codex_bot.types import ParsedCommand
@@ -34,265 +27,6 @@ def checkout_pr(tmpdir: Path, pr: PullRequestContext) -> Path:
return repo_dir
def collect_diff_context(repo_dir: Path, pr: PullRequestContext, max_diff_bytes: int) -> dict[str, Any]:
diff = _run_git(["diff", f"{pr.base_sha}...{pr.head_sha}"], cwd=repo_dir)
changed_files_raw = _run_git(["diff", "--name-only", f"{pr.base_sha}...{pr.head_sha}"], cwd=repo_dir)
changed_files = [line.strip() for line in changed_files_raw.splitlines() if line.strip()]
truncated = False
if len(diff.encode("utf-8")) > max_diff_bytes:
diff = diff.encode("utf-8")[:max_diff_bytes].decode("utf-8", errors="ignore")
truncated = True
return {"diff": diff, "changed_files": changed_files, "truncated": truncated}
def _apply_ignore_patterns(changed_files: list[str], ignore_patterns: list[str]) -> list[str]:
if not ignore_patterns:
return changed_files
kept: list[str] = []
for path in changed_files:
if any(fnmatch(path, pattern) for pattern in ignore_patterns):
continue
kept.append(path)
return kept
def _collect_changed_file_contents(repo_dir: Path, changed_files: list[str], max_total_bytes: int) -> str:
chunks: list[str] = []
total = 0
for rel in changed_files:
path = repo_dir / rel
if not path.exists() or not path.is_file():
continue
try:
content = path.read_text(encoding="utf-8", errors="ignore")
except OSError:
continue
block = f"\n### {rel}\n{content}\n"
block_bytes = len(block.encode("utf-8"))
if total + block_bytes > max_total_bytes:
break
chunks.append(block)
total += block_bytes
return "".join(chunks).strip()
def _collect_test_output(repo_dir: Path, timeout_seconds: int) -> str:
try:
completed = subprocess.run(
["pytest", "-q"],
cwd=repo_dir,
capture_output=True,
text=True,
timeout=timeout_seconds,
check=False,
)
output = (completed.stdout + "\n" + completed.stderr).strip()
return output[:10000]
except Exception as exc:
return f"Test execution unavailable: {exc}"
def _redact_secrets_from_diff(diff: str) -> str:
secret_terms = ("api_key", "token", "secret", "password", "private_key", "-----begin")
redacted_lines: list[str] = []
for line in diff.splitlines():
lower = line.lower()
if any(term in lower for term in secret_terms):
redacted_lines.append("[REDACTED_POTENTIAL_SECRET]")
else:
redacted_lines.append(line)
return "\n".join(redacted_lines)
def _build_prompt(
pr: PullRequestContext,
command: ParsedCommand,
diff_context: dict[str, Any],
repo_cfg: RepoReviewConfig,
*,
changed_file_contents: str,
test_output: str | None,
) -> str:
mode = command.mode if command.name in {"review", "rerun"} else "summary"
changed_files = diff_context.get("changed_files") or []
changed_files_section = os.linesep.join(changed_files) if changed_files else "(none)"
unified_diff = str(diff_context.get("diff", ""))
unified_diff_section = unified_diff if unified_diff.strip() else "(empty)"
return (
"You are reviewing a Gitea pull request.\n\n"
"Focus only on issues introduced by this PR.\n"
"Prioritize correctness, security, data loss, broken behavior, bad migrations, and missing tests.\n"
"Avoid style nitpicks.\n\n"
"You do not have internet/network access. Do not try to fetch URLs.\n"
"Use only the PR metadata, changed files, diff, and optional file/test content included below.\n"
"Never claim that PR content is inaccessible or missing if these sections are present.\n"
"If the changed-file list is `(none)` and unified diff is `(empty)`, treat this as a no-op PR and explain that no code changes were detected.\n\n"
"Return JSON only with schema:\n"
"{\n"
' "verdict": "correct" | "has_issues",\n'
' "confidence": 0.0,\n'
' "summary": "...",\n'
' "findings": [{"severity":"low|medium|high|critical","file":"...","line_start":1,"line_end":1,"title":"...","body":"...","suggestion":"..."}],\n'
' "markdown_comment": "Full markdown comment body to post to Gitea. Include clear section breaks and blank lines."\n'
"}\n\n"
f"PR URL: {pr.html_url}\n"
f"Mode: {mode}\n"
f"Trigger message: {command.raw}\n"
f"Repo focus: {', '.join(repo_cfg.focus)}\n"
f"Diff truncated: {diff_context['truncated']}\n"
f"Changed files:\n{changed_files_section}\n\n"
f"Unified diff:\n{unified_diff_section}\n\n"
f"Changed file content (optional):\n{changed_file_contents or '(not included)'}\n\n"
f"Test output (optional):\n{test_output or '(not included)'}\n"
)
def _call_openai_review(settings: Settings, prompt: str) -> dict[str, Any]:
api_key = settings.openai_api_key.get_secret_value() if settings.openai_api_key else ""
if not api_key.strip():
raise ReviewError("OPENAI_API_KEY is required for API-key review mode.")
headers: dict[str, str] = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
}
if settings.openai_org_id:
headers["OpenAI-Organization"] = settings.openai_org_id
if settings.openai_project_id:
headers["OpenAI-Project"] = settings.openai_project_id
body = {
"model": settings.openai_review_model,
"input": prompt,
"text": {"format": {"type": "json_object"}},
}
with httpx.Client(timeout=120.0) as client:
response = client.post("https://api.openai.com/v1/responses", headers=headers, json=body)
response.raise_for_status()
payload = response.json()
for item in payload.get("output", []):
for content in item.get("content", []):
text_value = content.get("text")
if text_value:
result = json.loads(text_value)
if isinstance(result, dict):
result["_meta"] = _build_openai_result_meta(payload, settings)
return result
raise ReviewError("OpenAI response did not contain JSON output text.")
def _build_openai_result_meta(payload: dict[str, Any], settings: Settings) -> dict[str, Any]:
usage_raw = payload.get("usage")
usage: dict[str, int] = {}
if isinstance(usage_raw, dict):
for output_key, source_key in (
("input_tokens", "input_tokens"),
("output_tokens", "output_tokens"),
("total_tokens", "total_tokens"),
):
value = usage_raw.get(source_key)
if isinstance(value, int):
usage[output_key] = value
model = payload.get("model")
if not isinstance(model, str) or not model.strip():
model = settings.openai_review_model
return {"source": "openai_api", "model": model, "usage": usage}
def _summarize_openai_failure(exc: Exception) -> str:
if isinstance(exc, httpx.HTTPStatusError):
status = exc.response.status_code
response_text = exc.response.text.strip()
if response_text:
compact = " ".join(response_text.split())
if len(compact) > 400:
compact = f"{compact[:400]}..."
return f"OpenAI API HTTP {status}: {compact}"
return f"OpenAI API HTTP {status}."
if isinstance(exc, httpx.TimeoutException):
return "OpenAI API request timed out."
message = str(exc).strip()
if message:
return message
return f"{exc.__class__.__name__} (no details)"
def _fallback_review(diff_context: dict[str, Any], *, failure_reason: str | None = None) -> dict[str, Any]:
findings: list[dict[str, Any]] = []
summary = "Fallback analysis was used because OpenAI review was unavailable."
if failure_reason:
summary = f"OpenAI review failed. Error: {failure_reason}"
findings.append(
{
"severity": "high",
"file": "unknown",
"line_start": 1,
"line_end": 1,
"title": "OpenAI review request failed",
"body": failure_reason,
"suggestion": "Fix API/auth/network issues and rerun @codex review.",
}
)
return {
"verdict": "correct" if not findings else "has_issues",
"confidence": 0.4 if not findings else 0.6,
"summary": summary,
"findings": findings,
}
def run_review_for_pr(
settings: Settings,
gitea: GiteaClient,
repo: str,
pr_number: int,
command: ParsedCommand,
) -> tuple[dict[str, Any], RepoReviewConfig]:
prompt, diff_context, repo_cfg = prepare_review_prompt(settings, gitea, repo, pr_number, command)
try:
result = _call_openai_review(settings, prompt)
except Exception as exc:
result = _fallback_review(diff_context, failure_reason=_summarize_openai_failure(exc))
return normalize_review_result(result), repo_cfg
def prepare_review_prompt(
settings: Settings,
gitea: GiteaClient,
repo: str,
pr_number: int,
command: ParsedCommand,
) -> tuple[str, dict[str, Any], RepoReviewConfig]:
pr = gitea.get_pull_request(repo, pr_number)
with TemporaryDirectory(prefix="gitea-codex-") as tmp:
tmpdir = Path(tmp)
repo_dir = checkout_pr(tmpdir, pr)
repo_cfg = load_repo_review_config(repo_dir)
if command.name == "review" and not command.mode_explicit:
configured_mode = repo_cfg.default_mode
command.mode = configured_mode if configured_mode in {"summary", "security", "performance", "tests", "full"} else "summary"
diff_context = collect_diff_context(repo_dir, pr, min(settings.max_diff_bytes, repo_cfg.max_diff_bytes))
diff_context["changed_files"] = _apply_ignore_patterns(diff_context["changed_files"], repo_cfg.ignore)
diff_context["diff"] = _redact_secrets_from_diff(diff_context["diff"])
changed_file_contents = ""
if command.full:
changed_file_contents = _collect_changed_file_contents(repo_dir, diff_context["changed_files"], settings.max_diff_bytes)
test_output = None
if repo_cfg.include_tests and command.mode == "tests":
test_output = _collect_test_output(repo_dir, timeout_seconds=min(settings.max_review_minutes * 60, 300))
prompt = _build_prompt(
pr,
command,
diff_context,
repo_cfg,
changed_file_contents=changed_file_contents,
test_output=test_output,
)
return prompt, diff_context, repo_cfg
def normalize_review_result(result: Any) -> dict[str, Any]:
if not isinstance(result, dict):
raise ReviewError(f"Invalid review result type: {type(result)!r}")

View File

@@ -4,6 +4,7 @@ import base64
import json
import logging
import os
import re
import shlex
import subprocess
import uuid
@@ -63,6 +64,7 @@ def run_review_ephemeral(
gitea = GiteaClient(settings)
pr = gitea.get_pull_request(repo, pr_number)
repo_cfg = _load_repo_review_config_from_gitea(gitea, repo, pr.head_sha)
review_prompt = _build_exec_review_prompt(command)
container_name = f"codex-review-{uuid.uuid4().hex[:12]}"
extra_env: dict[str, str] = {
"GITEA_TOKEN": settings.gitea_token.get_secret_value(),
@@ -81,6 +83,7 @@ def run_review_ephemeral(
settings,
pr=pr,
container_name=container_name,
review_prompt=review_prompt,
extra_env=extra_env,
)
if completed.returncode != 0:
@@ -98,9 +101,10 @@ def _run_ephemeral_container(
*,
pr: PullRequestContext,
container_name: str,
review_prompt: str,
extra_env: dict[str, str],
) -> subprocess.CompletedProcess[str]:
install_and_run = _build_install_and_run_command(settings, pr=pr)
install_and_run = _build_install_and_run_command(settings, pr=pr, review_prompt=review_prompt)
cmd = _build_docker_command(settings, container_name=container_name, install_and_run=install_and_run)
return subprocess.run(
cmd,
@@ -116,6 +120,7 @@ def _build_install_and_run_command(
settings: Settings,
*,
pr: PullRequestContext,
review_prompt: str,
) -> str:
steps = ["set -euo pipefail"]
if settings.codex_auth_mode != "chatgpt":
@@ -172,8 +177,7 @@ def _build_install_and_run_command(
)
model = settings.openai_review_model.strip()
codex_exec_parts = [
"codex exec review",
f"--base {shlex.quote(pr.base_sha)}",
"codex exec",
"--json",
"--output-schema",
shlex.quote(REVIEW_SCHEMA_FILE),
@@ -182,6 +186,7 @@ def _build_install_and_run_command(
]
if model:
codex_exec_parts.append(f"-m {shlex.quote(model)}")
codex_exec_parts.append(shlex.quote(review_prompt))
steps.extend(
[
" ".join(codex_exec_parts),
@@ -193,6 +198,17 @@ def _build_install_and_run_command(
return "\n".join(steps)
def _build_exec_review_prompt(command: ParsedCommand) -> str:
raw = (command.raw or "").strip()
remainder = raw
match = re.match(r"^@[^\s]+\s+\S+\s*(.*)$", raw, flags=re.IGNORECASE | re.DOTALL)
if match:
remainder = match.group(1).strip()
if not remainder:
return "review: review this pull request and report introduced issues."
return f"review: {remainder}"
def _build_docker_command(settings: Settings, *, container_name: str, install_and_run: str) -> list[str]:
cmd = [
"docker",

View File

@@ -4,9 +4,8 @@ import json
import sys
from gitea_codex_bot.config import get_settings
from gitea_codex_bot.services.gitea import GiteaClient
from gitea_codex_bot.services.reviewer import run_review_for_pr
from gitea_codex_bot.types import ParsedCommand
from gitea_codex_bot.workers.container_runner import run_review_ephemeral
def main() -> int:
@@ -21,8 +20,12 @@ def main() -> int:
branch_fix=bool(command_payload.get("branch_fix", False)),
arguments=list(command_payload.get("arguments", [])),
)
gitea = GiteaClient(settings)
result, _repo_cfg = run_review_for_pr(settings, gitea, payload["repo"], int(payload["pr_number"]), command)
result, _repo_cfg = run_review_ephemeral(
settings,
repo=payload["repo"],
pr_number=int(payload["pr_number"]),
command=command,
)
print(json.dumps(result))
return 0

View File

@@ -12,6 +12,7 @@ from gitea_codex_bot.workers.container_runner import (
RESULT_END_MARKER,
RESULT_START_MARKER,
_build_docker_command,
_build_exec_review_prompt,
_build_install_and_run_command,
_extract_result_meta_from_codex_stdout,
_load_codex_auth_json_b64,
@@ -95,7 +96,7 @@ def test_build_install_command_chatgpt_mode_sets_git_checkout_and_review(monkeyp
settings = get_settings()
pr = _sample_pr()
command = _build_install_and_run_command(settings, pr=pr)
command = _build_install_and_run_command(settings, pr=pr, review_prompt="review: security --full")
assert 'printf "%s" "$CODEX_AUTH_JSON_B64" | base64 -d > /root/.codex/auth.json' in command
assert "git -c http.extraHeader=" in command
@@ -107,7 +108,8 @@ def test_build_install_command_chatgpt_mode_sets_git_checkout_and_review(monkeyp
assert f"git checkout --detach {pr.head_sha}" in command
assert "resolved_head=\"$(git rev-parse HEAD)\"" in command
assert "unset GITEA_TOKEN auth_b64" in command
assert f"codex exec review --base {pr.base_sha}" in command
assert "codex exec --json --output-schema /tmp/codex-review-schema.json -o /tmp/codex-review-result.json" in command
assert "review: security --full" in command
assert "--output-schema /tmp/codex-review-schema.json" in command
assert "-o /tmp/codex-review-result.json" in command
assert "npm install -g @openai/codex@latest" in command
@@ -121,7 +123,7 @@ def test_build_install_command_does_not_include_reasoning_effort_flag() -> None:
settings = get_settings()
pr = _sample_pr()
command = _build_install_and_run_command(settings, pr=pr)
command = _build_install_and_run_command(settings, pr=pr, review_prompt="review: tests")
assert "--reasoning-effort" not in command
@@ -130,7 +132,7 @@ def test_build_install_command_uses_upstream_remote_for_fork_pr_base_fetch() ->
settings = get_settings()
pr = _sample_fork_pr()
command = _build_install_and_run_command(settings, pr=pr)
command = _build_install_and_run_command(settings, pr=pr, review_prompt="review: tests")
assert "base_remote=upstream" in command
assert f"git remote add upstream {pr.base_clone_url}" in command
@@ -320,6 +322,18 @@ def test_parse_review_result_from_stdout_artifact_fails_without_markers() -> Non
_parse_review_result_from_stdout_artifact("no markers here")
def test_build_exec_review_prompt_strips_mention_and_command() -> None:
prompt = _build_exec_review_prompt(
ParsedCommand(name="review", raw="@codex review security --full\nfocus session handling")
)
assert prompt == "review: security --full\nfocus session handling"
def test_build_exec_review_prompt_falls_back_when_no_extra_text() -> None:
prompt = _build_exec_review_prompt(ParsedCommand(name="rerun", raw="@codex rerun"))
assert prompt == "review: review this pull request and report introduced issues."
def test_extract_result_meta_from_codex_stdout_collects_model_and_usage() -> None:
settings = get_settings()
stdout = "\n".join(

View File

@@ -1,107 +0,0 @@
from __future__ import annotations
import httpx
from gitea_codex_bot.config import get_settings
from gitea_codex_bot.services.repo_config import RepoReviewConfig
from gitea_codex_bot.services.reviewer import _build_prompt, _fallback_review, prepare_review_prompt, run_review_for_pr
from gitea_codex_bot.types import ParsedCommand
def test_fallback_review_surfaces_failure_reason() -> None:
result = _fallback_review({"diff": ""}, failure_reason="OpenAI API HTTP 401: invalid_api_key")
assert result["verdict"] == "has_issues"
assert result["summary"] == "OpenAI review failed. Error: OpenAI API HTTP 401: invalid_api_key"
assert result["findings"][0]["title"] == "OpenAI review request failed"
assert result["findings"][0]["body"] == "OpenAI API HTTP 401: invalid_api_key"
def test_run_review_for_pr_uses_openai_http_error_in_fallback(monkeypatch) -> None:
def _fake_prepare(*_args, **_kwargs):
return "prompt", {"diff": "TODO: tighten validation"}, RepoReviewConfig()
def _raise_http_error(*_args, **_kwargs):
request = httpx.Request("POST", "https://api.openai.com/v1/responses")
response = httpx.Response(429, request=request, text='{"error":{"message":"rate_limited"}}')
raise httpx.HTTPStatusError("rate limited", request=request, response=response)
monkeypatch.setattr("gitea_codex_bot.services.reviewer.prepare_review_prompt", _fake_prepare)
monkeypatch.setattr("gitea_codex_bot.services.reviewer._call_openai_review", _raise_http_error)
settings = get_settings()
command = ParsedCommand(name="review", raw="@codex review")
result, _repo_cfg = run_review_for_pr(settings, object(), "acme/repo", 9, command)
assert result["summary"].startswith("OpenAI review failed. Error: OpenAI API HTTP 429:")
assert result["findings"][0]["title"] == "OpenAI review request failed"
assert "rate_limited" in result["findings"][0]["body"]
def test_build_prompt_includes_trigger_message() -> None:
pr = type("PR", (), {"html_url": "https://gitea.example/pr/1"})()
command = ParsedCommand(name="review", raw="@codex review security\nPlease focus auth.")
diff_context = {"truncated": False, "changed_files": ["app.py"], "diff": "diff --git a/app.py b/app.py"}
repo_cfg = RepoReviewConfig()
prompt = _build_prompt(
pr,
command,
diff_context,
repo_cfg,
changed_file_contents="",
test_output=None,
)
assert "Trigger message: @codex review security\nPlease focus auth." in prompt
def test_build_prompt_uses_empty_placeholders_and_no_network_instruction() -> None:
pr = type("PR", (), {"html_url": "https://gitea.example/pr/1"})()
command = ParsedCommand(name="review", raw="@codex review")
diff_context = {"truncated": False, "changed_files": [], "diff": ""}
repo_cfg = RepoReviewConfig()
prompt = _build_prompt(
pr,
command,
diff_context,
repo_cfg,
changed_file_contents="",
test_output=None,
)
assert "You do not have internet/network access. Do not try to fetch URLs." in prompt
assert "Never claim that PR content is inaccessible or missing if these sections are present." in prompt
assert "Changed files:\n(none)" in prompt
assert "Unified diff:\n(empty)" in prompt
def test_prepare_review_prompt_applies_repo_default_mode_when_command_mode_not_explicit(monkeypatch, tmp_path) -> None:
repo_dir = tmp_path / "repo"
repo_dir.mkdir(parents=True, exist_ok=True)
(repo_dir / ".codex-review.yml").write_text("review:\n default_mode: tests\n", encoding="utf-8")
pr = type(
"PR",
(),
{
"base_sha": "b" * 40,
"head_sha": "a" * 40,
"html_url": "https://gitea.example/pr/1",
},
)()
monkeypatch.setattr("gitea_codex_bot.services.reviewer.checkout_pr", lambda *_args, **_kwargs: repo_dir)
monkeypatch.setattr(
"gitea_codex_bot.services.reviewer.collect_diff_context",
lambda *_args, **_kwargs: {"diff": "", "changed_files": [], "truncated": False},
)
settings = get_settings()
gitea = type("GiteaStub", (), {"get_pull_request": lambda *_args, **_kwargs: pr})()
command = ParsedCommand(name="review", raw="@codex review")
prompt, _diff, _cfg = prepare_review_prompt(settings, gitea, "acme/repo", 9, command)
assert "Mode: tests" in prompt