2026-05-23 14:22:33 +02:00
5 changed files with 45 additions and 385 deletions
--- a/src/gitea_codex_bot/services/reviewer.py
+++ b/src/gitea_codex_bot/services/reviewer.py
@@ -1,19 +1,12 @@
 from __future__ import annotations

-import json
-import os
 import shlex
 import subprocess
-from fnmatch import fnmatch
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from typing import Any

-import httpx
-
-from gitea_codex_bot.config import Settings
-from gitea_codex_bot.services.gitea import GiteaClient, PullRequestContext
-from gitea_codex_bot.services.repo_config import RepoReviewConfig, load_repo_review_config
+from gitea_codex_bot.services.gitea import PullRequestContext
 from gitea_codex_bot.types import ParsedCommand


@@ -34,265 +27,6 @@ def checkout_pr(tmpdir: Path, pr: PullRequestContext) -> Path:
    return repo_dir


-def collect_diff_context(repo_dir: Path, pr: PullRequestContext, max_diff_bytes: int) -> dict[str, Any]:
-    diff = _run_git(["diff", f"{pr.base_sha}...{pr.head_sha}"], cwd=repo_dir)
-    changed_files_raw = _run_git(["diff", "--name-only", f"{pr.base_sha}...{pr.head_sha}"], cwd=repo_dir)
-    changed_files = [line.strip() for line in changed_files_raw.splitlines() if line.strip()]
-    truncated = False
-    if len(diff.encode("utf-8")) > max_diff_bytes:
-        diff = diff.encode("utf-8")[:max_diff_bytes].decode("utf-8", errors="ignore")
-        truncated = True
-    return {"diff": diff, "changed_files": changed_files, "truncated": truncated}
-
-
-def _apply_ignore_patterns(changed_files: list[str], ignore_patterns: list[str]) -> list[str]:
-    if not ignore_patterns:
-        return changed_files
-    kept: list[str] = []
-    for path in changed_files:
-        if any(fnmatch(path, pattern) for pattern in ignore_patterns):
-            continue
-        kept.append(path)
-    return kept
-
-
-def _collect_changed_file_contents(repo_dir: Path, changed_files: list[str], max_total_bytes: int) -> str:
-    chunks: list[str] = []
-    total = 0
-    for rel in changed_files:
-        path = repo_dir / rel
-        if not path.exists() or not path.is_file():
-            continue
-        try:
-            content = path.read_text(encoding="utf-8", errors="ignore")
-        except OSError:
-            continue
-        block = f"\n### {rel}\n{content}\n"
-        block_bytes = len(block.encode("utf-8"))
-        if total + block_bytes > max_total_bytes:
-            break
-        chunks.append(block)
-        total += block_bytes
-    return "".join(chunks).strip()
-
-
-def _collect_test_output(repo_dir: Path, timeout_seconds: int) -> str:
-    try:
-        completed = subprocess.run(
-            ["pytest", "-q"],
-            cwd=repo_dir,
-            capture_output=True,
-            text=True,
-            timeout=timeout_seconds,
-            check=False,
-        )
-        output = (completed.stdout + "\n" + completed.stderr).strip()
-        return output[:10000]
-    except Exception as exc:
-        return f"Test execution unavailable: {exc}"
-
-
-def _redact_secrets_from_diff(diff: str) -> str:
-    secret_terms = ("api_key", "token", "secret", "password", "private_key", "-----begin")
-    redacted_lines: list[str] = []
-    for line in diff.splitlines():
-        lower = line.lower()
-        if any(term in lower for term in secret_terms):
-            redacted_lines.append("[REDACTED_POTENTIAL_SECRET]")
-        else:
-            redacted_lines.append(line)
-    return "\n".join(redacted_lines)
-
-
-def _build_prompt(
-    pr: PullRequestContext,
-    command: ParsedCommand,
-    diff_context: dict[str, Any],
-    repo_cfg: RepoReviewConfig,
-    *,
-    changed_file_contents: str,
-    test_output: str | None,
-) -> str:
-    mode = command.mode if command.name in {"review", "rerun"} else "summary"
-    changed_files = diff_context.get("changed_files") or []
-    changed_files_section = os.linesep.join(changed_files) if changed_files else "(none)"
-    unified_diff = str(diff_context.get("diff", ""))
-    unified_diff_section = unified_diff if unified_diff.strip() else "(empty)"
-    return (
-        "You are reviewing a Gitea pull request.\n\n"
-        "Focus only on issues introduced by this PR.\n"
-        "Prioritize correctness, security, data loss, broken behavior, bad migrations, and missing tests.\n"
-        "Avoid style nitpicks.\n\n"
-        "You do not have internet/network access. Do not try to fetch URLs.\n"
-        "Use only the PR metadata, changed files, diff, and optional file/test content included below.\n"
-        "Never claim that PR content is inaccessible or missing if these sections are present.\n"
-        "If the changed-file list is `(none)` and unified diff is `(empty)`, treat this as a no-op PR and explain that no code changes were detected.\n\n"
-        "Return JSON only with schema:\n"
-        "{\n"
-        '  "verdict": "correct" | "has_issues",\n'
-        '  "confidence": 0.0,\n'
-        '  "summary": "...",\n'
-        '  "findings": [{"severity":"low|medium|high|critical","file":"...","line_start":1,"line_end":1,"title":"...","body":"...","suggestion":"..."}],\n'
-        '  "markdown_comment": "Full markdown comment body to post to Gitea. Include clear section breaks and blank lines."\n'
-        "}\n\n"
-        f"PR URL: {pr.html_url}\n"
-        f"Mode: {mode}\n"
-        f"Trigger message: {command.raw}\n"
-        f"Repo focus: {', '.join(repo_cfg.focus)}\n"
-        f"Diff truncated: {diff_context['truncated']}\n"
-        f"Changed files:\n{changed_files_section}\n\n"
-        f"Unified diff:\n{unified_diff_section}\n\n"
-        f"Changed file content (optional):\n{changed_file_contents or '(not included)'}\n\n"
-        f"Test output (optional):\n{test_output or '(not included)'}\n"
-    )
-
-
-def _call_openai_review(settings: Settings, prompt: str) -> dict[str, Any]:
-    api_key = settings.openai_api_key.get_secret_value() if settings.openai_api_key else ""
-    if not api_key.strip():
-        raise ReviewError("OPENAI_API_KEY is required for API-key review mode.")
-    headers: dict[str, str] = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json",
-    }
-    if settings.openai_org_id:
-        headers["OpenAI-Organization"] = settings.openai_org_id
-    if settings.openai_project_id:
-        headers["OpenAI-Project"] = settings.openai_project_id
-
-    body = {
-        "model": settings.openai_review_model,
-        "input": prompt,
-        "text": {"format": {"type": "json_object"}},
-    }
-    with httpx.Client(timeout=120.0) as client:
-        response = client.post("https://api.openai.com/v1/responses", headers=headers, json=body)
-        response.raise_for_status()
-        payload = response.json()
-
-    for item in payload.get("output", []):
-        for content in item.get("content", []):
-            text_value = content.get("text")
-            if text_value:
-                result = json.loads(text_value)
-                if isinstance(result, dict):
-                    result["_meta"] = _build_openai_result_meta(payload, settings)
-                return result
-    raise ReviewError("OpenAI response did not contain JSON output text.")
-
-
-def _build_openai_result_meta(payload: dict[str, Any], settings: Settings) -> dict[str, Any]:
-    usage_raw = payload.get("usage")
-    usage: dict[str, int] = {}
-    if isinstance(usage_raw, dict):
-        for output_key, source_key in (
-            ("input_tokens", "input_tokens"),
-            ("output_tokens", "output_tokens"),
-            ("total_tokens", "total_tokens"),
-        ):
-            value = usage_raw.get(source_key)
-            if isinstance(value, int):
-                usage[output_key] = value
-    model = payload.get("model")
-    if not isinstance(model, str) or not model.strip():
-        model = settings.openai_review_model
-    return {"source": "openai_api", "model": model, "usage": usage}
-
-
-def _summarize_openai_failure(exc: Exception) -> str:
-    if isinstance(exc, httpx.HTTPStatusError):
-        status = exc.response.status_code
-        response_text = exc.response.text.strip()
-        if response_text:
-            compact = " ".join(response_text.split())
-            if len(compact) > 400:
-                compact = f"{compact[:400]}..."
-            return f"OpenAI API HTTP {status}: {compact}"
-        return f"OpenAI API HTTP {status}."
-    if isinstance(exc, httpx.TimeoutException):
-        return "OpenAI API request timed out."
-    message = str(exc).strip()
-    if message:
-        return message
-    return f"{exc.__class__.__name__} (no details)"
-
-
-def _fallback_review(diff_context: dict[str, Any], *, failure_reason: str | None = None) -> dict[str, Any]:
-    findings: list[dict[str, Any]] = []
-    summary = "Fallback analysis was used because OpenAI review was unavailable."
-
-    if failure_reason:
-        summary = f"OpenAI review failed. Error: {failure_reason}"
-        findings.append(
-            {
-                "severity": "high",
-                "file": "unknown",
-                "line_start": 1,
-                "line_end": 1,
-                "title": "OpenAI review request failed",
-                "body": failure_reason,
-                "suggestion": "Fix API/auth/network issues and rerun @codex review.",
-            }
-        )
-
-    return {
-        "verdict": "correct" if not findings else "has_issues",
-        "confidence": 0.4 if not findings else 0.6,
-        "summary": summary,
-        "findings": findings,
-    }
-
-
-def run_review_for_pr(
-    settings: Settings,
-    gitea: GiteaClient,
-    repo: str,
-    pr_number: int,
-    command: ParsedCommand,
-) -> tuple[dict[str, Any], RepoReviewConfig]:
-    prompt, diff_context, repo_cfg = prepare_review_prompt(settings, gitea, repo, pr_number, command)
-    try:
-        result = _call_openai_review(settings, prompt)
-    except Exception as exc:
-        result = _fallback_review(diff_context, failure_reason=_summarize_openai_failure(exc))
-    return normalize_review_result(result), repo_cfg
-
-
-def prepare_review_prompt(
-    settings: Settings,
-    gitea: GiteaClient,
-    repo: str,
-    pr_number: int,
-    command: ParsedCommand,
-) -> tuple[str, dict[str, Any], RepoReviewConfig]:
-    pr = gitea.get_pull_request(repo, pr_number)
-    with TemporaryDirectory(prefix="gitea-codex-") as tmp:
-        tmpdir = Path(tmp)
-        repo_dir = checkout_pr(tmpdir, pr)
-        repo_cfg = load_repo_review_config(repo_dir)
-        if command.name == "review" and not command.mode_explicit:
-            configured_mode = repo_cfg.default_mode
-            command.mode = configured_mode if configured_mode in {"summary", "security", "performance", "tests", "full"} else "summary"
-        diff_context = collect_diff_context(repo_dir, pr, min(settings.max_diff_bytes, repo_cfg.max_diff_bytes))
-        diff_context["changed_files"] = _apply_ignore_patterns(diff_context["changed_files"], repo_cfg.ignore)
-        diff_context["diff"] = _redact_secrets_from_diff(diff_context["diff"])
-        changed_file_contents = ""
-        if command.full:
-            changed_file_contents = _collect_changed_file_contents(repo_dir, diff_context["changed_files"], settings.max_diff_bytes)
-        test_output = None
-        if repo_cfg.include_tests and command.mode == "tests":
-            test_output = _collect_test_output(repo_dir, timeout_seconds=min(settings.max_review_minutes * 60, 300))
-        prompt = _build_prompt(
-            pr,
-            command,
-            diff_context,
-            repo_cfg,
-            changed_file_contents=changed_file_contents,
-            test_output=test_output,
-        )
-    return prompt, diff_context, repo_cfg
-
-
 def normalize_review_result(result: Any) -> dict[str, Any]:
    if not isinstance(result, dict):
        raise ReviewError(f"Invalid review result type: {type(result)!r}")
--- a/src/gitea_codex_bot/workers/container_runner.py
+++ b/src/gitea_codex_bot/workers/container_runner.py
@@ -4,6 +4,7 @@ import base64
 import json
 import logging
 import os
+import re
 import shlex
 import subprocess
 import uuid
@@ -63,6 +64,7 @@ def run_review_ephemeral(
    gitea = GiteaClient(settings)
    pr = gitea.get_pull_request(repo, pr_number)
    repo_cfg = _load_repo_review_config_from_gitea(gitea, repo, pr.head_sha)
+    review_prompt = _build_exec_review_prompt(command)
    container_name = f"codex-review-{uuid.uuid4().hex[:12]}"
    extra_env: dict[str, str] = {
        "GITEA_TOKEN": settings.gitea_token.get_secret_value(),
@@ -81,6 +83,7 @@ def run_review_ephemeral(
            settings,
            pr=pr,
            container_name=container_name,
+            review_prompt=review_prompt,
            extra_env=extra_env,
        )
        if completed.returncode != 0:
@@ -98,9 +101,10 @@ def _run_ephemeral_container(
    *,
    pr: PullRequestContext,
    container_name: str,
+    review_prompt: str,
    extra_env: dict[str, str],
 ) -> subprocess.CompletedProcess[str]:
-    install_and_run = _build_install_and_run_command(settings, pr=pr)
+    install_and_run = _build_install_and_run_command(settings, pr=pr, review_prompt=review_prompt)
    cmd = _build_docker_command(settings, container_name=container_name, install_and_run=install_and_run)
    return subprocess.run(
        cmd,
@@ -116,6 +120,7 @@ def _build_install_and_run_command(
    settings: Settings,
    *,
    pr: PullRequestContext,
+    review_prompt: str,
 ) -> str:
    steps = ["set -euo pipefail"]
    if settings.codex_auth_mode != "chatgpt":
@@ -172,8 +177,7 @@ def _build_install_and_run_command(
    )
    model = settings.openai_review_model.strip()
    codex_exec_parts = [
-        "codex exec review",
-        f"--base {shlex.quote(pr.base_sha)}",
+        "codex exec",
        "--json",
        "--output-schema",
        shlex.quote(REVIEW_SCHEMA_FILE),
@@ -182,6 +186,7 @@ def _build_install_and_run_command(
    ]
    if model:
        codex_exec_parts.append(f"-m {shlex.quote(model)}")
+    codex_exec_parts.append(shlex.quote(review_prompt))
    steps.extend(
        [
            " ".join(codex_exec_parts),
@@ -193,6 +198,17 @@ def _build_install_and_run_command(
    return "\n".join(steps)


+def _build_exec_review_prompt(command: ParsedCommand) -> str:
+    raw = (command.raw or "").strip()
+    remainder = raw
+    match = re.match(r"^@[^\s]+\s+\S+\s*(.*)$", raw, flags=re.IGNORECASE | re.DOTALL)
+    if match:
+        remainder = match.group(1).strip()
+    if not remainder:
+        return "review: review this pull request and report introduced issues."
+    return f"review: {remainder}"
+
+
 def _build_docker_command(settings: Settings, *, container_name: str, install_and_run: str) -> list[str]:
    cmd = [
        "docker",
--- a/src/gitea_codex_bot/workers/runner_entry.py
+++ b/src/gitea_codex_bot/workers/runner_entry.py
@@ -4,9 +4,8 @@ import json
 import sys

 from gitea_codex_bot.config import get_settings
-from gitea_codex_bot.services.gitea import GiteaClient
-from gitea_codex_bot.services.reviewer import run_review_for_pr
 from gitea_codex_bot.types import ParsedCommand
+from gitea_codex_bot.workers.container_runner import run_review_ephemeral


 def main() -> int:
@@ -21,8 +20,12 @@ def main() -> int:
        branch_fix=bool(command_payload.get("branch_fix", False)),
        arguments=list(command_payload.get("arguments", [])),
    )
-    gitea = GiteaClient(settings)
-    result, _repo_cfg = run_review_for_pr(settings, gitea, payload["repo"], int(payload["pr_number"]), command)
+    result, _repo_cfg = run_review_ephemeral(
+        settings,
+        repo=payload["repo"],
+        pr_number=int(payload["pr_number"]),
+        command=command,
+    )
    print(json.dumps(result))
    return 0

--- a/tests/test_container_runner.py
+++ b/tests/test_container_runner.py
@@ -12,6 +12,7 @@ from gitea_codex_bot.workers.container_runner import (
    RESULT_END_MARKER,
    RESULT_START_MARKER,
    _build_docker_command,
+    _build_exec_review_prompt,
    _build_install_and_run_command,
    _extract_result_meta_from_codex_stdout,
    _load_codex_auth_json_b64,
@@ -95,7 +96,7 @@ def test_build_install_command_chatgpt_mode_sets_git_checkout_and_review(monkeyp
    settings = get_settings()
    pr = _sample_pr()

-    command = _build_install_and_run_command(settings, pr=pr)
+    command = _build_install_and_run_command(settings, pr=pr, review_prompt="review: security --full")

    assert 'printf "%s" "$CODEX_AUTH_JSON_B64" | base64 -d > /root/.codex/auth.json' in command
    assert "git -c http.extraHeader=" in command
@@ -107,7 +108,8 @@ def test_build_install_command_chatgpt_mode_sets_git_checkout_and_review(monkeyp
    assert f"git checkout --detach {pr.head_sha}" in command
    assert "resolved_head=\"$(git rev-parse HEAD)\"" in command
    assert "unset GITEA_TOKEN auth_b64" in command
-    assert f"codex exec review --base {pr.base_sha}" in command
+    assert "codex exec --json --output-schema /tmp/codex-review-schema.json -o /tmp/codex-review-result.json" in command
+    assert "review: security --full" in command
    assert "--output-schema /tmp/codex-review-schema.json" in command
    assert "-o /tmp/codex-review-result.json" in command
    assert "npm install -g @openai/codex@latest" in command
@@ -121,7 +123,7 @@ def test_build_install_command_does_not_include_reasoning_effort_flag() -> None:
    settings = get_settings()
    pr = _sample_pr()

-    command = _build_install_and_run_command(settings, pr=pr)
+    command = _build_install_and_run_command(settings, pr=pr, review_prompt="review: tests")

    assert "--reasoning-effort" not in command

@@ -130,7 +132,7 @@ def test_build_install_command_uses_upstream_remote_for_fork_pr_base_fetch() ->
    settings = get_settings()
    pr = _sample_fork_pr()

-    command = _build_install_and_run_command(settings, pr=pr)
+    command = _build_install_and_run_command(settings, pr=pr, review_prompt="review: tests")

    assert "base_remote=upstream" in command
    assert f"git remote add upstream {pr.base_clone_url}" in command
@@ -320,6 +322,18 @@ def test_parse_review_result_from_stdout_artifact_fails_without_markers() -> Non
        _parse_review_result_from_stdout_artifact("no markers here")


+def test_build_exec_review_prompt_strips_mention_and_command() -> None:
+    prompt = _build_exec_review_prompt(
+        ParsedCommand(name="review", raw="@codex review security --full\nfocus session handling")
+    )
+    assert prompt == "review: security --full\nfocus session handling"
+
+
+def test_build_exec_review_prompt_falls_back_when_no_extra_text() -> None:
+    prompt = _build_exec_review_prompt(ParsedCommand(name="rerun", raw="@codex rerun"))
+    assert prompt == "review: review this pull request and report introduced issues."
+
+
 def test_extract_result_meta_from_codex_stdout_collects_model_and_usage() -> None:
    settings = get_settings()
    stdout = "\n".join(
--- a/tests/test_reviewer_fallback.py
+++ b/tests/test_reviewer_fallback.py
@@ -1,107 +0,0 @@
-from __future__ import annotations
-
-import httpx
-
-from gitea_codex_bot.config import get_settings
-from gitea_codex_bot.services.repo_config import RepoReviewConfig
-from gitea_codex_bot.services.reviewer import _build_prompt, _fallback_review, prepare_review_prompt, run_review_for_pr
-from gitea_codex_bot.types import ParsedCommand
-
-
-def test_fallback_review_surfaces_failure_reason() -> None:
-    result = _fallback_review({"diff": ""}, failure_reason="OpenAI API HTTP 401: invalid_api_key")
-
-    assert result["verdict"] == "has_issues"
-    assert result["summary"] == "OpenAI review failed. Error: OpenAI API HTTP 401: invalid_api_key"
-    assert result["findings"][0]["title"] == "OpenAI review request failed"
-    assert result["findings"][0]["body"] == "OpenAI API HTTP 401: invalid_api_key"
-
-
-def test_run_review_for_pr_uses_openai_http_error_in_fallback(monkeypatch) -> None:
-    def _fake_prepare(*_args, **_kwargs):
-        return "prompt", {"diff": "TODO: tighten validation"}, RepoReviewConfig()
-
-    def _raise_http_error(*_args, **_kwargs):
-        request = httpx.Request("POST", "https://api.openai.com/v1/responses")
-        response = httpx.Response(429, request=request, text='{"error":{"message":"rate_limited"}}')
-        raise httpx.HTTPStatusError("rate limited", request=request, response=response)
-
-    monkeypatch.setattr("gitea_codex_bot.services.reviewer.prepare_review_prompt", _fake_prepare)
-    monkeypatch.setattr("gitea_codex_bot.services.reviewer._call_openai_review", _raise_http_error)
-
-    settings = get_settings()
-    command = ParsedCommand(name="review", raw="@codex review")
-    result, _repo_cfg = run_review_for_pr(settings, object(), "acme/repo", 9, command)
-
-    assert result["summary"].startswith("OpenAI review failed. Error: OpenAI API HTTP 429:")
-    assert result["findings"][0]["title"] == "OpenAI review request failed"
-    assert "rate_limited" in result["findings"][0]["body"]
-
-
-def test_build_prompt_includes_trigger_message() -> None:
-    pr = type("PR", (), {"html_url": "https://gitea.example/pr/1"})()
-    command = ParsedCommand(name="review", raw="@codex review security\nPlease focus auth.")
-    diff_context = {"truncated": False, "changed_files": ["app.py"], "diff": "diff --git a/app.py b/app.py"}
-    repo_cfg = RepoReviewConfig()
-
-    prompt = _build_prompt(
-        pr,
-        command,
-        diff_context,
-        repo_cfg,
-        changed_file_contents="",
-        test_output=None,
-    )
-
-    assert "Trigger message: @codex review security\nPlease focus auth." in prompt
-
-
-def test_build_prompt_uses_empty_placeholders_and_no_network_instruction() -> None:
-    pr = type("PR", (), {"html_url": "https://gitea.example/pr/1"})()
-    command = ParsedCommand(name="review", raw="@codex review")
-    diff_context = {"truncated": False, "changed_files": [], "diff": ""}
-    repo_cfg = RepoReviewConfig()
-
-    prompt = _build_prompt(
-        pr,
-        command,
-        diff_context,
-        repo_cfg,
-        changed_file_contents="",
-        test_output=None,
-    )
-
-    assert "You do not have internet/network access. Do not try to fetch URLs." in prompt
-    assert "Never claim that PR content is inaccessible or missing if these sections are present." in prompt
-    assert "Changed files:\n(none)" in prompt
-    assert "Unified diff:\n(empty)" in prompt
-
-
-def test_prepare_review_prompt_applies_repo_default_mode_when_command_mode_not_explicit(monkeypatch, tmp_path) -> None:
-    repo_dir = tmp_path / "repo"
-    repo_dir.mkdir(parents=True, exist_ok=True)
-    (repo_dir / ".codex-review.yml").write_text("review:\n  default_mode: tests\n", encoding="utf-8")
-
-    pr = type(
-        "PR",
-        (),
-        {
-            "base_sha": "b" * 40,
-            "head_sha": "a" * 40,
-            "html_url": "https://gitea.example/pr/1",
-        },
-    )()
-
-    monkeypatch.setattr("gitea_codex_bot.services.reviewer.checkout_pr", lambda *_args, **_kwargs: repo_dir)
-    monkeypatch.setattr(
-        "gitea_codex_bot.services.reviewer.collect_diff_context",
-        lambda *_args, **_kwargs: {"diff": "", "changed_files": [], "truncated": False},
-    )
-
-    settings = get_settings()
-    gitea = type("GiteaStub", (), {"get_pull_request": lambda *_args, **_kwargs: pr})()
-    command = ParsedCommand(name="review", raw="@codex review")
-
-    prompt, _diff, _cfg = prepare_review_prompt(settings, gitea, "acme/repo", 9, command)
-
-    assert "Mode: tests" in prompt