First MVP

This commit is contained in:
Space-Banane
2026-05-22 19:25:57 +02:00
parent 673f70b32a
commit 860ccb731d
40 changed files with 2336 additions and 0 deletions

View File

@@ -0,0 +1,290 @@
from __future__ import annotations
import json
import os
import shlex
import subprocess
from fnmatch import fnmatch
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any
import httpx
from gitea_codex_bot.config import Settings
from gitea_codex_bot.services.gitea import GiteaClient, PullRequestContext
from gitea_codex_bot.services.repo_config import RepoReviewConfig, load_repo_review_config
from gitea_codex_bot.types import ParsedCommand
class ReviewError(RuntimeError):
    """Error raised when a review result is missing or has an unusable shape."""
def _run_git(args: list[str], cwd: Path | None = None) -> str:
    """Run ``git`` with the given arguments and return its captured stdout.

    Args:
        args: Arguments placed after the ``git`` executable name.
        cwd: Working directory for the process; ``None`` uses the current one.

    Returns:
        The process's standard output, decoded as text.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status
            (the call uses ``check=True``).
    """
    command = ["git"]
    command.extend(args)
    result = subprocess.run(
        command,
        cwd=cwd,
        check=True,
        capture_output=True,
        text=True,
    )
    return result.stdout
def checkout_pr(tmpdir: Path, pr: PullRequestContext) -> Path:
    """Clone the pull request's repository and check out its head commit.

    The repository at ``pr.clone_url`` is cloned (no tags, depth 50) into
    ``tmpdir / "repo"``; the base and head refs are then fetched from
    ``origin`` and ``pr.head_sha`` is checked out.

    Args:
        tmpdir: Existing directory that will receive the ``repo`` checkout.
        pr: Pull request whose ``clone_url``, ``base_ref``, ``head_ref`` and
            ``head_sha`` are used.

    Returns:
        Path of the checked-out working tree.
    """
    # NOTE(review): head_ref is fetched from "origin"; presumably the head
    # branch lives in the same repository as clone_url -- confirm for forks.
    checkout_root = tmpdir / "repo"

    clone_args = ["clone", "--no-tags", "--depth", "50", pr.clone_url, str(checkout_root)]
    _run_git(clone_args)

    fetch_args = ["fetch", "origin", pr.base_ref, pr.head_ref]
    _run_git(fetch_args, cwd=checkout_root)

    checkout_args = ["checkout", pr.head_sha]
    _run_git(checkout_args, cwd=checkout_root)

    return checkout_root
def collect_diff_context(repo_dir: Path, pr: PullRequestContext, max_diff_bytes: int) -> dict[str, Any]:
    """Gather the PR's diff and changed file names from the checkout.

    Both values come from ``git diff <base_sha>...<head_sha>`` run inside
    *repo_dir*. When the UTF-8 encoded diff is larger than *max_diff_bytes*
    it is cut to that many bytes.

    Args:
        repo_dir: Working tree in which git is executed.
        pr: Pull request providing ``base_sha`` and ``head_sha``.
        max_diff_bytes: Maximum size of the returned diff in UTF-8 bytes.

    Returns:
        A dict with keys ``"diff"`` (str), ``"changed_files"`` (list of
        non-empty, stripped names) and ``"truncated"`` (bool).
    """
    revision_range = f"{pr.base_sha}...{pr.head_sha}"
    diff_text = _run_git(["diff", revision_range], cwd=repo_dir)
    names_output = _run_git(["diff", "--name-only", revision_range], cwd=repo_dir)

    names: list[str] = []
    for raw_line in names_output.splitlines():
        name = raw_line.strip()
        if name:
            names.append(name)

    encoded = diff_text.encode("utf-8")
    was_truncated = len(encoded) > max_diff_bytes
    if was_truncated:
        # errors="ignore" drops a multi-byte character split by the byte cut.
        diff_text = encoded[:max_diff_bytes].decode("utf-8", errors="ignore")

    return {"diff": diff_text, "changed_files": names, "truncated": was_truncated}
def _apply_ignore_patterns(changed_files: list[str], ignore_patterns: list[str]) -> list[str]:
if not ignore_patterns:
return changed_files
kept: list[str] = []
for path in changed_files:
if any(fnmatch(path, pattern) for pattern in ignore_patterns):
continue
kept.append(path)
return kept
def _collect_changed_file_contents(repo_dir: Path, changed_files: list[str], max_total_bytes: int) -> str:
chunks: list[str] = []
total = 0
for rel in changed_files:
path = repo_dir / rel
if not path.exists() or not path.is_file():
continue
try:
content = path.read_text(encoding="utf-8", errors="ignore")
except OSError:
continue
block = f"\n### {rel}\n{content}\n"
block_bytes = len(block.encode("utf-8"))
if total + block_bytes > max_total_bytes:
break
chunks.append(block)
total += block_bytes
return "".join(chunks).strip()
def _collect_test_output(repo_dir: Path, timeout_seconds: int) -> str:
try:
completed = subprocess.run(
["pytest", "-q"],
cwd=repo_dir,
capture_output=True,
text=True,
timeout=timeout_seconds,
check=False,
)
output = (completed.stdout + "\n" + completed.stderr).strip()
return output[:10000]
except Exception as exc:
return f"Test execution unavailable: {exc}"
def _redact_secrets_from_diff(diff: str) -> str:
secret_terms = ("api_key", "token", "secret", "password", "private_key", "-----begin")
redacted_lines: list[str] = []
for line in diff.splitlines():
lower = line.lower()
if any(term in lower for term in secret_terms):
redacted_lines.append("[REDACTED_POTENTIAL_SECRET]")
else:
redacted_lines.append(line)
return "\n".join(redacted_lines)
def _build_prompt(
pr: PullRequestContext,
command: ParsedCommand,
diff_context: dict[str, Any],
repo_cfg: RepoReviewConfig,
*,
changed_file_contents: str,
test_output: str | None,
) -> str:
mode = command.mode if command.name in {"review", "rerun"} else "summary"
return (
"You are reviewing a Gitea pull request.\n\n"
"Focus only on issues introduced by this PR.\n"
"Prioritize correctness, security, data loss, broken behavior, bad migrations, and missing tests.\n"
"Avoid style nitpicks.\n\n"
"Return JSON only with schema:\n"
"{\n"
' "verdict": "correct" | "has_issues",\n'
' "confidence": 0.0,\n'
' "summary": "...",\n'
' "findings": [{"severity":"low|medium|high|critical","file":"...","line_start":1,"line_end":1,"title":"...","body":"...","suggestion":"..."}]\n'
"}\n\n"
f"PR URL: {pr.html_url}\n"
f"Mode: {mode}\n"
f"Repo focus: {', '.join(repo_cfg.focus)}\n"
f"Diff truncated: {diff_context['truncated']}\n"
f"Changed files:\n{os.linesep.join(diff_context['changed_files'])}\n\n"
f"Unified diff:\n{diff_context['diff']}\n\n"
f"Changed file content (optional):\n{changed_file_contents or '(not included)'}\n\n"
f"Test output (optional):\n{test_output or '(not included)'}\n"
)
def _call_openai_review(settings: Settings, prompt: str) -> dict[str, Any]:
    """Send *prompt* to the OpenAI Responses endpoint and parse the JSON reply.

    The request asks for ``json_object`` formatted text. The first non-empty
    ``text`` value found in ``output[*].content[*]`` of the response payload
    is decoded with ``json.loads`` and returned.

    Args:
        settings: Provides the API key, optional organization/project ids,
            the model name and the reasoning effort.
        prompt: Full prompt text, sent as the request ``input``.

    Returns:
        The decoded JSON value. It is expected to be a dict but is not
        validated here.

    Raises:
        httpx.HTTPError: On transport failures or a non-success HTTP status.
        ReviewError: If the payload has no output text or that text is not
            valid JSON.
    """
    headers: dict[str, str] = {
        "Authorization": f"Bearer {settings.openai_api_key.get_secret_value()}",
        "Content-Type": "application/json",
    }
    if settings.openai_org_id:
        headers["OpenAI-Organization"] = settings.openai_org_id
    if settings.openai_project_id:
        headers["OpenAI-Project"] = settings.openai_project_id
    body = {
        "model": settings.openai_review_model,
        "input": prompt,
        "text": {"format": {"type": "json_object"}},
        "reasoning": {"effort": settings.openai_reasoning_effort},
    }
    with httpx.Client(timeout=120.0) as client:
        response = client.post("https://api.openai.com/v1/responses", headers=headers, json=body)
        response.raise_for_status()
        payload = response.json()

    if not isinstance(payload, dict):
        raise ReviewError("OpenAI response did not contain JSON output text.")

    # `or []` also covers keys that are present but null, which would
    # otherwise fail with TypeError when iterated.
    for item in payload.get("output") or []:
        if not isinstance(item, dict):
            continue
        for content in item.get("content") or []:
            if not isinstance(content, dict):
                continue
            text_value = content.get("text")
            if not text_value:
                continue
            try:
                return json.loads(text_value)
            except (TypeError, json.JSONDecodeError) as exc:
                raise ReviewError("OpenAI response output text was not valid JSON.") from exc
    raise ReviewError("OpenAI response did not contain JSON output text.")
def _fallback_review(diff_context: dict[str, Any]) -> dict[str, Any]:
findings = []
if "TODO" in diff_context["diff"]:
findings.append(
{
"severity": "low",
"file": "unknown",
"line_start": 1,
"line_end": 1,
"title": "TODO marker in diff",
"body": "The change introduces TODO markers that may indicate incomplete behavior.",
"suggestion": "Resolve or track TODOs before merging.",
}
)
return {
"verdict": "correct" if not findings else "has_issues",
"confidence": 0.4 if not findings else 0.6,
"summary": "Fallback analysis was used because OpenAI review was unavailable.",
"findings": findings,
}
def run_review_for_pr(
    settings: Settings,
    gitea: GiteaClient,
    repo: str,
    pr_number: int,
    command: ParsedCommand,
) -> tuple[dict[str, Any], RepoReviewConfig]:
    """Review a pull request, falling back to a local heuristic on failure.

    The prompt is prepared first; errors from that step propagate. The model
    call and the normalisation of its result are best-effort: if either one
    fails, the failure is logged and the fallback review is used instead.

    Args:
        settings: Application settings forwarded to the helpers.
        gitea: Client used to look up the pull request.
        repo: Repository identifier passed through to the Gitea client.
        pr_number: Number of the pull request to review.
        command: Parsed bot command controlling the review.

    Returns:
        A tuple of the normalised review result and the repository's review
        configuration.
    """
    import logging  # local import: keeps this block independent of the module's import list

    prompt, diff_context, repo_cfg = prepare_review_prompt(settings, gitea, repo, pr_number, command)
    try:
        # Normalising inside the try means a structurally invalid model
        # result (e.g. a JSON list) also triggers the fallback.
        result = normalize_review_result(_call_openai_review(settings, prompt))
    except Exception:
        # Boundary handler: record why the model review was unavailable
        # instead of discarding the error silently.
        logging.getLogger(__name__).exception(
            "OpenAI review failed for %s#%s; using fallback review.", repo, pr_number
        )
        result = normalize_review_result(_fallback_review(diff_context))
    return result, repo_cfg
def prepare_review_prompt(
    settings: Settings,
    gitea: GiteaClient,
    repo: str,
    pr_number: int,
    command: ParsedCommand,
) -> tuple[str, dict[str, Any], RepoReviewConfig]:
    """Check out the pull request in a temporary directory and build its prompt.

    Steps: fetch the PR from Gitea, check it out, load the repository review
    config, collect the diff (limited to the smaller of the global and
    repository byte limits), filter the changed-file list through the
    configured ignore patterns, redact secret-like diff lines, optionally
    gather full file contents and test output, and build the prompt.

    Args:
        settings: Provides ``max_diff_bytes`` and ``max_review_minutes``.
        gitea: Client used to look up the pull request.
        repo: Repository identifier passed to the Gitea client.
        pr_number: Number of the pull request.
        command: Parsed command; ``full`` enables file contents and
            ``mode == "tests"`` (together with the repository's
            ``include_tests``) enables the test run.

    Returns:
        A tuple ``(prompt, diff_context, repo_cfg)``.
    """
    pull = gitea.get_pull_request(repo, pr_number)
    with TemporaryDirectory(prefix="gitea-codex-") as scratch:
        checkout = checkout_pr(Path(scratch), pull)
        repo_cfg = load_repo_review_config(checkout)

        diff_limit = min(settings.max_diff_bytes, repo_cfg.max_diff_bytes)
        diff_context = collect_diff_context(checkout, pull, diff_limit)
        diff_context["changed_files"] = _apply_ignore_patterns(diff_context["changed_files"], repo_cfg.ignore)
        diff_context["diff"] = _redact_secrets_from_diff(diff_context["diff"])

        # Full file contents are only gathered when the command asks for them.
        file_dump = (
            _collect_changed_file_contents(checkout, diff_context["changed_files"], settings.max_diff_bytes)
            if command.full
            else ""
        )

        # Tests run only when both the repository config and the command opt in;
        # the timeout is capped at 300 seconds.
        wants_tests = repo_cfg.include_tests and command.mode == "tests"
        tests_report = (
            _collect_test_output(checkout, timeout_seconds=min(settings.max_review_minutes * 60, 300))
            if wants_tests
            else None
        )

        prompt = _build_prompt(
            pull,
            command,
            diff_context,
            repo_cfg,
            changed_file_contents=file_dump,
            test_output=tests_report,
        )
    return prompt, diff_context, repo_cfg
def normalize_review_result(result: Any) -> dict[str, Any]:
    """Fill in missing top-level keys of a review result in place.

    Args:
        result: Candidate review result; must be a dict.

    Returns:
        The same dict object, with ``findings``, ``summary``, ``verdict`` and
        ``confidence`` guaranteed to be present. Existing values are kept.

    Raises:
        ReviewError: If *result* is not a dict.
    """
    if not isinstance(result, dict):
        raise ReviewError(f"Invalid review result type: {type(result)!r}")

    fallbacks = (
        ("findings", []),
        ("summary", "No summary returned."),
        ("verdict", "has_issues"),
        ("confidence", 0.5),
    )
    for key, fallback in fallbacks:
        result.setdefault(key, fallback)
    return result
def summarize_command(command: ParsedCommand) -> str:
    """Render a command as the ``@codex <name> <arguments...>`` text form.

    Args:
        command: Parsed command providing ``name`` and ``arguments``.

    Returns:
        The space-joined text with surrounding whitespace stripped.
    """
    words = ["@codex", command.name]
    words.extend(command.arguments)
    return " ".join(words).strip()
def fix_branch_name(pr_number: int, arguments: list[str] | None = None) -> str:
    """Derive the branch name used for a fix on the given pull request.

    Up to four non-blank arguments that do not start with ``--`` are
    lower-cased, stripped and joined with ``-``; every character that is
    neither alphanumeric nor ``-`` becomes ``-`` and dashes at either end are
    removed. If nothing remains, the plain ``fix`` suffix is used.

    Args:
        pr_number: Pull request number embedded in the name.
        arguments: Optional command arguments used to build the suffix.

    Returns:
        ``codex/pr-<pr_number>-fix`` or ``codex/pr-<pr_number>-fix-<slug>``.
    """
    slug = ""
    if arguments:
        words: list[str] = []
        for token in arguments:
            if not token.strip() or token.startswith("--"):
                continue
            words.append(token.lower().strip())
        if words:
            joined = "-".join(words[:4])
            sanitized = [ch if (ch == "-" or ch.isalnum()) else "-" for ch in joined]
            slug = "".join(sanitized).strip("-")

    suffix = f"fix-{slug}" if slug else "fix"
    return f"codex/pr-{pr_number}-{suffix}"
def create_fix_patch_note(command: ParsedCommand) -> str:
    """Build the one-sentence note describing what a fix command targets.

    Args:
        command: Parsed command; its ``arguments`` are shell-quoted and
            joined, or ``latest findings`` is used when there are none.

    Returns:
        The note sentence.
    """
    if command.arguments:
        details = shlex.join(command.arguments)
    else:
        details = "latest findings"
    return f"Fix command requested for {details}."
def create_fix_branch(
    pr: PullRequestContext,
    *,
    note: str,
    arguments: list[str] | None = None,
) -> str:
    """Create and push a branch containing a fix note for the pull request.

    The PR is checked out in a temporary directory, a new branch is created
    from it, ``.codex/fix-note.md`` is written with *note*, committed as
    ``codex-bot`` and force-pushed to ``origin``.

    Args:
        pr: Pull request to branch from; ``pr_number`` names the branch.
        note: Text placed under the ``# Codex Fix Note`` heading.
        arguments: Optional command arguments used for the branch suffix.

    Returns:
        The name of the pushed branch.
    """
    branch = fix_branch_name(pr.pr_number, arguments=arguments)
    with TemporaryDirectory(prefix="gitea-codex-fix-") as scratch:
        checkout = checkout_pr(Path(scratch), pr)
        _run_git(["checkout", "-b", branch], cwd=checkout)

        note_file = checkout / ".codex" / "fix-note.md"
        note_file.parent.mkdir(parents=True, exist_ok=True)
        note_file.write_text(f"# Codex Fix Note\n\n{note}\n", encoding="utf-8")

        # Identity is supplied per-command so no git config is required.
        identity = ["-c", "user.name=codex-bot", "-c", "user.email=codex-bot@example.invalid"]
        publish_steps = (
            ["add", ".codex/fix-note.md"],
            [*identity, "commit", "-m", f"Codex fix note for PR {pr.pr_number}"],
            # --force overwrites an existing remote branch of the same name.
            ["push", "origin", f"{branch}:{branch}", "--force"],
        )
        for step in publish_steps:
            _run_git(step, cwd=checkout)
    return branch