First MVP

This commit is contained in:
Space-Banane
2026-05-22 19:25:57 +02:00
parent 673f70b32a
commit 860ccb731d
40 changed files with 2336 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
# Names exported by ``from <package> import *``: only the version string.
__all__ = ["__version__"]
# Version string of this package.
__version__ = "0.1.0"

View File

View File

@@ -0,0 +1,64 @@
from __future__ import annotations
from functools import lru_cache
from typing import Literal
from pydantic import Field, SecretStr, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Runtime configuration read from environment variables and ``.env``.

    Each field is bound to an upper-case variable name through its ``alias``.
    Fields declared without a default must be supplied; variables that do not
    correspond to a field are ignored (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    # --- Gitea connection ---
    gitea_base_url: str = Field(alias="GITEA_BASE_URL")
    gitea_token: SecretStr = Field(alias="GITEA_TOKEN")
    gitea_bot_username: str = Field(alias="GITEA_BOT_USERNAME")
    gitea_webhook_secret: SecretStr = Field(alias="GITEA_WEBHOOK_SECRET")

    # --- OpenAI ---
    openai_api_key: SecretStr = Field(alias="OPENAI_API_KEY")
    openai_project_id: str | None = Field(default=None, alias="OPENAI_PROJECT_ID")
    openai_org_id: str | None = Field(default=None, alias="OPENAI_ORG_ID")
    openai_review_model: str = Field(default="gpt-5.3-codex", alias="OPENAI_REVIEW_MODEL")
    openai_reasoning_effort: Literal["none", "low", "medium", "high"] = Field(default="high", alias="OPENAI_REASONING_EFFORT")

    # --- Webhook handling ---
    # Comma-separated list of repositories; see ``allowed_repo_set``.
    allowed_repos: str = Field(alias="ALLOWED_REPOS")
    cooldown_seconds: int = Field(default=60, alias="COOLDOWN_SECONDS")
    webhook_mode: Literal["repo", "global"] = Field(default="repo", alias="WEBHOOK_MODE")

    # --- Database ---
    db_host: str = Field(alias="DB_HOST")
    db_port: int = Field(default=3306, alias="DB_PORT")
    db_name: str = Field(alias="DB_NAME")
    db_user: str = Field(alias="DB_USER")
    db_password: SecretStr = Field(alias="DB_PASSWORD")
    # When set, used verbatim instead of the URL assembled from the DB_* parts.
    database_url: str | None = Field(default=None, alias="DATABASE_URL")

    # --- Review execution ---
    workdir: str = Field(default="/var/lib/gitea-codex/worktrees", alias="WORKDIR")
    max_diff_bytes: int = Field(default=200000, alias="MAX_DIFF_BYTES")
    max_review_minutes: int = Field(default=10, alias="MAX_REVIEW_MINUTES")
    concurrency: int = Field(default=1, alias="CONCURRENCY")
    review_runner_image: str = Field(default="node:22-bookworm-slim", alias="REVIEW_RUNNER_IMAGE")
    enable_fix_commands: bool = Field(default=False, alias="ENABLE_FIX_COMMANDS")
    allow_untrusted_forks: bool = Field(default=False, alias="ALLOW_UNTRUSTED_FORKS")

    @field_validator("gitea_base_url")
    @classmethod
    def normalize_base_url(cls, value: str) -> str:
        """Drop trailing slashes so API paths can be appended with a leading ``/``."""
        return value.rstrip("/")

    @property
    def sqlalchemy_url(self) -> str:
        """Return the SQLAlchemy connection URL.

        ``DATABASE_URL`` wins when it is set. Otherwise a ``mysql+pymysql`` URL
        is assembled from the ``DB_*`` fields. User name and password are
        percent-encoded so characters such as ``@``, ``:``, ``/`` or ``#`` in
        a credential cannot corrupt the URL structure.
        """
        if self.database_url:
            return self.database_url
        # Local import keeps this block self-contained.
        from urllib.parse import quote_plus

        user = quote_plus(self.db_user)
        password = quote_plus(self.db_password.get_secret_value())
        return f"mysql+pymysql://{user}:{password}@{self.db_host}:{self.db_port}/{self.db_name}?charset=utf8mb4"

    @property
    def allowed_repo_set(self) -> set[str]:
        """Return ``allowed_repos`` split on commas, trimmed, with empty entries removed."""
        return {entry.strip() for entry in self.allowed_repos.split(",") if entry.strip()}
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Build the ``Settings`` object once and hand back the cached instance afterwards."""
    loaded = Settings()
    return loaded

32
src/gitea_codex_bot/db.py Normal file
View File

@@ -0,0 +1,32 @@
from __future__ import annotations
from collections.abc import Generator
from functools import lru_cache
from sqlalchemy import create_engine
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
from gitea_codex_bot.config import get_settings
class Base(DeclarativeBase):
    """Declarative base class shared by the ORM models of this package."""

    pass
@lru_cache(maxsize=1)
def get_engine():
    """Create the SQLAlchemy engine on first use and reuse it afterwards.

    The connection URL comes from ``Settings.sqlalchemy_url``; ``pool_pre_ping``
    is enabled on the engine.
    """
    database_url = get_settings().sqlalchemy_url
    return create_engine(database_url, pool_pre_ping=True, future=True)
@lru_cache(maxsize=1)
def get_session_factory():
    """Return the cached ``sessionmaker`` bound to the shared engine.

    Sessions are created with autoflush and autocommit disabled, and objects
    are not expired on commit.
    """
    engine = get_engine()
    factory = sessionmaker(
        bind=engine,
        class_=Session,
        autoflush=False,
        autocommit=False,
        expire_on_commit=False,
    )
    return factory
def get_session() -> Generator[Session, None, None]:
    """Yield a database session and close it once the consumer is finished.

    Used as a FastAPI dependency; the session is closed whether or not the
    consumer raised.
    """
    make_session = get_session_factory()
    # Leaving the ``with`` block closes the session, also on error.
    with make_session() as db_session:
        yield db_session

175
src/gitea_codex_bot/main.py Normal file
View File

@@ -0,0 +1,175 @@
from __future__ import annotations
import asyncio
import logging
from contextlib import asynccontextmanager
from typing import Any
from fastapi import Depends, FastAPI, Header, HTTPException, Request, status
from sqlalchemy.orm import Session
from gitea_codex_bot.config import Settings, get_settings
from gitea_codex_bot.db import Base, get_engine, get_session
from gitea_codex_bot.services.commands import parse_command
from gitea_codex_bot.services.gitea import GiteaClient
from gitea_codex_bot.services.jobs import cooldown_remaining_seconds, enqueue_job, persist_webhook_event
from gitea_codex_bot.services.review_format import (
format_cooldown_ack,
format_queue_ack,
format_unsupported_ack,
)
from gitea_codex_bot.services.security import verify_gitea_signature
from gitea_codex_bot.workers.dispatcher import worker_loop
# Configure root logging at import time: INFO level, timestamp/level/logger-name prefix.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s")
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def _validate_required_env(settings: Settings) -> None:
if not settings.openai_api_key.get_secret_value().strip():
raise RuntimeError("OPENAI_API_KEY is required")
def _extract_pr_event(payload: dict[str, Any], event_name: str) -> tuple[str, int, str, int, str] | None:
repository = payload.get("repository", {})
repo = repository.get("full_name")
if not repo:
return None
sender = payload.get("sender", {})
sender_username = sender.get("username", "")
comment = payload.get("comment", {})
comment_id = int(comment.get("id", 0) or 0)
if comment_id <= 0:
return None
if event_name == "issue_comment":
issue = payload.get("issue", {})
if not issue.get("pull_request"):
return None
pr_number = int(issue.get("number", 0) or 0)
head_sha = payload.get("pull_request", {}).get("head", {}).get("sha", "")
elif event_name == "pull_request_comment":
pull_request = payload.get("pull_request", {})
if not pull_request:
return None
pr_number = int(pull_request.get("number", 0) or 0)
head_sha = pull_request.get("head", {}).get("sha", "")
else:
return None
if pr_number <= 0:
return None
if not head_sha:
head_sha = "unknown"
return repo, pr_number, head_sha, comment_id, sender_username
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start and stop the background worker around the application's lifetime.

    On startup: validate settings, create the tables registered on
    ``Base.metadata``, and launch ``worker_loop`` as an asyncio task. On
    shutdown: set the stop event and wait for the worker task to finish.
    """
    settings = get_settings()
    _validate_required_env(settings)
    # Create tables for all models registered on Base.
    Base.metadata.create_all(bind=get_engine())
    # The worker is handed this event; it is set during shutdown below.
    stop_event = asyncio.Event()
    task = asyncio.create_task(worker_loop(settings, stop_event))
    # Keep references on app.state so they stay reachable from the app object.
    app.state.worker_stop_event = stop_event
    app.state.worker_task = task
    try:
        yield
    finally:
        stop_event.set()
        # Awaiting the task also re-raises any exception the worker ended with.
        await task
# FastAPI application; startup and shutdown are handled by ``lifespan``.
app = FastAPI(title="Gitea Codex Review Bot", lifespan=lifespan)
@app.get("/healthz")
def healthz(settings: Settings = Depends(get_settings)) -> dict[str, str]:
    """Liveness endpoint: resolves the settings dependency and reports ``ok``."""
    # Touch a settings attribute so the dependency is actually used.
    _ = settings.gitea_base_url
    health: dict[str, str] = {"status": "ok"}
    return health
@app.post("/webhook/gitea")
async def gitea_webhook(
    request: Request,
    x_gitea_event: str | None = Header(default=None),
    x_gitea_delivery: str | None = Header(default=None),
    x_gitea_signature: str | None = Header(default=None),
    session: Session = Depends(get_session),
    settings: Settings = Depends(get_settings),
) -> dict[str, Any]:
    """Receive a Gitea webhook and queue a job for ``@codex`` commands on PRs.

    Processing order: verify the signature, filter by event type, extract the
    PR/comment fields, ignore the bot's own comments, parse the command, check
    the repository allow-list, de-duplicate the delivery, then queue the job.

    Returns:
        A JSON object with ``accepted`` and ``reason``, or the queued
        ``job_id`` and ``status``.

    Raises:
        HTTPException: 401 when the signature does not verify; 400 when the
            body is not a JSON object.
    """
    # NOTE(review): GiteaClient and the SQLAlchemy session are synchronous, so
    # the calls below block the event loop while they run.
    payload_bytes = await request.body()
    webhook_secret = settings.gitea_webhook_secret.get_secret_value()
    if not verify_gitea_signature(payload_bytes, webhook_secret, x_gitea_signature):
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="invalid signature")

    event_name = (x_gitea_event or "").strip()
    if event_name not in {"issue_comment", "pull_request_comment"}:
        return {"accepted": False, "reason": "event ignored"}

    # A correctly signed body can still be malformed; answer 400, not 500.
    try:
        payload = await request.json()
    except ValueError as exc:  # JSONDecodeError and UnicodeDecodeError are ValueErrors
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="invalid JSON payload") from exc
    if not isinstance(payload, dict):
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="invalid JSON payload")

    extracted = _extract_pr_event(payload, event_name)
    if not extracted:
        return {"accepted": False, "reason": "not a pull request comment"}
    repo, pr_number, head_sha, comment_id, sender_username = extracted

    # Never react to the bot's own comments.
    if sender_username == settings.gitea_bot_username:
        return {"accepted": False, "reason": "bot comment ignored"}

    comment_body = str(payload.get("comment", {}).get("body", "")).strip()
    parsed_command = parse_command(comment_body)
    if not parsed_command:
        return {"accepted": False, "reason": "no codex command"}

    if repo not in settings.allowed_repo_set:
        return {"accepted": False, "reason": "repo not allowed"}

    inserted = persist_webhook_event(
        session,
        delivery_id=x_gitea_delivery,
        event_name=event_name,
        repo=repo,
        comment_id=comment_id,
        payload=payload_bytes,
    )
    if not inserted:
        return {"accepted": True, "reason": "duplicate event"}

    gitea = GiteaClient(settings)

    if parsed_command.name in {"review", "rerun"}:
        if head_sha == "unknown":
            try:
                head_sha = gitea.get_pull_request(repo, pr_number).head_sha
            except Exception:
                # Best effort: the job is still queued with the placeholder
                # SHA, but the failure is logged instead of vanishing.
                logger.warning(
                    "Could not resolve head SHA for %s#%s; queuing with placeholder",
                    repo,
                    pr_number,
                    exc_info=True,
                )
        # ``rerun`` bypasses the cooldown.
        if parsed_command.name != "rerun":
            remaining = cooldown_remaining_seconds(session, repo, pr_number, settings.cooldown_seconds)
            if remaining > 0:
                gitea.post_issue_comment(repo, pr_number, format_cooldown_ack(remaining))
                return {"accepted": True, "reason": "cooldown active", "cooldown_seconds_remaining": remaining}
        job = enqueue_job(
            session,
            repo=repo,
            pr_number=pr_number,
            head_sha=head_sha,
            trigger_comment_id=comment_id,
            requested_by=sender_username,
            command=parsed_command,
        )
        gitea.post_issue_comment(repo, pr_number, format_queue_ack(head_sha))
        return {"accepted": True, "job_id": job.id, "status": "queued"}

    if parsed_command.name in {"fix", "explain", "ignore"}:
        # These commands are queued without an acknowledgement comment.
        job = enqueue_job(
            session,
            repo=repo,
            pr_number=pr_number,
            head_sha=head_sha,
            trigger_comment_id=comment_id,
            requested_by=sender_username,
            command=parsed_command,
        )
        return {"accepted": True, "job_id": job.id, "status": "queued"}

    # Fallback for a command name not handled above.
    gitea.post_issue_comment(repo, pr_number, format_unsupported_ack(parsed_command))
    return {"accepted": False, "reason": "unsupported command"}

View File

@@ -0,0 +1,113 @@
from __future__ import annotations
import enum
from datetime import datetime
from sqlalchemy import DateTime, Enum, ForeignKey, Index, Integer, JSON, String, Text, UniqueConstraint, func
from sqlalchemy.orm import Mapped, mapped_column, relationship
from gitea_codex_bot.db import Base
class JobStatus(str, enum.Enum):
    """States stored in ``ReviewJob.status``; members are also plain strings."""

    queued = "queued"
    running = "running"
    succeeded = "succeeded"
    failed = "failed"
    skipped = "skipped"
class RunStatus(str, enum.Enum):
    """States stored in ``ReviewRun.status``; a run has no ``queued`` state."""

    running = "running"
    succeeded = "succeeded"
    failed = "failed"
    skipped = "skipped"
class WebhookEvent(Base):
    """One row per accepted webhook delivery.

    The two unique constraints make a second insert with the same delivery id,
    or with the same (repo, comment id) pair, fail with an integrity error.
    """

    __tablename__ = "webhook_events"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Delivery identifier supplied with the request; may be absent.
    delivery_id: Mapped[str | None] = mapped_column(String(255), nullable=True)
    event_name: Mapped[str] = mapped_column(String(128), nullable=False)
    # Repository in "owner/name" form.
    repo: Mapped[str] = mapped_column(String(255), nullable=False)
    comment_id: Mapped[int | None] = mapped_column(Integer, nullable=True)
    # Digest of the raw request body (64 characters).
    payload_sha256: Mapped[str] = mapped_column(String(64), nullable=False)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now())

    __table_args__ = (
        UniqueConstraint("delivery_id", name="uq_webhook_events_delivery_id"),
        UniqueConstraint("repo", "comment_id", name="uq_webhook_events_repo_comment"),
    )
class ReviewJob(Base):
    """A queued command for one pull request, triggered by one comment.

    At most one job can exist per (repo, trigger comment) pair.
    """

    __tablename__ = "review_jobs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    repo: Mapped[str] = mapped_column(String(255), nullable=False)
    pr_number: Mapped[int] = mapped_column(Integer, nullable=False)
    head_sha: Mapped[str] = mapped_column(String(64), nullable=False)
    # Id of the comment that requested this job.
    trigger_comment_id: Mapped[int] = mapped_column(Integer, nullable=False)
    # Command name; defaults to "review".
    command: Mapped[str] = mapped_column(String(64), nullable=False, default="review")
    command_args: Mapped[str | None] = mapped_column(Text, nullable=True)
    requested_by: Mapped[str] = mapped_column(String(255), nullable=False)
    status: Mapped[JobStatus] = mapped_column(Enum(JobStatus), nullable=False, default=JobStatus.queued)
    last_error: Mapped[str | None] = mapped_column(Text, nullable=True)
    result_json: Mapped[dict | None] = mapped_column(JSON, nullable=True)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now())
    # Refreshed by the database expression on every UPDATE (``onupdate``).
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )
    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # Runs are deleted together with their job (delete-orphan cascade).
    runs: Mapped[list["ReviewRun"]] = relationship(back_populates="job", cascade="all, delete-orphan")

    __table_args__ = (
        Index("ix_review_jobs_lookup", "repo", "pr_number", "head_sha", "status", "created_at"),
        UniqueConstraint("repo", "trigger_comment_id", name="uq_review_jobs_repo_trigger_comment"),
    )
class ReviewRun(Base):
    """One execution record belonging to a ``ReviewJob``."""

    __tablename__ = "review_runs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Rows are removed by the database when the parent job is deleted.
    job_id: Mapped[int] = mapped_column(ForeignKey("review_jobs.id", ondelete="CASCADE"), nullable=False)
    status: Mapped[RunStatus] = mapped_column(Enum(RunStatus), nullable=False, default=RunStatus.running)
    runner_container_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
    result_json: Mapped[dict | None] = mapped_column(JSON, nullable=True)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    started_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now())
    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    job: Mapped["ReviewJob"] = relationship(back_populates="runs")

    __table_args__ = (Index("ix_review_runs_job_status", "job_id", "status"),)
class BotComment(Base):
    """Bookkeeping row for a comment the bot posted on a pull request.

    A (repo, pr_number, marker) combination is unique, so each marker maps to
    at most one stored comment id per pull request.
    """

    __tablename__ = "bot_comments"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    repo: Mapped[str] = mapped_column(String(255), nullable=False)
    pr_number: Mapped[int] = mapped_column(Integer, nullable=False)
    head_sha: Mapped[str] = mapped_column(String(64), nullable=False)
    # Id of the comment on the Gitea side.
    gitea_comment_id: Mapped[int] = mapped_column(Integer, nullable=False)
    # Label that identifies which kind of bot comment this row tracks.
    marker: Mapped[str] = mapped_column(String(255), nullable=False)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now())
    # Refreshed by the database expression on every UPDATE (``onupdate``).
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )

    __table_args__ = (
        UniqueConstraint("repo", "pr_number", "marker", name="uq_bot_comments_marker"),
        Index("ix_bot_comments_repo_pr", "repo", "pr_number"),
    )

View File

View File

@@ -0,0 +1,30 @@
from __future__ import annotations
import re
from gitea_codex_bot.types import ParsedCommand
# Matches text that starts with "@codex <command>" (case-insensitive).
# Group 1 is the command name, group 2 everything after it; DOTALL lets
# group 2 span multiple lines.
COMMAND_RE = re.compile(r"^@codex\s+(review|explain|fix|ignore|rerun)\b(.*)$", re.IGNORECASE | re.DOTALL)
def parse_command(body: str) -> ParsedCommand | None:
    """Turn a comment body into a ``ParsedCommand``, or ``None`` if it holds no command.

    The comment must begin (after trimming) with ``@codex <command>``. The
    rest of the text is split on whitespace into arguments. For ``review``,
    ``--full`` and the mode keywords ``security``/``performance``/``tests``
    are recognised; for ``fix``, ``--branch`` is recognised.
    """
    text = body.strip()
    found = COMMAND_RE.match(text)
    if found is None:
        return None

    command_name = found.group(1).lower()
    args = [part for part in found.group(2).strip().split() if part]
    command = ParsedCommand(name=command_name, raw=text, arguments=args)

    if command_name == "fix":
        command.branch_fix = "--branch" in args
    elif command_name == "review":
        if "--full" in args:
            command.full = True
            command.mode = "full"
        # The first matching keyword (in this fixed order) overrides the mode.
        selected = next((m for m in ("security", "performance", "tests") if m in args), None)
        if selected is not None:
            command.mode = selected
    return command

View File

@@ -0,0 +1,40 @@
from __future__ import annotations
from sqlalchemy import select
from sqlalchemy.orm import Session
from gitea_codex_bot.models import BotComment
# Marker value stored on BotComment rows that track the review comment.
REVIEW_MARKER = "codex-review"
def get_persistent_review_comment_id(session: Session, repo: str, pr_number: int) -> int | None:
    """Look up the stored Gitea comment id of the review comment for a PR.

    Returns:
        The comment id, or ``None`` when no row with the review marker exists.
    """
    query = (
        select(BotComment)
        .where(BotComment.repo == repo, BotComment.pr_number == pr_number, BotComment.marker == REVIEW_MARKER)
        .limit(1)
    )
    existing = session.execute(query).scalar_one_or_none()
    if existing:
        return existing.gitea_comment_id
    return None
def upsert_persistent_review_comment_id(
    session: Session,
    *,
    repo: str,
    pr_number: int,
    head_sha: str,
    comment_id: int,
) -> None:
    """Create or update the row that remembers the review comment of a PR.

    An existing row gets its head SHA and comment id replaced; otherwise a new
    row with the review marker is inserted. The change is committed.
    """
    query = (
        select(BotComment)
        .where(BotComment.repo == repo, BotComment.pr_number == pr_number, BotComment.marker == REVIEW_MARKER)
        .limit(1)
    )
    existing = session.execute(query).scalar_one_or_none()
    if existing:
        existing.head_sha = head_sha
        existing.gitea_comment_id = comment_id
    else:
        session.add(
            BotComment(
                repo=repo,
                pr_number=pr_number,
                head_sha=head_sha,
                gitea_comment_id=comment_id,
                marker=REVIEW_MARKER,
            )
        )
    session.commit()

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
from urllib.parse import quote
import httpx
from gitea_codex_bot.config import Settings
@dataclass(slots=True)
class PullRequestContext:
    """Snapshot of the pull-request fields the review pipeline works with."""

    # Repository in "owner/name" form.
    repo: str
    pr_number: int
    # Branch name and commit of the merge target.
    base_ref: str
    base_sha: str
    # Branch name and commit of the proposed changes.
    head_ref: str
    head_sha: str
    # Clone URL of the repository that holds the head branch.
    clone_url: str
    html_url: str
    # True when head and base live in different repositories.
    is_fork: bool
class GiteaClient:
    """Small synchronous client for the parts of the Gitea REST API the bot uses."""

    def __init__(self, settings: Settings) -> None:
        """Store the base URL and build the token-authenticated JSON headers."""
        self.settings = settings
        self.base_url = settings.gitea_base_url
        self.headers = {
            "Authorization": f"token {settings.gitea_token.get_secret_value()}",
            "Accept": "application/json",
            "Content-Type": "application/json",
        }

    def _request(self, method: str, path: str, *, json_body: dict[str, Any] | None = None) -> Any:
        """Send one request and return the decoded JSON body.

        Returns ``None`` for a 204 response. Raises ``httpx.HTTPStatusError``
        for 4xx/5xx responses (via ``raise_for_status``).
        """
        with httpx.Client(timeout=20.0) as client:
            response = client.request(
                method,
                f"{self.base_url}{path}",
                headers=self.headers,
                json=json_body,
            )
        response.raise_for_status()
        if response.status_code == 204:
            return None
        return response.json()

    @staticmethod
    def split_repo(repo: str) -> tuple[str, str]:
        """Split ``"owner/name"`` at the first slash.

        Raises:
            ValueError: if there is no slash, or the owner or name is empty.
        """
        owner, separator, name = repo.partition("/")
        if not separator or not owner or not name:
            raise ValueError(f"expected repository as 'owner/name', got {repo!r}")
        return owner, name

    def _repo_api_path(self, repo: str) -> str:
        """Return the ``/api/v1/repos/<owner>/<name>`` prefix with both parts percent-encoded."""
        owner, name = self.split_repo(repo)
        return f"/api/v1/repos/{quote(owner, safe='')}/{quote(name, safe='')}"

    def get_pull_request(self, repo: str, pr_number: int) -> PullRequestContext:
        """Fetch a pull request and map the response onto ``PullRequestContext``."""
        payload = self._request("GET", f"{self._repo_api_path(repo)}/pulls/{pr_number}")
        head = payload["head"]
        base = payload["base"]
        # NOTE(review): this assumes ``head.repo`` is always an object; confirm
        # what the API returns when the source repository no longer exists.
        head_repo = head["repo"]
        return PullRequestContext(
            repo=repo,
            pr_number=pr_number,
            base_ref=base["ref"],
            base_sha=base["sha"],
            head_ref=head["ref"],
            head_sha=head["sha"],
            clone_url=head_repo["clone_url"],
            html_url=payload["html_url"],
            is_fork=head_repo["full_name"] != base["repo"]["full_name"],
        )

    def post_issue_comment(self, repo: str, pr_number: int, body: str) -> int:
        """Post a comment on the issue/PR and return the new comment's id."""
        payload = self._request(
            "POST",
            f"{self._repo_api_path(repo)}/issues/{pr_number}/comments",
            json_body={"body": body},
        )
        return int(payload["id"])

    def edit_issue_comment(self, repo: str, comment_id: int, body: str) -> int:
        """Replace the body of an existing comment and return its id."""
        payload = self._request(
            "PATCH",
            f"{self._repo_api_path(repo)}/issues/comments/{comment_id}",
            json_body={"body": body},
        )
        return int(payload["id"])

    def list_issue_comments(self, repo: str, pr_number: int) -> list[dict[str, Any]]:
        """Return the comments of the issue/PR as a list of decoded JSON objects."""
        payload = self._request("GET", f"{self._repo_api_path(repo)}/issues/{pr_number}/comments")
        return list(payload)

View File

@@ -0,0 +1,136 @@
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
from gitea_codex_bot.models import JobStatus, ReviewJob, ReviewRun, RunStatus, WebhookEvent
from gitea_codex_bot.services.security import payload_digest
from gitea_codex_bot.types import ParsedCommand
def persist_webhook_event(
    session: Session,
    *,
    delivery_id: str | None,
    event_name: str,
    repo: str,
    comment_id: int | None,
    payload: bytes,
) -> bool:
    """Record a webhook delivery; report whether it was new.

    Returns:
        ``True`` when the row was committed, ``False`` when the commit hit an
        integrity error (the transaction is rolled back in that case).
    """
    session.add(
        WebhookEvent(
            delivery_id=delivery_id,
            event_name=event_name,
            repo=repo,
            comment_id=comment_id,
            payload_sha256=payload_digest(payload),
        )
    )
    try:
        session.commit()
    except IntegrityError:
        session.rollback()
        return False
    return True
def cooldown_remaining_seconds(session: Session, repo: str, pr_number: int, cooldown_seconds: int) -> int:
    """Return how many whole seconds of cooldown are left for a pull request.

    The newest job for the PR created inside the cooldown window decides the
    answer; 0 means a new request may proceed.
    """
    window_start = datetime.now(timezone.utc) - timedelta(seconds=cooldown_seconds)
    query = (
        select(ReviewJob)
        .where(ReviewJob.repo == repo, ReviewJob.pr_number == pr_number, ReviewJob.created_at >= window_start)
        .order_by(ReviewJob.created_at.desc())
        .limit(1)
    )
    latest = session.execute(query).scalar_one_or_none()
    if not latest:
        return 0

    created = latest.created_at
    # A naive timestamp from the database is interpreted as UTC.
    if created.tzinfo is None:
        created = created.replace(tzinfo=timezone.utc)
    elapsed = (datetime.now(timezone.utc) - created).total_seconds()
    return int(max(cooldown_seconds - elapsed, 0))
def enqueue_job(
    session: Session,
    *,
    repo: str,
    pr_number: int,
    head_sha: str,
    trigger_comment_id: int,
    requested_by: str,
    command: ParsedCommand,
) -> ReviewJob:
    """Insert a queued ``ReviewJob`` for the command and return the refreshed row.

    The command's arguments are stored space-joined, or as ``None`` when there
    are none.
    """
    args_text = None
    if command.arguments:
        args_text = " ".join(command.arguments)

    queued = ReviewJob(
        repo=repo,
        pr_number=pr_number,
        head_sha=head_sha,
        trigger_comment_id=trigger_comment_id,
        command=command.name,
        command_args=args_text,
        requested_by=requested_by,
        status=JobStatus.queued,
    )
    session.add(queued)
    session.commit()
    # Reload so database-generated values are populated.
    session.refresh(queued)
    return queued
def claim_next_job(session: Session) -> ReviewJob | None:
    """Claim the oldest queued job, mark it running and open a run record.

    The row is selected ``FOR UPDATE SKIP LOCKED``. Returns ``None`` (after a
    rollback) when nothing is queued.
    """
    query = (
        select(ReviewJob)
        .where(ReviewJob.status == JobStatus.queued)
        .order_by(ReviewJob.created_at.asc())
        .limit(1)
        .with_for_update(skip_locked=True)
    )
    candidate = session.execute(query).scalar_one_or_none()
    if not candidate:
        session.rollback()
        return None

    candidate.status = JobStatus.running
    candidate.started_at = datetime.now(timezone.utc)
    session.add(ReviewRun(job_id=candidate.id, status=RunStatus.running))
    session.commit()
    session.refresh(candidate)
    return candidate
def finish_job(
    session: Session,
    *,
    job_id: int,
    success: bool,
    skipped: bool,
    result: dict | None,
    error_message: str | None,
) -> None:
    """Record the outcome of a job on the job row and on its newest run.

    ``skipped`` takes precedence over ``success``. A missing job is ignored.
    The job's stored result is only replaced when ``result`` is not ``None``;
    the run's result and error are always overwritten.
    """
    job = session.get(ReviewJob, job_id)
    if not job:
        return

    run_query = select(ReviewRun).where(ReviewRun.job_id == job_id).order_by(ReviewRun.id.desc()).limit(1)
    latest_run = session.execute(run_query).scalar_one_or_none()

    if skipped:
        outcome = (JobStatus.skipped, RunStatus.skipped)
    elif success:
        outcome = (JobStatus.succeeded, RunStatus.succeeded)
    else:
        outcome = (JobStatus.failed, RunStatus.failed)
    job.status, run_status = outcome

    finished = datetime.now(timezone.utc)
    job.finished_at = finished
    job.last_error = error_message
    if result is not None:
        job.result_json = result

    if latest_run:
        latest_run.status = run_status
        latest_run.finished_at = finished
        latest_run.result_json = result
        latest_run.error_message = error_message
    session.commit()

View File

@@ -0,0 +1,35 @@
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
import yaml
@dataclass(slots=True)
class RepoReviewConfig:
    """Per-repository review options; the defaults apply when no config file exists."""

    enabled: bool = True
    default_mode: str = "summary"
    # Upper bound for the diff size, in bytes.
    max_diff_bytes: int = 200000
    include_tests: bool = True
    # Factory default so every instance gets its own list.
    focus: list[str] = field(default_factory=lambda: ["correctness", "security", "maintainability"])
    # Patterns for changed paths to leave out.
    ignore: list[str] = field(default_factory=list)
    allow_fix: bool = False
def _cfg_mapping(value: object) -> dict:
    """Return *value* if it is a mapping, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _cfg_str_list(value: object, default: list[str]) -> list[str]:
    """Coerce a YAML value to a list of strings.

    A scalar string becomes a one-element list (``list("abc")`` would split it
    into characters); missing or unsupported values yield a copy of *default*.
    """
    if isinstance(value, str):
        return [value]
    if isinstance(value, (list, tuple)):
        return [str(item) for item in value]
    return list(default)


def _cfg_int(value: object, default: int) -> int:
    """Convert *value* to ``int``, falling back to *default* when it is not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def load_repo_review_config(repo_root: Path) -> RepoReviewConfig:
    """Load ``.codex-review.yml`` from the repository root.

    The file is part of the checked-out repository and therefore untrusted:
    sections or values of the wrong type fall back to the defaults instead of
    raising. YAML syntax errors from ``yaml.safe_load`` still propagate.

    Returns:
        The parsed configuration, or the default ``RepoReviewConfig`` when the
        file does not exist.
    """
    path = repo_root / ".codex-review.yml"
    if not path.exists():
        return RepoReviewConfig()

    raw = _cfg_mapping(yaml.safe_load(path.read_text(encoding="utf-8")))
    review = _cfg_mapping(raw.get("review"))
    commands = _cfg_mapping(raw.get("commands"))
    return RepoReviewConfig(
        enabled=bool(raw.get("enabled", True)),
        default_mode=str(review.get("default_mode", "summary")),
        max_diff_bytes=_cfg_int(review.get("max_diff_bytes", 200000), 200000),
        include_tests=bool(review.get("include_tests", True)),
        focus=_cfg_str_list(review.get("focus"), ["correctness", "security", "maintainability"]),
        ignore=_cfg_str_list(raw.get("ignore"), []),
        allow_fix=bool(commands.get("allow_fix", False)),
    )

View File

@@ -0,0 +1,50 @@
from __future__ import annotations
from gitea_codex_bot.types import ParsedCommand
def format_queue_ack(head_sha: str) -> str:
    """Build the acknowledgement posted when a review is queued (SHA shortened to 7 characters)."""
    return f"👀 Codex review queued for commit `{head_sha[:7]}`."
def format_cooldown_ack(seconds: int) -> str:
    """Build the message telling the requester how long the cooldown still lasts."""
    message = f"⏳ Cooldown active. Please wait {seconds}s before requesting another review on this PR."
    return message
def format_disabled_ack() -> str:
    """Build the message posted when the repository config disables reviews."""
    notice = "🚫 Review is disabled by `.codex-review.yml` for this repository."
    return notice
def format_unsupported_ack(command: ParsedCommand) -> str:
    """Build the message posted for a command that is not enabled on the repository."""
    command_name = command.name
    return f"⚠️ Command `@codex {command_name}` is not enabled on this repository."
def format_result_comment(head_sha: str, result: dict) -> str:
    """Render a review result as the Markdown body of the PR comment.

    The result originates from model output, so malformed fields are
    tolerated: a missing or non-numeric ``confidence`` is shown as ``0.00``, a
    ``findings`` value that is not a list is treated as empty, and finding
    entries that are not objects are skipped.

    Args:
        head_sha: Commit the review refers to; embedded in a leading HTML
            comment marker.
        result: Review result with ``verdict``, ``confidence``, ``summary``
            and ``findings`` keys (all optional).

    Returns:
        The comment text, without leading or trailing whitespace.
    """
    verdict = result.get("verdict", "has_issues")
    try:
        confidence = float(result.get("confidence", 0.0))
    except (TypeError, ValueError):
        confidence = 0.0
    summary = str(result.get("summary", "No summary returned."))

    raw_findings = result.get("findings") or []
    if not isinstance(raw_findings, (list, tuple)):
        raw_findings = []
    findings = [entry for entry in raw_findings if isinstance(entry, dict)]

    lines = [
        f"<!-- codex-review:head_sha={head_sha} -->",
        "## Codex Review",
        "",
        f"Verdict: `{verdict}`",
        f"Confidence: `{confidence:.2f}`",
        "",
        summary,
        "",
    ]
    if not findings:
        lines.append("No blocking issues found.")
    else:
        lines.append("Findings:")
        for idx, finding in enumerate(findings, start=1):
            severity = finding.get("severity", "unknown")
            file_path = finding.get("file", "unknown")
            line_start = finding.get("line_start", "?")
            # A single-line finding may omit line_end.
            line_end = finding.get("line_end", line_start)
            title = finding.get("title", "Issue")
            body = finding.get("body", "")
            suggestion = finding.get("suggestion", "")
            lines.extend(
                [
                    f"{idx}. `{file_path}:{line_start}-{line_end}` ({severity})",
                    f" {title}",
                    f" {body}",
                    f" Suggestion: {suggestion}" if suggestion else " Suggestion: n/a",
                ]
            )
    return "\n".join(lines).strip()

View File

@@ -0,0 +1,290 @@
from __future__ import annotations
import json
import os
import shlex
import subprocess
from fnmatch import fnmatch
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any
import httpx
from gitea_codex_bot.config import Settings
from gitea_codex_bot.services.gitea import GiteaClient, PullRequestContext
from gitea_codex_bot.services.repo_config import RepoReviewConfig, load_repo_review_config
from gitea_codex_bot.types import ParsedCommand
class ReviewError(RuntimeError):
    """Raised when a review cannot be produced or its result is unusable."""

    pass
def _run_git(args: list[str], cwd: Path | None = None) -> str:
    """Run ``git`` with the given arguments and return its standard output.

    Raises:
        subprocess.CalledProcessError: when git exits with a non-zero status.
    """
    command = ["git"] + list(args)
    result = subprocess.run(command, cwd=cwd, check=True, capture_output=True, text=True)
    return result.stdout
def checkout_pr(tmpdir: Path, pr: PullRequestContext) -> Path:
    """Clone the PR's head repository into ``tmpdir/repo`` and check out the head commit.

    The clone is shallow (depth 50) and tag-less; base and head refs are then
    fetched from ``origin`` before the head SHA is checked out.

    Returns:
        Path of the working tree.
    """
    target = tmpdir / "repo"
    _run_git(["clone", "--no-tags", "--depth", "50", pr.clone_url, str(target)])
    follow_up = (
        ["fetch", "origin", pr.base_ref, pr.head_ref],
        ["checkout", pr.head_sha],
    )
    for git_args in follow_up:
        _run_git(git_args, cwd=target)
    return target
def collect_diff_context(repo_dir: Path, pr: PullRequestContext, max_diff_bytes: int) -> dict[str, Any]:
    """Collect the PR diff and the list of changed paths.

    The diff is taken between base and head with three-dot syntax and cut to
    at most ``max_diff_bytes`` UTF-8 bytes; a partial trailing character left
    by the cut is dropped.

    Returns:
        Dict with ``diff`` (str), ``changed_files`` (list of paths) and
        ``truncated`` (bool).
    """
    revision_range = f"{pr.base_sha}...{pr.head_sha}"
    diff_text = _run_git(["diff", revision_range], cwd=repo_dir)
    names_output = _run_git(["diff", "--name-only", revision_range], cwd=repo_dir)
    changed = [name for name in (line.strip() for line in names_output.splitlines()) if name]

    encoded = diff_text.encode("utf-8")
    was_truncated = len(encoded) > max_diff_bytes
    if was_truncated:
        diff_text = encoded[:max_diff_bytes].decode("utf-8", errors="ignore")
    return {"diff": diff_text, "changed_files": changed, "truncated": was_truncated}
def _apply_ignore_patterns(changed_files: list[str], ignore_patterns: list[str]) -> list[str]:
if not ignore_patterns:
return changed_files
kept: list[str] = []
for path in changed_files:
if any(fnmatch(path, pattern) for pattern in ignore_patterns):
continue
kept.append(path)
return kept
def _collect_changed_file_contents(repo_dir: Path, changed_files: list[str], max_total_bytes: int) -> str:
chunks: list[str] = []
total = 0
for rel in changed_files:
path = repo_dir / rel
if not path.exists() or not path.is_file():
continue
try:
content = path.read_text(encoding="utf-8", errors="ignore")
except OSError:
continue
block = f"\n### {rel}\n{content}\n"
block_bytes = len(block.encode("utf-8"))
if total + block_bytes > max_total_bytes:
break
chunks.append(block)
total += block_bytes
return "".join(chunks).strip()
def _collect_test_output(repo_dir: Path, timeout_seconds: int) -> str:
try:
completed = subprocess.run(
["pytest", "-q"],
cwd=repo_dir,
capture_output=True,
text=True,
timeout=timeout_seconds,
check=False,
)
output = (completed.stdout + "\n" + completed.stderr).strip()
return output[:10000]
except Exception as exc:
return f"Test execution unavailable: {exc}"
def _redact_secrets_from_diff(diff: str) -> str:
secret_terms = ("api_key", "token", "secret", "password", "private_key", "-----begin")
redacted_lines: list[str] = []
for line in diff.splitlines():
lower = line.lower()
if any(term in lower for term in secret_terms):
redacted_lines.append("[REDACTED_POTENTIAL_SECRET]")
else:
redacted_lines.append(line)
return "\n".join(redacted_lines)
def _build_prompt(
pr: PullRequestContext,
command: ParsedCommand,
diff_context: dict[str, Any],
repo_cfg: RepoReviewConfig,
*,
changed_file_contents: str,
test_output: str | None,
) -> str:
mode = command.mode if command.name in {"review", "rerun"} else "summary"
return (
"You are reviewing a Gitea pull request.\n\n"
"Focus only on issues introduced by this PR.\n"
"Prioritize correctness, security, data loss, broken behavior, bad migrations, and missing tests.\n"
"Avoid style nitpicks.\n\n"
"Return JSON only with schema:\n"
"{\n"
' "verdict": "correct" | "has_issues",\n'
' "confidence": 0.0,\n'
' "summary": "...",\n'
' "findings": [{"severity":"low|medium|high|critical","file":"...","line_start":1,"line_end":1,"title":"...","body":"...","suggestion":"..."}]\n'
"}\n\n"
f"PR URL: {pr.html_url}\n"
f"Mode: {mode}\n"
f"Repo focus: {', '.join(repo_cfg.focus)}\n"
f"Diff truncated: {diff_context['truncated']}\n"
f"Changed files:\n{os.linesep.join(diff_context['changed_files'])}\n\n"
f"Unified diff:\n{diff_context['diff']}\n\n"
f"Changed file content (optional):\n{changed_file_contents or '(not included)'}\n\n"
f"Test output (optional):\n{test_output or '(not included)'}\n"
)
def _call_openai_review(settings: Settings, prompt: str) -> dict[str, Any]:
    """Send the prompt to the OpenAI Responses endpoint and return the decoded JSON answer.

    The first non-empty ``text`` entry found in the response's ``output``
    items is parsed with ``json.loads`` and returned.

    Raises:
        httpx.HTTPStatusError: for an error status from the API.
        ReviewError: when the response contains no text output.
    """
    request_headers: dict[str, str] = {
        "Authorization": f"Bearer {settings.openai_api_key.get_secret_value()}",
        "Content-Type": "application/json",
    }
    # Organization and project headers are optional.
    optional_headers = (
        ("OpenAI-Organization", settings.openai_org_id),
        ("OpenAI-Project", settings.openai_project_id),
    )
    for header_name, header_value in optional_headers:
        if header_value:
            request_headers[header_name] = header_value

    request_body = {
        "model": settings.openai_review_model,
        "input": prompt,
        "text": {"format": {"type": "json_object"}},
        "reasoning": {"effort": settings.openai_reasoning_effort},
    }
    with httpx.Client(timeout=120.0) as client:
        response = client.post("https://api.openai.com/v1/responses", headers=request_headers, json=request_body)
        response.raise_for_status()
        payload = response.json()

    candidate_texts = (
        content.get("text")
        for item in payload.get("output", [])
        for content in item.get("content", [])
    )
    for text_value in candidate_texts:
        if text_value:
            return json.loads(text_value)
    raise ReviewError("OpenAI response did not contain JSON output text.")
def _fallback_review(diff_context: dict[str, Any]) -> dict[str, Any]:
findings = []
if "TODO" in diff_context["diff"]:
findings.append(
{
"severity": "low",
"file": "unknown",
"line_start": 1,
"line_end": 1,
"title": "TODO marker in diff",
"body": "The change introduces TODO markers that may indicate incomplete behavior.",
"suggestion": "Resolve or track TODOs before merging.",
}
)
return {
"verdict": "correct" if not findings else "has_issues",
"confidence": 0.4 if not findings else 0.6,
"summary": "Fallback analysis was used because OpenAI review was unavailable.",
"findings": findings,
}
def run_review_for_pr(
    settings: Settings,
    gitea: GiteaClient,
    repo: str,
    pr_number: int,
    command: ParsedCommand,
) -> tuple[dict[str, Any], RepoReviewConfig]:
    """Review a pull request and return the normalized result with the repo config.

    The model is asked first. If that call fails for any reason the failure is
    logged with its traceback and the local fallback analysis is used, so the
    caller always receives a result.

    Returns:
        ``(result, repo_cfg)`` where ``result`` has passed
        ``normalize_review_result``.
    """
    # Local import keeps this block self-contained.
    import logging

    prompt, diff_context, repo_cfg = prepare_review_prompt(settings, gitea, repo, pr_number, command)
    try:
        result = _call_openai_review(settings, prompt)
    except Exception:
        # Deliberate best effort, but never silent: operators need to see why
        # the fallback was used.
        logging.getLogger(__name__).exception(
            "OpenAI review failed for %s#%s; using fallback analysis", repo, pr_number
        )
        result = _fallback_review(diff_context)
    return normalize_review_result(result), repo_cfg
def prepare_review_prompt(
    settings: Settings,
    gitea: GiteaClient,
    repo: str,
    pr_number: int,
    command: ParsedCommand,
) -> tuple[str, dict[str, Any], RepoReviewConfig]:
    """Check out the PR in a temporary directory and build the review prompt.

    Steps: fetch PR metadata, clone and check out the head, load the repo
    config, collect the diff (limited by the smaller of the global and repo
    byte limits), apply ignore patterns to the changed-file list, redact
    secret-looking diff lines, optionally add full file contents
    (``command.full``) and test output (mode ``tests`` with tests enabled).

    Returns:
        ``(prompt, diff_context, repo_cfg)``.
    """
    pr = gitea.get_pull_request(repo, pr_number)
    with TemporaryDirectory(prefix="gitea-codex-") as workspace:
        checkout = checkout_pr(Path(workspace), pr)
        repo_cfg = load_repo_review_config(checkout)

        byte_limit = min(settings.max_diff_bytes, repo_cfg.max_diff_bytes)
        diff_context = collect_diff_context(checkout, pr, byte_limit)
        diff_context["changed_files"] = _apply_ignore_patterns(diff_context["changed_files"], repo_cfg.ignore)
        diff_context["diff"] = _redact_secrets_from_diff(diff_context["diff"])

        if command.full:
            file_contents = _collect_changed_file_contents(
                checkout, diff_context["changed_files"], settings.max_diff_bytes
            )
        else:
            file_contents = ""

        if repo_cfg.include_tests and command.mode == "tests":
            # Test runs are capped at five minutes regardless of the setting.
            time_limit = min(settings.max_review_minutes * 60, 300)
            test_output = _collect_test_output(checkout, timeout_seconds=time_limit)
        else:
            test_output = None

        prompt = _build_prompt(
            pr,
            command,
            diff_context,
            repo_cfg,
            changed_file_contents=file_contents,
            test_output=test_output,
        )
    return prompt, diff_context, repo_cfg
def normalize_review_result(result: Any) -> dict[str, Any]:
    """Fill in any missing top-level review keys with defaults.

    The dict is updated in place and the same object is returned. Keys
    that are already present are left untouched, whatever their value.

    Raises:
        ReviewError: If ``result`` is not a dict.
    """
    if not isinstance(result, dict):
        raise ReviewError(f"Invalid review result type: {type(result)!r}")
    # Built per call so the empty findings list is never shared.
    fallbacks = (
        ("findings", []),
        ("summary", "No summary returned."),
        ("verdict", "has_issues"),
        ("confidence", 0.5),
    )
    for key, fallback in fallbacks:
        result.setdefault(key, fallback)
    return result
def summarize_command(command: ParsedCommand) -> str:
    """Render a command as ``@codex <name> <arguments...>`` on one line."""
    parts = ["@codex", command.name]
    parts.extend(command.arguments)
    return " ".join(parts).strip()
def fix_branch_name(pr_number: int, arguments: list[str] | None = None) -> str:
    """Build the branch name for a fix requested on a pull request.

    Up to four non-flag arguments (those not starting with ``--``) are
    lowercased, joined with ``-`` and reduced to alphanumerics and dashes
    to form a slug. With no usable arguments the suffix is plain ``fix``.

    Returns:
        ``codex/pr-<pr_number>-fix`` or ``codex/pr-<pr_number>-fix-<slug>``.
    """
    slug = ""
    if arguments:
        keywords = []
        for token in arguments:
            if not token.strip() or token.startswith("--"):
                continue
            keywords.append(token.lower().strip())
        if keywords:
            joined = "-".join(keywords[:4])
            # Anything that is not alphanumeric or a dash becomes a dash.
            slug = "".join(ch if (ch.isalnum() or ch == "-") else "-" for ch in joined).strip("-")
    suffix = f"fix-{slug}" if slug else "fix"
    return f"codex/pr-{pr_number}-{suffix}"
def create_fix_patch_note(command: ParsedCommand) -> str:
    """Describe what a fix command was asked to address.

    The command's arguments are shell-quoted into one string; when there
    are none the note refers to the latest findings instead.
    """
    if command.arguments:
        details = shlex.join(command.arguments)
    else:
        details = "latest findings"
    return f"Fix command requested for {details}."
def create_fix_branch(
    pr: PullRequestContext,
    *,
    note: str,
    arguments: list[str] | None = None,
) -> str:
    """Create and push a branch that carries a fix note for a pull request.

    The PR is checked out into a temporary directory, a new branch is
    created from it, ``.codex/fix-note.md`` is written and committed, and
    the branch is force-pushed to ``origin``.

    Args:
        pr: Pull request to branch from.
        note: Text written into the fix note file.
        arguments: Command arguments used to derive the branch name.

    Returns:
        The name of the pushed branch.
    """
    branch = fix_branch_name(pr.pr_number, arguments=arguments)
    # Commit identity is supplied inline rather than read from git config.
    identity = ["-c", "user.name=codex-bot", "-c", "user.email=codex-bot@example.invalid"]
    with TemporaryDirectory(prefix="gitea-codex-fix-") as scratch:
        repo_dir = checkout_pr(Path(scratch), pr)
        _run_git(["checkout", "-b", branch], cwd=repo_dir)

        note_path = repo_dir / ".codex" / "fix-note.md"
        note_path.parent.mkdir(parents=True, exist_ok=True)
        note_path.write_text(f"# Codex Fix Note\n\n{note}\n", encoding="utf-8")

        git_steps = (
            ["add", ".codex/fix-note.md"],
            [*identity, "commit", "-m", f"Codex fix note for PR {pr.pr_number}"],
            ["push", "origin", f"{branch}:{branch}", "--force"],
        )
        for git_args in git_steps:
            _run_git(git_args, cwd=repo_dir)
    return branch

View File

@@ -0,0 +1,16 @@
from __future__ import annotations
import hashlib
import hmac
def verify_gitea_signature(payload: bytes, secret: str, received_signature: str | None) -> bool:
    """Check a webhook signature against the HMAC-SHA256 of the payload.

    Args:
        payload: Raw request body bytes.
        secret: Shared webhook secret; UTF-8 encoded to form the HMAC key.
        received_signature: Hex digest supplied with the request, with an
            optional ``sha256=`` prefix. ``None`` or empty means no
            signature was sent.

    Returns:
        ``True`` only when the supplied digest equals the computed one.
        Malformed signatures return ``False`` rather than raising.
    """
    if not received_signature:
        return False
    expected = hmac.new(secret.encode("utf-8"), payload, hashlib.sha256).hexdigest()
    # Strip first so surrounding whitespace cannot hide the prefix.
    normalized = received_signature.strip().removeprefix("sha256=").strip()
    # compare_digest raises TypeError for str arguments with non-ASCII
    # characters. The header is untrusted, so compare bytes instead and a
    # bad value is simply a mismatch.
    return hmac.compare_digest(expected.encode("ascii"), normalized.encode("utf-8", errors="replace"))
def payload_digest(payload: bytes) -> str:
    """Return the SHA-256 of ``payload`` as a lowercase hex string."""
    hasher = hashlib.sha256()
    hasher.update(payload)
    return hasher.hexdigest()

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Literal
CommandName = Literal["review", "explain", "fix", "ignore", "rerun"]
@dataclass(slots=True)
class ParsedCommand:
    """A single ``@codex`` command together with its options."""

    # Which command was requested; one of the CommandName literals.
    name: CommandName
    # Command text; callers in this codebase build it as "@codex <name>".
    raw: str
    # Review mode. The reviewer collects test output only when this is
    # "tests" and the repository config allows it.
    mode: str = "summary"
    # When true, the reviewer also gathers the contents of changed files.
    full: bool = False
    # When true, a fix command creates and pushes a branch.
    branch_fix: bool = False
    # Argument tokens of the command; a fresh list per instance.
    arguments: list[str] = field(default_factory=list)

View File

View File

@@ -0,0 +1,110 @@
from __future__ import annotations
import json
import subprocess
import uuid
from pathlib import Path
from typing import Any
from gitea_codex_bot.config import Settings
from gitea_codex_bot.services.gitea import GiteaClient
from gitea_codex_bot.services.reviewer import normalize_review_result, prepare_review_prompt, run_review_for_pr
from gitea_codex_bot.types import ParsedCommand
def run_review_ephemeral(
    settings: Settings,
    *,
    repo: str,
    pr_number: int,
    command: ParsedCommand,
) -> dict[str, Any]:
    """Run a review inside a throwaway Docker container.

    The prompt is built on the host and piped to ``codex exec`` in the
    container over stdin. If the container run or output parsing fails
    for any reason, the in-process review from ``run_review_for_pr`` is
    used instead.

    Args:
        settings: Bot settings (runner image, time limit, OpenAI credentials).
        repo: Repository identifier as understood by the Gitea client.
        pr_number: Pull request number within ``repo``.
        command: Parsed ``@codex`` command driving the review.

    Returns:
        The normalized review result dict.
    """
    import logging
    import os

    log = logging.getLogger(__name__)
    gitea = GiteaClient(settings)
    prompt, _diff_context, _repo_cfg = prepare_review_prompt(settings, gitea, repo, pr_number, command)
    container_name = f"codex-review-{uuid.uuid4().hex[:12]}"
    # NOTE(review): the model is hard-coded here and does not follow
    # settings.openai_review_model -- confirm this is intended.
    install_and_run = (
        "set -euo pipefail; "
        "npm install -g @openai/codex >/tmp/codex-install.log 2>&1; "
        "codex exec --json -m gpt-5"
    )
    cmd = [
        "docker",
        "run",
        "--rm",
        "-i",
        "--name",
        container_name,
        "-e",
        "OPENAI_API_KEY",
        "-e",
        "OPENAI_ORG_ID",
        "-e",
        "OPENAI_PROJECT_ID",
        "-e",
        "CODEX_DISABLE_TELEMETRY=1",
        settings.review_runner_image,
        "bash",
        "-lc",
        install_and_run,
    ]
    # `-e NAME` without a value forwards NAME from the docker client's own
    # environment. Settings may have come from a .env file that never
    # reached os.environ, so export the credentials to the child process
    # explicitly. This also keeps the secret off the command line.
    child_env = dict(os.environ)
    child_env["OPENAI_API_KEY"] = settings.openai_api_key.get_secret_value()
    if settings.openai_org_id:
        child_env["OPENAI_ORG_ID"] = settings.openai_org_id
    if settings.openai_project_id:
        child_env["OPENAI_PROJECT_ID"] = settings.openai_project_id
    try:
        completed = subprocess.run(
            cmd,
            input=prompt,
            text=True,
            check=True,
            capture_output=True,
            timeout=settings.max_review_minutes * 60,
            env=child_env,
        )
        parsed = _parse_codex_exec_stdout(completed.stdout)
        return normalize_review_result(parsed)
    except Exception as exc:
        if isinstance(exc, subprocess.TimeoutExpired):
            # On timeout only the docker client process is killed; the
            # container may keep running, so stop it by name.
            try:
                subprocess.run(
                    ["docker", "kill", container_name],
                    capture_output=True,
                    check=False,
                    timeout=30,
                )
            except Exception:
                log.warning("Could not kill timed-out container %s", container_name, exc_info=True)
        log.warning(
            "Ephemeral codex review failed for %s#%s; falling back to in-process review: %s",
            repo,
            pr_number,
            exc,
        )
        result, _repo_cfg = run_review_for_pr(settings, gitea, repo, pr_number, command)
        return result
def ensure_workdir(path: str) -> Path:
    """Make sure ``path`` exists as a directory and return it as a ``Path``.

    Missing parent directories are created; an existing directory is fine.
    """
    workdir = Path(path)
    if not workdir.is_dir():
        workdir.mkdir(parents=True, exist_ok=True)
    return workdir
def _parse_codex_exec_stdout(stdout: str) -> dict[str, Any]:
    """Extract the review result from ``codex exec --json`` output.

    The output is scanned as JSON Lines. A line that is itself a JSON
    object with ``verdict``, ``summary`` and ``findings`` is returned
    immediately. Otherwise the text of each event is extracted and, when
    that text decodes to a JSON object, remembered; the last such object
    wins. Lines and texts that are not JSON are skipped, so trailing
    status events cannot displace an earlier answer.

    Raises:
        RuntimeError: If no JSON object could be recovered.
    """
    required = {"verdict", "summary", "findings"}
    last_result: dict[str, Any] | None = None
    for raw_line in stdout.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        try:
            payload = json.loads(line)
        except json.JSONDecodeError:
            continue
        if isinstance(payload, dict) and required.issubset(payload.keys()):
            return payload
        extracted = _extract_text(payload)
        if not extracted:
            continue
        try:
            candidate = json.loads(extracted)
        except json.JSONDecodeError:
            # Event tags, ids and free-form messages are not results.
            continue
        if isinstance(candidate, dict):
            last_result = candidate
    if last_result is None:
        raise RuntimeError("codex exec output did not include parseable JSON text")
    return last_result


def _extract_text(payload: Any) -> str | None:
    """Pull the most likely message text out of a decoded JSON value.

    A string is returned as-is. Otherwise text stored under one of the
    keys ``text``, ``message``, ``content`` or ``output`` is preferred,
    at any nesting depth. Only when no such key yields text does the
    first non-empty string found anywhere serve as a fallback.
    """
    if isinstance(payload, str):
        return payload
    keyed = _find_keyed_text(payload)
    if keyed:
        return keyed
    return _first_string(payload)


def _find_keyed_text(node: Any) -> str | None:
    """Depth-first search for text stored under a preferred key."""
    if isinstance(node, dict):
        for key in ("text", "message", "content", "output"):
            found = _extract_text(node.get(key))
            if found:
                return found
        # No preferred key here; look inside nested containers before
        # giving up, so sibling tag strings are not mistaken for text.
        for value in node.values():
            found = _find_keyed_text(value)
            if found:
                return found
    elif isinstance(node, list):
        for item in node:
            found = _find_keyed_text(item)
            if found:
                return found
    return None


def _first_string(node: Any) -> str | None:
    """Return the first non-empty string found depth-first, if any."""
    if isinstance(node, str):
        return node or None
    if isinstance(node, dict):
        children = node.values()
    elif isinstance(node, list):
        children = node
    else:
        return None
    for child in children:
        found = _first_string(child)
        if found:
            return found
    return None

View File

@@ -0,0 +1,135 @@
from __future__ import annotations
import asyncio
import logging
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session
from gitea_codex_bot.config import Settings
from gitea_codex_bot.db import get_session_factory
from gitea_codex_bot.models import ReviewJob
from gitea_codex_bot.services.comments import get_persistent_review_comment_id, upsert_persistent_review_comment_id
from gitea_codex_bot.services.gitea import GiteaClient
from gitea_codex_bot.services.jobs import claim_next_job, finish_job
from gitea_codex_bot.services.review_format import format_result_comment
from gitea_codex_bot.services.reviewer import create_fix_branch, create_fix_patch_note
from gitea_codex_bot.types import ParsedCommand
from gitea_codex_bot.workers.container_runner import run_review_ephemeral
logger = logging.getLogger(__name__)
def _command_from_job(job: ReviewJob) -> ParsedCommand:
    """Rebuild a ``ParsedCommand`` from the fields stored on a job.

    The stored argument string is split on whitespace; the ``--full`` and
    ``--branch`` tokens set the matching flags.
    """
    # NOTE(review): `mode` is not restored here, so every job runs with
    # the default mode -- confirm that is intended.
    tokens = (job.command_args or "").split()
    return ParsedCommand(
        name=job.command,
        raw=f"@codex {job.command}",
        arguments=tokens,
        full="--full" in tokens,
        branch_fix="--branch" in tokens,
    )
def _handle_non_review_command(
settings: Settings,
session: Session,
gitea: GiteaClient,
job: ReviewJob,
command: ParsedCommand,
) -> tuple[bool, bool, dict[str, Any] | None, str | None]:
if command.name == "ignore":
return True, True, {"summary": "Ignore command acknowledged. No review run executed."}, None
if command.name == "explain":
latest_review_job = session.execute(
select(ReviewJob)
.where(
ReviewJob.repo == job.repo,
ReviewJob.pr_number == job.pr_number,
ReviewJob.command.in_(["review", "rerun"]),
ReviewJob.status == "succeeded",
)
.order_by(ReviewJob.id.desc())
.limit(1)
).scalar_one_or_none()
if latest_review_job and latest_review_job.result_json:
message = f"## Codex Explain\n\n{latest_review_job.result_json.get('summary', 'No previous summary available.')}"
else:
message = "## Codex Explain\n\nNo previous result found for this command."
gitea.post_issue_comment(job.repo, job.pr_number, message)
return True, True, {"summary": message}, None
if command.name == "fix":
if not settings.enable_fix_commands:
message = "⚠️ `@codex fix` is disabled on this bot instance."
gitea.post_issue_comment(job.repo, job.pr_number, message)
return True, True, {"summary": message}, None
note = create_fix_patch_note(command)
if command.branch_fix:
try:
pr = gitea.get_pull_request(job.repo, job.pr_number)
branch = create_fix_branch(pr, note=note, arguments=command.arguments)
message = f"## Codex Fix\n\n{note}\n\nCreated branch `{branch}`."
gitea.post_issue_comment(job.repo, job.pr_number, message)
return True, True, {"summary": note, "mode": "branch", "branch": branch}, None
except Exception as exc:
return True, False, None, f"Failed to create fix branch: {exc}"
gitea.post_issue_comment(job.repo, job.pr_number, f"## Codex Fix\n\n{note}\n\nPatch suggestion mode.")
return True, True, {"summary": note, "mode": "patch"}, None
return False, False, None, None
def process_one_job(settings: Settings) -> bool:
    """Claim and run at most one queued job.

    Non-review commands are handled by ``_handle_non_review_command``.
    Anything else runs a review, whose formatted result is written to the
    PR's persistent review comment (edited if one is recorded, created
    otherwise). Any failure after the job has been claimed is logged and
    recorded on the job.

    Returns:
        ``False`` when there was no job to claim, ``True`` otherwise,
        whether the job succeeded, was skipped, or failed.
    """
    session_factory = get_session_factory()
    with session_factory() as session:
        job = claim_next_job(session)
        if not job:
            return False
        command = _command_from_job(job)
        # Copy the plain values while the row is still attached, so later
        # steps do not depend on reading a detached ORM instance.
        job_id = job.id
        repo = job.repo
        pr_number = job.pr_number
        head_sha = job.head_sha

    try:
        gitea = GiteaClient(settings)

        # This runs inside the try so that a failure while handling a
        # non-review command marks the job failed instead of leaving it
        # claimed forever.
        with session_factory() as session:
            db_job = session.execute(select(ReviewJob).where(ReviewJob.id == job_id)).scalar_one()
            handled, skipped, result, error = _handle_non_review_command(settings, session, gitea, db_job, command)
            if handled:
                finish_job(
                    session,
                    job_id=job_id,
                    success=error is None,
                    skipped=skipped,
                    result=result,
                    error_message=error,
                )
                return True

        pr_ctx = gitea.get_pull_request(repo, pr_number)
        if pr_ctx.is_fork and not settings.allow_untrusted_forks:
            with session_factory() as session:
                skip_message = "Skipped review for fork PR because `ALLOW_UNTRUSTED_FORKS=false`."
                gitea.post_issue_comment(repo, pr_number, skip_message)
                finish_job(
                    session,
                    job_id=job_id,
                    success=True,
                    skipped=True,
                    result={"summary": skip_message},
                    error_message=None,
                )
            return True

        result = run_review_ephemeral(settings, repo=repo, pr_number=pr_number, command=command)
        comment_body = format_result_comment(head_sha, result)
        with session_factory() as session:
            comment_id = get_persistent_review_comment_id(session, repo, pr_number)
            if comment_id:
                gitea.edit_issue_comment(repo, comment_id, comment_body)
            else:
                comment_id = gitea.post_issue_comment(repo, pr_number, comment_body)
            upsert_persistent_review_comment_id(
                session,
                repo=repo,
                pr_number=pr_number,
                head_sha=head_sha,
                comment_id=comment_id,
            )
            finish_job(session, job_id=job_id, success=True, skipped=False, result=result, error_message=None)
    except Exception as exc:
        logger.exception("Review job failed id=%s", job_id)
        with session_factory() as session:
            finish_job(session, job_id=job_id, success=False, skipped=False, result=None, error_message=str(exc))
    return True
async def worker_loop(settings: Settings, stop_event: asyncio.Event) -> None:
    """Process jobs one at a time until ``stop_event`` is set.

    Each job runs in a worker thread via ``asyncio.to_thread``. When there
    was nothing to do, or an iteration failed, the loop pauses for one
    second before trying again.
    """
    while not stop_event.is_set():
        try:
            processed = await asyncio.to_thread(process_one_job, settings)
        except Exception:
            # An unexpected error must not end the worker for good; log it
            # and take the idle pause. Cancellation is not an Exception
            # subclass and still propagates.
            logger.exception("Worker iteration failed; retrying after a pause")
            processed = False
        if not processed:
            await asyncio.sleep(1.0)

View File

@@ -0,0 +1,31 @@
from __future__ import annotations
import json
import sys
from gitea_codex_bot.config import get_settings
from gitea_codex_bot.services.gitea import GiteaClient
from gitea_codex_bot.services.reviewer import run_review_for_pr
from gitea_codex_bot.types import ParsedCommand
def main() -> int:
    """Read a review request as JSON on stdin and print the result as JSON.

    The request must contain ``repo``, ``pr_number`` and a ``command``
    object with at least ``name``; ``mode``, ``full``, ``branch_fix`` and
    ``arguments`` are optional.

    Returns:
        ``0`` once the result has been printed.
    """
    settings = get_settings()
    request = json.load(sys.stdin)
    spec = request["command"]
    command = ParsedCommand(
        name=spec["name"],
        raw=f"@codex {spec['name']}",
        mode=spec.get("mode", "summary"),
        full=bool(spec.get("full", False)),
        branch_fix=bool(spec.get("branch_fix", False)),
        arguments=list(spec.get("arguments", [])),
    )
    client = GiteaClient(settings)
    review, _ = run_review_for_pr(settings, client, request["repo"], int(request["pr_number"]), command)
    print(json.dumps(review))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())