First MVP

This commit is contained in:
Space-Banane
2026-05-22 19:25:57 +02:00
parent 673f70b32a
commit 860ccb731d
40 changed files with 2336 additions and 0 deletions

46
.env.example Normal file
View File

@@ -0,0 +1,46 @@
# Base URL of your self-hosted Gitea instance.
GITEA_BASE_URL=https://gitea.reversed.dev
# Bot account token used to read PRs and write comments.
GITEA_TOKEN=replace
GITEA_BOT_USERNAME=codex-bot
# Shared secret configured on the Gitea webhook.
GITEA_WEBHOOK_SECRET=replace
# OpenAI API credentials for Codex review generation.
OPENAI_API_KEY=replace
OPENAI_PROJECT_ID=
OPENAI_ORG_ID=
# Comma-separated allowlist of repositories this bot may process.
# Example: space/gitea-codex,space/another-repo
ALLOWED_REPOS=space/gitea-codex
COOLDOWN_SECONDS=60
# WEBHOOK_MODE is informational for your deployment model:
# - repo: you configured repository-level webhooks in Gitea.
# - global: you configured one instance-level/admin webhook in Gitea.
# This bot does NOT auto-provision webhooks. Admin config is manual.
WEBHOOK_MODE=repo
DB_HOST=mariadb
DB_PORT=3306
DB_NAME=gitea_codex
DB_USER=gitea_codex
DB_PASSWORD=replace
WORKDIR=/var/lib/gitea-codex/worktrees
MAX_DIFF_BYTES=200000
MAX_REVIEW_MINUTES=10
CONCURRENCY=1
# Image used for ephemeral job containers (Node + npm + Codex CLI install).
REVIEW_RUNNER_IMAGE=node:22-bookworm-slim
# Keep false for review-only mode.
ENABLE_FIX_COMMANDS=false
# Security: fork PRs are skipped unless explicitly enabled.
ALLOW_UNTRUSTED_FORKS=false

107
.gitea/workflows/ci.yml Normal file
View File

@@ -0,0 +1,107 @@
name: ci
on:
push:
branches: [ main ]
tags: [ 'v*' ]
pull_request:
jobs:
test:
runs-on: ubuntu-latest
services:
mariadb:
image: mariadb:11
env:
MARIADB_DATABASE: gitea_codex
MARIADB_USER: gitea_codex
MARIADB_PASSWORD: gitea_codex
MARIADB_ROOT_PASSWORD: rootpass
ports:
- 3306:3306
options: >-
--health-cmd "mariadb-admin ping -h localhost -uroot -prootpass"
--health-interval 10s
--health-timeout 5s
--health-retries 10
env:
GITEA_BASE_URL: https://gitea.reversed.dev
GITEA_TOKEN: test
GITEA_BOT_USERNAME: codex-bot
GITEA_WEBHOOK_SECRET: testsecret
OPENAI_API_KEY: test-openai
ALLOWED_REPOS: org/repo
COOLDOWN_SECONDS: 60
WEBHOOK_MODE: repo
DB_HOST: 127.0.0.1
DB_PORT: 3306
DB_NAME: gitea_codex
DB_USER: gitea_codex
DB_PASSWORD: gitea_codex
TEST_DATABASE_URL: mysql+pymysql://gitea_codex:gitea_codex@127.0.0.1:3306/gitea_codex?charset=utf8mb4
WORKDIR: /tmp/work
MAX_DIFF_BYTES: 200000
MAX_REVIEW_MINUTES: 10
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install deps
run: |
python -m pip install --upgrade pip
pip install -e .[dev]
- name: Run Alembic migrations
run: alembic upgrade head
- name: Run tests
run: pytest
publish:
runs-on: ubuntu-latest
needs: test
if: gitea.event_name == 'push'
env:
REGISTRY: gitea.reversed.dev
IMAGE_NAME: space/gitea-codex
steps:
- uses: actions/checkout@v4
- uses: docker/setup-buildx-action@v3
- name: Login to Gitea container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ secrets.REGISTRY_USERNAME }}
password: ${{ secrets.REGISTRY_PASSWORD }}
- name: Build and push tags
shell: bash
env:
CI_SHA: ${{ gitea.sha }}
CI_REF_NAME: ${{ gitea.ref_name }}
run: |
set -euo pipefail
IMAGE="${REGISTRY}/${IMAGE_NAME}"
SHA_TAG="sha-${CI_SHA::12}"
REF_TAG="${CI_REF_NAME}"
docker buildx build --push \
-t "${IMAGE}:${SHA_TAG}" \
-t "${IMAGE}:${REF_TAG}" \
.
if [ "${CI_REF_NAME}" = "main" ]; then
docker buildx build --push -t "${IMAGE}:latest" .
fi
- name: Publish image summary
shell: bash
env:
CI_SHA: ${{ gitea.sha }}
CI_REF_NAME: ${{ gitea.ref_name }}
run: |
set -euo pipefail
IMAGE="${REGISTRY}/${IMAGE_NAME}"
echo "Published image tags:" >> "${GITHUB_STEP_SUMMARY}"
echo "- ${IMAGE}:${CI_REF_NAME}" >> "${GITHUB_STEP_SUMMARY}"
echo "- ${IMAGE}:sha-${CI_SHA::12}" >> "${GITHUB_STEP_SUMMARY}"
if [ "${CI_REF_NAME}" = "main" ]; then
echo "- ${IMAGE}:latest" >> "${GITHUB_STEP_SUMMARY}"
fi

9
.gitignore vendored Normal file
View File

@@ -0,0 +1,9 @@
__pycache__/
.pytest_cache/
.venv/
.env
*.pyc
worktrees/
.mypy_cache/
.coverage
htmlcov/

18
Dockerfile Normal file
View File

@@ -0,0 +1,18 @@
FROM python:3.12-slim
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1
RUN apt-get update && apt-get install -y --no-install-recommends git docker.io ca-certificates && rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY pyproject.toml README.md /app/
COPY src /app/src
COPY alembic.ini /app/
COPY alembic /app/alembic
RUN pip install --no-cache-dir .
EXPOSE 8000
CMD ["uvicorn", "gitea_codex_bot.main:app", "--host", "0.0.0.0", "--port", "8000"]

245
Idea.md Normal file
View File

@@ -0,0 +1,245 @@
Architecture:
```text
Gitea
└─ webhook: pull_request_comment / issue_comment
└─ gitea-codex-bot API
├─ verifies X-Gitea-Signature
├─ checks body starts with @codex review
├─ queues review job
└─ worker:
├─ clones repo / fetches PR branches
├─ builds git diff + context
├─ runs codex headless
├─ parses JSON findings
└─ posts review comment as codex-bot
```
Use a real Gitea user, e.g. `codex-bot`. Give it a token with minimum access: read repo, read PRs/issues, write comments. Do not use your personal admin token. Gitea exposes Swagger/OpenAPI per instance at `/api/swagger` and `/swagger.v1.json`, so you can wire against your actual server version instead of guessing endpoints. ([Gitea Documentation][3])
MVP behavior:
```text
User comments:
@codex review
Bot replies:
👀 Codex review queued for commit abc123...
Later edits/posts:
## Codex Review
Verdict: patch mostly correct
Confidence: 0.78
Findings:
1. src/auth.ts:42-55
Token validation accepts expired tokens in one path.
2. api/users.ts:88
Missing permission check before update.
No blocking issues found in tests.
```
For v1, post one normal PR timeline comment. Do not fight inline comments yet. Gitea has PR review webhook concepts, but line-level diff review API support can be version-sensitive/awkward; there are still recent reports about API-token support for diff-level review comments being unclear. ([Gitea Documentation][1]) Summary comments are reliable and still useful.
Core trigger logic:
```ts
if (event !== "pull_request_comment" && event !== "issue_comment") return;
if (!payload.is_pull && !payload.pull_request) return;
if (payload.sender.username === "codex-bot") return;
if (!payload.comment.body.trim().startsWith("@codex review")) return;
enqueueReview(payload.repository.full_name, payload.pull_request.number);
```
Job flow:
```text
1. Verify webhook HMAC.
2. Dedupe by delivery ID/comment ID.
3. Parse command:
@codex review
@codex review security
@codex review tests
@codex review --full
4. Create “queued” comment.
5. Clone/fetch repo into isolated temp dir.
6. Checkout PR head.
7. Generate:
git diff base...head
changed file list
optional full changed-file content
optional test output
8. Run Codex headless with JSON schema.
9. Validate JSON.
10. Post/update review comment.
```
Use SQLite first:
```sql
reviews(
id,
repo,
pr_number,
head_sha,
trigger_comment_id,
status,
requested_by,
created_at,
updated_at,
result_json
)
```
Suggested service stack:
```text
Backend: Python FastAPI or Node/TS Fastify
Queue: SQLite jobs first, Redis later
Runner: Docker worker container
Storage: /var/lib/gitea-codex-bot
Auth: bot PAT + webhook secret
Deployment: docker compose
```
Config:
```env
GITEA_BASE_URL=https://git.example.com
GITEA_TOKEN=...
GITEA_BOT_USERNAME=codex-bot
GITEA_WEBHOOK_SECRET=...
OPENAI_API_KEY=...
WORKDIR=/var/lib/gitea-codex/worktrees
MAX_DIFF_BYTES=200000
MAX_REVIEW_MINUTES=10
CONCURRENCY=1
```
Good commands to support later:
```text
@codex review
@codex review security
@codex review performance
@codex review tests
@codex review --full
@codex explain
@codex fix
@codex fix --branch
@codex ignore
@codex rerun
```
Best v2 feature: persistent review comment. Instead of spamming new comments, the bot finds its previous comment on that PR and edits it:
```text
<!-- codex-review:head_sha=abc123 -->
## Codex Review
...
```
Then reruns replace the same block.
Best v3 feature: fixes. User comments:
```text
@codex fix finding 2
```
Bot creates a branch:
```text
codex/pr-42-fix-permission-check
```
Then opens a PR or pushes to the existing PR branch only if allowed. Keep this disabled by default. Review-only is safer.
Security rules that matter:
```text
- Verify X-Gitea-Signature.
- Ignore the bot's own comments.
- Allowlist repos/orgs.
- Never run on untrusted fork PRs unless sandboxed hard.
- No Docker socket mount.
- No host filesystem mount except temp workdir.
- Timeout every job.
- Limit diff size.
- Redact .env, secrets, keys.
- Use bot token, not admin token.
- Log prompt + result, but not secrets.
```
Prompt shape for Codex:
```text
You are reviewing a Gitea pull request.
Focus only on issues introduced by this PR.
Prioritize correctness, security, data loss, broken behavior, bad migrations, and missing tests.
Avoid style nitpicks.
Return JSON:
{
"verdict": "correct" | "has_issues",
"confidence": 0.0-1.0,
"summary": "...",
"findings": [
{
"severity": "low|medium|high|critical",
"file": "...",
"line_start": 1,
"line_end": 1,
"title": "...",
"body": "...",
"suggestion": "..."
}
]
}
```
Practical build order:
```text
1. Make bot account + token.
2. Add webhook receiver.
3. Verify signature + parse @codex review.
4. Post “queued” comment.
5. Clone repo and generate diff.
6. Run Codex headless.
7. Post one summary comment.
8. Add dedupe + SQLite.
9. Add per-repo config file.
10. Add optional inline comments/fix branches later.
```
Per-repo config idea:
```yaml
# .codex-review.yml
enabled: true
review:
default_mode: summary
max_diff_bytes: 200000
include_tests: true
focus:
- correctness
- security
- maintainability
ignore:
- "dist/**"
- "pnpm-lock.yaml"
- "*.min.js"
commands:
allow_fix: false
```
Final recommendation: external webhook bot, summary comments first, bot account + token, Codex headless JSON, SQLite queue. Inline review comments and auto-fix branches are v2/v3. Trying to make the first version “full GitHub Copilot Reviews clone” is how this becomes annoying trash.
[1]: https://docs.gitea.com/usage/repository/webhooks "Webhooks | Gitea Documentation"
[2]: https://developers.openai.com/cookbook/examples/codex/build_code_review_with_codex_sdk "Build Code Review with the Codex SDK"
[3]: https://docs.gitea.com/development/api-usage "API Usage"

20
TODO.md Normal file
View File

@@ -0,0 +1,20 @@
# TODO
## Open Items By Priority
### P0 (Critical)
- [ ] True isolated runner flow: clone/fetch/checkout PR branch inside the ephemeral container itself, not on host before prompt generation.
- [ ] Remove host-side fallback path for review execution or gate it behind explicit `ALLOW_HOST_FALLBACK` to avoid silently bypassing isolation.
- [ ] Add integration test that proves runner container receives repo+PR context and executes review for the exact PR head SHA.
### P1 (Important)
- [ ] `WEBHOOK_MODE` is currently informational only; add runtime validation/check endpoint that confirms expected webhook scope (`repo` or `global`) is actually configured in Gitea by host admin.
- [ ] Make review model configurable via env (for example `OPENAI_REVIEW_MODEL`) instead of hardcoding `gpt-5`.
- [ ] Add retries/backoff for `codex exec` bootstrap (`npm install -g @openai/codex`) to reduce transient network/setup failures.
- [ ] Add end-to-end test path against live Gitea + MariaDB + docker runner (webhook -> queue -> runner -> PR comment update).
### P2 (Nice to have)
- [ ] Add explicit env docs for reverse-proxy deployment (`BASE_PUBLIC_URL`, trusted headers).
- [ ] Add per-repo command policy in `.codex-review.yml` for enabling/disabling commands (`review`, `fix`, `explain`, `rerun`).
- [ ] Add structured log redaction tests to ensure PAT/keys never appear in logs/comments.

38
alembic.ini Normal file
View File

@@ -0,0 +1,38 @@
[alembic]
script_location = alembic
prepend_sys_path = .
path_separator = os
sqlalchemy.url = mysql+pymysql://user:pass@localhost/db
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s

1
alembic/README Normal file
View File

@@ -0,0 +1 @@
# Alembic migrations

46
alembic/env.py Normal file
View File

@@ -0,0 +1,46 @@
from __future__ import annotations
from logging.config import fileConfig
from alembic import context
from sqlalchemy import engine_from_config, pool
from gitea_codex_bot.config import get_settings
from gitea_codex_bot.db import Base
from gitea_codex_bot import models # noqa: F401
# Alembic Config object giving access to the values in alembic.ini.
config = context.config

# Set up Python logging from the ini file when one is in use.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Metadata that autogenerate compares against; importing `models` above registers the tables on Base.
target_metadata = Base.metadata

# Override the placeholder URL from alembic.ini with the one derived from application settings.
settings = get_settings()
# Config options pass through ConfigParser interpolation, where a bare "%" is syntax.
# Percent-encoded credentials in the URL (e.g. "%40" for "@") must therefore be escaped as "%%".
config.set_main_option("sqlalchemy.url", settings.sqlalchemy_url.replace("%", "%%"))
def run_migrations_offline() -> None:
    """Run migrations in offline mode.

    The migration context is configured from the URL string alone, with
    literal binds and the "named" paramstyle, then the migrations are run
    inside a single Alembic transaction block.
    """
    context.configure(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )
    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations in online mode against a live connection.

    An engine is built from the ``sqlalchemy.``-prefixed options of the main
    ini section, using ``NullPool`` so no connections are pooled, and the
    migrations run inside a transaction on one connection from that engine.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(ini_section, prefix="sqlalchemy.", poolclass=pool.NullPool)

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
# Entry point executed by Alembic: pick the runner that matches the invocation mode.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()

View File

@@ -0,0 +1,107 @@
"""initial schema
Revision ID: 0001_initial
Revises:
Create Date: 2026-05-22 19:00:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# Revision identifiers read by Alembic. This is the first revision, so it has no parent.
revision: str = "0001_initial"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the initial schema.

    Tables are created in dependency order: ``webhook_events``, ``review_jobs``,
    ``review_runs`` (which references ``review_jobs``) and ``bot_comments``,
    together with their lookup indexes and the two status enums.
    """

    def _pk() -> sa.Column:
        # Every table uses the same auto-incrementing integer surrogate key.
        return sa.Column("id", sa.Integer(), autoincrement=True, nullable=False)

    def _stamp(name: str, *, required: bool = True) -> sa.Column:
        # Required timestamps default to the server clock; optional ones start out NULL.
        # A fresh Column is built on every call because a Column can belong to only one table.
        if required:
            return sa.Column(
                name,
                sa.DateTime(timezone=True),
                server_default=sa.text("CURRENT_TIMESTAMP"),
                nullable=False,
            )
        return sa.Column(name, sa.DateTime(timezone=True), nullable=True)

    # --- webhook_events -------------------------------------------------
    op.create_table(
        "webhook_events",
        _pk(),
        sa.Column("delivery_id", sa.String(length=255), nullable=True),
        sa.Column("event_name", sa.String(length=128), nullable=False),
        sa.Column("repo", sa.String(length=255), nullable=False),
        sa.Column("comment_id", sa.Integer(), nullable=True),
        sa.Column("payload_sha256", sa.String(length=64), nullable=False),
        _stamp("created_at"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("delivery_id", name="uq_webhook_events_delivery_id"),
        sa.UniqueConstraint("repo", "comment_id", name="uq_webhook_events_repo_comment"),
    )

    # --- review_jobs ----------------------------------------------------
    job_status_enum = sa.Enum("queued", "running", "succeeded", "failed", "skipped", name="jobstatus")
    job_status_enum.create(op.get_bind(), checkfirst=True)
    op.create_table(
        "review_jobs",
        _pk(),
        sa.Column("repo", sa.String(length=255), nullable=False),
        sa.Column("pr_number", sa.Integer(), nullable=False),
        sa.Column("head_sha", sa.String(length=64), nullable=False),
        sa.Column("trigger_comment_id", sa.Integer(), nullable=False),
        sa.Column("command", sa.String(length=64), nullable=False),
        sa.Column("command_args", sa.Text(), nullable=True),
        sa.Column("requested_by", sa.String(length=255), nullable=False),
        sa.Column("status", job_status_enum, nullable=False),
        sa.Column("last_error", sa.Text(), nullable=True),
        sa.Column("result_json", sa.JSON(), nullable=True),
        _stamp("created_at"),
        _stamp("updated_at"),
        _stamp("started_at", required=False),
        _stamp("finished_at", required=False),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("repo", "trigger_comment_id", name="uq_review_jobs_repo_trigger_comment"),
    )
    op.create_index(
        "ix_review_jobs_lookup",
        "review_jobs",
        ["repo", "pr_number", "head_sha", "status", "created_at"],
        unique=False,
    )

    # --- review_runs ----------------------------------------------------
    run_status_enum = sa.Enum("running", "succeeded", "failed", "skipped", name="runstatus")
    run_status_enum.create(op.get_bind(), checkfirst=True)
    op.create_table(
        "review_runs",
        _pk(),
        sa.Column("job_id", sa.Integer(), nullable=False),
        sa.Column("status", run_status_enum, nullable=False),
        sa.Column("runner_container_id", sa.String(length=128), nullable=True),
        sa.Column("result_json", sa.JSON(), nullable=True),
        sa.Column("error_message", sa.Text(), nullable=True),
        _stamp("started_at"),
        _stamp("finished_at", required=False),
        sa.ForeignKeyConstraint(["job_id"], ["review_jobs.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_review_runs_job_status", "review_runs", ["job_id", "status"], unique=False)

    # --- bot_comments ---------------------------------------------------
    op.create_table(
        "bot_comments",
        _pk(),
        sa.Column("repo", sa.String(length=255), nullable=False),
        sa.Column("pr_number", sa.Integer(), nullable=False),
        sa.Column("head_sha", sa.String(length=64), nullable=False),
        sa.Column("gitea_comment_id", sa.Integer(), nullable=False),
        sa.Column("marker", sa.String(length=255), nullable=False),
        _stamp("created_at"),
        _stamp("updated_at"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("repo", "pr_number", "marker", name="uq_bot_comments_marker"),
    )
    op.create_index("ix_bot_comments_repo_pr", "bot_comments", ["repo", "pr_number"], unique=False)
def downgrade() -> None:
    """Drop everything created by ``upgrade`` in reverse order."""
    # Each indexed table loses its index first, then the table itself.
    indexed_tables = (
        ("ix_bot_comments_repo_pr", "bot_comments"),
        ("ix_review_runs_job_status", "review_runs"),
        ("ix_review_jobs_lookup", "review_jobs"),
    )
    for index_name, table_name in indexed_tables:
        op.drop_index(index_name, table_name=table_name)
        op.drop_table(table_name)

    op.drop_table("webhook_events")

    # The enum types go last, once no table references them.
    for enum_name in ("runstatus", "jobstatus"):
        sa.Enum(name=enum_name).drop(op.get_bind(), checkfirst=True)

28
docker-compose.yml Normal file
View File

@@ -0,0 +1,28 @@
services:
mariadb:
image: mariadb:11
restart: unless-stopped
environment:
MARIADB_DATABASE: gitea_codex
MARIADB_USER: gitea_codex
MARIADB_PASSWORD: gitea_codex
MARIADB_ROOT_PASSWORD: rootpass
ports:
- "3306:3306"
healthcheck:
test: ["CMD", "mariadb-admin", "ping", "-h", "localhost", "-uroot", "-prootpass"]
interval: 5s
timeout: 3s
retries: 20
bot:
build: .
depends_on:
mariadb:
condition: service_healthy
env_file:
- .env
volumes:
- ./worktrees:/var/lib/gitea-codex/worktrees
ports:
- "8000:8000"

42
pyproject.toml Normal file
View File

@@ -0,0 +1,42 @@
[build-system]
requires = ["setuptools>=69", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "gitea-codex-bot"
version = "0.1.0"
description = "Webhook-driven Codex review bot for Gitea"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
"fastapi>=0.115.0",
"uvicorn[standard]>=0.30.0",
"sqlalchemy>=2.0.30",
"alembic>=1.13.2",
"pymysql>=1.1.1",
"httpx>=0.27.0",
"pydantic>=2.7.0",
"pydantic-settings>=2.3.0",
"python-dotenv>=1.0.1",
"pyyaml>=6.0.2",
]
[project.optional-dependencies]
dev = [
"pytest>=8.2.0",
"pytest-asyncio>=0.23.7",
"pytest-cov>=5.0.0",
]
[tool.pytest.ini_options]
addopts = "-q"
testpaths = ["tests"]
markers = [
"no_schema: skip automatic schema setup fixture for migration-focused tests",
]
[tool.setuptools]
package-dir = {"" = "src"}
[tool.setuptools.packages.find]
where = ["src"]

View File

@@ -0,0 +1,3 @@
# Public API of the package: only the version string is exported.
__all__ = ["__version__"]
# Package version; pyproject.toml declares the same value under [project].version.
__version__ = "0.1.0"

View File

View File

@@ -0,0 +1,64 @@
from __future__ import annotations
from functools import lru_cache
from typing import Literal
from pydantic import Field, SecretStr, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application configuration read from environment variables and an optional ``.env`` file.

    Every field is bound to an upper-case environment variable through its
    ``alias``. Unknown variables are ignored (``extra="ignore"``). Secrets are
    stored as ``SecretStr``; call ``get_secret_value()`` to read them.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    # Gitea connection, bot identity and webhook secret.
    gitea_base_url: str = Field(alias="GITEA_BASE_URL")
    gitea_token: SecretStr = Field(alias="GITEA_TOKEN")
    gitea_bot_username: str = Field(alias="GITEA_BOT_USERNAME")
    gitea_webhook_secret: SecretStr = Field(alias="GITEA_WEBHOOK_SECRET")

    # OpenAI credentials and model selection.
    openai_api_key: SecretStr = Field(alias="OPENAI_API_KEY")
    openai_project_id: str | None = Field(default=None, alias="OPENAI_PROJECT_ID")
    openai_org_id: str | None = Field(default=None, alias="OPENAI_ORG_ID")
    openai_review_model: str = Field(default="gpt-5.3-codex", alias="OPENAI_REVIEW_MODEL")
    openai_reasoning_effort: Literal["none", "low", "medium", "high"] = Field(default="high", alias="OPENAI_REASONING_EFFORT")

    # Comma-separated repository allowlist (parsed by `allowed_repo_set`) and request policy.
    allowed_repos: str = Field(alias="ALLOWED_REPOS")
    cooldown_seconds: int = Field(default=60, alias="COOLDOWN_SECONDS")
    webhook_mode: Literal["repo", "global"] = Field(default="repo", alias="WEBHOOK_MODE")

    # Database connection parts; DATABASE_URL, when set, takes precedence over them.
    db_host: str = Field(alias="DB_HOST")
    db_port: int = Field(default=3306, alias="DB_PORT")
    db_name: str = Field(alias="DB_NAME")
    db_user: str = Field(alias="DB_USER")
    db_password: SecretStr = Field(alias="DB_PASSWORD")
    database_url: str | None = Field(default=None, alias="DATABASE_URL")

    # Review execution limits and runner options.
    workdir: str = Field(default="/var/lib/gitea-codex/worktrees", alias="WORKDIR")
    max_diff_bytes: int = Field(default=200000, alias="MAX_DIFF_BYTES")
    max_review_minutes: int = Field(default=10, alias="MAX_REVIEW_MINUTES")
    concurrency: int = Field(default=1, alias="CONCURRENCY")
    review_runner_image: str = Field(default="node:22-bookworm-slim", alias="REVIEW_RUNNER_IMAGE")
    enable_fix_commands: bool = Field(default=False, alias="ENABLE_FIX_COMMANDS")
    allow_untrusted_forks: bool = Field(default=False, alias="ALLOW_UNTRUSTED_FORKS")

    @field_validator("gitea_base_url")
    @classmethod
    def normalize_base_url(cls, value: str) -> str:
        """Strip trailing slashes so callers can append paths with a leading ``/``."""
        return value.rstrip("/")

    @property
    def sqlalchemy_url(self) -> str:
        """Return the SQLAlchemy database URL.

        ``DATABASE_URL`` is returned verbatim when set. Otherwise a
        ``mysql+pymysql`` URL is assembled from the ``DB_*`` settings.
        """
        # Local import keeps this block self-contained without touching the file's import section.
        from urllib.parse import quote

        if self.database_url:
            return self.database_url
        # Credentials may contain URL delimiters such as "@", ":" or "/"; percent-encode them so
        # the URL parses back to the exact user and password. quote() with safe="" is used
        # rather than quote_plus() so that a space becomes "%20" and never "+".
        user = quote(self.db_user, safe="")
        password = quote(self.db_password.get_secret_value(), safe="")
        return f"mysql+pymysql://{user}:{password}@{self.db_host}:{self.db_port}/{self.db_name}?charset=utf8mb4"

    @property
    def allowed_repo_set(self) -> set[str]:
        """Return the allowlisted repositories as a set, ignoring blanks and surrounding whitespace."""
        return {entry.strip() for entry in self.allowed_repos.split(",") if entry.strip()}
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Build the ``Settings`` object once and return the cached instance on later calls."""
    loaded = Settings()
    return loaded

32
src/gitea_codex_bot/db.py Normal file
View File

@@ -0,0 +1,32 @@
from __future__ import annotations
from collections.abc import Generator
from functools import lru_cache
from sqlalchemy import create_engine
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
from gitea_codex_bot.config import get_settings
class Base(DeclarativeBase):
    """Declarative base class shared by all ORM models of the bot."""
@lru_cache(maxsize=1)
def get_engine():
    """Create the SQLAlchemy engine from application settings; the result is cached after the first call."""
    database_url = get_settings().sqlalchemy_url
    # pool_pre_ping makes the pool test a connection before handing it out.
    return create_engine(database_url, pool_pre_ping=True, future=True)
@lru_cache(maxsize=1)
def get_session_factory():
    """Return the cached ``sessionmaker`` bound to the shared engine."""
    factory = sessionmaker(
        bind=get_engine(),
        class_=Session,
        autoflush=False,
        autocommit=False,
        expire_on_commit=False,
    )
    return factory
def get_session() -> Generator[Session, None, None]:
    """Yield a new database session and close it once the consumer is done.

    Written as a generator so it can be used as a FastAPI dependency.
    """
    # Session is a context manager whose exit closes the session, including on error.
    with get_session_factory()() as session:
        yield session

175
src/gitea_codex_bot/main.py Normal file
View File

@@ -0,0 +1,175 @@
from __future__ import annotations
import asyncio
import logging
from contextlib import asynccontextmanager
from typing import Any
from fastapi import Depends, FastAPI, Header, HTTPException, Request, status
from sqlalchemy.orm import Session
from gitea_codex_bot.config import Settings, get_settings
from gitea_codex_bot.db import Base, get_engine, get_session
from gitea_codex_bot.services.commands import parse_command
from gitea_codex_bot.services.gitea import GiteaClient
from gitea_codex_bot.services.jobs import cooldown_remaining_seconds, enqueue_job, persist_webhook_event
from gitea_codex_bot.services.review_format import (
format_cooldown_ack,
format_queue_ack,
format_unsupported_ack,
)
from gitea_codex_bot.services.security import verify_gitea_signature
from gitea_codex_bot.workers.dispatcher import worker_loop
# Configure root logging once at import time: INFO level, with timestamp, level and logger name.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s")
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def _validate_required_env(settings: Settings) -> None:
    """Fail fast when the OpenAI API key is empty or whitespace-only.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` has no non-blank content.
    """
    api_key = settings.openai_api_key.get_secret_value()
    if api_key.strip():
        return
    raise RuntimeError("OPENAI_API_KEY is required")
def _extract_pr_event(payload: dict[str, Any], event_name: str) -> tuple[str, int, str, int, str] | None:
repository = payload.get("repository", {})
repo = repository.get("full_name")
if not repo:
return None
sender = payload.get("sender", {})
sender_username = sender.get("username", "")
comment = payload.get("comment", {})
comment_id = int(comment.get("id", 0) or 0)
if comment_id <= 0:
return None
if event_name == "issue_comment":
issue = payload.get("issue", {})
if not issue.get("pull_request"):
return None
pr_number = int(issue.get("number", 0) or 0)
head_sha = payload.get("pull_request", {}).get("head", {}).get("sha", "")
elif event_name == "pull_request_comment":
pull_request = payload.get("pull_request", {})
if not pull_request:
return None
pr_number = int(pull_request.get("number", 0) or 0)
head_sha = pull_request.get("head", {}).get("sha", "")
else:
return None
if pr_number <= 0:
return None
if not head_sha:
head_sha = "unknown"
return repo, pr_number, head_sha, comment_id, sender_username
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: validate config, ensure tables exist, run the background worker.

    On startup the worker loop is started as an asyncio task and, together
    with its stop event, stored on ``app.state``. On shutdown the stop event
    is set and the worker task is awaited.
    """
    settings = get_settings()
    _validate_required_env(settings)
    Base.metadata.create_all(bind=get_engine())

    shutdown_signal = asyncio.Event()
    worker = asyncio.create_task(worker_loop(settings, shutdown_signal))
    app.state.worker_stop_event = shutdown_signal
    app.state.worker_task = worker

    try:
        yield
    finally:
        # Signal the worker to stop, then wait for it to finish.
        shutdown_signal.set()
        await worker
# ASGI application object; startup and shutdown are handled by the `lifespan` context manager.
app = FastAPI(title="Gitea Codex Review Bot", lifespan=lifespan)
@app.get("/healthz")
def healthz(settings: Settings = Depends(get_settings)) -> dict[str, str]:
    """Health probe: resolves the settings dependency and reports a static OK payload."""
    # Touch one attribute so the injected settings object is actually used.
    _ = settings.gitea_base_url
    response = {"status": "ok"}
    return response
@app.post("/webhook/gitea")
async def gitea_webhook(
    request: Request,
    x_gitea_event: str | None = Header(default=None),
    x_gitea_delivery: str | None = Header(default=None),
    x_gitea_signature: str | None = Header(default=None),
    session: Session = Depends(get_session),
    settings: Settings = Depends(get_settings),
) -> dict[str, Any]:
    """Receive a Gitea webhook and queue a job when a PR comment contains a codex command.

    Processing order: verify the signature, filter by event type, extract
    the PR context, ignore the bot's own comments, parse the command,
    enforce the repository allowlist, dedupe the event, then enqueue.

    Returns:
        A JSON-serialisable dict with ``accepted`` plus either ``reason`` or
        the queued ``job_id``/``status``.

    Raises:
        HTTPException: 401 when the signature does not verify; 400 when the
            body is not a JSON object.
    """
    payload_bytes = await request.body()
    secret = settings.gitea_webhook_secret.get_secret_value()
    if not verify_gitea_signature(payload_bytes, secret, x_gitea_signature):
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="invalid signature")

    event_name = (x_gitea_event or "").strip()
    if event_name not in {"issue_comment", "pull_request_comment"}:
        return {"accepted": False, "reason": "event ignored"}

    # A signed but malformed body is a client error, not a server fault.
    try:
        payload = await request.json()
    except ValueError as exc:  # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="invalid JSON payload") from exc
    if not isinstance(payload, dict):
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="invalid JSON payload")

    extracted = _extract_pr_event(payload, event_name)
    if not extracted:
        return {"accepted": False, "reason": "not a pull request comment"}
    repo, pr_number, head_sha, comment_id, sender_username = extracted

    # Never react to our own comments, otherwise acknowledgements could re-trigger the bot.
    if sender_username == settings.gitea_bot_username:
        return {"accepted": False, "reason": "bot comment ignored"}

    comment_body = str(payload.get("comment", {}).get("body", "")).strip()
    parsed_command = parse_command(comment_body)
    if not parsed_command:
        return {"accepted": False, "reason": "no codex command"}

    if repo not in settings.allowed_repo_set:
        return {"accepted": False, "reason": "repo not allowed"}

    inserted = persist_webhook_event(
        session,
        delivery_id=x_gitea_delivery,
        event_name=event_name,
        repo=repo,
        comment_id=comment_id,
        payload=payload_bytes,
    )
    if not inserted:
        return {"accepted": True, "reason": "duplicate event"}

    gitea = GiteaClient(settings)

    if parsed_command.name in {"review", "rerun"}:
        if head_sha == "unknown":
            # Best effort: the job is still queued with "unknown" if the lookup fails,
            # but the failure is logged instead of being swallowed silently.
            try:
                head_sha = gitea.get_pull_request(repo, pr_number).head_sha
            except Exception:
                logger.warning(
                    "Could not resolve head SHA for %s#%s; continuing with 'unknown'",
                    repo,
                    pr_number,
                    exc_info=True,
                )

        # "rerun" deliberately bypasses the cooldown.
        if parsed_command.name != "rerun":
            remaining = cooldown_remaining_seconds(session, repo, pr_number, settings.cooldown_seconds)
            if remaining > 0:
                gitea.post_issue_comment(repo, pr_number, format_cooldown_ack(remaining))
                return {"accepted": True, "reason": "cooldown active", "cooldown_seconds_remaining": remaining}

        job = enqueue_job(
            session,
            repo=repo,
            pr_number=pr_number,
            head_sha=head_sha,
            trigger_comment_id=comment_id,
            requested_by=sender_username,
            command=parsed_command,
        )
        gitea.post_issue_comment(repo, pr_number, format_queue_ack(head_sha))
        return {"accepted": True, "job_id": job.id, "status": "queued"}

    if parsed_command.name in {"fix", "explain", "ignore"}:
        # These commands are queued without posting an acknowledgement comment.
        job = enqueue_job(
            session,
            repo=repo,
            pr_number=pr_number,
            head_sha=head_sha,
            trigger_comment_id=comment_id,
            requested_by=sender_username,
            command=parsed_command,
        )
        return {"accepted": True, "job_id": job.id, "status": "queued"}

    gitea.post_issue_comment(repo, pr_number, format_unsupported_ack(parsed_command))
    return {"accepted": False, "reason": "unsupported command"}

View File

@@ -0,0 +1,113 @@
from __future__ import annotations
import enum
from datetime import datetime
from sqlalchemy import DateTime, Enum, ForeignKey, Index, Integer, JSON, String, Text, UniqueConstraint, func
from sqlalchemy.orm import Mapped, mapped_column, relationship
from gitea_codex_bot.db import Base
class JobStatus(str, enum.Enum):
    """Status values for a review job; members are also plain strings."""

    queued = "queued"
    running = "running"
    succeeded = "succeeded"
    failed = "failed"
    skipped = "skipped"
class RunStatus(str, enum.Enum):
    """Status values for a single review run; unlike ``JobStatus`` there is no ``queued`` member."""

    running = "running"
    succeeded = "succeeded"
    failed = "failed"
    skipped = "skipped"
class WebhookEvent(Base):
    """Record of a received webhook event.

    Rows are unique per ``delivery_id`` and per ``(repo, comment_id)`` pair.
    """

    __tablename__ = "webhook_events"
    __table_args__ = (
        UniqueConstraint("delivery_id", name="uq_webhook_events_delivery_id"),
        UniqueConstraint("repo", "comment_id", name="uq_webhook_events_repo_comment"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    delivery_id: Mapped[str | None] = mapped_column(String(255), nullable=True)
    event_name: Mapped[str] = mapped_column(String(128), nullable=False)
    repo: Mapped[str] = mapped_column(String(255), nullable=False)
    comment_id: Mapped[int | None] = mapped_column(Integer, nullable=True)
    # 64 characters: the length of a hex-encoded SHA-256 digest.
    payload_sha256: Mapped[str] = mapped_column(String(64), nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
class ReviewJob(Base):
    """A queued command for one pull request, created from a triggering comment.

    A job is unique per ``(repo, trigger_comment_id)`` and owns its
    ``ReviewRun`` rows, which are deleted together with the job.
    """

    __tablename__ = "review_jobs"
    __table_args__ = (
        Index("ix_review_jobs_lookup", "repo", "pr_number", "head_sha", "status", "created_at"),
        UniqueConstraint("repo", "trigger_comment_id", name="uq_review_jobs_repo_trigger_comment"),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Which pull request and revision the job targets.
    repo: Mapped[str] = mapped_column(String(255), nullable=False)
    pr_number: Mapped[int] = mapped_column(Integer, nullable=False)
    head_sha: Mapped[str] = mapped_column(String(64), nullable=False)

    # What was requested, and by whom.
    trigger_comment_id: Mapped[int] = mapped_column(Integer, nullable=False)
    command: Mapped[str] = mapped_column(String(64), nullable=False, default="review")
    command_args: Mapped[str | None] = mapped_column(Text, nullable=True)
    requested_by: Mapped[str] = mapped_column(String(255), nullable=False)

    # Progress and outcome.
    status: Mapped[JobStatus] = mapped_column(Enum(JobStatus), nullable=False, default=JobStatus.queued)
    last_error: Mapped[str | None] = mapped_column(Text, nullable=True)
    result_json: Mapped[dict | None] = mapped_column(JSON, nullable=True)

    # Timestamps; updated_at is refreshed on every UPDATE issued through the ORM.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )
    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)

    runs: Mapped[list["ReviewRun"]] = relationship(back_populates="job", cascade="all, delete-orphan")
class ReviewRun(Base):
    """ORM row for a single execution attempt of a ReviewJob (table ``review_runs``)."""

    __tablename__ = "review_runs"
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Owning job; the foreign key is declared with ON DELETE CASCADE.
    job_id: Mapped[int] = mapped_column(ForeignKey("review_jobs.id", ondelete="CASCADE"), nullable=False)
    # New rows start as RunStatus.running.
    status: Mapped[RunStatus] = mapped_column(Enum(RunStatus), nullable=False, default=RunStatus.running)
    # Optional identifier of the runner container (up to 128 characters).
    runner_container_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
    result_json: Mapped[dict | None] = mapped_column(JSON, nullable=True)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Set by the database at insert time.
    started_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now())
    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # Back-reference to the parent job (pairs with ReviewJob.runs).
    job: Mapped["ReviewJob"] = relationship(back_populates="runs")
    __table_args__ = (Index("ix_review_runs_job_status", "job_id", "status"),)
class BotComment(Base):
    """ORM row remembering a comment the bot posted on a pull request (table ``bot_comments``)."""

    __tablename__ = "bot_comments"
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    repo: Mapped[str] = mapped_column(String(255), nullable=False)
    pr_number: Mapped[int] = mapped_column(Integer, nullable=False)
    # Commit SHA associated with the stored comment.
    head_sha: Mapped[str] = mapped_column(String(64), nullable=False)
    # Identifier of the comment on the Gitea side.
    gitea_comment_id: Mapped[int] = mapped_column(Integer, nullable=False)
    # Marker string distinguishing comment kinds; unique per (repo, pr_number).
    marker: Mapped[str] = mapped_column(String(255), nullable=False)
    # Set by the database at insert time.
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now())
    # Set at insert time and refreshed through onupdate on every UPDATE.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )
    __table_args__ = (
        # Only one comment row per marker on a given pull request.
        UniqueConstraint("repo", "pr_number", "marker", name="uq_bot_comments_marker"),
        Index("ix_bot_comments_repo_pr", "repo", "pr_number"),
    )

View File

View File

@@ -0,0 +1,30 @@
from __future__ import annotations
import re
from gitea_codex_bot.types import ParsedCommand
COMMAND_RE = re.compile(r"^@codex\s+(review|explain|fix|ignore|rerun)\b(.*)$", re.IGNORECASE | re.DOTALL)


def parse_command(body: str) -> ParsedCommand | None:
    """Parse a comment body into a ParsedCommand.

    The stripped body must start with ``@codex`` followed by one of the
    supported command names (matched case-insensitively). Everything after the
    command name is split on whitespace into the argument list.

    Returns:
        The parsed command, or ``None`` when the body is not a bot command.
    """
    text = body.strip()
    found = COMMAND_RE.match(text)
    if found is None:
        return None

    verb = found.group(1).lower()
    words = found.group(2).split()
    command = ParsedCommand(name=verb, raw=text, arguments=words)

    if verb == "fix":
        command.branch_fix = "--branch" in words
    elif verb == "review":
        if "--full" in words:
            command.full = True
            command.mode = "full"
        # A focus keyword wins over "--full"; the first one in this order is used.
        focus = next((name for name in ("security", "performance", "tests") if name in words), None)
        if focus is not None:
            command.mode = focus
    return command

View File

@@ -0,0 +1,40 @@
from __future__ import annotations
from sqlalchemy import select
from sqlalchemy.orm import Session
from gitea_codex_bot.models import BotComment
REVIEW_MARKER = "codex-review"


def get_persistent_review_comment_id(session: Session, repo: str, pr_number: int) -> int | None:
    """Return the stored Gitea comment id of the review comment for a PR.

    Looks up the ``BotComment`` row carrying ``REVIEW_MARKER`` for the given
    repository and pull request; returns ``None`` when no such row exists.
    """
    query = (
        select(BotComment)
        .where(
            BotComment.repo == repo,
            BotComment.pr_number == pr_number,
            BotComment.marker == REVIEW_MARKER,
        )
        .limit(1)
    )
    existing = session.execute(query).scalar_one_or_none()
    if not existing:
        return None
    return existing.gitea_comment_id
def upsert_persistent_review_comment_id(
    session: Session,
    *,
    repo: str,
    pr_number: int,
    head_sha: str,
    comment_id: int,
) -> None:
    """Create or update the ``BotComment`` row tracking the review comment.

    When a row with ``REVIEW_MARKER`` already exists for the pull request its
    ``head_sha`` and ``gitea_comment_id`` are overwritten; otherwise a new row
    is added. The session is committed in both cases.
    """
    query = (
        select(BotComment)
        .where(
            BotComment.repo == repo,
            BotComment.pr_number == pr_number,
            BotComment.marker == REVIEW_MARKER,
        )
        .limit(1)
    )
    existing = session.execute(query).scalar_one_or_none()
    if existing:
        existing.head_sha = head_sha
        existing.gitea_comment_id = comment_id
    else:
        session.add(
            BotComment(
                repo=repo,
                pr_number=pr_number,
                head_sha=head_sha,
                gitea_comment_id=comment_id,
                marker=REVIEW_MARKER,
            )
        )
    session.commit()

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
from urllib.parse import quote
import httpx
from gitea_codex_bot.config import Settings
@dataclass(slots=True)
class PullRequestContext:
    """Fields of a pull request that the bot works with.

    Instances are built by ``GiteaClient.get_pull_request`` from the pull
    request payload returned by the Gitea API.
    """

    # Repository string as passed by the caller ("owner/name").
    repo: str
    pr_number: int
    # Taken from payload["base"]["ref"] / payload["base"]["sha"].
    base_ref: str
    base_sha: str
    # Taken from payload["head"]["ref"] / payload["head"]["sha"].
    head_ref: str
    head_sha: str
    # Clone URL of the head repository (payload["head"]["repo"]["clone_url"]).
    clone_url: str
    html_url: str
    # True when the head and base repositories have different full names.
    is_fork: bool
class GiteaClient:
    """Small synchronous wrapper around the Gitea REST endpoints the bot uses.

    Every call opens a short-lived ``httpx.Client`` with a 20 second timeout,
    sends the bot token in the ``Authorization`` header and raises
    ``httpx.HTTPStatusError`` on non-2xx responses.
    """

    def __init__(self, settings: Settings) -> None:
        """Store the settings and precompute the base URL and request headers."""
        self.settings = settings
        # Strip trailing slashes so "https://host/" + "/api/v1/..." never
        # produces a "//api/v1/..." path; str() also accepts URL objects.
        self.base_url = str(settings.gitea_base_url).rstrip("/")
        self.headers = {
            "Authorization": f"token {settings.gitea_token.get_secret_value()}",
            "Accept": "application/json",
            "Content-Type": "application/json",
        }

    def _request(self, method: str, path: str, *, json_body: dict[str, Any] | None = None) -> Any:
        """Send one request and return the decoded JSON body.

        Returns ``None`` for ``204 No Content`` responses. Raises
        ``httpx.HTTPStatusError`` when the server answers with an error status.
        """
        with httpx.Client(timeout=20.0) as client:
            response = client.request(
                method,
                f"{self.base_url}{path}",
                headers=self.headers,
                json=json_body,
            )
            response.raise_for_status()
            if response.status_code == 204:
                return None
            return response.json()

    @staticmethod
    def split_repo(repo: str) -> tuple[str, str]:
        """Split ``"owner/name"`` at the first slash into ``(owner, name)``.

        Raises ``ValueError`` when the string contains no slash.
        """
        owner, name = repo.split("/", 1)
        return owner, name

    @classmethod
    def _repo_api_path(cls, repo: str) -> str:
        """Return the URL-encoded ``/api/v1/repos/{owner}/{name}`` prefix for ``repo``."""
        owner, name = cls.split_repo(repo)
        # safe="" also encodes "/" so neither part can add path segments.
        return f"/api/v1/repos/{quote(owner, safe='')}/{quote(name, safe='')}"

    def get_pull_request(self, repo: str, pr_number: int) -> PullRequestContext:
        """Fetch a pull request and map its payload onto a PullRequestContext."""
        payload = self._request("GET", f"{self._repo_api_path(repo)}/pulls/{pr_number}")
        head = payload["head"]
        base = payload["base"]
        return PullRequestContext(
            repo=repo,
            pr_number=pr_number,
            base_ref=base["ref"],
            base_sha=base["sha"],
            head_ref=head["ref"],
            head_sha=head["sha"],
            clone_url=head["repo"]["clone_url"],
            html_url=payload["html_url"],
            # A PR is a fork PR when head and base live in different repositories.
            is_fork=bool(head["repo"]["full_name"] != base["repo"]["full_name"]),
        )

    def post_issue_comment(self, repo: str, pr_number: int, body: str) -> int:
        """Create a comment on the pull request and return the new comment id."""
        payload = self._request(
            "POST",
            f"{self._repo_api_path(repo)}/issues/{pr_number}/comments",
            json_body={"body": body},
        )
        return int(payload["id"])

    def edit_issue_comment(self, repo: str, comment_id: int, body: str) -> int:
        """Replace the body of an existing comment and return its id."""
        payload = self._request(
            "PATCH",
            f"{self._repo_api_path(repo)}/issues/comments/{comment_id}",
            json_body={"body": body},
        )
        return int(payload["id"])

    def list_issue_comments(self, repo: str, pr_number: int) -> list[dict[str, Any]]:
        """Return the comments of the pull request as a list of payload dicts."""
        payload = self._request("GET", f"{self._repo_api_path(repo)}/issues/{pr_number}/comments")
        return list(payload)

View File

@@ -0,0 +1,136 @@
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
from gitea_codex_bot.models import JobStatus, ReviewJob, ReviewRun, RunStatus, WebhookEvent
from gitea_codex_bot.services.security import payload_digest
from gitea_codex_bot.types import ParsedCommand
def persist_webhook_event(
    session: Session,
    *,
    delivery_id: str | None,
    event_name: str,
    repo: str,
    comment_id: int | None,
    payload: bytes,
) -> bool:
    """Record a webhook delivery, reporting whether it was stored.

    The row stores the SHA-256 digest of the payload rather than the payload
    itself.

    Returns:
        ``True`` when the row was committed; ``False`` when the commit failed
        with ``IntegrityError`` (the transaction is rolled back in that case).
    """
    session.add(
        WebhookEvent(
            delivery_id=delivery_id,
            event_name=event_name,
            repo=repo,
            comment_id=comment_id,
            payload_sha256=payload_digest(payload),
        )
    )
    try:
        session.commit()
    except IntegrityError:
        session.rollback()
        return False
    return True
def cooldown_remaining_seconds(session: Session, repo: str, pr_number: int, cooldown_seconds: int) -> int:
    """Return how many whole seconds of cooldown remain for a pull request.

    The newest job created for the PR within the last ``cooldown_seconds`` is
    looked up; ``0`` is returned when there is none or the cooldown has passed.
    """
    window_start = datetime.now(timezone.utc) - timedelta(seconds=cooldown_seconds)
    recent_query = (
        select(ReviewJob)
        .where(
            ReviewJob.repo == repo,
            ReviewJob.pr_number == pr_number,
            ReviewJob.created_at >= window_start,
        )
        .order_by(ReviewJob.created_at.desc())
        .limit(1)
    )
    latest = session.execute(recent_query).scalar_one_or_none()
    if not latest:
        return 0

    stamp = latest.created_at
    # Naive timestamps are treated as UTC so they can be subtracted from an aware "now".
    if stamp.tzinfo is None:
        stamp = stamp.replace(tzinfo=timezone.utc)
    elapsed = (datetime.now(timezone.utc) - stamp).total_seconds()
    return int(max(cooldown_seconds - elapsed, 0))
def enqueue_job(
    session: Session,
    *,
    repo: str,
    pr_number: int,
    head_sha: str,
    trigger_comment_id: int,
    requested_by: str,
    command: ParsedCommand,
) -> ReviewJob:
    """Insert a queued ReviewJob for the command and return the refreshed row.

    The command arguments are stored as one space-joined string, or ``None``
    when the command has no arguments.
    """
    args_text = " ".join(command.arguments) if command.arguments else None
    queued = ReviewJob(
        repo=repo,
        pr_number=pr_number,
        head_sha=head_sha,
        trigger_comment_id=trigger_comment_id,
        command=command.name,
        command_args=args_text,
        requested_by=requested_by,
        status=JobStatus.queued,
    )
    session.add(queued)
    session.commit()
    # Reload so database-generated values are populated on the returned object.
    session.refresh(queued)
    return queued
def claim_next_job(session: Session) -> ReviewJob | None:
    """Claim the oldest queued job, mark it running and open a run record.

    The row is selected with ``FOR UPDATE SKIP LOCKED``. Returns ``None``
    (after rolling back) when no queued job is available.
    """
    oldest_queued = (
        select(ReviewJob)
        .where(ReviewJob.status == JobStatus.queued)
        .order_by(ReviewJob.created_at.asc())
        .limit(1)
        .with_for_update(skip_locked=True)
    )
    claimed = session.execute(oldest_queued).scalar_one_or_none()
    if not claimed:
        session.rollback()
        return None

    claimed.status = JobStatus.running
    claimed.started_at = datetime.now(timezone.utc)
    session.add(ReviewRun(job_id=claimed.id, status=RunStatus.running))
    session.commit()
    session.refresh(claimed)
    return claimed
def finish_job(
    session: Session,
    *,
    job_id: int,
    success: bool,
    skipped: bool,
    result: dict | None,
    error_message: str | None,
) -> None:
    """Mark a job and its most recent run as finished.

    ``skipped`` takes precedence over ``success`` when choosing the final
    status. The job keeps its previous ``result_json`` when ``result`` is
    ``None``; the run's ``result_json`` is always overwritten. Does nothing when
    the job id is unknown.
    """
    job = session.get(ReviewJob, job_id)
    if not job:
        return

    newest_run_query = select(ReviewRun).where(ReviewRun.job_id == job_id).order_by(ReviewRun.id.desc()).limit(1)
    newest_run = session.execute(newest_run_query).scalar_one_or_none()

    if skipped:
        job_state, run_state = JobStatus.skipped, RunStatus.skipped
    elif success:
        job_state, run_state = JobStatus.succeeded, RunStatus.succeeded
    else:
        job_state, run_state = JobStatus.failed, RunStatus.failed

    finished = datetime.now(timezone.utc)
    job.status = job_state
    job.finished_at = finished
    job.last_error = error_message
    if result is not None:
        job.result_json = result

    if newest_run:
        newest_run.status = run_state
        newest_run.finished_at = finished
        newest_run.result_json = result
        newest_run.error_message = error_message
    session.commit()

View File

@@ -0,0 +1,35 @@
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
import yaml
@dataclass(slots=True)
class RepoReviewConfig:
    """Per-repository review settings read from ``.codex-review.yml``.

    The defaults below are what ``load_repo_review_config`` returns when the
    file does not exist.
    """

    # Top-level "enabled" key.
    enabled: bool = True
    # "review.default_mode".
    default_mode: str = "summary"
    # "review.max_diff_bytes".
    max_diff_bytes: int = 200000
    # "review.include_tests".
    include_tests: bool = True
    # "review.focus"; default_factory gives every instance its own list.
    focus: list[str] = field(default_factory=lambda: ["correctness", "security", "maintainability"])
    # Top-level "ignore" list.
    ignore: list[str] = field(default_factory=list)
    # "commands.allow_fix".
    allow_fix: bool = False
def _mapping_or_empty(value: object) -> dict:
    """Return ``value`` when it is a mapping, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _string_list(value: object, default: list[str]) -> list[str]:
    """Coerce a YAML value into a list of strings, falling back to ``default``."""
    if isinstance(value, str):
        # A bare scalar means a single entry, not a list of characters.
        return [value]
    if isinstance(value, (list, tuple)):
        return [str(item) for item in value]
    return list(default)


def _int_or_default(value: object, default: int) -> int:
    """Convert ``value`` to int, falling back to ``default`` when that fails."""
    try:
        return int(value)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        return default


def load_repo_review_config(repo_root: Path) -> RepoReviewConfig:
    """Load ``.codex-review.yml`` from ``repo_root`` into a RepoReviewConfig.

    A missing file yields the defaults. The file comes from the checked-out
    repository, so its shape is not trusted: a non-mapping document or section
    is treated as empty, ``null`` values fall back to the defaults instead of
    raising, and a scalar given for a list field becomes a one-element list.

    Raises:
        yaml.YAMLError: when the file is not valid YAML.
    """
    path = repo_root / ".codex-review.yml"
    if not path.exists():
        return RepoReviewConfig()

    defaults = RepoReviewConfig()
    raw = _mapping_or_empty(yaml.safe_load(path.read_text(encoding="utf-8")))
    review = _mapping_or_empty(raw.get("review"))
    commands = _mapping_or_empty(raw.get("commands"))

    default_mode = review.get("default_mode")
    return RepoReviewConfig(
        enabled=bool(raw.get("enabled", True)),
        default_mode=defaults.default_mode if default_mode is None else str(default_mode),
        max_diff_bytes=_int_or_default(review.get("max_diff_bytes"), defaults.max_diff_bytes),
        include_tests=bool(review.get("include_tests", True)),
        focus=_string_list(review.get("focus"), defaults.focus),
        ignore=_string_list(raw.get("ignore"), defaults.ignore),
        allow_fix=bool(commands.get("allow_fix", False)),
    )

View File

@@ -0,0 +1,50 @@
from __future__ import annotations
from gitea_codex_bot.types import ParsedCommand
def format_queue_ack(head_sha: str) -> str:
    """Return the acknowledgement posted when a review is queued.

    Only the first seven characters of the commit SHA are shown.
    """
    return "👀 Codex review queued for commit `" + head_sha[:7] + "`."
def format_cooldown_ack(seconds: int) -> str:
    """Return the message telling the requester how long the cooldown still lasts."""
    wait = f"{seconds}s"
    return f"⏳ Cooldown active. Please wait {wait} before requesting another review on this PR."
def format_disabled_ack() -> str:
    """Return the message posted when the repository config disables reviews."""
    message = "🚫 Review is disabled by `.codex-review.yml` for this repository."
    return message
def format_unsupported_ack(command: ParsedCommand) -> str:
    """Return the message posted when the requested command is not enabled."""
    invocation = f"@codex {command.name}"
    return f"⚠️ Command `{invocation}` is not enabled on this repository."
def format_result_comment(head_sha: str, result: dict) -> str:
    """Render a review result as the Markdown body of the review comment.

    The body starts with a hidden ``codex-review:head_sha=...`` marker, followed
    by the verdict, confidence, summary and a numbered list of findings.

    The result comes from a model, so its shape is not trusted: a missing,
    ``null`` or non-numeric ``confidence`` is rendered as ``0.00`` and findings
    that are not mappings are ignored, instead of raising while formatting.

    Args:
        head_sha: Commit SHA embedded in the hidden marker.
        result: Review result mapping (``verdict``, ``confidence``, ``summary``,
            ``findings``); every key is optional.

    Returns:
        The comment body with surrounding whitespace stripped.
    """
    verdict = result.get("verdict", "has_issues")
    try:
        confidence = float(result.get("confidence", 0.0))
    except (TypeError, ValueError):
        confidence = 0.0
    summary = str(result.get("summary", "No summary returned."))

    raw_findings = result.get("findings") or []
    if not isinstance(raw_findings, (list, tuple)):
        raw_findings = []
    # Each finding is accessed with .get below, so anything but a dict is dropped.
    findings = [item for item in raw_findings if isinstance(item, dict)]

    lines = [
        f"<!-- codex-review:head_sha={head_sha} -->",
        "## Codex Review",
        "",
        f"Verdict: `{verdict}`",
        f"Confidence: `{confidence:.2f}`",
        "",
        summary,
        "",
    ]
    if not findings:
        lines.append("No blocking issues found.")
    else:
        lines.append("Findings:")
        for idx, finding in enumerate(findings, start=1):
            severity = finding.get("severity", "unknown")
            file_path = finding.get("file", "unknown")
            line_start = finding.get("line_start", "?")
            # A single-line finding may omit line_end; reuse the start line.
            line_end = finding.get("line_end", line_start)
            title = finding.get("title", "Issue")
            body = finding.get("body", "")
            suggestion = finding.get("suggestion", "")
            lines.extend(
                [
                    f"{idx}. `{file_path}:{line_start}-{line_end}` ({severity})",
                    f" {title}",
                    f" {body}",
                    f" Suggestion: {suggestion}" if suggestion else " Suggestion: n/a",
                ]
            )
    return "\n".join(lines).strip()

View File

@@ -0,0 +1,290 @@
from __future__ import annotations
import json
import os
import shlex
import subprocess
from fnmatch import fnmatch
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any
import httpx
from gitea_codex_bot.config import Settings
from gitea_codex_bot.services.gitea import GiteaClient, PullRequestContext
from gitea_codex_bot.services.repo_config import RepoReviewConfig, load_repo_review_config
from gitea_codex_bot.types import ParsedCommand
class ReviewError(RuntimeError):
    """Raised when a review result is missing or has an invalid shape."""
def _run_git(args: list[str], cwd: Path | None = None) -> str:
    """Run ``git`` with ``args`` and return its standard output as text.

    Raises ``subprocess.CalledProcessError`` when git exits with a non-zero status.
    """
    command = ["git"] + list(args)
    finished = subprocess.run(command, cwd=cwd, check=True, capture_output=True, text=True)
    return finished.stdout
def checkout_pr(tmpdir: Path, pr: PullRequestContext) -> Path:
    """Clone the PR's head repository into ``tmpdir/repo`` and check out its head commit.

    The clone is shallow (depth 50, no tags); the base and head refs are then
    fetched from ``origin`` before the head SHA is checked out.

    Returns:
        Path of the checked-out working tree.
    """
    target = tmpdir / "repo"
    git_steps = (
        (["clone", "--no-tags", "--depth", "50", pr.clone_url, str(target)], None),
        (["fetch", "origin", pr.base_ref, pr.head_ref], target),
        (["checkout", pr.head_sha], target),
    )
    for git_args, workdir in git_steps:
        _run_git(git_args, cwd=workdir)
    return target
def collect_diff_context(repo_dir: Path, pr: PullRequestContext, max_diff_bytes: int) -> dict[str, Any]:
    """Collect the PR diff and the list of changed files.

    The diff is taken with three-dot syntax between the base and head SHAs and
    cut to ``max_diff_bytes`` UTF-8 bytes when larger.

    Returns:
        A dict with keys ``diff``, ``changed_files`` and ``truncated``.
    """
    rev_range = f"{pr.base_sha}...{pr.head_sha}"
    diff_text = _run_git(["diff", rev_range], cwd=repo_dir)
    names_output = _run_git(["diff", "--name-only", rev_range], cwd=repo_dir)

    stripped_names = (entry.strip() for entry in names_output.splitlines())
    changed = [entry for entry in stripped_names if entry]

    encoded = diff_text.encode("utf-8")
    too_large = len(encoded) > max_diff_bytes
    if too_large:
        # errors="ignore" drops a multi-byte character cut in half by the slice.
        diff_text = encoded[:max_diff_bytes].decode("utf-8", errors="ignore")
    return {"diff": diff_text, "changed_files": changed, "truncated": too_large}
def _apply_ignore_patterns(changed_files: list[str], ignore_patterns: list[str]) -> list[str]:
if not ignore_patterns:
return changed_files
kept: list[str] = []
for path in changed_files:
if any(fnmatch(path, pattern) for pattern in ignore_patterns):
continue
kept.append(path)
return kept
def _collect_changed_file_contents(repo_dir: Path, changed_files: list[str], max_total_bytes: int) -> str:
chunks: list[str] = []
total = 0
for rel in changed_files:
path = repo_dir / rel
if not path.exists() or not path.is_file():
continue
try:
content = path.read_text(encoding="utf-8", errors="ignore")
except OSError:
continue
block = f"\n### {rel}\n{content}\n"
block_bytes = len(block.encode("utf-8"))
if total + block_bytes > max_total_bytes:
break
chunks.append(block)
total += block_bytes
return "".join(chunks).strip()
def _collect_test_output(repo_dir: Path, timeout_seconds: int) -> str:
    """Run ``pytest -q`` in the checkout and return at most 10000 characters of output.

    Stdout and stderr are joined with a newline. Any failure to run the tests
    (missing executable, timeout, ...) is reported as a short message instead
    of being raised.
    """
    # NOTE(review): this executes code from the checked-out PR on the host
    # running the bot; confirm that is acceptable for the deployment.
    pytest_cmd = ["pytest", "-q"]
    try:
        finished = subprocess.run(
            pytest_cmd,
            cwd=repo_dir,
            capture_output=True,
            text=True,
            timeout=timeout_seconds,
            check=False,
        )
        combined = "\n".join((finished.stdout, finished.stderr)).strip()
        return combined[:10000]
    except Exception as exc:
        return f"Test execution unavailable: {exc}"
def _redact_secrets_from_diff(diff: str) -> str:
secret_terms = ("api_key", "token", "secret", "password", "private_key", "-----begin")
redacted_lines: list[str] = []
for line in diff.splitlines():
lower = line.lower()
if any(term in lower for term in secret_terms):
redacted_lines.append("[REDACTED_POTENTIAL_SECRET]")
else:
redacted_lines.append(line)
return "\n".join(redacted_lines)
def _build_prompt(
pr: PullRequestContext,
command: ParsedCommand,
diff_context: dict[str, Any],
repo_cfg: RepoReviewConfig,
*,
changed_file_contents: str,
test_output: str | None,
) -> str:
mode = command.mode if command.name in {"review", "rerun"} else "summary"
return (
"You are reviewing a Gitea pull request.\n\n"
"Focus only on issues introduced by this PR.\n"
"Prioritize correctness, security, data loss, broken behavior, bad migrations, and missing tests.\n"
"Avoid style nitpicks.\n\n"
"Return JSON only with schema:\n"
"{\n"
' "verdict": "correct" | "has_issues",\n'
' "confidence": 0.0,\n'
' "summary": "...",\n'
' "findings": [{"severity":"low|medium|high|critical","file":"...","line_start":1,"line_end":1,"title":"...","body":"...","suggestion":"..."}]\n'
"}\n\n"
f"PR URL: {pr.html_url}\n"
f"Mode: {mode}\n"
f"Repo focus: {', '.join(repo_cfg.focus)}\n"
f"Diff truncated: {diff_context['truncated']}\n"
f"Changed files:\n{os.linesep.join(diff_context['changed_files'])}\n\n"
f"Unified diff:\n{diff_context['diff']}\n\n"
f"Changed file content (optional):\n{changed_file_contents or '(not included)'}\n\n"
f"Test output (optional):\n{test_output or '(not included)'}\n"
)
def _call_openai_review(settings: Settings, prompt: str) -> dict[str, Any]:
    """Send the prompt to the OpenAI Responses endpoint and return the decoded JSON review.

    The first non-empty ``text`` entry found in the response ``output`` items
    is parsed as JSON and returned.

    Raises:
        httpx.HTTPStatusError: when the API answers with an error status.
        ReviewError: when no output text is present in the response.
    """
    request_headers: dict[str, str] = {
        "Authorization": f"Bearer {settings.openai_api_key.get_secret_value()}",
        "Content-Type": "application/json",
    }
    # Organisation and project headers are only sent when configured.
    optional_headers = (
        ("OpenAI-Organization", settings.openai_org_id),
        ("OpenAI-Project", settings.openai_project_id),
    )
    for header_name, header_value in optional_headers:
        if header_value:
            request_headers[header_name] = header_value

    request_body = {
        "model": settings.openai_review_model,
        "input": prompt,
        "text": {"format": {"type": "json_object"}},
        "reasoning": {"effort": settings.openai_reasoning_effort},
    }
    with httpx.Client(timeout=120.0) as client:
        response = client.post("https://api.openai.com/v1/responses", headers=request_headers, json=request_body)
        response.raise_for_status()
        payload = response.json()

    for output_item in payload.get("output", []):
        for part in output_item.get("content", []):
            candidate = part.get("text")
            if candidate:
                return json.loads(candidate)
    raise ReviewError("OpenAI response did not contain JSON output text.")
def _fallback_review(diff_context: dict[str, Any]) -> dict[str, Any]:
findings = []
if "TODO" in diff_context["diff"]:
findings.append(
{
"severity": "low",
"file": "unknown",
"line_start": 1,
"line_end": 1,
"title": "TODO marker in diff",
"body": "The change introduces TODO markers that may indicate incomplete behavior.",
"suggestion": "Resolve or track TODOs before merging.",
}
)
return {
"verdict": "correct" if not findings else "has_issues",
"confidence": 0.4 if not findings else 0.6,
"summary": "Fallback analysis was used because OpenAI review was unavailable.",
"findings": findings,
}
def run_review_for_pr(
    settings: Settings,
    gitea: GiteaClient,
    repo: str,
    pr_number: int,
    command: ParsedCommand,
) -> tuple[dict[str, Any], RepoReviewConfig]:
    """Review a pull request through the OpenAI API, with an offline fallback.

    The prompt is prepared from a fresh checkout. If the API call fails, or its
    result is not a valid review mapping, the failure is logged and the
    fallback analysis is used instead, so the caller always receives a
    normalised result.

    Returns:
        ``(result, repo_cfg)``: the normalised review result and the repository
        configuration loaded from the checkout.
    """
    # Function-scope import: the module-level import block is left untouched.
    import logging

    prompt, diff_context, repo_cfg = prepare_review_prompt(settings, gitea, repo, pr_number, command)
    try:
        # Normalising inside the try lets a malformed model result fall back too.
        result = normalize_review_result(_call_openai_review(settings, prompt))
    except Exception:
        # Log it: a silent fallback hides problems such as an invalid API key.
        logging.getLogger(__name__).exception(
            "OpenAI review failed for %s#%s; using fallback analysis", repo, pr_number
        )
        result = normalize_review_result(_fallback_review(diff_context))
    return result, repo_cfg
def prepare_review_prompt(
    settings: Settings,
    gitea: GiteaClient,
    repo: str,
    pr_number: int,
    command: ParsedCommand,
) -> tuple[str, dict[str, Any], RepoReviewConfig]:
    """Check out the pull request and build everything needed for a review.

    Steps: fetch PR metadata, clone into a temporary directory, load the
    repository config, collect the (size-limited) diff, filter the changed-file
    list through the ignore patterns, redact secret-looking diff lines, and
    optionally add full file contents (``--full``) and test output (mode
    ``tests`` with ``include_tests`` enabled).

    Returns:
        ``(prompt, diff_context, repo_cfg)``.
    """
    pr = gitea.get_pull_request(repo, pr_number)
    with TemporaryDirectory(prefix="gitea-codex-") as scratch:
        checkout = checkout_pr(Path(scratch), pr)
        repo_cfg = load_repo_review_config(checkout)

        # The stricter of the global and per-repository limits applies.
        diff_limit = min(settings.max_diff_bytes, repo_cfg.max_diff_bytes)
        diff_context = collect_diff_context(checkout, pr, diff_limit)
        diff_context["changed_files"] = _apply_ignore_patterns(diff_context["changed_files"], repo_cfg.ignore)
        diff_context["diff"] = _redact_secrets_from_diff(diff_context["diff"])

        file_contents = ""
        if command.full:
            file_contents = _collect_changed_file_contents(
                checkout, diff_context["changed_files"], settings.max_diff_bytes
            )

        tests_text = None
        if repo_cfg.include_tests and command.mode == "tests":
            # Test runs are capped at five minutes regardless of the review budget.
            test_budget = min(settings.max_review_minutes * 60, 300)
            tests_text = _collect_test_output(checkout, timeout_seconds=test_budget)

        prompt = _build_prompt(
            pr,
            command,
            diff_context,
            repo_cfg,
            changed_file_contents=file_contents,
            test_output=tests_text,
        )
    return prompt, diff_context, repo_cfg
def normalize_review_result(result: Any) -> dict[str, Any]:
    """Fill in missing top-level keys of a review result, in place.

    Existing values are never overwritten, even when they are ``None``.

    Raises:
        ReviewError: when ``result`` is not a dict.
    """
    if not isinstance(result, dict):
        raise ReviewError(f"Invalid review result type: {type(result)!r}")
    fallbacks = (
        ("findings", []),
        ("summary", "No summary returned."),
        ("verdict", "has_issues"),
        ("confidence", 0.5),
    )
    for key, fallback in fallbacks:
        result.setdefault(key, fallback)
    return result
def summarize_command(command: ParsedCommand) -> str:
    """Rebuild the ``@codex <name> <args...>`` text for a parsed command."""
    parts = ["@codex", command.name]
    parts.extend(command.arguments)
    return " ".join(parts).strip()
def fix_branch_name(pr_number: int, arguments: list[str] | None = None) -> str:
    """Derive the branch name used for a fix on the given pull request.

    Up to four non-option arguments are lower-cased, joined with ``-`` and
    sanitised (anything that is not alphanumeric or ``-`` becomes ``-``).
    Without usable words the name is ``codex/pr-<n>-fix``.
    """
    slug = ""
    if arguments:
        # Options ("--something") and blank tokens do not contribute to the name.
        kept = [raw.lower().strip() for raw in arguments if raw.strip() and not raw.startswith("--")]
        if kept:
            joined = "-".join(kept[:4])
            sanitised = "".join(char if (char.isalnum() or char == "-") else "-" for char in joined)
            slug = sanitised.strip("-")
    tail = f"fix-{slug}" if slug else "fix"
    return f"codex/pr-{pr_number}-{tail}"
def create_fix_patch_note(command: ParsedCommand) -> str:
    """Return the one-line note describing what a fix command was asked to address.

    The arguments are shell-quoted and joined; without arguments the note
    refers to the latest findings.
    """
    if command.arguments:
        subject = shlex.join(command.arguments)
    else:
        subject = "latest findings"
    return f"Fix command requested for {subject}."
def create_fix_branch(
    pr: PullRequestContext,
    *,
    note: str,
    arguments: list[str] | None = None,
) -> str:
    """Create and force-push a branch containing a fix note for the pull request.

    The PR head is checked out in a temporary directory, a new branch is
    created, ``.codex/fix-note.md`` is written and committed as ``codex-bot``,
    and the branch is force-pushed to ``origin``.

    Returns:
        The name of the pushed branch.
    """
    branch = fix_branch_name(pr.pr_number, arguments=arguments)
    commit_message = f"Codex fix note for PR {pr.pr_number}"
    with TemporaryDirectory(prefix="gitea-codex-fix-") as scratch:
        checkout = checkout_pr(Path(scratch), pr)
        _run_git(["checkout", "-b", branch], cwd=checkout)

        note_dir = checkout / ".codex"
        note_dir.mkdir(parents=True, exist_ok=True)
        note_file = note_dir / "fix-note.md"
        note_file.write_text(f"# Codex Fix Note\n\n{note}\n", encoding="utf-8")

        _run_git(["add", ".codex/fix-note.md"], cwd=checkout)
        # Identity is passed per command so no git config has to exist in the checkout.
        identity = ["-c", "user.name=codex-bot", "-c", "user.email=codex-bot@example.invalid"]
        _run_git([*identity, "commit", "-m", commit_message], cwd=checkout)
        _run_git(["push", "origin", f"{branch}:{branch}", "--force"], cwd=checkout)
    return branch

View File

@@ -0,0 +1,16 @@
from __future__ import annotations
import hashlib
import hmac
def verify_gitea_signature(payload: bytes, secret: str, received_signature: str | None) -> bool:
    """Check a webhook signature against the HMAC-SHA256 of the payload.

    Args:
        payload: Raw request body.
        secret: Shared webhook secret.
        received_signature: Hex digest from the request header, optionally
            prefixed with ``sha256=``; ``None`` or empty means unsigned.

    Returns:
        ``True`` only when the signature matches. A missing or malformed
        signature yields ``False`` and never raises.
    """
    if not received_signature:
        return False
    expected = hmac.new(secret.encode("utf-8"), payload, hashlib.sha256).hexdigest()
    # Strip first so the prefix is still removed when preceded by whitespace.
    normalized = received_signature.strip().removeprefix("sha256=").strip()
    # Compare bytes: compare_digest raises TypeError for str arguments that
    # contain non-ASCII characters, which an attacker-controlled header can carry.
    return hmac.compare_digest(expected.encode("ascii"), normalized.encode("utf-8"))
def payload_digest(payload: bytes) -> str:
    """Return the hex-encoded SHA-256 digest of ``payload``."""
    hasher = hashlib.sha256()
    hasher.update(payload)
    return hasher.hexdigest()

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Literal
# Names of the commands the bot understands after the "@codex" mention.
CommandName = Literal["review", "explain", "fix", "ignore", "rerun"]


@dataclass(slots=True)
class ParsedCommand:
    """A bot command extracted from a comment body."""

    # Which command was requested.
    name: CommandName
    # Command text this object was built from.
    raw: str
    # Review mode; "summary" unless a parser selects another one.
    mode: str = "summary"
    # Whether a full review was requested.
    full: bool = False
    # Whether a fix should be delivered as a branch.
    branch_fix: bool = False
    # Whitespace-separated tokens following the command name.
    arguments: list[str] = field(default_factory=list)

View File

View File

@@ -0,0 +1,110 @@
from __future__ import annotations
import json
import subprocess
import uuid
from pathlib import Path
from typing import Any
from gitea_codex_bot.config import Settings
from gitea_codex_bot.services.gitea import GiteaClient
from gitea_codex_bot.services.reviewer import normalize_review_result, prepare_review_prompt, run_review_for_pr
from gitea_codex_bot.types import ParsedCommand
def run_review_ephemeral(
    settings: Settings,
    *,
    repo: str,
    pr_number: int,
    command: ParsedCommand,
) -> dict[str, Any]:
    """Run the review inside a throwaway Docker container, with an API fallback.

    The prompt is prepared on the host and piped to ``codex exec`` running in a
    fresh container based on ``settings.review_runner_image``. If the container
    run fails for any reason (non-zero exit, timeout, unparsable output), the
    failure is logged and the review is performed via ``run_review_for_pr``.

    On timeout ``subprocess.run`` only kills the local ``docker`` CLI process,
    so the container is killed explicitly; ``--rm`` then removes it.

    Returns:
        The normalised review result.
    """
    # Function-scope import: this module has no module-level logger.
    import logging

    log = logging.getLogger(__name__)
    gitea = GiteaClient(settings)
    prompt, _diff_context, _repo_cfg = prepare_review_prompt(settings, gitea, repo, pr_number, command)
    container_name = f"codex-review-{uuid.uuid4().hex[:12]}"
    install_and_run = (
        "set -euo pipefail; "
        "npm install -g @openai/codex >/tmp/codex-install.log 2>&1; "
        "codex exec --json -m gpt-5"
    )
    cmd = [
        "docker",
        "run",
        "--rm",
        "-i",
        "--name",
        container_name,
        # "-e NAME" without a value forwards the variable from the bot's environment.
        "-e",
        "OPENAI_API_KEY",
        "-e",
        "OPENAI_ORG_ID",
        "-e",
        "OPENAI_PROJECT_ID",
        "-e",
        "CODEX_DISABLE_TELEMETRY=1",
        settings.review_runner_image,
        "bash",
        "-lc",
        install_and_run,
    ]
    try:
        completed = subprocess.run(
            cmd,
            input=prompt,
            text=True,
            check=True,
            capture_output=True,
            timeout=settings.max_review_minutes * 60,
        )
        parsed = _parse_codex_exec_stdout(completed.stdout)
        return normalize_review_result(parsed)
    except subprocess.TimeoutExpired:
        log.warning("Review container %s timed out; killing it and falling back", container_name)
        try:
            # Best effort: the container may already be gone.
            subprocess.run(["docker", "kill", container_name], capture_output=True, check=False, timeout=30)
        except (OSError, subprocess.SubprocessError):
            log.warning("Could not kill review container %s", container_name, exc_info=True)
    except Exception:
        log.exception("Review container %s failed; falling back to direct API review", container_name)
    result, _repo_cfg = run_review_for_pr(settings, gitea, repo, pr_number, command)
    return result
def ensure_workdir(path: str) -> Path:
    """Create ``path`` (including parents) if needed and return it as a ``Path``."""
    workdir = Path(path)
    workdir.mkdir(exist_ok=True, parents=True)
    return workdir
def _parse_codex_exec_stdout(stdout: str) -> dict[str, Any]:
last_text: str | None = None
for line in stdout.splitlines():
line = line.strip()
if not line:
continue
try:
payload = json.loads(line)
except json.JSONDecodeError:
continue
if isinstance(payload, dict) and {"verdict", "summary", "findings"}.issubset(payload.keys()):
return payload
extracted = _extract_text(payload)
if extracted:
last_text = extracted
if not last_text:
raise RuntimeError("codex exec output did not include parseable JSON text")
return json.loads(last_text)
def _extract_text(payload: Any) -> str | None:
if isinstance(payload, str):
return payload
if isinstance(payload, dict):
for key in ("text", "message", "content", "output"):
value = payload.get(key)
text = _extract_text(value)
if text:
return text
for value in payload.values():
text = _extract_text(value)
if text:
return text
if isinstance(payload, list):
for item in payload:
text = _extract_text(item)
if text:
return text
return None

View File

@@ -0,0 +1,135 @@
from __future__ import annotations
import asyncio
import logging
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session
from gitea_codex_bot.config import Settings
from gitea_codex_bot.db import get_session_factory
from gitea_codex_bot.models import ReviewJob
from gitea_codex_bot.services.comments import get_persistent_review_comment_id, upsert_persistent_review_comment_id
from gitea_codex_bot.services.gitea import GiteaClient
from gitea_codex_bot.services.jobs import claim_next_job, finish_job
from gitea_codex_bot.services.review_format import format_result_comment
from gitea_codex_bot.services.reviewer import create_fix_branch, create_fix_patch_note
from gitea_codex_bot.types import ParsedCommand
from gitea_codex_bot.workers.container_runner import run_review_ephemeral
logger = logging.getLogger(__name__)


def _command_from_job(job: ReviewJob) -> ParsedCommand:
    """Rebuild the ParsedCommand for a stored job from its name and argument text.

    Only the command name and the space-joined arguments are persisted, so the
    derived flags are recomputed here. The review mode is derived with the same
    rules the comment parser applies (``--full`` selects ``"full"``, and the
    first of ``security``/``performance``/``tests`` present overrides it).
    Without this a queued ``@codex review security`` would run in the default
    ``"summary"`` mode.
    """
    args = job.command_args.split() if job.command_args else []
    mode = "summary"
    if job.command == "review":
        if "--full" in args:
            mode = "full"
        for candidate in ("security", "performance", "tests"):
            if candidate in args:
                mode = candidate
                break
    return ParsedCommand(
        name=job.command,
        raw=f"@codex {job.command}",
        mode=mode,
        arguments=args,
        full="--full" in args,
        branch_fix="--branch" in args,
    )
def _handle_non_review_command(
    settings: Settings,
    session: Session,
    gitea: GiteaClient,
    job: ReviewJob,
    command: ParsedCommand,
) -> tuple[bool, bool, dict[str, Any] | None, str | None]:
    """Handle the ``ignore``, ``explain`` and ``fix`` commands.

    Returns:
        ``(handled, skipped, result, error)``. ``handled`` is ``False`` for any
        other command, which tells the caller to run a review. ``skipped`` is
        passed to ``finish_job`` as-is, ``result`` is the payload to store and
        ``error`` is set only when creating a fix branch failed.
    """
    verb = command.name

    if verb == "ignore":
        return True, True, {"summary": "Ignore command acknowledged. No review run executed."}, None

    if verb == "explain":
        # The newest successful review/rerun of the same PR provides the summary.
        previous_query = (
            select(ReviewJob)
            .where(
                ReviewJob.repo == job.repo,
                ReviewJob.pr_number == job.pr_number,
                ReviewJob.command.in_(["review", "rerun"]),
                ReviewJob.status == "succeeded",
            )
            .order_by(ReviewJob.id.desc())
            .limit(1)
        )
        previous = session.execute(previous_query).scalar_one_or_none()
        if previous and previous.result_json:
            explanation = f"## Codex Explain\n\n{previous.result_json.get('summary', 'No previous summary available.')}"
        else:
            explanation = "## Codex Explain\n\nNo previous result found for this command."
        gitea.post_issue_comment(job.repo, job.pr_number, explanation)
        return True, True, {"summary": explanation}, None

    if verb != "fix":
        return False, False, None, None

    if not settings.enable_fix_commands:
        disabled_notice = "⚠️ `@codex fix` is disabled on this bot instance."
        gitea.post_issue_comment(job.repo, job.pr_number, disabled_notice)
        return True, True, {"summary": disabled_notice}, None

    note = create_fix_patch_note(command)
    if not command.branch_fix:
        gitea.post_issue_comment(job.repo, job.pr_number, f"## Codex Fix\n\n{note}\n\nPatch suggestion mode.")
        return True, True, {"summary": note, "mode": "patch"}, None

    try:
        pr = gitea.get_pull_request(job.repo, job.pr_number)
        branch = create_fix_branch(pr, note=note, arguments=command.arguments)
        announcement = f"## Codex Fix\n\n{note}\n\nCreated branch `{branch}`."
        gitea.post_issue_comment(job.repo, job.pr_number, announcement)
        return True, True, {"summary": note, "mode": "branch", "branch": branch}, None
    except Exception as exc:
        return True, False, None, f"Failed to create fix branch: {exc}"
def process_one_job(settings: Settings) -> bool:
    """Claim and process a single queued job.

    Non-review commands are handled directly; review commands run the
    ephemeral reviewer and create or update the persistent review comment.
    Fork PRs are skipped unless ``settings.allow_untrusted_forks`` is set.

    Everything after the claim runs inside one ``try`` block, so any failure,
    including one raised while handling a non-review command, marks the job as
    failed instead of leaving it in the running state and propagating to the
    worker loop.

    Returns:
        ``False`` when no job was available, ``True`` when a job was processed
        (successfully or not).
    """
    session_factory = get_session_factory()
    with session_factory() as session:
        job = claim_next_job(session)
        if not job:
            return False
    command = _command_from_job(job)
    gitea = GiteaClient(settings)
    try:
        with session_factory() as session:
            db_job = session.execute(select(ReviewJob).where(ReviewJob.id == job.id)).scalar_one()
            handled, skipped, result, error = _handle_non_review_command(settings, session, gitea, db_job, command)
            if handled:
                finish_job(
                    session,
                    job_id=db_job.id,
                    success=error is None,
                    skipped=skipped,
                    result=result,
                    error_message=error,
                )
                return True

        pr_ctx = gitea.get_pull_request(job.repo, job.pr_number)
        if pr_ctx.is_fork and not settings.allow_untrusted_forks:
            with session_factory() as session:
                skip_message = "Skipped review for fork PR because `ALLOW_UNTRUSTED_FORKS=false`."
                gitea.post_issue_comment(job.repo, job.pr_number, skip_message)
                finish_job(
                    session,
                    job_id=job.id,
                    success=True,
                    skipped=True,
                    result={"summary": skip_message},
                    error_message=None,
                )
            return True

        result = run_review_ephemeral(settings, repo=job.repo, pr_number=job.pr_number, command=command)
        comment_body = format_result_comment(job.head_sha, result)
        with session_factory() as session:
            # Reuse the bot's existing review comment on this PR when there is one.
            comment_id = get_persistent_review_comment_id(session, job.repo, job.pr_number)
            if comment_id:
                gitea.edit_issue_comment(job.repo, comment_id, comment_body)
            else:
                comment_id = gitea.post_issue_comment(job.repo, job.pr_number, comment_body)
            upsert_persistent_review_comment_id(
                session,
                repo=job.repo,
                pr_number=job.pr_number,
                head_sha=job.head_sha,
                comment_id=comment_id,
            )
            finish_job(session, job_id=job.id, success=True, skipped=False, result=result, error_message=None)
    except Exception as exc:
        logger.exception("Review job failed id=%s", job.id)
        # A fresh session is used: the failing one may be in an unusable state.
        with session_factory() as session:
            finish_job(session, job_id=job.id, success=False, skipped=False, result=None, error_message=str(exc))
    return True
async def worker_loop(settings: Settings, stop_event: asyncio.Event) -> None:
    """Process jobs until ``stop_event`` is set.

    Jobs run in a worker thread so the event loop stays responsive. When no
    job was processed the loop sleeps for one second before polling again.
    An unexpected error from one iteration (for example the database being
    unreachable) is logged and treated like an idle iteration, so a transient
    failure does not end the worker for good.
    """
    while not stop_event.is_set():
        try:
            processed = await asyncio.to_thread(process_one_job, settings)
        except Exception:
            # CancelledError is a BaseException, so cancellation still propagates.
            logger.exception("Worker iteration failed; retrying after a pause")
            processed = False
        if not processed:
            await asyncio.sleep(1.0)

View File

@@ -0,0 +1,31 @@
from __future__ import annotations
import json
import sys
from gitea_codex_bot.config import get_settings
from gitea_codex_bot.services.gitea import GiteaClient
from gitea_codex_bot.services.reviewer import run_review_for_pr
from gitea_codex_bot.types import ParsedCommand
def main() -> int:
    """Read a review request as JSON from stdin, run it and print the result as JSON.

    The request must contain ``repo``, ``pr_number`` and a ``command`` object
    with at least ``name``; the other command fields are optional.

    Returns:
        Process exit code ``0``.
    """
    settings = get_settings()
    request = json.loads(sys.stdin.read())
    spec = request["command"]
    verb = spec["name"]
    command = ParsedCommand(
        name=verb,
        raw=f"@codex {verb}",
        mode=spec.get("mode", "summary"),
        full=bool(spec.get("full", False)),
        branch_fix=bool(spec.get("branch_fix", False)),
        arguments=list(spec.get("arguments", [])),
    )
    client = GiteaClient(settings)
    review, _repo_cfg = run_review_for_pr(settings, client, request["repo"], int(request["pr_number"]), command)
    print(json.dumps(review))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

44
tests/conftest.py Normal file
View File

@@ -0,0 +1,44 @@
from __future__ import annotations
from collections.abc import Generator
import os
import pytest
from gitea_codex_bot.config import get_settings
from gitea_codex_bot.db import Base, get_engine, get_session_factory
@pytest.fixture(autouse=True)
def _env_defaults(monkeypatch: pytest.MonkeyPatch, tmp_path, request: pytest.FixtureRequest) -> Generator[None, None, None]:
    """Give every test a complete environment and, by default, a fresh schema.

    The database URL comes from ``TEST_DATABASE_URL`` when set, otherwise a
    SQLite file under ``tmp_path`` is used. Tests marked ``no_schema`` skip the
    table creation and teardown. Cached settings, engine and session factory
    are cleared before and after the test.
    """
    fixed_env = {
        "GITEA_BASE_URL": "https://gitea.test",
        "GITEA_TOKEN": "token",
        "GITEA_BOT_USERNAME": "codex-bot",
        "GITEA_WEBHOOK_SECRET": "secret",
        "OPENAI_API_KEY": "openai-key",
        "ALLOWED_REPOS": "acme/repo",
        "COOLDOWN_SECONDS": "60",
        "WEBHOOK_MODE": "repo",
        "DB_HOST": "localhost",
        "DB_PORT": "3306",
        "DB_NAME": "ignored",
        "DB_USER": "ignored",
        "DB_PASSWORD": "ignored",
    }
    for key, value in fixed_env.items():
        monkeypatch.setenv(key, value)

    override_url = os.getenv("TEST_DATABASE_URL", "").strip()
    database_url = override_url or f"sqlite+pysqlite:///{tmp_path / 'test.db'}"
    monkeypatch.setenv("DATABASE_URL", database_url)
    monkeypatch.setenv("WORKDIR", str(tmp_path / "work"))

    def reset_caches() -> None:
        for cached in (get_settings, get_engine, get_session_factory):
            cached.cache_clear()

    reset_caches()
    engine = get_engine()
    manage_schema = request.node.get_closest_marker("no_schema") is None
    if manage_schema:
        Base.metadata.create_all(bind=engine)
    yield
    if manage_schema:
        Base.metadata.drop_all(bind=engine)
    reset_caches()

20
tests/test_commands.py Normal file
View File

@@ -0,0 +1,20 @@
from gitea_codex_bot.services.commands import parse_command
def test_parse_review_command_modes() -> None:
    """A review command with a mode word and ``--full`` exposes both on the parsed result."""
    comment_text = "@codex review security --full"
    parsed = parse_command(comment_text)

    assert parsed is not None
    assert parsed.name == "review"
    assert parsed.mode == "security"
    assert parsed.full is True
def test_parse_fix_branch() -> None:
    """A fix command carrying ``--branch`` is parsed with ``branch_fix`` enabled."""
    comment_text = "@codex fix --branch finding 2"
    parsed = parse_command(comment_text)

    assert parsed is not None
    assert parsed.name == "fix"
    assert parsed.branch_fix is True
def test_invalid_command_returns_none() -> None:
    """Text that is not addressed to the bot does not parse into a command."""
    parsed = parse_command("hello")
    assert parsed is None

6
tests/test_config.py Normal file
View File

@@ -0,0 +1,6 @@
from gitea_codex_bot.config import get_settings
def test_openai_api_key_required() -> None:
    """The settings object exposes the OpenAI key as a secret holding the expected test value."""
    api_key = get_settings().openai_api_key
    assert api_key.get_secret_value() == "openai-key"

38
tests/test_jobs.py Normal file
View File

@@ -0,0 +1,38 @@
from __future__ import annotations
from sqlalchemy.exc import IntegrityError
from gitea_codex_bot.db import get_session_factory
from gitea_codex_bot.services.jobs import cooldown_remaining_seconds, enqueue_job, persist_webhook_event
from gitea_codex_bot.types import ParsedCommand
def test_persist_webhook_dedupe() -> None:
    """Persisting the same delivery twice reports True the first time and False the second."""
    delivery = dict(
        delivery_id="d1",
        event_name="issue_comment",
        repo="acme/repo",
        comment_id=1,
        payload=b"{}",
    )
    new_session = get_session_factory()
    with new_session() as session:
        first = persist_webhook_event(session, **delivery)
        second = persist_webhook_event(session, **delivery)
        assert first is True
        assert second is False
def test_enqueue_and_cooldown() -> None:
    """After a job is enqueued for a PR, the reported cooldown is never negative."""
    new_session = get_session_factory()
    review_cmd = ParsedCommand(name="review", raw="@codex review")
    with new_session() as session:
        enqueue_job(
            session,
            repo="acme/repo",
            pr_number=42,
            head_sha="abc",
            trigger_comment_id=100,
            requested_by="user",
            command=review_cmd,
        )
        remaining = cooldown_remaining_seconds(session, "acme/repo", 42, 60)
        assert remaining >= 0
def test_trigger_comment_unique() -> None:
    """Enqueueing two jobs for the same trigger comment raises ``IntegrityError`` on the second."""
    new_session = get_session_factory()
    review_cmd = ParsedCommand(name="review", raw="@codex review")
    job_fields = dict(
        repo="acme/repo",
        pr_number=7,
        head_sha="x",
        trigger_comment_id=321,
        requested_by="user",
        command=review_cmd,
    )
    with new_session() as session:
        enqueue_job(session, **job_fields)

        duplicate_raised = False
        try:
            enqueue_job(session, **job_fields)
        except IntegrityError:
            # Put the session back into a usable state after the failed insert.
            session.rollback()
            duplicate_raised = True

        assert duplicate_raised is True

15
tests/test_migrations.py Normal file
View File

@@ -0,0 +1,15 @@
from __future__ import annotations
from alembic import command
from alembic.config import Config
import pytest
@pytest.mark.no_schema
def test_alembic_upgrade_and_downgrade() -> None:
    """Migrations apply to head, roll back to base, and apply to head again without error."""
    alembic_cfg = Config("alembic.ini")
    # Up, down, and up again: the second upgrade checks that the downgrade left a clean slate.
    command.upgrade(alembic_cfg, "head")
    command.downgrade(alembic_cfg, "base")
    command.upgrade(alembic_cfg, "head")

15
tests/test_security.py Normal file
View File

@@ -0,0 +1,15 @@
import hmac
import hashlib
from gitea_codex_bot.services.security import verify_gitea_signature
def test_verify_signature_success() -> None:
    """A hex HMAC-SHA256 of the body, keyed with the shared secret, is accepted."""
    body = b'{"a":1}'
    shared_secret = "abc"
    mac = hmac.new(shared_secret.encode(), body, hashlib.sha256)
    assert verify_gitea_signature(body, shared_secret, mac.hexdigest())
def test_verify_signature_failure() -> None:
    """A signature that does not match the body and secret is rejected."""
    accepted = verify_gitea_signature(b"x", "abc", "deadbeef")
    assert not accepted

36
tests/test_transitions.py Normal file
View File

@@ -0,0 +1,36 @@
from __future__ import annotations
from sqlalchemy import select
from gitea_codex_bot.db import get_session_factory
from gitea_codex_bot.models import JobStatus, ReviewJob
from gitea_codex_bot.services.jobs import claim_next_job, enqueue_job, finish_job
from gitea_codex_bot.types import ParsedCommand
def test_claim_and_transition() -> None:
    """Walk one job through enqueue, claim and finish, using a fresh session for each step."""
    new_session = get_session_factory()
    review_cmd = ParsedCommand(name="review", raw="@codex review")

    # Step 1: enqueue.
    with new_session() as session:
        job = enqueue_job(
            session,
            repo="acme/repo",
            pr_number=314,
            head_sha="deadbeef",
            trigger_comment_id=9901,
            requested_by="alice",
            command=review_cmd,
        )

    # Step 2: claiming hands back that same job, now marked as running.
    with new_session() as session:
        claimed = claim_next_job(session)
        assert claimed is not None
        assert claimed.id == job.id
        assert claimed.status == JobStatus.running

    # Step 3: report a successful outcome.
    outcome = {"summary": "ok"}
    with new_session() as session:
        finish_job(session, job_id=job.id, success=True, skipped=False, result=outcome, error_message=None)

    # Step 4: the stored row reflects success and carries a result.
    with new_session() as session:
        query = select(ReviewJob).where(ReviewJob.id == job.id)
        loaded = session.execute(query).scalar_one()
        assert loaded.status == JobStatus.succeeded
        assert loaded.result_json is not None

81
tests/test_webhook.py Normal file
View File

@@ -0,0 +1,81 @@
from __future__ import annotations
import hashlib
import hmac
import json
from typing import Any
from fastapi.testclient import TestClient
from gitea_codex_bot.main import app
def _sign(payload: bytes) -> str:
    """Return the hex HMAC-SHA256 of ``payload`` keyed with the test webhook secret ``b"secret"``."""
    mac = hmac.new(key=b"secret", msg=payload, digestmod=hashlib.sha256)
    return mac.hexdigest()
def _payload(comment_body: str, *, username: str = "alice", comment_id: int = 11) -> dict[str, Any]:
    """Build a minimal issue-comment webhook body for pull request #9 of ``acme/repo``.

    Args:
        comment_body: Text placed in ``comment.body``.
        username: Value for ``sender.username``.
        comment_id: Value for ``comment.id``.

    Returns:
        A JSON-serialisable dict with ``repository``, ``sender``, ``comment``,
        ``issue`` and ``pull_request`` sections.
    """
    event: dict[str, Any] = {}
    event["repository"] = {"full_name": "acme/repo"}
    event["sender"] = {"username": username}
    event["comment"] = {"id": comment_id, "body": comment_body}
    event["issue"] = {"number": 9, "pull_request": {"url": "x"}}
    event["pull_request"] = {"head": {"sha": "abcdef123"}}
    return event
def test_webhook_rejects_bad_signature() -> None:
    """A request whose signature header does not match the body is answered with 401."""
    bad_headers = {"X-Gitea-Event": "issue_comment", "X-Gitea-Signature": "bad"}
    client = TestClient(app)
    response = client.post("/webhook/gitea", content=b"{}", headers=bad_headers)
    assert response.status_code == 401
def test_webhook_ignores_bot_comment(monkeypatch) -> None:
    """A correctly signed comment authored by the bot account is acknowledged but ignored."""
    client = TestClient(app)
    body = json.dumps(_payload("@codex review", username="codex-bot")).encode()
    headers = {
        "X-Gitea-Event": "issue_comment",
        "X-Gitea-Delivery": "d-1",
        "X-Gitea-Signature": _sign(body),
        "Content-Type": "application/json",
    }

    response = client.post("/webhook/gitea", content=body, headers=headers)

    assert response.status_code == 200
    assert response.json()["reason"] == "bot comment ignored"
def test_webhook_accepts_review_and_queues(monkeypatch) -> None:
    """A signed review command from a regular user is queued and a comment is posted on the PR."""
    posted_comments: list[str] = []

    def _record_comment(self, repo: str, pr_number: int, body: str) -> int:
        # Stand-in for the real Gitea call: remember the body, return a fixed comment id.
        posted_comments.append(body)
        return 100

    monkeypatch.setattr("gitea_codex_bot.services.gitea.GiteaClient.post_issue_comment", _record_comment)

    client = TestClient(app)
    event = _payload("@codex review security", username="alice", comment_id=111)
    body = json.dumps(event).encode()
    headers = {
        "X-Gitea-Event": "issue_comment",
        "X-Gitea-Delivery": "d-2",
        "X-Gitea-Signature": _sign(body),
        "Content-Type": "application/json",
    }

    response = client.post("/webhook/gitea", content=body, headers=headers)

    assert response.status_code == 200
    assert response.json()["status"] == "queued"
    assert posted_comments