diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..0dec048 --- /dev/null +++ b/.env.example @@ -0,0 +1,46 @@ +# Base URL of your self-hosted Gitea instance. +GITEA_BASE_URL=https://gitea.reversed.dev + +# Bot account token used to read PRs and write comments. +GITEA_TOKEN=replace +GITEA_BOT_USERNAME=codex-bot + +# Shared secret configured on the Gitea webhook. +GITEA_WEBHOOK_SECRET=replace + +# OpenAI API credentials for Codex review generation. +OPENAI_API_KEY=replace +OPENAI_PROJECT_ID= +OPENAI_ORG_ID= + +# Comma-separated allowlist of repositories this bot may process. +# Example: space/gitea-codex,space/another-repo +ALLOWED_REPOS=space/gitea-codex + +COOLDOWN_SECONDS=60 + +# WEBHOOK_MODE is informational for your deployment model: +# - repo: you configured repository-level webhooks in Gitea. +# - global: you configured one instance-level/admin webhook in Gitea. +# This bot does NOT auto-provision webhooks. Admin config is manual. +WEBHOOK_MODE=repo + +DB_HOST=mariadb +DB_PORT=3306 +DB_NAME=gitea_codex +DB_USER=gitea_codex +DB_PASSWORD=replace + +WORKDIR=/var/lib/gitea-codex/worktrees +MAX_DIFF_BYTES=200000 +MAX_REVIEW_MINUTES=10 +CONCURRENCY=1 + +# Image used for ephemeral job containers (Node + npm + Codex CLI install). +REVIEW_RUNNER_IMAGE=node:22-bookworm-slim + +# Keep false for review-only mode. +ENABLE_FIX_COMMANDS=false + +# Security: fork PRs are skipped unless explicitly enabled. 
+ALLOW_UNTRUSTED_FORKS=false diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml new file mode 100644 index 0000000..22ebe82 --- /dev/null +++ b/.gitea/workflows/ci.yml @@ -0,0 +1,107 @@ +name: ci + +on: + push: + branches: [ main ] + tags: [ 'v*' ] + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + services: + mariadb: + image: mariadb:11 + env: + MARIADB_DATABASE: gitea_codex + MARIADB_USER: gitea_codex + MARIADB_PASSWORD: gitea_codex + MARIADB_ROOT_PASSWORD: rootpass + ports: + - 3306:3306 + options: >- + --health-cmd "mariadb-admin ping -h localhost -uroot -prootpass" + --health-interval 10s + --health-timeout 5s + --health-retries 10 + + env: + GITEA_BASE_URL: https://gitea.reversed.dev + GITEA_TOKEN: test + GITEA_BOT_USERNAME: codex-bot + GITEA_WEBHOOK_SECRET: testsecret + OPENAI_API_KEY: test-openai + ALLOWED_REPOS: org/repo + COOLDOWN_SECONDS: 60 + WEBHOOK_MODE: repo + DB_HOST: 127.0.0.1 + DB_PORT: 3306 + DB_NAME: gitea_codex + DB_USER: gitea_codex + DB_PASSWORD: gitea_codex + TEST_DATABASE_URL: mysql+pymysql://gitea_codex:gitea_codex@127.0.0.1:3306/gitea_codex?charset=utf8mb4 + WORKDIR: /tmp/work + MAX_DIFF_BYTES: 200000 + MAX_REVIEW_MINUTES: 10 + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Install deps + run: | + python -m pip install --upgrade pip + pip install -e .[dev] + - name: Run Alembic migrations + run: alembic upgrade head + - name: Run tests + run: pytest + + publish: + runs-on: ubuntu-latest + needs: test + if: gitea.event_name == 'push' + env: + REGISTRY: gitea.reversed.dev + IMAGE_NAME: space/gitea-codex + steps: + - uses: actions/checkout@v4 + - uses: docker/setup-buildx-action@v3 + - name: Login to Gitea container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ secrets.REGISTRY_USERNAME }} + password: ${{ secrets.REGISTRY_PASSWORD }} + - name: Build and push tags + shell: bash + env: + CI_SHA: ${{ 
gitea.sha }} + CI_REF_NAME: ${{ gitea.ref_name }} + run: | + set -euo pipefail + IMAGE="${REGISTRY}/${IMAGE_NAME}" + SHA_TAG="sha-${CI_SHA::12}" + REF_TAG="${CI_REF_NAME}" + docker buildx build --push \ + -t "${IMAGE}:${SHA_TAG}" \ + -t "${IMAGE}:${REF_TAG}" \ + . + if [ "${CI_REF_NAME}" = "main" ]; then + docker buildx build --push -t "${IMAGE}:latest" . + fi + - name: Publish image summary + shell: bash + env: + CI_SHA: ${{ gitea.sha }} + CI_REF_NAME: ${{ gitea.ref_name }} + run: | + set -euo pipefail + IMAGE="${REGISTRY}/${IMAGE_NAME}" + echo "Published image tags:" >> "${GITHUB_STEP_SUMMARY}" + echo "- ${IMAGE}:${CI_REF_NAME}" >> "${GITHUB_STEP_SUMMARY}" + echo "- ${IMAGE}:sha-${CI_SHA::12}" >> "${GITHUB_STEP_SUMMARY}" + if [ "${CI_REF_NAME}" = "main" ]; then + echo "- ${IMAGE}:latest" >> "${GITHUB_STEP_SUMMARY}" + fi diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..29be0aa --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +__pycache__/ +.pytest_cache/ +.venv/ +.env +*.pyc +worktrees/ +.mypy_cache/ +.coverage +htmlcov/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..4bf7766 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.12-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +RUN apt-get update && apt-get install -y --no-install-recommends git docker.io ca-certificates && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY pyproject.toml README.md /app/ +COPY src /app/src +COPY alembic.ini /app/ +COPY alembic /app/alembic + +RUN pip install --no-cache-dir . 
+ +EXPOSE 8000 +CMD ["uvicorn", "gitea_codex_bot.main:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/Idea.md b/Idea.md new file mode 100644 index 0000000..d46a422 --- /dev/null +++ b/Idea.md @@ -0,0 +1,245 @@ +Architecture: + +```text +Gitea + └─ webhook: pull_request_comment / issue_comment + └─ gitea-codex-bot API + ├─ verifies X-Gitea-Signature + ├─ checks body starts with @codex review + ├─ queues review job + └─ worker: + ├─ clones repo / fetches PR branches + ├─ builds git diff + context + ├─ runs codex headless + ├─ parses JSON findings + └─ posts review comment as codex-bot +``` + +Use a real Gitea user, e.g. `codex-bot`. Give it a token with minimum access: read repo, read PRs/issues, write comments. Do not use your personal admin token. Gitea exposes Swagger/OpenAPI per instance at `/api/swagger` and `/swagger.v1.json`, so you can wire against your actual server version instead of guessing endpoints. ([Gitea Documentation][3]) + +MVP behavior: + +```text +User comments: +@codex review + +Bot replies: +👀 Codex review queued for commit abc123... + +Later edits/posts: +## Codex Review + +Verdict: patch mostly correct +Confidence: 0.78 + +Findings: +1. src/auth.ts:42-55 + Token validation accepts expired tokens in one path. + +2. api/users.ts:88 + Missing permission check before update. + +No blocking issues found in tests. +``` + +For v1, post one normal PR timeline comment. Do not fight inline comments yet. Gitea has PR review webhook concepts, but line-level diff review API support can be version-sensitive/awkward; there are still recent reports about API-token support for diff-level review comments being unclear. ([Gitea Documentation][1]) Summary comments are reliable and still useful. 
+ +Core trigger logic: + +```ts +if (event !== "pull_request_comment" && event !== "issue_comment") return; +if (!payload.is_pull && !payload.pull_request) return; +if (payload.sender.username === "codex-bot") return; +if (!payload.comment.body.trim().startsWith("@codex review")) return; +enqueueReview(payload.repository.full_name, payload.pull_request.number); +``` + +Job flow: + +```text +1. Verify webhook HMAC. +2. Dedupe by delivery ID/comment ID. +3. Parse command: + @codex review + @codex review security + @codex review tests + @codex review --full +4. Create “queued” comment. +5. Clone/fetch repo into isolated temp dir. +6. Checkout PR head. +7. Generate: + git diff base...head + changed file list + optional full changed-file content + optional test output +8. Run Codex headless with JSON schema. +9. Validate JSON. +10. Post/update review comment. +``` + +Use SQLite first: + +```sql +reviews( + id, + repo, + pr_number, + head_sha, + trigger_comment_id, + status, + requested_by, + created_at, + updated_at, + result_json +) +``` + +Suggested service stack: + +```text +Backend: Python FastAPI or Node/TS Fastify +Queue: SQLite jobs first, Redis later +Runner: Docker worker container +Storage: /var/lib/gitea-codex-bot +Auth: bot PAT + webhook secret +Deployment: docker compose +``` + +Config: + +```env +GITEA_BASE_URL=https://git.example.com +GITEA_TOKEN=... +GITEA_BOT_USERNAME=codex-bot +GITEA_WEBHOOK_SECRET=... +OPENAI_API_KEY=... +WORKDIR=/var/lib/gitea-codex/worktrees +MAX_DIFF_BYTES=200000 +MAX_REVIEW_MINUTES=10 +CONCURRENCY=1 +``` + +Good commands to support later: + +```text +@codex review +@codex review security +@codex review performance +@codex review tests +@codex review --full +@codex explain +@codex fix +@codex fix --branch +@codex ignore +@codex rerun +``` + +Best v2 feature: persistent review comment. Instead of spamming new comments, the bot finds its previous comment on that PR and edits it: + +```text + +## Codex Review +... 
+``` + +Then reruns replace the same block. + +Best v3 feature: fixes. User comments: + +```text +@codex fix finding 2 +``` + +Bot creates a branch: + +```text +codex/pr-42-fix-permission-check +``` + +Then opens a PR or pushes to the existing PR branch only if allowed. Keep this disabled by default. Review-only is safer. + +Security rules that matter: + +```text +- Verify X-Gitea-Signature. +- Ignore bot’s own comments. +- Allowlist repos/orgs. +- Never run on untrusted fork PRs unless sandboxed hard. +- No Docker socket mount. +- No host filesystem mount except temp workdir. +- Timeout every job. +- Limit diff size. +- Redact .env, secrets, keys. +- Use bot token, not admin token. +- Log prompt + result, but not secrets. +``` + +Prompt shape for Codex: + +```text +You are reviewing a Gitea pull request. + +Focus only on issues introduced by this PR. +Prioritize correctness, security, data loss, broken behavior, bad migrations, and missing tests. +Avoid style nitpicks. + +Return JSON: +{ + "verdict": "correct" | "has_issues", + "confidence": 0.0-1.0, + "summary": "...", + "findings": [ + { + "severity": "low|medium|high|critical", + "file": "...", + "line_start": 1, + "line_end": 1, + "title": "...", + "body": "...", + "suggestion": "..." + } + ] +} +``` + +Practical build order: + +```text +1. Make bot account + token. +2. Add webhook receiver. +3. Verify signature + parse @codex review. +4. Post “queued” comment. +5. Clone repo and generate diff. +6. Run Codex headless. +7. Post one summary comment. +8. Add dedupe + SQLite. +9. Add per-repo config file. +10. Add optional inline comments/fix branches later. 
+``` + +Per-repo config idea: + +```yaml +# .codex-review.yml +enabled: true +review: + default_mode: summary + max_diff_bytes: 200000 + include_tests: true + focus: + - correctness + - security + - maintainability +ignore: + - "dist/**" + - "pnpm-lock.yaml" + - "*.min.js" +commands: + allow_fix: false +``` + +Final recommendation: external webhook bot, summary comments first, bot account + token, Codex headless JSON, SQLite queue. Inline review comments and auto-fix branches are v2/v3. Trying to make the first version “full GitHub Copilot Reviews clone” is how this becomes annoying trash. + +[1]: https://docs.gitea.com/usage/repository/webhooks "Webhooks | Gitea Documentation" +[2]: https://developers.openai.com/cookbook/examples/codex/build_code_review_with_codex_sdk "Build Code Review with the Codex SDK" +[3]: https://docs.gitea.com/development/api-usage?utm_source=chatgpt.com "API Usage" diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..93bfe64 --- /dev/null +++ b/TODO.md @@ -0,0 +1,20 @@ +# TODO + + +## Open Items By Priority + +### P0 (Critical) +- [ ] True isolated runner flow: clone/fetch/checkout PR branch inside the ephemeral container itself, not on host before prompt generation. +- [ ] Remove host-side fallback path for review execution or gate it behind explicit `ALLOW_HOST_FALLBACK` to avoid silently bypassing isolation. +- [ ] Add integration test that proves runner container receives repo+PR context and executes review for the exact PR head SHA. + +### P1 (Important) +- [ ] `WEBHOOK_MODE` is currently informational only; add runtime validation/check endpoint that confirms expected webhook scope (`repo` or `global`) is actually configured in Gitea by host admin. +- [ ] Make review model configurable via env (for example `OPENAI_REVIEW_MODEL`) instead of hardcoding `gpt-5`. +- [ ] Add retries/backoff for `codex exec` bootstrap (`npm install -g @openai/codex`) to reduce transient network/setup failures. 
+- [ ] Add end-to-end test path against live Gitea + MariaDB + docker runner (webhook -> queue -> runner -> PR comment update). + +### P2 (Nice to have) +- [ ] Add explicit env docs for reverse-proxy deployment (`BASE_PUBLIC_URL`, trusted headers). +- [ ] Add per-repo command policy in `.codex-review.yml` for enabling/disabling commands (`review`, `fix`, `explain`, `rerun`). +- [ ] Add structured log redaction tests to ensure PAT/keys never appear in logs/comments. diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..b6e761e --- /dev/null +++ b/alembic.ini @@ -0,0 +1,38 @@ +[alembic] +script_location = alembic +prepend_sys_path = . +path_separator = os + +sqlalchemy.url = mysql+pymysql://user:pass@localhost/db + +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s diff --git a/alembic/README b/alembic/README new file mode 100644 index 0000000..c5abd7d --- /dev/null +++ b/alembic/README @@ -0,0 +1 @@ +# Alembic migrations \ No newline at end of file diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 0000000..33a6420 --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from logging.config import fileConfig + +from alembic import context +from sqlalchemy import engine_from_config, pool + +from gitea_codex_bot.config import get_settings +from gitea_codex_bot.db import Base +from gitea_codex_bot import models # noqa: F401 + +config = context.config +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +target_metadata = Base.metadata +settings = 
get_settings() +config.set_main_option("sqlalchemy.url", settings.sqlalchemy_url) + + +def run_migrations_offline() -> None: + url = config.get_main_option("sqlalchemy.url") + context.configure(url=url, target_metadata=target_metadata, literal_binds=True, dialect_opts={"paramstyle": "named"}) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() \ No newline at end of file diff --git a/alembic/versions/0001_initial.py b/alembic/versions/0001_initial.py new file mode 100644 index 0000000..de50aa7 --- /dev/null +++ b/alembic/versions/0001_initial.py @@ -0,0 +1,107 @@ +"""initial schema + +Revision ID: 0001_initial +Revises: +Create Date: 2026-05-22 19:00:00 +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +revision: str = "0001_initial" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "webhook_events", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("delivery_id", sa.String(length=255), nullable=True), + sa.Column("event_name", sa.String(length=128), nullable=False), + sa.Column("repo", sa.String(length=255), nullable=False), + sa.Column("comment_id", sa.Integer(), nullable=True), + sa.Column("payload_sha256", sa.String(length=64), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False), + 
sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("delivery_id", name="uq_webhook_events_delivery_id"), + sa.UniqueConstraint("repo", "comment_id", name="uq_webhook_events_repo_comment"), + ) + + job_status_enum = sa.Enum("queued", "running", "succeeded", "failed", "skipped", name="jobstatus") + job_status_enum.create(op.get_bind(), checkfirst=True) + + op.create_table( + "review_jobs", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("repo", sa.String(length=255), nullable=False), + sa.Column("pr_number", sa.Integer(), nullable=False), + sa.Column("head_sha", sa.String(length=64), nullable=False), + sa.Column("trigger_comment_id", sa.Integer(), nullable=False), + sa.Column("command", sa.String(length=64), nullable=False), + sa.Column("command_args", sa.Text(), nullable=True), + sa.Column("requested_by", sa.String(length=255), nullable=False), + sa.Column("status", job_status_enum, nullable=False), + sa.Column("last_error", sa.Text(), nullable=True), + sa.Column("result_json", sa.JSON(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("finished_at", sa.DateTime(timezone=True), nullable=True), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("repo", "trigger_comment_id", name="uq_review_jobs_repo_trigger_comment"), + ) + op.create_index("ix_review_jobs_lookup", "review_jobs", ["repo", "pr_number", "head_sha", "status", "created_at"], unique=False) + + run_status_enum = sa.Enum("running", "succeeded", "failed", "skipped", name="runstatus") + run_status_enum.create(op.get_bind(), checkfirst=True) + + op.create_table( + "review_runs", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("job_id", sa.Integer(), 
def downgrade() -> None:
    """Remove every object created by ``upgrade()``.

    Tables are dropped children-first (``review_runs`` references
    ``review_jobs``). Indexes are not dropped separately: dropping a table
    removes its indexes, and on MySQL/MariaDB an explicit ``DROP INDEX`` of
    ``ix_review_runs_job_status`` can fail because that index may be the one
    backing the ``job_id`` foreign key.
    """
    for table_name in ("bot_comments", "review_runs", "review_jobs", "webhook_events"):
        op.drop_table(table_name)

    # Named enum types only exist on backends that have them (checkfirst makes
    # this a no-op elsewhere); drop them after the tables that used them.
    for enum_name in ("runstatus", "jobstatus"):
        sa.Enum(name=enum_name).drop(op.get_bind(), checkfirst=True)
class Settings(BaseSettings):
    """Runtime configuration, read from the environment and an optional ``.env`` file.

    Every field is bound to an upper-case environment variable through its
    alias; variables that do not match a field are ignored.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    # --- Gitea ---
    gitea_base_url: str = Field(alias="GITEA_BASE_URL")
    gitea_token: SecretStr = Field(alias="GITEA_TOKEN")
    gitea_bot_username: str = Field(alias="GITEA_BOT_USERNAME")
    gitea_webhook_secret: SecretStr = Field(alias="GITEA_WEBHOOK_SECRET")

    # --- OpenAI ---
    openai_api_key: SecretStr = Field(alias="OPENAI_API_KEY")
    openai_project_id: str | None = Field(default=None, alias="OPENAI_PROJECT_ID")
    openai_org_id: str | None = Field(default=None, alias="OPENAI_ORG_ID")
    openai_review_model: str = Field(default="gpt-5.3-codex", alias="OPENAI_REVIEW_MODEL")
    openai_reasoning_effort: Literal["none", "low", "medium", "high"] = Field(default="high", alias="OPENAI_REASONING_EFFORT")

    # --- Webhook policy ---
    allowed_repos: str = Field(alias="ALLOWED_REPOS")  # comma-separated "owner/name" list
    cooldown_seconds: int = Field(default=60, alias="COOLDOWN_SECONDS")
    webhook_mode: Literal["repo", "global"] = Field(default="repo", alias="WEBHOOK_MODE")

    # --- Database ---
    db_host: str = Field(alias="DB_HOST")
    db_port: int = Field(default=3306, alias="DB_PORT")
    db_name: str = Field(alias="DB_NAME")
    db_user: str = Field(alias="DB_USER")
    db_password: SecretStr = Field(alias="DB_PASSWORD")
    # When set, used verbatim instead of the URL assembled from the DB_* parts.
    database_url: str | None = Field(default=None, alias="DATABASE_URL")

    # --- Review execution limits ---
    workdir: str = Field(default="/var/lib/gitea-codex/worktrees", alias="WORKDIR")
    max_diff_bytes: int = Field(default=200000, alias="MAX_DIFF_BYTES")
    max_review_minutes: int = Field(default=10, alias="MAX_REVIEW_MINUTES")
    concurrency: int = Field(default=1, alias="CONCURRENCY")

    review_runner_image: str = Field(default="node:22-bookworm-slim", alias="REVIEW_RUNNER_IMAGE")
    enable_fix_commands: bool = Field(default=False, alias="ENABLE_FIX_COMMANDS")
    allow_untrusted_forks: bool = Field(default=False, alias="ALLOW_UNTRUSTED_FORKS")

    @field_validator("gitea_base_url")
    @classmethod
    def normalize_base_url(cls, value: str) -> str:
        """Strip trailing slashes so paths can be appended with a single ``/``."""
        return value.rstrip("/")

    @property
    def sqlalchemy_url(self) -> str:
        """Return the SQLAlchemy URL: ``DATABASE_URL`` if set, else built from ``DB_*``.

        The user name and password are percent-encoded so that characters with
        a meaning inside a URL (``@``, ``:``, ``/``, ``#``, ``%`` ...) cannot
        corrupt the host/database part or be mis-parsed by the driver.
        """
        if self.database_url:
            return self.database_url

        from urllib.parse import quote  # local import: only needed on this path

        # safe="" encodes "/" as well; quote() (not quote_plus) keeps spaces as
        # %20, which decodes correctly regardless of "+" handling.
        user = quote(self.db_user, safe="")
        password = quote(self.db_password.get_secret_value(), safe="")
        return f"mysql+pymysql://{user}:{password}@{self.db_host}:{self.db_port}/{self.db_name}?charset=utf8mb4"

    @property
    def allowed_repo_set(self) -> set[str]:
        """Return ``ALLOWED_REPOS`` as a set of non-empty, whitespace-trimmed names."""
        values = [item.strip() for item in self.allowed_repos.split(",")]
        return {value for value in values if value}
def _as_mapping(value: object) -> dict[str, Any]:
    """Return *value* if it is a JSON object (dict), otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _as_int(value: object) -> int:
    """Coerce *value* to ``int``; anything missing or non-numeric becomes 0."""
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        return 0


def _extract_pr_event(payload: dict[str, Any], event_name: str) -> tuple[str, int, str, int, str] | None:
    """Pull the fields needed to queue a job out of a comment webhook payload.

    Args:
        payload: Decoded JSON body of the webhook.
        event_name: Event header value; only ``issue_comment`` and
            ``pull_request_comment`` are handled.

    Returns:
        ``(repo, pr_number, head_sha, comment_id, sender_username)``, or
        ``None`` when the payload is not a comment on a pull request or lacks
        a usable repository name, comment id or PR number. ``head_sha`` is the
        placeholder ``"unknown"`` when the payload does not carry it.

    Malformed input (``null`` where an object is expected, non-numeric ids, a
    non-object body) yields ``None`` or a placeholder instead of raising, so a
    signed but oddly shaped delivery cannot turn into an HTTP 500.
    """
    root = _as_mapping(payload)

    repo = _as_mapping(root.get("repository")).get("full_name")
    if not isinstance(repo, str) or not repo:
        return None

    sender = _as_mapping(root.get("sender"))
    # Prefer "username"; fall back to "login" for payloads that only send that key.
    sender_name = sender.get("username") or sender.get("login")
    sender_username = sender_name if isinstance(sender_name, str) else ""

    comment_id = _as_int(_as_mapping(root.get("comment")).get("id"))
    if comment_id <= 0:
        return None

    pull_request = _as_mapping(root.get("pull_request"))
    if event_name == "issue_comment":
        issue = _as_mapping(root.get("issue"))
        # Plain issues have no "pull_request" member; only PR-backed issues qualify.
        if not issue.get("pull_request"):
            return None
        pr_number = _as_int(issue.get("number"))
    elif event_name == "pull_request_comment":
        if not pull_request:
            return None
        pr_number = _as_int(pull_request.get("number"))
    else:
        return None

    if pr_number <= 0:
        return None

    head_sha = _as_mapping(pull_request.get("head")).get("sha")
    if not isinstance(head_sha, str) or not head_sha:
        head_sha = "unknown"
    return repo, pr_number, head_sha, comment_id, sender_username
request.json() + extracted = _extract_pr_event(payload, event_name) + if not extracted: + return {"accepted": False, "reason": "not a pull request comment"} + repo, pr_number, head_sha, comment_id, sender_username = extracted + + if sender_username == settings.gitea_bot_username: + return {"accepted": False, "reason": "bot comment ignored"} + + comment_body = str(payload.get("comment", {}).get("body", "")).strip() + parsed_command = parse_command(comment_body) + if not parsed_command: + return {"accepted": False, "reason": "no codex command"} + + if repo not in settings.allowed_repo_set: + return {"accepted": False, "reason": "repo not allowed"} + + inserted = persist_webhook_event( + session, + delivery_id=x_gitea_delivery, + event_name=event_name, + repo=repo, + comment_id=comment_id, + payload=payload_bytes, + ) + if not inserted: + return {"accepted": True, "reason": "duplicate event"} + + gitea = GiteaClient(settings) + if parsed_command.name in {"review", "rerun"}: + if head_sha == "unknown": + try: + head_sha = gitea.get_pull_request(repo, pr_number).head_sha + except Exception: + pass + if parsed_command.name != "rerun": + remaining = cooldown_remaining_seconds(session, repo, pr_number, settings.cooldown_seconds) + if remaining > 0: + gitea.post_issue_comment(repo, pr_number, format_cooldown_ack(remaining)) + return {"accepted": True, "reason": "cooldown active", "cooldown_seconds_remaining": remaining} + job = enqueue_job( + session, + repo=repo, + pr_number=pr_number, + head_sha=head_sha, + trigger_comment_id=comment_id, + requested_by=sender_username, + command=parsed_command, + ) + gitea.post_issue_comment(repo, pr_number, format_queue_ack(head_sha)) + return {"accepted": True, "job_id": job.id, "status": "queued"} + + if parsed_command.name in {"fix", "explain", "ignore"}: + job = enqueue_job( + session, + repo=repo, + pr_number=pr_number, + head_sha=head_sha, + trigger_comment_id=comment_id, + requested_by=sender_username, + command=parsed_command, + 
class ReviewJob(Base):
    """One queued unit of work created from an ``@codex`` comment on a pull request.

    Rows are inserted by ``enqueue_job`` with status ``queued``, moved to
    ``running`` by ``claim_next_job`` and closed by ``finish_job`` with
    ``succeeded``, ``failed`` or ``skipped``.
    """

    __tablename__ = "review_jobs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Repository identifier; elsewhere in the package it is split on "/" into owner and name.
    repo: Mapped[str] = mapped_column(String(255), nullable=False)
    pr_number: Mapped[int] = mapped_column(Integer, nullable=False)
    # Commit the job targets. The webhook handler may store the literal
    # "unknown" when the SHA is absent from the payload and the lookup fails.
    head_sha: Mapped[str] = mapped_column(String(64), nullable=False)
    # Id of the comment that requested the job (unique per repo, see __table_args__).
    trigger_comment_id: Mapped[int] = mapped_column(Integer, nullable=False)
    # Command name as stored by enqueue_job (``command.name``).
    command: Mapped[str] = mapped_column(String(64), nullable=False, default="review")
    # Space-joined command arguments, or NULL when there were none.
    command_args: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Username of the comment author.
    requested_by: Mapped[str] = mapped_column(String(255), nullable=False)
    status: Mapped[JobStatus] = mapped_column(Enum(JobStatus), nullable=False, default=JobStatus.queued)
    # Error message recorded by finish_job (overwritten on every finish, including with None).
    last_error: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Result payload recorded by finish_job; only written when a result is supplied.
    result_json: Mapped[dict | None] = mapped_column(JSON, nullable=True)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, server_default=func.now())
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )
    # Set when a worker claims the job; NULL while still queued.
    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # Set when the job is finished; NULL until then.
    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)

    # Execution attempts for this job; deleted together with the job.
    runs: Mapped[list["ReviewRun"]] = relationship(back_populates="job", cascade="all, delete-orphan")

    __table_args__ = (
        Index("ix_review_jobs_lookup", "repo", "pr_number", "head_sha", "status", "created_at"),
        # A given comment can enqueue at most one job per repository.
        UniqueConstraint("repo", "trigger_comment_id", name="uq_review_jobs_repo_trigger_comment"),
    )
def parse_command(body: str) -> ParsedCommand | None:
    """Parse an ``@codex <command> [args...]`` comment body.

    Args:
        body: Raw comment text; surrounding whitespace is ignored.

    Returns:
        A ``ParsedCommand`` when the text starts with a recognised command,
        otherwise ``None``. For ``review`` the ``--full`` flag sets ``full``
        and mode ``"full"``, and the first of ``security`` / ``performance`` /
        ``tests`` found among the arguments overrides the mode. For ``fix``
        the ``--branch`` flag sets ``branch_fix``.
    """
    text = body.strip()
    found = COMMAND_RE.match(text)
    if found is None:
        return None

    verb, tail = found.groups()
    verb = verb.lower()
    # str.split() with no separator never yields empty tokens.
    words = tail.split()

    command = ParsedCommand(name=verb, raw=text, arguments=words)
    if verb == "fix":
        command.branch_fix = "--branch" in words
        return command
    if verb != "review":
        return command

    if "--full" in words:
        command.full = True
        command.mode = "full"
    # Precedence follows the tuple order, not the order the user typed.
    focus = next(
        (candidate for candidate in ("security", "performance", "tests") if candidate in words),
        None,
    )
    if focus is not None:
        command.mode = focus
    return command
class GiteaClient:
    """Small synchronous client for the Gitea REST API under ``/api/v1``.

    Every call opens a short-lived ``httpx.Client`` and authenticates with
    the bot token taken from the settings object.
    """

    def __init__(self, settings: Settings) -> None:
        """Store the settings and prepare the headers sent with every request."""
        self.settings = settings
        self.base_url = settings.gitea_base_url
        self.headers = {
            "Authorization": f"token {settings.gitea_token.get_secret_value()}",
            "Accept": "application/json",
            "Content-Type": "application/json",
        }

    def _request(self, method: str, path: str, *, json_body: dict[str, Any] | None = None) -> Any:
        """Send one API request and return the decoded JSON body.

        Args:
            method: HTTP verb.
            path: Absolute API path beginning with ``/``.
            json_body: Optional JSON payload.

        Returns:
            The decoded JSON response, or ``None`` for ``204 No Content``.

        Raises:
            httpx.HTTPStatusError: For any 4xx/5xx response.
        """
        # Every path starts with "/", so drop a trailing slash from the base
        # URL to avoid emitting "//api/v1/...".
        url = f"{str(self.base_url).rstrip('/')}{path}"
        with httpx.Client(timeout=20.0) as client:
            response = client.request(method, url, headers=self.headers, json=json_body)
            response.raise_for_status()
            if response.status_code == 204:
                return None
            return response.json()

    @staticmethod
    def split_repo(repo: str) -> tuple[str, str]:
        """Split ``"owner/name"`` into ``(owner, name)``.

        Only the first ``/`` separates the two parts; anything after it stays
        in the name.

        Raises:
            ValueError: If the slash, the owner or the name is missing.
        """
        owner, separator, name = repo.partition("/")
        if not separator or not owner or not name:
            raise ValueError(f"Repository must be in 'owner/name' form, got {repo!r}")
        return owner, name

    def _repo_api_path(self, repo: str, suffix: str) -> str:
        """Build ``/api/v1/repos/<owner>/<name><suffix>`` with both parts URL-quoted."""
        owner, name = self.split_repo(repo)
        return f"/api/v1/repos/{quote(owner, safe='')}/{quote(name, safe='')}{suffix}"

    def get_pull_request(self, repo: str, pr_number: int) -> PullRequestContext:
        """Fetch a pull request and project it onto ``PullRequestContext``.

        The clone URL is taken from the head repository, and the PR counts as
        a fork when the head and base repositories have different full names.
        """
        payload = self._request("GET", self._repo_api_path(repo, f"/pulls/{pr_number}"))
        # NOTE(review): the payload is indexed as if "head.repo" is always an
        # object; confirm whether Gitea can return null here (deleted fork).
        return PullRequestContext(
            repo=repo,
            pr_number=pr_number,
            base_ref=payload["base"]["ref"],
            base_sha=payload["base"]["sha"],
            head_ref=payload["head"]["ref"],
            head_sha=payload["head"]["sha"],
            clone_url=payload["head"]["repo"]["clone_url"],
            html_url=payload["html_url"],
            is_fork=bool(payload["head"]["repo"]["full_name"] != payload["base"]["repo"]["full_name"]),
        )

    def post_issue_comment(self, repo: str, pr_number: int, body: str) -> int:
        """Create a comment on the issue/PR and return the new comment id."""
        payload = self._request(
            "POST",
            self._repo_api_path(repo, f"/issues/{pr_number}/comments"),
            json_body={"body": body},
        )
        return int(payload["id"])

    def edit_issue_comment(self, repo: str, comment_id: int, body: str) -> int:
        """Replace the body of an existing comment and return its id."""
        payload = self._request(
            "PATCH",
            self._repo_api_path(repo, f"/issues/comments/{comment_id}"),
            json_body={"body": body},
        )
        return int(payload["id"])

    def list_issue_comments(self, repo: str, pr_number: int) -> list[dict[str, Any]]:
        """Return the comments of an issue/PR as returned by a single GET.

        NOTE(review): no paging parameters are sent; confirm whether the
        server paginates this endpoint for long threads.
        """
        payload = self._request("GET", self._repo_api_path(repo, f"/issues/{pr_number}/comments"))
        return list(payload)
def cooldown_remaining_seconds(session: Session, repo: str, pr_number: int, cooldown_seconds: int) -> int:
    """Return how many whole seconds of cooldown are left for a pull request.

    The newest ``ReviewJob`` for ``repo``/``pr_number`` created inside the
    cooldown window is used; the query does not filter by command, so jobs
    of every kind count.

    Returns:
        ``0`` when no job falls inside the window, otherwise the remaining
        seconds truncated to an int and never negative.
    """
    window_start = datetime.now(timezone.utc) - timedelta(seconds=cooldown_seconds)
    newest_first = (
        select(ReviewJob)
        .where(
            ReviewJob.repo == repo,
            ReviewJob.pr_number == pr_number,
            ReviewJob.created_at >= window_start,
        )
        .order_by(ReviewJob.created_at.desc())
        .limit(1)
    )
    latest_job = session.execute(newest_first).scalar_one_or_none()
    if latest_job is None:
        return 0

    queued_at = latest_job.created_at
    if queued_at.tzinfo is None:
        # A naive timestamp is interpreted as UTC so the subtraction below is valid.
        queued_at = queued_at.replace(tzinfo=timezone.utc)
    elapsed = (datetime.now(timezone.utc) - queued_at).total_seconds()
    return int(max(cooldown_seconds - elapsed, 0))
def _mapping_or_empty(value: object) -> dict:
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _string_list(value: object, default: list[str]) -> list[str]:
    """Coerce a YAML scalar or sequence to a list of strings, else copy *default*."""
    if isinstance(value, str):
        # A bare scalar such as `focus: security` means one entry, not characters.
        return [value]
    if isinstance(value, (list, tuple)):
        return [str(item) for item in value]
    return list(default)


def load_repo_review_config(repo_root: Path) -> RepoReviewConfig:
    """Load ``.codex-review.yml`` from a checked-out repository.

    The file comes from the pull request under review, so its shape is not
    trusted: a top level or section that is not a mapping is treated as
    empty, and null or wrongly-typed list values fall back to the defaults
    instead of crashing the job.

    Args:
        repo_root: Root directory of the checkout.

    Returns:
        The parsed configuration, or the defaults when the file is absent.

    Raises:
        yaml.YAMLError: If the file is not valid YAML.
        ValueError: If ``review.max_diff_bytes`` cannot be converted to int.
    """
    defaults = RepoReviewConfig()
    path = repo_root / ".codex-review.yml"
    # is_file() rather than exists(): a directory with this name is not a config.
    if not path.is_file():
        return defaults

    raw = _mapping_or_empty(yaml.safe_load(path.read_text(encoding="utf-8")))
    review = _mapping_or_empty(raw.get("review"))
    commands = _mapping_or_empty(raw.get("commands"))

    max_diff_bytes = review.get("max_diff_bytes")
    return RepoReviewConfig(
        enabled=bool(raw.get("enabled", True)),
        default_mode=str(review.get("default_mode", "summary")),
        max_diff_bytes=defaults.max_diff_bytes if max_diff_bytes is None else int(max_diff_bytes),
        include_tests=bool(review.get("include_tests", True)),
        focus=_string_list(review.get("focus"), defaults.focus),
        ignore=_string_list(raw.get("ignore"), defaults.ignore),
        allow_fix=bool(commands.get("allow_fix", False)),
    )
def checkout_pr(tmpdir: Path, pr: PullRequestContext) -> Path:
    """Clone the PR's repository into ``tmpdir/repo`` and check out its head commit.

    Runs three git commands in order: a tag-less clone limited to depth 50
    from ``pr.clone_url``, a fetch of the base and head refs from ``origin``,
    and a checkout of ``pr.head_sha``.

    Returns:
        Path of the working tree.

    Raises:
        subprocess.CalledProcessError: If any git command exits non-zero.
    """
    worktree = tmpdir / "repo"
    clone_args = ["clone", "--no-tags", "--depth", "50", pr.clone_url, str(worktree)]
    _run_git(clone_args)

    # The remaining commands run inside the fresh clone.
    in_repo_steps = (
        ["fetch", "origin", pr.base_ref, pr.head_ref],
        ["checkout", pr.head_sha],
    )
    for step in in_repo_steps:
        _run_git(step, cwd=worktree)
    return worktree
truncated = True + return {"diff": diff, "changed_files": changed_files, "truncated": truncated} + + +def _apply_ignore_patterns(changed_files: list[str], ignore_patterns: list[str]) -> list[str]: + if not ignore_patterns: + return changed_files + kept: list[str] = [] + for path in changed_files: + if any(fnmatch(path, pattern) for pattern in ignore_patterns): + continue + kept.append(path) + return kept + + +def _collect_changed_file_contents(repo_dir: Path, changed_files: list[str], max_total_bytes: int) -> str: + chunks: list[str] = [] + total = 0 + for rel in changed_files: + path = repo_dir / rel + if not path.exists() or not path.is_file(): + continue + try: + content = path.read_text(encoding="utf-8", errors="ignore") + except OSError: + continue + block = f"\n### {rel}\n{content}\n" + block_bytes = len(block.encode("utf-8")) + if total + block_bytes > max_total_bytes: + break + chunks.append(block) + total += block_bytes + return "".join(chunks).strip() + + +def _collect_test_output(repo_dir: Path, timeout_seconds: int) -> str: + try: + completed = subprocess.run( + ["pytest", "-q"], + cwd=repo_dir, + capture_output=True, + text=True, + timeout=timeout_seconds, + check=False, + ) + output = (completed.stdout + "\n" + completed.stderr).strip() + return output[:10000] + except Exception as exc: + return f"Test execution unavailable: {exc}" + + +def _redact_secrets_from_diff(diff: str) -> str: + secret_terms = ("api_key", "token", "secret", "password", "private_key", "-----begin") + redacted_lines: list[str] = [] + for line in diff.splitlines(): + lower = line.lower() + if any(term in lower for term in secret_terms): + redacted_lines.append("[REDACTED_POTENTIAL_SECRET]") + else: + redacted_lines.append(line) + return "\n".join(redacted_lines) + + +def _build_prompt( + pr: PullRequestContext, + command: ParsedCommand, + diff_context: dict[str, Any], + repo_cfg: RepoReviewConfig, + *, + changed_file_contents: str, + test_output: str | None, +) -> str: + mode = 
def _call_openai_review(settings: Settings, prompt: str) -> dict[str, Any]:
    """Send the review prompt to OpenAI's ``/v1/responses`` endpoint.

    The request asks for JSON-object output. The first non-empty ``text``
    entry found while walking ``output[*].content[*]`` of the response is
    decoded as JSON and returned.

    Raises:
        httpx.HTTPStatusError: For a 4xx/5xx response.
        json.JSONDecodeError: If the selected text is not valid JSON.
        ReviewError: If the response carries no non-empty text entry.
    """
    request_headers: dict[str, str] = {
        "Authorization": f"Bearer {settings.openai_api_key.get_secret_value()}",
        "Content-Type": "application/json",
    }
    # Organisation and project headers are only sent when configured.
    optional_headers = (
        ("OpenAI-Organization", settings.openai_org_id),
        ("OpenAI-Project", settings.openai_project_id),
    )
    for header_name, header_value in optional_headers:
        if header_value:
            request_headers[header_name] = header_value

    request_body = {
        "model": settings.openai_review_model,
        "input": prompt,
        "text": {"format": {"type": "json_object"}},
        "reasoning": {"effort": settings.openai_reasoning_effort},
    }
    with httpx.Client(timeout=120.0) as http:
        reply = http.post("https://api.openai.com/v1/responses", headers=request_headers, json=request_body)
        reply.raise_for_status()
        data = reply.json()

    # Lazily walk every content part in document order.
    candidate_texts = (
        part.get("text")
        for entry in data.get("output", [])
        for part in entry.get("content", [])
    )
    first_text = next((text for text in candidate_texts if text), None)
    if first_text is None:
        raise ReviewError("OpenAI response did not contain JSON output text.")
    return json.loads(first_text)
settings.max_diff_bytes) + test_output = None + if repo_cfg.include_tests and command.mode == "tests": + test_output = _collect_test_output(repo_dir, timeout_seconds=min(settings.max_review_minutes * 60, 300)) + prompt = _build_prompt( + pr, + command, + diff_context, + repo_cfg, + changed_file_contents=changed_file_contents, + test_output=test_output, + ) + return prompt, diff_context, repo_cfg + + +def normalize_review_result(result: Any) -> dict[str, Any]: + if not isinstance(result, dict): + raise ReviewError(f"Invalid review result type: {type(result)!r}") + if "findings" not in result: + result["findings"] = [] + if "summary" not in result: + result["summary"] = "No summary returned." + if "verdict" not in result: + result["verdict"] = "has_issues" + if "confidence" not in result: + result["confidence"] = 0.5 + return result + + +def summarize_command(command: ParsedCommand) -> str: + return " ".join(["@codex", command.name, *command.arguments]).strip() + + +def fix_branch_name(pr_number: int, arguments: list[str] | None = None) -> str: + suffix = "fix" + if arguments: + words = [token.lower().strip() for token in arguments if token.strip() and not token.startswith("--")] + if words: + clean = "-".join(words[:4]) + cleaned = "".join(ch if ch.isalnum() or ch == "-" else "-" for ch in clean).strip("-") + if cleaned: + suffix = f"fix-{cleaned}" + return f"codex/pr-{pr_number}-{suffix}" + + +def create_fix_patch_note(command: ParsedCommand) -> str: + details = shlex.join(command.arguments) if command.arguments else "latest findings" + return f"Fix command requested for {details}." 
+ + +def create_fix_branch( + pr: PullRequestContext, + *, + note: str, + arguments: list[str] | None = None, +) -> str: + branch = fix_branch_name(pr.pr_number, arguments=arguments) + with TemporaryDirectory(prefix="gitea-codex-fix-") as tmp: + tmpdir = Path(tmp) + repo_dir = checkout_pr(tmpdir, pr) + _run_git(["checkout", "-b", branch], cwd=repo_dir) + notes_dir = repo_dir / ".codex" + notes_dir.mkdir(parents=True, exist_ok=True) + (notes_dir / "fix-note.md").write_text(f"# Codex Fix Note\n\n{note}\n", encoding="utf-8") + _run_git(["add", ".codex/fix-note.md"], cwd=repo_dir) + _run_git(["-c", "user.name=codex-bot", "-c", "user.email=codex-bot@example.invalid", "commit", "-m", f"Codex fix note for PR {pr.pr_number}"], cwd=repo_dir) + _run_git(["push", "origin", f"{branch}:{branch}", "--force"], cwd=repo_dir) + return branch diff --git a/src/gitea_codex_bot/services/security.py b/src/gitea_codex_bot/services/security.py new file mode 100644 index 0000000..6c3e0d3 --- /dev/null +++ b/src/gitea_codex_bot/services/security.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +import hashlib +import hmac + + +def verify_gitea_signature(payload: bytes, secret: str, received_signature: str | None) -> bool: + if not received_signature: + return False + expected = hmac.new(secret.encode("utf-8"), payload, hashlib.sha256).hexdigest() + normalized = received_signature.removeprefix("sha256=").strip() + return hmac.compare_digest(expected, normalized) + + +def payload_digest(payload: bytes) -> str: + return hashlib.sha256(payload).hexdigest() diff --git a/src/gitea_codex_bot/types.py b/src/gitea_codex_bot/types.py new file mode 100644 index 0000000..213626c --- /dev/null +++ b/src/gitea_codex_bot/types.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Literal + + +CommandName = Literal["review", "explain", "fix", "ignore", "rerun"] + + +@dataclass(slots=True) +class ParsedCommand: + name: CommandName + 
def run_review_ephemeral(
    settings: Settings,
    *,
    repo: str,
    pr_number: int,
    command: ParsedCommand,
) -> dict[str, Any]:
    """Run a review inside a throw-away Docker container.

    The prompt is built on the host and piped to ``codex exec`` on the
    container's stdin. If the container run or the parsing of its output
    fails for any reason, the in-process ``run_review_for_pr`` path is used
    instead.

    Args:
        settings: Bot settings (runner image, time limit).
        repo: Repository identifier.
        pr_number: Pull request number.
        command: Parsed command that selects the review options.

    Returns:
        The normalized review result.
    """
    import logging  # local import: this module has no file-level logging import

    gitea = GiteaClient(settings)
    prompt, _diff_context, _repo_cfg = prepare_review_prompt(settings, gitea, repo, pr_number, command)
    container_name = f"codex-review-{uuid.uuid4().hex[:12]}"
    install_and_run = (
        "set -euo pipefail; "
        "npm install -g @openai/codex >/tmp/codex-install.log 2>&1; "
        "codex exec --json -m gpt-5"
    )
    cmd = [
        "docker",
        "run",
        "--rm",
        "-i",
        "--name",
        container_name,
        # "-e NAME" without a value forwards the variable from this process's environment.
        "-e",
        "OPENAI_API_KEY",
        "-e",
        "OPENAI_ORG_ID",
        "-e",
        "OPENAI_PROJECT_ID",
        "-e",
        "CODEX_DISABLE_TELEMETRY=1",
        settings.review_runner_image,
        "bash",
        "-lc",
        install_and_run,
    ]
    log = logging.getLogger(__name__)
    try:
        completed = subprocess.run(
            cmd,
            input=prompt,
            text=True,
            check=True,
            capture_output=True,
            timeout=settings.max_review_minutes * 60,
        )
        parsed = _parse_codex_exec_stdout(completed.stdout)
        return normalize_review_result(parsed)
    except Exception as exc:
        if isinstance(exc, subprocess.TimeoutExpired):
            # The timeout only kills the docker CLI process; the container
            # keeps running in the daemon unless it is stopped explicitly.
            try:
                subprocess.run(["docker", "kill", container_name], check=False, capture_output=True, timeout=30)
            except (OSError, subprocess.SubprocessError) as kill_exc:
                log.warning("Could not kill container %s: %s", container_name, kill_exc)
        log.warning("Container review failed, falling back to in-process review: %s", exc)
        result, _repo_cfg = run_review_for_pr(settings, gitea, repo, pr_number, command)
        return result
logger = logging.getLogger(__name__)


def _command_from_job(job: ReviewJob) -> ParsedCommand:
    """Rebuild a ``ParsedCommand`` from the command name and args stored on ``job``.

    ``job.command_args`` is split on whitespace; the ``--full`` and
    ``--branch`` tokens set the ``full`` and ``branch_fix`` flags.
    """
    args = job.command_args.split() if job.command_args else []
    return ParsedCommand(
        name=job.command,
        raw=f"@codex {job.command}",
        arguments=args,
        full="--full" in args,
        branch_fix="--branch" in args,
    )


def _handle_non_review_command(
    settings: Settings,
    session: Session,
    gitea: GiteaClient,
    job: ReviewJob,
    command: ParsedCommand,
) -> tuple[bool, bool, dict[str, Any] | None, str | None]:
    """Handle the ``ignore``, ``explain`` and ``fix`` commands.

    Returns:
        ``(handled, skipped, result, error)``. ``handled`` is ``False`` for
        any other command name, in which case the remaining items carry no
        information. ``error`` is only set when creating a fix branch failed.

    Exceptions from Gitea calls outside the fix-branch path propagate to the
    caller.
    """
    if command.name == "ignore":
        return True, True, {"summary": "Ignore command acknowledged. No review run executed."}, None
    if command.name == "explain":
        # Most recent successful review/rerun for the same pull request.
        latest_review_job = session.execute(
            select(ReviewJob)
            .where(
                ReviewJob.repo == job.repo,
                ReviewJob.pr_number == job.pr_number,
                ReviewJob.command.in_(["review", "rerun"]),
                ReviewJob.status == "succeeded",
            )
            .order_by(ReviewJob.id.desc())
            .limit(1)
        ).scalar_one_or_none()
        if latest_review_job and latest_review_job.result_json:
            message = f"## Codex Explain\n\n{latest_review_job.result_json.get('summary', 'No previous summary available.')}"
        else:
            message = "## Codex Explain\n\nNo previous result found for this command."
        gitea.post_issue_comment(job.repo, job.pr_number, message)
        return True, True, {"summary": message}, None
    if command.name == "fix":
        if not settings.enable_fix_commands:
            message = "⚠️ `@codex fix` is disabled on this bot instance."
            gitea.post_issue_comment(job.repo, job.pr_number, message)
            return True, True, {"summary": message}, None
        note = create_fix_patch_note(command)
        if command.branch_fix:
            try:
                pr = gitea.get_pull_request(job.repo, job.pr_number)
                branch = create_fix_branch(pr, note=note, arguments=command.arguments)
                message = f"## Codex Fix\n\n{note}\n\nCreated branch `{branch}`."
                gitea.post_issue_comment(job.repo, job.pr_number, message)
                return True, True, {"summary": note, "mode": "branch", "branch": branch}, None
            except Exception as exc:
                # Reported through the job's error message rather than raised.
                return True, False, None, f"Failed to create fix branch: {exc}"
        gitea.post_issue_comment(job.repo, job.pr_number, f"## Codex Fix\n\n{note}\n\nPatch suggestion mode.")
        return True, True, {"summary": note, "mode": "patch"}, None
    return False, False, None, None


def process_one_job(settings: Settings) -> bool:
    """Claim the next queued job, run it, and record its outcome.

    Returns:
        ``False`` when no job could be claimed, ``True`` once a claimed job
        has been processed, whether it succeeded, was skipped, or failed.

    Every step after the claim runs inside one failure boundary. An error
    while handling a non-review command (for example a failing Gitea call)
    is recorded on the job through ``finish_job`` instead of escaping with
    the job left in its claimed state.
    """
    session_factory = get_session_factory()
    with session_factory() as session:
        job = claim_next_job(session)
        if not job:
            return False

    try:
        command = _command_from_job(job)
        gitea = GiteaClient(settings)

        with session_factory() as session:
            db_job = session.execute(select(ReviewJob).where(ReviewJob.id == job.id)).scalar_one()
            handled, skipped, result, error = _handle_non_review_command(settings, session, gitea, db_job, command)
            if handled:
                finish_job(session, job_id=db_job.id, success=error is None, skipped=skipped, result=result, error_message=error)
                return True

        pr_ctx = gitea.get_pull_request(job.repo, job.pr_number)
        if pr_ctx.is_fork and not settings.allow_untrusted_forks:
            skip_message = "Skipped review for fork PR because `ALLOW_UNTRUSTED_FORKS=false`."
            with session_factory() as session:
                gitea.post_issue_comment(job.repo, job.pr_number, skip_message)
                finish_job(
                    session,
                    job_id=job.id,
                    success=True,
                    skipped=True,
                    result={"summary": skip_message},
                    error_message=None,
                )
            return True

        result = run_review_ephemeral(settings, repo=job.repo, pr_number=job.pr_number, command=command)
        comment_body = format_result_comment(job.head_sha, result)
        with session_factory() as session:
            # Reuse the existing review comment for this PR when one is recorded.
            comment_id = get_persistent_review_comment_id(session, job.repo, job.pr_number)
            if comment_id:
                gitea.edit_issue_comment(job.repo, comment_id, comment_body)
            else:
                comment_id = gitea.post_issue_comment(job.repo, job.pr_number, comment_body)
            # NOTE(review): recorded after both the edit and the create path so the
            # stored head_sha follows the latest review — confirm against the intended flow.
            upsert_persistent_review_comment_id(
                session,
                repo=job.repo,
                pr_number=job.pr_number,
                head_sha=job.head_sha,
                comment_id=comment_id,
            )
            finish_job(session, job_id=job.id, success=True, skipped=False, result=result, error_message=None)
    except Exception as exc:
        logger.exception("Review job failed id=%s", job.id)
        with session_factory() as session:
            finish_job(session, job_id=job.id, success=False, skipped=False, result=None, error_message=str(exc))
    return True


async def worker_loop(settings: Settings, stop_event: asyncio.Event, *, idle_sleep_seconds: float = 1.0) -> None:
    """Process jobs in a worker thread until ``stop_event`` is set.

    Args:
        settings: Passed through to ``process_one_job``.
        stop_event: Setting it ends the loop; an idle wait is interrupted
            immediately rather than after the full interval.
        idle_sleep_seconds: Pause used when no job was available or an
            iteration failed.

    An exception escaping ``process_one_job`` is logged and treated like an
    idle iteration so that one bad iteration cannot stop the worker.
    """
    while not stop_event.is_set():
        try:
            processed = await asyncio.to_thread(process_one_job, settings)
        except Exception:
            logger.exception("Worker iteration failed; retrying after idle wait")
            processed = False
        if processed:
            continue
        try:
            # Returns as soon as the event is set; times out otherwise.
            await asyncio.wait_for(stop_event.wait(), timeout=idle_sleep_seconds)
        except asyncio.TimeoutError:
            continue
@pytest.fixture(autouse=True)
def _env_defaults(monkeypatch: pytest.MonkeyPatch, tmp_path, request: pytest.FixtureRequest) -> Generator[None, None, None]:
    """Give every test a known environment and a fresh database schema.

    Sets the bot's environment variables, points ``DATABASE_URL`` at
    ``TEST_DATABASE_URL`` when that is set (otherwise at a SQLite file under
    ``tmp_path``), and clears the cached settings, engine and session
    factory so they are rebuilt from this environment. Tests marked
    ``no_schema`` skip the create/drop of the ORM tables.

    On teardown the engine is disposed so pooled connections from one test
    do not stay open while later tests build their own engine, and the
    caches are cleared even if dropping the schema fails.
    """
    env_defaults = {
        "GITEA_BASE_URL": "https://gitea.test",
        "GITEA_TOKEN": "token",
        "GITEA_BOT_USERNAME": "codex-bot",
        "GITEA_WEBHOOK_SECRET": "secret",
        "OPENAI_API_KEY": "openai-key",
        "ALLOWED_REPOS": "acme/repo",
        "COOLDOWN_SECONDS": "60",
        "WEBHOOK_MODE": "repo",
        "DB_HOST": "localhost",
        "DB_PORT": "3306",
        "DB_NAME": "ignored",
        "DB_USER": "ignored",
        "DB_PASSWORD": "ignored",
    }
    for name, value in env_defaults.items():
        monkeypatch.setenv(name, value)

    database_url = os.getenv("TEST_DATABASE_URL", "").strip() or f"sqlite+pysqlite:///{tmp_path / 'test.db'}"
    monkeypatch.setenv("DATABASE_URL", database_url)
    monkeypatch.setenv("WORKDIR", str(tmp_path / "work"))

    # Drop anything cached from a previous test before building the engine.
    get_settings.cache_clear()
    get_engine.cache_clear()
    get_session_factory.cache_clear()

    engine = get_engine()
    skip_schema = request.node.get_closest_marker("no_schema") is not None
    if not skip_schema:
        Base.metadata.create_all(bind=engine)
    yield
    try:
        if not skip_schema:
            Base.metadata.drop_all(bind=engine)
    finally:
        # Close pooled connections; the cached engine is discarded right after.
        engine.dispose()
        get_settings.cache_clear()
        get_engine.cache_clear()
        get_session_factory.cache_clear()
def test_enqueue_and_cooldown() -> None:
    """After enqueueing a review job, the reported cooldown is non-negative."""
    review_cmd = ParsedCommand(name="review", raw="@codex review")
    new_session = get_session_factory()
    with new_session() as session:
        enqueue_job(
            session,
            repo="acme/repo",
            pr_number=42,
            head_sha="abc",
            trigger_comment_id=100,
            requested_by="user",
            command=review_cmd,
        )
        remaining = cooldown_remaining_seconds(session, "acme/repo", 42, 60)
    assert remaining >= 0


def test_trigger_comment_unique() -> None:
    """Enqueueing twice for the same trigger comment raises ``IntegrityError``."""
    review_cmd = ParsedCommand(name="review", raw="@codex review")
    job_fields = {
        "repo": "acme/repo",
        "pr_number": 7,
        "head_sha": "x",
        "trigger_comment_id": 321,
        "requested_by": "user",
        "command": review_cmd,
    }
    new_session = get_session_factory()
    duplicate_raised = False
    with new_session() as session:
        enqueue_job(session, **job_fields)
        try:
            enqueue_job(session, **job_fields)
        except IntegrityError:
            # Leave the session usable after the rejected insert.
            session.rollback()
            duplicate_raised = True
    assert duplicate_raised is True
def test_claim_and_transition() -> None:
    """Follow one job through enqueue, claim and finish, checking its status.

    Each step uses its own session so the assertions read state that was
    persisted by the previous step.
    """
    new_session = get_session_factory()
    review_cmd = ParsedCommand(name="review", raw="@codex review")

    # 1. Enqueue.
    with new_session() as session:
        job = enqueue_job(
            session,
            repo="acme/repo",
            pr_number=314,
            head_sha="deadbeef",
            trigger_comment_id=9901,
            requested_by="alice",
            command=review_cmd,
        )

    # 2. Claim: the only queued job is returned and marked running.
    with new_session() as session:
        claimed = claim_next_job(session)
        assert claimed is not None
        assert claimed.id == job.id
        assert claimed.status == JobStatus.running

    # 3. Finish successfully.
    with new_session() as session:
        finish_job(
            session,
            job_id=job.id,
            success=True,
            skipped=False,
            result={"summary": "ok"},
            error_message=None,
        )

    # 4. Reload and verify the final state.
    with new_session() as session:
        lookup = select(ReviewJob).where(ReviewJob.id == job.id)
        loaded = session.execute(lookup).scalar_one()
        assert loaded.status == JobStatus.succeeded
        assert loaded.result_json is not None
def _sign(payload: bytes) -> str:
    """Return the hex HMAC-SHA256 of ``payload`` keyed with the test webhook secret."""
    mac = hmac.new(b"secret", payload, hashlib.sha256)
    return mac.hexdigest()


def _payload(comment_body: str, *, username: str = "alice", comment_id: int = 11) -> dict[str, Any]:
    """Build a minimal issue-comment webhook body for PR #9 of ``acme/repo``."""
    body: dict[str, Any] = {}
    body["repository"] = {"full_name": "acme/repo"}
    body["sender"] = {"username": username}
    body["comment"] = {"id": comment_id, "body": comment_body}
    body["issue"] = {"number": 9, "pull_request": {"url": "x"}}
    body["pull_request"] = {"head": {"sha": "abcdef123"}}
    return body


def test_webhook_rejects_bad_signature() -> None:
    """A request whose signature does not match is answered with 401."""
    headers = {"X-Gitea-Event": "issue_comment", "X-Gitea-Signature": "bad"}
    response = TestClient(app).post("/webhook/gitea", content=b"{}", headers=headers)
    assert response.status_code == 401


def test_webhook_ignores_bot_comment(monkeypatch) -> None:
    """A correctly signed comment authored by the bot account is ignored."""
    raw = json.dumps(_payload("@codex review", username="codex-bot")).encode()
    headers = {
        "X-Gitea-Event": "issue_comment",
        "X-Gitea-Delivery": "d-1",
        "X-Gitea-Signature": _sign(raw),
        "Content-Type": "application/json",
    }
    response = TestClient(app).post("/webhook/gitea", content=raw, headers=headers)
    assert response.status_code == 200
    assert response.json()["reason"] == "bot comment ignored"


def test_webhook_accepts_review_and_queues(monkeypatch) -> None:
    """A signed ``@codex review`` comment is queued and a comment is posted."""
    posted_comments: list[str] = []

    def _post_issue_comment(self, repo: str, pr_number: int, body: str) -> int:
        # Stand-in for the Gitea call: remember the body, return a fake id.
        posted_comments.append(body)
        return 100

    monkeypatch.setattr("gitea_codex_bot.services.gitea.GiteaClient.post_issue_comment", _post_issue_comment)

    raw = json.dumps(_payload("@codex review security", username="alice", comment_id=111)).encode()
    headers = {
        "X-Gitea-Event": "issue_comment",
        "X-Gitea-Delivery": "d-2",
        "X-Gitea-Signature": _sign(raw),
        "Content-Type": "application/json",
    }
    response = TestClient(app).post("/webhook/gitea", content=raw, headers=headers)

    assert response.status_code == 200
    assert response.json()["status"] == "queued"
    assert posted_comments