Initial FastAPI face-lock scaffold

2026-04-11 16:32:26 +02:00
commit 660ce4e7cc
9 changed files with 306 additions and 0 deletions
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1 @@
 ENV=dev
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,7 @@
 __pycache__/
 *.pyc
 .venv/
 .env
 .pytest_cache/
 .coverage
 htmlcov/
--- a/README.md
+++ b/README.md
@@ -0,0 +1,13 @@
 # face-lock
 FastAPI microservice that finds the primary subject in an image, draws a square around it, and returns a buffered crop.
 ## Dev
 ```bash
 cp .env.example .env
 pip install -r requirements.txt
 uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
 ```
 Set `ENV=dev` to enable the Tailwind UI at `/`.
--- a/app/__init__.py
+++ b/app/__init__.py
--- a/app/config.py
+++ b/app/config.py
@@ -0,0 +1,13 @@
 from dataclasses import dataclass
 from dotenv import load_dotenv
 import os
 load_dotenv()
@dataclass(frozen=True)
class Settings:
    """Immutable runtime configuration read from the process environment.

    ``env`` holds the ``ENV`` variable with surrounding whitespace removed
    and lower-cased; it falls back to ``"prod"`` when the variable is unset.
    The default is computed once, when this class body is executed.
    """

    env: str = os.environ.get("ENV", "prod").strip().lower()


# Shared settings instance; app.main imports this name.
settings = Settings()
--- a/app/main.py
+++ b/app/main.py
@@ -0,0 +1,88 @@
 from fastapi import FastAPI, File, HTTPException, UploadFile
 from fastapi.responses import HTMLResponse, StreamingResponse
 from app.config import settings
 # FastAPI application instance; the route decorators below register their handlers on it.
 app = FastAPI(title="face-lock", version="0.1.0")
@app.get("/health")
def health():
    """Report liveness together with the configured environment name."""
    return dict(ok=True, env=settings.env)
@app.get("/", response_class=HTMLResponse)
def index():
    """Serve the landing page.

    Returns a short placeholder unless ``settings.env`` is ``"dev"``, in which
    case the single-page upload UI is returned.  The UI posts the chosen file
    to ``/api/focus``, prints the JSON reply, and shows the cropped image.
    The image is only shown for a successful reply that carries a crop;
    error replies and failed requests leave it hidden.
    """
    if settings.env != "dev":
        return HTMLResponse("<h1>face-lock</h1><p>Set ENV=dev for the UI.</p>")
    return HTMLResponse(
        """
 <!doctype html>
 <html lang="en">
 <head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <script src="https://cdn.tailwindcss.com"></script>
  <title>face-lock</title>
 </head>
 <body class="bg-slate-950 text-slate-100 min-h-screen">
  <main class="mx-auto max-w-5xl p-6">
    <div class="mb-6">
      <h1 class="text-3xl font-bold">face-lock</h1>
      <p class="text-slate-400">Drop an image, get the primary subject squared and cropped.</p>
    </div>
    <div class="grid gap-6 md:grid-cols-2">
      <section class="rounded-2xl border border-slate-800 bg-slate-900 p-4">
        <input id="file" type="file" accept="image/*" class="block w-full rounded-lg border border-slate-700 bg-slate-950 p-3" />
        <button id="go" class="mt-4 rounded-lg bg-cyan-500 px-4 py-2 font-semibold text-slate-950">Process</button>
        <pre id="meta" class="mt-4 whitespace-pre-wrap rounded-lg bg-slate-950 p-3 text-xs text-slate-300"></pre>
      </section>
      <section class="rounded-2xl border border-slate-800 bg-slate-900 p-4">
        <div class="mb-3 text-sm font-semibold text-slate-400">Result</div>
        <img id="result" class="hidden w-full rounded-xl border border-slate-800" />
      </section>
    </div>
  </main>
  <script>
    const file = document.getElementById('file');
    const go = document.getElementById('go');
    const result = document.getElementById('result');
    const meta = document.getElementById('meta');
    go.onclick = async () => {
      if (!file.files.length) return;
      const form = new FormData();
      form.append('file', file.files[0]);
      meta.textContent = 'Working...';
      result.classList.add('hidden');
      try {
        const resp = await fetch('/api/focus', { method: 'POST', body: form });
        const data = await resp.json();
        meta.textContent = JSON.stringify(data, null, 2);
        // Error replies carry only a detail message, so there is no crop to show.
        if (!resp.ok || !data.crop_data_url) return;
        result.src = data.crop_data_url;
        result.classList.remove('hidden');
      } catch (err) {
        meta.textContent = 'Request failed: ' + err;
      }
    };
  </script>
 </body>
 </html>
        """
    )
@app.post("/api/focus")
async def focus(file: UploadFile = File(...)):
    """Process an uploaded image and return the detection result as JSON.

    Args:
        file: Multipart upload containing the image to analyse.

    Returns:
        The mapping produced by ``process_image`` without its
        ``crop_bytes_io`` entry, which is an in-memory stream rather than
        JSON data.

    Raises:
        HTTPException: status 400 when ``process_image`` raises ``ValueError``.
    """
    from fastapi.concurrency import run_in_threadpool

    from app.vision import process_image

    payload = await file.read()
    try:
        # Image processing is blocking work; run it off the event loop so
        # other requests keep being served in the meantime.
        result = await run_in_threadpool(process_image, payload, file.filename or "upload")
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    # The stream is only meant for /api/focus/image; keep it out of the JSON body.
    result.pop("crop_bytes_io", None)
    return result
@app.post("/api/focus/image")
async def focus_image(file: UploadFile = File(...)):
    """Process an uploaded image and stream back the cropped image bytes.

    Args:
        file: Multipart upload containing the image to analyse.

    Returns:
        A ``StreamingResponse`` over the ``crop_bytes_io`` stream from
        ``process_image``, labelled with the ``mime_type`` it reports.

    Raises:
        HTTPException: status 400 when ``process_image`` raises ``ValueError``.
    """
    from fastapi.concurrency import run_in_threadpool

    from app.vision import process_image

    payload = await file.read()
    try:
        # Image processing is blocking work; run it off the event loop so
        # other requests keep being served in the meantime.
        result = await run_in_threadpool(process_image, payload, file.filename or "upload")
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return StreamingResponse(result["crop_bytes_io"], media_type=result["mime_type"])
--- a/app/vision.py
+++ b/app/vision.py
@@ -0,0 +1,149 @@
 from __future__ import annotations
 from dataclasses import dataclass
 from io import BytesIO
 from typing import Any
 import cv2
 import numpy as np
@dataclass(frozen=True)
class BBox:
    """Immutable axis-aligned rectangle described by an origin and a size."""

    x: int  # horizontal origin
    y: int  # vertical origin
    w: int  # extent along x
    h: int  # extent along y

    @property
    def right(self) -> int:
        """Return ``x + w``, the exclusive end of the box along x."""
        origin, extent = self.x, self.w
        return origin + extent

    @property
    def bottom(self) -> int:
        """Return ``y + h``, the exclusive end of the box along y."""
        origin, extent = self.y, self.h
        return origin + extent
 # Module-level HOG descriptor loaded with OpenCV's default people-detector SVM.
 # Built once at import time and used by detect_person.
 HOG = cv2.HOGDescriptor()
 HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
 def decode_image(image_bytes: bytes) -> np.ndarray:
    """Decode an encoded image file held in memory into a pixel array.

    Args:
        image_bytes: Raw bytes of an encoded image (for example JPEG or PNG).

    Returns:
        The array produced by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.

    Raises:
        ValueError: If the payload is empty or OpenCV cannot decode it.  All
            failures surface as ``ValueError`` so callers can map them to a
            client error.
    """
    # An empty buffer makes cv2.imdecode raise cv2.error instead of returning None.
    if not image_bytes:
        raise ValueError("image payload is empty")
    data = np.frombuffer(image_bytes, dtype=np.uint8)
    try:
        image = cv2.imdecode(data, cv2.IMREAD_COLOR)
    except cv2.error as exc:
        raise ValueError("could not decode image") from exc
    if image is None:
        raise ValueError("could not decode image")
    return image
 def select_primary_bbox(image: np.ndarray) -> tuple[BBox, str]:
    """Pick the box of the main subject and name the strategy that found it.

    Detectors are tried in order (person detection, then contour detection)
    and the first non-``None`` result wins.  When both return ``None`` a
    centred square with a side of 80% of the shorter image dimension is used.

    Returns:
        ``(bbox, method)`` where ``method`` is ``"person_hog"``,
        ``"contour"`` or ``"center_fallback"``.
    """
    strategies = (
        (detect_person, "person_hog"),
        (detect_salient_object, "contour"),
    )
    for detect, label in strategies:
        found = detect(image)
        if found is not None:
            return found, label

    height, width = image.shape[:2]
    side = int(min(width, height) * 0.8)
    left = max(0, (width - side) // 2)
    top = max(0, (height - side) // 2)
    return BBox(x=left, y=top, w=side, h=side), "center_fallback"
 def detect_person(image: np.ndarray) -> BBox | None:
    """Return the largest-area detection from the module-level HOG detector.

    Returns ``None`` when the detector reports no rectangles.  Detection
    weights are not used for ranking; only rectangle area is.
    """
    rects, weights = HOG.detectMultiScale(image, winStride=(8, 8), padding=(8, 8), scale=1.05)
    if len(rects) == 0:
        return None

    def area(pair) -> int:
        rect = pair[0]
        return int(rect[2]) * int(rect[3])

    largest, _weight = max(zip(rects, weights), key=area)
    left, top, width, height = (int(value) for value in largest)
    return BBox(x=left, y=top, w=width, h=height)
 def detect_salient_object(image: np.ndarray) -> BBox | None:
    """Return the largest edge-contour bounding box, or ``None``.

    Pipeline: grayscale -> 7x7 Gaussian blur -> Canny(40, 120) -> two rounds
    of 5x5 morphological closing -> external contours.  Bounding boxes whose
    area is below ``max(500, 1% of the image area)`` are ignored.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (7, 7), 0)
    edge_map = cv2.Canny(smoothed, 40, 120)
    structuring = np.ones((5, 5), np.uint8)
    sealed = cv2.morphologyEx(edge_map, cv2.MORPH_CLOSE, structuring, iterations=2)
    contours, _hierarchy = cv2.findContours(sealed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    height, width = image.shape[:2]
    min_area = max(500, int(height * width * 0.01))

    def box_area(box: BBox) -> int:
        return box.w * box.h

    boxes = [BBox(x=bx, y=by, w=bw, h=bh) for bx, by, bw, bh in map(cv2.boundingRect, contours)]
    large_enough = [box for box in boxes if box_area(box) >= min_area]
    if not large_enough:
        return None
    return max(large_enough, key=box_area)
 def square_bbox(bbox: BBox, image_shape: tuple[int, int, int], buffer_ratio: float = 0.15) -> BBox:
    """Expand ``bbox`` into a padded square that stays inside the image.

    Args:
        bbox: Box to square.
        image_shape: Array shape whose first two entries are (height, width).
        buffer_ratio: Padding added on each side, as a fraction of the
            longer box edge.

    Returns:
        A square box centred on ``bbox`` where possible.  The side is capped
        at the shorter image dimension (and is at least 1), and the origin is
        shifted so the square does not leave the image.
    """
    max_h, max_w = image_shape[:2]
    longest_edge = max(bbox.w, bbox.h)
    padded = int(round(longest_edge * (1 + buffer_ratio * 2)))
    side = max(1, min(padded, max_w, max_h))

    center_x = bbox.x + bbox.w / 2
    center_y = bbox.y + bbox.h / 2
    left = int(round(center_x - side / 2))
    top = int(round(center_y - side / 2))

    # Clamp the origin so the whole square remains within the image bounds.
    left = max(0, min(left, max_w - side))
    top = max(0, min(top, max_h - side))
    return BBox(x=left, y=top, w=side, h=side)
 def draw_square(image: np.ndarray, bbox: BBox) -> np.ndarray:
    """Return a copy of ``image`` with ``bbox`` outlined; the input is untouched."""
    top_left = (bbox.x, bbox.y)
    bottom_right = (bbox.right, bbox.bottom)
    outline_color = (0, 255, 0)
    canvas = image.copy()
    cv2.rectangle(canvas, top_left, bottom_right, outline_color, 3)
    return canvas
 def crop_image(image: np.ndarray, bbox: BBox) -> np.ndarray:
    """Return the part of ``image`` spanning rows y..bottom and columns x..right."""
    rows = slice(bbox.y, bbox.bottom)
    cols = slice(bbox.x, bbox.right)
    return image[rows, cols]
 def encode_image(image: np.ndarray, ext: str = ".jpg") -> tuple[bytes, str]:
    """Encode ``image`` with OpenCV and report the matching MIME type.

    Args:
        image: Pixel array to encode.
        ext: File extension (with leading dot) selecting the output format.

    Returns:
        ``(encoded_bytes, mime_type)``.  The MIME type is looked up from the
        extension case-insensitively; extensions without a known mapping get
        ``"application/octet-stream"`` instead of being mislabelled.

    Raises:
        ValueError: If ``cv2.imencode`` reports failure.
    """
    mime_by_ext = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
        ".tif": "image/tiff",
        ".tiff": "image/tiff",
    }
    ok, encoded = cv2.imencode(ext, image)
    if not ok:
        raise ValueError("could not encode image")
    mime_type = mime_by_ext.get(ext.lower(), "application/octet-stream")
    return encoded.tobytes(), mime_type
 def _data_url(image_bytes: bytes, mime_type: str) -> str:
    import base64
    return f"data:{mime_type};base64,{base64.b64encode(image_bytes).decode('ascii')}"
 def process_image(image_bytes: bytes, filename: str) -> dict[str, Any]:
    """Run the full pipeline: decode, locate the subject, square, crop, encode.

    Args:
        image_bytes: Encoded image file contents.
        filename: Name echoed back in the result.

    Returns:
        A dict with the keys ``filename``, ``method``, ``detected_bbox``,
        ``square_bbox``, ``source_size``, ``crop_data_url``,
        ``annotated_data_url``, ``mime_type`` (of the crop) and
        ``crop_bytes_io`` (a ``BytesIO`` over the crop bytes).

    Raises:
        ValueError: Propagated from decoding or encoding failures.
    """
    from dataclasses import asdict

    image = decode_image(image_bytes)
    bbox, method = select_primary_bbox(image)
    square = square_bbox(bbox, image.shape)
    crop = crop_image(image, square)
    annotated = draw_square(image, square)
    crop_bytes, crop_mime = encode_image(crop, ".jpg")
    # Keep each image's own MIME type so the data URLs stay correct even if
    # the two encodings ever use different formats.
    annotated_bytes, annotated_mime = encode_image(annotated, ".jpg")
    return {
        "filename": filename,
        "method": method,
        # asdict() returns fresh dicts rather than exposing the frozen
        # dataclass instances' live attribute storage to callers.
        "detected_bbox": asdict(bbox),
        "square_bbox": asdict(square),
        "source_size": {"width": int(image.shape[1]), "height": int(image.shape[0])},
        "crop_data_url": _data_url(crop_bytes, crop_mime),
        "annotated_data_url": _data_url(annotated_bytes, annotated_mime),
        "mime_type": crop_mime,
        "crop_bytes_io": BytesIO(crop_bytes),
    }
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,5 @@
 fastapi
 uvicorn[standard]
 opencv-python-headless
 python-dotenv
 python-multipart
--- a/tests/test_vision.py
+++ b/tests/test_vision.py
@@ -0,0 +1,30 @@
 import numpy as np
 from app.vision import BBox, crop_image, detect_salient_object, square_bbox
 def test_square_bbox_is_square_and_inside_bounds():
    """The squared box has equal sides and lies within a 120x100 image."""
    image_shape = (100, 120, 3)  # (height, width, channels)
    detected = BBox(x=10, y=20, w=40, h=20)
    result = square_bbox(detected, image_shape, buffer_ratio=0.1)
    assert result.h == result.w
    assert min(result.x, result.y) >= 0
    assert result.right <= image_shape[1]
    assert result.bottom <= image_shape[0]
 def test_crop_image_uses_bbox():
    """Cropping with a 30x20 box yields an array of 20 rows by 30 columns."""
    canvas = np.zeros((60, 80, 3), dtype=np.uint8)
    canvas[10:30, 20:50] = 255
    region = BBox(x=20, y=10, w=30, h=20)
    cropped = crop_image(canvas, region)
    rows, cols = cropped.shape[:2]
    assert (rows, cols) == (20, 30)
 def test_detect_salient_object_finds_rectangle():
    """A white 50x50 block on black is found with sides of at least 45 px."""
    scene = np.zeros((100, 100, 3), dtype=np.uint8)
    scene[25:75, 30:80] = 255
    found = detect_salient_object(scene)
    assert found is not None
    assert min(found.w, found.h) >= 45