Initial FastAPI face-lock scaffold

2026-04-11 16:32:26 +02:00
commit 660ce4e7cc
9 changed files with 306 additions and 0 deletions
--- a/app/__init__.py
+++ b/app/__init__.py
--- a/app/config.py
+++ b/app/config.py
@@ -0,0 +1,13 @@
+from dataclasses import dataclass
+from dotenv import load_dotenv
+import os
+
+# Load variables from a .env file (when one is found) into the process
+# environment, so the os.getenv() lookup in Settings below can see them.
+load_dotenv()
+
+
+@dataclass(frozen=True)
+class Settings:
+    """Immutable application settings resolved from environment variables.
+
+    The default for ``env`` is computed once, when this class body runs at
+    import time; later changes to the process environment are not picked up.
+    """
+
+    # Environment name, stripped and lower-cased.  An unset, empty, or
+    # whitespace-only ENV falls back to "prod" so that a blank value never
+    # leaks through as an empty environment name.
+    env: str = os.getenv("ENV", "").strip().lower() or "prod"
+
+
+# Module-level settings instance, built once at import time.
+settings = Settings()
--- a/app/main.py
+++ b/app/main.py
@@ -0,0 +1,88 @@
+from fastapi import FastAPI, File, HTTPException, UploadFile
+from fastapi.responses import HTMLResponse, StreamingResponse
+from app.config import settings
+
+# Application object; the route decorators below register handlers on it.
+app = FastAPI(title="face-lock", version="0.1.0")
+
+
+@app.get("/health")
+def health():
+    """Report liveness together with the configured environment name."""
+    status = {"ok": True}
+    status["env"] = settings.env
+    return status
+
+
+@app.get("/", response_class=HTMLResponse)
+def index():
+    """Serve the landing page.
+
+    Outside the ``dev`` environment only a short placeholder is returned.
+    In ``dev`` the page is a small upload form that posts the chosen file to
+    ``/api/focus`` and shows the returned JSON and cropped image.
+    """
+    if settings.env != "dev":
+        return HTMLResponse("<h1>face-lock</h1><p>Set ENV=dev for the UI.</p>")
+    # The script checks resp.ok before reading the body as JSON: an error
+    # response has no crop_data_url, and assigning undefined to img.src would
+    # show a broken image.  Network failures are reported instead of leaving
+    # the "Working..." message on screen.
+    return HTMLResponse(
+        """
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <script src="https://cdn.tailwindcss.com"></script>
+  <title>face-lock</title>
+</head>
+<body class="bg-slate-950 text-slate-100 min-h-screen">
+  <main class="mx-auto max-w-5xl p-6">
+    <div class="mb-6">
+      <h1 class="text-3xl font-bold">face-lock</h1>
+      <p class="text-slate-400">Drop an image, get the primary subject squared and cropped.</p>
+    </div>
+    <div class="grid gap-6 md:grid-cols-2">
+      <section class="rounded-2xl border border-slate-800 bg-slate-900 p-4">
+        <input id="file" type="file" accept="image/*" class="block w-full rounded-lg border border-slate-700 bg-slate-950 p-3" />
+        <button id="go" class="mt-4 rounded-lg bg-cyan-500 px-4 py-2 font-semibold text-slate-950">Process</button>
+        <pre id="meta" class="mt-4 whitespace-pre-wrap rounded-lg bg-slate-950 p-3 text-xs text-slate-300"></pre>
+      </section>
+      <section class="rounded-2xl border border-slate-800 bg-slate-900 p-4">
+        <div class="mb-3 text-sm font-semibold text-slate-400">Result</div>
+        <img id="result" class="hidden w-full rounded-xl border border-slate-800" />
+      </section>
+    </div>
+  </main>
+  <script>
+    const file = document.getElementById('file');
+    const go = document.getElementById('go');
+    const result = document.getElementById('result');
+    const meta = document.getElementById('meta');
+    go.onclick = async () => {
+      if (!file.files.length) return;
+      const form = new FormData();
+      form.append('file', file.files[0]);
+      meta.textContent = 'Working...';
+      result.classList.add('hidden');
+      try {
+        const resp = await fetch('/api/focus', { method: 'POST', body: form });
+        if (!resp.ok) {
+          const text = await resp.text();
+          meta.textContent = 'Error ' + resp.status + ': ' + text;
+          return;
+        }
+        const data = await resp.json();
+        meta.textContent = JSON.stringify(data, null, 2);
+        result.src = data.crop_data_url;
+        result.classList.remove('hidden');
+      } catch (err) {
+        meta.textContent = 'Request failed: ' + err;
+      }
+    };
+  </script>
+</body>
+</html>
+        """
+    )
+
+
+@app.post("/api/focus")
+async def focus(file: UploadFile = File(...)):
+    """Process an uploaded image and return the detection result as JSON.
+
+    The response carries the metadata produced by ``process_image`` (bounding
+    boxes, source size, data URLs) without the raw in-memory byte stream.
+
+    Raises:
+        HTTPException: status 400 when processing raises ``ValueError``.
+    """
+    from app.vision import process_image
+
+    try:
+        payload = await file.read()
+        result = process_image(payload, file.filename or "upload")
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+
+    # "crop_bytes_io" is a BytesIO stream, not JSON data; the cropped image is
+    # already present in the payload as "crop_data_url", so drop the stream
+    # before the result is serialised.
+    return {key: value for key, value in result.items() if key != "crop_bytes_io"}
+
+
+@app.post("/api/focus/image")
+async def focus_image(file: UploadFile = File(...)):
+    """Process an uploaded image and stream back only the cropped picture.
+
+    A ``ValueError`` raised while handling the upload is reported to the
+    client as HTTP 400 with the error text as the detail.
+    """
+    from app.vision import process_image
+
+    upload_name = file.filename or "upload"
+    try:
+        raw = await file.read()
+        outcome = process_image(raw, upload_name)
+        body, content_type = outcome["crop_bytes_io"], outcome["mime_type"]
+        return StreamingResponse(body, media_type=content_type)
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
--- a/app/vision.py
+++ b/app/vision.py
@@ -0,0 +1,149 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from io import BytesIO
+from typing import Any
+
+import cv2
+import numpy as np
+
+
+@dataclass(frozen=True)
+class BBox:
+    """Immutable axis-aligned rectangle given by its origin and size."""
+
+    x: int  # origin column
+    y: int  # origin row
+    w: int  # width
+    h: int  # height
+
+    @property
+    def right(self) -> int:
+        """Return ``x + w``, the exclusive column bound used when slicing."""
+        left, width = self.x, self.w
+        return left + width
+
+    @property
+    def bottom(self) -> int:
+        """Return ``y + h``, the exclusive row bound used when slicing."""
+        top, height = self.y, self.h
+        return top + height
+
+
+# Module-level HOG descriptor configured with OpenCV's default people-detector
+# SVM; it is built once at import time and used by detect_person().
+HOG = cv2.HOGDescriptor()
+HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
+
+
+def decode_image(image_bytes: bytes) -> np.ndarray:
+    """Decode an encoded image into a colour pixel array.
+
+    Args:
+        image_bytes: Raw contents of an image file.
+
+    Returns:
+        The decoded image as produced by ``cv2.imdecode`` with
+        ``cv2.IMREAD_COLOR``.
+
+    Raises:
+        ValueError: If the payload is empty or cannot be decoded.
+    """
+    # OpenCV asserts on an empty buffer and raises cv2.error rather than
+    # returning None; callers only handle ValueError, so reject empty input
+    # before it reaches imdecode.
+    if not image_bytes:
+        raise ValueError("could not decode image: empty payload")
+
+    data = np.frombuffer(image_bytes, dtype=np.uint8)
+    image = cv2.imdecode(data, cv2.IMREAD_COLOR)
+    if image is None:
+        raise ValueError("could not decode image")
+    return image
+
+
+def select_primary_bbox(image: np.ndarray) -> tuple[BBox, str]:
+    """Pick the region to focus on and name the strategy that produced it.
+
+    Person detection is tried first, then the contour-based detector.  When
+    neither yields a box, a centred square spanning 80% of the shorter image
+    side is returned with the label ``"center_fallback"``.
+    """
+    strategies = (
+        (detect_person, "person_hog"),
+        (detect_salient_object, "contour"),
+    )
+    for detector, label in strategies:
+        found = detector(image)
+        if found is not None:
+            return found, label
+
+    height, width = image.shape[:2]
+    side = int(min(width, height) * 0.8)
+    left = max(0, (width - side) // 2)
+    top = max(0, (height - side) // 2)
+    return BBox(x=left, y=top, w=side, h=side), "center_fallback"
+
+
+def detect_person(image: np.ndarray) -> BBox | None:
+    """Return the largest-area HOG person detection, or ``None`` if there is none."""
+    detections, scores = HOG.detectMultiScale(
+        image,
+        winStride=(8, 8),
+        padding=(8, 8),
+        scale=1.05,
+    )
+    if len(detections) == 0:
+        return None
+
+    def rect_area(pair) -> int:
+        # Each pair is (rect, score); a rect is (x, y, width, height).
+        rect = pair[0]
+        return int(rect[2]) * int(rect[3])
+
+    # Detections are ranked by area only; the scores do not affect the choice.
+    largest, _score = max(zip(detections, scores), key=rect_area)
+    left, top, width, height = (int(value) for value in largest)
+    return BBox(x=left, y=top, w=width, h=height)
+
+
+def detect_salient_object(image: np.ndarray) -> BBox | None:
+    """Return the bounding box of the largest edge-derived contour, if any.
+
+    The image is converted to grayscale, blurred, run through Canny edge
+    detection and morphologically closed; the bounding rectangles of the
+    resulting external contours are then compared by area.  Rectangles smaller
+    than ``max(500, 1% of the image area)`` are ignored.  ``None`` is returned
+    when no rectangle qualifies.
+    """
+    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    smoothed = cv2.GaussianBlur(grayscale, (7, 7), 0)
+    edge_map = cv2.Canny(smoothed, 40, 120)
+    structuring = np.ones((5, 5), np.uint8)
+    sealed = cv2.morphologyEx(edge_map, cv2.MORPH_CLOSE, structuring, iterations=2)
+    outlines, _ = cv2.findContours(sealed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    if not outlines:
+        return None
+
+    rows, cols = image.shape[:2]
+    min_area = max(500, int(rows * cols * 0.01))
+
+    boxes = [
+        BBox(x=bx, y=by, w=bw, h=bh)
+        for bx, by, bw, bh in map(cv2.boundingRect, outlines)
+    ]
+    large_enough = [box for box in boxes if box.w * box.h >= min_area]
+    if not large_enough:
+        return None
+
+    # max() keeps the first of equally large boxes, i.e. contour order.
+    return max(large_enough, key=lambda box: box.w * box.h)
+
+
+def square_bbox(bbox: BBox, image_shape: tuple[int, int, int], buffer_ratio: float = 0.15) -> BBox:
+    """Expand ``bbox`` into a padded square that fits inside the image.
+
+    The square's side is the longer side of ``bbox`` grown by
+    ``buffer_ratio`` on each end, capped at the smaller image dimension and
+    never below 1.  The square is centred on ``bbox`` and then shifted so it
+    stays within the image bounds.
+    """
+    image_h, image_w = image_shape[:2]
+
+    padded = max(bbox.w, bbox.h) * (1 + buffer_ratio * 2)
+    side = max(1, min(int(round(padded)), image_w, image_h))
+    half = side / 2
+
+    def origin(start: int, length: int, limit: int) -> int:
+        # Centre the square on this axis, then clamp it into [0, limit - side].
+        wanted = int(round(start + length / 2 - half))
+        return max(0, min(wanted, limit - side))
+
+    left = origin(bbox.x, bbox.w, image_w)
+    top = origin(bbox.y, bbox.h, image_h)
+    return BBox(x=left, y=top, w=side, h=side)
+
+
+def draw_square(image: np.ndarray, bbox: BBox) -> np.ndarray:
+    """Return a copy of ``image`` with ``bbox`` outlined in green.
+
+    The input array is not modified; drawing happens on the copy.
+    """
+    canvas = image.copy()
+    top_left = (bbox.x, bbox.y)
+    bottom_right = (bbox.right, bbox.bottom)
+    green = (0, 255, 0)
+    cv2.rectangle(canvas, top_left, bottom_right, green, 3)
+    return canvas
+
+
+def crop_image(image: np.ndarray, bbox: BBox) -> np.ndarray:
+    """Return the region of ``image`` covered by ``bbox``.
+
+    Rows ``bbox.y`` up to (not including) ``bbox.bottom`` and columns
+    ``bbox.x`` up to (not including) ``bbox.right`` are selected by slicing.
+    """
+    rows = slice(bbox.y, bbox.bottom)
+    cols = slice(bbox.x, bbox.right)
+    return image[rows, cols]
+
+
+def encode_image(image: np.ndarray, ext: str = ".jpg") -> tuple[bytes, str]:
+    """Encode ``image`` in the format selected by ``ext``.
+
+    Args:
+        image: Pixel array to encode.
+        ext: File extension, including the leading dot, passed to
+            ``cv2.imencode`` to choose the output format.
+
+    Returns:
+        A ``(encoded_bytes, mime_type)`` pair.
+
+    Raises:
+        ValueError: If OpenCV reports that encoding failed.
+    """
+    # Previously every non-JPEG extension was labelled "image/png", which
+    # mislabels e.g. WebP or BMP output; map the common extensions explicitly
+    # and fall back to a generic binary type for anything else.
+    mime_by_ext = {
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".jpe": "image/jpeg",
+        ".png": "image/png",
+        ".webp": "image/webp",
+        ".bmp": "image/bmp",
+        ".tif": "image/tiff",
+        ".tiff": "image/tiff",
+    }
+
+    ok, encoded = cv2.imencode(ext, image)
+    if not ok:
+        raise ValueError("could not encode image")
+    mime_type = mime_by_ext.get(ext.lower(), "application/octet-stream")
+    return encoded.tobytes(), mime_type
+
+
+def _data_url(image_bytes: bytes, mime_type: str) -> str:
+    """Build a base64 ``data:`` URL that embeds ``image_bytes`` as ``mime_type``."""
+    from base64 import b64encode
+
+    encoded = b64encode(image_bytes).decode("ascii")
+    return "data:{};base64,{}".format(mime_type, encoded)
+
+
+def process_image(image_bytes: bytes, filename: str) -> dict[str, Any]:
+    """Run the whole pipeline on one encoded image and describe the result.
+
+    The image is decoded, a primary region is selected, that region is turned
+    into a padded square, and both the cropped square and an annotated copy
+    of the source are encoded as JPEG.
+
+    Returns:
+        A dict with the given filename, the detection method, the detected
+        and squared boxes, the source size, data URLs for the crop and the
+        annotated image, the MIME type, and a ``BytesIO`` over the crop bytes.
+
+    Raises:
+        ValueError: Propagated from decoding or encoding failures.
+    """
+    image = decode_image(image_bytes)
+    detected, method = select_primary_bbox(image)
+    square = square_bbox(detected, image.shape)
+
+    crop_bytes, mime_type = encode_image(crop_image(image, square), ".jpg")
+    annotated_bytes, _ = encode_image(draw_square(image, square), ".jpg")
+    height, width = image.shape[:2]
+
+    summary: dict[str, Any] = {}
+    summary["filename"] = filename
+    summary["method"] = method
+    summary["detected_bbox"] = vars(detected)
+    summary["square_bbox"] = vars(square)
+    summary["source_size"] = {"width": int(width), "height": int(height)}
+    summary["crop_data_url"] = _data_url(crop_bytes, mime_type)
+    summary["annotated_data_url"] = _data_url(annotated_bytes, mime_type)
+    summary["mime_type"] = mime_type
+    summary["crop_bytes_io"] = BytesIO(crop_bytes)
+    return summary