Add dockerized detector and UI cleanup

2026-04-11 16:45:22 +02:00
parent 3b5a9e8635
commit ecbf948a74
7 changed files with 148 additions and 26 deletions
--- a/app/main.py
+++ b/app/main.py
@@ -2,7 +2,7 @@ from fastapi import FastAPI, File, Form, HTTPException, UploadFile
 from fastapi.responses import HTMLResponse, StreamingResponse
 from app.config import settings

-app = FastAPI(title="face-lock", version="0.1.0")
+app = FastAPI(title="face-lock", version="0.2.0")


@app.get("/health")
@@ -25,16 +25,30 @@ def index():
  <title>face-lock</title>
 </head>
 <body class="bg-slate-950 text-slate-100 min-h-screen">
-  <main class="mx-auto max-w-5xl p-6">
+  <main class="mx-auto max-w-6xl p-6">
    <div class="mb-6">
      <h1 class="text-3xl font-bold">face-lock</h1>
-      <p class="text-slate-400">Drop an image, get the primary subject squared and cropped.</p>
+      <p class="text-slate-400">Auto-detect the subject, square it up, and crop with buffer.</p>
    </div>
    <div class="grid gap-6 md:grid-cols-2">
      <section class="rounded-2xl border border-slate-800 bg-slate-900 p-4">
-        <input id="file" type="file" accept="image/*" class="block w-full rounded-lg border border-slate-700 bg-slate-950 p-3" />
-        <label class="mt-4 block text-sm text-slate-400">Buffer ratio</label>
-        <input id="buffer_ratio" type="number" step="0.05" min="0" max="0.5" value="0.15" class="block w-full rounded-lg border border-slate-700 bg-slate-950 p-3" />
+        <label class="block text-sm text-slate-400">Image</label>
+        <input id="file" type="file" accept="image/*" class="mt-2 block w-full rounded-lg border border-slate-700 bg-slate-950 p-3" />
+        <div class="mt-4 grid gap-4 sm:grid-cols-2">
+          <div>
+            <label class="block text-sm text-slate-400">Detector</label>
+            <select id="detector" class="mt-2 block w-full rounded-lg border border-slate-700 bg-slate-950 p-3">
+              <option value="auto">Auto</option>
+              <option value="face">Face</option>
+              <option value="person">Person</option>
+              <option value="salient">Subject</option>
+            </select>
+          </div>
+          <div>
+            <label class="block text-sm text-slate-400">Buffer ratio</label>
+            <input id="buffer_ratio" type="number" step="0.05" min="0" max="0.6" value="0.20" class="mt-2 block w-full rounded-lg border border-slate-700 bg-slate-950 p-3" />
+          </div>
+        </div>
        <button id="go" class="mt-4 rounded-lg bg-cyan-500 px-4 py-2 font-semibold text-slate-950">Process</button>
        <pre id="meta" class="mt-4 whitespace-pre-wrap rounded-lg bg-slate-950 p-3 text-xs text-slate-300"></pre>
      </section>
@@ -63,11 +77,20 @@ def index():
      if (!file.files.length) return;
      const form = new FormData();
      form.append('file', file.files[0]);
+      form.append('detector', document.getElementById('detector').value);
      form.append('buffer_ratio', document.getElementById('buffer_ratio').value);
      meta.textContent = 'Working...';
      const resp = await fetch('/api/focus', { method: 'POST', body: form });
      const data = await resp.json();
-      meta.textContent = JSON.stringify(data, null, 2);
+      meta.textContent = JSON.stringify({
+        filename: data.filename,
+        detector: data.detector,
+        method: data.method,
+        buffer_ratio: data.buffer_ratio,
+        detected_bbox: data.detected_bbox,
+        square_bbox: data.square_bbox,
+        source_size: data.source_size,
+      }, null, 2);
      crop.src = data.crop_data_url;
      annotated.src = data.annotated_data_url;
      crop.classList.remove('hidden');
@@ -81,23 +104,31 @@ def index():


@app.post("/api/focus")
-async def focus(file: UploadFile = File(...), buffer_ratio: float = Form(0.15)):
+async def focus(  # returns whatever process_image() returns for the uploaded file
+    file: UploadFile = File(...),
+    buffer_ratio: float = Form(0.15),  # API default is 0.15; the bundled page pre-fills 0.20
+    detector: str = Form("auto"),  # "auto" | "face" | "person" | "salient"; select_primary_bbox treats anything else as "auto"
+):
    from app.vision import process_image

    try:
        payload = await file.read()
-        return process_image(payload, file.filename or "upload", buffer_ratio=buffer_ratio)
+        return process_image(payload, file.filename or "upload", buffer_ratio=buffer_ratio, detector=detector)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc


@app.post("/api/focus/image")
-async def focus_image(file: UploadFile = File(...), buffer_ratio: float = Form(0.15)):
+async def focus_image(  # same form fields as /api/focus, but streams result["crop_bytes_io"] instead of returning the dict
+    file: UploadFile = File(...),
+    buffer_ratio: float = Form(0.15),  # API default is 0.15; the bundled page pre-fills 0.20
+    detector: str = Form("auto"),  # "auto" | "face" | "person" | "salient"; select_primary_bbox treats anything else as "auto"
+):
    from app.vision import process_image

    try:
        payload = await file.read()
-        result = process_image(payload, file.filename or "upload", buffer_ratio=buffer_ratio)
+        result = process_image(payload, file.filename or "upload", buffer_ratio=buffer_ratio, detector=detector)
        return StreamingResponse(result["crop_bytes_io"], media_type=result["mime_type"])
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
--- a/app/vision.py
+++ b/app/vision.py
@@ -2,6 +2,7 @@ from __future__ import annotations

 from dataclasses import dataclass
 from io import BytesIO
+from pathlib import Path
 from typing import Any

 import cv2
@@ -24,6 +25,9 @@ class BBox:
        return self.y + self.h


+FACE_CASCADE = cv2.CascadeClassifier(  # built once at import time and reused by detect_face
+    str(Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml")  # XML resolved from the cv2.data.haarcascades directory
+)
HOG = cv2.HOGDescriptor()
HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

@@ -36,20 +40,59 @@ def decode_image(image_bytes: bytes) -> np.ndarray:
    return image


-def select_primary_bbox(image: np.ndarray) -> tuple[BBox, str]:
+def select_primary_bbox(image: np.ndarray, detector: str = "auto") -> tuple[BBox, str]:  # returns (bbox, label of the method that produced it)
+    detector = (detector or "auto").strip().lower()  # None/"" become "auto"; case and surrounding whitespace are ignored
+
+    if detector == "face":  # explicit modes run only the requested detector, then the centered fallback
+        face_bbox = detect_face(image)
+        if face_bbox is not None:
+            return face_bbox, "face_cascade"
+        return fallback_bbox(image), "center_fallback"
+
+    if detector == "person":
+        person_bbox = detect_person(image)
+        if person_bbox is not None:
+            return person_bbox, "person_hog"
+        return fallback_bbox(image), "center_fallback"
+
+    if detector == "salient":
+        salient_bbox = detect_salient_object(image)
+        if salient_bbox is not None:
+            return salient_bbox, "salient_contour"
+        return fallback_bbox(image), "center_fallback"
+
+    face_bbox = detect_face(image)  # "auto" and any unrecognized value: try face, then person, then salient
+    if face_bbox is not None:
+        return face_bbox, "face_cascade"
+
    person_bbox = detect_person(image)
    if person_bbox is not None:
        return person_bbox, "person_hog"

-    contour_bbox = detect_salient_object(image)
-    if contour_bbox is not None:
-        return contour_bbox, "contour"
+    salient_bbox = detect_salient_object(image)
+    if salient_bbox is not None:
+        return salient_bbox, "salient_contour"  # label renamed from "contour" by this commit

+    return fallback_bbox(image), "center_fallback"  # nothing detected: centered square
+
+
+def fallback_bbox(image: np.ndarray) -> BBox:  # centered square used when no detector yields a box
    h, w = image.shape[:2]
-    side = int(min(w, h) * 0.8)
+    side = int(min(w, h) * 0.85)  # 85% of the shorter image side
+    side = max(1, min(side, w, h))  # at least 1 px (int() can truncate to 0) and never larger than the image
    x = max(0, (w - side) // 2)
    y = max(0, (h - side) // 2)
-    return BBox(x=x, y=y, w=side, h=side), "center_fallback"
+    return BBox(x=x, y=y, w=side, h=side)  # the method label is now attached by the caller
+
+
+def detect_face(image: np.ndarray) -> BBox | None:
+    """Return the largest-area face FACE_CASCADE finds in a BGR image, or None when it finds none."""
+    gray = cv2.equalizeHist(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))  # cascade runs on equalized grayscale
+    faces = FACE_CASCADE.detectMultiScale(gray, scaleFactor=1.08, minNeighbors=5, minSize=(24, 24))
+    if len(faces) == 0:  # len() works whether the result is an empty tuple or an array
+        return None
+    x, y, w, h = max((tuple(map(int, face)) for face in faces), key=lambda rect: rect[2] * rect[3])  # tuple(): a bare map object is not subscriptable in the key
+    return BBox(x=x, y=y, w=w, h=h)


 def detect_person(image: np.ndarray) -> BBox | None:
@@ -63,10 +106,11 @@ def detect_person(image: np.ndarray) -> BBox | None:

 def detect_salient_object(image: np.ndarray) -> BBox | None:
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    blurred = cv2.GaussianBlur(gray, (7, 7), 0)
-    edges = cv2.Canny(blurred, 40, 120)
-    kernel = np.ones((5, 5), np.uint8)
-    closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2)
+    blurred = cv2.GaussianBlur(gray, (9, 9), 0)
+    edges = cv2.Canny(blurred, 30, 110)
+    kernel = np.ones((13, 13), np.uint8)
+    expanded = cv2.dilate(edges, kernel, iterations=1)
+    closed = cv2.morphologyEx(expanded, cv2.MORPH_CLOSE, kernel, iterations=1)
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None
@@ -77,7 +121,7 @@ def detect_salient_object(image: np.ndarray) -> BBox | None:
    for contour in contours:
        x, y, bw, bh = cv2.boundingRect(contour)
        area = bw * bh
-        if area < max(500, int(image_area * 0.01)):
+        if area < max(1000, int(image_area * 0.015)):
            continue
        candidates.append((area, BBox(x=x, y=y, w=bw, h=bh)))

@@ -89,7 +133,7 @@ def detect_salient_object(image: np.ndarray) -> BBox | None:

 def square_bbox(bbox: BBox, image_shape: tuple[int, int, int], buffer_ratio: float = 0.15) -> BBox:
    image_h, image_w = image_shape[:2]
-    buffer_ratio = max(0.0, min(buffer_ratio, 0.5))
+    buffer_ratio = max(0.0, min(buffer_ratio, 0.6))
    side = int(round(max(bbox.w, bbox.h) * (1 + buffer_ratio * 2)))
    side = max(1, min(side, image_w, image_h))

@@ -127,9 +171,9 @@ def _data_url(image_bytes: bytes, mime_type: str) -> str:
    return f"data:{mime_type};base64,{base64.b64encode(image_bytes).decode('ascii')}"


-def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15) -> dict[str, Any]:
+def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15, detector: str = "auto") -> dict[str, Any]:
    image = decode_image(image_bytes)
-    bbox, method = select_primary_bbox(image)
+    bbox, method = select_primary_bbox(image, detector=detector)
    square = square_bbox(bbox, image.shape, buffer_ratio=buffer_ratio)
    crop = crop_image(image, square)
    annotated = draw_square(image, square)
@@ -139,6 +183,7 @@ def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15)

    return {
        "filename": filename,
+        "detector": detector,
        "method": method,
        "buffer_ratio": buffer_ratio,
        "detected_bbox": bbox.__dict__,