Add dockerized detector and UI cleanup

This commit is contained in:
2026-04-11 16:45:22 +02:00
parent 3b5a9e8635
commit ecbf948a74
7 changed files with 148 additions and 26 deletions

6
.dockerignore Normal file
View File

@@ -0,0 +1,6 @@
__pycache__/
*.pyc
.venv/
.env
.pytest_cache/
.git/

19
Dockerfile Normal file
View File

@@ -0,0 +1,19 @@
# Container image for the face-lock FastAPI service.
FROM python:3.13-slim
# Do not write .pyc files, and keep stdout/stderr unbuffered so log lines
# show up in `docker logs` immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1
WORKDIR /app
# System shared libraries; presumably the ones the OpenCV wheel loads at
# import time (app.vision imports cv2) -- TODO confirm against requirements.txt.
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update \
&& apt-get install -y --no-install-recommends libgl1 libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/*
# requirements.txt is copied and installed before the rest of the source, so
# the dependency layer is only rebuilt when requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 8000
# Bind to 0.0.0.0 so the server is reachable through the published port.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -11,3 +11,9 @@ uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
``` ```
Set `ENV=dev` to enable the Tailwind UI at `/`. Set `ENV=dev` to enable the Tailwind UI at `/`.
## Docker
```bash
docker compose up --build
```

View File

@@ -2,7 +2,7 @@ from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import HTMLResponse, StreamingResponse from fastapi.responses import HTMLResponse, StreamingResponse
from app.config import settings from app.config import settings
app = FastAPI(title="face-lock", version="0.1.0") app = FastAPI(title="face-lock", version="0.2.0")
@app.get("/health") @app.get("/health")
@@ -25,16 +25,30 @@ def index():
<title>face-lock</title> <title>face-lock</title>
</head> </head>
<body class="bg-slate-950 text-slate-100 min-h-screen"> <body class="bg-slate-950 text-slate-100 min-h-screen">
<main class="mx-auto max-w-5xl p-6"> <main class="mx-auto max-w-6xl p-6">
<div class="mb-6"> <div class="mb-6">
<h1 class="text-3xl font-bold">face-lock</h1> <h1 class="text-3xl font-bold">face-lock</h1>
<p class="text-slate-400">Drop an image, get the primary subject squared and cropped.</p> <p class="text-slate-400">Auto-detect the subject, square it up, and crop with buffer.</p>
</div> </div>
<div class="grid gap-6 md:grid-cols-2"> <div class="grid gap-6 md:grid-cols-2">
<section class="rounded-2xl border border-slate-800 bg-slate-900 p-4"> <section class="rounded-2xl border border-slate-800 bg-slate-900 p-4">
<input id="file" type="file" accept="image/*" class="block w-full rounded-lg border border-slate-700 bg-slate-950 p-3" /> <label class="block text-sm text-slate-400">Image</label>
<label class="mt-4 block text-sm text-slate-400">Buffer ratio</label> <input id="file" type="file" accept="image/*" class="mt-2 block w-full rounded-lg border border-slate-700 bg-slate-950 p-3" />
<input id="buffer_ratio" type="number" step="0.05" min="0" max="0.5" value="0.15" class="block w-full rounded-lg border border-slate-700 bg-slate-950 p-3" /> <div class="mt-4 grid gap-4 sm:grid-cols-2">
<div>
<label class="block text-sm text-slate-400">Detector</label>
<select id="detector" class="mt-2 block w-full rounded-lg border border-slate-700 bg-slate-950 p-3">
<option value="auto">Auto</option>
<option value="face">Face</option>
<option value="person">Person</option>
<option value="salient">Subject</option>
</select>
</div>
<div>
<label class="block text-sm text-slate-400">Buffer ratio</label>
<input id="buffer_ratio" type="number" step="0.05" min="0" max="0.6" value="0.20" class="mt-2 block w-full rounded-lg border border-slate-700 bg-slate-950 p-3" />
</div>
</div>
<button id="go" class="mt-4 rounded-lg bg-cyan-500 px-4 py-2 font-semibold text-slate-950">Process</button> <button id="go" class="mt-4 rounded-lg bg-cyan-500 px-4 py-2 font-semibold text-slate-950">Process</button>
<pre id="meta" class="mt-4 whitespace-pre-wrap rounded-lg bg-slate-950 p-3 text-xs text-slate-300"></pre> <pre id="meta" class="mt-4 whitespace-pre-wrap rounded-lg bg-slate-950 p-3 text-xs text-slate-300"></pre>
</section> </section>
@@ -63,11 +77,20 @@ def index():
if (!file.files.length) return; if (!file.files.length) return;
const form = new FormData(); const form = new FormData();
form.append('file', file.files[0]); form.append('file', file.files[0]);
form.append('detector', document.getElementById('detector').value);
form.append('buffer_ratio', document.getElementById('buffer_ratio').value); form.append('buffer_ratio', document.getElementById('buffer_ratio').value);
meta.textContent = 'Working...'; meta.textContent = 'Working...';
const resp = await fetch('/api/focus', { method: 'POST', body: form }); const resp = await fetch('/api/focus', { method: 'POST', body: form });
const data = await resp.json(); const data = await resp.json();
meta.textContent = JSON.stringify(data, null, 2); meta.textContent = JSON.stringify({
filename: data.filename,
detector: data.detector,
method: data.method,
buffer_ratio: data.buffer_ratio,
detected_bbox: data.detected_bbox,
square_bbox: data.square_bbox,
source_size: data.source_size,
}, null, 2);
crop.src = data.crop_data_url; crop.src = data.crop_data_url;
annotated.src = data.annotated_data_url; annotated.src = data.annotated_data_url;
crop.classList.remove('hidden'); crop.classList.remove('hidden');
@@ -81,23 +104,31 @@ def index():
# POST /api/focus: run the focus pipeline on an uploaded image and return the
# dict produced by process_image as the response body.
# (Plain comments instead of a docstring: FastAPI would publish a docstring as
# the endpoint description in the generated OpenAPI schema.)
@app.post("/api/focus")
async def focus(
    file: UploadFile = File(...),
    buffer_ratio: float = Form(0.15),
    detector: str = Form("auto"),
):
    # Imported lazily, at request time, exactly as before.
    from app.vision import process_image

    try:
        raw = await file.read()
        name = file.filename or "upload"
        result = process_image(raw, name, buffer_ratio=buffer_ratio, detector=detector)
    except ValueError as err:
        # A ValueError from reading or processing is reported as a client error.
        raise HTTPException(status_code=400, detail=str(err)) from err
    return result
# POST /api/focus/image: same pipeline as /api/focus, but the response is the
# cropped image itself, streamed with the MIME type reported by process_image.
# (Plain comments instead of a docstring: FastAPI would publish a docstring as
# the endpoint description in the generated OpenAPI schema.)
@app.post("/api/focus/image")
async def focus_image(
    file: UploadFile = File(...),
    buffer_ratio: float = Form(0.15),
    detector: str = Form("auto"),
):
    # Imported lazily, at request time, exactly as before.
    from app.vision import process_image

    try:
        raw = await file.read()
        name = file.filename or "upload"
        outcome = process_image(raw, name, buffer_ratio=buffer_ratio, detector=detector)
        # Built inside the try block so the set of guarded statements is unchanged.
        response = StreamingResponse(outcome["crop_bytes_io"], media_type=outcome["mime_type"])
        return response
    except ValueError as err:
        # A ValueError from reading or processing is reported as a client error.
        raise HTTPException(status_code=400, detail=str(err)) from err

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from io import BytesIO from io import BytesIO
from pathlib import Path
from typing import Any from typing import Any
import cv2 import cv2
@@ -24,6 +25,9 @@ class BBox:
return self.y + self.h return self.y + self.h
FACE_CASCADE = cv2.CascadeClassifier(
str(Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml")
)
HOG = cv2.HOGDescriptor() HOG = cv2.HOGDescriptor()
HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector()) HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
@@ -36,20 +40,59 @@ def decode_image(image_bytes: bytes) -> np.ndarray:
return image return image
def select_primary_bbox(image: np.ndarray, detector: str = "auto") -> tuple[BBox, str]:
    """Pick the bounding box of the main subject and name the method that found it.

    Args:
        image: Decoded image array handed unchanged to the detectors.
        detector: ``"face"``, ``"person"`` or ``"salient"`` runs only that
            detector. Any other value (including ``"auto"``, an empty string
            or ``None``) runs all three in that order and keeps the first hit.
            The value is stripped and lower-cased before comparison.

    Returns:
        ``(bbox, method)`` where ``method`` is ``"face_cascade"``,
        ``"person_hog"``, ``"salient_contour"``, or ``"center_fallback"`` when
        no detector that was run produced a box.
    """
    mode = (detector or "auto").strip().lower()

    # Insertion order doubles as the priority order of the auto chain.
    stages = {
        "face": (detect_face, "face_cascade"),
        "person": (detect_person, "person_hog"),
        "salient": (detect_salient_object, "salient_contour"),
    }
    chain = [stages[mode]] if mode in stages else list(stages.values())

    for locate, label in chain:
        found = locate(image)
        if found is not None:
            return found, label

    return fallback_bbox(image), "center_fallback"
def fallback_bbox(image: np.ndarray) -> BBox:
    """Return a centred square box used when no detector produced a result.

    The square's side is 85% of the shorter image dimension (truncated to an
    int), kept within ``1 .. min(width, height)`` with a floor of 1.
    """
    height, width = image.shape[:2]
    shorter = min(width, height)
    edge = max(1, min(int(shorter * 0.85), width, height))
    # Centre the square; max(0, ...) guards against a negative offset.
    left = max(0, (width - edge) // 2)
    top = max(0, (height - edge) // 2)
    return BBox(x=left, y=top, w=edge, h=edge)
def detect_face(image: np.ndarray) -> BBox | None:
    """Return the largest face reported by the Haar cascade, or ``None``.

    Args:
        image: Colour image array; it is converted with ``COLOR_BGR2GRAY``,
            so BGR channel order is expected.

    Returns:
        A ``BBox`` for the detection with the greatest ``w * h`` area, or
        ``None`` when the cascade reports no faces.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Histogram equalisation before detection (presumably to make the cascade
    # less sensitive to lighting differences).
    gray = cv2.equalizeHist(gray)
    faces = FACE_CASCADE.detectMultiScale(gray, scaleFactor=1.08, minNeighbors=5, minSize=(24, 24))
    # len() rather than truthiness: a non-empty result is a NumPy array, whose
    # truth value is ambiguous.
    if len(faces) == 0:
        return None

    # Turn every detection into a tuple of plain ints *before* ranking. The
    # previous code passed bare map() iterators to max(); those cannot be
    # indexed, so the key function raised TypeError on every successful
    # detection.
    rects = (tuple(int(value) for value in face) for face in faces)
    x, y, w, h = max(rects, key=lambda rect: rect[2] * rect[3])
    return BBox(x=x, y=y, w=w, h=h)
def detect_person(image: np.ndarray) -> BBox | None: def detect_person(image: np.ndarray) -> BBox | None:
@@ -63,10 +106,11 @@ def detect_person(image: np.ndarray) -> BBox | None:
def detect_salient_object(image: np.ndarray) -> BBox | None: def detect_salient_object(image: np.ndarray) -> BBox | None:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (7, 7), 0) blurred = cv2.GaussianBlur(gray, (9, 9), 0)
edges = cv2.Canny(blurred, 40, 120) edges = cv2.Canny(blurred, 30, 110)
kernel = np.ones((5, 5), np.uint8) kernel = np.ones((13, 13), np.uint8)
closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2) expanded = cv2.dilate(edges, kernel, iterations=1)
closed = cv2.morphologyEx(expanded, cv2.MORPH_CLOSE, kernel, iterations=1)
contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if not contours: if not contours:
return None return None
@@ -77,7 +121,7 @@ def detect_salient_object(image: np.ndarray) -> BBox | None:
for contour in contours: for contour in contours:
x, y, bw, bh = cv2.boundingRect(contour) x, y, bw, bh = cv2.boundingRect(contour)
area = bw * bh area = bw * bh
if area < max(500, int(image_area * 0.01)): if area < max(1000, int(image_area * 0.015)):
continue continue
candidates.append((area, BBox(x=x, y=y, w=bw, h=bh))) candidates.append((area, BBox(x=x, y=y, w=bw, h=bh)))
@@ -89,7 +133,7 @@ def detect_salient_object(image: np.ndarray) -> BBox | None:
def square_bbox(bbox: BBox, image_shape: tuple[int, int, int], buffer_ratio: float = 0.15) -> BBox: def square_bbox(bbox: BBox, image_shape: tuple[int, int, int], buffer_ratio: float = 0.15) -> BBox:
image_h, image_w = image_shape[:2] image_h, image_w = image_shape[:2]
buffer_ratio = max(0.0, min(buffer_ratio, 0.5)) buffer_ratio = max(0.0, min(buffer_ratio, 0.6))
side = int(round(max(bbox.w, bbox.h) * (1 + buffer_ratio * 2))) side = int(round(max(bbox.w, bbox.h) * (1 + buffer_ratio * 2)))
side = max(1, min(side, image_w, image_h)) side = max(1, min(side, image_w, image_h))
@@ -127,9 +171,9 @@ def _data_url(image_bytes: bytes, mime_type: str) -> str:
return f"data:{mime_type};base64,{base64.b64encode(image_bytes).decode('ascii')}" return f"data:{mime_type};base64,{base64.b64encode(image_bytes).decode('ascii')}"
def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15) -> dict[str, Any]: def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15, detector: str = "auto") -> dict[str, Any]:
image = decode_image(image_bytes) image = decode_image(image_bytes)
bbox, method = select_primary_bbox(image) bbox, method = select_primary_bbox(image, detector=detector)
square = square_bbox(bbox, image.shape, buffer_ratio=buffer_ratio) square = square_bbox(bbox, image.shape, buffer_ratio=buffer_ratio)
crop = crop_image(image, square) crop = crop_image(image, square)
annotated = draw_square(image, square) annotated = draw_square(image, square)
@@ -139,6 +183,7 @@ def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15)
return { return {
"filename": filename, "filename": filename,
"detector": detector,
"method": method, "method": method,
"buffer_ratio": buffer_ratio, "buffer_ratio": buffer_ratio,
"detected_bbox": bbox.__dict__, "detected_bbox": bbox.__dict__,

8
docker-compose.yml Normal file
View File

@@ -0,0 +1,8 @@
# Compose definition for running the face-lock service from a local build.
services:
face-lock:
# Build context is this directory.
build: .
ports:
# host:container -- publishes container port 8000 on host port 8000.
- "8000:8000"
# NOTE(review): Compose refuses to start when a listed env_file is missing
# (unless the entry is marked `required: false`, which needs a recent Compose
# release) -- confirm that .env always exists, or document creating it.
env_file:
- .env
restart: unless-stopped

View File

@@ -1,6 +1,6 @@
import numpy as np import numpy as np
from app.vision import BBox, crop_image, detect_salient_object, square_bbox from app.vision import BBox, crop_image, detect_salient_object, select_primary_bbox, square_bbox
def test_square_bbox_is_square_and_inside_bounds(): def test_square_bbox_is_square_and_inside_bounds():
@@ -28,3 +28,10 @@ def test_detect_salient_object_finds_rectangle():
assert bbox is not None assert bbox is not None
assert bbox.w >= 45 assert bbox.w >= 45
assert bbox.h >= 45 assert bbox.h >= 45
def test_select_primary_bbox_falls_back_when_detector_disabled():
    """An all-black image with detector="center" yields the centred fallback box.

    NOTE(review): "center" is not one of the names select_primary_bbox handles
    explicitly ("face", "person", "salient"), so this call goes through the
    full auto chain rather than skipping detection. The assertion therefore
    relies on no detector firing on a blank image -- confirm that is the
    intended meaning of "disabled".
    """
    blank = np.zeros((100, 120, 3), dtype=np.uint8)
    box, label = select_primary_bbox(blank, detector="center")
    assert label == "center_fallback"
    assert box.w == box.h