@@ -63,11 +77,20 @@ def index():
if (!file.files.length) return;
const form = new FormData();
form.append('file', file.files[0]);
+ form.append('detector', document.getElementById('detector').value);
form.append('buffer_ratio', document.getElementById('buffer_ratio').value);
meta.textContent = 'Working...';
const resp = await fetch('/api/focus', { method: 'POST', body: form });
const data = await resp.json();
- meta.textContent = JSON.stringify(data, null, 2);
+ meta.textContent = JSON.stringify({
+ filename: data.filename,
+ detector: data.detector,
+ method: data.method,
+ buffer_ratio: data.buffer_ratio,
+ detected_bbox: data.detected_bbox,
+ square_bbox: data.square_bbox,
+ source_size: data.source_size,
+ }, null, 2);
crop.src = data.crop_data_url;
annotated.src = data.annotated_data_url;
crop.classList.remove('hidden');
@@ -81,23 +104,31 @@ def index():
@app.post("/api/focus")
-async def focus(file: UploadFile = File(...), buffer_ratio: float = Form(0.15)):
+async def focus(
+ file: UploadFile = File(...),
+ buffer_ratio: float = Form(0.15),
+ detector: str = Form("auto"),
+):
+ # Reads the uploaded file and returns whatever process_image() produces for it.
+ # buffer_ratio and detector arrive as form fields (defaults 0.15 and "auto") and
+ # are forwarded unchanged; a missing filename is replaced by "upload".
+ # A ValueError raised during processing is reported as HTTP 400 with the
+ # exception message as the detail.
from app.vision import process_image
try:
payload = await file.read()
- return process_image(payload, file.filename or "upload", buffer_ratio=buffer_ratio)
+ return process_image(payload, file.filename or "upload", buffer_ratio=buffer_ratio, detector=detector)
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
@app.post("/api/focus/image")
-async def focus_image(file: UploadFile = File(...), buffer_ratio: float = Form(0.15)):
+async def focus_image(
+ file: UploadFile = File(...),
+ buffer_ratio: float = Form(0.15),
+ detector: str = Form("auto"),
+):
+ # Same processing as /api/focus, but the response is only the cropped image:
+ # result["crop_bytes_io"] is streamed back with result["mime_type"] as the
+ # media type. buffer_ratio and detector are forwarded unchanged to
+ # process_image(); a ValueError from processing becomes HTTP 400.
from app.vision import process_image
try:
payload = await file.read()
- result = process_image(payload, file.filename or "upload", buffer_ratio=buffer_ratio)
+ result = process_image(payload, file.filename or "upload", buffer_ratio=buffer_ratio, detector=detector)
return StreamingResponse(result["crop_bytes_io"], media_type=result["mime_type"])
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
diff --git a/app/vision.py b/app/vision.py
index 0863aad..8ec0580 100644
--- a/app/vision.py
+++ b/app/vision.py
@@ -2,6 +2,7 @@ from __future__ import annotations
from dataclasses import dataclass
from io import BytesIO
+from pathlib import Path
from typing import Any
import cv2
@@ -24,6 +25,9 @@ class BBox:
return self.y + self.h
+# Frontal-face Haar cascade, loaded once at import time from the cascade
+# directory that cv2 exposes as cv2.data.haarcascades.
+FACE_CASCADE = cv2.CascadeClassifier(
+ str(Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml")
+)
+# HOG descriptor configured with OpenCV's default people-detector SVM.
HOG = cv2.HOGDescriptor()
HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
@@ -36,20 +40,59 @@ def decode_image(image_bytes: bytes) -> np.ndarray:
return image
-def select_primary_bbox(image: np.ndarray) -> tuple[BBox, str]:
+def select_primary_bbox(image: np.ndarray, detector: str = "auto") -> tuple[BBox, str]:
+    """Choose the bounding box of the primary subject in ``image``.
+
+    Args:
+        image: Decoded image array; its first two dimensions are read as
+            height and width by the fallback.
+        detector: Detection strategy, compared case-insensitively after
+            stripping whitespace. ``"face"``, ``"person"`` and ``"salient"``
+            run only that detector; ``"center"`` skips detection entirely.
+            ``"auto"``, an empty/``None`` value, or any unrecognised value
+            tries face, then person, then salient object.
+
+    Returns:
+        A ``(bbox, method)`` pair where ``method`` is one of
+        ``"face_cascade"``, ``"person_hog"``, ``"salient_contour"`` or
+        ``"center_fallback"``. The centered fallback box is returned whenever
+        the selected detector(s) find nothing.
+    """
+    detector = (detector or "auto").strip().lower()
+
+    # "center" means "no detection": without this branch the value fell through
+    # to the auto chain and could return a face/person/salient box instead.
+    if detector == "center":
+        return fallback_bbox(image), "center_fallback"
+
+    if detector == "face":
+        face_bbox = detect_face(image)
+        if face_bbox is not None:
+            return face_bbox, "face_cascade"
+        return fallback_bbox(image), "center_fallback"
+
+    if detector == "person":
+        person_bbox = detect_person(image)
+        if person_bbox is not None:
+            return person_bbox, "person_hog"
+        return fallback_bbox(image), "center_fallback"
+
+    if detector == "salient":
+        salient_bbox = detect_salient_object(image)
+        if salient_bbox is not None:
+            return salient_bbox, "salient_contour"
+        return fallback_bbox(image), "center_fallback"
+
+    # Auto mode (also reached for unrecognised detector names): first hit wins.
+    face_bbox = detect_face(image)
+    if face_bbox is not None:
+        return face_bbox, "face_cascade"
+
     person_bbox = detect_person(image)
     if person_bbox is not None:
         return person_bbox, "person_hog"
-    contour_bbox = detect_salient_object(image)
-    if contour_bbox is not None:
-        return contour_bbox, "contour"
+    salient_bbox = detect_salient_object(image)
+    if salient_bbox is not None:
+        return salient_bbox, "salient_contour"
+    return fallback_bbox(image), "center_fallback"
+
+
+def fallback_bbox(image: np.ndarray) -> BBox:
+ # Centered square box returned when detection yields nothing.
+ # The side is 85% of the shorter image dimension (truncated to int), then
+ # clamped to at least 1 and at most the shorter dimension; x/y center the
+ # square and are never negative.
h, w = image.shape[:2]
- side = int(min(w, h) * 0.8)
+ side = int(min(w, h) * 0.85)
+ side = max(1, min(side, w, h))
x = max(0, (w - side) // 2)
y = max(0, (h - side) // 2)
- return BBox(x=x, y=y, w=side, h=side), "center_fallback"
+ return BBox(x=x, y=y, w=side, h=side)
+
+
+def detect_face(image: np.ndarray) -> BBox | None:
+    """Return the largest face found by the frontal-face cascade, or ``None``.
+
+    The image is converted from BGR to grayscale and histogram-equalized
+    before ``FACE_CASCADE.detectMultiScale`` runs. When several faces are
+    reported, the rectangle with the largest area (``w * h``) wins.
+
+    Args:
+        image: Image array in BGR channel order (as required by the
+            ``COLOR_BGR2GRAY`` conversion).
+
+    Returns:
+        A ``BBox`` with plain ``int`` coordinates, or ``None`` when the
+        cascade reports no faces.
+    """
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    gray = cv2.equalizeHist(gray)
+    faces = FACE_CASCADE.detectMultiScale(gray, scaleFactor=1.08, minNeighbors=5, minSize=(24, 24))
+    # Use len() rather than truthiness: the result may be an array, for which
+    # a bare boolean test is ambiguous.
+    if len(faces) == 0:
+        return None
+    # Each rectangle must be materialized as a tuple: the key function indexes
+    # it, and a bare map() iterator is not subscriptable (TypeError).
+    x, y, w, h = max(
+        (tuple(int(value) for value in face) for face in faces),
+        key=lambda rect: rect[2] * rect[3],
+    )
+    return BBox(x=x, y=y, w=w, h=h)
def detect_person(image: np.ndarray) -> BBox | None:
@@ -63,10 +106,11 @@ def detect_person(image: np.ndarray) -> BBox | None:
def detect_salient_object(image: np.ndarray) -> BBox | None:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
- blurred = cv2.GaussianBlur(gray, (7, 7), 0)
- edges = cv2.Canny(blurred, 40, 120)
- kernel = np.ones((5, 5), np.uint8)
- closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2)
+ blurred = cv2.GaussianBlur(gray, (9, 9), 0)
+ edges = cv2.Canny(blurred, 30, 110)
+ kernel = np.ones((13, 13), np.uint8)
+ expanded = cv2.dilate(edges, kernel, iterations=1)
+ closed = cv2.morphologyEx(expanded, cv2.MORPH_CLOSE, kernel, iterations=1)
contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if not contours:
return None
@@ -77,7 +121,7 @@ def detect_salient_object(image: np.ndarray) -> BBox | None:
for contour in contours:
x, y, bw, bh = cv2.boundingRect(contour)
area = bw * bh
- if area < max(500, int(image_area * 0.01)):
+ if area < max(1000, int(image_area * 0.015)):
continue
candidates.append((area, BBox(x=x, y=y, w=bw, h=bh)))
@@ -89,7 +133,7 @@ def detect_salient_object(image: np.ndarray) -> BBox | None:
def square_bbox(bbox: BBox, image_shape: tuple[int, int, int], buffer_ratio: float = 0.15) -> BBox:
image_h, image_w = image_shape[:2]
- buffer_ratio = max(0.0, min(buffer_ratio, 0.5))
+ buffer_ratio = max(0.0, min(buffer_ratio, 0.6))
side = int(round(max(bbox.w, bbox.h) * (1 + buffer_ratio * 2)))
side = max(1, min(side, image_w, image_h))
@@ -127,9 +171,9 @@ def _data_url(image_bytes: bytes, mime_type: str) -> str:
return f"data:{mime_type};base64,{base64.b64encode(image_bytes).decode('ascii')}"
-def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15) -> dict[str, Any]:
+def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15, detector: str = "auto") -> dict[str, Any]:
image = decode_image(image_bytes)
- bbox, method = select_primary_bbox(image)
+ bbox, method = select_primary_bbox(image, detector=detector)
square = square_bbox(bbox, image.shape, buffer_ratio=buffer_ratio)
crop = crop_image(image, square)
annotated = draw_square(image, square)
@@ -139,6 +183,7 @@ def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15)
return {
"filename": filename,
+ "detector": detector,
"method": method,
"buffer_ratio": buffer_ratio,
"detected_bbox": bbox.__dict__,
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..865af35
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,8 @@
+# Single-service stack: builds the image from this directory, publishes
+# container port 8000 on host port 8000, loads environment variables from
+# .env, and restarts the container unless it was explicitly stopped.
+services:
+ face-lock:
+ build: .
+ ports:
+ - "8000:8000"
+ env_file:
+ - .env
+ restart: unless-stopped
diff --git a/tests/test_vision.py b/tests/test_vision.py
index dc5ae27..827b2e8 100644
--- a/tests/test_vision.py
+++ b/tests/test_vision.py
@@ -1,6 +1,6 @@
import numpy as np
-from app.vision import BBox, crop_image, detect_salient_object, square_bbox
+from app.vision import BBox, crop_image, detect_salient_object, select_primary_bbox, square_bbox
def test_square_bbox_is_square_and_inside_bounds():
@@ -28,3 +28,10 @@ def test_detect_salient_object_finds_rectangle():
assert bbox is not None
assert bbox.w >= 45
assert bbox.h >= 45
+
+
+def test_select_primary_bbox_falls_back_when_detector_disabled():
+    """A blank 120x100 frame requested with detector="center" gets the square center fallback."""
+    blank_frame = np.zeros((100, 120, 3), dtype=np.uint8)
+    result = select_primary_bbox(blank_frame, detector="center")
+    box, label = result
+    assert label == "center_fallback"
+    assert box.w == box.h