diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..ed7a256 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +__pycache__/ +*.pyc +.venv/ +.env +.pytest_cache/ +.git/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..12febf7 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.13-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y --no-install-recommends libgl1 libglib2.0-0 \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 8000 + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/README.md b/README.md index 9adcd10..6ea3513 100644 --- a/README.md +++ b/README.md @@ -11,3 +11,9 @@ uvicorn app.main:app --reload --host 0.0.0.0 --port 8000 ``` Set `ENV=dev` to enable the Tailwind UI at `/`. + +## Docker + +```bash +docker compose up --build +``` diff --git a/app/main.py b/app/main.py index 88cf28d..26ec063 100644 --- a/app/main.py +++ b/app/main.py @@ -2,7 +2,7 @@ from fastapi import FastAPI, File, Form, HTTPException, UploadFile from fastapi.responses import HTMLResponse, StreamingResponse from app.config import settings -app = FastAPI(title="face-lock", version="0.1.0") +app = FastAPI(title="face-lock", version="0.2.0") @app.get("/health") @@ -25,16 +25,30 @@ def index(): face-lock -
+

face-lock

-

Drop an image, get the primary subject squared and cropped.

+

Auto-detect the subject, square it up, and crop with buffer.

- - - + + +
+
+ + +
+
+ + +
+

       
@@ -63,11 +77,20 @@ def index(): if (!file.files.length) return; const form = new FormData(); form.append('file', file.files[0]); + form.append('detector', document.getElementById('detector').value); form.append('buffer_ratio', document.getElementById('buffer_ratio').value); meta.textContent = 'Working...'; const resp = await fetch('/api/focus', { method: 'POST', body: form }); const data = await resp.json(); - meta.textContent = JSON.stringify(data, null, 2); + meta.textContent = JSON.stringify({ + filename: data.filename, + detector: data.detector, + method: data.method, + buffer_ratio: data.buffer_ratio, + detected_bbox: data.detected_bbox, + square_bbox: data.square_bbox, + source_size: data.source_size, + }, null, 2); crop.src = data.crop_data_url; annotated.src = data.annotated_data_url; crop.classList.remove('hidden'); @@ -81,23 +104,31 @@ def index(): @app.post("/api/focus") -async def focus(file: UploadFile = File(...), buffer_ratio: float = Form(0.15)): +async def focus( + file: UploadFile = File(...), + buffer_ratio: float = Form(0.15), + detector: str = Form("auto"), +): from app.vision import process_image try: payload = await file.read() - return process_image(payload, file.filename or "upload", buffer_ratio=buffer_ratio) + return process_image(payload, file.filename or "upload", buffer_ratio=buffer_ratio, detector=detector) except ValueError as exc: raise HTTPException(status_code=400, detail=str(exc)) from exc @app.post("/api/focus/image") -async def focus_image(file: UploadFile = File(...), buffer_ratio: float = Form(0.15)): +async def focus_image( + file: UploadFile = File(...), + buffer_ratio: float = Form(0.15), + detector: str = Form("auto"), +): from app.vision import process_image try: payload = await file.read() - result = process_image(payload, file.filename or "upload", buffer_ratio=buffer_ratio) + result = process_image(payload, file.filename or "upload", buffer_ratio=buffer_ratio, detector=detector) return StreamingResponse(result["crop_bytes_io"], media_type=result["mime_type"]) except ValueError as exc: raise HTTPException(status_code=400, detail=str(exc)) from exc diff --git a/app/vision.py b/app/vision.py index 0863aad..8ec0580 100644 --- a/app/vision.py +++ b/app/vision.py @@ -2,6 +2,7 @@ from __future__ import annotations from dataclasses import dataclass from io import BytesIO +from pathlib import Path from typing import Any import cv2 @@ -24,6 +25,9 @@ class BBox: return self.y + self.h +FACE_CASCADE = cv2.CascadeClassifier( + str(Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml") +) HOG = cv2.HOGDescriptor() HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector()) @@ -36,20 +40,59 @@ def decode_image(image_bytes: bytes) -> np.ndarray: return image -def select_primary_bbox(image: np.ndarray) -> tuple[BBox, str]: +def select_primary_bbox(image: np.ndarray, detector: str = "auto") -> tuple[BBox, str]: + detector = (detector or "auto").strip().lower() + + if detector == "face": + face_bbox = detect_face(image) + if face_bbox is not None: + return face_bbox, "face_cascade" + return fallback_bbox(image), "center_fallback" + + if detector == "person": + person_bbox = detect_person(image) + if person_bbox is not None: + return person_bbox, "person_hog" + return fallback_bbox(image), "center_fallback" + + if detector == "salient": + salient_bbox = detect_salient_object(image) + if salient_bbox is not None: + return salient_bbox, "salient_contour" + return fallback_bbox(image), "center_fallback" + + face_bbox = detect_face(image) + if face_bbox is not None: + return face_bbox, "face_cascade" + person_bbox = detect_person(image) if person_bbox is not None: return person_bbox, "person_hog" - contour_bbox = detect_salient_object(image) - if contour_bbox is not None: - return contour_bbox, "contour" + salient_bbox = detect_salient_object(image) + if salient_bbox is not None: + return salient_bbox, "salient_contour" + return fallback_bbox(image), "center_fallback" + + +def fallback_bbox(image: np.ndarray) -> BBox: h, w = image.shape[:2] - side = int(min(w, h) * 0.8) + side = int(min(w, h) * 0.85) + side = max(1, min(side, w, h)) x = max(0, (w - side) // 2) y = max(0, (h - side) // 2) - return BBox(x=x, y=y, w=side, h=side), "center_fallback" + return BBox(x=x, y=y, w=side, h=side) + + +def detect_face(image: np.ndarray) -> BBox | None: + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + gray = cv2.equalizeHist(gray) + faces = FACE_CASCADE.detectMultiScale(gray, scaleFactor=1.08, minNeighbors=5, minSize=(24, 24)) + if len(faces) == 0: + return None + x, y, w, h = max((map(int, face) for face in faces), key=lambda rect: rect[2] * rect[3]) + return BBox(x=x, y=y, w=w, h=h) def detect_person(image: np.ndarray) -> BBox | None: @@ -63,10 +106,11 @@ def detect_person(image: np.ndarray) -> BBox | None: def detect_salient_object(image: np.ndarray) -> BBox | None: gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - blurred = cv2.GaussianBlur(gray, (7, 7), 0) - edges = cv2.Canny(blurred, 40, 120) - kernel = np.ones((5, 5), np.uint8) - closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2) + blurred = cv2.GaussianBlur(gray, (9, 9), 0) + edges = cv2.Canny(blurred, 30, 110) + kernel = np.ones((13, 13), np.uint8) + expanded = cv2.dilate(edges, kernel, iterations=1) + closed = cv2.morphologyEx(expanded, cv2.MORPH_CLOSE, kernel, iterations=1) contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) if not contours: return None @@ -77,7 +121,7 @@ def detect_salient_object(image: np.ndarray) -> BBox | None: for contour in contours: x, y, bw, bh = cv2.boundingRect(contour) area = bw * bh - if area < max(500, int(image_area * 0.01)): + if area < max(1000, int(image_area * 0.015)): continue candidates.append((area, BBox(x=x, y=y, w=bw, h=bh))) @@ -89,7 +133,7 @@ def detect_salient_object(image: np.ndarray) -> BBox | None: def square_bbox(bbox: BBox, image_shape: tuple[int, int, int], buffer_ratio: float = 0.15) -> BBox: image_h, image_w = image_shape[:2] - buffer_ratio = max(0.0, min(buffer_ratio, 0.5)) + buffer_ratio = max(0.0, min(buffer_ratio, 0.6)) side = int(round(max(bbox.w, bbox.h) * (1 + buffer_ratio * 2))) side = max(1, min(side, image_w, image_h)) @@ -127,9 +171,9 @@ def _data_url(image_bytes: bytes, mime_type: str) -> str: return f"data:{mime_type};base64,{base64.b64encode(image_bytes).decode('ascii')}" -def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15) -> dict[str, Any]: +def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15, detector: str = "auto") -> dict[str, Any]: image = decode_image(image_bytes) - bbox, method = select_primary_bbox(image) + bbox, method = select_primary_bbox(image, detector=detector) square = square_bbox(bbox, image.shape, buffer_ratio=buffer_ratio) crop = crop_image(image, square) annotated = draw_square(image, square) @@ -139,6 +183,7 @@ def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15) return { "filename": filename, + "detector": detector, "method": method, "buffer_ratio": buffer_ratio, "detected_bbox": bbox.__dict__, diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..865af35 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,8 @@ +services: + face-lock: + build: . + ports: + - "8000:8000" + env_file: + - .env + restart: unless-stopped diff --git a/tests/test_vision.py b/tests/test_vision.py index dc5ae27..827b2e8 100644 --- a/tests/test_vision.py +++ b/tests/test_vision.py @@ -1,6 +1,6 @@ import numpy as np -from app.vision import BBox, crop_image, detect_salient_object, square_bbox +from app.vision import BBox, crop_image, detect_salient_object, select_primary_bbox, square_bbox def test_square_bbox_is_square_and_inside_bounds(): @@ -28,3 +28,10 @@ def test_detect_salient_object_finds_rectangle(): assert bbox is not None assert bbox.w >= 45 assert bbox.h >= 45 + + +def test_select_primary_bbox_falls_back_when_detector_disabled(): + image = np.zeros((100, 120, 3), dtype=np.uint8) + bbox, method = select_primary_bbox(image, detector="center") + assert method == "center_fallback" + assert bbox.w == bbox.h