commit 660ce4e7ccfb7dcb0d8605daeea47a0de9d78afe Author: Luna Date: Sat Apr 11 16:32:26 2026 +0200 Initial FastAPI face-lock scaffold diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..053059e --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +ENV=dev diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3e9028d --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +__pycache__/ +*.pyc +.venv/ +.env +.pytest_cache/ +.coverage +htmlcov/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..9adcd10 --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +# face-lock + +FastAPI microservice that finds the primary subject in an image, draws a square around it, and returns a buffered crop. + +## Dev + +```bash +cp .env.example .env +pip install -r requirements.txt +uvicorn app.main:app --reload --host 0.0.0.0 --port 8000 +``` + +Set `ENV=dev` to enable the Tailwind UI at `/`. diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..acf2359 --- /dev/null +++ b/app/config.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass +from dotenv import load_dotenv +import os + +load_dotenv() + + +@dataclass(frozen=True) +class Settings: + env: str = os.getenv("ENV", "prod").strip().lower() + + +settings = Settings() diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..e0e5e6b --- /dev/null +++ b/app/main.py @@ -0,0 +1,88 @@ +from fastapi import FastAPI, File, HTTPException, UploadFile +from fastapi.responses import HTMLResponse, StreamingResponse +from app.config import settings + +app = FastAPI(title="face-lock", version="0.1.0") + + +@app.get("/health") +def health(): + return {"ok": True, "env": settings.env} + + +@app.get("/", response_class=HTMLResponse) +def index(): + if settings.env != "dev": + return HTMLResponse("

face-lock

Set ENV=dev for the UI.

") + return HTMLResponse( + """ + + + + + + + face-lock + + +

face-lock

Drop an image, get the primary subject squared and cropped.

+ + +

Result

+ +

# --- app/main.py: upload endpoints ---------------------------------------

# Keys of the pipeline result that are not JSON values and must never be
# handed to the JSON encoder (the BytesIO exists only for the image endpoint).
_NON_JSON_KEYS = frozenset({"crop_bytes_io"})


async def _run_pipeline(file: UploadFile) -> dict:
    """Read an upload and run it through ``app.vision.process_image``.

    Args:
        file: The multipart upload received by the endpoint.

    Returns:
        The dictionary produced by ``process_image``.

    Raises:
        HTTPException: With status 400 when reading or processing raises
            ``ValueError``.
    """
    # Imported inside the function, exactly as the original endpoints did
    # (presumably to defer loading OpenCV until the first request).
    from app.vision import process_image

    try:
        payload = await file.read()
        return process_image(payload, file.filename or "upload")
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc


@app.post("/api/focus")
async def focus(file: UploadFile = File(...)):
    """Return the detection metadata and data-URL previews as JSON.

    The in-memory ``crop_bytes_io`` stream is dropped from the response: it is
    not a JSON value, and the same pixels are already available to the client
    through ``crop_data_url``.
    """
    result = await _run_pipeline(file)
    return {key: value for key, value in result.items() if key not in _NON_JSON_KEYS}


@app.post("/api/focus/image")
async def focus_image(file: UploadFile = File(...)):
    """Return only the cropped subject as an image body."""
    result = await _run_pipeline(file)
    return StreamingResponse(result["crop_bytes_io"], media_type=result["mime_type"])


# --- app/vision.py: module head -------------------------------------------

@dataclass(frozen=True)
class BBox:
    """Immutable axis-aligned box given by its top-left corner and its size."""

    x: int
    y: int
    w: int
    h: int

    @property
    def right(self) -> int:
        """Column one past the last column covered (``x + w``)."""
        return self.x + self.w

    @property
    def bottom(self) -> int:
        """Row one past the last row covered (``y + h``)."""
        return self.y + self.h


# Module-level HOG descriptor loaded with OpenCV's default people detector;
# built once at import time and reused by ``detect_person``.
HOG = cv2.HOGDescriptor()
HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())


def decode_image(image_bytes: bytes) -> np.ndarray:
    """Decode an encoded image into an OpenCV colour array.

    Args:
        image_bytes: Raw bytes of an encoded image file.

    Returns:
        The array produced by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.

    Raises:
        ValueError: If the payload is empty or cannot be decoded. Every
            failure is reported as ``ValueError`` because that is the only
            exception type the HTTP endpoints translate into a 400 response.
    """
    if not image_bytes:
        # An empty buffer makes cv2.imdecode raise instead of returning None.
        raise ValueError("could not decode image")

    data = np.frombuffer(image_bytes, dtype=np.uint8)
    try:
        image = cv2.imdecode(data, cv2.IMREAD_COLOR)
    except cv2.error as exc:
        raise ValueError("could not decode image") from exc
    if image is None:
        raise ValueError("could not decode image")
    return image


def select_primary_bbox(image: np.ndarray) -> tuple[BBox, str]:
    """Pick the box of the primary subject and name the method that found it.

    Detectors are tried in order: HOG person detection, then the contour
    based salient-object search, then a centred square covering 80% of the
    shorter image side.

    Returns:
        ``(bbox, method)`` where ``method`` is ``"person_hog"``, ``"contour"``
        or ``"center_fallback"``.
    """
    person_bbox = detect_person(image)
    if person_bbox is not None:
        return person_bbox, "person_hog"

    contour_bbox = detect_salient_object(image)
    if contour_bbox is not None:
        return contour_bbox, "contour"

    h, w = image.shape[:2]
    # Never fall back to a zero-sized box (int() truncates to 0 on a 1-pixel side).
    side = max(1, int(min(w, h) * 0.8))
    x = max(0, (w - side) // 2)
    y = max(0, (h - side) // 2)
    return BBox(x=x, y=y, w=side, h=side), "center_fallback"
def detect_person(image: np.ndarray) -> BBox | None:
    """Return the largest person detection found by the module-level HOG detector.

    Args:
        image: Image array passed straight to ``HOG.detectMultiScale``.

    Returns:
        The detection with the greatest ``w * h`` area, or ``None`` when the
        detector reports nothing.
    """
    # The detector also returns confidence weights; selection is by area only,
    # so they are deliberately ignored.
    rects, _weights = HOG.detectMultiScale(image, winStride=(8, 8), padding=(8, 8), scale=1.05)
    if len(rects) == 0:
        return None
    x, y, w, h = map(int, max(rects, key=lambda rect: int(rect[2]) * int(rect[3])))
    return BBox(x=x, y=y, w=w, h=h)


def detect_salient_object(image: np.ndarray) -> BBox | None:
    """Return the bounding box of the largest edge-delimited region.

    Pipeline: grayscale -> 7x7 Gaussian blur -> Canny(40, 120) -> two rounds
    of 5x5 morphological closing -> external contours. A contour qualifies
    when its bounding-box area is at least ``max(500, 1% of the image area)``.

    Returns:
        The qualifying bounding box with the greatest area (the first one on
        ties), or ``None`` when no contour qualifies.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)
    edges = cv2.Canny(blurred, 40, 120)
    kernel = np.ones((5, 5), np.uint8)
    closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2)
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    frame_h, frame_w = image.shape[:2]
    # Loop invariant: computed once instead of once per contour.
    min_area = max(500, int(frame_h * frame_w * 0.01))

    best_area = 0
    best_rect = None
    for contour in contours:
        rect = cv2.boundingRect(contour)
        area = rect[2] * rect[3]
        # Strict ">" keeps the earliest contour among equally large ones.
        if area >= min_area and area > best_area:
            best_area, best_rect = area, rect

    if best_rect is None:
        return None
    x, y, bw, bh = best_rect
    return BBox(x=x, y=y, w=bw, h=bh)


def square_bbox(bbox: BBox, image_shape: tuple[int, int, int], buffer_ratio: float = 0.15) -> BBox:
    """Expand ``bbox`` into a buffered square that stays inside the image.

    Args:
        bbox: Detected box to centre the square on.
        image_shape: Array shape; only the leading ``(height, width)`` is read.
        buffer_ratio: Extra margin added on each side, as a fraction of the
            longer side of ``bbox``.

    Returns:
        A square ``BBox`` whose side is the buffered longer side of ``bbox``,
        capped at the shorter image dimension (and never below 1), shifted as
        needed so that it does not leave the image.
    """
    image_h, image_w = image_shape[:2]
    side = int(round(max(bbox.w, bbox.h) * (1 + buffer_ratio * 2)))
    side = max(1, min(side, image_w, image_h))

    # Centre the square on the detection, then clamp its origin into the image.
    center_x = bbox.x + bbox.w / 2
    center_y = bbox.y + bbox.h / 2
    x = int(round(center_x - side / 2))
    y = int(round(center_y - side / 2))

    x = max(0, min(x, image_w - side))
    y = max(0, min(y, image_h - side))
    return BBox(x=x, y=y, w=side, h=side)


def draw_square(image: np.ndarray, bbox: BBox) -> np.ndarray:
    """Return a copy of ``image`` with ``bbox`` outlined in green, 3 px thick.

    The input array is left untouched.
    """
    annotated = image.copy()
    # cv2.rectangle takes two inclusive corner pixels, whereas BBox.right and
    # BBox.bottom are exclusive bounds; step back one pixel so the outline
    # matches the region that crop_image returns.
    last_col = bbox.right - 1
    last_row = bbox.bottom - 1
    cv2.rectangle(annotated, (bbox.x, bbox.y), (last_col, last_row), (0, 255, 0), 3)
    return annotated


def crop_image(image: np.ndarray, bbox: BBox) -> np.ndarray:
    """Return the rows ``y:bottom`` and columns ``x:right`` of ``image``.

    The result is a plain NumPy slice of the input, not a copy.
    """
    return image[bbox.y:bbox.bottom, bbox.x:bbox.right]
# MIME types for the extensions this service is expected to encode. Kept
# explicit because the stdlib registry differs between Python versions.
_MIME_BY_EXT = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".jpe": "image/jpeg",
    ".png": "image/png",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
    ".tif": "image/tiff",
    ".tiff": "image/tiff",
}


def _mime_type_for_ext(ext: str) -> str:
    """Map a dotted file extension (any case) to its MIME type."""
    import mimetypes

    key = ext.lower()
    known = _MIME_BY_EXT.get(key)
    if known is not None:
        return known
    # Fall back to the stdlib registry before giving up on a generic type.
    guessed, _encoding = mimetypes.guess_type(f"image{key}")
    return guessed or "application/octet-stream"


def encode_image(image: np.ndarray, ext: str = ".jpg") -> tuple[bytes, str]:
    """Encode ``image`` with ``cv2.imencode`` and report the matching MIME type.

    Args:
        image: Image array to encode.
        ext: Dotted extension selecting the output format.

    Returns:
        ``(encoded_bytes, mime_type)``.

    Raises:
        ValueError: If OpenCV reports failure or raises while encoding.
            OpenCV errors are converted because ``ValueError`` is the
            exception type the HTTP endpoints translate into a 400 response.
    """
    try:
        ok, encoded = cv2.imencode(ext, image)
    except cv2.error as exc:
        raise ValueError("could not encode image") from exc
    if not ok:
        raise ValueError("could not encode image")
    return encoded.tobytes(), _mime_type_for_ext(ext)


def _data_url(image_bytes: bytes, mime_type: str) -> str:
    """Wrap ``image_bytes`` in a base64 ``data:`` URL of the given MIME type."""
    import base64

    encoded = base64.b64encode(image_bytes).decode("ascii")
    return f"data:{mime_type};base64,{encoded}"


def process_image(image_bytes: bytes, filename: str) -> dict[str, Any]:
    """Run the full pipeline: decode, locate the subject, square, crop, annotate.

    Args:
        image_bytes: Raw bytes of the uploaded image.
        filename: Name echoed back in the result.

    Returns:
        A dictionary with the keys ``filename``, ``method``, ``detected_bbox``,
        ``square_bbox``, ``source_size``, ``crop_data_url``,
        ``annotated_data_url``, ``mime_type`` and ``crop_bytes_io``. The last
        one is an in-memory stream over the JPEG crop.

    Raises:
        ValueError: Propagated from ``decode_image`` / ``encode_image``.
    """
    from dataclasses import asdict

    image = decode_image(image_bytes)
    bbox, method = select_primary_bbox(image)
    square = square_bbox(bbox, image.shape)
    crop = crop_image(image, square)
    annotated = draw_square(image, square)

    crop_bytes, mime_type = encode_image(crop, ".jpg")
    annotated_bytes, annotated_mime = encode_image(annotated, ".jpg")
    frame_h, frame_w = image.shape[:2]

    return {
        "filename": filename,
        "method": method,
        # asdict() returns fresh dicts; __dict__ would hand out the live
        # attribute storage of the frozen BBox instances.
        "detected_bbox": asdict(bbox),
        "square_bbox": asdict(square),
        "source_size": {"width": int(frame_w), "height": int(frame_h)},
        "crop_data_url": _data_url(crop_bytes, mime_type),
        "annotated_data_url": _data_url(annotated_bytes, annotated_mime),
        "mime_type": mime_type,
        "crop_bytes_io": BytesIO(crop_bytes),
    }
def test_square_bbox_is_square_and_inside_bounds():
    """The squared box has equal sides and lies within a 120x100 image."""
    frame_h, frame_w = 100, 120
    detected = BBox(x=10, y=20, w=40, h=20)

    result = square_bbox(detected, (frame_h, frame_w, 3), buffer_ratio=0.1)

    assert result.w == result.h
    assert result.x >= 0
    assert result.y >= 0
    assert result.right <= frame_w
    assert result.bottom <= frame_h


def test_crop_image_uses_bbox():
    """Cropping yields exactly the height and width described by the box."""
    canvas = np.zeros((60, 80, 3), dtype=np.uint8)
    canvas[10:30, 20:50] = 255
    region = BBox(x=20, y=10, w=30, h=20)

    cropped = crop_image(canvas, region)

    assert cropped.shape[:2] == (20, 30)


def test_detect_salient_object_finds_rectangle():
    """A white 50x50 square on black is reported with roughly its own size."""
    canvas = np.zeros((100, 100, 3), dtype=np.uint8)
    canvas[25:75, 30:80] = 255

    found = detect_salient_object(canvas)

    assert found is not None
    assert found.w >= 45
    assert found.h >= 45