Initial FastAPI face-lock scaffold

This commit is contained in:
2026-04-11 16:32:26 +02:00
commit 660ce4e7cc
9 changed files with 306 additions and 0 deletions

149
app/vision.py Normal file
View File

@@ -0,0 +1,149 @@
from __future__ import annotations
from dataclasses import dataclass
from io import BytesIO
from typing import Any
import cv2
import numpy as np
@dataclass(frozen=True)
class BBox:
    """Immutable axis-aligned rectangle expressed in integer pixel units.

    ``x`` and ``y`` are the column and row where the rectangle starts, and
    ``w`` and ``h`` are its extent along each axis. ``right`` and ``bottom``
    are derived values (``x + w`` and ``y + h``), convenient as the
    end indices of a slice.
    """

    x: int  # starting column
    y: int  # starting row
    w: int  # extent along the x axis
    h: int  # extent along the y axis

    @property
    def right(self) -> int:
        """Return the x coordinate where the rectangle ends (``x + w``)."""
        return self.w + self.x

    @property
    def bottom(self) -> int:
        """Return the y coordinate where the rectangle ends (``y + h``)."""
        return self.h + self.y
# Shared HOG (histogram of oriented gradients) descriptor, built once when the
# module is imported and reused by detect_person() on every call.
HOG = cv2.HOGDescriptor()
# Load the people-detector SVM coefficients that ship with OpenCV so the
# descriptor can be used for pedestrian detection without extra model files.
HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
def decode_image(image_bytes: bytes) -> np.ndarray:
    """Decode the raw contents of an image file into a pixel array.

    Args:
        image_bytes: Encoded image data (for example the bytes of a JPEG or
            PNG upload).

    Returns:
        The decoded image, as produced by ``cv2.imdecode`` with
        ``cv2.IMREAD_COLOR``.

    Raises:
        ValueError: If ``image_bytes`` is empty or cannot be decoded.
    """
    # Depending on the OpenCV build, an empty buffer makes cv2.imdecode raise
    # cv2.error rather than return None. Reject it up front so ValueError
    # stays the single failure mode callers have to handle.
    if not image_bytes:
        raise ValueError("could not decode image")

    data = np.frombuffer(image_bytes, dtype=np.uint8)
    image = cv2.imdecode(data, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("could not decode image")
    return image
def select_primary_bbox(image: np.ndarray) -> tuple[BBox, str]:
    """Pick the main region of interest in ``image``.

    Detectors are tried in priority order and the first one that yields a box
    wins: the HOG person detector, then the contour-based salient-object
    detector. When neither finds anything, a centred square covering 80% of
    the shorter image side is returned instead.

    Args:
        image: Image array whose first two dimensions are height and width.

    Returns:
        A ``(bbox, method)`` pair where ``method`` is ``"person_hog"``,
        ``"contour"`` or ``"center_fallback"``.
    """
    detectors = (
        (detect_person, "person_hog"),
        (detect_salient_object, "contour"),
    )
    # Later detectors only run if the earlier ones came back empty.
    for detect, label in detectors:
        found = detect(image)
        if found is not None:
            return found, label

    height, width = image.shape[:2]
    side = int(min(width, height) * 0.8)
    left = max(0, (width - side) // 2)
    top = max(0, (height - side) // 2)
    return BBox(x=left, y=top, w=side, h=side), "center_fallback"
def detect_person(image: np.ndarray) -> BBox | None:
    """Locate the largest person found by the module-level HOG detector.

    Args:
        image: Image array passed straight to ``HOG.detectMultiScale``.

    Returns:
        The detection with the greatest ``width * height``, or ``None`` when
        the detector reports no rectangles. The detector's weights are not
        used to rank detections; only the area matters.
    """
    rects, weights = HOG.detectMultiScale(
        image,
        winStride=(8, 8),
        padding=(8, 8),
        scale=1.05,
    )
    # ``rects`` may be a NumPy array, so test its length rather than its
    # truthiness.
    if len(rects) == 0:
        return None

    def _area(detection):
        rect = detection[0]
        return int(rect[2]) * int(rect[3])

    largest, _weight = max(zip(rects, weights), key=_area)
    left, top, width, height = (int(value) for value in largest)
    return BBox(x=left, y=top, w=width, h=height)
def detect_salient_object(image: np.ndarray) -> BBox | None:
    """Find the largest edge-delimited region in ``image``.

    The image is converted from BGR to grayscale, blurred, run through Canny
    edge detection and morphologically closed so nearby edges merge. The
    bounding rectangles of the resulting external contours are the
    candidates.

    Args:
        image: Colour image in the channel order expected by
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The candidate bounding box with the largest area, or ``None`` if there
        are no contours or none reaches the minimum area (the larger of 500
        pixels and 1% of the image area).
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(cv2.GaussianBlur(gray, (7, 7), 0), 40, 120)
    closed = cv2.morphologyEx(
        edges,
        cv2.MORPH_CLOSE,
        np.ones((5, 5), np.uint8),
        iterations=2,
    )
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    height, width = image.shape[:2]
    # Ignore specks: a candidate must cover at least 500 px and 1% of the image.
    min_area = max(500, int(height * width * 0.01))

    boxes = [BBox(*cv2.boundingRect(contour)) for contour in contours]
    large_enough = [box for box in boxes if box.w * box.h >= min_area]
    if not large_enough:
        return None
    # max() keeps the first of several equally large boxes.
    return max(large_enough, key=lambda box: box.w * box.h)
def square_bbox(bbox: BBox, image_shape: tuple[int, int, int], buffer_ratio: float = 0.15) -> BBox:
    """Expand ``bbox`` into a padded square that stays inside the image.

    The square is centred on ``bbox``. Its side is the longer edge of
    ``bbox`` grown by ``buffer_ratio`` on both ends, capped at the shorter
    image dimension and never smaller than one pixel. The origin is then
    shifted as needed so the square does not leave the image.

    Args:
        bbox: Box to square up.
        image_shape: Shape of the image; only the leading ``(height, width)``
            entries are read.
        buffer_ratio: Fraction of the longer edge added on each side.

    Returns:
        A new square ``BBox``.
    """
    image_h, image_w = image_shape[:2]

    longest_edge = max(bbox.w, bbox.h)
    padded = int(round(longest_edge * (1 + buffer_ratio * 2)))
    side = max(1, min(padded, image_w, image_h))

    def _clamped_origin(center: float, limit: int) -> int:
        # Centre the square, then pull it back into [0, limit - side].
        start = int(round(center - side / 2))
        return max(0, min(start, limit - side))

    center_x = bbox.x + bbox.w / 2
    center_y = bbox.y + bbox.h / 2
    return BBox(
        x=_clamped_origin(center_x, image_w),
        y=_clamped_origin(center_y, image_h),
        w=side,
        h=side,
    )
def draw_square(image: np.ndarray, bbox: BBox) -> np.ndarray:
    """Return a copy of ``image`` with ``bbox`` outlined on it.

    The input image is left untouched; the outline is drawn on a copy using
    colour ``(0, 255, 0)`` and a thickness of 3.

    Args:
        image: Image to annotate.
        bbox: Rectangle to outline.

    Returns:
        The annotated copy.
    """
    outline_color = (0, 255, 0)
    outline_thickness = 3
    start_corner = (bbox.x, bbox.y)
    end_corner = (bbox.right, bbox.bottom)

    canvas = image.copy()
    cv2.rectangle(canvas, start_corner, end_corner, outline_color, outline_thickness)
    return canvas
def crop_image(image: np.ndarray, bbox: BBox) -> np.ndarray:
    """Return the part of ``image`` covered by ``bbox``.

    Rows ``bbox.y`` up to (not including) ``bbox.bottom`` and columns
    ``bbox.x`` up to (not including) ``bbox.right`` are selected with plain
    slicing, so the result is whatever NumPy basic slicing yields for the
    input array.

    Args:
        image: Array indexed as ``[row, column, ...]``.
        bbox: Region to extract.

    Returns:
        The sliced region.
    """
    rows = slice(bbox.y, bbox.bottom)
    cols = slice(bbox.x, bbox.right)
    return image[rows, cols]
# MIME types for the file extensions commonly passed to cv2.imencode.
_MIME_TYPES_BY_EXT = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".jpe": "image/jpeg",
    ".png": "image/png",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
    ".tif": "image/tiff",
    ".tiff": "image/tiff",
}


def encode_image(image: np.ndarray, ext: str = ".jpg") -> tuple[bytes, str]:
    """Encode ``image`` into the file format named by ``ext``.

    Args:
        image: Image array to encode.
        ext: File extension, including the leading dot, that selects the
            output format for ``cv2.imencode``.

    Returns:
        A ``(data, mime_type)`` pair: the encoded bytes and the MIME type that
        matches ``ext``. Extensions without a known MIME type are reported as
        ``"application/octet-stream"`` instead of being mislabelled as PNG.

    Raises:
        ValueError: If OpenCV reports that encoding failed.
    """
    ok, encoded = cv2.imencode(ext, image)
    if not ok:
        raise ValueError("could not encode image")
    # Look the type up by extension; previously every non-JPEG format was
    # labelled "image/png", which is wrong for e.g. ".webp" or ".bmp".
    mime_type = _MIME_TYPES_BY_EXT.get(ext.lower(), "application/octet-stream")
    return encoded.tobytes(), mime_type
def _data_url(image_bytes: bytes, mime_type: str) -> str:
    """Build a base64 ``data:`` URL that embeds ``image_bytes``.

    Args:
        image_bytes: Raw payload to embed.
        mime_type: Media type placed in the URL header.

    Returns:
        A string of the form ``data:<mime_type>;base64,<payload>``.
    """
    from base64 import b64encode

    payload = b64encode(image_bytes).decode("ascii")
    return f"data:{mime_type};base64,{payload}"
def process_image(image_bytes: bytes, filename: str) -> dict[str, Any]:
    """Run the full detect / square / crop / annotate pipeline on one image.

    Args:
        image_bytes: Encoded image file contents.
        filename: Name echoed back in the result; it is not otherwise used.

    Returns:
        A dict with these keys:

        * ``filename``: the ``filename`` argument.
        * ``method``: which strategy produced the box (see
          ``select_primary_bbox``).
        * ``detected_bbox`` / ``square_bbox``: the raw and squared boxes as
          plain ``{"x", "y", "w", "h"}`` dicts.
        * ``source_size``: ``{"width", "height"}`` of the decoded image.
        * ``crop_data_url`` / ``annotated_data_url``: JPEG data URLs of the
          cropped square and of the full image with the square outlined.
        * ``mime_type``: MIME type of the crop.
        * ``crop_bytes_io``: a ``BytesIO`` wrapping the encoded crop.

    Raises:
        ValueError: If the image cannot be decoded or encoded.
    """
    # Local import: asdict is only needed here, and the module's import block
    # only pulls in ``dataclass``.
    from dataclasses import asdict

    image = decode_image(image_bytes)
    bbox, method = select_primary_bbox(image)
    square = square_bbox(bbox, image.shape)

    crop = crop_image(image, square)
    annotated = draw_square(image, square)
    crop_bytes, mime_type = encode_image(crop, ".jpg")
    # Keep the annotated image's own MIME type rather than assuming it always
    # matches the crop's.
    annotated_bytes, annotated_mime_type = encode_image(annotated, ".jpg")

    height, width = image.shape[:2]
    return {
        "filename": filename,
        "method": method,
        # asdict() returns fresh dicts instead of exposing the frozen
        # dataclass's internal __dict__, and keeps working if BBox ever
        # switches to slots.
        "detected_bbox": asdict(bbox),
        "square_bbox": asdict(square),
        "source_size": {"width": int(width), "height": int(height)},
        "crop_data_url": _data_url(crop_bytes, mime_type),
        "annotated_data_url": _data_url(annotated_bytes, annotated_mime_type),
        "mime_type": mime_type,
        "crop_bytes_io": BytesIO(crop_bytes),
    }