# face-lock/app/vision.py

from __future__ import annotations

from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any

import cv2
import numpy as np


@dataclass(frozen=True)
class BBox:
    """Immutable axis-aligned rectangle described by its origin and size.

    ``x``/``y`` are the column/row where the rectangle starts and ``w``/``h``
    are its extent; ``right`` and ``bottom`` are the exclusive end
    coordinates derived from them.
    """

    # Column of the rectangle's starting edge.
    x: int
    # Row of the rectangle's starting edge.
    y: int
    # Horizontal extent.
    w: int
    # Vertical extent.
    h: int

    @property
    def right(self) -> int:
        """Column just past the rectangle (``x + w``)."""
        start, extent = self.x, self.w
        return start + extent

    @property
    def bottom(self) -> int:
        """Row just past the rectangle (``y + h``)."""
        start, extent = self.y, self.h
        return start + extent


# Directory of the Haar cascade XML files that ship with the cv2 package.
HAAR_DIR = Path(cv2.data.haarcascades)

# Cascade classifiers are created once at import time and shared by the
# detect_* functions below.
FACE_CASCADE = cv2.CascadeClassifier(str(HAAR_DIR.joinpath("haarcascade_frontalface_default.xml")))
CAT_CASCADE = cv2.CascadeClassifier(str(HAAR_DIR.joinpath("haarcascade_frontalcatface_extended.xml")))

# HOG descriptor configured with OpenCV's default people-detector SVM.
HOG = cv2.HOGDescriptor()
HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())


def decode_image(image_bytes: bytes) -> np.ndarray:
    """Decode an encoded image file (JPEG, PNG, ...) into a pixel array.

    Args:
        image_bytes: Raw bytes of the encoded image.

    Returns:
        The array produced by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.

    Raises:
        ValueError: If ``image_bytes`` is empty or cannot be decoded.
    """
    # OpenCV asserts on an empty buffer and raises cv2.error instead of
    # returning None, so reject empty payloads up front to keep the
    # documented ValueError contract for every kind of undecodable input.
    if len(image_bytes) == 0:
        raise ValueError("could not decode image")

    data = np.frombuffer(image_bytes, dtype=np.uint8)
    image = cv2.imdecode(data, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("could not decode image")
    return image


def select_primary_bbox(image: np.ndarray, detector: str = "subject") -> tuple[BBox, str]:
    """Find the main region of interest using the requested detector.

    Args:
        image: Image array handed unchanged to the detector functions.
        detector: ``"face"``, ``"person"`` or ``"animal"``; matching ignores
            case and surrounding whitespace. Any other value (including an
            empty one) selects the contour-based subject detector.

    Returns:
        ``(bbox, method)``. ``method`` is the label of the chosen detector
        when it returned a box, otherwise ``"center_fallback"`` together
        with the box from ``fallback_bbox``. Note that ``"animal_cascade"``
        is reported for any box returned by ``detect_animal``.
    """
    requested = (detector or "subject").strip().lower()

    # Detector name -> (detection function, label reported on success).
    strategies = {
        "face": (detect_face, "face_cascade"),
        "person": (detect_person, "person_hog"),
        "animal": (detect_animal, "animal_cascade"),
    }
    detect, label = strategies.get(requested, (detect_subject, "subject_contour"))

    found = detect(image)
    if found is None:
        return fallback_bbox(image), "center_fallback"
    return found, label


def fallback_bbox(image: np.ndarray) -> BBox:
    """Return a centered square spanning 85% of the image's shorter side.

    Args:
        image: Array whose first two dimensions are (height, width).

    Returns:
        A square ``BBox`` whose side is at least 1 and never larger than
        either image dimension, positioned in the middle of the image.
    """
    height, width = image.shape[:2]
    shorter = min(width, height)

    # Truncate to whole pixels, then keep the side within [1, shorter side].
    side = max(1, min(int(shorter * 0.85), width, height))

    left = max(0, (width - side) // 2)
    top = max(0, (height - side) // 2)
    return BBox(x=left, y=top, w=side, h=side)


def detect_face(image: np.ndarray) -> BBox | None:
    """Return the largest face found by ``FACE_CASCADE``, or ``None``.

    The image is converted to grayscale and histogram-equalized before
    detection. When several faces are reported, the one with the biggest
    ``w * h`` wins.
    """
    equalized = cv2.equalizeHist(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    detections = FACE_CASCADE.detectMultiScale(
        equalized,
        scaleFactor=1.08,
        minNeighbors=5,
        minSize=(24, 24),
    )
    # Use len() rather than truthiness: the result may be a NumPy array,
    # whose truth value is ambiguous once it holds more than one element.
    if len(detections) == 0:
        return None

    # Convert to plain ints so BBox does not carry NumPy scalar types.
    rects = [tuple(int(value) for value in rect) for rect in detections]
    left, top, width, height = max(rects, key=lambda rect: rect[2] * rect[3])
    return BBox(x=left, y=top, w=width, h=height)


def detect_person(image: np.ndarray) -> BBox | None:
    """Return the largest person rectangle found by ``HOG``, or ``None``.

    The detection weights returned alongside the rectangles are ignored;
    the winner is chosen purely by ``w * h``.
    """
    found, _weights = HOG.detectMultiScale(
        image,
        winStride=(8, 8),
        padding=(8, 8),
        scale=1.05,
    )
    # len() instead of truthiness because the result may be a NumPy array.
    if len(found) == 0:
        return None

    # Plain ints keep NumPy scalar types out of the returned BBox.
    rects = [tuple(int(value) for value in rect) for rect in found]
    largest = max(rects, key=lambda rect: rect[2] * rect[3])
    return BBox(x=largest[0], y=largest[1], w=largest[2], h=largest[3])


def detect_animal(image: np.ndarray) -> BBox | None:
    """Return the largest cat-face detection, else a contour-based guess.

    ``CAT_CASCADE`` is tried first when it is loaded. If the cascade is
    empty or reports nothing, the result of ``detect_subject`` (called with
    animal-specific parameters) is returned instead, which may be ``None``.
    """
    if not CAT_CASCADE.empty():
        equalized = cv2.equalizeHist(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
        hits = CAT_CASCADE.detectMultiScale(
            equalized,
            scaleFactor=1.08,
            minNeighbors=4,
            minSize=(24, 24),
        )
        if len(hits) > 0:
            rects = [tuple(int(value) for value in hit) for hit in hits]
            left, top, width, height = max(rects, key=lambda rect: rect[2] * rect[3])
            return BBox(x=left, y=top, w=width, h=height)

    # Cascade unavailable or no detection: fall back to contour analysis.
    return detect_subject(image, min_area_ratio=0.02, blur_size=7, dilate_size=11)


def detect_subject(
    image: np.ndarray,
    min_area_ratio: float = 0.015,
    blur_size: int = 9,
    dilate_size: int = 13,
) -> BBox | None:
    """Guess the dominant subject from edge contours.

    Pipeline: grayscale -> Gaussian blur -> Canny edges -> dilate ->
    morphological close -> external contours. The contour whose bounding
    rectangle has the largest area is returned.

    Args:
        image: Image array; converted with ``cv2.COLOR_BGR2GRAY``.
        min_area_ratio: Minimum bounding-rectangle area as a fraction of the
            image area. An absolute floor of 1000 pixels also applies.
        blur_size: Gaussian kernel size; even values are bumped up by one.
        dilate_size: Side of the square dilate/close kernel, at least 3.

    Returns:
        The bounding box of the largest qualifying contour, or ``None`` when
        no contour is found or none is large enough.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Bump even kernel sizes to the next odd value before blurring.
    if blur_size % 2 == 0:
        blur_size = blur_size + 1
    kernel_side = max(3, dilate_size)
    kernel = np.ones((kernel_side, kernel_side), np.uint8)

    blurred = cv2.GaussianBlur(gray, (blur_size, blur_size), 0)
    edges = cv2.Canny(blurred, 30, 110)
    # Thicken, then close, the edge map so nearby edges merge into blobs.
    expanded = cv2.dilate(edges, kernel, iterations=1)
    closed = cv2.morphologyEx(expanded, cv2.MORPH_CLOSE, kernel, iterations=1)

    contours, _hierarchy = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    img_h, img_w = image.shape[:2]
    # The threshold is the same for every contour: relative to the image
    # size, but never below 1000 pixels.
    min_area = max(1000, int(img_h * img_w * min_area_ratio))

    best_area = 0
    best_box: BBox | None = None
    for contour in contours:
        left, top, box_w, box_h = cv2.boundingRect(contour)
        area = box_w * box_h
        # Strict '>' keeps the first contour when several share the top area.
        if area >= min_area and area > best_area:
            best_area = area
            best_box = BBox(x=left, y=top, w=box_w, h=box_h)

    return best_box


def square_bbox(bbox: BBox, image_shape: tuple[int, int, int], buffer_ratio: float = 0.15) -> BBox:
    """Expand ``bbox`` into a padded square that fits inside the image.

    Args:
        bbox: Box to square up; the result is centered on it when possible.
        image_shape: Image shape whose first two entries are (height, width).
        buffer_ratio: Padding added on each side as a fraction of the box's
            longer edge. Clamped to the range [0.0, 0.6].

    Returns:
        A square ``BBox`` whose side is at least 1 and at most the shorter
        image dimension, shifted as needed to stay within the image.
    """
    img_h, img_w = image_shape[:2]
    margin = max(0.0, min(buffer_ratio, 0.6))

    longest = max(bbox.w, bbox.h)
    side = int(round(longest * (1 + margin * 2)))
    side = max(1, min(side, img_w, img_h))

    def _origin(center: float, limit: int) -> int:
        # Center the square on the box, then slide it back inside [0, limit].
        start = int(round(center - side / 2))
        return max(0, min(start, limit - side))

    left = _origin(bbox.x + bbox.w / 2, img_w)
    top = _origin(bbox.y + bbox.h / 2, img_h)
    return BBox(x=left, y=top, w=side, h=side)


def draw_square(image: np.ndarray, bbox: BBox) -> np.ndarray:
    """Return a copy of ``image`` with ``bbox`` outlined in green.

    The input array is left untouched; the outline is 3 pixels thick.
    """
    canvas = image.copy()
    first_corner = (bbox.x, bbox.y)
    opposite_corner = (bbox.right, bbox.bottom)
    green = (0, 255, 0)
    cv2.rectangle(canvas, first_corner, opposite_corner, green, 3)
    return canvas


def crop_image(image: np.ndarray, bbox: BBox) -> np.ndarray:
    """Return the part of ``image`` covered by ``bbox``.

    The result is produced by basic slicing, so it is a view onto ``image``
    rather than an independent copy.
    """
    rows = slice(bbox.y, bbox.bottom)
    cols = slice(bbox.x, bbox.right)
    return image[rows, cols]


# File extension -> MIME type for image formats commonly written via
# cv2.imencode. Keys are lower-case and include the leading dot.
_MIME_TYPES_BY_EXT = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".jpe": "image/jpeg",
    ".png": "image/png",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
    ".tif": "image/tiff",
    ".tiff": "image/tiff",
}


def encode_image(image: np.ndarray, ext: str = ".jpg") -> tuple[bytes, str]:
    """Encode ``image`` in the format selected by ``ext``.

    Args:
        image: Pixel array to encode.
        ext: File extension (with leading dot) passed to ``cv2.imencode`` to
            choose the output format.

    Returns:
        ``(encoded_bytes, mime_type)``. The MIME type is looked up from the
        extension case-insensitively; extensions without a known image MIME
        type yield ``"application/octet-stream"`` instead of being
        mislabelled as PNG.

    Raises:
        ValueError: If ``cv2.imencode`` reports failure.
    """
    ok, encoded = cv2.imencode(ext, image)
    if not ok:
        raise ValueError("could not encode image")
    mime_type = _MIME_TYPES_BY_EXT.get(ext.lower(), "application/octet-stream")
    return encoded.tobytes(), mime_type


def _data_url(image_bytes: bytes, mime_type: str) -> str:
    import base64

    return f"data:{mime_type};base64,{base64.b64encode(image_bytes).decode('ascii')}"


def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15, detector: str = "subject") -> dict[str, Any]:
    """Detect the primary subject of an image and produce a square crop.

    Steps: decode the bytes, pick a bounding box with the requested
    detector, grow it into a padded square, crop that square, draw it on a
    copy of the source image, and JPEG-encode both results.

    Args:
        image_bytes: Encoded source image.
        filename: Name echoed back in the result; not otherwise used.
        buffer_ratio: Padding ratio forwarded to ``square_bbox`` (which
            clamps it); the value is echoed back as given.
        detector: Detector name forwarded to ``select_primary_bbox``; echoed
            back as given.

    Returns:
        A dict with the echoed inputs, the detection ``method``, the
        detected and squared boxes as plain ``{x, y, w, h}`` dicts, the
        source size, data URLs for the crop and the annotated image, the
        crop's MIME type and the raw encoded crop bytes.

    Raises:
        ValueError: If the image cannot be decoded or encoded.
    """
    image = decode_image(image_bytes)
    bbox, method = select_primary_bbox(image, detector=detector)
    square = square_bbox(bbox, image.shape, buffer_ratio=buffer_ratio)
    crop = crop_image(image, square)
    annotated = draw_square(image, square)

    crop_bytes, mime_type = encode_image(crop, ".jpg")
    # Keep the annotated image's own MIME type so its data URL stays correct
    # even if the two encodings ever diverge.
    annotated_bytes, annotated_mime_type = encode_image(annotated, ".jpg")

    source_h, source_w = image.shape[:2]
    return {
        "filename": filename,
        "detector": detector,
        "method": method,
        "buffer_ratio": buffer_ratio,
        # asdict() returns fresh dicts; handing out the instances' __dict__
        # would let callers mutate the frozen BBox objects through the result.
        "detected_bbox": asdict(bbox),
        "square_bbox": asdict(square),
        "source_size": {"width": int(source_w), "height": int(source_h)},
        "crop_data_url": _data_url(crop_bytes, mime_type),
        "annotated_data_url": _data_url(annotated_bytes, annotated_mime_type),
        "mime_type": mime_type,
        "crop_bytes": crop_bytes,
    }