Add explicit detectors and optional auth

2026-04-11 17:23:25 +02:00
parent 19a5ac16b7
commit 09119e8c0e
7 changed files with 189 additions and 65 deletions
--- a/app/vision.py
+++ b/app/vision.py
@@ -24,9 +24,9 @@ class BBox:
        return self.y + self.h


-FACE_CASCADE = cv2.CascadeClassifier(
-    str(Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml")
-)
+HAAR_DIR = Path(cv2.data.haarcascades)  # folder holding the cascade XML files named below
+FACE_CASCADE = cv2.CascadeClassifier(str(HAAR_DIR / "haarcascade_frontalface_default.xml"))  # frontal human faces
+CAT_CASCADE = cv2.CascadeClassifier(str(HAAR_DIR / "haarcascade_frontalcatface_extended.xml"))  # frontal cat faces; detect_animal checks .empty() before use
 HOG = cv2.HOGDescriptor()
 HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

@@ -39,40 +39,23 @@ def decode_image(image_bytes: bytes) -> np.ndarray:
    return image


-def select_primary_bbox(image: np.ndarray, detector: str = "auto") -> tuple[BBox, str]:
-    detector = (detector or "auto").strip().lower()
+def select_primary_bbox(image: np.ndarray, detector: str = "subject") -> tuple[BBox, str]:
+    detector = (detector or "subject").strip().lower()  # None or "" mean "subject"

    if detector == "face":
-        face_bbox = detect_face(image)
-        if face_bbox is not None:
-            return face_bbox, "face_cascade"
-        return fallback_bbox(image), "center_fallback"
+        face_box = detect_face(image)
+        return (fallback_bbox(image), "center_fallback") if face_box is None else (face_box, "face_cascade")

    if detector == "person":
-        person_bbox = detect_person(image)
-        if person_bbox is not None:
-            return person_bbox, "person_hog"
-        return fallback_bbox(image), "center_fallback"
+        person_box = detect_person(image)
+        return (fallback_bbox(image), "center_fallback") if person_box is None else (person_box, "person_hog")

-    if detector == "salient":
-        salient_bbox = detect_salient_object(image)
-        if salient_bbox is not None:
-            return salient_bbox, "salient_contour"
-        return fallback_bbox(image), "center_fallback"
+    if detector == "animal":
+        animal_box = detect_animal(image)  # NOTE(review): may be a contour box, label below still says cascade
+        return (fallback_bbox(image), "center_fallback") if animal_box is None else (animal_box, "animal_cascade")

-    face_bbox = detect_face(image)
-    if face_bbox is not None:
-        return face_bbox, "face_cascade"
-
-    person_bbox = detect_person(image)
-    if person_bbox is not None:
-        return person_bbox, "person_hog"
-
-    salient_bbox = detect_salient_object(image)
-    if salient_bbox is not None:
-        return salient_bbox, "salient_contour"
-
-    return fallback_bbox(image), "center_fallback"
+    subject_box = detect_subject(image)  # every other name, including the default "subject"
+    return (fallback_bbox(image), "center_fallback") if subject_box is None else (subject_box, "subject_contour")


 def fallback_bbox(image: np.ndarray) -> BBox:
@@ -95,19 +78,39 @@ def detect_face(image: np.ndarray) -> BBox | None:


 def detect_person(image: np.ndarray) -> BBox | None:
-    rects, weights = HOG.detectMultiScale(image, winStride=(8, 8), padding=(8, 8), scale=1.05)
+    rects, _weights = HOG.detectMultiScale(image, winStride=(8, 8), padding=(8, 8), scale=1.05)
    if len(rects) == 0:
        return None
-    best = max(zip(rects, weights), key=lambda item: int(item[0][2]) * int(item[0][3]))[0]
-    x, y, w, h = map(int, best)
-    return BBox(x=x, y=y, w=w, h=h)
+    x, y, w, h = max((tuple(map(int, rect)) for rect in rects), key=lambda box: box[2] * box[3])  # largest area wins
+    return BBox(x=x, y=y, w=w, h=h)


-def detect_salient_object(image: np.ndarray) -> BBox | None:
+def detect_animal(image: np.ndarray) -> BBox | None:
+    if CAT_CASCADE.empty():  # classifier not loaded: contour search is the only option
+        return detect_subject(image, min_area_ratio=0.02, blur_size=7, dilate_size=11)
+
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    blurred = cv2.GaussianBlur(gray, (9, 9), 0)
+    gray = cv2.equalizeHist(gray)  # histogram-equalize before running the cascade
+    hits = CAT_CASCADE.detectMultiScale(gray, scaleFactor=1.08, minNeighbors=4, minSize=(24, 24))
+    if len(hits) > 0:
+        x, y, w, h = max((tuple(map(int, hit)) for hit in hits), key=lambda box: box[2] * box[3])  # largest area wins
+        return BBox(x=x, y=y, w=w, h=h)
+
+    return detect_subject(image, min_area_ratio=0.02, blur_size=7, dilate_size=11)  # no cascade hit: contour search
+
+
+def detect_subject(
+    image: np.ndarray,
+    min_area_ratio: float = 0.015,
+    blur_size: int = 9,
+    dilate_size: int = 13,
+) -> BBox | None:
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    blur_size = blur_size + (blur_size % 2 == 0)
+    dilate_size = max(3, dilate_size)
+    kernel = np.ones((dilate_size, dilate_size), np.uint8)
+    blurred = cv2.GaussianBlur(gray, (blur_size, blur_size), 0)
    edges = cv2.Canny(blurred, 30, 110)
-    kernel = np.ones((13, 13), np.uint8)
    expanded = cv2.dilate(edges, kernel, iterations=1)
    closed = cv2.morphologyEx(expanded, cv2.MORPH_CLOSE, kernel, iterations=1)
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
@@ -120,7 +123,7 @@ def detect_salient_object(image: np.ndarray) -> BBox | None:
    for contour in contours:
        x, y, bw, bh = cv2.boundingRect(contour)
        area = bw * bh
-        if area < max(1000, int(image_area * 0.015)):
+        if area < max(1000, int(image_area * min_area_ratio)):
            continue
        candidates.append((area, BBox(x=x, y=y, w=bw, h=bh)))

@@ -170,7 +173,7 @@ def _data_url(image_bytes: bytes, mime_type: str) -> str:
    return f"data:{mime_type};base64,{base64.b64encode(image_bytes).decode('ascii')}"


-def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15, detector: str = "auto") -> dict[str, Any]:
+def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15, detector: str = "subject") -> dict[str, Any]:
    image = decode_image(image_bytes)
    bbox, method = select_primary_bbox(image, detector=detector)
    square = square_bbox(bbox, image.shape, buffer_ratio=buffer_ratio)