Add dockerized detector and UI cleanup
This commit is contained in:
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import cv2
|
||||
@@ -24,6 +25,9 @@ class BBox:
|
||||
return self.y + self.h
|
||||
|
||||
|
||||
# Frontal-face Haar cascade, loaded from the XML file located under
# cv2.data.haarcascades.
FACE_CASCADE = cv2.CascadeClassifier(
    str(Path(cv2.data.haarcascades).joinpath("haarcascade_frontalface_default.xml"))
)

# HOG descriptor configured with OpenCV's default people-detector SVM.
HOG = cv2.HOGDescriptor()
HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
|
||||
|
||||
@@ -36,20 +40,59 @@ def decode_image(image_bytes: bytes) -> np.ndarray:
|
||||
return image
|
||||
|
||||
|
||||
def select_primary_bbox(image: np.ndarray, detector: str = "auto") -> tuple[BBox, str]:
    """Pick the primary bounding box in *image* and report how it was found.

    Args:
        image: Image array handed unchanged to the detector helpers.
        detector: ``"face"``, ``"person"`` or ``"salient"`` runs only that
            detector. Any other value (``"auto"``, ``None``, an empty string,
            or an unrecognized name) tries face, then person, then salient
            detection in that order. Matching ignores case and surrounding
            whitespace.

    Returns:
        A ``(bbox, method)`` pair. ``method`` is ``"face_cascade"``,
        ``"person_hog"`` or ``"salient_contour"`` when the corresponding
        detector produced a box, and ``"center_fallback"`` when none did.
    """
    detector = (detector or "auto").strip().lower()

    # Ordered (detector name, detection function, reported method label).
    # The order is the priority used when no single detector is requested.
    pipeline = (
        ("face", detect_face, "face_cascade"),
        ("person", detect_person, "person_hog"),
        ("salient", detect_salient_object, "salient_contour"),
    )

    # An explicit detector name narrows the pipeline to that single step;
    # anything else runs the full chain.
    requested = [step for step in pipeline if step[0] == detector]
    for _name, detect, method in requested or pipeline:
        bbox = detect(image)
        if bbox is not None:
            return bbox, method

    return fallback_bbox(image), "center_fallback"
|
||||
|
||||
|
||||
def fallback_bbox(image: np.ndarray) -> BBox:
    """Return a centered square box for use when no detector found anything.

    The square's side is 85% of the image's shorter dimension, clamped to at
    least 1 pixel and at most the image's width and height.

    Args:
        image: Array whose first two ``shape`` entries are height and width.

    Returns:
        A square ``BBox`` centered in the image. Only the box is returned;
        callers attach the ``"center_fallback"`` method label themselves.
    """
    h, w = image.shape[:2]
    side = int(min(w, h) * 0.85)
    # Keep the side positive for tiny images and never larger than the image.
    side = max(1, min(side, w, h))
    x = max(0, (w - side) // 2)
    y = max(0, (h - side) // 2)
    return BBox(x=x, y=y, w=side, h=side)
|
||||
|
||||
|
||||
def detect_face(image: np.ndarray) -> BBox | None:
    """Return the largest face found by ``FACE_CASCADE``, or ``None``.

    The image is converted with ``cv2.COLOR_BGR2GRAY`` and histogram-equalized
    before the cascade runs, so a BGR color image is expected.

    Args:
        image: Image array to search.

    Returns:
        The detection with the greatest ``w * h`` area as a ``BBox`` of plain
        Python ints, or ``None`` when the cascade reports no faces.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    faces = FACE_CASCADE.detectMultiScale(gray, scaleFactor=1.08, minNeighbors=5, minSize=(24, 24))
    # len() rather than truthiness: the result may be an array, whose truth
    # value is ambiguous.
    if len(faces) == 0:
        return None
    # Materialize each rectangle as a tuple of ints before ranking. A bare
    # map object is not subscriptable, so it cannot be indexed in the key.
    rects = (tuple(map(int, face)) for face in faces)
    x, y, w, h = max(rects, key=lambda rect: rect[2] * rect[3])
    return BBox(x=x, y=y, w=w, h=h)
|
||||
|
||||
|
||||
def detect_person(image: np.ndarray) -> BBox | None:
|
||||
@@ -63,10 +106,11 @@ def detect_person(image: np.ndarray) -> BBox | None:
|
||||
|
||||
def detect_salient_object(image: np.ndarray) -> BBox | None:
|
||||
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||
blurred = cv2.GaussianBlur(gray, (7, 7), 0)
|
||||
edges = cv2.Canny(blurred, 40, 120)
|
||||
kernel = np.ones((5, 5), np.uint8)
|
||||
closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2)
|
||||
blurred = cv2.GaussianBlur(gray, (9, 9), 0)
|
||||
edges = cv2.Canny(blurred, 30, 110)
|
||||
kernel = np.ones((13, 13), np.uint8)
|
||||
expanded = cv2.dilate(edges, kernel, iterations=1)
|
||||
closed = cv2.morphologyEx(expanded, cv2.MORPH_CLOSE, kernel, iterations=1)
|
||||
contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||
if not contours:
|
||||
return None
|
||||
@@ -77,7 +121,7 @@ def detect_salient_object(image: np.ndarray) -> BBox | None:
|
||||
for contour in contours:
|
||||
x, y, bw, bh = cv2.boundingRect(contour)
|
||||
area = bw * bh
|
||||
if area < max(500, int(image_area * 0.01)):
|
||||
if area < max(1000, int(image_area * 0.015)):
|
||||
continue
|
||||
candidates.append((area, BBox(x=x, y=y, w=bw, h=bh)))
|
||||
|
||||
@@ -89,7 +133,7 @@ def detect_salient_object(image: np.ndarray) -> BBox | None:
|
||||
|
||||
def square_bbox(bbox: BBox, image_shape: tuple[int, int, int], buffer_ratio: float = 0.15) -> BBox:
|
||||
image_h, image_w = image_shape[:2]
|
||||
buffer_ratio = max(0.0, min(buffer_ratio, 0.5))
|
||||
buffer_ratio = max(0.0, min(buffer_ratio, 0.6))
|
||||
side = int(round(max(bbox.w, bbox.h) * (1 + buffer_ratio * 2)))
|
||||
side = max(1, min(side, image_w, image_h))
|
||||
|
||||
@@ -127,9 +171,9 @@ def _data_url(image_bytes: bytes, mime_type: str) -> str:
|
||||
return f"data:{mime_type};base64,{base64.b64encode(image_bytes).decode('ascii')}"
|
||||
|
||||
|
||||
def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15) -> dict[str, Any]:
|
||||
def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15, detector: str = "auto") -> dict[str, Any]:
|
||||
image = decode_image(image_bytes)
|
||||
bbox, method = select_primary_bbox(image)
|
||||
bbox, method = select_primary_bbox(image, detector=detector)
|
||||
square = square_bbox(bbox, image.shape, buffer_ratio=buffer_ratio)
|
||||
crop = crop_image(image, square)
|
||||
annotated = draw_square(image, square)
|
||||
@@ -139,6 +183,7 @@ def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15)
|
||||
|
||||
return {
|
||||
"filename": filename,
|
||||
"detector": detector,
|
||||
"method": method,
|
||||
"buffer_ratio": buffer_ratio,
|
||||
"detected_bbox": bbox.__dict__,
|
||||
|
||||
Reference in New Issue
Block a user