Add explicit detectors and optional auth

This commit is contained in:
2026-04-11 17:23:25 +02:00
parent 19a5ac16b7
commit 09119e8c0e
7 changed files with 189 additions and 65 deletions

View File

@@ -24,9 +24,9 @@ class BBox:
return self.y + self.h
# Directory containing the Haar cascade XML files shipped with OpenCV.
HAAR_DIR = Path(cv2.data.haarcascades)

# Cascades are constructed once at import time and shared by the detector
# functions. Each one is loaded exactly once.
FACE_CASCADE = cv2.CascadeClassifier(str(HAAR_DIR / "haarcascade_frontalface_default.xml"))
CAT_CASCADE = cv2.CascadeClassifier(str(HAAR_DIR / "haarcascade_frontalcatface_extended.xml"))

# HOG descriptor configured with OpenCV's default people-detector SVM.
HOG = cv2.HOGDescriptor()
HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
@@ -39,40 +39,23 @@ def decode_image(image_bytes: bytes) -> np.ndarray:
return image
def select_primary_bbox(image: np.ndarray, detector: str = "subject") -> tuple[BBox, str]:
    """Pick the primary bounding box in *image* with the requested detector.

    Args:
        image: Image array handed unchanged to the detector functions.
        detector: ``"face"``, ``"person"`` or ``"animal"`` select the matching
            detector. Any other value uses ``detect_subject``; a falsy value
            (``None``, ``""``) is treated as ``"subject"``. Matching ignores
            case and surrounding whitespace.

    Returns:
        A ``(bbox, method)`` pair. ``method`` is the label of the detector
        that was selected, or ``"center_fallback"`` (with the box from
        ``fallback_bbox``) when that detector returned ``None``.
    """
    detector = (detector or "subject").strip().lower()

    if detector == "face":
        bbox, method = detect_face(image), "face_cascade"
    elif detector == "person":
        bbox, method = detect_person(image), "person_hog"
    elif detector == "animal":
        bbox, method = detect_animal(image), "animal_cascade"
    else:
        # Default path: also reached for unrecognised detector names.
        bbox, method = detect_subject(image), "subject_contour"

    # Single fallback for every detector that found nothing.
    if bbox is None:
        return fallback_bbox(image), "center_fallback"
    return bbox, method
def fallback_bbox(image: np.ndarray) -> BBox:
@@ -95,19 +78,39 @@ def detect_face(image: np.ndarray) -> BBox | None:
def detect_person(image: np.ndarray) -> BBox | None:
    """Return the largest HOG people-detector hit in *image*.

    Returns:
        A ``BBox`` for the detection with the greatest ``w * h``, or ``None``
        when the detector reports no rectangles.
    """
    # The per-detection weights are discarded; selection is by area only.
    rects, _ = HOG.detectMultiScale(image, winStride=(8, 8), padding=(8, 8), scale=1.05)
    # Explicit length check kept on purpose: `rects` is presumably a numpy
    # array when non-empty, where plain truth-testing is ambiguous (TODO confirm).
    if len(rects) == 0:
        return None
    # Convert to plain ints before comparing so the area is computed in Python ints.
    x, y, w, h = max(
        (tuple(map(int, rect)) for rect in rects),
        key=lambda rect: rect[2] * rect[3],
    )
    return BBox(x=x, y=y, w=w, h=h)
def detect_animal(image: np.ndarray) -> BBox | None:
    """Locate an animal in *image*.

    The cat-face cascade is tried first and its largest hit (by ``w * h``) is
    returned. If the cascade is empty (not loaded) or finds nothing, the
    result of ``detect_subject`` with animal-specific parameters is returned
    instead, which may be ``None``.
    """
    if not CAT_CASCADE.empty():
        # Grayscale (via COLOR_BGR2GRAY) + histogram equalisation before running the cascade.
        gray = cv2.equalizeHist(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
        cats = CAT_CASCADE.detectMultiScale(gray, scaleFactor=1.08, minNeighbors=4, minSize=(24, 24))
        if len(cats) > 0:
            x, y, w, h = max(
                (tuple(map(int, cat)) for cat in cats),
                key=lambda rect: rect[2] * rect[3],
            )
            return BBox(x=x, y=y, w=w, h=h)

    # Shared fallback for "cascade unavailable" and "no cat face found".
    return detect_subject(image, min_area_ratio=0.02, blur_size=7, dilate_size=11)
def detect_subject(
image: np.ndarray,
min_area_ratio: float = 0.015,
blur_size: int = 9,
dilate_size: int = 13,
) -> BBox | None:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur_size = blur_size + (blur_size % 2 == 0)
dilate_size = max(3, dilate_size)
kernel = np.ones((dilate_size, dilate_size), np.uint8)
blurred = cv2.GaussianBlur(gray, (blur_size, blur_size), 0)
edges = cv2.Canny(blurred, 30, 110)
kernel = np.ones((13, 13), np.uint8)
expanded = cv2.dilate(edges, kernel, iterations=1)
closed = cv2.morphologyEx(expanded, cv2.MORPH_CLOSE, kernel, iterations=1)
contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
@@ -120,7 +123,7 @@ def detect_salient_object(image: np.ndarray) -> BBox | None:
for contour in contours:
x, y, bw, bh = cv2.boundingRect(contour)
area = bw * bh
if area < max(1000, int(image_area * 0.015)):
if area < max(1000, int(image_area * min_area_ratio)):
continue
candidates.append((area, BBox(x=x, y=y, w=bw, h=bh)))
@@ -170,7 +173,7 @@ def _data_url(image_bytes: bytes, mime_type: str) -> str:
return f"data:{mime_type};base64,{base64.b64encode(image_bytes).decode('ascii')}"
def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15, detector: str = "auto") -> dict[str, Any]:
def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15, detector: str = "subject") -> dict[str, Any]:
image = decode_image(image_bytes)
bbox, method = select_primary_bbox(image, detector=detector)
square = square_bbox(bbox, image.shape, buffer_ratio=buffer_ratio)