199 lines
6.7 KiB
Python
199 lines
6.7 KiB
Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import cv2
|
|
import numpy as np
|
|
|
|
|
|
@dataclass(frozen=True)
class BBox:
    """Immutable axis-aligned rectangle described by its top-left corner and size."""

    # Horizontal offset of the left edge.
    x: int
    # Vertical offset of the top edge.
    y: int
    # Horizontal extent of the rectangle.
    w: int
    # Vertical extent of the rectangle.
    h: int

    @property
    def right(self) -> int:
        """Return the exclusive right edge, i.e. ``x + w``."""
        return self.x + self.w

    @property
    def bottom(self) -> int:
        """Return the exclusive bottom edge, i.e. ``y + h``."""
        return self.y + self.h
|
|
|
|
|
|
# Directory holding the Haar cascade XML files that ship with OpenCV.
HAAR_DIR = Path(cv2.data.haarcascades)

# Cascade classifiers are loaded once at import time and shared by the detectors.
FACE_CASCADE = cv2.CascadeClassifier(
    str(HAAR_DIR.joinpath("haarcascade_frontalface_default.xml"))
)
CAT_CASCADE = cv2.CascadeClassifier(
    str(HAAR_DIR.joinpath("haarcascade_frontalcatface_extended.xml"))
)

# HOG descriptor configured with OpenCV's default people-detector SVM.
HOG = cv2.HOGDescriptor()
HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
|
|
|
|
|
|
def decode_image(image_bytes: bytes) -> np.ndarray:
    """Decode an encoded image payload into a colour pixel array.

    Args:
        image_bytes: Raw bytes of an encoded image file.

    Returns:
        The array produced by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.

    Raises:
        ValueError: If the payload is empty or cannot be decoded.
    """
    # cv2.imdecode raises cv2.error on an empty buffer instead of returning
    # None, so reject empty input up front to keep ValueError the only
    # failure mode callers have to handle.
    if not image_bytes:
        raise ValueError("could not decode image")

    data = np.frombuffer(image_bytes, dtype=np.uint8)
    image = cv2.imdecode(data, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("could not decode image")
    return image
|
|
|
|
|
|
def select_primary_bbox(image: np.ndarray, detector: str = "subject") -> tuple[BBox, str]:
    """Run the requested detector and return its box plus a method label.

    ``detector`` is normalised (stripped, lower-cased); ``"face"``,
    ``"person"`` and ``"animal"`` select the matching detector and every
    other value uses the generic subject detector. When the chosen detector
    finds nothing, the centred fallback box is returned with the label
    ``"center_fallback"``.
    """
    mode = (detector or "subject").strip().lower()

    # Detector name -> (detection function, label reported on success).
    routes = {
        "face": (detect_face, "face_cascade"),
        "person": (detect_person, "person_hog"),
        "animal": (detect_animal, "animal_cascade"),
    }
    locate, label = routes.get(mode, (detect_subject, "subject_contour"))

    found = locate(image)
    if found is None:
        return fallback_bbox(image), "center_fallback"
    return found, label
|
|
|
|
|
|
def fallback_bbox(image: np.ndarray) -> BBox:
    """Return a centred square whose side is 85% of the image's shorter dimension."""
    height, width = image.shape[:2]
    shorter = min(width, height)
    # Never smaller than one pixel and never larger than either dimension.
    edge = max(1, min(int(shorter * 0.85), width, height))
    left = max(0, (width - edge) // 2)
    top = max(0, (height - edge) // 2)
    return BBox(x=left, y=top, w=edge, h=edge)
|
|
|
|
|
|
def detect_face(image: np.ndarray) -> BBox | None:
    """Return the largest frontal face found by the Haar cascade, or ``None``.

    Args:
        image: Colour image; it is converted with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The detection with the greatest ``w * h`` area, or ``None`` when no
        face is found or the cascade file could not be loaded.
    """
    # An unloaded cascade makes detectMultiScale raise; treat it as "nothing
    # found" so the caller's fallback applies, mirroring how detect_animal
    # handles a missing cat cascade.
    if FACE_CASCADE.empty():
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    faces = FACE_CASCADE.detectMultiScale(gray, scaleFactor=1.08, minNeighbors=5, minSize=(24, 24))
    # Use len(): the result may be an array, whose truth value is ambiguous.
    if len(faces) == 0:
        return None

    x, y, w, h = max(
        (tuple(map(int, face)) for face in faces),
        key=lambda rect: rect[2] * rect[3],
    )
    return BBox(x=x, y=y, w=w, h=h)
|
|
|
|
|
|
def detect_person(image: np.ndarray) -> BBox | None:
    """Return the largest-area HOG people detection, or ``None`` if there is none."""
    found, _weights = HOG.detectMultiScale(image, winStride=(8, 8), padding=(8, 8), scale=1.05)
    if len(found) == 0:
        return None

    # Convert numpy scalars to plain ints before comparing areas.
    boxes = [tuple(int(value) for value in rect) for rect in found]
    largest = max(boxes, key=lambda box: box[2] * box[3])
    return BBox(x=largest[0], y=largest[1], w=largest[2], h=largest[3])
|
|
|
|
|
|
def detect_animal(image: np.ndarray) -> BBox | None:
    """Return the largest cat-face cascade hit, else a contour-based subject box.

    The contour detector is used both when the cat cascade failed to load
    and when the cascade finds nothing.
    """
    if not CAT_CASCADE.empty():
        gray = cv2.equalizeHist(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
        cats = CAT_CASCADE.detectMultiScale(gray, scaleFactor=1.08, minNeighbors=4, minSize=(24, 24))
        if len(cats) > 0:
            hits = (tuple(map(int, cat)) for cat in cats)
            x, y, w, h = max(hits, key=lambda rect: rect[2] * rect[3])
            return BBox(x=x, y=y, w=w, h=h)

    # Cascade unavailable or no hit: fall back to the generic contour search.
    return detect_subject(image, min_area_ratio=0.02, blur_size=7, dilate_size=11)
|
|
|
|
|
|
def detect_subject(
    image: np.ndarray,
    min_area_ratio: float = 0.015,
    blur_size: int = 9,
    dilate_size: int = 13,
) -> BBox | None:
    """Return the largest sufficiently big contour bounding box, or ``None``.

    The image is converted to grayscale, blurred, run through Canny edge
    detection, then dilated and morphologically closed. Bounding rectangles
    of the external contours are kept only if their area is at least
    ``max(1000, image_area * min_area_ratio)``; the largest survivor wins.

    Args:
        image: Colour image; it is converted with ``cv2.COLOR_BGR2GRAY``.
        min_area_ratio: Minimum box area as a fraction of the image area.
        blur_size: Gaussian kernel size; an even value is bumped up by one.
        dilate_size: Side of the square morphology kernel (at least 3).
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # The Gaussian kernel size is made odd before use.
    if blur_size % 2 == 0:
        blur_size = blur_size + 1
    dilate_size = max(3, dilate_size)
    kernel = np.ones((dilate_size, dilate_size), np.uint8)

    blurred = cv2.GaussianBlur(gray, (blur_size, blur_size), 0)
    edges = cv2.Canny(blurred, 30, 110)
    expanded = cv2.dilate(edges, kernel, iterations=1)
    closed = cv2.morphologyEx(expanded, cv2.MORPH_CLOSE, kernel, iterations=1)
    outlines, _hierarchy = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not outlines:
        return None

    img_h, img_w = image.shape[:2]
    min_area = max(1000, int(img_h * img_w * min_area_ratio))

    # Track the biggest qualifying box; a strict ">" keeps the first on ties.
    best_box: BBox | None = None
    best_area = -1
    for outline in outlines:
        x, y, box_w, box_h = cv2.boundingRect(outline)
        area = box_w * box_h
        if area < min_area:
            continue
        if area > best_area:
            best_area = area
            best_box = BBox(x=x, y=y, w=box_w, h=box_h)
    return best_box
|
|
|
|
|
|
def square_bbox(bbox: BBox, image_shape: tuple[int, int, int], buffer_ratio: float = 0.15) -> BBox:
    """Expand ``bbox`` into a padded square that stays inside the image.

    The side is the longer box dimension grown by ``buffer_ratio`` on each
    side (ratio clamped to ``[0, 0.6]``) and capped at the smaller image
    dimension. The square is centred on the box and then shifted as needed
    so it does not leave the image.
    """
    image_h, image_w = image_shape[:2]
    buffer_ratio = max(0.0, min(buffer_ratio, 0.6))

    longest = max(bbox.w, bbox.h)
    side = int(round(longest * (1 + buffer_ratio * 2)))
    side = max(1, min(side, image_w, image_h))

    def _origin(start: int, extent: int, limit: int) -> int:
        # Centre the square on the box along one axis, then clamp to the image.
        centre = start + extent / 2
        return max(0, min(int(round(centre - side / 2)), limit - side))

    left = _origin(bbox.x, bbox.w, image_w)
    top = _origin(bbox.y, bbox.h, image_h)
    return BBox(x=left, y=top, w=side, h=side)
|
|
|
|
|
|
def draw_square(image: np.ndarray, bbox: BBox) -> np.ndarray:
    """Return a copy of ``image`` with ``bbox`` outlined; the input is not modified."""
    canvas = image.copy()
    top_left = (bbox.x, bbox.y)
    bottom_right = (bbox.right, bbox.bottom)
    outline_colour = (0, 255, 0)
    cv2.rectangle(canvas, top_left, bottom_right, outline_colour, 3)
    return canvas
|
|
|
|
|
|
def crop_image(image: np.ndarray, bbox: BBox) -> np.ndarray:
    """Return the region of ``image`` covered by ``bbox`` via basic slicing."""
    rows = slice(bbox.y, bbox.bottom)
    cols = slice(bbox.x, bbox.right)
    return image[rows, cols]
|
|
|
|
|
|
def encode_image(image: np.ndarray, ext: str = ".jpg") -> tuple[bytes, str]:
    """Encode ``image`` with ``cv2.imencode`` and report the matching MIME type.

    Args:
        image: Pixel array to encode.
        ext: File extension (with leading dot) selecting the output format.

    Returns:
        ``(encoded_bytes, mime_type)``.

    Raises:
        ValueError: If ``cv2.imencode`` reports failure.
    """
    ok, encoded = cv2.imencode(ext, image)
    if not ok:
        raise ValueError("could not encode image")

    # Previously every non-JPEG extension was labelled "image/png", which
    # mislabelled formats such as WebP or BMP. Unrecognised extensions keep
    # the historical "image/png" default for backward compatibility.
    mime_by_ext = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".jpe": "image/jpeg",
        ".png": "image/png",
        ".webp": "image/webp",
        ".bmp": "image/bmp",
        ".tif": "image/tiff",
        ".tiff": "image/tiff",
    }
    mime_type = mime_by_ext.get(ext.lower(), "image/png")
    return encoded.tobytes(), mime_type
|
|
|
|
|
|
def _data_url(image_bytes: bytes, mime_type: str) -> str:
|
|
import base64
|
|
|
|
return f"data:{mime_type};base64,{base64.b64encode(image_bytes).decode('ascii')}"
|
|
|
|
|
|
def process_image(image_bytes: bytes, filename: str, buffer_ratio: float = 0.15, detector: str = "subject") -> dict[str, Any]:
    """Detect the primary subject, crop a padded square around it, and report the result.

    Args:
        image_bytes: Encoded source image.
        filename: Echoed back unchanged in the result.
        buffer_ratio: Padding passed to ``square_bbox``.
        detector: Detector name passed to ``select_primary_bbox``.

    Returns:
        A dict with the inputs, the detection method label, the detected and
        squared boxes (as plain dicts), the source size, JPEG data URLs for
        the crop and the annotated image, the crop MIME type and the raw
        crop bytes.

    Raises:
        ValueError: Propagated from ``decode_image`` / ``encode_image``.
    """
    from dataclasses import asdict

    image = decode_image(image_bytes)
    bbox, method = select_primary_bbox(image, detector=detector)
    square = square_bbox(bbox, image.shape, buffer_ratio=buffer_ratio)
    crop = crop_image(image, square)
    annotated = draw_square(image, square)

    crop_bytes, mime_type = encode_image(crop, ".jpg")
    # Use the annotated image's own MIME type instead of assuming it matches the crop's.
    annotated_bytes, annotated_mime_type = encode_image(annotated, ".jpg")

    return {
        "filename": filename,
        "detector": detector,
        "method": method,
        "buffer_ratio": buffer_ratio,
        # asdict() returns fresh dicts; __dict__ would expose the frozen
        # dataclass's live attribute dict, so mutating the result would
        # mutate the BBox itself.
        "detected_bbox": asdict(bbox),
        "square_bbox": asdict(square),
        "source_size": {"width": int(image.shape[1]), "height": int(image.shape[0])},
        "crop_data_url": _data_url(crop_bytes, mime_type),
        "annotated_data_url": _data_url(annotated_bytes, annotated_mime_type),
        "mime_type": mime_type,
        "crop_bytes": crop_bytes,
    }
|