"""Locate the primary subject of an image and produce a square crop of it.

Detection is attempted in order: HOG people detector, largest edge contour,
and finally a centred fallback box.
"""

from __future__ import annotations

import base64
from dataclasses import asdict, dataclass
from io import BytesIO
from typing import Any

import cv2
import numpy as np


@dataclass(frozen=True)
class BBox:
    """Axis-aligned rectangle in pixel coordinates (top-left origin)."""

    x: int
    y: int
    w: int
    h: int

    @property
    def right(self) -> int:
        """Return the exclusive right edge, ``x + w``."""
        return self.x + self.w

    @property
    def bottom(self) -> int:
        """Return the exclusive bottom edge, ``y + h``."""
        return self.y + self.h


# Shared people detector, configured once at import time.
HOG = cv2.HOGDescriptor()
HOG.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

# MIME types for the file extensions this module knows how to label.
_MIME_BY_EXT = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".jpe": "image/jpeg",
    ".png": "image/png",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
    ".tif": "image/tiff",
    ".tiff": "image/tiff",
}


def decode_image(image_bytes: bytes) -> np.ndarray:
    """Decode an encoded image payload into a colour image array.

    Args:
        image_bytes: Raw bytes of an encoded image file.

    Returns:
        The array produced by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.

    Raises:
        ValueError: If the payload is empty or cannot be decoded.
    """
    # An empty buffer can make cv2.imdecode raise a cv2.error assertion
    # instead of returning None, so reject it with the documented error.
    if not image_bytes:
        raise ValueError("could not decode image")
    data = np.frombuffer(image_bytes, dtype=np.uint8)
    image = cv2.imdecode(data, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("could not decode image")
    return image


def select_primary_bbox(image: np.ndarray) -> tuple[BBox, str]:
    """Pick the box of the main subject and name the method that found it.

    Args:
        image: Image array; its first two dimensions are height and width.

    Returns:
        ``(bbox, method)`` where ``method`` is ``"person_hog"``,
        ``"contour"`` or ``"center_fallback"``.
    """
    person_bbox = detect_person(image)
    if person_bbox is not None:
        return person_bbox, "person_hog"

    contour_bbox = detect_salient_object(image)
    if contour_bbox is not None:
        return contour_bbox, "contour"

    # Nothing detected: fall back to a centred square covering 80% of the
    # shorter image side (at least one pixel, so the box is never empty).
    h, w = image.shape[:2]
    side = max(1, int(min(w, h) * 0.8))
    x = max(0, (w - side) // 2)
    y = max(0, (h - side) // 2)
    return BBox(x=x, y=y, w=side, h=side), "center_fallback"


def detect_person(image: np.ndarray) -> BBox | None:
    """Return the largest-area HOG people detection, or ``None`` if none.

    NOTE(review): detector rectangles are returned unclipped; ``square_bbox``
    clamps the final square to the image bounds.
    """
    rects, _weights = HOG.detectMultiScale(
        image, winStride=(8, 8), padding=(8, 8), scale=1.05
    )
    # ``rects`` may be an ndarray, whose truth value is ambiguous, so test
    # its length explicitly rather than using ``not rects``.
    if len(rects) == 0:
        return None
    largest = max(rects, key=lambda rect: int(rect[2]) * int(rect[3]))
    x, y, w, h = map(int, largest)
    return BBox(x=x, y=y, w=w, h=h)


def detect_salient_object(image: np.ndarray) -> BBox | None:
    """Return the bounding box of the largest edge contour, or ``None``.

    The image is converted to grayscale, blurred, edge-detected with Canny
    and morphologically closed; external contours whose bounding-box area is
    below ``max(500, 1% of the image area)`` are ignored.

    Args:
        image: BGR image array (it is converted with ``COLOR_BGR2GRAY``).

    Returns:
        The largest qualifying bounding box, or ``None`` when no contour
        is large enough.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)
    edges = cv2.Canny(blurred, 40, 120)
    kernel = np.ones((5, 5), np.uint8)
    closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2)

    # findContours returns (contours, hierarchy) in OpenCV 4.x but
    # (image, contours, hierarchy) in 3.x; [-2] is the contours in both.
    contours = cv2.findContours(
        closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    if not contours:
        return None

    h, w = image.shape[:2]
    min_area = max(500, int(h * w * 0.01))

    candidates = []
    for contour in contours:
        x, y, bw, bh = cv2.boundingRect(contour)
        if bw * bh >= min_area:
            candidates.append(BBox(x=x, y=y, w=bw, h=bh))
    # max() keeps the first of equally large boxes.
    return max(candidates, key=lambda box: box.w * box.h, default=None)


def square_bbox(
    bbox: BBox,
    image_shape: tuple[int, int, int],
    buffer_ratio: float = 0.15,
) -> BBox:
    """Expand ``bbox`` into a padded square that fits inside the image.

    Args:
        bbox: Box to centre the square on.
        image_shape: Image shape; only the first two entries
            (height, width) are read.
        buffer_ratio: Padding added on each side, as a fraction of the
            longer box side. Clamped to the range ``[0.0, 0.5]``.

    Returns:
        A square ``BBox`` whose side is at most the shorter image dimension
        and whose origin is shifted so it lies within the image.
    """
    image_h, image_w = image_shape[:2]
    buffer_ratio = max(0.0, min(buffer_ratio, 0.5))

    side = int(round(max(bbox.w, bbox.h) * (1 + buffer_ratio * 2)))
    side = max(1, min(side, image_w, image_h))

    cx = bbox.x + bbox.w / 2
    cy = bbox.y + bbox.h / 2
    x = int(round(cx - side / 2))
    y = int(round(cy - side / 2))

    # Slide the square back inside the image if it overhangs an edge.
    x = max(0, min(x, image_w - side))
    y = max(0, min(y, image_h - side))
    return BBox(x=x, y=y, w=side, h=side)


def draw_square(image: np.ndarray, bbox: BBox) -> np.ndarray:
    """Return a copy of ``image`` with ``bbox`` outlined in green (3 px)."""
    annotated = image.copy()
    cv2.rectangle(
        annotated,
        (bbox.x, bbox.y),
        (bbox.right, bbox.bottom),
        (0, 255, 0),
        3,
    )
    return annotated


def crop_image(image: np.ndarray, bbox: BBox) -> np.ndarray:
    """Return the region of ``image`` covered by ``bbox`` (a NumPy slice)."""
    return image[bbox.y:bbox.bottom, bbox.x:bbox.right]


def encode_image(image: np.ndarray, ext: str = ".jpg") -> tuple[bytes, str]:
    """Encode ``image`` to the format named by ``ext``.

    Args:
        image: Image array to encode.
        ext: File extension, including the leading dot, passed to
            ``cv2.imencode``.

    Returns:
        ``(encoded_bytes, mime_type)``. Extensions without a known MIME
        type are labelled ``"application/octet-stream"``.

    Raises:
        ValueError: If ``cv2.imencode`` reports failure.
    """
    ok, encoded = cv2.imencode(ext, image)
    if not ok:
        raise ValueError("could not encode image")
    # Label by the actual extension instead of assuming PNG for every
    # non-JPEG format.
    mime_type = _MIME_BY_EXT.get(ext.lower(), "application/octet-stream")
    return encoded.tobytes(), mime_type


def _data_url(image_bytes: bytes, mime_type: str) -> str:
    """Return ``image_bytes`` as a base64 ``data:`` URL of ``mime_type``."""
    payload = base64.b64encode(image_bytes).decode("ascii")
    return f"data:{mime_type};base64,{payload}"


def process_image(
    image_bytes: bytes,
    filename: str,
    buffer_ratio: float = 0.15,
) -> dict[str, Any]:
    """Detect the main subject, crop a square around it and encode results.

    Args:
        image_bytes: Raw bytes of an encoded image file.
        filename: Echoed back in the result unchanged.
        buffer_ratio: Padding ratio forwarded to ``square_bbox``; the value
            is echoed back as given (``square_bbox`` clamps it internally).

    Returns:
        A dict with the detection method, the detected and squared boxes,
        the source size, JPEG data URLs for the crop and the annotated
        image, the MIME type, and a ``BytesIO`` holding the crop bytes.

    Raises:
        ValueError: If the image cannot be decoded or encoded.
    """
    image = decode_image(image_bytes)
    bbox, method = select_primary_bbox(image)
    square = square_bbox(bbox, image.shape, buffer_ratio=buffer_ratio)

    crop = crop_image(image, square)
    annotated = draw_square(image, square)

    crop_bytes, mime_type = encode_image(crop, ".jpg")
    annotated_bytes, _ = encode_image(annotated, ".jpg")

    return {
        "filename": filename,
        "method": method,
        "buffer_ratio": buffer_ratio,
        # asdict() returns fresh dicts; handing out ``__dict__`` would let
        # callers mutate the frozen BBox instances through the result.
        "detected_bbox": asdict(bbox),
        "square_bbox": asdict(square),
        "source_size": {
            "width": int(image.shape[1]),
            "height": int(image.shape[0]),
        },
        "crop_data_url": _data_url(crop_bytes, mime_type),
        "annotated_data_url": _data_url(annotated_bytes, mime_type),
        "mime_type": mime_type,
        "crop_bytes_io": BytesIO(crop_bytes),
    }