ultralytics export
This commit is contained in:
parent
bf7b68edb1
commit
fdaeb9981c
14 changed files with 2241 additions and 507 deletions
217
services/ultralytics_model_controller.py
Normal file
@@ -0,0 +1,217 @@
"""
Ultralytics Model Controller - YOLO inference with batched processing.
"""

import logging
from typing import Any, Callable, Dict, List, Optional

import torch

from .base_model_controller import BaseModelController, BatchFrame

logger = logging.getLogger(__name__)


class UltralyticsModelController(BaseModelController):
    """
    Model controller for Ultralytics YOLO inference.

    Uses UltralyticsEngine which wraps the Ultralytics YOLO model with
    native TensorRT backend for GPU-accelerated inference.
    """

    def __init__(
        self,
        inference_engine,
        model_id: str,
        batch_size: int = 16,
        force_timeout: float = 0.05,
        preprocess_fn: Optional[Callable] = None,
        postprocess_fn: Optional[Callable] = None,
    ):
        # Auto-detect actual batch size from the YOLO engine
        engine_batch_size = self._detect_engine_batch_size(inference_engine)

        # If engine has fixed batch size, use it. Otherwise use user's batch_size
        actual_batch_size = engine_batch_size if engine_batch_size > 0 else batch_size

        super().__init__(
            model_id=model_id,
            batch_size=actual_batch_size,
            force_timeout=force_timeout,
            preprocess_fn=preprocess_fn,
            postprocess_fn=postprocess_fn,
        )
        self.inference_engine = inference_engine
        self.engine_batch_size = engine_batch_size  # Store for padding logic

        if engine_batch_size > 0:
            logger.info(
                f"Ultralytics engine has fixed batch_size={engine_batch_size}, "
                f"will pad batches to match"
            )
        else:
            logger.info(
                f"Ultralytics engine supports dynamic batching, "
                f"using max batch_size={actual_batch_size}"
            )

    def _detect_engine_batch_size(self, inference_engine) -> int:
        """
        Detect the batch size from the Ultralytics engine.

        Returns:
            Fixed batch size (e.g., 2, 4, 8) or -1 for dynamic batching
        """
        try:
            # Get engine metadata
            metadata = inference_engine.get_metadata()

            # Check input shape for batch dimension
            if "images" in metadata.input_shapes:
                input_shape = metadata.input_shapes["images"]
                batch_dim = input_shape[0]

                if batch_dim > 0:
                    # Fixed batch size
                    return batch_dim
                else:
                    # Dynamic batch size (-1)
                    return -1

            # Fallback: try to get from model directly
            if (
                hasattr(inference_engine, "_model")
                and inference_engine._model is not None
            ):
                model = inference_engine._model

                # Try to get batch info from Ultralytics model
                if hasattr(model, "predictor") and model.predictor is not None:
                    predictor = model.predictor
                    if hasattr(predictor, "model") and hasattr(
                        predictor.model, "batch"
                    ):
                        return predictor.model.batch

                # Try to get from model.model (for .engine files)
                if hasattr(model, "model"):
                    # For TensorRT engines, check input shape
                    if hasattr(model.model, "get_input_details"):
                        details = model.model.get_input_details()
                        if details and len(details) > 0:
                            shape = details[0].get("shape")
                            if shape and len(shape) > 0:
                                return shape[0] if shape[0] > 0 else -1

        except Exception as e:
            logger.warning(f"Could not detect engine batch size: {e}")

        # Default: assume dynamic batching
        return -1

    def _run_batch_inference(self, batch: List[BatchFrame]) -> List[Dict[str, Any]]:
        """
        Run Ultralytics YOLO inference on a batch of frames.

        Ultralytics handles batching natively and returns Results objects.
        """
        # Preprocess frames
        preprocessed = []
        for batch_frame in batch:
            if self.preprocess_fn:
                processed = self.preprocess_fn(batch_frame.frame)
                # Ensure shape is (C, H, W), not (1, C, H, W)
                if processed.dim() == 4 and processed.shape[0] == 1:
                    processed = processed.squeeze(0)
            else:
                processed = batch_frame.frame
            preprocessed.append(processed)

        # Stack into batch tensor: (B, C, H, W)
        batch_tensor = torch.stack(preprocessed, dim=0)
        actual_batch_size = len(batch)

        # Handle fixed batch size engines (pad if needed)
        if self.engine_batch_size > 0:
            # Engine has fixed batch size
            if batch_tensor.shape[0] > self.engine_batch_size:
                # Truncate to engine's max batch size
                logger.warning(
                    f"Batch size {batch_tensor.shape[0]} exceeds engine max {self.engine_batch_size}, truncating"
                )
                batch_tensor = batch_tensor[: self.engine_batch_size]
                batch = batch[: self.engine_batch_size]
                actual_batch_size = self.engine_batch_size
            elif batch_tensor.shape[0] < self.engine_batch_size:
                # Pad to match engine's fixed batch size
                padding_size = self.engine_batch_size - batch_tensor.shape[0]
                # Replicate last frame to pad (cheaper than zeros)
                padding = batch_tensor[-1:].repeat(padding_size, 1, 1, 1)
                batch_tensor = torch.cat([batch_tensor, padding], dim=0)
                logger.debug(
                    f"Padded batch from {actual_batch_size} to {self.engine_batch_size} frames"
                )
        else:
            # Dynamic batching - just limit to max
            if batch_tensor.shape[0] > self.batch_size:
                logger.warning(
                    f"Batch size {batch_tensor.shape[0]} exceeds configured max {self.batch_size}"
                )
                batch_tensor = batch_tensor[: self.batch_size]
                batch = batch[: self.batch_size]
                actual_batch_size = self.batch_size

        # Run Ultralytics inference
        # Input should be (B, 3, H, W) in range [0, 1], RGB format
        outputs = self.inference_engine.infer(
            inputs={"images": batch_tensor},
            conf=0.25,  # Confidence threshold
            iou=0.45,  # NMS IoU threshold
        )

        # Ultralytics returns Results objects in outputs["results"]
        yolo_results = outputs["results"]

        # Convert Results objects to our standard format
        # Only process actual batch size (ignore padded results if any)
        results = []
        for i in range(actual_batch_size):
            batch_frame = batch[i]
            yolo_result = yolo_results[i]

            # Extract detections from the YOLO Results object
            # yolo_result.boxes.data has format: [x1, y1, x2, y2, conf, cls]
            if hasattr(yolo_result, "boxes") and yolo_result.boxes is not None:
                detections = yolo_result.boxes.data  # Already a tensor on GPU
            else:
                # No detections
                detections = torch.zeros((0, 6), device=batch_tensor.device)

            # Apply custom postprocessing if provided
            if self.postprocess_fn:
                try:
                    # For Ultralytics, postprocess_fn might do additional filtering
                    # Pass the raw boxes tensor in the same format as TensorRT output
                    detections = self.postprocess_fn(
                        {"output0": detections.unsqueeze(0)}  # Add batch dim for compatibility
                    )
                except Exception as e:
                    logger.error(
                        f"Error in postprocess for stream {batch_frame.stream_id}: {e}"
                    )
                    detections = torch.zeros((0, 6), device=batch_tensor.device)

            result = {
                "stream_id": batch_frame.stream_id,
                "timestamp": batch_frame.timestamp,
                "detections": detections,
                "metadata": batch_frame.metadata,
                "yolo_result": yolo_result,  # Keep original Results object for debugging
            }
            results.append(result)

        return results
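For context, a minimal usage sketch of the controller added in this commit. The import path and constructor for UltralyticsEngine, the model file name, and the parameter comments are assumptions for illustration; only UltralyticsModelController's signature comes from this diff.

from services.ultralytics_engine import UltralyticsEngine  # hypothetical module path
from services.ultralytics_model_controller import UltralyticsModelController

# Hypothetical engine construction; the real UltralyticsEngine API is not shown in this diff.
engine = UltralyticsEngine("yolov8n.engine")

controller = UltralyticsModelController(
    inference_engine=engine,
    model_id="yolov8n",
    batch_size=16,        # upper bound; overridden if the engine reports a fixed batch dim
    force_timeout=0.05,   # flush a partially filled batch after 50 ms
)

If the exported .engine was built with a fixed batch dimension, the controller pads or truncates incoming batches to that size, as implemented in _run_batch_inference above.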