""" Ultralytics Model Controller - YOLO inference with batched processing. """ import logging from typing import Any, Callable, Dict, List, Optional import torch from .base_model_controller import BaseModelController, BatchFrame logger = logging.getLogger(__name__) class UltralyticsModelController(BaseModelController): """ Model controller for Ultralytics YOLO inference. Uses UltralyticsEngine which wraps the Ultralytics YOLO model with native TensorRT backend for GPU-accelerated inference. """ def __init__( self, inference_engine, model_id: str, batch_size: int = 16, max_queue_size: int = 100, preprocess_fn: Optional[Callable] = None, postprocess_fn: Optional[Callable] = None, ): # Auto-detect actual batch size from the YOLO engine print(f"[UltralyticsModelController] Detecting batch size from engine...") engine_batch_size = self._detect_engine_batch_size(inference_engine) print( f"[UltralyticsModelController] Detected engine_batch_size={engine_batch_size}" ) # If engine has fixed batch size, use it. Otherwise use user's batch_size actual_batch_size = engine_batch_size if engine_batch_size > 0 else batch_size print( f"[UltralyticsModelController] Using actual_batch_size={actual_batch_size}" ) super().__init__( model_id=model_id, batch_size=actual_batch_size, max_queue_size=max_queue_size, preprocess_fn=preprocess_fn, postprocess_fn=postprocess_fn, ) self.inference_engine = inference_engine self.engine_batch_size = engine_batch_size # Store for padding logic if engine_batch_size > 0: print(f"✓ Ultralytics engine has FIXED batch_size={engine_batch_size}") print( f" Will pad/truncate all batches to exactly {engine_batch_size} frames" ) logger.info( f"Ultralytics engine has fixed batch_size={engine_batch_size}, " f"will pad batches to match" ) # CRITICAL: Override the parent's batch_size to match engine's fixed size # This prevents buffer accumulation beyond the engine's capacity self.batch_size = engine_batch_size print(f" Controller self.batch_size is now: {self.batch_size}") print(f" Buffer will swap when size >= {self.batch_size}") else: print( f"✓ Ultralytics engine supports DYNAMIC batching, max={actual_batch_size}" ) logger.info( f"Ultralytics engine supports dynamic batching, " f"using max batch_size={actual_batch_size}" ) def _detect_engine_batch_size(self, inference_engine) -> int: """ Detect the batch size from Ultralytics engine. 

    def _detect_engine_batch_size(self, inference_engine) -> int:
        """
        Detect the batch size from the Ultralytics engine.

        Returns:
            Fixed batch size (e.g., 2, 4, 8), or -1 for dynamic batching.
        """
        try:
            # Get engine metadata
            metadata = inference_engine.get_metadata()
            logger.info(f"Detecting batch size from engine metadata: {metadata}")

            # Check the input shape for a batch dimension
            if "images" in metadata.input_shapes:
                input_shape = metadata.input_shapes["images"]
                batch_dim = input_shape[0]
                logger.info(f"Found batch dimension in metadata: {batch_dim}")
                if batch_dim > 0:
                    # Fixed batch size
                    logger.info(f"Using fixed batch size from engine: {batch_dim}")
                    return batch_dim
                else:
                    # Dynamic batch size (-1)
                    logger.info("Engine supports dynamic batching (batch_dim=-1)")
                    return -1

            # Fallback: try to read the batch size from the model directly
            if (
                hasattr(inference_engine, "_model")
                and inference_engine._model is not None
            ):
                model = inference_engine._model

                # Try to get batch info from the Ultralytics predictor
                if hasattr(model, "predictor") and model.predictor is not None:
                    predictor = model.predictor
                    if hasattr(predictor, "model") and hasattr(
                        predictor.model, "batch"
                    ):
                        return predictor.model.batch

                # Try model.model (for .engine files): for TensorRT engines,
                # check the input shape details
                if hasattr(model, "model"):
                    if hasattr(model.model, "get_input_details"):
                        details = model.model.get_input_details()
                        if details and len(details) > 0:
                            shape = details[0].get("shape")
                            if shape and len(shape) > 0:
                                return shape[0] if shape[0] > 0 else -1

        except Exception as e:
            logger.warning(f"Could not detect engine batch size: {e}")

        # Default: assume dynamic batching
        return -1
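
    # Illustrative padding/truncation behavior of _run_batch_inference for a
    # fixed-batch engine (hypothetical engine_batch_size=4):
    #
    #     3 frames in -> last frame replicated once; engine sees 4 frames,
    #                    only the first 3 results are returned
    #     6 frames in -> truncated to the first 4 frames (with a warning)
    #     4 frames in -> passed through unchanged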

    def _run_batch_inference(self, batch: List[BatchFrame]) -> List[Dict[str, Any]]:
        """
        Run Ultralytics YOLO inference on a batch of frames.

        Ultralytics handles batching natively and returns Results objects.
        """
        # Preprocess frames
        preprocessed = []
        for batch_frame in batch:
            if self.preprocess_fn:
                processed = self.preprocess_fn(batch_frame.frame)
                # Ensure shape is (C, H, W), not (1, C, H, W)
                if processed.dim() == 4 and processed.shape[0] == 1:
                    processed = processed.squeeze(0)
            else:
                processed = batch_frame.frame
            preprocessed.append(processed)

        # Stack into a batch tensor: (B, C, H, W)
        batch_tensor = torch.stack(preprocessed, dim=0)
        actual_batch_size = len(batch)

        if self.engine_batch_size > 0:
            # Engine has a fixed batch size: pad or truncate to match
            if batch_tensor.shape[0] > self.engine_batch_size:
                # Truncate to the engine's max batch size
                logger.warning(
                    f"Batch size {batch_tensor.shape[0]} exceeds engine max "
                    f"{self.engine_batch_size}, truncating"
                )
                batch_tensor = batch_tensor[: self.engine_batch_size]
                batch = batch[: self.engine_batch_size]
                actual_batch_size = self.engine_batch_size
            elif batch_tensor.shape[0] < self.engine_batch_size:
                # Pad to the engine's fixed batch size by replicating the
                # last frame
                padding_size = self.engine_batch_size - batch_tensor.shape[0]
                padding = batch_tensor[-1:].repeat(padding_size, 1, 1, 1)
                batch_tensor = torch.cat([batch_tensor, padding], dim=0)
                logger.debug(
                    f"Padded batch from {actual_batch_size} to "
                    f"{self.engine_batch_size} frames"
                )
        else:
            # Dynamic batching: just cap at the configured max
            if batch_tensor.shape[0] > self.batch_size:
                logger.warning(
                    f"Batch size {batch_tensor.shape[0]} exceeds configured "
                    f"max {self.batch_size}"
                )
                batch_tensor = batch_tensor[: self.batch_size]
                batch = batch[: self.batch_size]
                actual_batch_size = self.batch_size

        # Run Ultralytics inference.
        # Input should be (B, 3, H, W) in range [0, 1], RGB format.
        outputs = self.inference_engine.infer(
            inputs={"images": batch_tensor},
            conf=0.25,  # Confidence threshold
            iou=0.45,  # NMS IoU threshold
        )

        # Ultralytics returns Results objects in outputs["results"]
        yolo_results = outputs["results"]

        # Convert Results objects to our standard format, processing only the
        # actual batch size (padded results, if any, are ignored).
        results = []
        for i in range(actual_batch_size):
            batch_frame = batch[i]
            yolo_result = yolo_results[i]

            # Extract detections from the YOLO Results object.
            # yolo_result.boxes.data has format: [x1, y1, x2, y2, conf, cls]
            if hasattr(yolo_result, "boxes") and yolo_result.boxes is not None:
                detections = yolo_result.boxes.data  # Already a tensor on GPU
            else:
                # No detections
                detections = torch.zeros((0, 6), device=batch_tensor.device)

            # NOTE: postprocess_fn is deliberately skipped for the Ultralytics
            # backend. Ultralytics already performs confidence filtering, NMS,
            # and format conversion, so detections arrive in their final
            # [x1, y1, x2, y2, conf, cls] form; a custom postprocess_fn would
            # expect raw TensorRT output and fail.
            result = {
                "stream_id": batch_frame.stream_id,
                "timestamp": batch_frame.timestamp,
                "detections": detections,
                "frame": batch_frame.frame,  # Original frame tensor
                "metadata": batch_frame.metadata,
                "yolo_result": yolo_result,  # Original Results object, for debugging
            }
            results.append(result)

        return results
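
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). It assumes BaseModelController's
# __init__ merely stores configuration, and fakes the engine contract this
# controller relies on: get_metadata() exposing input_shapes, and infer()
# returning {"results": [...]}. Names such as _StubEngine and "yolo11n-trt"
# are hypothetical, not part of the real codebase.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from types import SimpleNamespace

    class _StubEngine:
        """Stand-in for UltralyticsEngine, reporting a fixed batch size of 4."""

        def get_metadata(self):
            return SimpleNamespace(input_shapes={"images": (4, 3, 640, 640)})

        def infer(self, inputs, conf, iou):
            # A real engine would return Ultralytics Results objects here.
            return {"results": [None] * inputs["images"].shape[0]}

    controller = UltralyticsModelController(
        inference_engine=_StubEngine(),
        model_id="yolo11n-trt",  # hypothetical model id
        batch_size=16,  # requested max; overridden by the engine's fixed size
    )
    # The controller clamps its batch size to the engine's fixed value:
    assert controller.batch_size == 4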