new buffer paradigm

2025-11-11 02:02:12 +07:00 · 2025-11-11 02:02:12 +07:00 · a519dea130
commit a519dea130
parent fdaeb9981c
6 changed files with 341 additions and 327 deletions
--- a/services/ultralytics_model_controller.py
+++ b/services/ultralytics_model_controller.py
@@ -25,20 +25,27 @@ class UltralyticsModelController(BaseModelController):
        inference_engine,
        model_id: str,
        batch_size: int = 16,
-        force_timeout: float = 0.05,
+        max_queue_size: int = 100,
        preprocess_fn: Optional[Callable] = None,
        postprocess_fn: Optional[Callable] = None,
    ):
        # Auto-detect actual batch size from the YOLO engine
+        print(f"[UltralyticsModelController] Detecting batch size from engine...")
        engine_batch_size = self._detect_engine_batch_size(inference_engine)
+        print(
+            f"[UltralyticsModelController] Detected engine_batch_size={engine_batch_size}"
+        )

        # If engine has fixed batch size, use it. Otherwise use user's batch_size
        actual_batch_size = engine_batch_size if engine_batch_size > 0 else batch_size
+        print(
+            f"[UltralyticsModelController] Using actual_batch_size={actual_batch_size}"
+        )

        super().__init__(
            model_id=model_id,
            batch_size=actual_batch_size,
-            force_timeout=force_timeout,
+            max_queue_size=max_queue_size,
            preprocess_fn=preprocess_fn,
            postprocess_fn=postprocess_fn,
        )
@@ -46,11 +53,23 @@ class UltralyticsModelController(BaseModelController):
        self.engine_batch_size = engine_batch_size  # Store for padding logic

        if engine_batch_size > 0:
+            print(f"✓ Ultralytics engine has FIXED batch_size={engine_batch_size}")
+            print(
+                f"  Will pad/truncate all batches to exactly {engine_batch_size} frames"
+            )
            logger.info(
                f"Ultralytics engine has fixed batch_size={engine_batch_size}, "
                f"will pad batches to match"
            )
+            # CRITICAL: Override the parent's batch_size to match engine's fixed size
+            # This prevents buffer accumulation beyond the engine's capacity
+            self.batch_size = engine_batch_size
+            print(f"  Controller self.batch_size is now: {self.batch_size}")
+            print(f"  Buffer will swap when size >= {self.batch_size}")
        else:
+            print(
+                f"✓ Ultralytics engine supports DYNAMIC batching, max={actual_batch_size}"
+            )
            logger.info(
                f"Ultralytics engine supports dynamic batching, "
                f"using max batch_size={actual_batch_size}"
@@ -67,16 +86,22 @@ class UltralyticsModelController(BaseModelController):
            # Get engine metadata
            metadata = inference_engine.get_metadata()

+            logger.info(f"Detecting batch size from engine metadata: {metadata}")
+
            # Check input shape for batch dimension
            if "images" in metadata.input_shapes:
                input_shape = metadata.input_shapes["images"]
                batch_dim = input_shape[0]

+                logger.info(f"Found batch dimension in metadata: {batch_dim}")
+
                if batch_dim > 0:
                    # Fixed batch size
+                    logger.info(f"Using fixed batch size from engine: {batch_dim}")
                    return batch_dim
                else:
                    # Dynamic batch size (-1)
+                    logger.info("Engine supports dynamic batching (batch_dim=-1)")
                    return -1

            # Fallback: try to get from model directly
@@ -187,28 +212,16 @@ class UltralyticsModelController(BaseModelController):
                # No detections
                detections = torch.zeros((0, 6), device=batch_tensor.device)

-            # Apply custom postprocessing if provided
-            if self.postprocess_fn:
-                try:
-                    # For Ultralytics, postprocess_fn might do additional filtering
-                    # Pass the raw boxes tensor in the same format as TensorRT output
-                    detections = self.postprocess_fn(
-                        {
-                            "output0": detections.unsqueeze(
-                                0
-                            )  # Add batch dim for compatibility
-                        }
-                    )
-                except Exception as e:
-                    logger.error(
-                        f"Error in postprocess for stream {batch_frame.stream_id}: {e}"
-                    )
-                    detections = torch.zeros((0, 6), device=batch_tensor.device)
+            # NOTE: Skip postprocess_fn for Ultralytics backend!
+            # Ultralytics already does confidence filtering, NMS, and format conversion.
+            # The detections are already in final format: [x1, y1, x2, y2, conf, cls]
+            # Any custom postprocess_fn would expect raw TensorRT output and will fail.

            result = {
                "stream_id": batch_frame.stream_id,
                "timestamp": batch_frame.timestamp,
                "detections": detections,
+                "frame": batch_frame.frame,  # Include original frame tensor
                "metadata": batch_frame.metadata,
                "yolo_result": yolo_result,  # Keep original Results object for debugging
            }