ultralytics export
This commit is contained in:
parent
bf7b68edb1
commit
fdaeb9981c
14 changed files with 2241 additions and 507 deletions
217
services/ultralytics_model_controller.py
Normal file
@@ -0,0 +1,217 @@
"""
Ultralytics Model Controller - YOLO inference with batched processing.
"""

import logging
from typing import Any, Callable, Dict, List, Optional

import torch

from .base_model_controller import BaseModelController, BatchFrame

logger = logging.getLogger(__name__)


class UltralyticsModelController(BaseModelController):
    """
    Model controller for Ultralytics YOLO inference.

    Uses UltralyticsEngine which wraps the Ultralytics YOLO model with
    native TensorRT backend for GPU-accelerated inference.
    """

    def __init__(
        self,
        inference_engine,
        model_id: str,
        batch_size: int = 16,
        force_timeout: float = 0.05,
        preprocess_fn: Optional[Callable] = None,
        postprocess_fn: Optional[Callable] = None,
    ):
        # Auto-detect actual batch size from the YOLO engine
        engine_batch_size = self._detect_engine_batch_size(inference_engine)

        # If engine has fixed batch size, use it. Otherwise use user's batch_size
        actual_batch_size = engine_batch_size if engine_batch_size > 0 else batch_size

        super().__init__(
            model_id=model_id,
            batch_size=actual_batch_size,
            force_timeout=force_timeout,
            preprocess_fn=preprocess_fn,
            postprocess_fn=postprocess_fn,
        )
        self.inference_engine = inference_engine
        self.engine_batch_size = engine_batch_size  # Store for padding logic

        if engine_batch_size > 0:
            logger.info(
                f"Ultralytics engine has fixed batch_size={engine_batch_size}, "
                f"will pad batches to match"
            )
        else:
            logger.info(
                f"Ultralytics engine supports dynamic batching, "
                f"using max batch_size={actual_batch_size}"
            )

    def _detect_engine_batch_size(self, inference_engine) -> int:
        """
        Detect the batch size from the Ultralytics engine.

        Returns:
            Fixed batch size (e.g., 2, 4, 8) or -1 for dynamic batching
        """
        try:
            # Get engine metadata
            metadata = inference_engine.get_metadata()

            # Check input shape for batch dimension
            if "images" in metadata.input_shapes:
                input_shape = metadata.input_shapes["images"]
                batch_dim = input_shape[0]

                if batch_dim > 0:
                    # Fixed batch size
                    return batch_dim
                else:
                    # Dynamic batch size (-1)
                    return -1

            # Fallback: try to get from model directly
            if (
                hasattr(inference_engine, "_model")
                and inference_engine._model is not None
            ):
                model = inference_engine._model

                # Try to get batch info from Ultralytics model
                if hasattr(model, "predictor") and model.predictor is not None:
                    predictor = model.predictor
                    if hasattr(predictor, "model") and hasattr(
                        predictor.model, "batch"
                    ):
                        return predictor.model.batch

                # Try to get from model.model (for .engine files)
                if hasattr(model, "model"):
                    # For TensorRT engines, check input shape
                    if hasattr(model.model, "get_input_details"):
                        details = model.model.get_input_details()
                        if details and len(details) > 0:
                            shape = details[0].get("shape")
                            if shape and len(shape) > 0:
                                return shape[0] if shape[0] > 0 else -1

        except Exception as e:
            logger.warning(f"Could not detect engine batch size: {e}")

        # Default: assume dynamic batching
        return -1

    def _run_batch_inference(self, batch: List[BatchFrame]) -> List[Dict[str, Any]]:
        """
        Run Ultralytics YOLO inference on a batch of frames.

        Ultralytics handles batching natively and returns Results objects.
        """
        # Preprocess frames
        preprocessed = []
        for batch_frame in batch:
            if self.preprocess_fn:
                processed = self.preprocess_fn(batch_frame.frame)
                # Ensure shape is (C, H, W), not (1, C, H, W)
                if processed.dim() == 4 and processed.shape[0] == 1:
                    processed = processed.squeeze(0)
            else:
                processed = batch_frame.frame
            preprocessed.append(processed)

        # Stack into batch tensor: (B, C, H, W)
        batch_tensor = torch.stack(preprocessed, dim=0)
        actual_batch_size = len(batch)

        # Handle fixed batch size engines (pad if needed)
        if self.engine_batch_size > 0:
            # Engine has fixed batch size
            if batch_tensor.shape[0] > self.engine_batch_size:
                # Truncate to engine's max batch size
                logger.warning(
                    f"Batch size {batch_tensor.shape[0]} exceeds engine max {self.engine_batch_size}, truncating"
                )
                batch_tensor = batch_tensor[: self.engine_batch_size]
                batch = batch[: self.engine_batch_size]
                actual_batch_size = self.engine_batch_size
            elif batch_tensor.shape[0] < self.engine_batch_size:
                # Pad to match engine's fixed batch size
                padding_size = self.engine_batch_size - batch_tensor.shape[0]
                # Replicate last frame to pad (cheaper than zeros)
                padding = batch_tensor[-1:].repeat(padding_size, 1, 1, 1)
                batch_tensor = torch.cat([batch_tensor, padding], dim=0)
                logger.debug(
                    f"Padded batch from {actual_batch_size} to {self.engine_batch_size} frames"
                )
        else:
            # Dynamic batching - just limit to max
            if batch_tensor.shape[0] > self.batch_size:
                logger.warning(
                    f"Batch size {batch_tensor.shape[0]} exceeds configured max {self.batch_size}"
                )
                batch_tensor = batch_tensor[: self.batch_size]
                batch = batch[: self.batch_size]
                actual_batch_size = self.batch_size

        # Run Ultralytics inference
        # Input should be (B, 3, H, W) in range [0, 1], RGB format
        outputs = self.inference_engine.infer(
            inputs={"images": batch_tensor},
            conf=0.25,  # Confidence threshold
            iou=0.45,  # NMS IoU threshold
        )

        # Ultralytics returns Results objects in outputs["results"]
        yolo_results = outputs["results"]

        # Convert Results objects to our standard format
        # Only process actual batch size (ignore padded results if any)
        results = []
        for i in range(actual_batch_size):
            batch_frame = batch[i]
            yolo_result = yolo_results[i]

            # Extract detections from the YOLO Results object
            # yolo_result.boxes.data has format: [x1, y1, x2, y2, conf, cls]
            if hasattr(yolo_result, "boxes") and yolo_result.boxes is not None:
                detections = yolo_result.boxes.data  # Already a tensor on GPU
            else:
                # No detections
                detections = torch.zeros((0, 6), device=batch_tensor.device)

            # Apply custom postprocessing if provided
            if self.postprocess_fn:
                try:
                    # For Ultralytics, postprocess_fn might do additional filtering
                    # Pass the raw boxes tensor in the same format as TensorRT output
                    detections = self.postprocess_fn(
                        {"output0": detections.unsqueeze(0)}  # Add batch dim for compatibility
                    )
                except Exception as e:
                    logger.error(
                        f"Error in postprocess for stream {batch_frame.stream_id}: {e}"
                    )
                    detections = torch.zeros((0, 6), device=batch_tensor.device)

            result = {
                "stream_id": batch_frame.stream_id,
                "timestamp": batch_frame.timestamp,
                "detections": detections,
                "metadata": batch_frame.metadata,
                "yolo_result": yolo_result,  # Keep original Results object for debugging
            }
            results.append(result)

        return results
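For context, a minimal usage sketch of the controller added in this commit. The import path and constructor for UltralyticsEngine, the model file name, and the parameter comments are assumptions for illustration; only UltralyticsModelController's signature comes from this diff.

from services.ultralytics_engine import UltralyticsEngine  # hypothetical module path
from services.ultralytics_model_controller import UltralyticsModelController

# Hypothetical engine construction; the real UltralyticsEngine API is not shown in this diff.
engine = UltralyticsEngine("yolov8n.engine")

controller = UltralyticsModelController(
    inference_engine=engine,
    model_id="yolov8n",
    batch_size=16,        # upper bound; overridden if the engine reports a fixed batch dim
    force_timeout=0.05,   # flush a partially filled batch after 50 ms
)

If the exported .engine was built with a fixed batch dimension, the controller pads or truncates incoming batches to that size, as implemented in _run_batch_inference above.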