nms optimization

2025-11-09 11:47:18 +07:00 · 2025-11-09 11:47:18 +07:00 · 8e20496fa7
commit 8e20496fa7
parent 81bbb0074e
5 changed files with 907 additions and 26 deletions
--- a/scripts/build_batch_model.sh
+++ b/scripts/build_batch_model.sh
@ -0,0 +1,86 @@
+#!/bin/bash
+#
+# Build YOLOv8 TensorRT Model with Batch Support
+#
+# This script creates a batched version of the YOLOv8 model that can process
+# multiple camera frames in a single inference call, eliminating the sequential
+# processing bottleneck.
+#
+# Performance Impact:
+# - Sequential (batch=1): Each camera processed separately
+# - Batched (batch=4): All 4 cameras in single GPU call
+# - Expected speedup: 2-3x for multi-camera scenarios
+#
+
+# Abort the script as soon as any command exits with a non-zero status.
+set -e
+
+# Opening banner: the title framed by two rules of '=' characters.
+printf '%s\n' \
+    "================================================================================" \
+    "Building YOLOv8 TensorRT Model with Batch Support" \
+    "================================================================================"
+
+# Build settings; the steps further down read these variables.
+MODEL_INPUT='yolov8n.pt'                  # weights file handed to the converter via --model
+MODEL_OUTPUT='models/yolov8n_batch4.trt'  # destination handed to the converter via --output
+MAX_BATCH=4                               # handed to the converter via --max-batch
+GPU_ID=0                                  # handed to the converter via --gpu
+
+# Pre-flight check: stop before doing any work when the input weights file
+# is not a regular file. The error and the setup hint are diagnostics, so they
+# are written to stderr rather than stdout; the script then exits with status 1.
+if [ ! -f "$MODEL_INPUT" ]; then
+    {
+        echo "Error: Input model not found: $MODEL_INPUT"
+        echo ""
+        echo "Please download YOLOv8 model first:"
+        echo "  pip install ultralytics"
+        echo "  yolo export model=yolov8n.pt format=onnx"
+        echo ""
+        echo "Or provide the .pt file in the current directory"
+    } >&2
+    exit 1
+fi
+
+# Show the effective settings before starting work. The "Precision" row is a
+# fixed label; it is not read from a variable.
+printf '%s\n' \
+    "" \
+    "Configuration:" \
+    "  Input:      $MODEL_INPUT" \
+    "  Output:     $MODEL_OUTPUT" \
+    "  Max Batch:  $MAX_BATCH" \
+    "  GPU:        $GPU_ID" \
+    "  Precision:  FP16" \
+    ""
+
+# Create the directory that will hold the output file if it doesn't exist.
+# The path is derived from MODEL_OUTPUT (instead of a hard-coded "models") so
+# the directory still gets created when the output location is changed.
+mkdir -p -- "$(dirname -- "$MODEL_OUTPUT")"
+
+# Run conversion with dynamic batching
+echo "Starting conversion..."
+echo ""
+
+python scripts/convert_pt_to_tensorrt.py \
+    --model "$MODEL_INPUT" \
+    --output "$MODEL_OUTPUT" \
+    --dynamic-batch \
+    --max-batch $MAX_BATCH \
+    --fp16 \
+    --gpu $GPU_ID \
+    --input-names images \
+    --output-names output0 \
+    --workspace-size 4
+
+# Completion summary: report the output path and list suggested follow-ups.
+divider="================================================================================"
+
+# Header and the location of the generated model.
+printf '%s\n' \
+    "" \
+    "$divider" \
+    "Build Complete!" \
+    "$divider" \
+    "" \
+    "The batched model has been created: $MODEL_OUTPUT" \
+    ""
+
+# Follow-up instructions, closed by a final rule.
+printf '%s\n' \
+    "Next steps:" \
+    "  1. Test batch inference:" \
+    "     python test_batch_inference.py" \
+    "" \
+    "  2. Compare performance:" \
+    "     - Sequential: ~147 FPS per camera (4 cameras)" \
+    "     - Batched: Expected 300-400+ FPS per camera" \
+    "" \
+    "  3. Integration:" \
+    "     - Use preprocess_batch() and postprocess_batch() from test_batch_inference.py" \
+    "     - Stack frames from multiple cameras" \
+    "     - Single model_repo.infer() call for all cameras" \
+    "" \
+    "$divider"