#!/bin/bash
#
# Build YOLOv8 TensorRT Model with Batch Support
#
# This script creates a batched version of the YOLOv8 model that can process
# multiple camera frames in a single inference call, eliminating the sequential
# processing bottleneck.
#
# Performance Impact:
#   - Sequential (batch=1): Each camera processed separately
#   - Batched (batch=4): All 4 cameras in single GPU call
#   - Expected speedup: 2-3x for multi-camera scenarios
#
# Configuration may be overridden via environment variables:
#   MODEL_INPUT, MODEL_OUTPUT, MAX_BATCH, GPU_ID

# Strict mode: abort on errors, unset variables, and mid-pipeline failures.
set -euo pipefail

echo "================================================================================"
echo "Building YOLOv8 TensorRT Model with Batch Support"
echo "================================================================================"

# Configuration (defaults preserved; export any of these to override)
MODEL_INPUT="${MODEL_INPUT:-yolov8n.pt}"
MODEL_OUTPUT="${MODEL_OUTPUT:-models/yolov8n_batch4.trt}"
MAX_BATCH="${MAX_BATCH:-4}"
GPU_ID="${GPU_ID:-0}"
CONVERTER="scripts/convert_pt_to_tensorrt.py"
readonly MODEL_INPUT MODEL_OUTPUT MAX_BATCH GPU_ID CONVERTER

# Check if input model exists
if [ ! -f "$MODEL_INPUT" ]; then
    echo "Error: Input model not found: $MODEL_INPUT" >&2
    echo ""
    echo "Please download YOLOv8 model first:"
    echo "  pip install ultralytics"
    echo "  yolo export model=yolov8n.pt format=onnx"
    echo ""
    echo "Or provide the .pt file in the current directory"
    exit 1
fi

# Fail early with a clear message if the conversion helper is missing,
# instead of letting python emit a confusing "No such file" later.
if [ ! -f "$CONVERTER" ]; then
    echo "Error: conversion script not found: $CONVERTER" >&2
    exit 1
fi

echo ""
echo "Configuration:"
echo "  Input:      $MODEL_INPUT"
echo "  Output:     $MODEL_OUTPUT"
echo "  Max Batch:  $MAX_BATCH"
echo "  GPU:        $GPU_ID"
echo "  Precision:  FP16"
echo ""

# Create the output directory if it doesn't exist (derived from MODEL_OUTPUT
# rather than hardcoded, so overriding the output path still works).
mkdir -p "$(dirname "$MODEL_OUTPUT")"

# Run conversion with dynamic batching
echo "Starting conversion..."
echo ""

python "$CONVERTER" \
    --model "$MODEL_INPUT" \
    --output "$MODEL_OUTPUT" \
    --dynamic-batch \
    --max-batch "$MAX_BATCH" \
    --fp16 \
    --gpu "$GPU_ID" \
    --input-names images \
    --output-names output0 \
    --workspace-size 4

echo ""
echo "================================================================================"
echo "Build Complete!"
echo "================================================================================"
echo ""
echo "The batched model has been created: $MODEL_OUTPUT"
echo ""
echo "Next steps:"
echo "  1. Test batch inference:"
echo "     python test_batch_inference.py"
echo ""
echo "  2. Compare performance:"
echo "     - Sequential: ~147 FPS per camera (4 cameras)"
echo "     - Batched:    Expected 300-400+ FPS per camera"
echo ""
echo "  3. Integration:"
echo "     - Use preprocess_batch() and postprocess_batch() from test_batch_inference.py"
echo "     - Stack frames from multiple cameras"
echo "     - Single model_repo.infer() call for all cameras"
echo ""
echo "================================================================================"