fix: make ffmpeg support

2025-09-25 23:23:56 +07:00 · 2025-09-25 23:23:56 +07:00 · a45f76884f
commit a45f76884f
parent 0fc86fb72b
3 changed files with 102 additions and 231 deletions
--- a/Dockerfile.base
+++ b/Dockerfile.base
@ -13,44 +13,39 @@ RUN apt-get update && apt-get install -y \
    yasm \
    nasm \
    # System libraries
-    libgl1 \
+    libgl1-mesa-glx \
    libglib2.0-0 \
-    libgtk-3-0 \
    libgomp1 \
-    # Media libraries for FFmpeg build
+    # Core media libraries (essential ones only)
    libjpeg-dev \
    libpng-dev \
-    libtiff-dev \
    libx264-dev \
    libx265-dev \
    libvpx-dev \
-    libfdk-aac-dev \
    libmp3lame-dev \
-    libopus-dev \
    libv4l-dev \
-    libxvidcore-dev \
-    libdc1394-22-dev \
    # TurboJPEG for fast JPEG encoding
    libturbojpeg0-dev \
-    # GStreamer complete stack
-    libgstreamer1.0-dev \
-    libgstreamer-plugins-base1.0-dev \
-    libgstreamer-plugins-bad1.0-dev \
-    gstreamer1.0-tools \
-    gstreamer1.0-plugins-base \
-    gstreamer1.0-plugins-good \
-    gstreamer1.0-plugins-bad \
-    gstreamer1.0-plugins-ugly \
-    gstreamer1.0-libav \
-    gstreamer1.0-vaapi \
-    python3-gst-1.0 \
    # Python development
    python3-dev \
    python3-numpy \
-    # NVIDIA driver components
+    && rm -rf /var/lib/apt/lists/*
+
+# Install CUDA development tools (required for FFmpeg CUDA compilation). Best-effort: the shell groups this as ((update && install) || echo) && rm, so a failed install only prints a notice and the build continues.
+RUN apt-get update && apt-get install -y \
+    cuda-nvcc-12-6 \
+    libcuda1 \
+    cuda-cudart-dev-12-6 \
+    cuda-driver-dev-12-6 \
+    || echo "CUDA development packages not available, continuing without them" && \
+    rm -rf /var/lib/apt/lists/*
+
+# Try to install the NVIDIA encode/decode libraries (may not be available in all environments); on failure a notice is echoed, the apt lists are still removed, and the build continues
+RUN apt-get update && apt-get install -y \
    libnvidia-encode-535 \
    libnvidia-decode-535 \
-    && rm -rf /var/lib/apt/lists/*
+    || echo "NVIDIA packages not available, continuing without them" && \
+    rm -rf /var/lib/apt/lists/*

 # Install NVIDIA Video Codec SDK headers
 RUN cd /tmp && \
@ -60,33 +55,60 @@ RUN cd /tmp && \
    make install && \
    rm -rf /tmp/*

-# Build FFmpeg from source with full NVIDIA hardware acceleration
+# Build FFmpeg from source with NVIDIA CUVID support
 ENV FFMPEG_VERSION=6.0
+# Ensure CUDA paths are available for FFmpeg compilation
+ENV PATH="/usr/local/cuda/bin:${PATH}"
+ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
 RUN cd /tmp && \
    wget https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
    tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
    cd ffmpeg-${FFMPEG_VERSION} && \
-    ./configure \
+    # Configure with CUVID support; the inner (...) group makes the basic configure run only if the CUDA one fails
+    (./configure \
        --enable-gpl \
        --enable-nonfree \
+        --enable-shared \
        --enable-libx264 \
        --enable-libx265 \
        --enable-libvpx \
-        --enable-libfdk-aac \
        --enable-libmp3lame \
-        --enable-libopus \
        --enable-cuda-nvcc \
-        --enable-cuvid \
-        --enable-nvenc \
-        --enable-nvdec \
        --enable-cuda-llvm \
+        --enable-cuvid \
+        --enable-nvdec \
+        --enable-nvenc \
        --enable-libnpp \
-        --extra-cflags=-I/usr/local/cuda/include \
-        --extra-ldflags=-L/usr/local/cuda/lib64 \
-        --nvccflags="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_89,code=sm_89 -gencode arch=compute_90,code=sm_90" && \
-    make -j$(nproc) && \
+        --enable-decoder=h264_cuvid \
+        --enable-decoder=hevc_cuvid \
+        --enable-decoder=mjpeg_cuvid \
+        --enable-decoder=mpeg1_cuvid \
+        --enable-decoder=mpeg2_cuvid \
+        --enable-decoder=mpeg4_cuvid \
+        --enable-decoder=vc1_cuvid \
+        --enable-encoder=h264_nvenc \
+        --enable-encoder=hevc_nvenc \
+        --extra-cflags="-I/usr/local/cuda/include" \
+        --extra-ldflags="-L/usr/local/cuda/lib64" \
+        --extra-libs="-lcuda -lcudart -lnvcuvid -lnvidia-encode" \
+        --nvccflags="-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" \
+    || (echo "CUDA configuration failed, trying basic configuration..." && \
+    ./configure \
+        --enable-gpl \
+        --enable-nonfree \
+        --enable-shared \
+        --enable-libx264 \
+        --enable-libx265 \
+        --enable-libvpx \
+        --enable-libmp3lame)) \
+    && make -j$(nproc) && \
    make install && \
    ldconfig && \
+    # Verify CUVID decoders are available (grep exits non-zero when nothing matches, which fails this RUN)
+    echo "=== Verifying FFmpeg CUVID Support ===" && \
+    ffmpeg -hide_banner -decoders 2>/dev/null | grep cuvid && \
+    echo "=== Verifying FFmpeg NVENC Support ===" && \
+    ffmpeg -hide_banner -encoders 2>/dev/null | grep nvenc && \
    cd / && rm -rf /tmp/*

 # Build OpenCV from source with custom FFmpeg and full CUDA support
@ -111,15 +133,14 @@ RUN cd /tmp && \
        -D WITH_CUVID=ON \
        -D BUILD_opencv_cudacodec=ON \
        -D WITH_FFMPEG=ON \
-        -D WITH_GSTREAMER=ON \
        -D WITH_LIBV4L=ON \
        -D BUILD_opencv_python3=ON \
        -D OPENCV_GENERATE_PKGCONFIG=ON \
        -D OPENCV_ENABLE_NONFREE=ON \
        -D OPENCV_EXTRA_MODULES_PATH=/tmp/opencv_contrib-${OPENCV_VERSION}/modules \
        -D PYTHON3_EXECUTABLE=$(which python3) \
-        -D PYTHON_INCLUDE_DIR=$(python3 -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \
-        -D PYTHON_LIBRARY=$(python3 -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))") \
+        -D PYTHON_INCLUDE_DIR=$(python3 -c "import sysconfig; print(sysconfig.get_path('include'))") \
+        -D PYTHON_LIBRARY=$(python3 -c "import sysconfig; print(sysconfig.get_config_var('LIBDIR'))") \
        -D BUILD_EXAMPLES=OFF \
        -D BUILD_TESTS=OFF \
        -D BUILD_PERF_TESTS=OFF \
@ -133,7 +154,6 @@ RUN cd /tmp && \
 ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/lib:${LD_LIBRARY_PATH}"
 ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH}"
 ENV PYTHONPATH="/usr/local/lib/python3.10/dist-packages:${PYTHONPATH}"
-ENV GST_PLUGIN_PATH="/usr/lib/x86_64-linux-gnu/gstreamer-1.0"

 # Optimized environment variables for hardware acceleration
 ENV OPENCV_FFMPEG_CAPTURE_OPTIONS="rtsp_transport;tcp|hwaccel;cuda|hwaccel_device;0|video_codec;h264_cuvid|hwaccel_output_format;cuda"
@ -151,16 +171,21 @@ RUN grep -v opencv-python requirements.base.txt > requirements.tmp && \
 # Verify complete hardware acceleration setup
 RUN echo "=== Hardware Acceleration Verification ===" && \
    echo "FFmpeg Hardware Accelerators:" && \
-    ffmpeg -hide_banner -hwaccels 2>/dev/null | head -10 && \
-    echo "FFmpeg NVIDIA Decoders:" && \
-    ffmpeg -hide_banner -decoders 2>/dev/null | grep -E "(cuvid|nvdec)" | head -5 && \
-    echo "FFmpeg NVIDIA Encoders:" && \
-    ffmpeg -hide_banner -encoders 2>/dev/null | grep nvenc | head -5 && \
+    (ffmpeg -hide_banner -hwaccels 2>/dev/null || echo "FFmpeg hwaccels command failed") && \
+    echo "" && \
+    echo "FFmpeg CUVID Decoders (NVIDIA):" && \
+    (ffmpeg -hide_banner -decoders 2>/dev/null | grep -E "cuvid" || echo "No CUVID decoders found") && \
+    echo "" && \
+    echo "FFmpeg NVENC Encoders (NVIDIA):" && \
+    (ffmpeg -hide_banner -encoders 2>/dev/null | grep -E "nvenc" || echo "No NVENC encoders found") && \
+    echo "" && \
+    echo "Testing basic FFmpeg libx264 encode (no GPU required):" && \
+    (ffmpeg -hide_banner -f lavfi -i testsrc=duration=0.1:size=64x64:rate=1 -c:v libx264 -f null - 2>/dev/null && echo "✅ FFmpeg basic functionality working" || echo "❌ FFmpeg basic test failed") && \
+    echo "" && \
    echo "OpenCV Configuration:" && \
-    python3 -c "import cv2; print('OpenCV version:', cv2.__version__); print('CUDA devices:', cv2.cuda.getCudaEnabledDeviceCount()); build_info = cv2.getBuildInformation(); print('CUDA support:', 'CUDA' in build_info); print('CUVID support:', 'CUVID' in build_info); print('FFmpeg support:', 'FFMPEG' in build_info); print('GStreamer support:', 'GStreamer' in build_info)" && \
-    echo "GStreamer NVIDIA Plugins:" && \
-    gst-inspect-1.0 2>/dev/null | grep -E "(nvv4l2|nvvideo)" | head -5 || echo "GStreamer NVIDIA plugins not detected" && \
-    echo "=== Verification Complete ==="
+    (python3 -c "import cv2; print('OpenCV version:', cv2.__version__); build_info = cv2.getBuildInformation(); print('CUDA support:', 'CUDA' in build_info); print('CUVID support:', 'CUVID' in build_info); print('FFmpeg support:', 'FFMPEG' in build_info)" || echo "OpenCV verification failed") && \
+    echo "" && \
+    echo "=== Verification Complete (build-time only) ==="

 # Set working directory
 WORKDIR /app
--- a/README-hardware-acceleration.md
+++ b/README-hardware-acceleration.md
@ -1,127 +0,0 @@
-# Hardware Acceleration Setup
-
-This detector worker now includes **complete NVIDIA hardware acceleration** with FFmpeg and OpenCV built from source.
-
-## What's Included
-
-### 🔧 Complete Hardware Stack
- **FFmpeg 6.0** built from source with NVIDIA Video Codec SDK
- **OpenCV 4.8.1** built with CUDA and custom FFmpeg integration
- **GStreamer** with NVDEC/VAAPI plugins
- **TurboJPEG** for optimized JPEG encoding (3-5x faster)
- **CUDA** support for YOLO model inference
-
-### 🎯 Hardware Acceleration Methods (Automatic Detection)
-1. **GStreamer NVDEC** - Best for RTSP streaming, lowest latency
-2. **OpenCV CUDA** - Direct GPU memory access, best integration
-3. **FFmpeg CUVID** - Custom build with full NVIDIA acceleration
-4. **VAAPI** - Intel/AMD GPU support
-5. **Software Fallback** - CPU-only as last resort
-
-## Build and Run
-
-### Single Build Script
-```bash
-./build-nvdec.sh
-```
-**Build time**: 45-90 minutes (compiles FFmpeg + OpenCV from source)
-
-### Run with GPU Support
-```bash
-docker run --gpus all -p 8000:8000 detector-worker:complete-hw-accel
-```
-
-## Performance Improvements
-
-### Expected CPU Reduction
- **Video decoding**: 70-90% reduction (moved to GPU)
- **JPEG encoding**: 70-80% faster with TurboJPEG
- **Model inference**: GPU accelerated with CUDA
- **Overall system**: 50-80% less CPU usage
-
-### Profiling Results Comparison
-**Before (Software Only)**:
- `cv2.imencode`: 6.5% CPU time (1.95s out of 30s)
- `psutil.cpu_percent`: 88% CPU time (idle polling)
- Video decoding: 100% CPU
-
-**After (Hardware Accelerated)**:
- Video decoding: GPU (~5-10% CPU overhead)
- JPEG encoding: 3-5x faster with TurboJPEG
- Model inference: GPU accelerated
-
-## Verification
-
-### Check Hardware Acceleration Support
-```bash
-docker run --rm --gpus all detector-worker:complete-hw-accel \
-  bash -c "ffmpeg -hwaccels && python3 -c 'import cv2; build=cv2.getBuildInformation(); print(\"CUDA:\", \"CUDA\" in build); print(\"CUVID:\", \"CUVID\" in build)'"
-```
-
-### Runtime Logs
-The application will automatically log which acceleration method is being used:
-```
-Camera cam1: Successfully using GStreamer with NVDEC hardware acceleration
-Camera cam2: Using FFMPEG hardware acceleration (backend: FFMPEG)
-Camera cam3: Using OpenCV CUDA hardware acceleration
-```
-
-## Files Modified
-
-### Docker Configuration
- **Dockerfile.base** - Complete hardware acceleration stack
- **build-nvdec.sh** - Single build script for everything
-
-### Application Code
- **core/streaming/readers.py** - Multi-method hardware acceleration
- **core/utils/hardware_encoder.py** - TurboJPEG + NVENC encoding
- **core/utils/ffmpeg_detector.py** - Runtime capability detection
- **requirements.base.txt** - Added TurboJPEG, removed opencv-python
-
-## Architecture
-
-```
-Input RTSP Stream
-       ↓
-1. GStreamer NVDEC Pipeline (NVIDIA GPU)
-   rtspsrc → nvv4l2decoder → nvvideoconvert → OpenCV
-       ↓
-2. OpenCV CUDA Backend (NVIDIA GPU)
-   OpenCV with CUDA acceleration
-       ↓
-3. FFmpeg CUVID (NVIDIA GPU)
-   Custom FFmpeg with h264_cuvid decoder
-       ↓
-4. VAAPI (Intel/AMD GPU)
-   Hardware acceleration for non-NVIDIA
-       ↓
-5. Software Fallback (CPU)
-   Standard OpenCV software decoding
-```
-
-## Benefits
-
-### For Development
- **Single Dockerfile.base** - Everything consolidated
- **Automatic detection** - No manual configuration needed
- **Graceful fallback** - Works without GPU for development
-
-### For Production
- **Maximum performance** - Uses best available acceleration
- **GPU memory efficiency** - Direct GPU-to-GPU pipeline
- **Lower latency** - Hardware decoding + CUDA inference
- **Reduced CPU load** - Frees CPU for other tasks
-
-## Troubleshooting
-
-### Build Issues
- Ensure NVIDIA Docker runtime is installed
- Check CUDA 12.6 compatibility with your GPU
- Build takes 45-90 minutes - be patient
-
-### Runtime Issues
- Verify `nvidia-smi` works in container
- Check logs for acceleration method being used
- Fallback to software decoding is automatic
-
-This setup provides **production-ready hardware acceleration** with automatic detection and graceful fallback for maximum compatibility.
--- a/core/streaming/readers.py
+++ b/core/streaming/readers.py
@ -166,40 +166,17 @@ class RTSPReader:
        logger.info(f"RTSP reader thread ended for camera {self.camera_id}")

    def _initialize_capture(self) -> bool:
-        """Initialize video capture with hardware acceleration (NVDEC) for 1280x720@6fps."""
+        """Initialize video capture with FFmpeg hardware acceleration (CUVID/NVDEC) for 1280x720@6fps."""
        try:
            # Release previous capture if exists
            if self.cap:
                self.cap.release()
                time.sleep(0.5)

-            logger.info(f"Initializing capture for camera {self.camera_id} with hardware acceleration")
+            logger.info(f"Initializing capture for camera {self.camera_id} with FFmpeg hardware acceleration")
            hw_accel_success = False

-            # Method 1: Try GStreamer with NVDEC (most efficient on NVIDIA GPUs)
-            if not hw_accel_success:
-                try:
-                    # Build GStreamer pipeline for NVIDIA hardware decoding
-                    gst_pipeline = (
-                        f"rtspsrc location={self.rtsp_url} protocols=tcp latency=100 ! "
-                        "rtph264depay ! h264parse ! "
-                        "nvv4l2decoder ! "  # NVIDIA hardware decoder
-                        "nvvideoconvert ! "  # NVIDIA hardware color conversion
-                        "video/x-raw,format=BGRx,width=1280,height=720 ! "
-                        "videoconvert ! "
-                        "video/x-raw,format=BGR ! "
-                        "appsink max-buffers=1 drop=true sync=false"
-                    )
-                    logger.info(f"Attempting GStreamer NVDEC pipeline for camera {self.camera_id}")
-                    self.cap = cv2.VideoCapture(gst_pipeline, cv2.CAP_GSTREAMER)
-
-                    if self.cap.isOpened():
-                        hw_accel_success = True
-                        logger.info(f"Camera {self.camera_id}: Successfully using GStreamer with NVDEC hardware acceleration")
-                except Exception as e:
-                    logger.debug(f"Camera {self.camera_id}: GStreamer NVDEC not available: {e}")
-
-            # Method 2: Try OpenCV CUDA VideoReader (if built with CUVID support)
+            # Method 1: Try OpenCV CUDA VideoReader (if built with CUVID support)
            if not hw_accel_success:
                try:
                    # Check if OpenCV was built with CUDA codec support
@ -220,7 +197,7 @@ class RTSPReader:
                except Exception as e:
                    logger.debug(f"Camera {self.camera_id}: OpenCV CUDA not available: {e}")

-            # Method 3: Try FFMPEG with optimal hardware acceleration (CUVID/VAAPI)
+            # Method 2: Try FFmpeg with optimal hardware acceleration (CUVID/NVDEC)
            if not hw_accel_success:
                try:
                    from core.utils.ffmpeg_detector import get_optimal_rtsp_options
@ -230,7 +207,7 @@ class RTSPReader:
                    optimal_options = get_optimal_rtsp_options(self.rtsp_url)
                    os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = optimal_options

-                    logger.info(f"Attempting FFMPEG with detected hardware acceleration for camera {self.camera_id}")
+                    logger.info(f"Attempting FFmpeg with detected hardware acceleration for camera {self.camera_id}")
                    logger.debug(f"Camera {self.camera_id}: Using FFmpeg options: {optimal_options}")

                    self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG)
@ -239,45 +216,41 @@ class RTSPReader:
                        hw_accel_success = True
                        # Try to get backend info to confirm hardware acceleration
                        backend = self.cap.getBackendName()
-                        logger.info(f"Camera {self.camera_id}: Using FFMPEG hardware acceleration (backend: {backend})")
+                        logger.info(f"Camera {self.camera_id}: Using FFmpeg hardware acceleration (backend: {backend})")
                except Exception as e:
-                    logger.debug(f"Camera {self.camera_id}: FFMPEG hardware acceleration not available: {e}")
+                    logger.debug(f"Camera {self.camera_id}: FFmpeg optimal hardware acceleration not available: {e}")

-                    # Fallback to basic CUVID
-                    try:
-                        import os
-                        os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'video_codec;h264_cuvid|rtsp_transport;tcp|hwaccel;cuda'
-                        self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG)
-
-                        if self.cap.isOpened():
-                            hw_accel_success = True
-                            logger.info(f"Camera {self.camera_id}: Using basic FFMPEG CUVID hardware acceleration")
-                    except Exception as e2:
-                        logger.debug(f"Camera {self.camera_id}: Basic CUVID also failed: {e2}")
-
-            # Method 4: Try VAAPI hardware acceleration (for Intel/AMD GPUs)
+            # Method 3: Try FFmpeg with basic NVIDIA CUVID
            if not hw_accel_success:
                try:
-                    gst_pipeline = (
-                        f"rtspsrc location={self.rtsp_url} protocols=tcp latency=100 ! "
-                        "rtph264depay ! h264parse ! "
-                        "vaapih264dec ! "  # VAAPI hardware decoder
-                        "vaapipostproc ! "
-                        "video/x-raw,format=BGRx,width=1280,height=720 ! "
-                        "videoconvert ! "
-                        "video/x-raw,format=BGR ! "
-                        "appsink max-buffers=1 drop=true sync=false"
-                    )
-                    logger.info(f"Attempting GStreamer VAAPI pipeline for camera {self.camera_id}")
-                    self.cap = cv2.VideoCapture(gst_pipeline, cv2.CAP_GSTREAMER)
+                    import os
+                    os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'video_codec;h264_cuvid|rtsp_transport;tcp|hwaccel;cuda|hwaccel_device;0'
+
+                    logger.info(f"Attempting FFmpeg with basic CUVID for camera {self.camera_id}")
+                    self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG)

                    if self.cap.isOpened():
                        hw_accel_success = True
-                        logger.info(f"Camera {self.camera_id}: Successfully using GStreamer with VAAPI hardware acceleration")
+                        logger.info(f"Camera {self.camera_id}: Using FFmpeg CUVID hardware acceleration")
                except Exception as e:
-                    logger.debug(f"Camera {self.camera_id}: GStreamer VAAPI not available: {e}")
+                    logger.debug(f"Camera {self.camera_id}: FFmpeg CUVID not available: {e}")

-            # Fallback: Standard FFMPEG with software decoding
+            # Method 4: Try FFmpeg with VAAPI (Intel/AMD GPUs)
+            if not hw_accel_success:
+                try:
+                    import os
+                    os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'hwaccel;vaapi|hwaccel_device;/dev/dri/renderD128|video_codec;h264|rtsp_transport;tcp'
+
+                    logger.info(f"Attempting FFmpeg with VAAPI for camera {self.camera_id}")
+                    self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG)
+
+                    if self.cap.isOpened():
+                        hw_accel_success = True
+                        logger.info(f"Camera {self.camera_id}: Using FFmpeg VAAPI hardware acceleration")
+                except Exception as e:
+                    logger.debug(f"Camera {self.camera_id}: FFmpeg VAAPI not available: {e}")
+
+            # Fallback: Standard FFmpeg with software decoding
            if not hw_accel_success:
                logger.warning(f"Camera {self.camera_id}: Hardware acceleration not available, falling back to software decoding")
                import os