fix: use nvdec
This commit is contained in:
		
							parent
							
								
									f9a67935d6
								
							
						
					
					
						commit
						b919a1ebe2
					
				
					 5 changed files with 328 additions and 19 deletions
				
			
		| 
						 | 
				
			
			@ -1,18 +1,54 @@
 | 
			
		|||
# Base image with all ML dependencies
 | 
			
		||||
# Base image with all ML dependencies and NVIDIA Video Codec SDK
 | 
			
		||||
FROM pytorch/pytorch:2.8.0-cuda12.6-cudnn9-runtime
 | 
			
		||||
 | 
			
		||||
# Install system dependencies
 | 
			
		||||
# Install system dependencies including GStreamer with NVDEC support
 | 
			
		||||
RUN apt update && apt install -y \
 | 
			
		||||
    libgl1 \
 | 
			
		||||
    libglib2.0-0 \
 | 
			
		||||
    libgstreamer1.0-0 \
 | 
			
		||||
    libgtk-3-0 \
 | 
			
		||||
    libavcodec58 \
 | 
			
		||||
    libgomp1 \
 | 
			
		||||
    # GStreamer base
 | 
			
		||||
    libgstreamer1.0-0 \
 | 
			
		||||
    libgstreamer-plugins-base1.0-0 \
 | 
			
		||||
    libgstreamer-plugins-bad1.0-0 \
 | 
			
		||||
    gstreamer1.0-tools \
 | 
			
		||||
    gstreamer1.0-plugins-base \
 | 
			
		||||
    gstreamer1.0-plugins-good \
 | 
			
		||||
    gstreamer1.0-plugins-bad \
 | 
			
		||||
    gstreamer1.0-plugins-ugly \
 | 
			
		||||
    gstreamer1.0-libav \
 | 
			
		||||
    # GStreamer Python bindings
 | 
			
		||||
    python3-gst-1.0 \
 | 
			
		||||
    # VA-API GStreamer plugins (hardware decoding on Intel/AMD GPUs; not NVIDIA-specific)
 | 
			
		||||
    gstreamer1.0-vaapi \
 | 
			
		||||
    # FFmpeg with hardware acceleration support
 | 
			
		||||
    ffmpeg \
 | 
			
		||||
    libavcodec-extra \
 | 
			
		||||
    libavformat58 \
 | 
			
		||||
    libswscale5 \
 | 
			
		||||
    libgomp1 \
 | 
			
		||||
    # Additional codecs
 | 
			
		||||
    libx264-155 \
 | 
			
		||||
    libx265-179 \
 | 
			
		||||
    # TurboJPEG for fast JPEG encoding
 | 
			
		||||
    libturbojpeg0-dev \
 | 
			
		||||
    && rm -rf /var/lib/apt/lists/*
 | 
			
		||||
 | 
			
		||||
# Install the NVIDIA video decode library (libnvidia-decode, i.e. NVDEC) from
# NVIDIA's CUDA apt repository.
# NOTE: this step does not install the DeepStream SDK, so the DeepStream
# GStreamer elements (nvv4l2decoder, nvvideoconvert) are not provided by it.
# NOTE(review): the keyring URL targets the ubuntu2004 repository - confirm it
# matches the Ubuntu release of the base image.
# apt-get is used instead of apt because apt's CLI is not stable for scripts.
RUN apt-get update && apt-get install -y \
    wget \
    software-properties-common \
    && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \
    && dpkg -i cuda-keyring_1.0-1_all.deb \
    && apt-get update \
    && apt-get install -y libnvidia-decode-535 \
    && rm -rf /var/lib/apt/lists/* cuda-keyring_1.0-1_all.deb
 | 
			
		||||
 | 
			
		||||
# Runtime environment for hardware-accelerated decoding: OpenCV's FFmpeg
# capture options, the GStreamer plugin search path, and the CUDA library
# directory prepended to the loader path.
ENV OPENCV_FFMPEG_CAPTURE_OPTIONS="video_codec;h264_cuvid" \
    GST_PLUGIN_PATH="/usr/lib/x86_64-linux-gnu/gstreamer-1.0" \
    LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
 | 
			
		||||
 | 
			
		||||
# Copy and install base requirements (ML dependencies that rarely change)
 | 
			
		||||
COPY requirements.base.txt .
 | 
			
		||||
RUN pip install --no-cache-dir -r requirements.base.txt
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										44
									
								
								build-nvdec.sh
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										44
									
								
								build-nvdec.sh
									
										
									
									
									
										Executable file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,44 @@
 | 
			
		|||
#!/bin/bash

# Builds the two Docker images used for NVDEC hardware-accelerated decoding
# (the base image first, then the application image) and prints usage notes
# once both builds have succeeded. Exits with status 1 on the first failure.

echo "Building Docker image with NVDEC hardware acceleration support..."
echo "========================================================="

# Step 1: base image carrying the ML and hardware acceleration dependencies.
echo "Building base image with NVDEC support..."
if ! docker build -f Dockerfile.base -t detector-worker-base:nvdec .; then
    echo "Failed to build base image"
    exit 1
fi

# Step 2: the application image itself.
echo "Building application image..."
if ! docker build -t detector-worker:nvdec .; then
    echo "Failed to build application image"
    exit 1
fi

# Summary banner: one argument per output line, printed verbatim.
printf '%s\n' \
    "" \
    "=========================================================" \
    "Build complete!" \
    "" \
    "To run the container with GPU support:" \
    "docker run --gpus all -p 8000:8000 detector-worker:nvdec" \
    "" \
    "Hardware acceleration features enabled:" \
    "- NVDEC for H.264/H.265 video decoding" \
    "- NVENC for video encoding (if needed)" \
    "- TurboJPEG for fast JPEG encoding" \
    "- CUDA for model inference" \
    "" \
    "The application will automatically detect and use:" \
    "1. GStreamer with NVDEC (NVIDIA GPUs)" \
    "2. FFMPEG with CUVID (NVIDIA GPUs)" \
    "3. VAAPI (Intel/AMD GPUs)" \
    "4. TurboJPEG (3-5x faster than standard JPEG)" \
    "========================================================="
 | 
			
		||||
| 
						 | 
				
			
			@ -166,28 +166,83 @@ class RTSPReader:
 | 
			
		|||
        logger.info(f"RTSP reader thread ended for camera {self.camera_id}")
 | 
			
		||||
 | 
			
		||||
    def _initialize_capture(self) -> bool:
 | 
			
		||||
        """Initialize video capture with optimized settings for 1280x720@6fps."""
 | 
			
		||||
        """Initialize video capture with hardware acceleration (NVDEC) for 1280x720@6fps."""
 | 
			
		||||
        try:
 | 
			
		||||
            # Release previous capture if exists
 | 
			
		||||
            if self.cap:
 | 
			
		||||
                self.cap.release()
 | 
			
		||||
                time.sleep(0.5)
 | 
			
		||||
 | 
			
		||||
            logger.info(f"Initializing capture for camera {self.camera_id}")
 | 
			
		||||
            logger.info(f"Initializing capture for camera {self.camera_id} with hardware acceleration")
 | 
			
		||||
            hw_accel_success = False
 | 
			
		||||
 | 
			
		||||
            # Create capture with FFMPEG backend and TCP transport for reliability
 | 
			
		||||
            # Use TCP instead of UDP to prevent packet loss
 | 
			
		||||
            rtsp_url_tcp = self.rtsp_url.replace('rtsp://', 'rtsp://')
 | 
			
		||||
            if '?' in rtsp_url_tcp:
 | 
			
		||||
                rtsp_url_tcp += '&tcp'
 | 
			
		||||
            else:
 | 
			
		||||
                rtsp_url_tcp += '?tcp'
 | 
			
		||||
            # Method 1: Try GStreamer with NVDEC (most efficient on NVIDIA GPUs)
 | 
			
		||||
            if not hw_accel_success:
 | 
			
		||||
                try:
 | 
			
		||||
                    # Build GStreamer pipeline for NVIDIA hardware decoding
 | 
			
		||||
                    gst_pipeline = (
 | 
			
		||||
                        f"rtspsrc location={self.rtsp_url} protocols=tcp latency=100 ! "
 | 
			
		||||
                        "rtph264depay ! h264parse ! "
 | 
			
		||||
                        "nvv4l2decoder ! "  # NVIDIA hardware decoder
 | 
			
		||||
                        "nvvideoconvert ! "  # NVIDIA hardware color conversion
 | 
			
		||||
                        "video/x-raw,format=BGRx,width=1280,height=720 ! "
 | 
			
		||||
                        "videoconvert ! "
 | 
			
		||||
                        "video/x-raw,format=BGR ! "
 | 
			
		||||
                        "appsink max-buffers=1 drop=true sync=false"
 | 
			
		||||
                    )
 | 
			
		||||
                    logger.info(f"Attempting GStreamer NVDEC pipeline for camera {self.camera_id}")
 | 
			
		||||
                    self.cap = cv2.VideoCapture(gst_pipeline, cv2.CAP_GSTREAMER)
 | 
			
		||||
 | 
			
		||||
            # Alternative: Set environment variable for RTSP transport
 | 
			
		||||
            import os
 | 
			
		||||
            os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'rtsp_transport;tcp'
 | 
			
		||||
                    if self.cap.isOpened():
 | 
			
		||||
                        hw_accel_success = True
 | 
			
		||||
                        logger.info(f"Camera {self.camera_id}: Successfully using GStreamer with NVDEC hardware acceleration")
 | 
			
		||||
                except Exception as e:
 | 
			
		||||
                    logger.debug(f"Camera {self.camera_id}: GStreamer NVDEC not available: {e}")
 | 
			
		||||
 | 
			
		||||
            self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG)
 | 
			
		||||
            # Method 2: Try FFMPEG with NVIDIA CUVID hardware decoder
 | 
			
		||||
            if not hw_accel_success:
 | 
			
		||||
                try:
 | 
			
		||||
                    import os
 | 
			
		||||
                    # Set FFMPEG to use NVIDIA CUVID decoder
 | 
			
		||||
                    os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'video_codec;h264_cuvid|rtsp_transport;tcp|hwaccel;cuda'
 | 
			
		||||
 | 
			
		||||
                    logger.info(f"Attempting FFMPEG with h264_cuvid for camera {self.camera_id}")
 | 
			
		||||
                    self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG)
 | 
			
		||||
 | 
			
		||||
                    if self.cap.isOpened():
 | 
			
		||||
                        hw_accel_success = True
 | 
			
		||||
                        logger.info(f"Camera {self.camera_id}: Using FFMPEG with CUVID hardware acceleration")
 | 
			
		||||
                except Exception as e:
 | 
			
		||||
                    logger.debug(f"Camera {self.camera_id}: FFMPEG CUVID not available: {e}")
 | 
			
		||||
 | 
			
		||||
            # Method 3: Try VAAPI hardware acceleration (for Intel/AMD GPUs)
 | 
			
		||||
            if not hw_accel_success:
 | 
			
		||||
                try:
 | 
			
		||||
                    gst_pipeline = (
 | 
			
		||||
                        f"rtspsrc location={self.rtsp_url} protocols=tcp latency=100 ! "
 | 
			
		||||
                        "rtph264depay ! h264parse ! "
 | 
			
		||||
                        "vaapih264dec ! "  # VAAPI hardware decoder
 | 
			
		||||
                        "vaapipostproc ! "
 | 
			
		||||
                        "video/x-raw,format=BGRx,width=1280,height=720 ! "
 | 
			
		||||
                        "videoconvert ! "
 | 
			
		||||
                        "video/x-raw,format=BGR ! "
 | 
			
		||||
                        "appsink max-buffers=1 drop=true sync=false"
 | 
			
		||||
                    )
 | 
			
		||||
                    logger.info(f"Attempting GStreamer VAAPI pipeline for camera {self.camera_id}")
 | 
			
		||||
                    self.cap = cv2.VideoCapture(gst_pipeline, cv2.CAP_GSTREAMER)
 | 
			
		||||
 | 
			
		||||
                    if self.cap.isOpened():
 | 
			
		||||
                        hw_accel_success = True
 | 
			
		||||
                        logger.info(f"Camera {self.camera_id}: Successfully using GStreamer with VAAPI hardware acceleration")
 | 
			
		||||
                except Exception as e:
 | 
			
		||||
                    logger.debug(f"Camera {self.camera_id}: GStreamer VAAPI not available: {e}")
 | 
			
		||||
 | 
			
		||||
            # Fallback: Standard FFMPEG with software decoding
 | 
			
		||||
            if not hw_accel_success:
 | 
			
		||||
                logger.warning(f"Camera {self.camera_id}: Hardware acceleration not available, falling back to software decoding")
 | 
			
		||||
                import os
 | 
			
		||||
                os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'rtsp_transport;tcp'
 | 
			
		||||
                self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG)
 | 
			
		||||
 | 
			
		||||
            if not self.cap.isOpened():
 | 
			
		||||
                logger.error(f"Failed to open stream for camera {self.camera_id}")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										173
									
								
								core/utils/hardware_encoder.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										173
									
								
								core/utils/hardware_encoder.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,173 @@
 | 
			
		|||
"""
 | 
			
		||||
JPEG image encoding using TurboJPEG when available, with an OpenCV fallback (GPU encoding is probed but not yet implemented)
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import cv2
 | 
			
		||||
import numpy as np
 | 
			
		||||
import logging
 | 
			
		||||
from typing import Optional, Tuple
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
logger = logging.getLogger("detector_worker")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class HardwareEncoder:
    """JPEG encoder that uses the fastest backend found at construction time.

    ``__init__`` probes the optional backends once. ``encode_jpeg`` then tries
    TurboJPEG first, the (placeholder) NVENC path second, and OpenCV's
    ``cv2.imencode`` last.
    """

    def __init__(self):
        """Probe the optional backends and record which ones are usable."""
        # True when cv2.VideoWriter opened with hardware acceleration requested.
        self.nvenc_available = False
        # Nothing in this class sets this flag to True.
        self.vaapi_available = False
        # True when the TurboJPEG wrapper was imported and instantiated.
        self.turbojpeg_available = False

        # Check for TurboJPEG (fastest CPU-based option)
        try:
            from turbojpeg import TurboJPEG
            self.turbojpeg = TurboJPEG()
            self.turbojpeg_available = True
            logger.info("TurboJPEG accelerated encoding available")
        except ImportError:
            logger.debug("TurboJPEG not available")
        except Exception as e:
            # Constructing TurboJPEG() can fail even when the Python wrapper
            # imports fine (e.g. the native library cannot be loaded); treat
            # that as "not available" instead of crashing the constructor.
            logger.debug(f"TurboJPEG not available: {e}")

        # Check for NVIDIA NVENC support
        self.nvenc_available = self._probe_hw_video_writer()
        if self.nvenc_available:
            logger.info("NVENC hardware encoding available")

    @staticmethod
    def _probe_hw_video_writer() -> bool:
        """Return True if an H264 VideoWriter opens with HW acceleration requested.

        The probe file is created in the system temp directory and is always
        removed, and the writer is always released, even when the probe raises.
        """
        import tempfile

        probe_path = None
        writer = None
        try:
            fd, probe_path = tempfile.mkstemp(suffix=".mp4")
            os.close(fd)
            writer = cv2.VideoWriter(
                probe_path,
                cv2.VideoWriter_fourcc(*'H264'),
                30,
                (1280, 720),
                # VideoWriter takes VIDEOWRITER_PROP_* ids; the CAP_PROP_* ids
                # belong to VideoCapture.
                # NOTE(review): VIDEO_ACCELERATION_ANY does not pin the encoder
                # to NVENC specifically - confirm whether a stricter check is
                # wanted.
                [cv2.VIDEOWRITER_PROP_HW_ACCELERATION, cv2.VIDEO_ACCELERATION_ANY],
            )
            return bool(writer.isOpened())
        except Exception as e:
            logger.debug(f"NVENC not available: {e}")
            return False
        finally:
            if writer is not None:
                writer.release()
            if probe_path is not None and os.path.exists(probe_path):
                try:
                    os.remove(probe_path)
                except OSError as e:
                    logger.debug(f"Could not remove NVENC probe file {probe_path}: {e}")

    @staticmethod
    def _is_bgr_uint8(frame) -> bool:
        """Return True for an H x W x 3 uint8 ndarray (the layout given to TurboJPEG)."""
        return (
            isinstance(frame, np.ndarray)
            and frame.ndim == 3
            and frame.shape[2] == 3
            and frame.dtype == np.uint8
        )

    @staticmethod
    def _encode_with_opencv(frame: np.ndarray, quality: int) -> Optional[bytes]:
        """Encode with ``cv2.imencode``; return the JPEG bytes or None on failure."""
        success, encoded = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, quality])
        return encoded.tobytes() if success else None

    def encode_jpeg(self, frame: np.ndarray, quality: int = 85) -> Optional[bytes]:
        """
        Encode frame to JPEG using the fastest available method.

        Args:
            frame: BGR image frame
            quality: JPEG quality (1-100)

        Returns:
            Encoded JPEG bytes or None on failure
        """
        try:
            # Method 1: TurboJPEG.
            # The frame is passed through unchanged: PyTurboJPEG's encode()
            # treats its input as BGR by default, so converting to RGB first
            # would swap the red and blue channels in the output.
            # ascontiguousarray guards against strided views, since the raw
            # buffer is handed to the native library.
            if self.turbojpeg_available and self._is_bgr_uint8(frame):
                return self.turbojpeg.encode(np.ascontiguousarray(frame), quality=quality)

            # Method 2: Hardware-accelerated encoding (placeholder, see below)
            if self.nvenc_available:
                return self._encode_with_nvenc(frame, quality)

            # Fallback: Standard OpenCV encoding
            return self._encode_with_opencv(frame, quality)

        except Exception as e:
            logger.error(f"Failed to encode frame: {e}")
            return None

    def _encode_with_nvenc(self, frame: np.ndarray, quality: int) -> Optional[bytes]:
        """
        Placeholder for GPU JPEG encoding; currently encodes with OpenCV.

        The intended implementation is a GStreamer pipeline along the lines of
        ``appsrc ! videoconvert ! nvvideoconvert ! nvjpegenc ! appsink``, which
        is not wired up yet.

        Args:
            frame: BGR image frame
            quality: JPEG quality (1-100)

        Returns:
            Encoded JPEG bytes or None on failure
        """
        try:
            logger.debug("NVENC JPEG encoding not fully implemented, using fallback")
            return self._encode_with_opencv(frame, quality)
        except Exception as e:
            logger.error(f"NVENC encoding failed: {e}")
            return None

    def encode_batch(self, frames: list, quality: int = 85) -> list:
        """
        Encode several frames, one after another, via ``encode_jpeg``.

        Args:
            frames: List of BGR frames
            quality: JPEG quality

        Returns:
            List of encoded JPEG bytes, in input order; an entry is None when
            that frame failed to encode.
        """
        # Routing every frame through encode_jpeg keeps backend selection and
        # error handling identical to the single-frame path.
        return [self.encode_jpeg(frame, quality) for frame in frames]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Global encoder instance
 | 
			
		||||
_hardware_encoder = None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_hardware_encoder() -> HardwareEncoder:
    """Return the shared module-level HardwareEncoder, building it on first use."""
    global _hardware_encoder
    if _hardware_encoder is not None:
        return _hardware_encoder
    # First call: construct the encoder and cache it in the module global.
    _hardware_encoder = HardwareEncoder()
    return _hardware_encoder
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def encode_frame_hardware(frame: np.ndarray, quality: int = 85) -> Optional[bytes]:
    """
    Encode a single frame to JPEG through the shared encoder instance.

    Args:
        frame: BGR image frame
        quality: JPEG quality (1-100)

    Returns:
        Encoded JPEG bytes or None on failure
    """
    # Delegates to the encoder returned by get_hardware_encoder().
    return get_hardware_encoder().encode_jpeg(frame, quality)
 | 
			
		||||
| 
						 | 
				
			
			@ -6,4 +6,5 @@ scipy
 | 
			
		|||
filterpy
 | 
			
		||||
psycopg2-binary
 | 
			
		||||
lap>=0.5.12
 | 
			
		||||
pynvml
 | 
			
		||||
pynvml
 | 
			
		||||
PyTurboJPEG
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue