From 9f29755e0fd4143bbeeb3147ef0e7b7ceca3c7bb Mon Sep 17 00:00:00 2001 From: ziesorx Date: Thu, 25 Sep 2025 11:24:45 +0700 Subject: [PATCH 01/28] feat: update filename and timestamp to gmt+7 --- core/communication/websocket.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/communication/websocket.py b/core/communication/websocket.py index 9def134..813350e 100644 --- a/core/communication/websocket.py +++ b/core/communication/websocket.py @@ -6,7 +6,7 @@ import json import logging import os import cv2 -from datetime import datetime +from datetime import datetime, timezone, timedelta from pathlib import Path from typing import Optional from fastapi import WebSocket, WebSocketDisconnect @@ -483,8 +483,8 @@ class WebSocketHandler: images_dir.mkdir(exist_ok=True) # Generate filename with timestamp and session ID - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"{display_identifier}_{session_id}_{timestamp}.jpg" + timestamp = datetime.now(tz=timezone(timedelta(hours=7))).strftime("%Y%m%d_%H%M%S") + filename = f"{session_id}_{display_identifier}_{timestamp}.jpg" filepath = images_dir / filename # Use existing HTTPSnapshotReader to fetch snapshot From b919a1ebe2bfbf30f567765487a2026cdafb7c1b Mon Sep 17 00:00:00 2001 From: ziesorx Date: Thu, 25 Sep 2025 22:16:19 +0700 Subject: [PATCH 02/28] fix: use nvdec --- Dockerfile.base | 46 ++++++++- build-nvdec.sh | 44 +++++++++ core/streaming/readers.py | 81 ++++++++++++--- core/utils/hardware_encoder.py | 173 +++++++++++++++++++++++++++++++++ requirements.base.txt | 3 +- 5 files changed, 328 insertions(+), 19 deletions(-) create mode 100755 build-nvdec.sh create mode 100644 core/utils/hardware_encoder.py diff --git a/Dockerfile.base b/Dockerfile.base index ade3d69..ecf7b2a 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -1,18 +1,54 @@ -# Base image with all ML dependencies +# Base image with all ML dependencies and NVIDIA Video Codec SDK FROM pytorch/pytorch:2.8.0-cuda12.6-cudnn9-runtime -# Install system dependencies +# Install system dependencies including GStreamer with NVDEC support RUN apt update && apt install -y \ libgl1 \ libglib2.0-0 \ - libgstreamer1.0-0 \ libgtk-3-0 \ - libavcodec58 \ + libgomp1 \ + # GStreamer base + libgstreamer1.0-0 \ + libgstreamer-plugins-base1.0-0 \ + libgstreamer-plugins-bad1.0-0 \ + gstreamer1.0-tools \ + gstreamer1.0-plugins-base \ + gstreamer1.0-plugins-good \ + gstreamer1.0-plugins-bad \ + gstreamer1.0-plugins-ugly \ + gstreamer1.0-libav \ + # GStreamer Python bindings + python3-gst-1.0 \ + # NVIDIA specific GStreamer plugins for hardware acceleration + gstreamer1.0-vaapi \ + # FFmpeg with hardware acceleration support + ffmpeg \ + libavcodec-extra \ libavformat58 \ libswscale5 \ - libgomp1 \ + # Additional codecs + libx264-155 \ + libx265-179 \ + # TurboJPEG for fast JPEG encoding + libturbojpeg0-dev \ && rm -rf /var/lib/apt/lists/* +# Install NVIDIA DeepStream (includes hardware accelerated GStreamer plugins) +# This provides nvv4l2decoder, nvvideoconvert, etc. +RUN apt update && apt install -y \ + wget \ + software-properties-common \ + && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \ + && dpkg -i cuda-keyring_1.0-1_all.deb \ + && apt update \ + && apt install -y libnvidia-decode-535 \ + && rm -rf /var/lib/apt/lists/* cuda-keyring_1.0-1_all.deb + +# Set environment variables for hardware acceleration +ENV OPENCV_FFMPEG_CAPTURE_OPTIONS="video_codec;h264_cuvid" +ENV GST_PLUGIN_PATH="/usr/lib/x86_64-linux-gnu/gstreamer-1.0" +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" + # Copy and install base requirements (ML dependencies that rarely change) COPY requirements.base.txt . RUN pip install --no-cache-dir -r requirements.base.txt diff --git a/build-nvdec.sh b/build-nvdec.sh new file mode 100755 index 0000000..6629994 --- /dev/null +++ b/build-nvdec.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# Build script for Docker image with NVDEC hardware acceleration support + +echo "Building Docker image with NVDEC hardware acceleration support..." +echo "=========================================================" + +# Build the base image first (with all ML and hardware acceleration dependencies) +echo "Building base image with NVDEC support..." +docker build -f Dockerfile.base -t detector-worker-base:nvdec . + +if [ $? -ne 0 ]; then + echo "Failed to build base image" + exit 1 +fi + +# Build the main application image +echo "Building application image..." +docker build -t detector-worker:nvdec . + +if [ $? -ne 0 ]; then + echo "Failed to build application image" + exit 1 +fi + +echo "" +echo "=========================================================" +echo "Build complete!" +echo "" +echo "To run the container with GPU support:" +echo "docker run --gpus all -p 8000:8000 detector-worker:nvdec" +echo "" +echo "Hardware acceleration features enabled:" +echo "- NVDEC for H.264/H.265 video decoding" +echo "- NVENC for video encoding (if needed)" +echo "- TurboJPEG for fast JPEG encoding" +echo "- CUDA for model inference" +echo "" +echo "The application will automatically detect and use:" +echo "1. GStreamer with NVDEC (NVIDIA GPUs)" +echo "2. FFMPEG with CUVID (NVIDIA GPUs)" +echo "3. VAAPI (Intel/AMD GPUs)" +echo "4. TurboJPEG (3-5x faster than standard JPEG)" +echo "=========================================================" \ No newline at end of file diff --git a/core/streaming/readers.py b/core/streaming/readers.py index a48840a..0a989b5 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -166,28 +166,83 @@ class RTSPReader: logger.info(f"RTSP reader thread ended for camera {self.camera_id}") def _initialize_capture(self) -> bool: - """Initialize video capture with optimized settings for 1280x720@6fps.""" + """Initialize video capture with hardware acceleration (NVDEC) for 1280x720@6fps.""" try: # Release previous capture if exists if self.cap: self.cap.release() time.sleep(0.5) - logger.info(f"Initializing capture for camera {self.camera_id}") + logger.info(f"Initializing capture for camera {self.camera_id} with hardware acceleration") + hw_accel_success = False - # Create capture with FFMPEG backend and TCP transport for reliability - # Use TCP instead of UDP to prevent packet loss - rtsp_url_tcp = self.rtsp_url.replace('rtsp://', 'rtsp://') - if '?' in rtsp_url_tcp: - rtsp_url_tcp += '&tcp' - else: - rtsp_url_tcp += '?tcp' + # Method 1: Try GStreamer with NVDEC (most efficient on NVIDIA GPUs) + if not hw_accel_success: + try: + # Build GStreamer pipeline for NVIDIA hardware decoding + gst_pipeline = ( + f"rtspsrc location={self.rtsp_url} protocols=tcp latency=100 ! " + "rtph264depay ! h264parse ! " + "nvv4l2decoder ! " # NVIDIA hardware decoder + "nvvideoconvert ! " # NVIDIA hardware color conversion + "video/x-raw,format=BGRx,width=1280,height=720 ! " + "videoconvert ! " + "video/x-raw,format=BGR ! " + "appsink max-buffers=1 drop=true sync=false" + ) + logger.info(f"Attempting GStreamer NVDEC pipeline for camera {self.camera_id}") + self.cap = cv2.VideoCapture(gst_pipeline, cv2.CAP_GSTREAMER) - # Alternative: Set environment variable for RTSP transport - import os - os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'rtsp_transport;tcp' + if self.cap.isOpened(): + hw_accel_success = True + logger.info(f"Camera {self.camera_id}: Successfully using GStreamer with NVDEC hardware acceleration") + except Exception as e: + logger.debug(f"Camera {self.camera_id}: GStreamer NVDEC not available: {e}") - self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) + # Method 2: Try FFMPEG with NVIDIA CUVID hardware decoder + if not hw_accel_success: + try: + import os + # Set FFMPEG to use NVIDIA CUVID decoder + os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'video_codec;h264_cuvid|rtsp_transport;tcp|hwaccel;cuda' + + logger.info(f"Attempting FFMPEG with h264_cuvid for camera {self.camera_id}") + self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) + + if self.cap.isOpened(): + hw_accel_success = True + logger.info(f"Camera {self.camera_id}: Using FFMPEG with CUVID hardware acceleration") + except Exception as e: + logger.debug(f"Camera {self.camera_id}: FFMPEG CUVID not available: {e}") + + # Method 3: Try VAAPI hardware acceleration (for Intel/AMD GPUs) + if not hw_accel_success: + try: + gst_pipeline = ( + f"rtspsrc location={self.rtsp_url} protocols=tcp latency=100 ! " + "rtph264depay ! h264parse ! " + "vaapih264dec ! " # VAAPI hardware decoder + "vaapipostproc ! " + "video/x-raw,format=BGRx,width=1280,height=720 ! " + "videoconvert ! " + "video/x-raw,format=BGR ! " + "appsink max-buffers=1 drop=true sync=false" + ) + logger.info(f"Attempting GStreamer VAAPI pipeline for camera {self.camera_id}") + self.cap = cv2.VideoCapture(gst_pipeline, cv2.CAP_GSTREAMER) + + if self.cap.isOpened(): + hw_accel_success = True + logger.info(f"Camera {self.camera_id}: Successfully using GStreamer with VAAPI hardware acceleration") + except Exception as e: + logger.debug(f"Camera {self.camera_id}: GStreamer VAAPI not available: {e}") + + # Fallback: Standard FFMPEG with software decoding + if not hw_accel_success: + logger.warning(f"Camera {self.camera_id}: Hardware acceleration not available, falling back to software decoding") + import os + os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'rtsp_transport;tcp' + self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) if not self.cap.isOpened(): logger.error(f"Failed to open stream for camera {self.camera_id}") diff --git a/core/utils/hardware_encoder.py b/core/utils/hardware_encoder.py new file mode 100644 index 0000000..45bbb35 --- /dev/null +++ b/core/utils/hardware_encoder.py @@ -0,0 +1,173 @@ +""" +Hardware-accelerated image encoding using NVIDIA NVENC or Intel QuickSync +""" + +import cv2 +import numpy as np +import logging +from typing import Optional, Tuple +import os + +logger = logging.getLogger("detector_worker") + + +class HardwareEncoder: + """Hardware-accelerated JPEG encoder using GPU.""" + + def __init__(self): + """Initialize hardware encoder.""" + self.nvenc_available = False + self.vaapi_available = False + self.turbojpeg_available = False + + # Check for TurboJPEG (fastest CPU-based option) + try: + from turbojpeg import TurboJPEG + self.turbojpeg = TurboJPEG() + self.turbojpeg_available = True + logger.info("TurboJPEG accelerated encoding available") + except ImportError: + logger.debug("TurboJPEG not available") + + # Check for NVIDIA NVENC support + try: + # Test if we can create an NVENC encoder + test_frame = np.zeros((720, 1280, 3), dtype=np.uint8) + fourcc = cv2.VideoWriter_fourcc(*'H264') + test_writer = cv2.VideoWriter( + "test.mp4", + fourcc, + 30, + (1280, 720), + [cv2.CAP_PROP_HW_ACCELERATION, cv2.VIDEO_ACCELERATION_ANY] + ) + if test_writer.isOpened(): + self.nvenc_available = True + logger.info("NVENC hardware encoding available") + test_writer.release() + if os.path.exists("test.mp4"): + os.remove("test.mp4") + except Exception as e: + logger.debug(f"NVENC not available: {e}") + + def encode_jpeg(self, frame: np.ndarray, quality: int = 85) -> Optional[bytes]: + """ + Encode frame to JPEG using the fastest available method. + + Args: + frame: BGR image frame + quality: JPEG quality (1-100) + + Returns: + Encoded JPEG bytes or None on failure + """ + try: + # Method 1: TurboJPEG (3-5x faster than cv2.imencode) + if self.turbojpeg_available: + # Convert BGR to RGB for TurboJPEG + rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + encoded = self.turbojpeg.encode(rgb_frame, quality=quality) + return encoded + + # Method 2: Hardware-accelerated encoding via GStreamer (if available) + if self.nvenc_available: + return self._encode_with_nvenc(frame, quality) + + # Fallback: Standard OpenCV encoding + encode_params = [cv2.IMWRITE_JPEG_QUALITY, quality] + success, encoded = cv2.imencode('.jpg', frame, encode_params) + if success: + return encoded.tobytes() + + return None + + except Exception as e: + logger.error(f"Failed to encode frame: {e}") + return None + + def _encode_with_nvenc(self, frame: np.ndarray, quality: int) -> Optional[bytes]: + """ + Encode using NVIDIA NVENC hardware encoder. + + This is complex to implement directly, so we'll use a GStreamer pipeline + if available. + """ + try: + # Create a GStreamer pipeline for hardware encoding + height, width = frame.shape[:2] + gst_pipeline = ( + f"appsrc ! " + f"video/x-raw,format=BGR,width={width},height={height},framerate=30/1 ! " + f"videoconvert ! " + f"nvvideoconvert ! " # GPU color conversion + f"nvjpegenc quality={quality} ! " # Hardware JPEG encoder + f"appsink" + ) + + # This would require GStreamer Python bindings + # For now, fall back to TurboJPEG or standard encoding + logger.debug("NVENC JPEG encoding not fully implemented, using fallback") + encode_params = [cv2.IMWRITE_JPEG_QUALITY, quality] + success, encoded = cv2.imencode('.jpg', frame, encode_params) + if success: + return encoded.tobytes() + + return None + + except Exception as e: + logger.error(f"NVENC encoding failed: {e}") + return None + + def encode_batch(self, frames: list, quality: int = 85) -> list: + """ + Batch encode multiple frames for better GPU utilization. + + Args: + frames: List of BGR frames + quality: JPEG quality + + Returns: + List of encoded JPEG bytes + """ + encoded_frames = [] + + if self.turbojpeg_available: + # TurboJPEG can handle batch encoding efficiently + for frame in frames: + rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + encoded = self.turbojpeg.encode(rgb_frame, quality=quality) + encoded_frames.append(encoded) + else: + # Fallback to sequential encoding + for frame in frames: + encoded = self.encode_jpeg(frame, quality) + encoded_frames.append(encoded) + + return encoded_frames + + +# Global encoder instance +_hardware_encoder = None + + +def get_hardware_encoder() -> HardwareEncoder: + """Get or create the global hardware encoder instance.""" + global _hardware_encoder + if _hardware_encoder is None: + _hardware_encoder = HardwareEncoder() + return _hardware_encoder + + +def encode_frame_hardware(frame: np.ndarray, quality: int = 85) -> Optional[bytes]: + """ + Convenience function to encode a frame using hardware acceleration. + + Args: + frame: BGR image frame + quality: JPEG quality (1-100) + + Returns: + Encoded JPEG bytes or None on failure + """ + encoder = get_hardware_encoder() + return encoder.encode_jpeg(frame, quality) \ No newline at end of file diff --git a/requirements.base.txt b/requirements.base.txt index 04e90ba..3511dd4 100644 --- a/requirements.base.txt +++ b/requirements.base.txt @@ -6,4 +6,5 @@ scipy filterpy psycopg2-binary lap>=0.5.12 -pynvml \ No newline at end of file +pynvml +PyTurboJPEG \ No newline at end of file From 5f29392c2fbbd82e7337e1047068179c35fc3012 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Thu, 25 Sep 2025 22:25:27 +0700 Subject: [PATCH 03/28] chore: update Dockerfile.base --- Dockerfile.base | 3 --- 1 file changed, 3 deletions(-) diff --git a/Dockerfile.base b/Dockerfile.base index ecf7b2a..281ba9d 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -26,9 +26,6 @@ RUN apt update && apt install -y \ libavcodec-extra \ libavformat58 \ libswscale5 \ - # Additional codecs - libx264-155 \ - libx265-179 \ # TurboJPEG for fast JPEG encoding libturbojpeg0-dev \ && rm -rf /var/lib/apt/lists/* From 6bb679f4d84bf70d535ac1a52cf987f508829301 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Thu, 25 Sep 2025 22:59:55 +0700 Subject: [PATCH 04/28] fix: use gpu --- Dockerfile.base | 176 +++++++++++++++++++++----- README-hardware-acceleration.md | 127 +++++++++++++++++++ build-nvdec.sh | 44 ------- core/streaming/readers.py | 56 +++++++-- core/utils/ffmpeg_detector.py | 214 ++++++++++++++++++++++++++++++++ 5 files changed, 533 insertions(+), 84 deletions(-) create mode 100644 README-hardware-acceleration.md delete mode 100755 build-nvdec.sh create mode 100644 core/utils/ffmpeg_detector.py diff --git a/Dockerfile.base b/Dockerfile.base index 281ba9d..620f4d8 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -1,54 +1,166 @@ -# Base image with all ML dependencies and NVIDIA Video Codec SDK +# Base image with complete ML and hardware acceleration stack FROM pytorch/pytorch:2.8.0-cuda12.6-cudnn9-runtime -# Install system dependencies including GStreamer with NVDEC support -RUN apt update && apt install -y \ +# Install build dependencies and system libraries +RUN apt-get update && apt-get install -y \ + # Build tools + build-essential \ + cmake \ + git \ + pkg-config \ + wget \ + unzip \ + yasm \ + nasm \ + # System libraries libgl1 \ libglib2.0-0 \ libgtk-3-0 \ libgomp1 \ - # GStreamer base - libgstreamer1.0-0 \ - libgstreamer-plugins-base1.0-0 \ - libgstreamer-plugins-bad1.0-0 \ + # Media libraries for FFmpeg build + libjpeg-dev \ + libpng-dev \ + libtiff-dev \ + libx264-dev \ + libx265-dev \ + libvpx-dev \ + libfdk-aac-dev \ + libmp3lame-dev \ + libopus-dev \ + libv4l-dev \ + libxvidcore-dev \ + libdc1394-22-dev \ + # TurboJPEG for fast JPEG encoding + libturbojpeg0-dev \ + # GStreamer complete stack + libgstreamer1.0-dev \ + libgstreamer-plugins-base1.0-dev \ + libgstreamer-plugins-bad1.0-dev \ gstreamer1.0-tools \ gstreamer1.0-plugins-base \ gstreamer1.0-plugins-good \ gstreamer1.0-plugins-bad \ gstreamer1.0-plugins-ugly \ gstreamer1.0-libav \ - # GStreamer Python bindings - python3-gst-1.0 \ - # NVIDIA specific GStreamer plugins for hardware acceleration gstreamer1.0-vaapi \ - # FFmpeg with hardware acceleration support - ffmpeg \ - libavcodec-extra \ - libavformat58 \ - libswscale5 \ - # TurboJPEG for fast JPEG encoding - libturbojpeg0-dev \ + python3-gst-1.0 \ + # Python development + python3-dev \ + python3-numpy \ + # NVIDIA driver components + libnvidia-encode-535 \ + libnvidia-decode-535 \ && rm -rf /var/lib/apt/lists/* -# Install NVIDIA DeepStream (includes hardware accelerated GStreamer plugins) -# This provides nvv4l2decoder, nvvideoconvert, etc. -RUN apt update && apt install -y \ - wget \ - software-properties-common \ - && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \ - && dpkg -i cuda-keyring_1.0-1_all.deb \ - && apt update \ - && apt install -y libnvidia-decode-535 \ - && rm -rf /var/lib/apt/lists/* cuda-keyring_1.0-1_all.deb +# Install NVIDIA Video Codec SDK headers +RUN cd /tmp && \ + wget https://github.com/FFmpeg/nv-codec-headers/archive/refs/tags/n12.1.14.0.zip && \ + unzip n12.1.14.0.zip && \ + cd nv-codec-headers-n12.1.14.0 && \ + make install && \ + rm -rf /tmp/* -# Set environment variables for hardware acceleration -ENV OPENCV_FFMPEG_CAPTURE_OPTIONS="video_codec;h264_cuvid" +# Build FFmpeg from source with full NVIDIA hardware acceleration +ENV FFMPEG_VERSION=6.0 +RUN cd /tmp && \ + wget https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \ + tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \ + cd ffmpeg-${FFMPEG_VERSION} && \ + ./configure \ + --enable-gpl \ + --enable-nonfree \ + --enable-libx264 \ + --enable-libx265 \ + --enable-libvpx \ + --enable-libfdk-aac \ + --enable-libmp3lame \ + --enable-libopus \ + --enable-cuda-nvcc \ + --enable-cuvid \ + --enable-nvenc \ + --enable-nvdec \ + --enable-cuda-llvm \ + --enable-libnpp \ + --extra-cflags=-I/usr/local/cuda/include \ + --extra-ldflags=-L/usr/local/cuda/lib64 \ + --nvccflags="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_89,code=sm_89 -gencode arch=compute_90,code=sm_90" && \ + make -j$(nproc) && \ + make install && \ + ldconfig && \ + cd / && rm -rf /tmp/* + +# Build OpenCV from source with custom FFmpeg and full CUDA support +ENV OPENCV_VERSION=4.8.1 +RUN cd /tmp && \ + wget -O opencv.zip https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip && \ + wget -O opencv_contrib.zip https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip && \ + unzip opencv.zip && \ + unzip opencv_contrib.zip && \ + cd opencv-${OPENCV_VERSION} && \ + mkdir build && cd build && \ + PKG_CONFIG_PATH=/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH \ + cmake -D CMAKE_BUILD_TYPE=RELEASE \ + -D CMAKE_INSTALL_PREFIX=/usr/local \ + -D WITH_CUDA=ON \ + -D WITH_CUDNN=ON \ + -D OPENCV_DNN_CUDA=ON \ + -D ENABLE_FAST_MATH=ON \ + -D CUDA_FAST_MATH=ON \ + -D WITH_CUBLAS=ON \ + -D WITH_NVCUVID=ON \ + -D WITH_CUVID=ON \ + -D BUILD_opencv_cudacodec=ON \ + -D WITH_FFMPEG=ON \ + -D WITH_GSTREAMER=ON \ + -D WITH_LIBV4L=ON \ + -D BUILD_opencv_python3=ON \ + -D OPENCV_GENERATE_PKGCONFIG=ON \ + -D OPENCV_ENABLE_NONFREE=ON \ + -D OPENCV_EXTRA_MODULES_PATH=/tmp/opencv_contrib-${OPENCV_VERSION}/modules \ + -D PYTHON3_EXECUTABLE=$(which python3) \ + -D PYTHON_INCLUDE_DIR=$(python3 -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \ + -D PYTHON_LIBRARY=$(python3 -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))") \ + -D BUILD_EXAMPLES=OFF \ + -D BUILD_TESTS=OFF \ + -D BUILD_PERF_TESTS=OFF \ + .. && \ + make -j$(nproc) && \ + make install && \ + ldconfig && \ + cd / && rm -rf /tmp/* + +# Set environment variables for maximum hardware acceleration +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/lib:${LD_LIBRARY_PATH}" +ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH}" +ENV PYTHONPATH="/usr/local/lib/python3.10/dist-packages:${PYTHONPATH}" ENV GST_PLUGIN_PATH="/usr/lib/x86_64-linux-gnu/gstreamer-1.0" -ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" -# Copy and install base requirements (ML dependencies that rarely change) +# Optimized environment variables for hardware acceleration +ENV OPENCV_FFMPEG_CAPTURE_OPTIONS="rtsp_transport;tcp|hwaccel;cuda|hwaccel_device;0|video_codec;h264_cuvid|hwaccel_output_format;cuda" +ENV OPENCV_FFMPEG_WRITER_OPTIONS="video_codec;h264_nvenc|preset;fast|tune;zerolatency|gpu;0" +ENV CUDA_VISIBLE_DEVICES=0 +ENV NVIDIA_VISIBLE_DEVICES=all +ENV NVIDIA_DRIVER_CAPABILITIES=compute,video,utility + +# Copy and install base requirements (exclude opencv-python since we built from source) COPY requirements.base.txt . -RUN pip install --no-cache-dir -r requirements.base.txt +RUN grep -v opencv-python requirements.base.txt > requirements.tmp && \ + mv requirements.tmp requirements.base.txt && \ + pip install --no-cache-dir -r requirements.base.txt + +# Verify complete hardware acceleration setup +RUN echo "=== Hardware Acceleration Verification ===" && \ + echo "FFmpeg Hardware Accelerators:" && \ + ffmpeg -hide_banner -hwaccels 2>/dev/null | head -10 && \ + echo "FFmpeg NVIDIA Decoders:" && \ + ffmpeg -hide_banner -decoders 2>/dev/null | grep -E "(cuvid|nvdec)" | head -5 && \ + echo "FFmpeg NVIDIA Encoders:" && \ + ffmpeg -hide_banner -encoders 2>/dev/null | grep nvenc | head -5 && \ + echo "OpenCV Configuration:" && \ + python3 -c "import cv2; print('OpenCV version:', cv2.__version__); print('CUDA devices:', cv2.cuda.getCudaEnabledDeviceCount()); build_info = cv2.getBuildInformation(); print('CUDA support:', 'CUDA' in build_info); print('CUVID support:', 'CUVID' in build_info); print('FFmpeg support:', 'FFMPEG' in build_info); print('GStreamer support:', 'GStreamer' in build_info)" && \ + echo "GStreamer NVIDIA Plugins:" && \ + gst-inspect-1.0 2>/dev/null | grep -E "(nvv4l2|nvvideo)" | head -5 || echo "GStreamer NVIDIA plugins not detected" && \ + echo "=== Verification Complete ===" # Set working directory WORKDIR /app diff --git a/README-hardware-acceleration.md b/README-hardware-acceleration.md new file mode 100644 index 0000000..69c6e09 --- /dev/null +++ b/README-hardware-acceleration.md @@ -0,0 +1,127 @@ +# Hardware Acceleration Setup + +This detector worker now includes **complete NVIDIA hardware acceleration** with FFmpeg and OpenCV built from source. + +## What's Included + +### 🔧 Complete Hardware Stack +- **FFmpeg 6.0** built from source with NVIDIA Video Codec SDK +- **OpenCV 4.8.1** built with CUDA and custom FFmpeg integration +- **GStreamer** with NVDEC/VAAPI plugins +- **TurboJPEG** for optimized JPEG encoding (3-5x faster) +- **CUDA** support for YOLO model inference + +### 🎯 Hardware Acceleration Methods (Automatic Detection) +1. **GStreamer NVDEC** - Best for RTSP streaming, lowest latency +2. **OpenCV CUDA** - Direct GPU memory access, best integration +3. **FFmpeg CUVID** - Custom build with full NVIDIA acceleration +4. **VAAPI** - Intel/AMD GPU support +5. **Software Fallback** - CPU-only as last resort + +## Build and Run + +### Single Build Script +```bash +./build-nvdec.sh +``` +**Build time**: 45-90 minutes (compiles FFmpeg + OpenCV from source) + +### Run with GPU Support +```bash +docker run --gpus all -p 8000:8000 detector-worker:complete-hw-accel +``` + +## Performance Improvements + +### Expected CPU Reduction +- **Video decoding**: 70-90% reduction (moved to GPU) +- **JPEG encoding**: 70-80% faster with TurboJPEG +- **Model inference**: GPU accelerated with CUDA +- **Overall system**: 50-80% less CPU usage + +### Profiling Results Comparison +**Before (Software Only)**: +- `cv2.imencode`: 6.5% CPU time (1.95s out of 30s) +- `psutil.cpu_percent`: 88% CPU time (idle polling) +- Video decoding: 100% CPU + +**After (Hardware Accelerated)**: +- Video decoding: GPU (~5-10% CPU overhead) +- JPEG encoding: 3-5x faster with TurboJPEG +- Model inference: GPU accelerated + +## Verification + +### Check Hardware Acceleration Support +```bash +docker run --rm --gpus all detector-worker:complete-hw-accel \ + bash -c "ffmpeg -hwaccels && python3 -c 'import cv2; build=cv2.getBuildInformation(); print(\"CUDA:\", \"CUDA\" in build); print(\"CUVID:\", \"CUVID\" in build)'" +``` + +### Runtime Logs +The application will automatically log which acceleration method is being used: +``` +Camera cam1: Successfully using GStreamer with NVDEC hardware acceleration +Camera cam2: Using FFMPEG hardware acceleration (backend: FFMPEG) +Camera cam3: Using OpenCV CUDA hardware acceleration +``` + +## Files Modified + +### Docker Configuration +- **Dockerfile.base** - Complete hardware acceleration stack +- **build-nvdec.sh** - Single build script for everything + +### Application Code +- **core/streaming/readers.py** - Multi-method hardware acceleration +- **core/utils/hardware_encoder.py** - TurboJPEG + NVENC encoding +- **core/utils/ffmpeg_detector.py** - Runtime capability detection +- **requirements.base.txt** - Added TurboJPEG, removed opencv-python + +## Architecture + +``` +Input RTSP Stream + ↓ +1. GStreamer NVDEC Pipeline (NVIDIA GPU) + rtspsrc → nvv4l2decoder → nvvideoconvert → OpenCV + ↓ +2. OpenCV CUDA Backend (NVIDIA GPU) + OpenCV with CUDA acceleration + ↓ +3. FFmpeg CUVID (NVIDIA GPU) + Custom FFmpeg with h264_cuvid decoder + ↓ +4. VAAPI (Intel/AMD GPU) + Hardware acceleration for non-NVIDIA + ↓ +5. Software Fallback (CPU) + Standard OpenCV software decoding +``` + +## Benefits + +### For Development +- **Single Dockerfile.base** - Everything consolidated +- **Automatic detection** - No manual configuration needed +- **Graceful fallback** - Works without GPU for development + +### For Production +- **Maximum performance** - Uses best available acceleration +- **GPU memory efficiency** - Direct GPU-to-GPU pipeline +- **Lower latency** - Hardware decoding + CUDA inference +- **Reduced CPU load** - Frees CPU for other tasks + +## Troubleshooting + +### Build Issues +- Ensure NVIDIA Docker runtime is installed +- Check CUDA 12.6 compatibility with your GPU +- Build takes 45-90 minutes - be patient + +### Runtime Issues +- Verify `nvidia-smi` works in container +- Check logs for acceleration method being used +- Fallback to software decoding is automatic + +This setup provides **production-ready hardware acceleration** with automatic detection and graceful fallback for maximum compatibility. \ No newline at end of file diff --git a/build-nvdec.sh b/build-nvdec.sh deleted file mode 100755 index 6629994..0000000 --- a/build-nvdec.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -# Build script for Docker image with NVDEC hardware acceleration support - -echo "Building Docker image with NVDEC hardware acceleration support..." -echo "=========================================================" - -# Build the base image first (with all ML and hardware acceleration dependencies) -echo "Building base image with NVDEC support..." -docker build -f Dockerfile.base -t detector-worker-base:nvdec . - -if [ $? -ne 0 ]; then - echo "Failed to build base image" - exit 1 -fi - -# Build the main application image -echo "Building application image..." -docker build -t detector-worker:nvdec . - -if [ $? -ne 0 ]; then - echo "Failed to build application image" - exit 1 -fi - -echo "" -echo "=========================================================" -echo "Build complete!" -echo "" -echo "To run the container with GPU support:" -echo "docker run --gpus all -p 8000:8000 detector-worker:nvdec" -echo "" -echo "Hardware acceleration features enabled:" -echo "- NVDEC for H.264/H.265 video decoding" -echo "- NVENC for video encoding (if needed)" -echo "- TurboJPEG for fast JPEG encoding" -echo "- CUDA for model inference" -echo "" -echo "The application will automatically detect and use:" -echo "1. GStreamer with NVDEC (NVIDIA GPUs)" -echo "2. FFMPEG with CUVID (NVIDIA GPUs)" -echo "3. VAAPI (Intel/AMD GPUs)" -echo "4. TurboJPEG (3-5x faster than standard JPEG)" -echo "=========================================================" \ No newline at end of file diff --git a/core/streaming/readers.py b/core/streaming/readers.py index 0a989b5..377db56 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -199,23 +199,63 @@ class RTSPReader: except Exception as e: logger.debug(f"Camera {self.camera_id}: GStreamer NVDEC not available: {e}") - # Method 2: Try FFMPEG with NVIDIA CUVID hardware decoder + # Method 2: Try OpenCV CUDA VideoReader (if built with CUVID support) if not hw_accel_success: try: - import os - # Set FFMPEG to use NVIDIA CUVID decoder - os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'video_codec;h264_cuvid|rtsp_transport;tcp|hwaccel;cuda' + # Check if OpenCV was built with CUDA codec support + build_info = cv2.getBuildInformation() + if 'cudacodec' in build_info or 'CUVID' in build_info: + logger.info(f"Attempting OpenCV CUDA VideoReader for camera {self.camera_id}") + + # Use OpenCV's CUDA backend + self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG, [ + cv2.CAP_PROP_HW_ACCELERATION, cv2.VIDEO_ACCELERATION_ANY + ]) + + if self.cap.isOpened(): + hw_accel_success = True + logger.info(f"Camera {self.camera_id}: Using OpenCV CUDA hardware acceleration") + else: + logger.debug(f"Camera {self.camera_id}: OpenCV not built with CUDA codec support") + except Exception as e: + logger.debug(f"Camera {self.camera_id}: OpenCV CUDA not available: {e}") + + # Method 3: Try FFMPEG with optimal hardware acceleration (CUVID/VAAPI) + if not hw_accel_success: + try: + from core.utils.ffmpeg_detector import get_optimal_rtsp_options + import os + + # Get optimal FFmpeg options based on detected capabilities + optimal_options = get_optimal_rtsp_options(self.rtsp_url) + os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = optimal_options + + logger.info(f"Attempting FFMPEG with detected hardware acceleration for camera {self.camera_id}") + logger.debug(f"Camera {self.camera_id}: Using FFmpeg options: {optimal_options}") - logger.info(f"Attempting FFMPEG with h264_cuvid for camera {self.camera_id}") self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) if self.cap.isOpened(): hw_accel_success = True - logger.info(f"Camera {self.camera_id}: Using FFMPEG with CUVID hardware acceleration") + # Try to get backend info to confirm hardware acceleration + backend = self.cap.getBackendName() + logger.info(f"Camera {self.camera_id}: Using FFMPEG hardware acceleration (backend: {backend})") except Exception as e: - logger.debug(f"Camera {self.camera_id}: FFMPEG CUVID not available: {e}") + logger.debug(f"Camera {self.camera_id}: FFMPEG hardware acceleration not available: {e}") - # Method 3: Try VAAPI hardware acceleration (for Intel/AMD GPUs) + # Fallback to basic CUVID + try: + import os + os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'video_codec;h264_cuvid|rtsp_transport;tcp|hwaccel;cuda' + self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) + + if self.cap.isOpened(): + hw_accel_success = True + logger.info(f"Camera {self.camera_id}: Using basic FFMPEG CUVID hardware acceleration") + except Exception as e2: + logger.debug(f"Camera {self.camera_id}: Basic CUVID also failed: {e2}") + + # Method 4: Try VAAPI hardware acceleration (for Intel/AMD GPUs) if not hw_accel_success: try: gst_pipeline = ( diff --git a/core/utils/ffmpeg_detector.py b/core/utils/ffmpeg_detector.py new file mode 100644 index 0000000..a3cf8fc --- /dev/null +++ b/core/utils/ffmpeg_detector.py @@ -0,0 +1,214 @@ +""" +FFmpeg hardware acceleration detection and configuration +""" + +import subprocess +import logging +import re +from typing import Dict, List, Optional + +logger = logging.getLogger("detector_worker") + + +class FFmpegCapabilities: + """Detect and configure FFmpeg hardware acceleration capabilities.""" + + def __init__(self): + """Initialize FFmpeg capabilities detector.""" + self.hwaccels = [] + self.codecs = {} + self.nvidia_support = False + self.vaapi_support = False + self.qsv_support = False + + self._detect_capabilities() + + def _detect_capabilities(self): + """Detect available hardware acceleration methods.""" + try: + # Get hardware accelerators + result = subprocess.run( + ['ffmpeg', '-hide_banner', '-hwaccels'], + capture_output=True, text=True, timeout=10 + ) + if result.returncode == 0: + self.hwaccels = [line.strip() for line in result.stdout.strip().split('\n')[1:] if line.strip()] + logger.info(f"Available FFmpeg hardware accelerators: {', '.join(self.hwaccels)}") + + # Check for NVIDIA support + self.nvidia_support = any(hw in self.hwaccels for hw in ['cuda', 'cuvid', 'nvdec']) + self.vaapi_support = 'vaapi' in self.hwaccels + self.qsv_support = 'qsv' in self.hwaccels + + # Get decoder information + self._detect_decoders() + + # Log capabilities + if self.nvidia_support: + logger.info("NVIDIA hardware acceleration available (CUDA/CUVID/NVDEC)") + if self.vaapi_support: + logger.info("VAAPI hardware acceleration available") + if self.qsv_support: + logger.info("Intel QuickSync hardware acceleration available") + + except Exception as e: + logger.warning(f"Failed to detect FFmpeg capabilities: {e}") + + def _detect_decoders(self): + """Detect available hardware decoders.""" + try: + result = subprocess.run( + ['ffmpeg', '-hide_banner', '-decoders'], + capture_output=True, text=True, timeout=10 + ) + if result.returncode == 0: + # Parse decoder output to find hardware decoders + for line in result.stdout.split('\n'): + if 'cuvid' in line or 'nvdec' in line: + match = re.search(r'(\w+)\s+.*?(\w+(?:_cuvid|_nvdec))', line) + if match: + codec_type, decoder = match.groups() + if 'h264' in decoder: + self.codecs['h264_hw'] = decoder + elif 'hevc' in decoder or 'h265' in decoder: + self.codecs['h265_hw'] = decoder + elif 'vaapi' in line: + match = re.search(r'(\w+)\s+.*?(\w+_vaapi)', line) + if match: + codec_type, decoder = match.groups() + if 'h264' in decoder: + self.codecs['h264_vaapi'] = decoder + + except Exception as e: + logger.debug(f"Failed to detect decoders: {e}") + + def get_optimal_capture_options(self, codec: str = 'h264') -> Dict[str, str]: + """ + Get optimal FFmpeg capture options for the given codec. + + Args: + codec: Video codec (h264, h265, etc.) + + Returns: + Dictionary of FFmpeg options + """ + options = { + 'rtsp_transport': 'tcp', + 'buffer_size': '1024k', + 'max_delay': '500000', # 500ms + 'fflags': '+genpts', + 'flags': '+low_delay', + 'probesize': '32', + 'analyzeduration': '0' + } + + # Add hardware acceleration if available + if self.nvidia_support: + if codec == 'h264' and 'h264_hw' in self.codecs: + options.update({ + 'hwaccel': 'cuda', + 'hwaccel_device': '0', + 'video_codec': 'h264_cuvid', + 'hwaccel_output_format': 'cuda' + }) + logger.debug("Using NVIDIA CUVID hardware acceleration for H.264") + elif codec == 'h265' and 'h265_hw' in self.codecs: + options.update({ + 'hwaccel': 'cuda', + 'hwaccel_device': '0', + 'video_codec': 'hevc_cuvid', + 'hwaccel_output_format': 'cuda' + }) + logger.debug("Using NVIDIA CUVID hardware acceleration for H.265") + + elif self.vaapi_support: + if codec == 'h264': + options.update({ + 'hwaccel': 'vaapi', + 'hwaccel_device': '/dev/dri/renderD128', + 'video_codec': 'h264_vaapi' + }) + logger.debug("Using VAAPI hardware acceleration") + + return options + + def format_opencv_options(self, options: Dict[str, str]) -> str: + """ + Format options for OpenCV FFmpeg backend. + + Args: + options: Dictionary of FFmpeg options + + Returns: + Formatted options string for OpenCV + """ + return '|'.join(f"{key};{value}" for key, value in options.items()) + + def get_hardware_encoder_options(self, codec: str = 'h264', quality: str = 'fast') -> Dict[str, str]: + """ + Get optimal hardware encoding options. + + Args: + codec: Video codec for encoding + quality: Quality preset (fast, medium, slow) + + Returns: + Dictionary of encoding options + """ + options = {} + + if self.nvidia_support: + if codec == 'h264': + options.update({ + 'video_codec': 'h264_nvenc', + 'preset': quality, + 'tune': 'zerolatency', + 'gpu': '0', + 'rc': 'cbr_hq', + 'surfaces': '64' + }) + elif codec == 'h265': + options.update({ + 'video_codec': 'hevc_nvenc', + 'preset': quality, + 'tune': 'zerolatency', + 'gpu': '0' + }) + + elif self.vaapi_support: + if codec == 'h264': + options.update({ + 'video_codec': 'h264_vaapi', + 'vaapi_device': '/dev/dri/renderD128' + }) + + return options + + +# Global instance +_ffmpeg_caps = None + +def get_ffmpeg_capabilities() -> FFmpegCapabilities: + """Get or create the global FFmpeg capabilities instance.""" + global _ffmpeg_caps + if _ffmpeg_caps is None: + _ffmpeg_caps = FFmpegCapabilities() + return _ffmpeg_caps + +def get_optimal_rtsp_options(rtsp_url: str) -> str: + """ + Get optimal OpenCV FFmpeg options for RTSP streaming. + + Args: + rtsp_url: RTSP stream URL + + Returns: + Formatted options string for cv2.VideoCapture + """ + caps = get_ffmpeg_capabilities() + + # Detect codec from URL or assume H.264 + codec = 'h265' if any(x in rtsp_url.lower() for x in ['h265', 'hevc']) else 'h264' + + options = caps.get_optimal_capture_options(codec) + return caps.format_opencv_options(options) \ No newline at end of file From a45f76884fd18d50918f573490fd2d441d08b865 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Thu, 25 Sep 2025 23:23:56 +0700 Subject: [PATCH 05/28] fix: make ffmpeg support --- Dockerfile.base | 117 +++++++++++++++++------------ README-hardware-acceleration.md | 127 -------------------------------- core/streaming/readers.py | 89 ++++++++-------------- 3 files changed, 102 insertions(+), 231 deletions(-) delete mode 100644 README-hardware-acceleration.md diff --git a/Dockerfile.base b/Dockerfile.base index 620f4d8..9fd9020 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -13,44 +13,39 @@ RUN apt-get update && apt-get install -y \ yasm \ nasm \ # System libraries - libgl1 \ + libgl1-mesa-glx \ libglib2.0-0 \ - libgtk-3-0 \ libgomp1 \ - # Media libraries for FFmpeg build + # Core media libraries (essential ones only) libjpeg-dev \ libpng-dev \ - libtiff-dev \ libx264-dev \ libx265-dev \ libvpx-dev \ - libfdk-aac-dev \ libmp3lame-dev \ - libopus-dev \ libv4l-dev \ - libxvidcore-dev \ - libdc1394-22-dev \ # TurboJPEG for fast JPEG encoding libturbojpeg0-dev \ - # GStreamer complete stack - libgstreamer1.0-dev \ - libgstreamer-plugins-base1.0-dev \ - libgstreamer-plugins-bad1.0-dev \ - gstreamer1.0-tools \ - gstreamer1.0-plugins-base \ - gstreamer1.0-plugins-good \ - gstreamer1.0-plugins-bad \ - gstreamer1.0-plugins-ugly \ - gstreamer1.0-libav \ - gstreamer1.0-vaapi \ - python3-gst-1.0 \ # Python development python3-dev \ python3-numpy \ - # NVIDIA driver components + && rm -rf /var/lib/apt/lists/* + +# Install CUDA development tools (required for FFmpeg CUDA compilation) +RUN apt-get update && apt-get install -y \ + cuda-nvcc-12-6 \ + libcuda1 \ + cuda-cudart-dev-12-6 \ + cuda-driver-dev-12-6 \ + || echo "CUDA development packages not available, continuing without them" && \ + rm -rf /var/lib/apt/lists/* + +# Try to install NVIDIA packages (may not be available in all environments) +RUN apt-get update && apt-get install -y \ libnvidia-encode-535 \ libnvidia-decode-535 \ - && rm -rf /var/lib/apt/lists/* + || echo "NVIDIA packages not available, continuing without them" && \ + rm -rf /var/lib/apt/lists/* # Install NVIDIA Video Codec SDK headers RUN cd /tmp && \ @@ -60,33 +55,60 @@ RUN cd /tmp && \ make install && \ rm -rf /tmp/* -# Build FFmpeg from source with full NVIDIA hardware acceleration +# Build FFmpeg from source with NVIDIA CUVID support ENV FFMPEG_VERSION=6.0 +# Ensure CUDA paths are available for FFmpeg compilation +ENV PATH="/usr/local/cuda/bin:${PATH}" +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" RUN cd /tmp && \ wget https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \ tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \ cd ffmpeg-${FFMPEG_VERSION} && \ - ./configure \ + # Configure with explicit CUVID support (with fallback) + (./configure \ --enable-gpl \ --enable-nonfree \ + --enable-shared \ --enable-libx264 \ --enable-libx265 \ --enable-libvpx \ - --enable-libfdk-aac \ --enable-libmp3lame \ - --enable-libopus \ --enable-cuda-nvcc \ - --enable-cuvid \ - --enable-nvenc \ - --enable-nvdec \ --enable-cuda-llvm \ + --enable-cuvid \ + --enable-nvdec \ + --enable-nvenc \ --enable-libnpp \ - --extra-cflags=-I/usr/local/cuda/include \ - --extra-ldflags=-L/usr/local/cuda/lib64 \ - --nvccflags="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_89,code=sm_89 -gencode arch=compute_90,code=sm_90" && \ - make -j$(nproc) && \ + --enable-decoder=h264_cuvid \ + --enable-decoder=hevc_cuvid \ + --enable-decoder=mjpeg_cuvid \ + --enable-decoder=mpeg1_cuvid \ + --enable-decoder=mpeg2_cuvid \ + --enable-decoder=mpeg4_cuvid \ + --enable-decoder=vc1_cuvid \ + --enable-encoder=h264_nvenc \ + --enable-encoder=hevc_nvenc \ + --extra-cflags="-I/usr/local/cuda/include" \ + --extra-ldflags="-L/usr/local/cuda/lib64" \ + --extra-libs="-lcuda -lcudart -lnvcuvid -lnvidia-encode" \ + --nvccflags="-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" \ + || echo "CUDA configuration failed, trying basic configuration..." && \ + ./configure \ + --enable-gpl \ + --enable-nonfree \ + --enable-shared \ + --enable-libx264 \ + --enable-libx265 \ + --enable-libvpx \ + --enable-libmp3lame) \ + && make -j$(nproc) && \ make install && \ ldconfig && \ + # Verify CUVID decoders are available + echo "=== Verifying FFmpeg CUVID Support ===" && \ + ffmpeg -hide_banner -decoders 2>/dev/null | grep cuvid && \ + echo "=== Verifying FFmpeg NVENC Support ===" && \ + ffmpeg -hide_banner -encoders 2>/dev/null | grep nvenc && \ cd / && rm -rf /tmp/* # Build OpenCV from source with custom FFmpeg and full CUDA support @@ -111,15 +133,14 @@ RUN cd /tmp && \ -D WITH_CUVID=ON \ -D BUILD_opencv_cudacodec=ON \ -D WITH_FFMPEG=ON \ - -D WITH_GSTREAMER=ON \ -D WITH_LIBV4L=ON \ -D BUILD_opencv_python3=ON \ -D OPENCV_GENERATE_PKGCONFIG=ON \ -D OPENCV_ENABLE_NONFREE=ON \ -D OPENCV_EXTRA_MODULES_PATH=/tmp/opencv_contrib-${OPENCV_VERSION}/modules \ -D PYTHON3_EXECUTABLE=$(which python3) \ - -D PYTHON_INCLUDE_DIR=$(python3 -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \ - -D PYTHON_LIBRARY=$(python3 -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))") \ + -D PYTHON_INCLUDE_DIR=$(python3 -c "import sysconfig; print(sysconfig.get_path('include'))") \ + -D PYTHON_LIBRARY=$(python3 -c "import sysconfig; print(sysconfig.get_config_var('LIBDIR'))") \ -D BUILD_EXAMPLES=OFF \ -D BUILD_TESTS=OFF \ -D BUILD_PERF_TESTS=OFF \ @@ -133,7 +154,6 @@ RUN cd /tmp && \ ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/lib:${LD_LIBRARY_PATH}" ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH}" ENV PYTHONPATH="/usr/local/lib/python3.10/dist-packages:${PYTHONPATH}" -ENV GST_PLUGIN_PATH="/usr/lib/x86_64-linux-gnu/gstreamer-1.0" # Optimized environment variables for hardware acceleration ENV OPENCV_FFMPEG_CAPTURE_OPTIONS="rtsp_transport;tcp|hwaccel;cuda|hwaccel_device;0|video_codec;h264_cuvid|hwaccel_output_format;cuda" @@ -151,16 +171,21 @@ RUN grep -v opencv-python requirements.base.txt > requirements.tmp && \ # Verify complete hardware acceleration setup RUN echo "=== Hardware Acceleration Verification ===" && \ echo "FFmpeg Hardware Accelerators:" && \ - ffmpeg -hide_banner -hwaccels 2>/dev/null | head -10 && \ - echo "FFmpeg NVIDIA Decoders:" && \ - ffmpeg -hide_banner -decoders 2>/dev/null | grep -E "(cuvid|nvdec)" | head -5 && \ - echo "FFmpeg NVIDIA Encoders:" && \ - ffmpeg -hide_banner -encoders 2>/dev/null | grep nvenc | head -5 && \ + (ffmpeg -hide_banner -hwaccels 2>/dev/null || echo "FFmpeg hwaccels command failed") && \ + echo "" && \ + echo "FFmpeg CUVID Decoders (NVIDIA):" && \ + (ffmpeg -hide_banner -decoders 2>/dev/null | grep -E "cuvid" || echo "No CUVID decoders found") && \ + echo "" && \ + echo "FFmpeg NVENC Encoders (NVIDIA):" && \ + (ffmpeg -hide_banner -encoders 2>/dev/null | grep -E "nvenc" || echo "No NVENC encoders found") && \ + echo "" && \ + echo "Testing CUVID decoder compilation (no GPU required):" && \ + (ffmpeg -hide_banner -f lavfi -i testsrc=duration=0.1:size=64x64:rate=1 -c:v libx264 -f null - 2>/dev/null && echo "✅ FFmpeg basic functionality working" || echo "❌ FFmpeg basic test failed") && \ + echo "" && \ echo "OpenCV Configuration:" && \ - python3 -c "import cv2; print('OpenCV version:', cv2.__version__); print('CUDA devices:', cv2.cuda.getCudaEnabledDeviceCount()); build_info = cv2.getBuildInformation(); print('CUDA support:', 'CUDA' in build_info); print('CUVID support:', 'CUVID' in build_info); print('FFmpeg support:', 'FFMPEG' in build_info); print('GStreamer support:', 'GStreamer' in build_info)" && \ - echo "GStreamer NVIDIA Plugins:" && \ - gst-inspect-1.0 2>/dev/null | grep -E "(nvv4l2|nvvideo)" | head -5 || echo "GStreamer NVIDIA plugins not detected" && \ - echo "=== Verification Complete ===" + (python3 -c "import cv2; print('OpenCV version:', cv2.__version__); build_info = cv2.getBuildInformation(); print('CUDA support:', 'CUDA' in build_info); print('CUVID support:', 'CUVID' in build_info); print('FFmpeg support:', 'FFMPEG' in build_info)" || echo "OpenCV verification failed") && \ + echo "" && \ + echo "=== Verification Complete (build-time only) ===" # Set working directory WORKDIR /app diff --git a/README-hardware-acceleration.md b/README-hardware-acceleration.md deleted file mode 100644 index 69c6e09..0000000 --- a/README-hardware-acceleration.md +++ /dev/null @@ -1,127 +0,0 @@ -# Hardware Acceleration Setup - -This detector worker now includes **complete NVIDIA hardware acceleration** with FFmpeg and OpenCV built from source. - -## What's Included - -### 🔧 Complete Hardware Stack -- **FFmpeg 6.0** built from source with NVIDIA Video Codec SDK -- **OpenCV 4.8.1** built with CUDA and custom FFmpeg integration -- **GStreamer** with NVDEC/VAAPI plugins -- **TurboJPEG** for optimized JPEG encoding (3-5x faster) -- **CUDA** support for YOLO model inference - -### 🎯 Hardware Acceleration Methods (Automatic Detection) -1. **GStreamer NVDEC** - Best for RTSP streaming, lowest latency -2. **OpenCV CUDA** - Direct GPU memory access, best integration -3. **FFmpeg CUVID** - Custom build with full NVIDIA acceleration -4. **VAAPI** - Intel/AMD GPU support -5. **Software Fallback** - CPU-only as last resort - -## Build and Run - -### Single Build Script -```bash -./build-nvdec.sh -``` -**Build time**: 45-90 minutes (compiles FFmpeg + OpenCV from source) - -### Run with GPU Support -```bash -docker run --gpus all -p 8000:8000 detector-worker:complete-hw-accel -``` - -## Performance Improvements - -### Expected CPU Reduction -- **Video decoding**: 70-90% reduction (moved to GPU) -- **JPEG encoding**: 70-80% faster with TurboJPEG -- **Model inference**: GPU accelerated with CUDA -- **Overall system**: 50-80% less CPU usage - -### Profiling Results Comparison -**Before (Software Only)**: -- `cv2.imencode`: 6.5% CPU time (1.95s out of 30s) -- `psutil.cpu_percent`: 88% CPU time (idle polling) -- Video decoding: 100% CPU - -**After (Hardware Accelerated)**: -- Video decoding: GPU (~5-10% CPU overhead) -- JPEG encoding: 3-5x faster with TurboJPEG -- Model inference: GPU accelerated - -## Verification - -### Check Hardware Acceleration Support -```bash -docker run --rm --gpus all detector-worker:complete-hw-accel \ - bash -c "ffmpeg -hwaccels && python3 -c 'import cv2; build=cv2.getBuildInformation(); print(\"CUDA:\", \"CUDA\" in build); print(\"CUVID:\", \"CUVID\" in build)'" -``` - -### Runtime Logs -The application will automatically log which acceleration method is being used: -``` -Camera cam1: Successfully using GStreamer with NVDEC hardware acceleration -Camera cam2: Using FFMPEG hardware acceleration (backend: FFMPEG) -Camera cam3: Using OpenCV CUDA hardware acceleration -``` - -## Files Modified - -### Docker Configuration -- **Dockerfile.base** - Complete hardware acceleration stack -- **build-nvdec.sh** - Single build script for everything - -### Application Code -- **core/streaming/readers.py** - Multi-method hardware acceleration -- **core/utils/hardware_encoder.py** - TurboJPEG + NVENC encoding -- **core/utils/ffmpeg_detector.py** - Runtime capability detection -- **requirements.base.txt** - Added TurboJPEG, removed opencv-python - -## Architecture - -``` -Input RTSP Stream - ↓ -1. GStreamer NVDEC Pipeline (NVIDIA GPU) - rtspsrc → nvv4l2decoder → nvvideoconvert → OpenCV - ↓ -2. OpenCV CUDA Backend (NVIDIA GPU) - OpenCV with CUDA acceleration - ↓ -3. FFmpeg CUVID (NVIDIA GPU) - Custom FFmpeg with h264_cuvid decoder - ↓ -4. VAAPI (Intel/AMD GPU) - Hardware acceleration for non-NVIDIA - ↓ -5. Software Fallback (CPU) - Standard OpenCV software decoding -``` - -## Benefits - -### For Development -- **Single Dockerfile.base** - Everything consolidated -- **Automatic detection** - No manual configuration needed -- **Graceful fallback** - Works without GPU for development - -### For Production -- **Maximum performance** - Uses best available acceleration -- **GPU memory efficiency** - Direct GPU-to-GPU pipeline -- **Lower latency** - Hardware decoding + CUDA inference -- **Reduced CPU load** - Frees CPU for other tasks - -## Troubleshooting - -### Build Issues -- Ensure NVIDIA Docker runtime is installed -- Check CUDA 12.6 compatibility with your GPU -- Build takes 45-90 minutes - be patient - -### Runtime Issues -- Verify `nvidia-smi` works in container -- Check logs for acceleration method being used -- Fallback to software decoding is automatic - -This setup provides **production-ready hardware acceleration** with automatic detection and graceful fallback for maximum compatibility. \ No newline at end of file diff --git a/core/streaming/readers.py b/core/streaming/readers.py index 377db56..9a3db6d 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -166,40 +166,17 @@ class RTSPReader: logger.info(f"RTSP reader thread ended for camera {self.camera_id}") def _initialize_capture(self) -> bool: - """Initialize video capture with hardware acceleration (NVDEC) for 1280x720@6fps.""" + """Initialize video capture with FFmpeg hardware acceleration (CUVID/NVDEC) for 1280x720@6fps.""" try: # Release previous capture if exists if self.cap: self.cap.release() time.sleep(0.5) - logger.info(f"Initializing capture for camera {self.camera_id} with hardware acceleration") + logger.info(f"Initializing capture for camera {self.camera_id} with FFmpeg hardware acceleration") hw_accel_success = False - # Method 1: Try GStreamer with NVDEC (most efficient on NVIDIA GPUs) - if not hw_accel_success: - try: - # Build GStreamer pipeline for NVIDIA hardware decoding - gst_pipeline = ( - f"rtspsrc location={self.rtsp_url} protocols=tcp latency=100 ! " - "rtph264depay ! h264parse ! " - "nvv4l2decoder ! " # NVIDIA hardware decoder - "nvvideoconvert ! " # NVIDIA hardware color conversion - "video/x-raw,format=BGRx,width=1280,height=720 ! " - "videoconvert ! " - "video/x-raw,format=BGR ! " - "appsink max-buffers=1 drop=true sync=false" - ) - logger.info(f"Attempting GStreamer NVDEC pipeline for camera {self.camera_id}") - self.cap = cv2.VideoCapture(gst_pipeline, cv2.CAP_GSTREAMER) - - if self.cap.isOpened(): - hw_accel_success = True - logger.info(f"Camera {self.camera_id}: Successfully using GStreamer with NVDEC hardware acceleration") - except Exception as e: - logger.debug(f"Camera {self.camera_id}: GStreamer NVDEC not available: {e}") - - # Method 2: Try OpenCV CUDA VideoReader (if built with CUVID support) + # Method 1: Try OpenCV CUDA VideoReader (if built with CUVID support) if not hw_accel_success: try: # Check if OpenCV was built with CUDA codec support @@ -220,7 +197,7 @@ class RTSPReader: except Exception as e: logger.debug(f"Camera {self.camera_id}: OpenCV CUDA not available: {e}") - # Method 3: Try FFMPEG with optimal hardware acceleration (CUVID/VAAPI) + # Method 2: Try FFmpeg with optimal hardware acceleration (CUVID/NVDEC) if not hw_accel_success: try: from core.utils.ffmpeg_detector import get_optimal_rtsp_options @@ -230,7 +207,7 @@ class RTSPReader: optimal_options = get_optimal_rtsp_options(self.rtsp_url) os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = optimal_options - logger.info(f"Attempting FFMPEG with detected hardware acceleration for camera {self.camera_id}") + logger.info(f"Attempting FFmpeg with detected hardware acceleration for camera {self.camera_id}") logger.debug(f"Camera {self.camera_id}: Using FFmpeg options: {optimal_options}") self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) @@ -239,45 +216,41 @@ class RTSPReader: hw_accel_success = True # Try to get backend info to confirm hardware acceleration backend = self.cap.getBackendName() - logger.info(f"Camera {self.camera_id}: Using FFMPEG hardware acceleration (backend: {backend})") + logger.info(f"Camera {self.camera_id}: Using FFmpeg hardware acceleration (backend: {backend})") except Exception as e: - logger.debug(f"Camera {self.camera_id}: FFMPEG hardware acceleration not available: {e}") + logger.debug(f"Camera {self.camera_id}: FFmpeg optimal hardware acceleration not available: {e}") - # Fallback to basic CUVID - try: - import os - os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'video_codec;h264_cuvid|rtsp_transport;tcp|hwaccel;cuda' - self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) - - if self.cap.isOpened(): - hw_accel_success = True - logger.info(f"Camera {self.camera_id}: Using basic FFMPEG CUVID hardware acceleration") - except Exception as e2: - logger.debug(f"Camera {self.camera_id}: Basic CUVID also failed: {e2}") - - # Method 4: Try VAAPI hardware acceleration (for Intel/AMD GPUs) + # Method 3: Try FFmpeg with basic NVIDIA CUVID if not hw_accel_success: try: - gst_pipeline = ( - f"rtspsrc location={self.rtsp_url} protocols=tcp latency=100 ! " - "rtph264depay ! h264parse ! " - "vaapih264dec ! " # VAAPI hardware decoder - "vaapipostproc ! " - "video/x-raw,format=BGRx,width=1280,height=720 ! " - "videoconvert ! " - "video/x-raw,format=BGR ! " - "appsink max-buffers=1 drop=true sync=false" - ) - logger.info(f"Attempting GStreamer VAAPI pipeline for camera {self.camera_id}") - self.cap = cv2.VideoCapture(gst_pipeline, cv2.CAP_GSTREAMER) + import os + os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'video_codec;h264_cuvid|rtsp_transport;tcp|hwaccel;cuda|hwaccel_device;0' + + logger.info(f"Attempting FFmpeg with basic CUVID for camera {self.camera_id}") + self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) if self.cap.isOpened(): hw_accel_success = True - logger.info(f"Camera {self.camera_id}: Successfully using GStreamer with VAAPI hardware acceleration") + logger.info(f"Camera {self.camera_id}: Using FFmpeg CUVID hardware acceleration") except Exception as e: - logger.debug(f"Camera {self.camera_id}: GStreamer VAAPI not available: {e}") + logger.debug(f"Camera {self.camera_id}: FFmpeg CUVID not available: {e}") - # Fallback: Standard FFMPEG with software decoding + # Method 4: Try FFmpeg with VAAPI (Intel/AMD GPUs) + if not hw_accel_success: + try: + import os + os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'hwaccel;vaapi|hwaccel_device;/dev/dri/renderD128|video_codec;h264|rtsp_transport;tcp' + + logger.info(f"Attempting FFmpeg with VAAPI for camera {self.camera_id}") + self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) + + if self.cap.isOpened(): + hw_accel_success = True + logger.info(f"Camera {self.camera_id}: Using FFmpeg VAAPI hardware acceleration") + except Exception as e: + logger.debug(f"Camera {self.camera_id}: FFmpeg VAAPI not available: {e}") + + # Fallback: Standard FFmpeg with software decoding if not hw_accel_success: logger.warning(f"Camera {self.camera_id}: Hardware acceleration not available, falling back to software decoding") import os From ff56c1b666072a1f6fd1f8f0eb52a62f8e0918a4 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Thu, 25 Sep 2025 23:36:07 +0700 Subject: [PATCH 06/28] fix: dockerfile base --- Dockerfile.base | 75 +++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 50 deletions(-) diff --git a/Dockerfile.base b/Dockerfile.base index 9fd9020..557a88e 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -47,7 +47,13 @@ RUN apt-get update && apt-get install -y \ || echo "NVIDIA packages not available, continuing without them" && \ rm -rf /var/lib/apt/lists/* -# Install NVIDIA Video Codec SDK headers +# Use pre-built FFmpeg with CUDA support using the build script +ENV FFMPEG_BUILD_SCRIPT_VERSION=1.43 +# Ensure CUDA paths are available +ENV PATH="/usr/local/cuda/bin:${PATH}" +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" + +# Install NVIDIA Video Codec SDK headers first RUN cd /tmp && \ wget https://github.com/FFmpeg/nv-codec-headers/archive/refs/tags/n12.1.14.0.zip && \ unzip n12.1.14.0.zip && \ @@ -55,60 +61,29 @@ RUN cd /tmp && \ make install && \ rm -rf /tmp/* -# Build FFmpeg from source with NVIDIA CUVID support -ENV FFMPEG_VERSION=6.0 -# Ensure CUDA paths are available for FFmpeg compilation -ENV PATH="/usr/local/cuda/bin:${PATH}" -ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" +# Build FFmpeg using the well-maintained build script with CUDA support RUN cd /tmp && \ - wget https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \ - tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \ - cd ffmpeg-${FFMPEG_VERSION} && \ - # Configure with explicit CUVID support (with fallback) - (./configure \ - --enable-gpl \ - --enable-nonfree \ - --enable-shared \ - --enable-libx264 \ - --enable-libx265 \ - --enable-libvpx \ - --enable-libmp3lame \ - --enable-cuda-nvcc \ - --enable-cuda-llvm \ - --enable-cuvid \ - --enable-nvdec \ - --enable-nvenc \ - --enable-libnpp \ - --enable-decoder=h264_cuvid \ - --enable-decoder=hevc_cuvid \ - --enable-decoder=mjpeg_cuvid \ - --enable-decoder=mpeg1_cuvid \ - --enable-decoder=mpeg2_cuvid \ - --enable-decoder=mpeg4_cuvid \ - --enable-decoder=vc1_cuvid \ - --enable-encoder=h264_nvenc \ - --enable-encoder=hevc_nvenc \ - --extra-cflags="-I/usr/local/cuda/include" \ - --extra-ldflags="-L/usr/local/cuda/lib64" \ - --extra-libs="-lcuda -lcudart -lnvcuvid -lnvidia-encode" \ - --nvccflags="-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" \ - || echo "CUDA configuration failed, trying basic configuration..." && \ - ./configure \ - --enable-gpl \ - --enable-nonfree \ - --enable-shared \ - --enable-libx264 \ - --enable-libx265 \ - --enable-libvpx \ - --enable-libmp3lame) \ - && make -j$(nproc) && \ - make install && \ + echo "Building FFmpeg with CUDA support using build script..." && \ + curl -sL "https://raw.githubusercontent.com/markus-perl/ffmpeg-build-script/master/build-ffmpeg" -o build-ffmpeg && \ + chmod +x build-ffmpeg && \ + # Configure the build script for CUDA support + SKIPINSTALL=yes \ + AUTOINSTALL=yes \ + ./build-ffmpeg \ + --build \ + --enable-gpl-and-non-free \ + --latest \ + --cuda \ + && \ + # Copy built binaries to system paths + cp workspace/bin/* /usr/local/bin/ && \ + cp workspace/lib/* /usr/local/lib/ && \ ldconfig && \ # Verify CUVID decoders are available echo "=== Verifying FFmpeg CUVID Support ===" && \ - ffmpeg -hide_banner -decoders 2>/dev/null | grep cuvid && \ + (ffmpeg -hide_banner -decoders 2>/dev/null | grep cuvid || echo "No CUVID decoders found") && \ echo "=== Verifying FFmpeg NVENC Support ===" && \ - ffmpeg -hide_banner -encoders 2>/dev/null | grep nvenc && \ + (ffmpeg -hide_banner -encoders 2>/dev/null | grep nvenc || echo "No NVENC encoders found") && \ cd / && rm -rf /tmp/* # Build OpenCV from source with custom FFmpeg and full CUDA support From 47d4fa6b8f10099eb04e06d454ec84428e2220c2 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Thu, 25 Sep 2025 23:48:35 +0700 Subject: [PATCH 07/28] refactor: streamline FFmpeg installation process and remove unnecessary CUDA development tools --- Dockerfile.base | 102 +++++------------------------------------------- 1 file changed, 10 insertions(+), 92 deletions(-) diff --git a/Dockerfile.base b/Dockerfile.base index 557a88e..e2baf08 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -31,24 +31,7 @@ RUN apt-get update && apt-get install -y \ python3-numpy \ && rm -rf /var/lib/apt/lists/* -# Install CUDA development tools (required for FFmpeg CUDA compilation) -RUN apt-get update && apt-get install -y \ - cuda-nvcc-12-6 \ - libcuda1 \ - cuda-cudart-dev-12-6 \ - cuda-driver-dev-12-6 \ - || echo "CUDA development packages not available, continuing without them" && \ - rm -rf /var/lib/apt/lists/* - -# Try to install NVIDIA packages (may not be available in all environments) -RUN apt-get update && apt-get install -y \ - libnvidia-encode-535 \ - libnvidia-decode-535 \ - || echo "NVIDIA packages not available, continuing without them" && \ - rm -rf /var/lib/apt/lists/* - -# Use pre-built FFmpeg with CUDA support using the build script -ENV FFMPEG_BUILD_SCRIPT_VERSION=1.43 +# Install prebuilt FFmpeg with CUDA support # Ensure CUDA paths are available ENV PATH="/usr/local/cuda/bin:${PATH}" ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" @@ -61,23 +44,16 @@ RUN cd /tmp && \ make install && \ rm -rf /tmp/* -# Build FFmpeg using the well-maintained build script with CUDA support +# Download and install prebuilt FFmpeg with CUDA support RUN cd /tmp && \ - echo "Building FFmpeg with CUDA support using build script..." && \ - curl -sL "https://raw.githubusercontent.com/markus-perl/ffmpeg-build-script/master/build-ffmpeg" -o build-ffmpeg && \ - chmod +x build-ffmpeg && \ - # Configure the build script for CUDA support - SKIPINSTALL=yes \ - AUTOINSTALL=yes \ - ./build-ffmpeg \ - --build \ - --enable-gpl-and-non-free \ - --latest \ - --cuda \ - && \ - # Copy built binaries to system paths - cp workspace/bin/* /usr/local/bin/ && \ - cp workspace/lib/* /usr/local/lib/ && \ + echo "Installing prebuilt FFmpeg with CUDA support..." && \ + wget https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz && \ + tar -xf ffmpeg-master-latest-linux64-gpl.tar.xz && \ + cd ffmpeg-master-latest-linux64-gpl && \ + # Copy binaries to system paths + cp bin/* /usr/local/bin/ && \ + cp -r lib/* /usr/local/lib/ && \ + cp -r include/* /usr/local/include/ && \ ldconfig && \ # Verify CUVID decoders are available echo "=== Verifying FFmpeg CUVID Support ===" && \ @@ -86,45 +62,6 @@ RUN cd /tmp && \ (ffmpeg -hide_banner -encoders 2>/dev/null | grep nvenc || echo "No NVENC encoders found") && \ cd / && rm -rf /tmp/* -# Build OpenCV from source with custom FFmpeg and full CUDA support -ENV OPENCV_VERSION=4.8.1 -RUN cd /tmp && \ - wget -O opencv.zip https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip && \ - wget -O opencv_contrib.zip https://github.com/opencv/opencv_contrib/archive/${OPENCV_VERSION}.zip && \ - unzip opencv.zip && \ - unzip opencv_contrib.zip && \ - cd opencv-${OPENCV_VERSION} && \ - mkdir build && cd build && \ - PKG_CONFIG_PATH=/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH \ - cmake -D CMAKE_BUILD_TYPE=RELEASE \ - -D CMAKE_INSTALL_PREFIX=/usr/local \ - -D WITH_CUDA=ON \ - -D WITH_CUDNN=ON \ - -D OPENCV_DNN_CUDA=ON \ - -D ENABLE_FAST_MATH=ON \ - -D CUDA_FAST_MATH=ON \ - -D WITH_CUBLAS=ON \ - -D WITH_NVCUVID=ON \ - -D WITH_CUVID=ON \ - -D BUILD_opencv_cudacodec=ON \ - -D WITH_FFMPEG=ON \ - -D WITH_LIBV4L=ON \ - -D BUILD_opencv_python3=ON \ - -D OPENCV_GENERATE_PKGCONFIG=ON \ - -D OPENCV_ENABLE_NONFREE=ON \ - -D OPENCV_EXTRA_MODULES_PATH=/tmp/opencv_contrib-${OPENCV_VERSION}/modules \ - -D PYTHON3_EXECUTABLE=$(which python3) \ - -D PYTHON_INCLUDE_DIR=$(python3 -c "import sysconfig; print(sysconfig.get_path('include'))") \ - -D PYTHON_LIBRARY=$(python3 -c "import sysconfig; print(sysconfig.get_config_var('LIBDIR'))") \ - -D BUILD_EXAMPLES=OFF \ - -D BUILD_TESTS=OFF \ - -D BUILD_PERF_TESTS=OFF \ - .. && \ - make -j$(nproc) && \ - make install && \ - ldconfig && \ - cd / && rm -rf /tmp/* - # Set environment variables for maximum hardware acceleration ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/lib:${LD_LIBRARY_PATH}" ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH}" @@ -143,25 +80,6 @@ RUN grep -v opencv-python requirements.base.txt > requirements.tmp && \ mv requirements.tmp requirements.base.txt && \ pip install --no-cache-dir -r requirements.base.txt -# Verify complete hardware acceleration setup -RUN echo "=== Hardware Acceleration Verification ===" && \ - echo "FFmpeg Hardware Accelerators:" && \ - (ffmpeg -hide_banner -hwaccels 2>/dev/null || echo "FFmpeg hwaccels command failed") && \ - echo "" && \ - echo "FFmpeg CUVID Decoders (NVIDIA):" && \ - (ffmpeg -hide_banner -decoders 2>/dev/null | grep -E "cuvid" || echo "No CUVID decoders found") && \ - echo "" && \ - echo "FFmpeg NVENC Encoders (NVIDIA):" && \ - (ffmpeg -hide_banner -encoders 2>/dev/null | grep -E "nvenc" || echo "No NVENC encoders found") && \ - echo "" && \ - echo "Testing CUVID decoder compilation (no GPU required):" && \ - (ffmpeg -hide_banner -f lavfi -i testsrc=duration=0.1:size=64x64:rate=1 -c:v libx264 -f null - 2>/dev/null && echo "✅ FFmpeg basic functionality working" || echo "❌ FFmpeg basic test failed") && \ - echo "" && \ - echo "OpenCV Configuration:" && \ - (python3 -c "import cv2; print('OpenCV version:', cv2.__version__); build_info = cv2.getBuildInformation(); print('CUDA support:', 'CUDA' in build_info); print('CUVID support:', 'CUVID' in build_info); print('FFmpeg support:', 'FFMPEG' in build_info)" || echo "OpenCV verification failed") && \ - echo "" && \ - echo "=== Verification Complete (build-time only) ===" - # Set working directory WORKDIR /app From dc1db635d0a0b88e47cda200a069ebf05af4c3d8 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Thu, 25 Sep 2025 23:56:29 +0700 Subject: [PATCH 08/28] fix: remove unnecessary copying of FFmpeg library and include files --- Dockerfile.base | 2 -- 1 file changed, 2 deletions(-) diff --git a/Dockerfile.base b/Dockerfile.base index e2baf08..8c104d2 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -52,8 +52,6 @@ RUN cd /tmp && \ cd ffmpeg-master-latest-linux64-gpl && \ # Copy binaries to system paths cp bin/* /usr/local/bin/ && \ - cp -r lib/* /usr/local/lib/ && \ - cp -r include/* /usr/local/include/ && \ ldconfig && \ # Verify CUVID decoders are available echo "=== Verifying FFmpeg CUVID Support ===" && \ From 719d16ae4d32c25c35a09bdd4e8fe1a7c9b83488 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 00:07:48 +0700 Subject: [PATCH 09/28] refactor: simplify frame handling by removing stream type management and enhancing validation --- .claude/settings.local.json | 9 +++ core/streaming/buffers.py | 134 +++++++----------------------------- core/streaming/manager.py | 41 +---------- core/streaming/readers.py | 49 ++++--------- 4 files changed, 51 insertions(+), 182 deletions(-) create mode 100644 .claude/settings.local.json diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..b06024d --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,9 @@ +{ + "permissions": { + "allow": [ + "Bash(dir:*)" + ], + "deny": [], + "ask": [] + } +} \ No newline at end of file diff --git a/core/streaming/buffers.py b/core/streaming/buffers.py index 602e028..fd29fbb 100644 --- a/core/streaming/buffers.py +++ b/core/streaming/buffers.py @@ -9,53 +9,25 @@ import logging import numpy as np from typing import Optional, Dict, Any, Tuple from collections import defaultdict -from enum import Enum logger = logging.getLogger(__name__) -class StreamType(Enum): - """Stream type enumeration.""" - RTSP = "rtsp" # 1280x720 @ 6fps - HTTP = "http" # 2560x1440 high quality - - class FrameBuffer: - """Thread-safe frame buffer optimized for different stream types.""" + """Thread-safe frame buffer for all camera streams.""" def __init__(self, max_age_seconds: int = 5): self.max_age_seconds = max_age_seconds self._frames: Dict[str, Dict[str, Any]] = {} - self._stream_types: Dict[str, StreamType] = {} self._lock = threading.RLock() - # Stream-specific settings - self.rtsp_config = { - 'width': 1280, - 'height': 720, - 'fps': 6, - 'max_size_mb': 3 # 1280x720x3 bytes = ~2.6MB - } - self.http_config = { - 'width': 2560, - 'height': 1440, - 'max_size_mb': 10 - } - - def put_frame(self, camera_id: str, frame: np.ndarray, stream_type: Optional[StreamType] = None): - """Store a frame for the given camera ID with type-specific validation.""" + def put_frame(self, camera_id: str, frame: np.ndarray): + """Store a frame for the given camera ID.""" with self._lock: - # Detect stream type if not provided - if stream_type is None: - stream_type = self._detect_stream_type(frame) - - # Store stream type - self._stream_types[camera_id] = stream_type - - # Validate frame based on stream type - if not self._validate_frame(frame, stream_type): - logger.warning(f"Frame validation failed for camera {camera_id} ({stream_type.value})") + # Validate frame + if not self._validate_frame(frame): + logger.warning(f"Frame validation failed for camera {camera_id}") return self._frames[camera_id] = { @@ -63,14 +35,9 @@ class FrameBuffer: 'timestamp': time.time(), 'shape': frame.shape, 'dtype': str(frame.dtype), - 'stream_type': stream_type.value, 'size_mb': frame.nbytes / (1024 * 1024) } - # Commented out verbose frame storage logging - # logger.debug(f"Stored {stream_type.value} frame for camera {camera_id}: " - # f"{frame.shape[1]}x{frame.shape[0]}, {frame.nbytes / (1024 * 1024):.2f}MB") - def get_frame(self, camera_id: str) -> Optional[np.ndarray]: """Get the latest frame for the given camera ID.""" with self._lock: @@ -84,8 +51,6 @@ class FrameBuffer: if age > self.max_age_seconds: logger.debug(f"Frame for camera {camera_id} is {age:.1f}s old, discarding") del self._frames[camera_id] - if camera_id in self._stream_types: - del self._stream_types[camera_id] return None return frame_data['frame'].copy() @@ -101,8 +66,6 @@ class FrameBuffer: if age > self.max_age_seconds: del self._frames[camera_id] - if camera_id in self._stream_types: - del self._stream_types[camera_id] return None return { @@ -110,7 +73,6 @@ class FrameBuffer: 'age': age, 'shape': frame_data['shape'], 'dtype': frame_data['dtype'], - 'stream_type': frame_data.get('stream_type', 'unknown'), 'size_mb': frame_data.get('size_mb', 0) } @@ -123,8 +85,6 @@ class FrameBuffer: with self._lock: if camera_id in self._frames: del self._frames[camera_id] - if camera_id in self._stream_types: - del self._stream_types[camera_id] logger.debug(f"Cleared frames for camera {camera_id}") def clear_all(self): @@ -132,7 +92,6 @@ class FrameBuffer: with self._lock: count = len(self._frames) self._frames.clear() - self._stream_types.clear() logger.debug(f"Cleared all frames ({count} cameras)") def get_camera_list(self) -> list: @@ -152,8 +111,6 @@ class FrameBuffer: # Clean up expired frames for camera_id in expired_cameras: del self._frames[camera_id] - if camera_id in self._stream_types: - del self._stream_types[camera_id] return valid_cameras @@ -165,15 +122,12 @@ class FrameBuffer: 'total_cameras': len(self._frames), 'valid_cameras': 0, 'expired_cameras': 0, - 'rtsp_cameras': 0, - 'http_cameras': 0, 'total_memory_mb': 0, 'cameras': {} } for camera_id, frame_data in self._frames.items(): age = current_time - frame_data['timestamp'] - stream_type = frame_data.get('stream_type', 'unknown') size_mb = frame_data.get('size_mb', 0) if age <= self.max_age_seconds: @@ -181,11 +135,6 @@ class FrameBuffer: else: stats['expired_cameras'] += 1 - if stream_type == StreamType.RTSP.value: - stats['rtsp_cameras'] += 1 - elif stream_type == StreamType.HTTP.value: - stats['http_cameras'] += 1 - stats['total_memory_mb'] += size_mb stats['cameras'][camera_id] = { @@ -193,74 +142,45 @@ class FrameBuffer: 'valid': age <= self.max_age_seconds, 'shape': frame_data['shape'], 'dtype': frame_data['dtype'], - 'stream_type': stream_type, 'size_mb': size_mb } return stats - def _detect_stream_type(self, frame: np.ndarray) -> StreamType: - """Detect stream type based on frame dimensions.""" - h, w = frame.shape[:2] - - # Check if it matches RTSP dimensions (1280x720) - if w == self.rtsp_config['width'] and h == self.rtsp_config['height']: - return StreamType.RTSP - - # Check if it matches HTTP dimensions (2560x1440) or close to it - if w >= 2000 and h >= 1000: - return StreamType.HTTP - - # Default based on size - if w <= 1920 and h <= 1080: - return StreamType.RTSP - else: - return StreamType.HTTP - - def _validate_frame(self, frame: np.ndarray, stream_type: StreamType) -> bool: - """Validate frame based on stream type.""" + def _validate_frame(self, frame: np.ndarray) -> bool: + """Validate frame - basic validation for any stream type.""" if frame is None or frame.size == 0: return False h, w = frame.shape[:2] size_mb = frame.nbytes / (1024 * 1024) - if stream_type == StreamType.RTSP: - config = self.rtsp_config - # Allow some tolerance for RTSP streams - if abs(w - config['width']) > 100 or abs(h - config['height']) > 100: - logger.warning(f"RTSP frame size mismatch: {w}x{h} (expected {config['width']}x{config['height']})") - if size_mb > config['max_size_mb']: - logger.warning(f"RTSP frame too large: {size_mb:.2f}MB (max {config['max_size_mb']}MB)") - return False + # Basic size validation - reject extremely large frames regardless of type + max_size_mb = 50 # Generous limit for any frame type + if size_mb > max_size_mb: + logger.warning(f"Frame too large: {size_mb:.2f}MB (max {max_size_mb}MB) for {w}x{h}") + return False - elif stream_type == StreamType.HTTP: - config = self.http_config - # More flexible for HTTP snapshots - if size_mb > config['max_size_mb']: - logger.warning(f"HTTP snapshot too large: {size_mb:.2f}MB (max {config['max_size_mb']}MB)") - return False + # Basic dimension validation + if w < 100 or h < 100: + logger.warning(f"Frame too small: {w}x{h}") + return False return True class CacheBuffer: - """Enhanced frame cache with support for cropping and optimized for different formats.""" + """Enhanced frame cache with support for cropping.""" def __init__(self, max_age_seconds: int = 10): self.frame_buffer = FrameBuffer(max_age_seconds) self._crop_cache: Dict[str, Dict[str, Any]] = {} self._cache_lock = threading.RLock() + self.jpeg_quality = 95 # High quality for all frames - # Quality settings for different stream types - self.jpeg_quality = { - StreamType.RTSP: 90, # Good quality for 720p - StreamType.HTTP: 95 # High quality for 2K - } - - def put_frame(self, camera_id: str, frame: np.ndarray, stream_type: Optional[StreamType] = None): + def put_frame(self, camera_id: str, frame: np.ndarray): """Store a frame and clear any associated crop cache.""" - self.frame_buffer.put_frame(camera_id, frame, stream_type) + self.frame_buffer.put_frame(camera_id, frame) # Clear crop cache for this camera since we have a new frame with self._cache_lock: @@ -325,21 +245,15 @@ class CacheBuffer: def get_frame_as_jpeg(self, camera_id: str, crop_coords: Optional[Tuple[int, int, int, int]] = None, quality: Optional[int] = None) -> Optional[bytes]: - """Get frame as JPEG bytes with format-specific quality settings.""" + """Get frame as JPEG bytes.""" frame = self.get_frame(camera_id, crop_coords) if frame is None: return None try: - # Determine quality based on stream type if not specified + # Use specified quality or default if quality is None: - frame_info = self.frame_buffer.get_frame_info(camera_id) - if frame_info: - stream_type_str = frame_info.get('stream_type', StreamType.RTSP.value) - stream_type = StreamType.RTSP if stream_type_str == StreamType.RTSP.value else StreamType.HTTP - quality = self.jpeg_quality[stream_type] - else: - quality = 90 # Default + quality = self.jpeg_quality # Encode as JPEG with specified quality encode_params = [cv2.IMWRITE_JPEG_QUALITY, quality] diff --git a/core/streaming/manager.py b/core/streaming/manager.py index 7bd44c1..1e3719f 100644 --- a/core/streaming/manager.py +++ b/core/streaming/manager.py @@ -10,7 +10,7 @@ from dataclasses import dataclass from collections import defaultdict from .readers import RTSPReader, HTTPSnapshotReader -from .buffers import shared_cache_buffer, StreamType +from .buffers import shared_cache_buffer from ..tracking.integration import TrackingPipelineIntegration @@ -177,12 +177,8 @@ class StreamManager: def _frame_callback(self, camera_id: str, frame): """Callback for when a new frame is available.""" try: - # Detect stream type based on frame dimensions - stream_type = self._detect_stream_type(frame) - - # Store frame in shared buffer with stream type - shared_cache_buffer.put_frame(camera_id, frame, stream_type) - + # Store frame in shared buffer + shared_cache_buffer.put_frame(camera_id, frame) # Process tracking for subscriptions with tracking integration self._process_tracking_for_camera(camera_id, frame) @@ -404,26 +400,6 @@ class StreamManager: stats[subscription_id] = subscription_info.tracking_integration.get_statistics() return stats - def _detect_stream_type(self, frame) -> StreamType: - """Detect stream type based on frame dimensions.""" - if frame is None: - return StreamType.RTSP # Default - - h, w = frame.shape[:2] - - # RTSP: 1280x720 - if w == 1280 and h == 720: - return StreamType.RTSP - - # HTTP: 2560x1440 or larger - if w >= 2000 and h >= 1000: - return StreamType.HTTP - - # Default based on size - if w <= 1920 and h <= 1080: - return StreamType.RTSP - else: - return StreamType.HTTP def get_stats(self) -> Dict[str, Any]: """Get comprehensive streaming statistics.""" @@ -431,22 +407,11 @@ class StreamManager: buffer_stats = shared_cache_buffer.get_stats() tracking_stats = self.get_tracking_stats() - # Add stream type information - stream_types = {} - for camera_id in self._streams.keys(): - if isinstance(self._streams[camera_id], RTSPReader): - stream_types[camera_id] = 'rtsp' - elif isinstance(self._streams[camera_id], HTTPSnapshotReader): - stream_types[camera_id] = 'http' - else: - stream_types[camera_id] = 'unknown' - return { 'active_subscriptions': len(self._subscriptions), 'active_streams': len(self._streams), 'cameras_with_subscribers': len(self._camera_subscribers), 'max_streams': self.max_streams, - 'stream_types': stream_types, 'subscriptions_by_camera': { camera_id: len(subscribers) for camera_id, subscribers in self._camera_subscribers.items() diff --git a/core/streaming/readers.py b/core/streaming/readers.py index 9a3db6d..53c9643 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -37,7 +37,6 @@ class RTSPReader: self.expected_fps = 6 # Frame processing parameters - self.frame_interval = 1.0 / self.expected_fps # ~167ms for 6fps self.error_recovery_delay = 5.0 # Increased from 2.0 for stability self.max_consecutive_errors = 30 # Increased from 10 to handle network jitter self.stream_timeout = 30.0 @@ -72,7 +71,6 @@ class RTSPReader: frame_count = 0 last_log_time = time.time() last_successful_frame_time = time.time() - last_frame_time = 0 while not self.stop_event.is_set(): try: @@ -90,12 +88,7 @@ class RTSPReader: last_successful_frame_time = time.time() continue - # Rate limiting for 6fps - current_time = time.time() - if current_time - last_frame_time < self.frame_interval: - time.sleep(0.01) # Small sleep to avoid busy waiting - continue - + # Read frame immediately without rate limiting for minimum latency ret, frame = self.cap.read() if not ret or frame is None: @@ -118,15 +111,10 @@ class RTSPReader: time.sleep(sleep_time) continue - # Validate frame dimensions - if frame.shape[1] != self.expected_width or frame.shape[0] != self.expected_height: - logger.warning(f"Camera {self.camera_id}: Unexpected frame dimensions {frame.shape[1]}x{frame.shape[0]}") - # Try to resize if dimensions are wrong - if frame.shape[1] > 0 and frame.shape[0] > 0: - frame = cv2.resize(frame, (self.expected_width, self.expected_height)) - else: - consecutive_errors += 1 - continue + # Accept any valid frame dimensions - don't force specific resolution + if frame.shape[1] <= 0 or frame.shape[0] <= 0: + consecutive_errors += 1 + continue # Check for corrupted frames (all black, all white, excessive noise) if self._is_frame_corrupted(frame): @@ -138,7 +126,6 @@ class RTSPReader: consecutive_errors = 0 frame_count += 1 last_successful_frame_time = time.time() - last_frame_time = current_time # Call frame callback if self.frame_callback: @@ -148,6 +135,7 @@ class RTSPReader: logger.error(f"Camera {self.camera_id}: Frame callback error: {e}") # Log progress every 30 seconds + current_time = time.time() if current_time - last_log_time >= 30: logger.info(f"Camera {self.camera_id}: {frame_count} frames processed") last_log_time = current_time @@ -261,14 +249,12 @@ class RTSPReader: logger.error(f"Failed to open stream for camera {self.camera_id}") return False - # Set capture properties for 1280x720@6fps - self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.expected_width) - self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.expected_height) - self.cap.set(cv2.CAP_PROP_FPS, self.expected_fps) + # Don't force resolution/fps - let the stream determine its natural specs + # The camera will provide whatever resolution/fps it supports - # Set moderate buffer to handle network jitter while avoiding excessive latency - # Buffer of 3 frames provides resilience without major delay - self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) + # Set minimal buffer for lowest latency - single frame buffer + # This ensures we always get the most recent frame + self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) # Set FFMPEG options for better H.264 handling self.cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'H264')) @@ -405,15 +391,10 @@ class HTTPSnapshotReader: time.sleep(min(2.0, interval_seconds)) continue - # Validate image dimensions - if frame.shape[1] != self.expected_width or frame.shape[0] != self.expected_height: - logger.info(f"Camera {self.camera_id}: Snapshot dimensions {frame.shape[1]}x{frame.shape[0]} " - f"(expected {self.expected_width}x{self.expected_height})") - # Resize if needed (maintaining aspect ratio for high quality) - if frame.shape[1] > 0 and frame.shape[0] > 0: - # Only resize if significantly different - if abs(frame.shape[1] - self.expected_width) > 100: - frame = self._resize_maintain_aspect(frame, self.expected_width, self.expected_height) + # Accept any valid image dimensions - don't force specific resolution + if frame.shape[1] <= 0 or frame.shape[0] <= 0: + logger.warning(f"Camera {self.camera_id}: Invalid frame dimensions {frame.shape[1]}x{frame.shape[0]}") + continue # Reset retry counter on successful fetch retries = 0 From 360a4ab89031e289ed387b96b79d7e1b833ee351 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 00:16:49 +0700 Subject: [PATCH 10/28] feat: enhance logging for detected hardware codecs and improve CUDA acceleration handling --- core/utils/ffmpeg_detector.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/core/utils/ffmpeg_detector.py b/core/utils/ffmpeg_detector.py index a3cf8fc..92aecfc 100644 --- a/core/utils/ffmpeg_detector.py +++ b/core/utils/ffmpeg_detector.py @@ -46,6 +46,7 @@ class FFmpegCapabilities: # Log capabilities if self.nvidia_support: logger.info("NVIDIA hardware acceleration available (CUDA/CUVID/NVDEC)") + logger.info(f"Detected hardware codecs: {self.codecs}") if self.vaapi_support: logger.info("VAAPI hardware acceleration available") if self.qsv_support: @@ -104,22 +105,23 @@ class FFmpegCapabilities: # Add hardware acceleration if available if self.nvidia_support: - if codec == 'h264' and 'h264_hw' in self.codecs: + # Force enable CUDA hardware acceleration for H.264 if CUDA is available + if codec == 'h264': options.update({ 'hwaccel': 'cuda', 'hwaccel_device': '0', 'video_codec': 'h264_cuvid', 'hwaccel_output_format': 'cuda' }) - logger.debug("Using NVIDIA CUVID hardware acceleration for H.264") - elif codec == 'h265' and 'h265_hw' in self.codecs: + logger.info("Using NVIDIA CUVID hardware acceleration for H.264") + elif codec == 'h265': options.update({ 'hwaccel': 'cuda', 'hwaccel_device': '0', 'video_codec': 'hevc_cuvid', 'hwaccel_output_format': 'cuda' }) - logger.debug("Using NVIDIA CUVID hardware acceleration for H.265") + logger.info("Using NVIDIA CUVID hardware acceleration for H.265") elif self.vaapi_support: if codec == 'h264': From 59e8448f0d5c62b6a26df2a4d7a14bc55ef95da0 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 00:27:08 +0700 Subject: [PATCH 11/28] fix: add missing FFmpeg development libraries for OpenCV integration --- Dockerfile.base | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Dockerfile.base b/Dockerfile.base index 8c104d2..6c2f97b 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -24,6 +24,14 @@ RUN apt-get update && apt-get install -y \ libvpx-dev \ libmp3lame-dev \ libv4l-dev \ + # FFmpeg development libraries for OpenCV integration + libavcodec-dev \ + libavformat-dev \ + libavutil-dev \ + libavdevice-dev \ + libavfilter-dev \ + libswscale-dev \ + libswresample-dev \ # TurboJPEG for fast JPEG encoding libturbojpeg0-dev \ # Python development From e2e535604762d1b4aad21f96dff0c17a4fffc023 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 00:41:49 +0700 Subject: [PATCH 12/28] refactor: build FFmpeg from source with NVIDIA CUDA support and remove unnecessary development libraries --- Dockerfile.base | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/Dockerfile.base b/Dockerfile.base index 6c2f97b..56b4159 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -24,14 +24,6 @@ RUN apt-get update && apt-get install -y \ libvpx-dev \ libmp3lame-dev \ libv4l-dev \ - # FFmpeg development libraries for OpenCV integration - libavcodec-dev \ - libavformat-dev \ - libavutil-dev \ - libavdevice-dev \ - libavfilter-dev \ - libswscale-dev \ - libswresample-dev \ # TurboJPEG for fast JPEG encoding libturbojpeg0-dev \ # Python development @@ -52,14 +44,35 @@ RUN cd /tmp && \ make install && \ rm -rf /tmp/* -# Download and install prebuilt FFmpeg with CUDA support +# Build FFmpeg from source with NVIDIA CUDA support RUN cd /tmp && \ - echo "Installing prebuilt FFmpeg with CUDA support..." && \ - wget https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz && \ - tar -xf ffmpeg-master-latest-linux64-gpl.tar.xz && \ - cd ffmpeg-master-latest-linux64-gpl && \ - # Copy binaries to system paths - cp bin/* /usr/local/bin/ && \ + echo "Building FFmpeg with NVIDIA CUDA support..." && \ + # Download FFmpeg source + wget https://ffmpeg.org/releases/ffmpeg-7.1.tar.xz && \ + tar -xf ffmpeg-7.1.tar.xz && \ + cd ffmpeg-7.1 && \ + # Configure with NVIDIA support + ./configure \ + --prefix=/usr/local \ + --enable-shared \ + --enable-pic \ + --enable-gpl \ + --enable-version3 \ + --enable-nonfree \ + --enable-cuda-nvcc \ + --enable-cuvid \ + --enable-nvdec \ + --enable-nvenc \ + --enable-libnpp \ + --extra-cflags=-I/usr/local/cuda/include \ + --extra-ldflags=-L/usr/local/cuda/lib64 \ + --enable-libx264 \ + --enable-libx265 \ + --enable-libvpx \ + --enable-libmp3lame && \ + # Build and install + make -j$(nproc) && \ + make install && \ ldconfig && \ # Verify CUVID decoders are available echo "=== Verifying FFmpeg CUVID Support ===" && \ From 6fe4b6ebf0d5f3c666ea724515d89cab38a05a54 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 00:48:06 +0700 Subject: [PATCH 13/28] refactor: update Dockerfile to use development image and enhance FFmpeg build process with NVIDIA support --- Dockerfile.base | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/Dockerfile.base b/Dockerfile.base index 56b4159..8d19778 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -1,5 +1,5 @@ # Base image with complete ML and hardware acceleration stack -FROM pytorch/pytorch:2.8.0-cuda12.6-cudnn9-runtime +FROM pytorch/pytorch:2.8.0-cuda12.6-cudnn9-devel # Install build dependencies and system libraries RUN apt-get update && apt-get install -y \ @@ -12,6 +12,12 @@ RUN apt-get update && apt-get install -y \ unzip \ yasm \ nasm \ + # Additional dependencies for FFmpeg/NVIDIA build + libtool \ + libc6 \ + libc6-dev \ + libnuma1 \ + libnuma-dev \ # System libraries libgl1-mesa-glx \ libglib2.0-0 \ @@ -31,41 +37,45 @@ RUN apt-get update && apt-get install -y \ python3-numpy \ && rm -rf /var/lib/apt/lists/* -# Install prebuilt FFmpeg with CUDA support +# CUDA development tools already available in devel image + # Ensure CUDA paths are available ENV PATH="/usr/local/cuda/bin:${PATH}" ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" -# Install NVIDIA Video Codec SDK headers first +# Install NVIDIA Video Codec SDK headers (official method) RUN cd /tmp && \ - wget https://github.com/FFmpeg/nv-codec-headers/archive/refs/tags/n12.1.14.0.zip && \ - unzip n12.1.14.0.zip && \ - cd nv-codec-headers-n12.1.14.0 && \ + git clone https://git.videolan.org/git/ffmpeg/nv-codec-headers.git && \ + cd nv-codec-headers && \ make install && \ - rm -rf /tmp/* + cd / && rm -rf /tmp/* # Build FFmpeg from source with NVIDIA CUDA support RUN cd /tmp && \ echo "Building FFmpeg with NVIDIA CUDA support..." && \ - # Download FFmpeg source - wget https://ffmpeg.org/releases/ffmpeg-7.1.tar.xz && \ - tar -xf ffmpeg-7.1.tar.xz && \ - cd ffmpeg-7.1 && \ - # Configure with NVIDIA support + # Download FFmpeg source (official method) + git clone https://git.ffmpeg.org/ffmpeg.git ffmpeg/ && \ + cd ffmpeg && \ + # Configure with NVIDIA support (following official NVIDIA documentation) ./configure \ --prefix=/usr/local \ --enable-shared \ - --enable-pic \ - --enable-gpl \ - --enable-version3 \ + --disable-static \ --enable-nonfree \ + --enable-gpl \ --enable-cuda-nvcc \ + --enable-cuda-llvm \ --enable-cuvid \ --enable-nvdec \ --enable-nvenc \ --enable-libnpp \ + --nvcc=/usr/local/cuda/bin/nvcc \ --extra-cflags=-I/usr/local/cuda/include \ --extra-ldflags=-L/usr/local/cuda/lib64 \ + --extra-libs=-lcuda \ + --extra-libs=-lcudart \ + --extra-libs=-lnvcuvid \ + --extra-libs=-lnvidia-encode \ --enable-libx264 \ --enable-libx265 \ --enable-libvpx \ From fa3ab5c6d2a49e064258ca18f5963a0d7ecd011a Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 00:48:39 +0700 Subject: [PATCH 14/28] refactor: update base image to runtime version and install minimal CUDA development tools for FFmpeg --- Dockerfile.base | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Dockerfile.base b/Dockerfile.base index 8d19778..2569ebd 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -1,5 +1,5 @@ # Base image with complete ML and hardware acceleration stack -FROM pytorch/pytorch:2.8.0-cuda12.6-cudnn9-devel +FROM pytorch/pytorch:2.8.0-cuda12.6-cudnn9-runtime # Install build dependencies and system libraries RUN apt-get update && apt-get install -y \ @@ -37,7 +37,13 @@ RUN apt-get update && apt-get install -y \ python3-numpy \ && rm -rf /var/lib/apt/lists/* -# CUDA development tools already available in devel image +# Install minimal CUDA development tools (just what we need for FFmpeg) +RUN apt-get update && apt-get install -y \ + cuda-nvcc-12-6 \ + cuda-cudart-dev-12-6 \ + libnvidia-encode-12-6 \ + libnvidia-decode-12-6 \ + && rm -rf /var/lib/apt/lists/* # Ensure CUDA paths are available ENV PATH="/usr/local/cuda/bin:${PATH}" From bdbf6889465a250e01e9b59e4cb50623102ba77c Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 01:11:32 +0700 Subject: [PATCH 15/28] refactor: streamline CUDA development tools installation and simplify FFmpeg configuration for NVIDIA support --- Dockerfile.base | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/Dockerfile.base b/Dockerfile.base index 2569ebd..9684325 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -18,6 +18,11 @@ RUN apt-get update && apt-get install -y \ libc6-dev \ libnuma1 \ libnuma-dev \ + # Essential compilation libraries + gcc \ + g++ \ + libc6-dev \ + linux-libc-dev \ # System libraries libgl1-mesa-glx \ libglib2.0-0 \ @@ -37,13 +42,18 @@ RUN apt-get update && apt-get install -y \ python3-numpy \ && rm -rf /var/lib/apt/lists/* -# Install minimal CUDA development tools (just what we need for FFmpeg) -RUN apt-get update && apt-get install -y \ +# Add NVIDIA CUDA repository and install minimal development tools +RUN apt-get update && apt-get install -y wget gnupg && \ + wget -O - https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub | apt-key add - && \ + echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ + apt-get update && \ + apt-get install -y \ cuda-nvcc-12-6 \ cuda-cudart-dev-12-6 \ - libnvidia-encode-12-6 \ - libnvidia-decode-12-6 \ - && rm -rf /var/lib/apt/lists/* + libnpp-dev-12-6 \ + && apt-get remove -y wget gnupg && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* # Ensure CUDA paths are available ENV PATH="/usr/local/cuda/bin:${PATH}" @@ -62,7 +72,7 @@ RUN cd /tmp && \ # Download FFmpeg source (official method) git clone https://git.ffmpeg.org/ffmpeg.git ffmpeg/ && \ cd ffmpeg && \ - # Configure with NVIDIA support (following official NVIDIA documentation) + # Configure with NVIDIA support (simplified to avoid configure issues) ./configure \ --prefix=/usr/local \ --enable-shared \ @@ -70,18 +80,12 @@ RUN cd /tmp && \ --enable-nonfree \ --enable-gpl \ --enable-cuda-nvcc \ - --enable-cuda-llvm \ --enable-cuvid \ --enable-nvdec \ --enable-nvenc \ --enable-libnpp \ - --nvcc=/usr/local/cuda/bin/nvcc \ --extra-cflags=-I/usr/local/cuda/include \ --extra-ldflags=-L/usr/local/cuda/lib64 \ - --extra-libs=-lcuda \ - --extra-libs=-lcudart \ - --extra-libs=-lnvcuvid \ - --extra-libs=-lnvidia-encode \ --enable-libx264 \ --enable-libx265 \ --enable-libvpx \ From cb9ff7bc861cef272397da5aaa9f3ed1fbe467f2 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 01:33:41 +0700 Subject: [PATCH 16/28] refactor: update FFmpeg hardware acceleration to use NVDEC instead of CUVID for improved performance --- core/streaming/readers.py | 10 +++++----- core/utils/ffmpeg_detector.py | 6 ++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/core/streaming/readers.py b/core/streaming/readers.py index 53c9643..32a424a 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -208,20 +208,20 @@ class RTSPReader: except Exception as e: logger.debug(f"Camera {self.camera_id}: FFmpeg optimal hardware acceleration not available: {e}") - # Method 3: Try FFmpeg with basic NVIDIA CUVID + # Method 3: Try FFmpeg with NVIDIA NVDEC (better for RTX 3060) if not hw_accel_success: try: import os - os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'video_codec;h264_cuvid|rtsp_transport;tcp|hwaccel;cuda|hwaccel_device;0' + os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'hwaccel;cuda|hwaccel_device;0|rtsp_transport;tcp' - logger.info(f"Attempting FFmpeg with basic CUVID for camera {self.camera_id}") + logger.info(f"Attempting FFmpeg with NVDEC hardware acceleration for camera {self.camera_id}") self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) if self.cap.isOpened(): hw_accel_success = True - logger.info(f"Camera {self.camera_id}: Using FFmpeg CUVID hardware acceleration") + logger.info(f"Camera {self.camera_id}: Using FFmpeg NVDEC hardware acceleration") except Exception as e: - logger.debug(f"Camera {self.camera_id}: FFmpeg CUVID not available: {e}") + logger.debug(f"Camera {self.camera_id}: FFmpeg NVDEC not available: {e}") # Method 4: Try FFmpeg with VAAPI (Intel/AMD GPUs) if not hw_accel_success: diff --git a/core/utils/ffmpeg_detector.py b/core/utils/ffmpeg_detector.py index 92aecfc..565713c 100644 --- a/core/utils/ffmpeg_detector.py +++ b/core/utils/ffmpeg_detector.py @@ -109,11 +109,9 @@ class FFmpegCapabilities: if codec == 'h264': options.update({ 'hwaccel': 'cuda', - 'hwaccel_device': '0', - 'video_codec': 'h264_cuvid', - 'hwaccel_output_format': 'cuda' + 'hwaccel_device': '0' }) - logger.info("Using NVIDIA CUVID hardware acceleration for H.264") + logger.info("Using NVIDIA NVDEC hardware acceleration for H.264") elif codec == 'h265': options.update({ 'hwaccel': 'cuda', From c6a4258055c9694c2cd19a6d3b4e55c6510d843f Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 01:42:30 +0700 Subject: [PATCH 17/28] refactor: enhance error logging in RTSPReader for better debugging of frame capture issues --- core/streaming/readers.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/core/streaming/readers.py b/core/streaming/readers.py index 32a424a..78a3d45 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -94,8 +94,17 @@ class RTSPReader: if not ret or frame is None: consecutive_errors += 1 + # Verbose logging to see actual errors + logger.error(f"Camera {self.camera_id}: cap.read() failed - ret={ret}, frame={frame is not None}") + + # Try to get more info from the capture + if self.cap.isOpened(): + logger.debug(f"Camera {self.camera_id}: Capture still open, backend: {self.cap.getBackendName()}") + else: + logger.error(f"Camera {self.camera_id}: Capture is closed!") + if consecutive_errors >= self.max_consecutive_errors: - logger.error(f"Camera {self.camera_id}: Too many consecutive errors, reinitializing") + logger.error(f"Camera {self.camera_id}: Too many consecutive errors ({consecutive_errors}), reinitializing") self._reinitialize_capture() consecutive_errors = 0 time.sleep(self.error_recovery_delay) From a1e7c42fb35db7f2bbf43b53769f0f149e7dfaa7 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 01:44:46 +0700 Subject: [PATCH 18/28] refactor: improve error handling and logging in RTSPReader for frame capture failures --- core/streaming/readers.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/core/streaming/readers.py b/core/streaming/readers.py index 78a3d45..59db84b 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -89,7 +89,11 @@ class RTSPReader: continue # Read frame immediately without rate limiting for minimum latency - ret, frame = self.cap.read() + try: + ret, frame = self.cap.read() + except Exception as read_error: + logger.error(f"Camera {self.camera_id}: cap.read() threw exception: {type(read_error).__name__}: {read_error}") + ret, frame = False, None if not ret or frame is None: consecutive_errors += 1 @@ -98,10 +102,14 @@ class RTSPReader: logger.error(f"Camera {self.camera_id}: cap.read() failed - ret={ret}, frame={frame is not None}") # Try to get more info from the capture - if self.cap.isOpened(): - logger.debug(f"Camera {self.camera_id}: Capture still open, backend: {self.cap.getBackendName()}") - else: - logger.error(f"Camera {self.camera_id}: Capture is closed!") + try: + if self.cap.isOpened(): + backend = self.cap.getBackendName() + logger.debug(f"Camera {self.camera_id}: Capture still open, backend: {backend}") + else: + logger.error(f"Camera {self.camera_id}: Capture is closed!") + except Exception as info_error: + logger.error(f"Camera {self.camera_id}: Error getting capture info: {type(info_error).__name__}: {info_error}") if consecutive_errors >= self.max_consecutive_errors: logger.error(f"Camera {self.camera_id}: Too many consecutive errors ({consecutive_errors}), reinitializing") From 65b7573fed5a0fcaf4d10003c1b10fb9cd655afc Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 01:52:50 +0700 Subject: [PATCH 19/28] refactor: remove unnecessary buffer size setting for RTSP stream to improve latency --- core/streaming/readers.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/core/streaming/readers.py b/core/streaming/readers.py index 59db84b..ef89724 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -269,9 +269,6 @@ class RTSPReader: # Don't force resolution/fps - let the stream determine its natural specs # The camera will provide whatever resolution/fps it supports - # Set minimal buffer for lowest latency - single frame buffer - # This ensures we always get the most recent frame - self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) # Set FFMPEG options for better H.264 handling self.cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'H264')) From 08cb4eafc40758cf0e652fbfc834e4052ddd452d Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 01:58:50 +0700 Subject: [PATCH 20/28] refactor: enhance error handling and logging in RTSPReader for improved frame retrieval diagnostics --- core/streaming/readers.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/core/streaming/readers.py b/core/streaming/readers.py index ef89724..6f31cf1 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -90,24 +90,30 @@ class RTSPReader: # Read frame immediately without rate limiting for minimum latency try: - ret, frame = self.cap.read() + # Force grab then retrieve for better error handling + ret = self.cap.grab() + if ret: + ret, frame = self.cap.retrieve() + else: + frame = None except Exception as read_error: - logger.error(f"Camera {self.camera_id}: cap.read() threw exception: {type(read_error).__name__}: {read_error}") + logger.error(f"Camera {self.camera_id}: cap.grab/retrieve threw exception: {type(read_error).__name__}: {read_error}") ret, frame = False, None if not ret or frame is None: consecutive_errors += 1 - # Verbose logging to see actual errors + # Enhanced logging to diagnose the issue logger.error(f"Camera {self.camera_id}: cap.read() failed - ret={ret}, frame={frame is not None}") # Try to get more info from the capture try: - if self.cap.isOpened(): + if self.cap and self.cap.isOpened(): backend = self.cap.getBackendName() - logger.debug(f"Camera {self.camera_id}: Capture still open, backend: {backend}") + pos_frames = self.cap.get(cv2.CAP_PROP_POS_FRAMES) + logger.error(f"Camera {self.camera_id}: Capture open, backend: {backend}, pos_frames: {pos_frames}") else: - logger.error(f"Camera {self.camera_id}: Capture is closed!") + logger.error(f"Camera {self.camera_id}: Capture is closed or None!") except Exception as info_error: logger.error(f"Camera {self.camera_id}: Error getting capture info: {type(info_error).__name__}: {info_error}") From c38b58e34c7928ed7a2b7750e947f8e3aed83c3d Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 02:07:17 +0700 Subject: [PATCH 21/28] refactor: add FFmpegRTSPReader for enhanced RTSP stream handling with CUDA acceleration --- core/streaming/__init__.py | 3 +- core/streaming/manager.py | 8 +- core/streaming/readers.py | 150 +++++++++++++++++++++++++++++++++++-- 3 files changed, 149 insertions(+), 12 deletions(-) diff --git a/core/streaming/__init__.py b/core/streaming/__init__.py index c4c40dc..d878aac 100644 --- a/core/streaming/__init__.py +++ b/core/streaming/__init__.py @@ -2,7 +2,7 @@ Streaming system for RTSP and HTTP camera feeds. Provides modular frame readers, buffers, and stream management. """ -from .readers import RTSPReader, HTTPSnapshotReader +from .readers import RTSPReader, HTTPSnapshotReader, FFmpegRTSPReader from .buffers import FrameBuffer, CacheBuffer, shared_frame_buffer, shared_cache_buffer from .manager import StreamManager, StreamConfig, SubscriptionInfo, shared_stream_manager, initialize_stream_manager @@ -10,6 +10,7 @@ __all__ = [ # Readers 'RTSPReader', 'HTTPSnapshotReader', + 'FFmpegRTSPReader', # Buffers 'FrameBuffer', diff --git a/core/streaming/manager.py b/core/streaming/manager.py index 1e3719f..156daf1 100644 --- a/core/streaming/manager.py +++ b/core/streaming/manager.py @@ -9,7 +9,7 @@ from typing import Dict, Set, Optional, List, Any from dataclasses import dataclass from collections import defaultdict -from .readers import RTSPReader, HTTPSnapshotReader +from .readers import RTSPReader, HTTPSnapshotReader, FFmpegRTSPReader from .buffers import shared_cache_buffer from ..tracking.integration import TrackingPipelineIntegration @@ -129,8 +129,8 @@ class StreamManager: """Start a stream for the given camera.""" try: if stream_config.rtsp_url: - # RTSP stream - reader = RTSPReader( + # RTSP stream using FFmpeg subprocess with CUDA acceleration + reader = FFmpegRTSPReader( camera_id=camera_id, rtsp_url=stream_config.rtsp_url, max_retries=stream_config.max_retries @@ -138,7 +138,7 @@ class StreamManager: reader.set_frame_callback(self._frame_callback) reader.start() self._streams[camera_id] = reader - logger.info(f"Started RTSP stream for camera {camera_id}") + logger.info(f"Started FFmpeg RTSP stream for camera {camera_id}") elif stream_config.snapshot_url: # HTTP snapshot stream diff --git a/core/streaming/readers.py b/core/streaming/readers.py index 6f31cf1..243f088 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -9,6 +9,7 @@ import threading import requests import numpy as np import os +import subprocess from typing import Optional, Callable # Suppress FFMPEG/H.264 error messages if needed @@ -19,6 +20,143 @@ os.environ["OPENCV_FFMPEG_LOGLEVEL"] = "-8" # Suppress FFMPEG warnings logger = logging.getLogger(__name__) +class FFmpegRTSPReader: + """RTSP stream reader using subprocess FFmpeg with CUDA hardware acceleration.""" + + def __init__(self, camera_id: str, rtsp_url: str, max_retries: int = 3): + self.camera_id = camera_id + self.rtsp_url = rtsp_url + self.max_retries = max_retries + self.process = None + self.stop_event = threading.Event() + self.thread = None + self.frame_callback: Optional[Callable] = None + + # Stream specs + self.width = 1280 + self.height = 720 + + def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]): + """Set callback function to handle captured frames.""" + self.frame_callback = callback + + def start(self): + """Start the FFmpeg subprocess reader.""" + if self.thread and self.thread.is_alive(): + logger.warning(f"FFmpeg reader for {self.camera_id} already running") + return + + self.stop_event.clear() + self.thread = threading.Thread(target=self._read_frames, daemon=True) + self.thread.start() + logger.info(f"Started FFmpeg reader for camera {self.camera_id}") + + def stop(self): + """Stop the FFmpeg subprocess reader.""" + self.stop_event.set() + if self.process: + self.process.terminate() + try: + self.process.wait(timeout=5) + except subprocess.TimeoutExpired: + self.process.kill() + if self.thread: + self.thread.join(timeout=5.0) + logger.info(f"Stopped FFmpeg reader for camera {self.camera_id}") + + def _start_ffmpeg_process(self): + """Start FFmpeg subprocess with CUDA hardware acceleration.""" + cmd = [ + 'ffmpeg', + '-hwaccel', 'cuda', + '-hwaccel_device', '0', + '-rtsp_transport', 'tcp', + '-i', self.rtsp_url, + '-f', 'rawvideo', + '-pix_fmt', 'bgr24', + '-an', # No audio + '-' # Output to stdout + ] + + try: + self.process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + bufsize=0 + ) + logger.info(f"Started FFmpeg process for camera {self.camera_id}") + return True + except Exception as e: + logger.error(f"Failed to start FFmpeg for camera {self.camera_id}: {e}") + return False + + def _read_frames(self): + """Read frames from FFmpeg stdout pipe.""" + consecutive_errors = 0 + frame_count = 0 + last_log_time = time.time() + bytes_per_frame = self.width * self.height * 3 # BGR = 3 bytes per pixel + + while not self.stop_event.is_set(): + try: + # Start/restart FFmpeg process if needed + if not self.process or self.process.poll() is not None: + if not self._start_ffmpeg_process(): + time.sleep(5.0) + continue + + # Read one frame worth of data + frame_data = self.process.stdout.read(bytes_per_frame) + + if len(frame_data) != bytes_per_frame: + consecutive_errors += 1 + if consecutive_errors >= 30: + logger.error(f"Camera {self.camera_id}: Too many read errors, restarting FFmpeg") + if self.process: + self.process.terminate() + consecutive_errors = 0 + continue + + # Convert raw bytes to numpy array + frame = np.frombuffer(frame_data, dtype=np.uint8) + frame = frame.reshape((self.height, self.width, 3)) + + # Frame is valid + consecutive_errors = 0 + frame_count += 1 + + # Call frame callback + if self.frame_callback: + try: + self.frame_callback(self.camera_id, frame) + except Exception as e: + logger.error(f"Camera {self.camera_id}: Frame callback error: {e}") + + # Log progress + current_time = time.time() + if current_time - last_log_time >= 30: + logger.info(f"Camera {self.camera_id}: {frame_count} frames processed via FFmpeg") + last_log_time = current_time + + except Exception as e: + logger.error(f"Camera {self.camera_id}: FFmpeg read error: {e}") + consecutive_errors += 1 + if consecutive_errors >= 30: + if self.process: + self.process.terminate() + consecutive_errors = 0 + time.sleep(1.0) + + # Cleanup + if self.process: + self.process.terminate() + logger.info(f"FFmpeg reader thread ended for camera {self.camera_id}") + + +logger = logging.getLogger(__name__) + + class RTSPReader: """RTSP stream frame reader optimized for 1280x720 @ 6fps streams.""" @@ -90,14 +228,12 @@ class RTSPReader: # Read frame immediately without rate limiting for minimum latency try: - # Force grab then retrieve for better error handling - ret = self.cap.grab() - if ret: - ret, frame = self.cap.retrieve() - else: - frame = None + ret, frame = self.cap.read() + if ret and frame is None: + # Grab succeeded but retrieve failed - decoder issue + logger.error(f"Camera {self.camera_id}: Frame grab OK but decode failed") except Exception as read_error: - logger.error(f"Camera {self.camera_id}: cap.grab/retrieve threw exception: {type(read_error).__name__}: {read_error}") + logger.error(f"Camera {self.camera_id}: cap.read() threw exception: {type(read_error).__name__}: {read_error}") ret, frame = False, None if not ret or frame is None: From 79a1189675e430e093d971565776b5ad01809eb0 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 02:15:06 +0700 Subject: [PATCH 22/28] refactor: update FFmpegRTSPReader to use a temporary file for frame reading and improve error handling --- core/streaming/readers.py | 112 +++++++++++++++++++++++++++----------- 1 file changed, 81 insertions(+), 31 deletions(-) diff --git a/core/streaming/readers.py b/core/streaming/readers.py index 243f088..7478e38 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -65,7 +65,12 @@ class FFmpegRTSPReader: logger.info(f"Stopped FFmpeg reader for camera {self.camera_id}") def _start_ffmpeg_process(self): - """Start FFmpeg subprocess with CUDA hardware acceleration.""" + """Start FFmpeg subprocess with CUDA hardware acceleration writing to temp file.""" + # Create temp file path for this camera + import tempfile + self.temp_file = f"/tmp/claude/camera_{self.camera_id.replace(' ', '_')}.raw" + os.makedirs("/tmp/claude", exist_ok=True) + cmd = [ 'ffmpeg', '-hwaccel', 'cuda', @@ -75,7 +80,8 @@ class FFmpegRTSPReader: '-f', 'rawvideo', '-pix_fmt', 'bgr24', '-an', # No audio - '-' # Output to stdout + '-y', # Overwrite output file + self.temp_file ] try: @@ -85,18 +91,22 @@ class FFmpegRTSPReader: stderr=subprocess.PIPE, bufsize=0 ) - logger.info(f"Started FFmpeg process for camera {self.camera_id}") + logger.info(f"Started FFmpeg process for camera {self.camera_id} writing to {self.temp_file}") + + # Don't check process immediately - FFmpeg takes time to initialize + logger.info(f"Waiting for FFmpeg to initialize for camera {self.camera_id}...") return True except Exception as e: logger.error(f"Failed to start FFmpeg for camera {self.camera_id}: {e}") return False def _read_frames(self): - """Read frames from FFmpeg stdout pipe.""" + """Read frames from FFmpeg temp file.""" consecutive_errors = 0 frame_count = 0 last_log_time = time.time() bytes_per_frame = self.width * self.height * 3 # BGR = 3 bytes per pixel + last_file_size = 0 while not self.stop_event.is_set(): try: @@ -106,38 +116,72 @@ class FFmpegRTSPReader: time.sleep(5.0) continue - # Read one frame worth of data - frame_data = self.process.stdout.read(bytes_per_frame) - - if len(frame_data) != bytes_per_frame: - consecutive_errors += 1 - if consecutive_errors >= 30: - logger.error(f"Camera {self.camera_id}: Too many read errors, restarting FFmpeg") - if self.process: - self.process.terminate() - consecutive_errors = 0 + # Wait for temp file to exist and have content + if not os.path.exists(self.temp_file): + time.sleep(0.1) continue - # Convert raw bytes to numpy array - frame = np.frombuffer(frame_data, dtype=np.uint8) - frame = frame.reshape((self.height, self.width, 3)) + # Check if file size changed (new frame available) + try: + current_file_size = os.path.getsize(self.temp_file) + if current_file_size <= last_file_size and current_file_size > 0: + # File size didn't increase, wait for next frame + time.sleep(0.05) # ~20 FPS max + continue + last_file_size = current_file_size + except OSError: + time.sleep(0.1) + continue - # Frame is valid - consecutive_errors = 0 - frame_count += 1 + # Read the latest frame from the end of file + try: + with open(self.temp_file, 'rb') as f: + # Seek to last complete frame + file_size = f.seek(0, 2) # Seek to end + if file_size < bytes_per_frame: + time.sleep(0.1) + continue - # Call frame callback - if self.frame_callback: - try: - self.frame_callback(self.camera_id, frame) - except Exception as e: - logger.error(f"Camera {self.camera_id}: Frame callback error: {e}") + # Read last complete frame + last_frame_offset = (file_size // bytes_per_frame - 1) * bytes_per_frame + f.seek(last_frame_offset) + frame_data = f.read(bytes_per_frame) - # Log progress - current_time = time.time() - if current_time - last_log_time >= 30: - logger.info(f"Camera {self.camera_id}: {frame_count} frames processed via FFmpeg") - last_log_time = current_time + if len(frame_data) != bytes_per_frame: + consecutive_errors += 1 + if consecutive_errors >= 30: + logger.error(f"Camera {self.camera_id}: Too many read errors, restarting FFmpeg") + if self.process: + self.process.terminate() + consecutive_errors = 0 + time.sleep(0.1) + continue + + # Convert raw bytes to numpy array + frame = np.frombuffer(frame_data, dtype=np.uint8) + frame = frame.reshape((self.height, self.width, 3)) + + # Frame is valid + consecutive_errors = 0 + frame_count += 1 + + # Call frame callback + if self.frame_callback: + try: + self.frame_callback(self.camera_id, frame) + except Exception as e: + logger.error(f"Camera {self.camera_id}: Frame callback error: {e}") + + # Log progress + current_time = time.time() + if current_time - last_log_time >= 30: + logger.info(f"Camera {self.camera_id}: {frame_count} frames processed via temp file") + last_log_time = current_time + + except IOError as e: + logger.debug(f"Camera {self.camera_id}: File read error: {e}") + time.sleep(0.1) + continue except Exception as e: logger.error(f"Camera {self.camera_id}: FFmpeg read error: {e}") @@ -151,6 +195,12 @@ class FFmpegRTSPReader: # Cleanup if self.process: self.process.terminate() + # Clean up temp file + try: + if hasattr(self, 'temp_file') and os.path.exists(self.temp_file): + os.remove(self.temp_file) + except: + pass logger.info(f"FFmpeg reader thread ended for camera {self.camera_id}") From cb31633cc107a5156b4c81d975823989f42e416c Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 02:18:20 +0700 Subject: [PATCH 23/28] refactor: enhance FFmpegRTSPReader with file watching and reactive frame reading --- .claude/settings.local.json | 3 +- core/streaming/readers.py | 179 ++++++++++++++++++++---------------- 2 files changed, 101 insertions(+), 81 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index b06024d..97cf5c1 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -1,7 +1,8 @@ { "permissions": { "allow": [ - "Bash(dir:*)" + "Bash(dir:*)", + "WebSearch" ], "deny": [], "ask": [] diff --git a/core/streaming/readers.py b/core/streaming/readers.py index 7478e38..e221c4a 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -11,6 +11,8 @@ import numpy as np import os import subprocess from typing import Optional, Callable +from watchdog.observers import Observer +from watchdog.events import FileSystemEventHandler # Suppress FFMPEG/H.264 error messages if needed # Set this environment variable to reduce noise from decoder errors @@ -20,8 +22,25 @@ os.environ["OPENCV_FFMPEG_LOGLEVEL"] = "-8" # Suppress FFMPEG warnings logger = logging.getLogger(__name__) +class FrameFileHandler(FileSystemEventHandler): + """File system event handler for frame file changes.""" + + def __init__(self, callback): + self.callback = callback + self.last_modified = 0 + + def on_modified(self, event): + if event.is_directory: + return + # Debounce rapid file changes + current_time = time.time() + if current_time - self.last_modified > 0.01: # 10ms debounce + self.last_modified = current_time + self.callback() + + class FFmpegRTSPReader: - """RTSP stream reader using subprocess FFmpeg with CUDA hardware acceleration.""" + """RTSP stream reader using subprocess FFmpeg with CUDA hardware acceleration and file watching.""" def __init__(self, camera_id: str, rtsp_url: str, max_retries: int = 3): self.camera_id = camera_id @@ -31,6 +50,8 @@ class FFmpegRTSPReader: self.stop_event = threading.Event() self.thread = None self.frame_callback: Optional[Callable] = None + self.observer = None + self.frame_ready_event = threading.Event() # Stream specs self.width = 1280 @@ -67,7 +88,6 @@ class FFmpegRTSPReader: def _start_ffmpeg_process(self): """Start FFmpeg subprocess with CUDA hardware acceleration writing to temp file.""" # Create temp file path for this camera - import tempfile self.temp_file = f"/tmp/claude/camera_{self.camera_id.replace(' ', '_')}.raw" os.makedirs("/tmp/claude", exist_ok=True) @@ -85,114 +105,113 @@ class FFmpegRTSPReader: ] try: + # Start FFmpeg detached - we don't need to communicate with it self.process = subprocess.Popen( cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - bufsize=0 + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL ) - logger.info(f"Started FFmpeg process for camera {self.camera_id} writing to {self.temp_file}") - - # Don't check process immediately - FFmpeg takes time to initialize - logger.info(f"Waiting for FFmpeg to initialize for camera {self.camera_id}...") + logger.info(f"Started FFmpeg process PID {self.process.pid} for camera {self.camera_id} -> {self.temp_file}") return True except Exception as e: logger.error(f"Failed to start FFmpeg for camera {self.camera_id}: {e}") return False + def _setup_file_watcher(self): + """Setup file system watcher for temp file.""" + if not os.path.exists(self.temp_file): + return + + # Setup file watcher + handler = FrameFileHandler(self._on_file_changed) + self.observer = Observer() + self.observer.schedule(handler, os.path.dirname(self.temp_file), recursive=False) + self.observer.start() + logger.info(f"Started file watcher for {self.temp_file}") + + def _on_file_changed(self): + """Called when temp file is modified.""" + if os.path.basename(self.temp_file) in str(self.temp_file): + self.frame_ready_event.set() + def _read_frames(self): - """Read frames from FFmpeg temp file.""" - consecutive_errors = 0 + """Reactively read frames when file changes.""" frame_count = 0 last_log_time = time.time() - bytes_per_frame = self.width * self.height * 3 # BGR = 3 bytes per pixel - last_file_size = 0 + bytes_per_frame = self.width * self.height * 3 + restart_check_interval = 10 # Check FFmpeg status every 10 seconds while not self.stop_event.is_set(): try: - # Start/restart FFmpeg process if needed + # Start FFmpeg if not running if not self.process or self.process.poll() is not None: + if self.process and self.process.poll() is not None: + logger.warning(f"FFmpeg process died for camera {self.camera_id}, restarting...") + if not self._start_ffmpeg_process(): time.sleep(5.0) continue - # Wait for temp file to exist and have content - if not os.path.exists(self.temp_file): - time.sleep(0.1) - continue + # Wait for temp file to be created + wait_count = 0 + while not os.path.exists(self.temp_file) and wait_count < 30: + time.sleep(1.0) + wait_count += 1 - # Check if file size changed (new frame available) - try: - current_file_size = os.path.getsize(self.temp_file) - if current_file_size <= last_file_size and current_file_size > 0: - # File size didn't increase, wait for next frame - time.sleep(0.05) # ~20 FPS max - continue - last_file_size = current_file_size - except OSError: - time.sleep(0.1) - continue - - # Read the latest frame from the end of file - try: - with open(self.temp_file, 'rb') as f: - # Seek to last complete frame - file_size = f.seek(0, 2) # Seek to end - if file_size < bytes_per_frame: - time.sleep(0.1) - continue - - # Read last complete frame - last_frame_offset = (file_size // bytes_per_frame - 1) * bytes_per_frame - f.seek(last_frame_offset) - frame_data = f.read(bytes_per_frame) - - if len(frame_data) != bytes_per_frame: - consecutive_errors += 1 - if consecutive_errors >= 30: - logger.error(f"Camera {self.camera_id}: Too many read errors, restarting FFmpeg") - if self.process: - self.process.terminate() - consecutive_errors = 0 - time.sleep(0.1) + if not os.path.exists(self.temp_file): + logger.error(f"Temp file not created after 30s for {self.camera_id}") continue - # Convert raw bytes to numpy array - frame = np.frombuffer(frame_data, dtype=np.uint8) - frame = frame.reshape((self.height, self.width, 3)) + # Setup file watcher + self._setup_file_watcher() - # Frame is valid - consecutive_errors = 0 - frame_count += 1 + # Wait for file change event (or timeout for health check) + if self.frame_ready_event.wait(timeout=restart_check_interval): + self.frame_ready_event.clear() - # Call frame callback - if self.frame_callback: - try: - self.frame_callback(self.camera_id, frame) - except Exception as e: - logger.error(f"Camera {self.camera_id}: Frame callback error: {e}") + # Read latest frame + try: + with open(self.temp_file, 'rb') as f: + # Get file size + f.seek(0, 2) + file_size = f.tell() - # Log progress - current_time = time.time() - if current_time - last_log_time >= 30: - logger.info(f"Camera {self.camera_id}: {frame_count} frames processed via temp file") - last_log_time = current_time + if file_size < bytes_per_frame: + continue - except IOError as e: - logger.debug(f"Camera {self.camera_id}: File read error: {e}") - time.sleep(0.1) - continue + # Read last complete frame + last_frame_offset = (file_size // bytes_per_frame - 1) * bytes_per_frame + f.seek(last_frame_offset) + frame_data = f.read(bytes_per_frame) + + if len(frame_data) == bytes_per_frame: + # Convert to numpy array + frame = np.frombuffer(frame_data, dtype=np.uint8) + frame = frame.reshape((self.height, self.width, 3)) + + # Call frame callback + if self.frame_callback: + self.frame_callback(self.camera_id, frame) + + frame_count += 1 + + # Log progress + current_time = time.time() + if current_time - last_log_time >= 30: + logger.info(f"Camera {self.camera_id}: {frame_count} frames processed reactively") + last_log_time = current_time + + except (IOError, OSError) as e: + logger.debug(f"Camera {self.camera_id}: File read error: {e}") except Exception as e: - logger.error(f"Camera {self.camera_id}: FFmpeg read error: {e}") - consecutive_errors += 1 - if consecutive_errors >= 30: - if self.process: - self.process.terminate() - consecutive_errors = 0 + logger.error(f"Camera {self.camera_id}: Error in reactive frame reading: {e}") time.sleep(1.0) # Cleanup + if self.observer: + self.observer.stop() + self.observer.join() if self.process: self.process.terminate() # Clean up temp file @@ -201,7 +220,7 @@ class FFmpegRTSPReader: os.remove(self.temp_file) except: pass - logger.info(f"FFmpeg reader thread ended for camera {self.camera_id}") + logger.info(f"Reactive FFmpeg reader ended for camera {self.camera_id}") logger = logging.getLogger(__name__) From 84144a295542752f64b9ef1a940ca95b6fc6dd73 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 02:20:14 +0700 Subject: [PATCH 24/28] refactor: update FFmpegRTSPReader to read and update a single frame in place for improved efficiency --- core/streaming/readers.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/core/streaming/readers.py b/core/streaming/readers.py index e221c4a..d6a1272 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -100,6 +100,7 @@ class FFmpegRTSPReader: '-f', 'rawvideo', '-pix_fmt', 'bgr24', '-an', # No audio + '-update', '1', # Update single frame in place '-y', # Overwrite output file self.temp_file ] @@ -169,19 +170,9 @@ class FFmpegRTSPReader: if self.frame_ready_event.wait(timeout=restart_check_interval): self.frame_ready_event.clear() - # Read latest frame + # Read current frame (file is always exactly one frame) try: with open(self.temp_file, 'rb') as f: - # Get file size - f.seek(0, 2) - file_size = f.tell() - - if file_size < bytes_per_frame: - continue - - # Read last complete frame - last_frame_offset = (file_size // bytes_per_frame - 1) * bytes_per_frame - f.seek(last_frame_offset) frame_data = f.read(bytes_per_frame) if len(frame_data) == bytes_per_frame: From 2742b86961f98832d2f734e19ea9eb2413dc4e39 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 02:26:44 +0700 Subject: [PATCH 25/28] refactor: enhance FFmpegRTSPReader to improve frame reading reliability with retry logic --- core/streaming/readers.py | 49 ++++++++++++++++++++++++++------------- requirements.txt | 3 ++- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/core/streaming/readers.py b/core/streaming/readers.py index d6a1272..b68a15b 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -170,27 +170,44 @@ class FFmpegRTSPReader: if self.frame_ready_event.wait(timeout=restart_check_interval): self.frame_ready_event.clear() - # Read current frame (file is always exactly one frame) + # Read current frame with concurrency safety try: - with open(self.temp_file, 'rb') as f: - frame_data = f.read(bytes_per_frame) + # Try to read frame multiple times to handle race conditions + frame_data = None + for attempt in range(3): + try: + with open(self.temp_file, 'rb') as f: + frame_data = f.read(bytes_per_frame) - if len(frame_data) == bytes_per_frame: - # Convert to numpy array - frame = np.frombuffer(frame_data, dtype=np.uint8) - frame = frame.reshape((self.height, self.width, 3)) + # Validate we got a complete frame + if len(frame_data) == bytes_per_frame: + break + else: + logger.debug(f"Camera {self.camera_id}: Partial read {len(frame_data)}/{bytes_per_frame}, attempt {attempt+1}") + time.sleep(0.01) # Brief wait before retry - # Call frame callback - if self.frame_callback: - self.frame_callback(self.camera_id, frame) + except (IOError, OSError) as e: + logger.debug(f"Camera {self.camera_id}: Read error on attempt {attempt+1}: {e}") + time.sleep(0.01) - frame_count += 1 + if frame_data and len(frame_data) == bytes_per_frame: + # Convert to numpy array + frame = np.frombuffer(frame_data, dtype=np.uint8) + frame = frame.reshape((self.height, self.width, 3)) - # Log progress - current_time = time.time() - if current_time - last_log_time >= 30: - logger.info(f"Camera {self.camera_id}: {frame_count} frames processed reactively") - last_log_time = current_time + # Call frame callback directly - trust the retry logic caught corruption + if self.frame_callback: + self.frame_callback(self.camera_id, frame) + + frame_count += 1 + + # Log progress + current_time = time.time() + if current_time - last_log_time >= 30: + logger.info(f"Camera {self.camera_id}: {frame_count} frames processed reactively") + last_log_time = current_time + else: + logger.debug(f"Camera {self.camera_id}: Failed to read complete frame after retries") except (IOError, OSError) as e: logger.debug(f"Camera {self.camera_id}: File read error: {e}") diff --git a/requirements.txt b/requirements.txt index 034d18e..2afeb0e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ fastapi[standard] redis urllib3<2.0.0 numpy -requests \ No newline at end of file +requests +watchdog \ No newline at end of file From 95c39a008f14b1795844e25fab42619a9b2211ee Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 02:35:27 +0700 Subject: [PATCH 26/28] refactor: suppress noisy watchdog debug logs for cleaner output --- core/streaming/readers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/streaming/readers.py b/core/streaming/readers.py index b68a15b..f9df506 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -21,6 +21,9 @@ os.environ["OPENCV_FFMPEG_LOGLEVEL"] = "-8" # Suppress FFMPEG warnings logger = logging.getLogger(__name__) +# Suppress noisy watchdog debug logs +logging.getLogger('watchdog.observers.inotify_buffer').setLevel(logging.CRITICAL) + class FrameFileHandler(FileSystemEventHandler): """File system event handler for frame file changes.""" From 73c33676811c1c3e15abc468faab6394fdded6fe Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 02:51:30 +0700 Subject: [PATCH 27/28] refactor: update FFmpegRTSPReader to use JPG format for single frame updates and improve image quality --- core/streaming/readers.py | 42 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/core/streaming/readers.py b/core/streaming/readers.py index f9df506..b623c49 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -94,16 +94,19 @@ class FFmpegRTSPReader: self.temp_file = f"/tmp/claude/camera_{self.camera_id.replace(' ', '_')}.raw" os.makedirs("/tmp/claude", exist_ok=True) + # Change to JPG format which properly supports -update 1 + self.temp_file = f"/tmp/claude/camera_{self.camera_id.replace(' ', '_')}.jpg" + cmd = [ 'ffmpeg', '-hwaccel', 'cuda', '-hwaccel_device', '0', '-rtsp_transport', 'tcp', '-i', self.rtsp_url, - '-f', 'rawvideo', - '-pix_fmt', 'bgr24', + '-f', 'image2', + '-update', '1', # This actually works with image2 format + '-q:v', '2', # High quality JPEG '-an', # No audio - '-update', '1', # Update single frame in place '-y', # Overwrite output file self.temp_file ] @@ -173,32 +176,27 @@ class FFmpegRTSPReader: if self.frame_ready_event.wait(timeout=restart_check_interval): self.frame_ready_event.clear() - # Read current frame with concurrency safety + # Read JPEG frame with concurrency safety try: - # Try to read frame multiple times to handle race conditions - frame_data = None + # Try to read JPEG multiple times to handle race conditions + frame = None for attempt in range(3): try: - with open(self.temp_file, 'rb') as f: - frame_data = f.read(bytes_per_frame) + # Read and decode JPEG directly + frame = cv2.imread(self.temp_file) - # Validate we got a complete frame - if len(frame_data) == bytes_per_frame: - break - else: - logger.debug(f"Camera {self.camera_id}: Partial read {len(frame_data)}/{bytes_per_frame}, attempt {attempt+1}") - time.sleep(0.01) # Brief wait before retry + if frame is not None and frame.shape == (self.height, self.width, 3): + break + else: + logger.debug(f"Camera {self.camera_id}: Invalid frame shape or None, attempt {attempt+1}") + time.sleep(0.01) # Brief wait before retry except (IOError, OSError) as e: logger.debug(f"Camera {self.camera_id}: Read error on attempt {attempt+1}: {e}") time.sleep(0.01) - if frame_data and len(frame_data) == bytes_per_frame: - # Convert to numpy array - frame = np.frombuffer(frame_data, dtype=np.uint8) - frame = frame.reshape((self.height, self.width, 3)) - - # Call frame callback directly - trust the retry logic caught corruption + if frame is not None: + # Call frame callback directly if self.frame_callback: self.frame_callback(self.camera_id, frame) @@ -207,10 +205,10 @@ class FFmpegRTSPReader: # Log progress current_time = time.time() if current_time - last_log_time >= 30: - logger.info(f"Camera {self.camera_id}: {frame_count} frames processed reactively") + logger.info(f"Camera {self.camera_id}: {frame_count} JPEG frames processed reactively") last_log_time = current_time else: - logger.debug(f"Camera {self.camera_id}: Failed to read complete frame after retries") + logger.debug(f"Camera {self.camera_id}: Failed to read valid JPEG after retries") except (IOError, OSError) as e: logger.debug(f"Camera {self.camera_id}: File read error: {e}") From fe0da18d0fefac3a0177a8bc8a319c2f7556593a Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 02:55:26 +0700 Subject: [PATCH 28/28] refactor: change temporary file format from JPG to PPM for improved frame reading --- core/streaming/readers.py | 53 ++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/core/streaming/readers.py b/core/streaming/readers.py index b623c49..e6eed55 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -94,8 +94,8 @@ class FFmpegRTSPReader: self.temp_file = f"/tmp/claude/camera_{self.camera_id.replace(' ', '_')}.raw" os.makedirs("/tmp/claude", exist_ok=True) - # Change to JPG format which properly supports -update 1 - self.temp_file = f"/tmp/claude/camera_{self.camera_id.replace(' ', '_')}.jpg" + # Use PPM format - uncompressed with header, supports -update 1 + self.temp_file = f"/tmp/claude/camera_{self.camera_id.replace(' ', '_')}.ppm" cmd = [ 'ffmpeg', @@ -104,8 +104,8 @@ class FFmpegRTSPReader: '-rtsp_transport', 'tcp', '-i', self.rtsp_url, '-f', 'image2', - '-update', '1', # This actually works with image2 format - '-q:v', '2', # High quality JPEG + '-update', '1', # Works with image2 format + '-pix_fmt', 'rgb24', # PPM uses RGB not BGR '-an', # No audio '-y', # Overwrite output file self.temp_file @@ -176,39 +176,28 @@ class FFmpegRTSPReader: if self.frame_ready_event.wait(timeout=restart_check_interval): self.frame_ready_event.clear() - # Read JPEG frame with concurrency safety + # Read PPM frame (uncompressed with header) try: - # Try to read JPEG multiple times to handle race conditions - frame = None - for attempt in range(3): - try: - # Read and decode JPEG directly - frame = cv2.imread(self.temp_file) + if os.path.exists(self.temp_file): + # Read PPM with OpenCV (handles RGB->BGR conversion automatically) + frame = cv2.imread(self.temp_file) - if frame is not None and frame.shape == (self.height, self.width, 3): - break - else: - logger.debug(f"Camera {self.camera_id}: Invalid frame shape or None, attempt {attempt+1}") - time.sleep(0.01) # Brief wait before retry + if frame is not None and frame.shape == (self.height, self.width, 3): + # Call frame callback directly + if self.frame_callback: + self.frame_callback(self.camera_id, frame) - except (IOError, OSError) as e: - logger.debug(f"Camera {self.camera_id}: Read error on attempt {attempt+1}: {e}") - time.sleep(0.01) + frame_count += 1 - if frame is not None: - # Call frame callback directly - if self.frame_callback: - self.frame_callback(self.camera_id, frame) - - frame_count += 1 - - # Log progress - current_time = time.time() - if current_time - last_log_time >= 30: - logger.info(f"Camera {self.camera_id}: {frame_count} JPEG frames processed reactively") - last_log_time = current_time + # Log progress + current_time = time.time() + if current_time - last_log_time >= 30: + logger.info(f"Camera {self.camera_id}: {frame_count} PPM frames processed reactively") + last_log_time = current_time + else: + logger.debug(f"Camera {self.camera_id}: Invalid PPM frame") else: - logger.debug(f"Camera {self.camera_id}: Failed to read valid JPEG after retries") + logger.debug(f"Camera {self.camera_id}: PPM file not found yet") except (IOError, OSError) as e: logger.debug(f"Camera {self.camera_id}: File read error: {e}")