From cd1359f5d227d29d3b576649b3d31c3c3b5307b8 Mon Sep 17 00:00:00 2001
From: ziesorx
Date: Fri, 26 Sep 2025 15:06:12 +0700
Subject: [PATCH 01/30] fix: enable hardware acceleration

---
 core/streaming/readers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/streaming/readers.py b/core/streaming/readers.py
index d5635ba..6a1dab8 100644
--- a/core/streaming/readers.py
+++ b/core/streaming/readers.py
@@ -106,8 +106,8 @@ class FFmpegRTSPReader:
         cmd = [
             'ffmpeg',
             # DO NOT REMOVE
-            # '-hwaccel', 'cuda',
-            # '-hwaccel_device', '0',
+            '-hwaccel', 'cuda',
+            '-hwaccel_device', '0',
             '-rtsp_transport', 'tcp',
             '-i', self.rtsp_url,
             '-f', 'image2pipe',  # Output images to pipe

From 2808316e94f09db23ef3a922b95aae97a9aec847 Mon Sep 17 00:00:00 2001
From: Siwat Sirichai
Date: Fri, 26 Sep 2025 19:42:41 +0700
Subject: [PATCH 02/30] fix: remove unused RTSPReader import and related code

---
 core/streaming/__init__.py |   3 +-
 core/streaming/manager.py  |   2 +-
 core/streaming/readers.py  | 444 +++++++++----------------------
 3 files changed, 112 insertions(+), 337 deletions(-)

diff --git a/core/streaming/__init__.py b/core/streaming/__init__.py
index d878aac..93005ab 100644
--- a/core/streaming/__init__.py
+++ b/core/streaming/__init__.py
@@ -2,13 +2,12 @@
 Streaming system for RTSP and HTTP camera feeds.
 Provides modular frame readers, buffers, and stream management.
 """
-from .readers import RTSPReader, HTTPSnapshotReader, FFmpegRTSPReader
+from .readers import HTTPSnapshotReader, FFmpegRTSPReader
 from .buffers import FrameBuffer, CacheBuffer, shared_frame_buffer, shared_cache_buffer
 from .manager import StreamManager, StreamConfig, SubscriptionInfo, shared_stream_manager, initialize_stream_manager

 __all__ = [
     # Readers
-    'RTSPReader',
     'HTTPSnapshotReader',
     'FFmpegRTSPReader',

diff --git a/core/streaming/manager.py b/core/streaming/manager.py
index 0c026e7..5b4637c 100644
--- a/core/streaming/manager.py
+++ b/core/streaming/manager.py
@@ -9,7 +9,7 @@ from typing import Dict, Set, Optional, List, Any
 from dataclasses import dataclass
 from collections import defaultdict

-from .readers import RTSPReader, HTTPSnapshotReader, FFmpegRTSPReader
+from .readers import HTTPSnapshotReader, FFmpegRTSPReader
 from .buffers import shared_cache_buffer
 from ..tracking.integration import TrackingPipelineIntegration

diff --git a/core/streaming/readers.py b/core/streaming/readers.py
index 6a1dab8..5684997 100644
--- a/core/streaming/readers.py
+++ b/core/streaming/readers.py
@@ -8,16 +8,10 @@ import time
 import threading
 import requests
 import numpy as np
-import os
 import subprocess
-# import fcntl  # No longer needed with atomic file operations
 from typing import Optional, Callable
-# Removed watchdog imports - no longer using file watching

-# Suppress FFMPEG/H.264 error messages if needed
-# Set this environment variable to reduce noise from decoder errors
-os.environ["OPENCV_LOG_LEVEL"] = "ERROR"
-os.environ["OPENCV_FFMPEG_LOGLEVEL"] = "-8"  # Suppress FFMPEG warnings
+

 logger = logging.getLogger(__name__)

@@ -65,12 +59,20 @@ class FFmpegRTSPReader:
         self.process = None
         self.stop_event = threading.Event()
         self.thread = None
+        self.stderr_thread = None
         self.frame_callback: Optional[Callable] = None

         # Expected stream specs (for reference, actual dimensions read from PPM header)
         self.width = 1280
         self.height = 720

+        # Watchdog timers for stream reliability
+        self.process_start_time = None
+        self.last_frame_time = None
+        self.is_restart = False  # Track if this is a restart (shorter timeout)
+        
self.first_start_timeout = 30.0 # 30s timeout on first start + self.restart_timeout = 15.0 # 15s timeout after restart + def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]): """Set callback function to handle captured frames.""" self.frame_callback = callback @@ -97,6 +99,8 @@ class FFmpegRTSPReader: self.process.kill() if self.thread: self.thread.join(timeout=5.0) + if self.stderr_thread: + self.stderr_thread.join(timeout=2.0) log_info(self.camera_id, "Stream stopped") # Removed _probe_stream_info - BMP headers contain dimensions @@ -122,9 +126,30 @@ class FFmpegRTSPReader: self.process = subprocess.Popen( cmd, stdout=subprocess.PIPE, # Capture stdout for frame data - stderr=subprocess.DEVNULL, + stderr=subprocess.PIPE, # Capture stderr for error logging bufsize=0 # Unbuffered for real-time processing ) + + # Start stderr reading thread + if self.stderr_thread and self.stderr_thread.is_alive(): + # Stop previous stderr thread + try: + self.stderr_thread.join(timeout=1.0) + except: + pass + + self.stderr_thread = threading.Thread(target=self._read_stderr, daemon=True) + self.stderr_thread.start() + + # Set process start time for watchdog + self.process_start_time = time.time() + self.last_frame_time = None # Reset frame time + + # After successful restart, next timeout will be back to 30s + if self.is_restart: + log_info(self.camera_id, f"FFmpeg restarted successfully, next timeout: {self.first_start_timeout}s") + self.is_restart = False + return True except Exception as e: log_error(self.camera_id, f"FFmpeg startup failed: {e}") @@ -180,6 +205,74 @@ class FFmpegRTSPReader: except Exception: return None # Error reading frame silently + def _read_stderr(self): + """Read and log FFmpeg stderr output in background thread.""" + if not self.process or not self.process.stderr: + return + + try: + while self.process and self.process.poll() is None: + try: + line = self.process.stderr.readline() + if line: + error_msg = line.decode('utf-8', errors='ignore').strip() + if error_msg and not self.stop_event.is_set(): + # Filter out common noise but log actual errors + if any(keyword in error_msg.lower() for keyword in ['error', 'failed', 'cannot', 'invalid']): + log_error(self.camera_id, f"FFmpeg: {error_msg}") + elif 'warning' in error_msg.lower(): + log_warning(self.camera_id, f"FFmpeg: {error_msg}") + except Exception: + break + except Exception: + pass + + def _check_watchdog_timeout(self) -> bool: + """Check if watchdog timeout has been exceeded.""" + if not self.process_start_time: + return False + + current_time = time.time() + time_since_start = current_time - self.process_start_time + + # Determine timeout based on whether this is a restart + timeout = self.restart_timeout if self.is_restart else self.first_start_timeout + + # If no frames received yet, check against process start time + if not self.last_frame_time: + if time_since_start > timeout: + log_warning(self.camera_id, f"Watchdog timeout: No frames for {time_since_start:.1f}s (limit: {timeout}s)") + return True + else: + # Check time since last frame + time_since_frame = current_time - self.last_frame_time + if time_since_frame > timeout: + log_warning(self.camera_id, f"Watchdog timeout: No frames for {time_since_frame:.1f}s (limit: {timeout}s)") + return True + + return False + + def _restart_ffmpeg_process(self): + """Restart FFmpeg process due to watchdog timeout.""" + log_warning(self.camera_id, "Watchdog triggered FFmpeg restart") + + # Terminate current process + if self.process: + try: + 
self.process.terminate() + self.process.wait(timeout=3) + except subprocess.TimeoutExpired: + self.process.kill() + except Exception: + pass + self.process = None + + # Mark as restart for shorter timeout + self.is_restart = True + + # Small delay before restart + time.sleep(1.0) + def _read_frames(self): """Read frames directly from FFmpeg stdout pipe.""" frame_count = 0 @@ -187,6 +280,12 @@ class FFmpegRTSPReader: while not self.stop_event.is_set(): try: + # Check watchdog timeout if process is running + if self.process and self.process.poll() is None: + if self._check_watchdog_timeout(): + self._restart_ffmpeg_process() + continue + # Start FFmpeg if not running if not self.process or self.process.poll() is not None: if self.process and self.process.poll() is not None: @@ -204,6 +303,9 @@ class FFmpegRTSPReader: if frame is None: continue + # Update watchdog - we got a frame + self.last_frame_time = time.time() + # Call frame callback if self.frame_callback: self.frame_callback(self.camera_id, frame) @@ -234,332 +336,6 @@ class FFmpegRTSPReader: logger = logging.getLogger(__name__) -class RTSPReader: - """RTSP stream frame reader optimized for 1280x720 @ 6fps streams.""" - - def __init__(self, camera_id: str, rtsp_url: str, max_retries: int = 3): - self.camera_id = camera_id - self.rtsp_url = rtsp_url - self.max_retries = max_retries - self.cap = None - self.stop_event = threading.Event() - self.thread = None - self.frame_callback: Optional[Callable] = None - - # Expected stream specifications - self.expected_width = 1280 - self.expected_height = 720 - self.expected_fps = 6 - - # Frame processing parameters - self.error_recovery_delay = 5.0 # Increased from 2.0 for stability - self.max_consecutive_errors = 30 # Increased from 10 to handle network jitter - self.stream_timeout = 30.0 - - def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]): - """Set callback function to handle captured frames.""" - self.frame_callback = callback - - def start(self): - """Start the RTSP reader thread.""" - if self.thread and self.thread.is_alive(): - logger.warning(f"RTSP reader for {self.camera_id} already running") - return - - self.stop_event.clear() - self.thread = threading.Thread(target=self._read_frames, daemon=True) - self.thread.start() - logger.info(f"Started RTSP reader for camera {self.camera_id}") - - def stop(self): - """Stop the RTSP reader thread.""" - self.stop_event.set() - if self.thread: - self.thread.join(timeout=5.0) - if self.cap: - self.cap.release() - logger.info(f"Stopped RTSP reader for camera {self.camera_id}") - - def _read_frames(self): - """Main frame reading loop with H.264 error recovery.""" - consecutive_errors = 0 - frame_count = 0 - last_log_time = time.time() - last_successful_frame_time = time.time() - - while not self.stop_event.is_set(): - try: - # Initialize/reinitialize capture if needed - if not self.cap or not self.cap.isOpened(): - if not self._initialize_capture(): - time.sleep(self.error_recovery_delay) - continue - last_successful_frame_time = time.time() - - # Check for stream timeout - if time.time() - last_successful_frame_time > self.stream_timeout: - logger.warning(f"Camera {self.camera_id}: Stream timeout, reinitializing") - self._reinitialize_capture() - last_successful_frame_time = time.time() - continue - - # Read frame immediately without rate limiting for minimum latency - try: - ret, frame = self.cap.read() - if ret and frame is None: - # Grab succeeded but retrieve failed - decoder issue - logger.error(f"Camera 
{self.camera_id}: Frame grab OK but decode failed") - except Exception as read_error: - logger.error(f"Camera {self.camera_id}: cap.read() threw exception: {type(read_error).__name__}: {read_error}") - ret, frame = False, None - - if not ret or frame is None: - consecutive_errors += 1 - - # Enhanced logging to diagnose the issue - logger.error(f"Camera {self.camera_id}: cap.read() failed - ret={ret}, frame={frame is not None}") - - # Try to get more info from the capture - try: - if self.cap and self.cap.isOpened(): - backend = self.cap.getBackendName() - pos_frames = self.cap.get(cv2.CAP_PROP_POS_FRAMES) - logger.error(f"Camera {self.camera_id}: Capture open, backend: {backend}, pos_frames: {pos_frames}") - else: - logger.error(f"Camera {self.camera_id}: Capture is closed or None!") - except Exception as info_error: - logger.error(f"Camera {self.camera_id}: Error getting capture info: {type(info_error).__name__}: {info_error}") - - if consecutive_errors >= self.max_consecutive_errors: - logger.error(f"Camera {self.camera_id}: Too many consecutive errors ({consecutive_errors}), reinitializing") - self._reinitialize_capture() - consecutive_errors = 0 - time.sleep(self.error_recovery_delay) - else: - # Skip corrupted frame and continue with exponential backoff - if consecutive_errors <= 5: - logger.debug(f"Camera {self.camera_id}: Frame read failed (error {consecutive_errors})") - elif consecutive_errors % 10 == 0: # Log every 10th error after 5 - logger.warning(f"Camera {self.camera_id}: Continuing frame read failures (error {consecutive_errors})") - - # Exponential backoff with cap at 1 second - sleep_time = min(0.1 * (1.5 ** min(consecutive_errors, 10)), 1.0) - time.sleep(sleep_time) - continue - - # Accept any valid frame dimensions - don't force specific resolution - if frame.shape[1] <= 0 or frame.shape[0] <= 0: - consecutive_errors += 1 - continue - - # Check for corrupted frames (all black, all white, excessive noise) - if self._is_frame_corrupted(frame): - logger.debug(f"Camera {self.camera_id}: Corrupted frame detected, skipping") - consecutive_errors += 1 - continue - - # Frame is valid - consecutive_errors = 0 - frame_count += 1 - last_successful_frame_time = time.time() - - # Call frame callback - if self.frame_callback: - try: - self.frame_callback(self.camera_id, frame) - except Exception as e: - logger.error(f"Camera {self.camera_id}: Frame callback error: {e}") - - # Log progress every 30 seconds - current_time = time.time() - if current_time - last_log_time >= 30: - logger.info(f"Camera {self.camera_id}: {frame_count} frames processed") - last_log_time = current_time - - except Exception as e: - logger.error(f"Camera {self.camera_id}: Error in frame reading loop: {e}") - consecutive_errors += 1 - if consecutive_errors >= self.max_consecutive_errors: - self._reinitialize_capture() - consecutive_errors = 0 - time.sleep(self.error_recovery_delay) - - # Cleanup - if self.cap: - self.cap.release() - logger.info(f"RTSP reader thread ended for camera {self.camera_id}") - - def _initialize_capture(self) -> bool: - """Initialize video capture with FFmpeg hardware acceleration (CUVID/NVDEC) for 1280x720@6fps.""" - try: - # Release previous capture if exists - if self.cap: - self.cap.release() - time.sleep(0.5) - - logger.info(f"Initializing capture for camera {self.camera_id} with FFmpeg hardware acceleration") - hw_accel_success = False - - # Method 1: Try OpenCV CUDA VideoReader (if built with CUVID support) - if not hw_accel_success: - try: - # Check if OpenCV was built with CUDA 
codec support - build_info = cv2.getBuildInformation() - if 'cudacodec' in build_info or 'CUVID' in build_info: - logger.info(f"Attempting OpenCV CUDA VideoReader for camera {self.camera_id}") - - # Use OpenCV's CUDA backend - self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG, [ - cv2.CAP_PROP_HW_ACCELERATION, cv2.VIDEO_ACCELERATION_ANY - ]) - - if self.cap.isOpened(): - hw_accel_success = True - logger.info(f"Camera {self.camera_id}: Using OpenCV CUDA hardware acceleration") - else: - logger.debug(f"Camera {self.camera_id}: OpenCV not built with CUDA codec support") - except Exception as e: - logger.debug(f"Camera {self.camera_id}: OpenCV CUDA not available: {e}") - - # Method 2: Try FFmpeg with optimal hardware acceleration (CUVID/NVDEC) - if not hw_accel_success: - try: - from core.utils.ffmpeg_detector import get_optimal_rtsp_options - import os - - # Get optimal FFmpeg options based on detected capabilities - optimal_options = get_optimal_rtsp_options(self.rtsp_url) - os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = optimal_options - - logger.info(f"Attempting FFmpeg with detected hardware acceleration for camera {self.camera_id}") - logger.debug(f"Camera {self.camera_id}: Using FFmpeg options: {optimal_options}") - - self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) - - if self.cap.isOpened(): - hw_accel_success = True - # Try to get backend info to confirm hardware acceleration - backend = self.cap.getBackendName() - logger.info(f"Camera {self.camera_id}: Using FFmpeg hardware acceleration (backend: {backend})") - except Exception as e: - logger.debug(f"Camera {self.camera_id}: FFmpeg optimal hardware acceleration not available: {e}") - - # Method 3: Try FFmpeg with NVIDIA NVDEC (better for RTX 3060) - if not hw_accel_success: - try: - import os - os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'hwaccel;cuda|hwaccel_device;0|rtsp_transport;tcp' - - logger.info(f"Attempting FFmpeg with NVDEC hardware acceleration for camera {self.camera_id}") - self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) - - if self.cap.isOpened(): - hw_accel_success = True - logger.info(f"Camera {self.camera_id}: Using FFmpeg NVDEC hardware acceleration") - except Exception as e: - logger.debug(f"Camera {self.camera_id}: FFmpeg NVDEC not available: {e}") - - # Method 4: Try FFmpeg with VAAPI (Intel/AMD GPUs) - if not hw_accel_success: - try: - import os - os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'hwaccel;vaapi|hwaccel_device;/dev/dri/renderD128|video_codec;h264|rtsp_transport;tcp' - - logger.info(f"Attempting FFmpeg with VAAPI for camera {self.camera_id}") - self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) - - if self.cap.isOpened(): - hw_accel_success = True - logger.info(f"Camera {self.camera_id}: Using FFmpeg VAAPI hardware acceleration") - except Exception as e: - logger.debug(f"Camera {self.camera_id}: FFmpeg VAAPI not available: {e}") - - # Fallback: Standard FFmpeg with software decoding - if not hw_accel_success: - logger.warning(f"Camera {self.camera_id}: Hardware acceleration not available, falling back to software decoding") - import os - os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'rtsp_transport;tcp' - self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG) - - if not self.cap.isOpened(): - logger.error(f"Failed to open stream for camera {self.camera_id}") - return False - - # Don't force resolution/fps - let the stream determine its natural specs - # The camera will provide whatever resolution/fps it supports - - - # Set FFMPEG options for better H.264 
handling - self.cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'H264')) - - # Verify stream properties - actual_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - actual_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - actual_fps = self.cap.get(cv2.CAP_PROP_FPS) - - logger.info(f"Camera {self.camera_id} initialized: {actual_width}x{actual_height} @ {actual_fps}fps") - - # Read and discard first few frames to stabilize stream - for _ in range(5): - ret, _ = self.cap.read() - if not ret: - logger.warning(f"Camera {self.camera_id}: Failed to read initial frames") - time.sleep(0.1) - - return True - - except Exception as e: - logger.error(f"Error initializing capture for camera {self.camera_id}: {e}") - return False - - def _reinitialize_capture(self): - """Reinitialize capture after errors with retry logic.""" - logger.info(f"Reinitializing capture for camera {self.camera_id}") - if self.cap: - self.cap.release() - self.cap = None - - # Longer delay before reconnection to avoid rapid reconnect loops - time.sleep(3.0) - - # Retry initialization up to 3 times - for attempt in range(3): - if self._initialize_capture(): - logger.info(f"Successfully reinitialized camera {self.camera_id} on attempt {attempt + 1}") - break - else: - logger.warning(f"Failed to reinitialize camera {self.camera_id} on attempt {attempt + 1}") - time.sleep(2.0) - - def _is_frame_corrupted(self, frame: np.ndarray) -> bool: - """Check if frame is corrupted (all black, all white, or excessive noise).""" - if frame is None or frame.size == 0: - return True - - # Check mean and standard deviation - mean = np.mean(frame) - std = np.std(frame) - - # All black or all white - if mean < 5 or mean > 250: - return True - - # No variation (stuck frame) - if std < 1: - return True - - # Excessive noise (corrupted H.264 decode) - # Calculate edge density as corruption indicator - edges = cv2.Canny(frame, 50, 150) - edge_density = np.sum(edges > 0) / edges.size - - # Too many edges indicate corruption - if edge_density > 0.5: - return True - - return False - - class HTTPSnapshotReader: """HTTP snapshot reader optimized for 2560x1440 (2K) high quality images.""" From 33d738b31b353433d104ff0104c6bb49ffe8ac7e Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 19:42:57 +0700 Subject: [PATCH 03/30] fix: remove unused watchdog logging configuration and FrameFileHandler --- core/streaming/readers.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/core/streaming/readers.py b/core/streaming/readers.py index 5684997..c8c0ec3 100644 --- a/core/streaming/readers.py +++ b/core/streaming/readers.py @@ -43,11 +43,6 @@ def log_info(camera_id: str, message: str): """Log info in cyan""" logger.info(f"{Colors.CYAN}[{camera_id}] {message}{Colors.END}") -# Removed watchdog logging configuration - no longer using file watching - - -# Removed FrameFileHandler - no longer using file watching - class FFmpegRTSPReader: """RTSP stream reader using subprocess FFmpeg piping frames directly to buffer.""" From d8d1b33cd86490cc075a4ca8a208dd68099f86e5 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 19:47:13 +0700 Subject: [PATCH 04/30] feat: add GPU accelerated libraries --- requirements.base.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements.base.txt b/requirements.base.txt index 3511dd4..722962f 100644 --- a/requirements.base.txt +++ b/requirements.base.txt @@ -7,4 +7,7 @@ filterpy psycopg2-binary lap>=0.5.12 pynvml -PyTurboJPEG \ No newline at end of file +PyTurboJPEG 
+PyNvVideoCodec +pycuda +cupy-cuda12x \ No newline at end of file From 2b382210eb702a0ff87a5ad64e721f2881deffec Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Fri, 26 Sep 2025 20:03:09 +0700 Subject: [PATCH 05/30] Refactor streaming readers: Split into modular files and implement base class - Removed the existing `readers.py` file and created separate modules for `FFmpegRTSPReader`, `HTTPSnapshotReader`, and utility functions. - Introduced an abstract base class `VideoReader` to standardize the interface for video stream readers. - Updated `FFmpegRTSPReader` and `HTTPSnapshotReader` to inherit from `VideoReader` and implement required methods. - Enhanced logging utilities for better readability and maintainability. - Removed `pycuda` from requirements as it is no longer needed. --- core/streaming/readers.py | 557 ------------------------ core/streaming/readers/__init__.py | 18 + core/streaming/readers/base.py | 65 +++ core/streaming/readers/ffmpeg_rtsp.py | 302 +++++++++++++ core/streaming/readers/http_snapshot.py | 249 +++++++++++ core/streaming/readers/utils.py | 38 ++ requirements.base.txt | 1 - 7 files changed, 672 insertions(+), 558 deletions(-) delete mode 100644 core/streaming/readers.py create mode 100644 core/streaming/readers/__init__.py create mode 100644 core/streaming/readers/base.py create mode 100644 core/streaming/readers/ffmpeg_rtsp.py create mode 100644 core/streaming/readers/http_snapshot.py create mode 100644 core/streaming/readers/utils.py diff --git a/core/streaming/readers.py b/core/streaming/readers.py deleted file mode 100644 index c8c0ec3..0000000 --- a/core/streaming/readers.py +++ /dev/null @@ -1,557 +0,0 @@ -""" -Frame readers for RTSP streams and HTTP snapshots. -Optimized for 1280x720@6fps RTSP and 2560x1440 HTTP snapshots. 
-""" -import cv2 -import logging -import time -import threading -import requests -import numpy as np -import subprocess -from typing import Optional, Callable - - - -logger = logging.getLogger(__name__) - -# Color codes for pretty logging -class Colors: - GREEN = '\033[92m' - YELLOW = '\033[93m' - RED = '\033[91m' - BLUE = '\033[94m' - PURPLE = '\033[95m' - CYAN = '\033[96m' - WHITE = '\033[97m' - BOLD = '\033[1m' - END = '\033[0m' - -def log_success(camera_id: str, message: str): - """Log success messages in green""" - logger.info(f"{Colors.GREEN}[{camera_id}] {message}{Colors.END}") - -def log_warning(camera_id: str, message: str): - """Log warnings in yellow""" - logger.warning(f"{Colors.YELLOW}[{camera_id}] {message}{Colors.END}") - -def log_error(camera_id: str, message: str): - """Log errors in red""" - logger.error(f"{Colors.RED}[{camera_id}] {message}{Colors.END}") - -def log_info(camera_id: str, message: str): - """Log info in cyan""" - logger.info(f"{Colors.CYAN}[{camera_id}] {message}{Colors.END}") - - -class FFmpegRTSPReader: - """RTSP stream reader using subprocess FFmpeg piping frames directly to buffer.""" - - def __init__(self, camera_id: str, rtsp_url: str, max_retries: int = 3): - self.camera_id = camera_id - self.rtsp_url = rtsp_url - self.max_retries = max_retries - self.process = None - self.stop_event = threading.Event() - self.thread = None - self.stderr_thread = None - self.frame_callback: Optional[Callable] = None - - # Expected stream specs (for reference, actual dimensions read from PPM header) - self.width = 1280 - self.height = 720 - - # Watchdog timers for stream reliability - self.process_start_time = None - self.last_frame_time = None - self.is_restart = False # Track if this is a restart (shorter timeout) - self.first_start_timeout = 30.0 # 30s timeout on first start - self.restart_timeout = 15.0 # 15s timeout after restart - - def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]): - """Set callback function to handle captured frames.""" - self.frame_callback = callback - - def start(self): - """Start the FFmpeg subprocess reader.""" - if self.thread and self.thread.is_alive(): - logger.warning(f"FFmpeg reader for {self.camera_id} already running") - return - - self.stop_event.clear() - self.thread = threading.Thread(target=self._read_frames, daemon=True) - self.thread.start() - log_success(self.camera_id, "Stream started") - - def stop(self): - """Stop the FFmpeg subprocess reader.""" - self.stop_event.set() - if self.process: - self.process.terminate() - try: - self.process.wait(timeout=5) - except subprocess.TimeoutExpired: - self.process.kill() - if self.thread: - self.thread.join(timeout=5.0) - if self.stderr_thread: - self.stderr_thread.join(timeout=2.0) - log_info(self.camera_id, "Stream stopped") - - # Removed _probe_stream_info - BMP headers contain dimensions - - def _start_ffmpeg_process(self): - """Start FFmpeg subprocess outputting BMP frames to stdout pipe.""" - cmd = [ - 'ffmpeg', - # DO NOT REMOVE - '-hwaccel', 'cuda', - '-hwaccel_device', '0', - '-rtsp_transport', 'tcp', - '-i', self.rtsp_url, - '-f', 'image2pipe', # Output images to pipe - '-vcodec', 'bmp', # BMP format with header containing dimensions - # Use native stream resolution and framerate - '-an', # No audio - '-' # Output to stdout - ] - - try: - # Start FFmpeg with stdout pipe to read frames directly - self.process = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, # Capture stdout for frame data - stderr=subprocess.PIPE, # Capture stderr for error 
logging
-                bufsize=0  # Unbuffered for real-time processing
-            )
-
-            # Start stderr reading thread
-            if self.stderr_thread and self.stderr_thread.is_alive():
-                # Stop previous stderr thread
-                try:
-                    self.stderr_thread.join(timeout=1.0)
-                except:
-                    pass
-
-            self.stderr_thread = threading.Thread(target=self._read_stderr, daemon=True)
-            self.stderr_thread.start()
-
-            # Set process start time for watchdog
-            self.process_start_time = time.time()
-            self.last_frame_time = None  # Reset frame time
-
-            # After successful restart, next timeout will be back to 30s
-            if self.is_restart:
-                log_info(self.camera_id, f"FFmpeg restarted successfully, next timeout: {self.first_start_timeout}s")
-                self.is_restart = False
-
-            return True
-        except Exception as e:
-            log_error(self.camera_id, f"FFmpeg startup failed: {e}")
-            return False
-
-    def _read_bmp_frame(self, pipe):
-        """Read BMP frame from pipe - BMP header contains dimensions."""
-        try:
-            # Read BMP header (14 bytes file header + 40 bytes info header = 54 bytes minimum)
-            header_data = b''
-            bytes_to_read = 54
-
-            while len(header_data) < bytes_to_read:
-                chunk = pipe.read(bytes_to_read - len(header_data))
-                if not chunk:
-                    return None  # Silent end of stream
-                header_data += chunk
-
-            # Parse BMP header
-            if header_data[:2] != b'BM':
-                return None  # Invalid format, skip frame silently
-
-            # Extract file size from header (bytes 2-5)
-            import struct
-            file_size = struct.unpack('<I', header_data[2:6])[0]
-
-            # Read the rest of the BMP file (file size minus header already read)
-            remaining_size = file_size - len(header_data)
-            bmp_data = header_data
-            while remaining_size > 0:
-                chunk = pipe.read(remaining_size)
-                if not chunk:
-                    return None  # Stream ended mid-frame
-                bmp_data += chunk
-                remaining_size -= len(chunk)
-
-            # Decode BMP directly from memory
-            frame = cv2.imdecode(np.frombuffer(bmp_data, dtype=np.uint8), cv2.IMREAD_COLOR)
-            if frame is None:
-                return None  # Decode failed silently
-
-            return frame
-
-        except Exception:
-            return None  # Error reading frame silently
-
-    def _read_stderr(self):
-        """Read and log FFmpeg stderr output in background thread."""
-        if not self.process or not self.process.stderr:
-            return
-
-        try:
-            while self.process and self.process.poll() is None:
-                try:
-                    line = self.process.stderr.readline()
-                    if line:
-                        error_msg = line.decode('utf-8', errors='ignore').strip()
-                        if error_msg and not self.stop_event.is_set():
-                            # Filter out common noise but log actual errors
-                            if any(keyword in error_msg.lower() for keyword in ['error', 'failed', 'cannot', 'invalid']):
-                                log_error(self.camera_id, f"FFmpeg: {error_msg}")
-                            elif 'warning' in error_msg.lower():
-                                log_warning(self.camera_id, f"FFmpeg: {error_msg}")
-                except Exception:
-                    break
-        except Exception:
-            pass
-
-    def _check_watchdog_timeout(self) -> bool:
-        """Check if watchdog timeout has been exceeded."""
-        if not self.process_start_time:
-            return False
-
-        current_time = time.time()
-        time_since_start = current_time - self.process_start_time
-
-        # Determine timeout based on whether this is a restart
-        timeout = self.restart_timeout if self.is_restart else self.first_start_timeout
-
-        # If no frames received yet, check against process start time
-        if not self.last_frame_time:
-            if time_since_start > timeout:
-                log_warning(self.camera_id, f"Watchdog timeout: No frames for {time_since_start:.1f}s (limit: {timeout}s)")
-                return True
-        else:
-            # Check time since last frame
-            time_since_frame = current_time - self.last_frame_time
-            if time_since_frame > timeout:
-                log_warning(self.camera_id, f"Watchdog timeout: No frames for {time_since_frame:.1f}s (limit: {timeout}s)")
-                return True
-
-        return False
-
-    def _restart_ffmpeg_process(self):
-        """Restart FFmpeg process due to watchdog timeout."""
-        log_warning(self.camera_id, "Watchdog triggered FFmpeg restart")
-
-        # Terminate current process
-        if self.process:
-            try:
-                self.process.terminate()
-                self.process.wait(timeout=3)
-            except subprocess.TimeoutExpired:
-                self.process.kill()
-            except Exception:
-                pass
-            self.process = None
-
-        # Mark as restart for shorter timeout
-        self.is_restart = True
-
-        # Small delay before restart
-        time.sleep(1.0)
-
-    def _read_frames(self):
-        """Read frames directly from FFmpeg stdout pipe."""
-        frame_count = 0
-        last_log_time = time.time()
-
-        while not self.stop_event.is_set():
-            try:
-                # Check watchdog timeout if process is running
-                if self.process and self.process.poll() is None:
-                    if self._check_watchdog_timeout():
-                        self._restart_ffmpeg_process()
-                        continue
-
-                # Start FFmpeg if not running
-                if not self.process or self.process.poll() is not None:
-                    if self.process and self.process.poll() is not None:
-                        log_warning(self.camera_id, "Stream disconnected, reconnecting...")
-
-                    if not self._start_ffmpeg_process():
-                        time.sleep(5.0)
-                        continue
-
-                # Read frames directly from FFmpeg stdout
-                try:
-                    if self.process and self.process.stdout:
-                        # Read 
BMP frame data - frame = self._read_bmp_frame(self.process.stdout) - if frame is None: - continue - - # Update watchdog - we got a frame - self.last_frame_time = time.time() - - # Call frame callback - if self.frame_callback: - self.frame_callback(self.camera_id, frame) - - frame_count += 1 - - # Log progress every 60 seconds (quieter) - current_time = time.time() - if current_time - last_log_time >= 60: - log_success(self.camera_id, f"{frame_count} frames captured ({frame.shape[1]}x{frame.shape[0]})") - last_log_time = current_time - - except Exception: - # Process might have died, let it restart on next iteration - if self.process: - self.process.terminate() - self.process = None - time.sleep(1.0) - - except Exception: - time.sleep(1.0) - - # Cleanup - if self.process: - self.process.terminate() - - -logger = logging.getLogger(__name__) - - -class HTTPSnapshotReader: - """HTTP snapshot reader optimized for 2560x1440 (2K) high quality images.""" - - def __init__(self, camera_id: str, snapshot_url: str, interval_ms: int = 5000, max_retries: int = 3): - self.camera_id = camera_id - self.snapshot_url = snapshot_url - self.interval_ms = interval_ms - self.max_retries = max_retries - self.stop_event = threading.Event() - self.thread = None - self.frame_callback: Optional[Callable] = None - - # Expected snapshot specifications - self.expected_width = 2560 - self.expected_height = 1440 - self.max_file_size = 10 * 1024 * 1024 # 10MB max for 2K image - - def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]): - """Set callback function to handle captured frames.""" - self.frame_callback = callback - - def start(self): - """Start the snapshot reader thread.""" - if self.thread and self.thread.is_alive(): - logger.warning(f"Snapshot reader for {self.camera_id} already running") - return - - self.stop_event.clear() - self.thread = threading.Thread(target=self._read_snapshots, daemon=True) - self.thread.start() - logger.info(f"Started snapshot reader for camera {self.camera_id}") - - def stop(self): - """Stop the snapshot reader thread.""" - self.stop_event.set() - if self.thread: - self.thread.join(timeout=5.0) - logger.info(f"Stopped snapshot reader for camera {self.camera_id}") - - def _read_snapshots(self): - """Main snapshot reading loop for high quality 2K images.""" - retries = 0 - frame_count = 0 - last_log_time = time.time() - interval_seconds = self.interval_ms / 1000.0 - - logger.info(f"Snapshot interval for camera {self.camera_id}: {interval_seconds}s") - - while not self.stop_event.is_set(): - try: - start_time = time.time() - frame = self._fetch_snapshot() - - if frame is None: - retries += 1 - logger.warning(f"Failed to fetch snapshot for camera {self.camera_id}, retry {retries}/{self.max_retries}") - - if self.max_retries != -1 and retries > self.max_retries: - logger.error(f"Max retries reached for snapshot camera {self.camera_id}") - break - - time.sleep(min(2.0, interval_seconds)) - continue - - # Accept any valid image dimensions - don't force specific resolution - if frame.shape[1] <= 0 or frame.shape[0] <= 0: - logger.warning(f"Camera {self.camera_id}: Invalid frame dimensions {frame.shape[1]}x{frame.shape[0]}") - continue - - # Reset retry counter on successful fetch - retries = 0 - frame_count += 1 - - # Call frame callback - if self.frame_callback: - try: - self.frame_callback(self.camera_id, frame) - except Exception as e: - logger.error(f"Camera {self.camera_id}: Frame callback error: {e}") - - # Log progress every 30 seconds - current_time = time.time() - 
if current_time - last_log_time >= 30: - logger.info(f"Camera {self.camera_id}: {frame_count} snapshots processed") - last_log_time = current_time - - # Wait for next interval - elapsed = time.time() - start_time - sleep_time = max(0, interval_seconds - elapsed) - if sleep_time > 0: - self.stop_event.wait(sleep_time) - - except Exception as e: - logger.error(f"Error in snapshot loop for camera {self.camera_id}: {e}") - retries += 1 - if self.max_retries != -1 and retries > self.max_retries: - break - time.sleep(min(2.0, interval_seconds)) - - logger.info(f"Snapshot reader thread ended for camera {self.camera_id}") - - def _fetch_snapshot(self) -> Optional[np.ndarray]: - """Fetch a single high quality snapshot from HTTP URL.""" - try: - # Parse URL for authentication - from urllib.parse import urlparse - parsed_url = urlparse(self.snapshot_url) - - headers = { - 'User-Agent': 'Python-Detector-Worker/1.0', - 'Accept': 'image/jpeg, image/png, image/*' - } - auth = None - - if parsed_url.username and parsed_url.password: - from requests.auth import HTTPBasicAuth, HTTPDigestAuth - auth = HTTPBasicAuth(parsed_url.username, parsed_url.password) - - # Reconstruct URL without credentials - clean_url = f"{parsed_url.scheme}://{parsed_url.hostname}" - if parsed_url.port: - clean_url += f":{parsed_url.port}" - clean_url += parsed_url.path - if parsed_url.query: - clean_url += f"?{parsed_url.query}" - - # Try Basic Auth first - response = requests.get(clean_url, auth=auth, timeout=15, headers=headers, - stream=True, verify=False) - - # If Basic Auth fails, try Digest Auth - if response.status_code == 401: - auth = HTTPDigestAuth(parsed_url.username, parsed_url.password) - response = requests.get(clean_url, auth=auth, timeout=15, headers=headers, - stream=True, verify=False) - else: - response = requests.get(self.snapshot_url, timeout=15, headers=headers, - stream=True, verify=False) - - if response.status_code == 200: - # Check content size - content_length = int(response.headers.get('content-length', 0)) - if content_length > self.max_file_size: - logger.warning(f"Snapshot too large for camera {self.camera_id}: {content_length} bytes") - return None - - # Read content - content = response.content - - # Convert to numpy array - image_array = np.frombuffer(content, np.uint8) - - # Decode as high quality image - frame = cv2.imdecode(image_array, cv2.IMREAD_COLOR) - - if frame is None: - logger.error(f"Failed to decode snapshot for camera {self.camera_id}") - return None - - logger.debug(f"Fetched snapshot for camera {self.camera_id}: {frame.shape[1]}x{frame.shape[0]}") - return frame - else: - logger.warning(f"HTTP {response.status_code} from {self.camera_id}") - return None - - except requests.RequestException as e: - logger.error(f"Request error fetching snapshot for {self.camera_id}: {e}") - return None - except Exception as e: - logger.error(f"Error decoding snapshot for {self.camera_id}: {e}") - return None - - def fetch_single_snapshot(self) -> Optional[np.ndarray]: - """ - Fetch a single high-quality snapshot on demand for pipeline processing. - This method is for one-time fetch from HTTP URL, not continuous streaming. 
- - Returns: - High quality 2K snapshot frame or None if failed - """ - logger.info(f"[SNAPSHOT] Fetching snapshot for {self.camera_id} from {self.snapshot_url}") - - # Try to fetch snapshot with retries - for attempt in range(self.max_retries): - frame = self._fetch_snapshot() - - if frame is not None: - logger.info(f"[SNAPSHOT] Successfully fetched {frame.shape[1]}x{frame.shape[0]} snapshot for {self.camera_id}") - return frame - - if attempt < self.max_retries - 1: - logger.warning(f"[SNAPSHOT] Attempt {attempt + 1}/{self.max_retries} failed for {self.camera_id}, retrying...") - time.sleep(0.5) - - logger.error(f"[SNAPSHOT] Failed to fetch snapshot for {self.camera_id} after {self.max_retries} attempts") - return None - - def _resize_maintain_aspect(self, frame: np.ndarray, target_width: int, target_height: int) -> np.ndarray: - """Resize image while maintaining aspect ratio for high quality.""" - h, w = frame.shape[:2] - aspect = w / h - target_aspect = target_width / target_height - - if aspect > target_aspect: - # Image is wider - new_width = target_width - new_height = int(target_width / aspect) - else: - # Image is taller - new_height = target_height - new_width = int(target_height * aspect) - - # Use INTER_LANCZOS4 for high quality downsampling - resized = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LANCZOS4) - - # Pad to target size if needed - if new_width < target_width or new_height < target_height: - top = (target_height - new_height) // 2 - bottom = target_height - new_height - top - left = (target_width - new_width) // 2 - right = target_width - new_width - left - resized = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0]) - - return resized \ No newline at end of file diff --git a/core/streaming/readers/__init__.py b/core/streaming/readers/__init__.py new file mode 100644 index 0000000..0903d6d --- /dev/null +++ b/core/streaming/readers/__init__.py @@ -0,0 +1,18 @@ +""" +Stream readers for RTSP and HTTP camera feeds. +""" +from .base import VideoReader +from .ffmpeg_rtsp import FFmpegRTSPReader +from .http_snapshot import HTTPSnapshotReader +from .utils import log_success, log_warning, log_error, log_info, Colors + +__all__ = [ + 'VideoReader', + 'FFmpegRTSPReader', + 'HTTPSnapshotReader', + 'log_success', + 'log_warning', + 'log_error', + 'log_info', + 'Colors' +] \ No newline at end of file diff --git a/core/streaming/readers/base.py b/core/streaming/readers/base.py new file mode 100644 index 0000000..56c41cb --- /dev/null +++ b/core/streaming/readers/base.py @@ -0,0 +1,65 @@ +""" +Abstract base class for video stream readers. +""" +from abc import ABC, abstractmethod +from typing import Optional, Callable +import numpy as np + + +class VideoReader(ABC): + """Abstract base class for video stream readers.""" + + def __init__(self, camera_id: str, source_url: str, max_retries: int = 3): + """ + Initialize the video reader. 
+ + Args: + camera_id: Unique identifier for the camera + source_url: URL or path to the video source + max_retries: Maximum number of retry attempts + """ + self.camera_id = camera_id + self.source_url = source_url + self.max_retries = max_retries + self.frame_callback: Optional[Callable[[str, np.ndarray], None]] = None + + @abstractmethod + def start(self) -> None: + """Start the video reader.""" + pass + + @abstractmethod + def stop(self) -> None: + """Stop the video reader.""" + pass + + @abstractmethod + def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]) -> None: + """ + Set callback function to handle captured frames. + + Args: + callback: Function that takes (camera_id, frame) as arguments + """ + pass + + @property + @abstractmethod + def is_running(self) -> bool: + """Check if the reader is currently running.""" + pass + + @property + @abstractmethod + def reader_type(self) -> str: + """Get the type of reader (e.g., 'rtsp', 'http_snapshot').""" + pass + + def __enter__(self): + """Context manager entry.""" + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit.""" + self.stop() \ No newline at end of file diff --git a/core/streaming/readers/ffmpeg_rtsp.py b/core/streaming/readers/ffmpeg_rtsp.py new file mode 100644 index 0000000..8641495 --- /dev/null +++ b/core/streaming/readers/ffmpeg_rtsp.py @@ -0,0 +1,302 @@ +""" +FFmpeg RTSP stream reader using subprocess piping frames directly to buffer. +""" +import cv2 +import time +import threading +import numpy as np +import subprocess +import struct +from typing import Optional, Callable + +from .base import VideoReader +from .utils import log_success, log_warning, log_error, log_info + + +class FFmpegRTSPReader(VideoReader): + """RTSP stream reader using subprocess FFmpeg piping frames directly to buffer.""" + + def __init__(self, camera_id: str, rtsp_url: str, max_retries: int = 3): + super().__init__(camera_id, rtsp_url, max_retries) + self.rtsp_url = rtsp_url + self.process = None + self.stop_event = threading.Event() + self.thread = None + self.stderr_thread = None + + # Expected stream specs (for reference, actual dimensions read from PPM header) + self.width = 1280 + self.height = 720 + + # Watchdog timers for stream reliability + self.process_start_time = None + self.last_frame_time = None + self.is_restart = False # Track if this is a restart (shorter timeout) + self.first_start_timeout = 30.0 # 30s timeout on first start + self.restart_timeout = 15.0 # 15s timeout after restart + + @property + def is_running(self) -> bool: + """Check if the reader is currently running.""" + return self.thread is not None and self.thread.is_alive() + + @property + def reader_type(self) -> str: + """Get the type of reader.""" + return "rtsp_ffmpeg" + + def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]): + """Set callback function to handle captured frames.""" + self.frame_callback = callback + + def start(self): + """Start the FFmpeg subprocess reader.""" + if self.thread and self.thread.is_alive(): + log_warning(self.camera_id, "FFmpeg reader already running") + return + + self.stop_event.clear() + self.thread = threading.Thread(target=self._read_frames, daemon=True) + self.thread.start() + log_success(self.camera_id, "Stream started") + + def stop(self): + """Stop the FFmpeg subprocess reader.""" + self.stop_event.set() + if self.process: + self.process.terminate() + try: + self.process.wait(timeout=5) + except subprocess.TimeoutExpired: + 
self.process.kill()
+        if self.thread:
+            self.thread.join(timeout=5.0)
+        if self.stderr_thread:
+            self.stderr_thread.join(timeout=2.0)
+        log_info(self.camera_id, "Stream stopped")
+
+    def _start_ffmpeg_process(self):
+        """Start FFmpeg subprocess outputting BMP frames to stdout pipe."""
+        cmd = [
+            'ffmpeg',
+            # DO NOT REMOVE
+            '-hwaccel', 'cuda',
+            '-hwaccel_device', '0',
+            '-rtsp_transport', 'tcp',
+            '-i', self.rtsp_url,
+            '-f', 'image2pipe',  # Output images to pipe
+            '-vcodec', 'bmp',  # BMP format with header containing dimensions
+            # Use native stream resolution and framerate
+            '-an',  # No audio
+            '-'  # Output to stdout
+        ]
+
+        try:
+            # Start FFmpeg with stdout pipe to read frames directly
+            self.process = subprocess.Popen(
+                cmd,
+                stdout=subprocess.PIPE,  # Capture stdout for frame data
+                stderr=subprocess.PIPE,  # Capture stderr for error logging
+                bufsize=0  # Unbuffered for real-time processing
+            )
+
+            # Start stderr reading thread
+            if self.stderr_thread and self.stderr_thread.is_alive():
+                # Stop previous stderr thread
+                try:
+                    self.stderr_thread.join(timeout=1.0)
+                except:
+                    pass
+
+            self.stderr_thread = threading.Thread(target=self._read_stderr, daemon=True)
+            self.stderr_thread.start()
+
+            # Set process start time for watchdog
+            self.process_start_time = time.time()
+            self.last_frame_time = None  # Reset frame time
+
+            # After successful restart, next timeout will be back to 30s
+            if self.is_restart:
+                log_info(self.camera_id, f"FFmpeg restarted successfully, next timeout: {self.first_start_timeout}s")
+                self.is_restart = False
+
+            return True
+        except Exception as e:
+            log_error(self.camera_id, f"FFmpeg startup failed: {e}")
+            return False
+
+    def _read_bmp_frame(self, pipe):
+        """Read BMP frame from pipe - BMP header contains dimensions."""
+        try:
+            # Read BMP header (14 bytes file header + 40 bytes info header = 54 bytes minimum)
+            header_data = b''
+            bytes_to_read = 54
+
+            while len(header_data) < bytes_to_read:
+                chunk = pipe.read(bytes_to_read - len(header_data))
+                if not chunk:
+                    return None  # Silent end of stream
+                header_data += chunk
+
+            # Parse BMP header
+            if header_data[:2] != b'BM':
+                return None  # Invalid format, skip frame silently
+
+            # Extract file size from header (bytes 2-5)
+            file_size = struct.unpack('<I', header_data[2:6])[0]
+
+            # Read the rest of the BMP file (file size minus header already read)
+            remaining_size = file_size - len(header_data)
+            bmp_data = header_data
+            while remaining_size > 0:
+                chunk = pipe.read(remaining_size)
+                if not chunk:
+                    return None  # Stream ended mid-frame
+                bmp_data += chunk
+                remaining_size -= len(chunk)
+
+            # Decode BMP directly from memory
+            frame = cv2.imdecode(np.frombuffer(bmp_data, dtype=np.uint8), cv2.IMREAD_COLOR)
+            if frame is None:
+                return None  # Decode failed silently
+
+            return frame
+
+        except Exception:
+            return None  # Error reading frame silently
+
+    def _read_stderr(self):
+        """Read and log FFmpeg stderr output in background thread."""
+        if not self.process or not self.process.stderr:
+            return
+
+        try:
+            while self.process and self.process.poll() is None:
+                try:
+                    line = self.process.stderr.readline()
+                    if line:
+                        error_msg = line.decode('utf-8', errors='ignore').strip()
+                        if error_msg and not self.stop_event.is_set():
+                            # Filter out common noise but log actual errors
+                            if any(keyword in error_msg.lower() for keyword in ['error', 'failed', 'cannot', 'invalid']):
+                                log_error(self.camera_id, f"FFmpeg: {error_msg}")
+                            elif 'warning' in error_msg.lower():
+                                log_warning(self.camera_id, f"FFmpeg: {error_msg}")
+                except Exception:
+                    break
+        except Exception:
+            pass
+
+    def _check_watchdog_timeout(self) -> bool:
+        """Check if watchdog timeout has been exceeded."""
+        if not self.process_start_time:
+            return False
+
+        current_time = time.time()
+        time_since_start = current_time - self.process_start_time
+
+        # Determine timeout based on whether this is a restart
+        timeout = self.restart_timeout if self.is_restart else self.first_start_timeout
+
+        # If no frames received yet, check against process start time
+        if not self.last_frame_time:
+            if time_since_start > timeout:
+                log_warning(self.camera_id, f"Watchdog timeout: No frames for {time_since_start:.1f}s (limit: {timeout}s)")
+                return True
+        else:
+            # Check time since last frame
+            time_since_frame = current_time - self.last_frame_time
+            if time_since_frame > timeout:
+                log_warning(self.camera_id, f"Watchdog timeout: No frames for {time_since_frame:.1f}s (limit: {timeout}s)")
+                return True
+
+        return False
+
+    def _restart_ffmpeg_process(self):
+        """Restart FFmpeg process due to watchdog timeout."""
+        log_warning(self.camera_id, "Watchdog triggered FFmpeg restart")
+
+        # Terminate current process
+        if self.process:
+            try:
+                self.process.terminate()
+                self.process.wait(timeout=3)
+            except subprocess.TimeoutExpired:
+                self.process.kill()
+            except Exception:
+                pass
+            self.process = None
+
+        # Mark as restart for 
shorter timeout + self.is_restart = True + + # Small delay before restart + time.sleep(1.0) + + def _read_frames(self): + """Read frames directly from FFmpeg stdout pipe.""" + frame_count = 0 + last_log_time = time.time() + + while not self.stop_event.is_set(): + try: + # Check watchdog timeout if process is running + if self.process and self.process.poll() is None: + if self._check_watchdog_timeout(): + self._restart_ffmpeg_process() + continue + + # Start FFmpeg if not running + if not self.process or self.process.poll() is not None: + if self.process and self.process.poll() is not None: + log_warning(self.camera_id, "Stream disconnected, reconnecting...") + + if not self._start_ffmpeg_process(): + time.sleep(5.0) + continue + + # Read frames directly from FFmpeg stdout + try: + if self.process and self.process.stdout: + # Read BMP frame data + frame = self._read_bmp_frame(self.process.stdout) + if frame is None: + continue + + # Update watchdog - we got a frame + self.last_frame_time = time.time() + + # Call frame callback + if self.frame_callback: + self.frame_callback(self.camera_id, frame) + + frame_count += 1 + + # Log progress every 60 seconds (quieter) + current_time = time.time() + if current_time - last_log_time >= 60: + log_success(self.camera_id, f"{frame_count} frames captured ({frame.shape[1]}x{frame.shape[0]})") + last_log_time = current_time + + except Exception: + # Process might have died, let it restart on next iteration + if self.process: + self.process.terminate() + self.process = None + time.sleep(1.0) + + except Exception: + time.sleep(1.0) + + # Cleanup + if self.process: + self.process.terminate() \ No newline at end of file diff --git a/core/streaming/readers/http_snapshot.py b/core/streaming/readers/http_snapshot.py new file mode 100644 index 0000000..5a479db --- /dev/null +++ b/core/streaming/readers/http_snapshot.py @@ -0,0 +1,249 @@ +""" +HTTP snapshot reader optimized for 2560x1440 (2K) high quality images. 
+""" +import cv2 +import logging +import time +import threading +import requests +import numpy as np +from typing import Optional, Callable + +from .base import VideoReader +from .utils import log_success, log_warning, log_error, log_info + +logger = logging.getLogger(__name__) + + +class HTTPSnapshotReader(VideoReader): + """HTTP snapshot reader optimized for 2560x1440 (2K) high quality images.""" + + def __init__(self, camera_id: str, snapshot_url: str, interval_ms: int = 5000, max_retries: int = 3): + super().__init__(camera_id, snapshot_url, max_retries) + self.snapshot_url = snapshot_url + self.interval_ms = interval_ms + self.stop_event = threading.Event() + self.thread = None + + # Expected snapshot specifications + self.expected_width = 2560 + self.expected_height = 1440 + self.max_file_size = 10 * 1024 * 1024 # 10MB max for 2K image + + @property + def is_running(self) -> bool: + """Check if the reader is currently running.""" + return self.thread is not None and self.thread.is_alive() + + @property + def reader_type(self) -> str: + """Get the type of reader.""" + return "http_snapshot" + + def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]): + """Set callback function to handle captured frames.""" + self.frame_callback = callback + + def start(self): + """Start the snapshot reader thread.""" + if self.thread and self.thread.is_alive(): + logger.warning(f"Snapshot reader for {self.camera_id} already running") + return + + self.stop_event.clear() + self.thread = threading.Thread(target=self._read_snapshots, daemon=True) + self.thread.start() + logger.info(f"Started snapshot reader for camera {self.camera_id}") + + def stop(self): + """Stop the snapshot reader thread.""" + self.stop_event.set() + if self.thread: + self.thread.join(timeout=5.0) + logger.info(f"Stopped snapshot reader for camera {self.camera_id}") + + def _read_snapshots(self): + """Main snapshot reading loop for high quality 2K images.""" + retries = 0 + frame_count = 0 + last_log_time = time.time() + interval_seconds = self.interval_ms / 1000.0 + + logger.info(f"Snapshot interval for camera {self.camera_id}: {interval_seconds}s") + + while not self.stop_event.is_set(): + try: + start_time = time.time() + frame = self._fetch_snapshot() + + if frame is None: + retries += 1 + logger.warning(f"Failed to fetch snapshot for camera {self.camera_id}, retry {retries}/{self.max_retries}") + + if self.max_retries != -1 and retries > self.max_retries: + logger.error(f"Max retries reached for snapshot camera {self.camera_id}") + break + + time.sleep(min(2.0, interval_seconds)) + continue + + # Accept any valid image dimensions - don't force specific resolution + if frame.shape[1] <= 0 or frame.shape[0] <= 0: + logger.warning(f"Camera {self.camera_id}: Invalid frame dimensions {frame.shape[1]}x{frame.shape[0]}") + continue + + # Reset retry counter on successful fetch + retries = 0 + frame_count += 1 + + # Call frame callback + if self.frame_callback: + try: + self.frame_callback(self.camera_id, frame) + except Exception as e: + logger.error(f"Camera {self.camera_id}: Frame callback error: {e}") + + # Log progress every 30 seconds + current_time = time.time() + if current_time - last_log_time >= 30: + logger.info(f"Camera {self.camera_id}: {frame_count} snapshots processed") + last_log_time = current_time + + # Wait for next interval + elapsed = time.time() - start_time + sleep_time = max(0, interval_seconds - elapsed) + if sleep_time > 0: + self.stop_event.wait(sleep_time) + + except Exception as e: + 
logger.error(f"Error in snapshot loop for camera {self.camera_id}: {e}") + retries += 1 + if self.max_retries != -1 and retries > self.max_retries: + break + time.sleep(min(2.0, interval_seconds)) + + logger.info(f"Snapshot reader thread ended for camera {self.camera_id}") + + def _fetch_snapshot(self) -> Optional[np.ndarray]: + """Fetch a single high quality snapshot from HTTP URL.""" + try: + # Parse URL for authentication + from urllib.parse import urlparse + parsed_url = urlparse(self.snapshot_url) + + headers = { + 'User-Agent': 'Python-Detector-Worker/1.0', + 'Accept': 'image/jpeg, image/png, image/*' + } + auth = None + + if parsed_url.username and parsed_url.password: + from requests.auth import HTTPBasicAuth, HTTPDigestAuth + auth = HTTPBasicAuth(parsed_url.username, parsed_url.password) + + # Reconstruct URL without credentials + clean_url = f"{parsed_url.scheme}://{parsed_url.hostname}" + if parsed_url.port: + clean_url += f":{parsed_url.port}" + clean_url += parsed_url.path + if parsed_url.query: + clean_url += f"?{parsed_url.query}" + + # Try Basic Auth first + response = requests.get(clean_url, auth=auth, timeout=15, headers=headers, + stream=True, verify=False) + + # If Basic Auth fails, try Digest Auth + if response.status_code == 401: + auth = HTTPDigestAuth(parsed_url.username, parsed_url.password) + response = requests.get(clean_url, auth=auth, timeout=15, headers=headers, + stream=True, verify=False) + else: + response = requests.get(self.snapshot_url, timeout=15, headers=headers, + stream=True, verify=False) + + if response.status_code == 200: + # Check content size + content_length = int(response.headers.get('content-length', 0)) + if content_length > self.max_file_size: + logger.warning(f"Snapshot too large for camera {self.camera_id}: {content_length} bytes") + return None + + # Read content + content = response.content + + # Convert to numpy array + image_array = np.frombuffer(content, np.uint8) + + # Decode as high quality image + frame = cv2.imdecode(image_array, cv2.IMREAD_COLOR) + + if frame is None: + logger.error(f"Failed to decode snapshot for camera {self.camera_id}") + return None + + logger.debug(f"Fetched snapshot for camera {self.camera_id}: {frame.shape[1]}x{frame.shape[0]}") + return frame + else: + logger.warning(f"HTTP {response.status_code} from {self.camera_id}") + return None + + except requests.RequestException as e: + logger.error(f"Request error fetching snapshot for {self.camera_id}: {e}") + return None + except Exception as e: + logger.error(f"Error decoding snapshot for {self.camera_id}: {e}") + return None + + def fetch_single_snapshot(self) -> Optional[np.ndarray]: + """ + Fetch a single high-quality snapshot on demand for pipeline processing. + This method is for one-time fetch from HTTP URL, not continuous streaming. 
+ + Returns: + High quality 2K snapshot frame or None if failed + """ + logger.info(f"[SNAPSHOT] Fetching snapshot for {self.camera_id} from {self.snapshot_url}") + + # Try to fetch snapshot with retries + for attempt in range(self.max_retries): + frame = self._fetch_snapshot() + + if frame is not None: + logger.info(f"[SNAPSHOT] Successfully fetched {frame.shape[1]}x{frame.shape[0]} snapshot for {self.camera_id}") + return frame + + if attempt < self.max_retries - 1: + logger.warning(f"[SNAPSHOT] Attempt {attempt + 1}/{self.max_retries} failed for {self.camera_id}, retrying...") + time.sleep(0.5) + + logger.error(f"[SNAPSHOT] Failed to fetch snapshot for {self.camera_id} after {self.max_retries} attempts") + return None + + def _resize_maintain_aspect(self, frame: np.ndarray, target_width: int, target_height: int) -> np.ndarray: + """Resize image while maintaining aspect ratio for high quality.""" + h, w = frame.shape[:2] + aspect = w / h + target_aspect = target_width / target_height + + if aspect > target_aspect: + # Image is wider + new_width = target_width + new_height = int(target_width / aspect) + else: + # Image is taller + new_height = target_height + new_width = int(target_height * aspect) + + # Use INTER_LANCZOS4 for high quality downsampling + resized = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LANCZOS4) + + # Pad to target size if needed + if new_width < target_width or new_height < target_height: + top = (target_height - new_height) // 2 + bottom = target_height - new_height - top + left = (target_width - new_width) // 2 + right = target_width - new_width - left + resized = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0]) + + return resized \ No newline at end of file diff --git a/core/streaming/readers/utils.py b/core/streaming/readers/utils.py new file mode 100644 index 0000000..813f49f --- /dev/null +++ b/core/streaming/readers/utils.py @@ -0,0 +1,38 @@ +""" +Utility functions for stream readers. 
+""" +import logging +import os + +# Keep OpenCV errors visible but allow FFmpeg stderr logging +os.environ["OPENCV_LOG_LEVEL"] = "ERROR" + +logger = logging.getLogger(__name__) + +# Color codes for pretty logging +class Colors: + GREEN = '\033[92m' + YELLOW = '\033[93m' + RED = '\033[91m' + BLUE = '\033[94m' + PURPLE = '\033[95m' + CYAN = '\033[96m' + WHITE = '\033[97m' + BOLD = '\033[1m' + END = '\033[0m' + +def log_success(camera_id: str, message: str): + """Log success messages in green""" + logger.info(f"{Colors.GREEN}[{camera_id}] {message}{Colors.END}") + +def log_warning(camera_id: str, message: str): + """Log warnings in yellow""" + logger.warning(f"{Colors.YELLOW}[{camera_id}] {message}{Colors.END}") + +def log_error(camera_id: str, message: str): + """Log errors in red""" + logger.error(f"{Colors.RED}[{camera_id}] {message}{Colors.END}") + +def log_info(camera_id: str, message: str): + """Log info in cyan""" + logger.info(f"{Colors.CYAN}[{camera_id}] {message}{Colors.END}") \ No newline at end of file diff --git a/requirements.base.txt b/requirements.base.txt index 722962f..b8af923 100644 --- a/requirements.base.txt +++ b/requirements.base.txt @@ -9,5 +9,4 @@ lap>=0.5.12 pynvml PyTurboJPEG PyNvVideoCodec -pycuda cupy-cuda12x \ No newline at end of file From b08ce27de22a80e31f34cc5f3b89756d74eb2677 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Sat, 27 Sep 2025 12:27:38 +0700 Subject: [PATCH 06/30] Implement comprehensive health monitoring for streams and threads - Added RecoveryManager for automatic handling of health issues, including circuit breaker patterns, automatic restarts, and graceful degradation. - Introduced StreamHealthTracker to monitor video stream metrics, including frame production, connection health, and error rates. - Developed ThreadHealthMonitor for detecting unresponsive and deadlocked threads, providing liveness detection and responsiveness testing. - Integrated health checks for streams and threads, reporting metrics and recovery actions to the health monitor. - Enhanced logging for recovery attempts, errors, and health checks to improve observability and debugging. 
--- .claude/settings.local.json | 3 +- app.py | 314 ++++++++++++++++ core/monitoring/__init__.py | 18 + core/monitoring/health.py | 456 ++++++++++++++++++++++++ core/monitoring/recovery.py | 385 ++++++++++++++++++++ core/monitoring/stream_health.py | 351 ++++++++++++++++++ core/monitoring/thread_health.py | 381 ++++++++++++++++++++ core/streaming/readers/ffmpeg_rtsp.py | 139 +++++++- core/streaming/readers/http_snapshot.py | 137 ++++++- 9 files changed, 2173 insertions(+), 11 deletions(-) create mode 100644 core/monitoring/__init__.py create mode 100644 core/monitoring/health.py create mode 100644 core/monitoring/recovery.py create mode 100644 core/monitoring/stream_health.py create mode 100644 core/monitoring/thread_health.py diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 97cf5c1..9e296ac 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -2,7 +2,8 @@ "permissions": { "allow": [ "Bash(dir:*)", - "WebSearch" + "WebSearch", + "Bash(mkdir:*)" ], "deny": [], "ask": [] diff --git a/app.py b/app.py index 605aa0b..eb1440f 100644 --- a/app.py +++ b/app.py @@ -8,6 +8,7 @@ import os import time import cv2 from contextlib import asynccontextmanager +from typing import Dict, Any from fastapi import FastAPI, WebSocket, HTTPException from fastapi.responses import Response @@ -31,21 +32,135 @@ logger.setLevel(logging.DEBUG) # Frames are now stored in the shared cache buffer from core.streaming.buffers # latest_frames = {} # Deprecated - using shared_cache_buffer instead + +# Health monitoring recovery handlers +def _handle_stream_restart_recovery(component: str, details: Dict[str, Any]) -> bool: + """Handle stream restart recovery at the application level.""" + try: + from core.streaming.manager import shared_stream_manager + + # Extract camera ID from component name (e.g., "stream_cam-001" -> "cam-001") + if component.startswith("stream_"): + camera_id = component[7:] # Remove "stream_" prefix + else: + camera_id = component + + logger.info(f"Attempting stream restart recovery for {camera_id}") + + # Find and restart the subscription + subscriptions = shared_stream_manager.get_all_subscriptions() + for sub_info in subscriptions: + if sub_info.camera_id == camera_id: + # Remove and re-add the subscription + shared_stream_manager.remove_subscription(sub_info.subscription_id) + time.sleep(1.0) # Brief delay + + # Re-add subscription + success = shared_stream_manager.add_subscription( + sub_info.subscription_id, + sub_info.stream_config, + sub_info.crop_coords, + sub_info.model_id, + sub_info.model_url, + sub_info.tracking_integration + ) + + if success: + logger.info(f"Stream restart recovery successful for {camera_id}") + return True + else: + logger.error(f"Stream restart recovery failed for {camera_id}") + return False + + logger.warning(f"No subscription found for camera {camera_id} during recovery") + return False + + except Exception as e: + logger.error(f"Error in stream restart recovery for {component}: {e}") + return False + + +def _handle_stream_reconnect_recovery(component: str, details: Dict[str, Any]) -> bool: + """Handle stream reconnect recovery at the application level.""" + try: + from core.streaming.manager import shared_stream_manager + + # Extract camera ID from component name + if component.startswith("stream_"): + camera_id = component[7:] + else: + camera_id = component + + logger.info(f"Attempting stream reconnect recovery for {camera_id}") + + # For reconnect, we just need to trigger the stream's internal reconnect + # The 
stream readers handle their own reconnection logic
+        active_cameras = shared_stream_manager.get_active_cameras()
+
+        if camera_id in active_cameras:
+            logger.info(f"Stream reconnect recovery triggered for {camera_id}")
+            return True
+        else:
+            logger.warning(f"Camera {camera_id} not found in active cameras during reconnect recovery")
+            return False
+
+    except Exception as e:
+        logger.error(f"Error in stream reconnect recovery for {component}: {e}")
+        return False
+
+
 # Lifespan event handler (modern FastAPI approach)
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Application lifespan management."""
     # Startup
     logger.info("Detector Worker started successfully")
+
+    # Initialize health monitoring system
+    try:
+        from core.monitoring.health import health_monitor
+        from core.monitoring.stream_health import stream_health_tracker
+        from core.monitoring.thread_health import thread_health_monitor
+        from core.monitoring.recovery import recovery_manager, RecoveryAction
+
+        # Start health monitoring
+        health_monitor.start()
+        logger.info("Health monitoring system started")
+
+        # Register recovery handlers for stream management
+        # (handlers are keyed by RecoveryAction enum values)
+        from core.streaming.manager import shared_stream_manager
+        recovery_manager.register_recovery_handler(
+            RecoveryAction.RESTART_STREAM,
+            _handle_stream_restart_recovery
+        )
+        recovery_manager.register_recovery_handler(
+            RecoveryAction.RECONNECT,
+            _handle_stream_reconnect_recovery
+        )
+
+        logger.info("Recovery handlers registered")
+
+    except Exception as e:
+        logger.error(f"Failed to initialize health monitoring: {e}")
+
     logger.info("WebSocket endpoint available at: ws://0.0.0.0:8001/")
     logger.info("HTTP camera endpoint available at: http://0.0.0.0:8001/camera/{camera_id}/image")
     logger.info("Health check available at: http://0.0.0.0:8001/health")
+    logger.info("Detailed health monitoring available at: http://0.0.0.0:8001/health/detailed")
     logger.info("Ready and waiting for backend WebSocket connections")
 
     yield
 
     # Shutdown
     logger.info("Detector Worker shutting down...")
+
+    # Stop health monitoring
+    try:
+        from core.monitoring.health import health_monitor
+        health_monitor.stop()
+        logger.info("Health monitoring system stopped")
+    except Exception as e:
+        logger.error(f"Error stopping health monitoring: {e}")
+
     # Clear all state
     worker_state.set_subscriptions([])
     worker_state.session_ids.clear()
@@ -197,6 +312,205 @@ async def health_check():
     }
 
 
+@app.get("/health/detailed")
+async def detailed_health_check():
+    """Comprehensive health status with detailed monitoring data."""
+    try:
+        from core.monitoring.health import health_monitor
+        from core.monitoring.stream_health import stream_health_tracker
+        from core.monitoring.thread_health import thread_health_monitor
+        from core.monitoring.recovery import recovery_manager
+
+        # Get comprehensive health status
+        overall_health = health_monitor.get_health_status()
+        stream_metrics = stream_health_tracker.get_all_metrics()
+        thread_info = thread_health_monitor.get_all_thread_info()
+        recovery_stats = recovery_manager.get_recovery_stats()
+
+        return {
+            "timestamp": time.time(),
+            "overall_health": overall_health,
+            "stream_metrics": stream_metrics,
+            "thread_health": thread_info,
+            "recovery_stats": recovery_stats,
+            "system_info": {
+                "active_subscriptions": len(worker_state.subscriptions),
+                "active_sessions": len(worker_state.session_ids),
+                "version": "2.0.0"
+            }
+        }
+
+    except Exception as e:
+        logger.error(f"Error generating detailed health report: {e}")
+        raise HTTPException(status_code=500, detail=f"Health monitoring error: {str(e)}")
+
+
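+# A sketch of consuming the endpoint above (assumes the worker is reachable on
+# localhost:8001, matching the addresses logged at startup):
+#
+#     import requests
+#     detail = requests.get("http://localhost:8001/health/detailed").json()
+#     print(detail["overall_health"]["overall_status"])
+
+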
+@app.get("/health/streams") +async def stream_health_status(): + """Stream-specific health monitoring.""" + try: + from core.monitoring.stream_health import stream_health_tracker + from core.streaming.buffers import shared_cache_buffer + + stream_metrics = stream_health_tracker.get_all_metrics() + buffer_stats = shared_cache_buffer.get_stats() + + return { + "timestamp": time.time(), + "stream_count": len(stream_metrics), + "stream_metrics": stream_metrics, + "buffer_stats": buffer_stats, + "frame_ages": { + camera_id: { + "age_seconds": time.time() - info["last_frame_time"] if info and info.get("last_frame_time") else None, + "total_frames": info.get("frame_count", 0) if info else 0 + } + for camera_id, info in stream_metrics.items() + } + } + + except Exception as e: + logger.error(f"Error generating stream health report: {e}") + raise HTTPException(status_code=500, detail=f"Stream health error: {str(e)}") + + +@app.get("/health/threads") +async def thread_health_status(): + """Thread-specific health monitoring.""" + try: + from core.monitoring.thread_health import thread_health_monitor + + thread_info = thread_health_monitor.get_all_thread_info() + deadlocks = thread_health_monitor.detect_deadlocks() + + return { + "timestamp": time.time(), + "thread_count": len(thread_info), + "thread_info": thread_info, + "potential_deadlocks": deadlocks, + "summary": { + "responsive_threads": sum(1 for info in thread_info.values() if info.get("is_responsive", False)), + "unresponsive_threads": sum(1 for info in thread_info.values() if not info.get("is_responsive", True)), + "deadlock_count": len(deadlocks) + } + } + + except Exception as e: + logger.error(f"Error generating thread health report: {e}") + raise HTTPException(status_code=500, detail=f"Thread health error: {str(e)}") + + +@app.get("/health/recovery") +async def recovery_status(): + """Recovery system status and history.""" + try: + from core.monitoring.recovery import recovery_manager + + recovery_stats = recovery_manager.get_recovery_stats() + + return { + "timestamp": time.time(), + "recovery_stats": recovery_stats, + "summary": { + "total_recoveries_last_hour": recovery_stats.get("total_recoveries_last_hour", 0), + "components_with_recovery_state": len(recovery_stats.get("recovery_states", {})), + "total_recovery_failures": sum( + state.get("failure_count", 0) + for state in recovery_stats.get("recovery_states", {}).values() + ), + "total_recovery_successes": sum( + state.get("success_count", 0) + for state in recovery_stats.get("recovery_states", {}).values() + ) + } + } + + except Exception as e: + logger.error(f"Error generating recovery status report: {e}") + raise HTTPException(status_code=500, detail=f"Recovery status error: {str(e)}") + + +@app.post("/health/recovery/force/{component}") +async def force_recovery(component: str, action: str = "restart_stream"): + """Force recovery action for a specific component.""" + try: + from core.monitoring.recovery import recovery_manager, RecoveryAction + + # Validate action + try: + recovery_action = RecoveryAction(action) + except ValueError: + raise HTTPException( + status_code=400, + detail=f"Invalid recovery action: {action}. 
Valid actions: {[a.value for a in RecoveryAction]}" + ) + + # Force recovery + success = recovery_manager.force_recovery(component, recovery_action, "manual_api_request") + + return { + "timestamp": time.time(), + "component": component, + "action": action, + "success": success, + "message": f"Recovery {'successful' if success else 'failed'} for component {component}" + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error forcing recovery for {component}: {e}") + raise HTTPException(status_code=500, detail=f"Recovery error: {str(e)}") + + +@app.get("/health/metrics") +async def health_metrics(): + """Performance and health metrics in a format suitable for monitoring systems.""" + try: + from core.monitoring.health import health_monitor + from core.monitoring.stream_health import stream_health_tracker + from core.streaming.buffers import shared_cache_buffer + + # Get basic metrics + overall_health = health_monitor.get_health_status() + stream_metrics = stream_health_tracker.get_all_metrics() + buffer_stats = shared_cache_buffer.get_stats() + + # Format for monitoring systems (Prometheus-style) + metrics = { + "detector_worker_up": 1, + "detector_worker_streams_total": len(stream_metrics), + "detector_worker_subscriptions_total": len(worker_state.subscriptions), + "detector_worker_sessions_total": len(worker_state.session_ids), + "detector_worker_memory_mb": buffer_stats.get("total_memory_mb", 0), + "detector_worker_health_status": { + "healthy": 1, + "warning": 2, + "critical": 3, + "unknown": 4 + }.get(overall_health.get("overall_status", "unknown"), 4) + } + + # Add per-stream metrics + for camera_id, stream_info in stream_metrics.items(): + safe_camera_id = camera_id.replace("-", "_").replace(".", "_") + metrics.update({ + f"detector_worker_stream_frames_total{{camera=\"{safe_camera_id}\"}}": stream_info.get("frame_count", 0), + f"detector_worker_stream_errors_total{{camera=\"{safe_camera_id}\"}}": stream_info.get("error_count", 0), + f"detector_worker_stream_fps{{camera=\"{safe_camera_id}\"}}": stream_info.get("frames_per_second", 0), + f"detector_worker_stream_frame_age_seconds{{camera=\"{safe_camera_id}\"}}": stream_info.get("last_frame_age_seconds") or 0 + }) + + return { + "timestamp": time.time(), + "metrics": metrics + } + + except Exception as e: + logger.error(f"Error generating health metrics: {e}") + raise HTTPException(status_code=500, detail=f"Metrics error: {str(e)}") + + if __name__ == "__main__": diff --git a/core/monitoring/__init__.py b/core/monitoring/__init__.py new file mode 100644 index 0000000..2ad32ed --- /dev/null +++ b/core/monitoring/__init__.py @@ -0,0 +1,18 @@ +""" +Comprehensive health monitoring system for detector worker. +Tracks stream health, thread responsiveness, and system performance. +""" + +from .health import HealthMonitor, HealthStatus, HealthCheck +from .stream_health import StreamHealthTracker +from .thread_health import ThreadHealthMonitor +from .recovery import RecoveryManager + +__all__ = [ + 'HealthMonitor', + 'HealthStatus', + 'HealthCheck', + 'StreamHealthTracker', + 'ThreadHealthMonitor', + 'RecoveryManager' +] \ No newline at end of file diff --git a/core/monitoring/health.py b/core/monitoring/health.py new file mode 100644 index 0000000..be094f3 --- /dev/null +++ b/core/monitoring/health.py @@ -0,0 +1,456 @@ +""" +Core health monitoring system for comprehensive stream and system health tracking. +Provides centralized health status, alerting, and recovery coordination. 
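+A module-level singleton, health_monitor, is instantiated at import time.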
+""" +import time +import threading +import logging +import psutil +from typing import Dict, List, Optional, Any, Callable +from dataclasses import dataclass, field +from enum import Enum +from collections import defaultdict, deque + + +logger = logging.getLogger(__name__) + + +class HealthStatus(Enum): + """Health status levels.""" + HEALTHY = "healthy" + WARNING = "warning" + CRITICAL = "critical" + UNKNOWN = "unknown" + + +@dataclass +class HealthCheck: + """Individual health check result.""" + name: str + status: HealthStatus + message: str + timestamp: float = field(default_factory=time.time) + details: Dict[str, Any] = field(default_factory=dict) + recovery_action: Optional[str] = None + + +@dataclass +class HealthMetrics: + """Health metrics for a component.""" + component_id: str + last_update: float + frame_count: int = 0 + error_count: int = 0 + warning_count: int = 0 + restart_count: int = 0 + avg_frame_interval: float = 0.0 + last_frame_time: Optional[float] = None + thread_alive: bool = True + connection_healthy: bool = True + memory_usage_mb: float = 0.0 + cpu_usage_percent: float = 0.0 + + +class HealthMonitor: + """Comprehensive health monitoring system.""" + + def __init__(self, check_interval: float = 30.0): + """ + Initialize health monitor. + + Args: + check_interval: Interval between health checks in seconds + """ + self.check_interval = check_interval + self.running = False + self.monitor_thread = None + self._lock = threading.RLock() + + # Health data storage + self.health_checks: Dict[str, HealthCheck] = {} + self.metrics: Dict[str, HealthMetrics] = {} + self.alert_history: deque = deque(maxlen=1000) + self.recovery_actions: deque = deque(maxlen=500) + + # Thresholds (configurable) + self.thresholds = { + 'frame_stale_warning_seconds': 120, # 2 minutes + 'frame_stale_critical_seconds': 300, # 5 minutes + 'thread_unresponsive_seconds': 60, # 1 minute + 'memory_warning_mb': 500, # 500MB per stream + 'memory_critical_mb': 1000, # 1GB per stream + 'cpu_warning_percent': 80, # 80% CPU + 'cpu_critical_percent': 95, # 95% CPU + 'error_rate_warning': 0.1, # 10% error rate + 'error_rate_critical': 0.3, # 30% error rate + 'restart_threshold': 3 # Max restarts per hour + } + + # Health check functions + self.health_checkers: List[Callable[[], List[HealthCheck]]] = [] + self.recovery_callbacks: Dict[str, Callable[[str, HealthCheck], bool]] = {} + + # System monitoring + self.process = psutil.Process() + self.system_start_time = time.time() + + def start(self): + """Start health monitoring.""" + if self.running: + logger.warning("Health monitor already running") + return + + self.running = True + self.monitor_thread = threading.Thread(target=self._monitor_loop, daemon=True) + self.monitor_thread.start() + logger.info(f"Health monitor started (check interval: {self.check_interval}s)") + + def stop(self): + """Stop health monitoring.""" + self.running = False + if self.monitor_thread: + self.monitor_thread.join(timeout=5.0) + logger.info("Health monitor stopped") + + def register_health_checker(self, checker: Callable[[], List[HealthCheck]]): + """Register a health check function.""" + self.health_checkers.append(checker) + logger.debug(f"Registered health checker: {checker.__name__}") + + def register_recovery_callback(self, component: str, callback: Callable[[str, HealthCheck], bool]): + """Register a recovery callback for a component.""" + self.recovery_callbacks[component] = callback + logger.debug(f"Registered recovery callback for {component}") + + def update_metrics(self, 
component_id: str, **kwargs): + """Update metrics for a component.""" + with self._lock: + if component_id not in self.metrics: + self.metrics[component_id] = HealthMetrics( + component_id=component_id, + last_update=time.time() + ) + + metrics = self.metrics[component_id] + metrics.last_update = time.time() + + # Update provided metrics + for key, value in kwargs.items(): + if hasattr(metrics, key): + setattr(metrics, key, value) + + def report_frame_received(self, component_id: str): + """Report that a frame was received for a component.""" + current_time = time.time() + with self._lock: + if component_id not in self.metrics: + self.metrics[component_id] = HealthMetrics( + component_id=component_id, + last_update=current_time + ) + + metrics = self.metrics[component_id] + + # Update frame metrics + if metrics.last_frame_time: + interval = current_time - metrics.last_frame_time + # Moving average of frame intervals + if metrics.avg_frame_interval == 0: + metrics.avg_frame_interval = interval + else: + metrics.avg_frame_interval = (metrics.avg_frame_interval * 0.9) + (interval * 0.1) + + metrics.last_frame_time = current_time + metrics.frame_count += 1 + metrics.last_update = current_time + + def report_error(self, component_id: str, error_type: str = "general"): + """Report an error for a component.""" + with self._lock: + if component_id not in self.metrics: + self.metrics[component_id] = HealthMetrics( + component_id=component_id, + last_update=time.time() + ) + + self.metrics[component_id].error_count += 1 + self.metrics[component_id].last_update = time.time() + + logger.debug(f"Error reported for {component_id}: {error_type}") + + def report_warning(self, component_id: str, warning_type: str = "general"): + """Report a warning for a component.""" + with self._lock: + if component_id not in self.metrics: + self.metrics[component_id] = HealthMetrics( + component_id=component_id, + last_update=time.time() + ) + + self.metrics[component_id].warning_count += 1 + self.metrics[component_id].last_update = time.time() + + logger.debug(f"Warning reported for {component_id}: {warning_type}") + + def report_restart(self, component_id: str): + """Report that a component was restarted.""" + with self._lock: + if component_id not in self.metrics: + self.metrics[component_id] = HealthMetrics( + component_id=component_id, + last_update=time.time() + ) + + self.metrics[component_id].restart_count += 1 + self.metrics[component_id].last_update = time.time() + + # Log recovery action + recovery_action = { + 'timestamp': time.time(), + 'component': component_id, + 'action': 'restart', + 'reason': 'manual_restart' + } + + with self._lock: + self.recovery_actions.append(recovery_action) + + logger.info(f"Restart reported for {component_id}") + + def get_health_status(self, component_id: Optional[str] = None) -> Dict[str, Any]: + """Get comprehensive health status.""" + with self._lock: + if component_id: + # Get health for specific component + return self._get_component_health(component_id) + else: + # Get overall health status + return self._get_overall_health() + + def _get_component_health(self, component_id: str) -> Dict[str, Any]: + """Get health status for a specific component.""" + if component_id not in self.metrics: + return { + 'component_id': component_id, + 'status': HealthStatus.UNKNOWN.value, + 'message': 'No metrics available', + 'metrics': {} + } + + metrics = self.metrics[component_id] + current_time = time.time() + + # Determine health status + status = HealthStatus.HEALTHY + issues = [] + 
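+        # Severity below only escalates (HEALTHY -> WARNING -> CRITICAL);
+        # no later check downgrades a status set by an earlier one.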
+ # Check frame freshness + if metrics.last_frame_time: + frame_age = current_time - metrics.last_frame_time + if frame_age > self.thresholds['frame_stale_critical_seconds']: + status = HealthStatus.CRITICAL + issues.append(f"Frames stale for {frame_age:.1f}s") + elif frame_age > self.thresholds['frame_stale_warning_seconds']: + if status == HealthStatus.HEALTHY: + status = HealthStatus.WARNING + issues.append(f"Frames aging ({frame_age:.1f}s)") + + # Check error rates + if metrics.frame_count > 0: + error_rate = metrics.error_count / metrics.frame_count + if error_rate > self.thresholds['error_rate_critical']: + status = HealthStatus.CRITICAL + issues.append(f"High error rate ({error_rate:.1%})") + elif error_rate > self.thresholds['error_rate_warning']: + if status == HealthStatus.HEALTHY: + status = HealthStatus.WARNING + issues.append(f"Elevated error rate ({error_rate:.1%})") + + # Check restart frequency + restart_rate = metrics.restart_count / max(1, (current_time - self.system_start_time) / 3600) + if restart_rate > self.thresholds['restart_threshold']: + status = HealthStatus.CRITICAL + issues.append(f"Frequent restarts ({restart_rate:.1f}/hour)") + + # Check thread health + if not metrics.thread_alive: + status = HealthStatus.CRITICAL + issues.append("Thread not alive") + + # Check connection health + if not metrics.connection_healthy: + if status == HealthStatus.HEALTHY: + status = HealthStatus.WARNING + issues.append("Connection unhealthy") + + return { + 'component_id': component_id, + 'status': status.value, + 'message': '; '.join(issues) if issues else 'All checks passing', + 'metrics': { + 'frame_count': metrics.frame_count, + 'error_count': metrics.error_count, + 'warning_count': metrics.warning_count, + 'restart_count': metrics.restart_count, + 'avg_frame_interval': metrics.avg_frame_interval, + 'last_frame_age': current_time - metrics.last_frame_time if metrics.last_frame_time else None, + 'thread_alive': metrics.thread_alive, + 'connection_healthy': metrics.connection_healthy, + 'memory_usage_mb': metrics.memory_usage_mb, + 'cpu_usage_percent': metrics.cpu_usage_percent, + 'uptime_seconds': current_time - self.system_start_time + }, + 'last_update': metrics.last_update + } + + def _get_overall_health(self) -> Dict[str, Any]: + """Get overall system health status.""" + current_time = time.time() + components = {} + overall_status = HealthStatus.HEALTHY + + # Get health for all components + for component_id in self.metrics.keys(): + component_health = self._get_component_health(component_id) + components[component_id] = component_health + + # Determine overall status + component_status = HealthStatus(component_health['status']) + if component_status == HealthStatus.CRITICAL: + overall_status = HealthStatus.CRITICAL + elif component_status == HealthStatus.WARNING and overall_status == HealthStatus.HEALTHY: + overall_status = HealthStatus.WARNING + + # System metrics + try: + system_memory = self.process.memory_info() + system_cpu = self.process.cpu_percent() + except Exception: + system_memory = None + system_cpu = 0.0 + + return { + 'overall_status': overall_status.value, + 'timestamp': current_time, + 'uptime_seconds': current_time - self.system_start_time, + 'total_components': len(self.metrics), + 'components': components, + 'system_metrics': { + 'memory_mb': system_memory.rss / (1024 * 1024) if system_memory else 0, + 'cpu_percent': system_cpu, + 'process_id': self.process.pid + }, + 'recent_alerts': list(self.alert_history)[-10:], # Last 10 alerts + 
'recent_recoveries': list(self.recovery_actions)[-10:]  # Last 10 recovery actions
+        }
+
+    def _monitor_loop(self):
+        """Main health monitoring loop."""
+        logger.info("Health monitor loop started")
+
+        while self.running:
+            try:
+                start_time = time.time()
+
+                # Run all registered health checks
+                all_checks = []
+                for checker in self.health_checkers:
+                    try:
+                        checks = checker()
+                        all_checks.extend(checks)
+                    except Exception as e:
+                        logger.error(f"Error in health checker {checker.__name__}: {e}")
+
+                # Process health checks and trigger recovery if needed
+                for check in all_checks:
+                    self._process_health_check(check)
+
+                # Update system metrics
+                self._update_system_metrics()
+
+                # Sleep until next check
+                elapsed = time.time() - start_time
+                sleep_time = max(0, self.check_interval - elapsed)
+                if sleep_time > 0:
+                    time.sleep(sleep_time)
+
+            except Exception as e:
+                logger.error(f"Error in health monitor loop: {e}")
+                time.sleep(5.0)  # Fallback sleep
+
+        logger.info("Health monitor loop ended")
+
+    def _process_health_check(self, check: HealthCheck):
+        """Process a health check result and trigger recovery if needed."""
+        with self._lock:
+            # Store health check
+            self.health_checks[check.name] = check
+
+            # Log alerts for non-healthy status
+            if check.status != HealthStatus.HEALTHY:
+                alert = {
+                    'timestamp': check.timestamp,
+                    'component': check.name,
+                    'status': check.status.value,
+                    'message': check.message,
+                    'details': check.details
+                }
+                self.alert_history.append(alert)
+
+                logger.warning(f"Health alert [{check.status.value.upper()}] {check.name}: {check.message}")
+
+                # Trigger recovery if critical and recovery action available
+                if check.status == HealthStatus.CRITICAL and check.recovery_action:
+                    self._trigger_recovery(check.name, check)
+
+    def _trigger_recovery(self, component: str, check: HealthCheck):
+        """Trigger recovery action for a component."""
+        # Callbacks are registered per component type ("stream", "thread",
+        # "buffer"), while check names look like "stream_cam-001_frames",
+        # so match by prefix rather than exact key.
+        callback_key = next(
+            (key for key in self.recovery_callbacks if component.startswith(key)),
+            None
+        )
+        if callback_key:
+            try:
+                logger.info(f"Triggering recovery for {component}: {check.recovery_action}")
+
+                success = self.recovery_callbacks[callback_key](component, check)
+
+                recovery_action = {
+                    'timestamp': time.time(),
+                    'component': component,
+                    'action': check.recovery_action,
+                    'reason': check.message,
+                    'success': success
+                }
+
+                with self._lock:
+                    self.recovery_actions.append(recovery_action)
+
+                if success:
+                    logger.info(f"Recovery successful for {component}")
+                else:
+                    logger.error(f"Recovery failed for {component}")
+
+            except Exception as e:
+                logger.error(f"Error in recovery callback for {component}: {e}")
+
+    def _update_system_metrics(self):
+        """Update system-level metrics."""
+        try:
+            # Update process metrics for all components
+            current_time = time.time()
+
+            with self._lock:
+                for component_id, metrics in self.metrics.items():
+                    # Update CPU and memory if available
+                    try:
+                        # This is a simplified approach - in practice you'd want
+                        # per-thread or per-component resource tracking
+                        metrics.cpu_usage_percent = self.process.cpu_percent() / len(self.metrics)
+                        memory_info = self.process.memory_info()
+                        metrics.memory_usage_mb = memory_info.rss / (1024 * 1024) / len(self.metrics)
+                    except Exception:
+                        pass
+
+        except Exception as e:
+            logger.error(f"Error updating system metrics: {e}")
+
+
+# Global health monitor instance
+health_monitor = HealthMonitor()
\ No newline at end of file
diff --git a/core/monitoring/recovery.py b/core/monitoring/recovery.py
new file mode 100644
index 0000000..4ea16dc
--- /dev/null
+++ b/core/monitoring/recovery.py
@@ -0,0 +1,385 @@
+"""
+Recovery manager for automatic
handling of health issues.
+Provides rate-limited automatic restarts and graceful degradation.
+"""
+import time
+import logging
+import threading
+from typing import Dict, List, Optional, Any, Callable
+from dataclasses import dataclass
+from enum import Enum
+from collections import defaultdict, deque
+
+from .health import HealthCheck, HealthStatus, health_monitor
+
+
+logger = logging.getLogger(__name__)
+
+
+class RecoveryAction(Enum):
+    """Types of recovery actions."""
+    RESTART_STREAM = "restart_stream"
+    RESTART_THREAD = "restart_thread"
+    CLEAR_BUFFER = "clear_buffer"
+    RECONNECT = "reconnect"
+    THROTTLE = "throttle"
+    DISABLE = "disable"
+
+
+@dataclass
+class RecoveryAttempt:
+    """Record of a recovery attempt."""
+    timestamp: float
+    component: str
+    action: RecoveryAction
+    reason: str
+    success: bool
+    details: Dict[str, Any] = None
+
+
+@dataclass
+class RecoveryState:
+    """Recovery state for a component - simplified without circuit breaker."""
+    failure_count: int = 0
+    success_count: int = 0
+    last_failure_time: Optional[float] = None
+    last_success_time: Optional[float] = None
+
+
+class RecoveryManager:
+    """Manages automatic recovery actions for health issues."""
+
+    def __init__(self):
+        self.recovery_handlers: Dict[str, Callable[[str, Dict[str, Any]], bool]] = {}
+        self.recovery_states: Dict[str, RecoveryState] = {}
+        self.recovery_history: deque = deque(maxlen=1000)
+        self._lock = threading.RLock()
+
+        # Configuration - simplified without circuit breaker
+        self.recovery_cooldown = 30  # 30 seconds between recovery attempts
+        self.max_attempts_per_hour = 20  # Still limit to prevent spam, but much higher
+
+        # Track recovery attempts per component
+        self.recovery_attempts: Dict[str, deque] = defaultdict(lambda: deque(maxlen=50))
+
+        # Register with health monitor
+        health_monitor.register_recovery_callback("stream", self._handle_stream_recovery)
+        health_monitor.register_recovery_callback("thread", self._handle_thread_recovery)
+        health_monitor.register_recovery_callback("buffer", self._handle_buffer_recovery)
+
+    def register_recovery_handler(self, action: RecoveryAction, handler: Callable[[str, Dict[str, Any]], bool]):
+        """
+        Register a recovery handler for a specific action.
+
+        Args:
+            action: Type of recovery action
+            handler: Function that performs the recovery
+        """
+        self.recovery_handlers[action.value] = handler
+        logger.info(f"Registered recovery handler for {action.value}")
+
+    def can_attempt_recovery(self, component: str) -> bool:
+        """
+        Check if recovery can be attempted for a component.
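+        Throttling is deliberately light: a short cooldown plus an hourly cap.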
+ + Args: + component: Component identifier + + Returns: + True if recovery can be attempted (always allow with minimal throttling) + """ + with self._lock: + current_time = time.time() + + # Check recovery attempt rate limiting (much more permissive) + recent_attempts = [ + attempt for attempt in self.recovery_attempts[component] + if current_time - attempt <= 3600 # Last hour + ] + + # Only block if truly excessive attempts + if len(recent_attempts) >= self.max_attempts_per_hour: + logger.warning(f"Recovery rate limit exceeded for {component} " + f"({len(recent_attempts)} attempts in last hour)") + return False + + # Check cooldown period (shorter cooldown) + if recent_attempts: + last_attempt = max(recent_attempts) + if current_time - last_attempt < self.recovery_cooldown: + logger.debug(f"Recovery cooldown active for {component} " + f"(last attempt {current_time - last_attempt:.1f}s ago)") + return False + + return True + + def attempt_recovery(self, component: str, action: RecoveryAction, reason: str, + details: Optional[Dict[str, Any]] = None) -> bool: + """ + Attempt recovery for a component. + + Args: + component: Component identifier + action: Recovery action to perform + reason: Reason for recovery + details: Additional details + + Returns: + True if recovery was successful + """ + if not self.can_attempt_recovery(component): + return False + + current_time = time.time() + + logger.info(f"Attempting recovery for {component}: {action.value} ({reason})") + + try: + # Record recovery attempt + with self._lock: + self.recovery_attempts[component].append(current_time) + + # Perform recovery action + success = self._execute_recovery_action(component, action, details or {}) + + # Record recovery result + attempt = RecoveryAttempt( + timestamp=current_time, + component=component, + action=action, + reason=reason, + success=success, + details=details + ) + + with self._lock: + self.recovery_history.append(attempt) + + # Update recovery state + self._update_recovery_state(component, success) + + if success: + logger.info(f"Recovery successful for {component}: {action.value}") + else: + logger.error(f"Recovery failed for {component}: {action.value}") + + return success + + except Exception as e: + logger.error(f"Error during recovery for {component}: {e}") + self._update_recovery_state(component, False) + return False + + def _execute_recovery_action(self, component: str, action: RecoveryAction, + details: Dict[str, Any]) -> bool: + """Execute a specific recovery action.""" + handler_key = action.value + + if handler_key not in self.recovery_handlers: + logger.error(f"No recovery handler registered for action: {handler_key}") + return False + + try: + handler = self.recovery_handlers[handler_key] + return handler(component, details) + + except Exception as e: + logger.error(f"Error executing recovery action {handler_key} for {component}: {e}") + return False + + def _update_recovery_state(self, component: str, success: bool): + """Update recovery state based on recovery result.""" + current_time = time.time() + + with self._lock: + if component not in self.recovery_states: + self.recovery_states[component] = RecoveryState() + + state = self.recovery_states[component] + + if success: + state.success_count += 1 + state.last_success_time = current_time + # Reset failure count on success + state.failure_count = max(0, state.failure_count - 1) + logger.debug(f"Recovery success for {component} (total successes: {state.success_count})") + else: + state.failure_count += 1 + state.last_failure_time 
= current_time + logger.debug(f"Recovery failure for {component} (total failures: {state.failure_count})") + + def _handle_stream_recovery(self, component: str, health_check: HealthCheck) -> bool: + """Handle recovery for stream-related issues.""" + if "frames" in health_check.name: + # Frame-related issue - restart stream + return self.attempt_recovery( + component, + RecoveryAction.RESTART_STREAM, + health_check.message, + health_check.details + ) + elif "connection" in health_check.name: + # Connection issue - reconnect + return self.attempt_recovery( + component, + RecoveryAction.RECONNECT, + health_check.message, + health_check.details + ) + elif "errors" in health_check.name: + # High error rate - throttle or restart + return self.attempt_recovery( + component, + RecoveryAction.THROTTLE, + health_check.message, + health_check.details + ) + else: + # Generic stream issue - restart + return self.attempt_recovery( + component, + RecoveryAction.RESTART_STREAM, + health_check.message, + health_check.details + ) + + def _handle_thread_recovery(self, component: str, health_check: HealthCheck) -> bool: + """Handle recovery for thread-related issues.""" + if "deadlock" in health_check.name: + # Deadlock detected - restart thread + return self.attempt_recovery( + component, + RecoveryAction.RESTART_THREAD, + health_check.message, + health_check.details + ) + elif "responsive" in health_check.name: + # Thread unresponsive - restart + return self.attempt_recovery( + component, + RecoveryAction.RESTART_THREAD, + health_check.message, + health_check.details + ) + else: + # Generic thread issue - restart + return self.attempt_recovery( + component, + RecoveryAction.RESTART_THREAD, + health_check.message, + health_check.details + ) + + def _handle_buffer_recovery(self, component: str, health_check: HealthCheck) -> bool: + """Handle recovery for buffer-related issues.""" + # Buffer issues - clear buffer + return self.attempt_recovery( + component, + RecoveryAction.CLEAR_BUFFER, + health_check.message, + health_check.details + ) + + def get_recovery_stats(self) -> Dict[str, Any]: + """Get recovery statistics.""" + current_time = time.time() + + with self._lock: + # Calculate stats from history + recent_recoveries = [ + attempt for attempt in self.recovery_history + if current_time - attempt.timestamp <= 3600 # Last hour + ] + + stats_by_component = defaultdict(lambda: { + 'attempts': 0, + 'successes': 0, + 'failures': 0, + 'last_attempt': None, + 'last_success': None + }) + + for attempt in recent_recoveries: + stats = stats_by_component[attempt.component] + stats['attempts'] += 1 + + if attempt.success: + stats['successes'] += 1 + if not stats['last_success'] or attempt.timestamp > stats['last_success']: + stats['last_success'] = attempt.timestamp + else: + stats['failures'] += 1 + + if not stats['last_attempt'] or attempt.timestamp > stats['last_attempt']: + stats['last_attempt'] = attempt.timestamp + + return { + 'total_recoveries_last_hour': len(recent_recoveries), + 'recovery_by_component': dict(stats_by_component), + 'recovery_states': { + component: { + 'failure_count': state.failure_count, + 'success_count': state.success_count, + 'last_failure_time': state.last_failure_time, + 'last_success_time': state.last_success_time + } + for component, state in self.recovery_states.items() + }, + 'recent_history': [ + { + 'timestamp': attempt.timestamp, + 'component': attempt.component, + 'action': attempt.action.value, + 'reason': attempt.reason, + 'success': attempt.success + } + for attempt in 
list(self.recovery_history)[-10:] # Last 10 attempts + ] + } + + def force_recovery(self, component: str, action: RecoveryAction, reason: str = "manual") -> bool: + """ + Force recovery for a component, bypassing rate limiting. + + Args: + component: Component identifier + action: Recovery action to perform + reason: Reason for forced recovery + + Returns: + True if recovery was successful + """ + logger.info(f"Forcing recovery for {component}: {action.value} ({reason})") + + current_time = time.time() + + try: + # Execute recovery action directly + success = self._execute_recovery_action(component, action, {}) + + # Record forced recovery + attempt = RecoveryAttempt( + timestamp=current_time, + component=component, + action=action, + reason=f"forced: {reason}", + success=success, + details={'forced': True} + ) + + with self._lock: + self.recovery_history.append(attempt) + self.recovery_attempts[component].append(current_time) + + # Update recovery state + self._update_recovery_state(component, success) + + return success + + except Exception as e: + logger.error(f"Error during forced recovery for {component}: {e}") + return False + + +# Global recovery manager instance +recovery_manager = RecoveryManager() \ No newline at end of file diff --git a/core/monitoring/stream_health.py b/core/monitoring/stream_health.py new file mode 100644 index 0000000..770dfe4 --- /dev/null +++ b/core/monitoring/stream_health.py @@ -0,0 +1,351 @@ +""" +Stream-specific health monitoring for video streams. +Tracks frame production, connection health, and stream-specific metrics. +""" +import time +import logging +import threading +import requests +from typing import Dict, Optional, List, Any +from collections import deque +from dataclasses import dataclass + +from .health import HealthCheck, HealthStatus, health_monitor + + +logger = logging.getLogger(__name__) + + +@dataclass +class StreamMetrics: + """Metrics for an individual stream.""" + camera_id: str + stream_type: str # 'rtsp', 'http_snapshot' + start_time: float + last_frame_time: Optional[float] = None + frame_count: int = 0 + error_count: int = 0 + reconnect_count: int = 0 + bytes_received: int = 0 + frames_per_second: float = 0.0 + connection_attempts: int = 0 + last_connection_test: Optional[float] = None + connection_healthy: bool = True + last_error: Optional[str] = None + last_error_time: Optional[float] = None + + +class StreamHealthTracker: + """Tracks health for individual video streams.""" + + def __init__(self): + self.streams: Dict[str, StreamMetrics] = {} + self._lock = threading.RLock() + + # Configuration + self.connection_test_interval = 300 # Test connection every 5 minutes + self.frame_timeout_warning = 120 # Warn if no frames for 2 minutes + self.frame_timeout_critical = 300 # Critical if no frames for 5 minutes + self.error_rate_threshold = 0.1 # 10% error rate threshold + + # Register with health monitor + health_monitor.register_health_checker(self._perform_health_checks) + + def register_stream(self, camera_id: str, stream_type: str, source_url: Optional[str] = None): + """Register a new stream for monitoring.""" + with self._lock: + if camera_id not in self.streams: + self.streams[camera_id] = StreamMetrics( + camera_id=camera_id, + stream_type=stream_type, + start_time=time.time() + ) + logger.info(f"Registered stream for monitoring: {camera_id} ({stream_type})") + + # Update health monitor metrics + health_monitor.update_metrics( + camera_id, + thread_alive=True, + connection_healthy=True + ) + + def unregister_stream(self, 
camera_id: str): + """Unregister a stream from monitoring.""" + with self._lock: + if camera_id in self.streams: + del self.streams[camera_id] + logger.info(f"Unregistered stream from monitoring: {camera_id}") + + def report_frame_received(self, camera_id: str, frame_size_bytes: int = 0): + """Report that a frame was received.""" + current_time = time.time() + + with self._lock: + if camera_id not in self.streams: + logger.warning(f"Frame received for unregistered stream: {camera_id}") + return + + stream = self.streams[camera_id] + + # Update frame metrics + if stream.last_frame_time: + interval = current_time - stream.last_frame_time + # Calculate FPS as moving average + if stream.frames_per_second == 0: + stream.frames_per_second = 1.0 / interval if interval > 0 else 0 + else: + new_fps = 1.0 / interval if interval > 0 else 0 + stream.frames_per_second = (stream.frames_per_second * 0.9) + (new_fps * 0.1) + + stream.last_frame_time = current_time + stream.frame_count += 1 + stream.bytes_received += frame_size_bytes + + # Report to health monitor + health_monitor.report_frame_received(camera_id) + health_monitor.update_metrics( + camera_id, + frame_count=stream.frame_count, + avg_frame_interval=1.0 / stream.frames_per_second if stream.frames_per_second > 0 else 0, + last_frame_time=current_time + ) + + def report_error(self, camera_id: str, error_message: str): + """Report an error for a stream.""" + current_time = time.time() + + with self._lock: + if camera_id not in self.streams: + logger.warning(f"Error reported for unregistered stream: {camera_id}") + return + + stream = self.streams[camera_id] + stream.error_count += 1 + stream.last_error = error_message + stream.last_error_time = current_time + + # Report to health monitor + health_monitor.report_error(camera_id, "stream_error") + health_monitor.update_metrics( + camera_id, + error_count=stream.error_count + ) + + logger.debug(f"Error reported for stream {camera_id}: {error_message}") + + def report_reconnect(self, camera_id: str, reason: str = "unknown"): + """Report that a stream reconnected.""" + current_time = time.time() + + with self._lock: + if camera_id not in self.streams: + logger.warning(f"Reconnect reported for unregistered stream: {camera_id}") + return + + stream = self.streams[camera_id] + stream.reconnect_count += 1 + + # Report to health monitor + health_monitor.report_restart(camera_id) + health_monitor.update_metrics( + camera_id, + restart_count=stream.reconnect_count + ) + + logger.info(f"Reconnect reported for stream {camera_id}: {reason}") + + def report_connection_attempt(self, camera_id: str, success: bool): + """Report a connection attempt.""" + with self._lock: + if camera_id not in self.streams: + return + + stream = self.streams[camera_id] + stream.connection_attempts += 1 + stream.connection_healthy = success + + # Report to health monitor + health_monitor.update_metrics( + camera_id, + connection_healthy=success + ) + + def test_http_connection(self, camera_id: str, url: str) -> bool: + """Test HTTP connection health for snapshot streams.""" + try: + # Quick HEAD request to test connectivity + response = requests.head(url, timeout=5, verify=False) + success = response.status_code in [200, 404] # 404 might be normal for some cameras + + self.report_connection_attempt(camera_id, success) + + if success: + logger.debug(f"Connection test passed for {camera_id}") + else: + logger.warning(f"Connection test failed for {camera_id}: HTTP {response.status_code}") + + return success + + except Exception as e: + 
logger.warning(f"Connection test failed for {camera_id}: {e}") + self.report_connection_attempt(camera_id, False) + return False + + def get_stream_metrics(self, camera_id: str) -> Optional[Dict[str, Any]]: + """Get metrics for a specific stream.""" + with self._lock: + if camera_id not in self.streams: + return None + + stream = self.streams[camera_id] + current_time = time.time() + + # Calculate derived metrics + uptime = current_time - stream.start_time + frame_age = current_time - stream.last_frame_time if stream.last_frame_time else None + error_rate = stream.error_count / max(1, stream.frame_count) + + return { + 'camera_id': camera_id, + 'stream_type': stream.stream_type, + 'uptime_seconds': uptime, + 'frame_count': stream.frame_count, + 'frames_per_second': stream.frames_per_second, + 'bytes_received': stream.bytes_received, + 'error_count': stream.error_count, + 'error_rate': error_rate, + 'reconnect_count': stream.reconnect_count, + 'connection_attempts': stream.connection_attempts, + 'connection_healthy': stream.connection_healthy, + 'last_frame_age_seconds': frame_age, + 'last_error': stream.last_error, + 'last_error_time': stream.last_error_time + } + + def get_all_metrics(self) -> Dict[str, Dict[str, Any]]: + """Get metrics for all streams.""" + with self._lock: + return { + camera_id: self.get_stream_metrics(camera_id) + for camera_id in self.streams.keys() + } + + def _perform_health_checks(self) -> List[HealthCheck]: + """Perform health checks for all streams.""" + checks = [] + current_time = time.time() + + with self._lock: + for camera_id, stream in self.streams.items(): + checks.extend(self._check_stream_health(camera_id, stream, current_time)) + + return checks + + def _check_stream_health(self, camera_id: str, stream: StreamMetrics, current_time: float) -> List[HealthCheck]: + """Perform health checks for a single stream.""" + checks = [] + + # Check frame freshness + if stream.last_frame_time: + frame_age = current_time - stream.last_frame_time + + if frame_age > self.frame_timeout_critical: + checks.append(HealthCheck( + name=f"stream_{camera_id}_frames", + status=HealthStatus.CRITICAL, + message=f"No frames for {frame_age:.1f}s (critical threshold: {self.frame_timeout_critical}s)", + details={ + 'frame_age': frame_age, + 'threshold': self.frame_timeout_critical, + 'last_frame_time': stream.last_frame_time + }, + recovery_action="restart_stream" + )) + elif frame_age > self.frame_timeout_warning: + checks.append(HealthCheck( + name=f"stream_{camera_id}_frames", + status=HealthStatus.WARNING, + message=f"Frames aging: {frame_age:.1f}s (warning threshold: {self.frame_timeout_warning}s)", + details={ + 'frame_age': frame_age, + 'threshold': self.frame_timeout_warning, + 'last_frame_time': stream.last_frame_time + } + )) + else: + # No frames received yet + startup_time = current_time - stream.start_time + if startup_time > 60: # Allow 1 minute for initial connection + checks.append(HealthCheck( + name=f"stream_{camera_id}_startup", + status=HealthStatus.CRITICAL, + message=f"No frames received since startup {startup_time:.1f}s ago", + details={ + 'startup_time': startup_time, + 'start_time': stream.start_time + }, + recovery_action="restart_stream" + )) + + # Check error rate + if stream.frame_count > 10: # Need sufficient samples + error_rate = stream.error_count / stream.frame_count + if error_rate > self.error_rate_threshold: + checks.append(HealthCheck( + name=f"stream_{camera_id}_errors", + status=HealthStatus.WARNING, + message=f"High error rate: 
{error_rate:.1%} ({stream.error_count}/{stream.frame_count})", + details={ + 'error_rate': error_rate, + 'error_count': stream.error_count, + 'frame_count': stream.frame_count, + 'last_error': stream.last_error + } + )) + + # Check connection health + if not stream.connection_healthy: + checks.append(HealthCheck( + name=f"stream_{camera_id}_connection", + status=HealthStatus.WARNING, + message="Connection unhealthy (last test failed)", + details={ + 'connection_attempts': stream.connection_attempts, + 'last_connection_test': stream.last_connection_test + } + )) + + # Check excessive reconnects + uptime_hours = (current_time - stream.start_time) / 3600 + if uptime_hours > 1 and stream.reconnect_count > 5: # More than 5 reconnects per hour + reconnect_rate = stream.reconnect_count / uptime_hours + checks.append(HealthCheck( + name=f"stream_{camera_id}_stability", + status=HealthStatus.WARNING, + message=f"Frequent reconnects: {reconnect_rate:.1f}/hour ({stream.reconnect_count} total)", + details={ + 'reconnect_rate': reconnect_rate, + 'reconnect_count': stream.reconnect_count, + 'uptime_hours': uptime_hours + } + )) + + # Check frame rate health + if stream.last_frame_time and stream.frames_per_second > 0: + expected_fps = 6.0 # Expected FPS for streams + if stream.frames_per_second < expected_fps * 0.5: # Less than 50% of expected + checks.append(HealthCheck( + name=f"stream_{camera_id}_framerate", + status=HealthStatus.WARNING, + message=f"Low frame rate: {stream.frames_per_second:.1f} fps (expected: ~{expected_fps} fps)", + details={ + 'current_fps': stream.frames_per_second, + 'expected_fps': expected_fps + } + )) + + return checks + + +# Global stream health tracker instance +stream_health_tracker = StreamHealthTracker() \ No newline at end of file diff --git a/core/monitoring/thread_health.py b/core/monitoring/thread_health.py new file mode 100644 index 0000000..a29625b --- /dev/null +++ b/core/monitoring/thread_health.py @@ -0,0 +1,381 @@ +""" +Thread health monitoring for detecting unresponsive and deadlocked threads. +Provides thread liveness detection and responsiveness testing. 
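+Monitored threads report heartbeats; a background daemon loop probes responsiveness.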
+""" +import time +import threading +import logging +import signal +import traceback +from typing import Dict, List, Optional, Any, Callable +from dataclasses import dataclass +from collections import defaultdict + +from .health import HealthCheck, HealthStatus, health_monitor + + +logger = logging.getLogger(__name__) + + +@dataclass +class ThreadInfo: + """Information about a monitored thread.""" + thread_id: int + thread_name: str + start_time: float + last_heartbeat: float + heartbeat_count: int = 0 + is_responsive: bool = True + last_activity: Optional[str] = None + stack_traces: List[str] = None + + +class ThreadHealthMonitor: + """Monitors thread health and responsiveness.""" + + def __init__(self): + self.monitored_threads: Dict[int, ThreadInfo] = {} + self.heartbeat_callbacks: Dict[int, Callable[[], bool]] = {} + self._lock = threading.RLock() + + # Configuration + self.heartbeat_timeout = 60.0 # 1 minute without heartbeat = unresponsive + self.responsiveness_test_interval = 30.0 # Test responsiveness every 30 seconds + self.stack_trace_count = 5 # Keep last 5 stack traces for analysis + + # Register with health monitor + health_monitor.register_health_checker(self._perform_health_checks) + + # Enable periodic responsiveness testing + self.test_thread = threading.Thread(target=self._responsiveness_test_loop, daemon=True) + self.test_thread.start() + + def register_thread(self, thread: threading.Thread, heartbeat_callback: Optional[Callable[[], bool]] = None): + """ + Register a thread for monitoring. + + Args: + thread: Thread to monitor + heartbeat_callback: Optional callback to test thread responsiveness + """ + with self._lock: + thread_info = ThreadInfo( + thread_id=thread.ident, + thread_name=thread.name, + start_time=time.time(), + last_heartbeat=time.time() + ) + + self.monitored_threads[thread.ident] = thread_info + + if heartbeat_callback: + self.heartbeat_callbacks[thread.ident] = heartbeat_callback + + logger.info(f"Registered thread for monitoring: {thread.name} (ID: {thread.ident})") + + def unregister_thread(self, thread_id: int): + """Unregister a thread from monitoring.""" + with self._lock: + if thread_id in self.monitored_threads: + thread_name = self.monitored_threads[thread_id].thread_name + del self.monitored_threads[thread_id] + + if thread_id in self.heartbeat_callbacks: + del self.heartbeat_callbacks[thread_id] + + logger.info(f"Unregistered thread from monitoring: {thread_name} (ID: {thread_id})") + + def heartbeat(self, thread_id: Optional[int] = None, activity: Optional[str] = None): + """ + Report thread heartbeat. 
+ + Args: + thread_id: Thread ID (uses current thread if None) + activity: Description of current activity + """ + if thread_id is None: + thread_id = threading.current_thread().ident + + current_time = time.time() + + with self._lock: + if thread_id in self.monitored_threads: + thread_info = self.monitored_threads[thread_id] + thread_info.last_heartbeat = current_time + thread_info.heartbeat_count += 1 + thread_info.is_responsive = True + + if activity: + thread_info.last_activity = activity + + # Report to health monitor + health_monitor.update_metrics( + f"thread_{thread_info.thread_name}", + thread_alive=True, + last_frame_time=current_time + ) + + def get_thread_info(self, thread_id: int) -> Optional[Dict[str, Any]]: + """Get information about a monitored thread.""" + with self._lock: + if thread_id not in self.monitored_threads: + return None + + thread_info = self.monitored_threads[thread_id] + current_time = time.time() + + return { + 'thread_id': thread_id, + 'thread_name': thread_info.thread_name, + 'uptime_seconds': current_time - thread_info.start_time, + 'last_heartbeat_age': current_time - thread_info.last_heartbeat, + 'heartbeat_count': thread_info.heartbeat_count, + 'is_responsive': thread_info.is_responsive, + 'last_activity': thread_info.last_activity, + 'stack_traces': thread_info.stack_traces or [] + } + + def get_all_thread_info(self) -> Dict[int, Dict[str, Any]]: + """Get information about all monitored threads.""" + with self._lock: + return { + thread_id: self.get_thread_info(thread_id) + for thread_id in self.monitored_threads.keys() + } + + def test_thread_responsiveness(self, thread_id: int) -> bool: + """ + Test if a thread is responsive by calling its heartbeat callback. + + Args: + thread_id: ID of thread to test + + Returns: + True if thread responds within timeout + """ + if thread_id not in self.heartbeat_callbacks: + return True # Can't test if no callback provided + + try: + # Call the heartbeat callback with a timeout + callback = self.heartbeat_callbacks[thread_id] + + # This is a simple approach - in practice you might want to use + # threading.Timer or asyncio for more sophisticated timeout handling + start_time = time.time() + result = callback() + response_time = time.time() - start_time + + with self._lock: + if thread_id in self.monitored_threads: + self.monitored_threads[thread_id].is_responsive = result + + if response_time > 5.0: # Slow response + logger.warning(f"Thread {thread_id} slow response: {response_time:.1f}s") + + return result + + except Exception as e: + logger.error(f"Error testing thread {thread_id} responsiveness: {e}") + with self._lock: + if thread_id in self.monitored_threads: + self.monitored_threads[thread_id].is_responsive = False + return False + + def capture_stack_trace(self, thread_id: int) -> Optional[str]: + """ + Capture stack trace for a thread. 
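+        Takes a point-in-time sample of the thread's frame via sys._current_frames().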
+
+        Args:
+            thread_id: ID of thread to capture
+
+        Returns:
+            Stack trace string or None if not available
+        """
+        try:
+            # Get the current frame of every live thread (thread id -> frame)
+            import sys
+            frames = dict(sys._current_frames())
+
+            if thread_id not in frames:
+                return None
+
+            # Format stack trace
+            frame = frames[thread_id]
+            stack_trace = ''.join(traceback.format_stack(frame))
+
+            # Store in thread info
+            with self._lock:
+                if thread_id in self.monitored_threads:
+                    thread_info = self.monitored_threads[thread_id]
+                    if thread_info.stack_traces is None:
+                        thread_info.stack_traces = []
+
+                    thread_info.stack_traces.append(f"{time.time()}: {stack_trace}")
+
+                    # Keep only last N stack traces
+                    if len(thread_info.stack_traces) > self.stack_trace_count:
+                        thread_info.stack_traces = thread_info.stack_traces[-self.stack_trace_count:]
+
+            return stack_trace
+
+        except Exception as e:
+            logger.error(f"Error capturing stack trace for thread {thread_id}: {e}")
+            return None
+
+    def detect_deadlocks(self) -> List[Dict[str, Any]]:
+        """
+        Attempt to detect potential deadlocks by analyzing thread states.
+
+        Returns:
+            List of potential deadlock scenarios
+        """
+        deadlocks = []
+        current_time = time.time()
+
+        with self._lock:
+            # Look for threads that haven't had heartbeats for a long time
+            # and are supposedly alive
+            for thread_id, thread_info in self.monitored_threads.items():
+                heartbeat_age = current_time - thread_info.last_heartbeat
+
+                if heartbeat_age > self.heartbeat_timeout * 2:  # Double the timeout
+                    # Check if thread still exists
+                    thread_exists = any(
+                        t.ident == thread_id and t.is_alive()
+                        for t in threading.enumerate()
+                    )
+
+                    if thread_exists:
+                        # Thread exists but not responding - potential deadlock
+                        stack_trace = self.capture_stack_trace(thread_id)
+
+                        deadlock_info = {
+                            'thread_id': thread_id,
+                            'thread_name': thread_info.thread_name,
+                            'heartbeat_age': heartbeat_age,
+                            'last_activity': thread_info.last_activity,
+                            'stack_trace': stack_trace,
+                            'detection_time': current_time
+                        }
+
+                        deadlocks.append(deadlock_info)
+                        logger.warning(f"Potential deadlock detected in thread {thread_info.thread_name}")
+
+        return deadlocks
+
+    def _responsiveness_test_loop(self):
+        """Background loop to test thread responsiveness."""
+        logger.info("Thread responsiveness testing started")
+
+        while True:
+            try:
+                time.sleep(self.responsiveness_test_interval)
+
+                with self._lock:
+                    thread_ids = list(self.monitored_threads.keys())
+
+                for thread_id in thread_ids:
+                    try:
+                        self.test_thread_responsiveness(thread_id)
+                    except Exception as e:
+                        logger.error(f"Error testing thread {thread_id}: {e}")
+
+            except Exception as e:
+                logger.error(f"Error in responsiveness test loop: {e}")
+                time.sleep(10.0)  # Fallback sleep
+
+    def _perform_health_checks(self) -> List[HealthCheck]:
+        """Perform health checks for all monitored threads."""
+        checks = []
+        current_time = time.time()
+
+        with self._lock:
+            for thread_id, thread_info in self.monitored_threads.items():
+                checks.extend(self._check_thread_health(thread_id, thread_info, current_time))
+
+        # Check for deadlocks
+        deadlocks = self.detect_deadlocks()
+        for deadlock in deadlocks:
+            checks.append(HealthCheck(
+                name=f"deadlock_detection_{deadlock['thread_id']}",
+                status=HealthStatus.CRITICAL,
+                message=f"Potential deadlock in thread {deadlock['thread_name']} "
+                        f"(unresponsive for {deadlock['heartbeat_age']:.1f}s)",
+                details=deadlock,
+                recovery_action="restart_thread"
+            ))
+
+        return checks
+
+    def _check_thread_health(self, thread_id: int, thread_info: ThreadInfo, current_time: float)
-> List[HealthCheck]: + """Perform health checks for a single thread.""" + checks = [] + + # Check if thread still exists + thread_exists = any( + t.ident == thread_id and t.is_alive() + for t in threading.enumerate() + ) + + if not thread_exists: + checks.append(HealthCheck( + name=f"thread_{thread_info.thread_name}_alive", + status=HealthStatus.CRITICAL, + message=f"Thread {thread_info.thread_name} is no longer alive", + details={ + 'thread_id': thread_id, + 'uptime': current_time - thread_info.start_time, + 'last_heartbeat': thread_info.last_heartbeat + }, + recovery_action="restart_thread" + )) + return checks + + # Check heartbeat freshness + heartbeat_age = current_time - thread_info.last_heartbeat + + if heartbeat_age > self.heartbeat_timeout: + checks.append(HealthCheck( + name=f"thread_{thread_info.thread_name}_responsive", + status=HealthStatus.CRITICAL, + message=f"Thread {thread_info.thread_name} unresponsive for {heartbeat_age:.1f}s", + details={ + 'thread_id': thread_id, + 'heartbeat_age': heartbeat_age, + 'heartbeat_count': thread_info.heartbeat_count, + 'last_activity': thread_info.last_activity, + 'is_responsive': thread_info.is_responsive + }, + recovery_action="restart_thread" + )) + elif heartbeat_age > self.heartbeat_timeout * 0.5: # Warning at 50% of timeout + checks.append(HealthCheck( + name=f"thread_{thread_info.thread_name}_responsive", + status=HealthStatus.WARNING, + message=f"Thread {thread_info.thread_name} slow heartbeat: {heartbeat_age:.1f}s", + details={ + 'thread_id': thread_id, + 'heartbeat_age': heartbeat_age, + 'heartbeat_count': thread_info.heartbeat_count, + 'last_activity': thread_info.last_activity, + 'is_responsive': thread_info.is_responsive + } + )) + + # Check responsiveness test results + if not thread_info.is_responsive: + checks.append(HealthCheck( + name=f"thread_{thread_info.thread_name}_callback", + status=HealthStatus.WARNING, + message=f"Thread {thread_info.thread_name} failed responsiveness test", + details={ + 'thread_id': thread_id, + 'last_activity': thread_info.last_activity + } + )) + + return checks + + +# Global thread health monitor instance +thread_health_monitor = ThreadHealthMonitor() \ No newline at end of file diff --git a/core/streaming/readers/ffmpeg_rtsp.py b/core/streaming/readers/ffmpeg_rtsp.py index 8641495..f2fb8d1 100644 --- a/core/streaming/readers/ffmpeg_rtsp.py +++ b/core/streaming/readers/ffmpeg_rtsp.py @@ -1,5 +1,6 @@ """ FFmpeg RTSP stream reader using subprocess piping frames directly to buffer. +Enhanced with comprehensive health monitoring and automatic recovery. 
""" import cv2 import time @@ -7,10 +8,13 @@ import threading import numpy as np import subprocess import struct -from typing import Optional, Callable +from typing import Optional, Callable, Dict, Any from .base import VideoReader from .utils import log_success, log_warning, log_error, log_info +from ..monitoring.stream_health import stream_health_tracker +from ..monitoring.thread_health import thread_health_monitor +from ..monitoring.recovery import recovery_manager, RecoveryAction class FFmpegRTSPReader(VideoReader): @@ -35,6 +39,21 @@ class FFmpegRTSPReader(VideoReader): self.first_start_timeout = 30.0 # 30s timeout on first start self.restart_timeout = 15.0 # 15s timeout after restart + # Health monitoring setup + self.last_heartbeat = time.time() + self.consecutive_errors = 0 + self.ffmpeg_restart_count = 0 + + # Register recovery handlers + recovery_manager.register_recovery_handler( + RecoveryAction.RESTART_STREAM, + self._handle_restart_recovery + ) + recovery_manager.register_recovery_handler( + RecoveryAction.RECONNECT, + self._handle_reconnect_recovery + ) + @property def is_running(self) -> bool: """Check if the reader is currently running.""" @@ -58,21 +77,35 @@ class FFmpegRTSPReader(VideoReader): self.stop_event.clear() self.thread = threading.Thread(target=self._read_frames, daemon=True) self.thread.start() - log_success(self.camera_id, "Stream started") + + # Register with health monitoring + stream_health_tracker.register_stream(self.camera_id, "rtsp_ffmpeg", self.rtsp_url) + thread_health_monitor.register_thread(self.thread, self._heartbeat_callback) + + log_success(self.camera_id, "Stream started with health monitoring") def stop(self): """Stop the FFmpeg subprocess reader.""" self.stop_event.set() + + # Unregister from health monitoring + if self.thread: + thread_health_monitor.unregister_thread(self.thread.ident) + if self.process: self.process.terminate() try: self.process.wait(timeout=5) except subprocess.TimeoutExpired: self.process.kill() + if self.thread: self.thread.join(timeout=5.0) if self.stderr_thread: self.stderr_thread.join(timeout=2.0) + + stream_health_tracker.unregister_stream(self.camera_id) + log_info(self.camera_id, "Stream stopped") def _start_ffmpeg_process(self): @@ -249,6 +282,9 @@ class FFmpegRTSPReader(VideoReader): while not self.stop_event.is_set(): try: + # Send heartbeat for thread health monitoring + self._send_heartbeat("reading_frames") + # Check watchdog timeout if process is running if self.process and self.process.poll() is None: if self._check_watchdog_timeout(): @@ -259,8 +295,17 @@ class FFmpegRTSPReader(VideoReader): if not self.process or self.process.poll() is not None: if self.process and self.process.poll() is not None: log_warning(self.camera_id, "Stream disconnected, reconnecting...") + stream_health_tracker.report_error( + self.camera_id, + "FFmpeg process disconnected" + ) if not self._start_ffmpeg_process(): + self.consecutive_errors += 1 + stream_health_tracker.report_error( + self.camera_id, + "Failed to start FFmpeg process" + ) time.sleep(5.0) continue @@ -275,9 +320,22 @@ class FFmpegRTSPReader(VideoReader): # Update watchdog - we got a frame self.last_frame_time = time.time() + # Reset error counter on successful frame + self.consecutive_errors = 0 + + # Report successful frame to health monitoring + frame_size = frame.nbytes + stream_health_tracker.report_frame_received(self.camera_id, frame_size) + # Call frame callback if self.frame_callback: - self.frame_callback(self.camera_id, frame) + try: + 
self.frame_callback(self.camera_id, frame) + except Exception as e: + stream_health_tracker.report_error( + self.camera_id, + f"Frame callback error: {e}" + ) frame_count += 1 @@ -287,16 +345,85 @@ class FFmpegRTSPReader(VideoReader): log_success(self.camera_id, f"{frame_count} frames captured ({frame.shape[1]}x{frame.shape[0]})") last_log_time = current_time - except Exception: + except Exception as e: # Process might have died, let it restart on next iteration + stream_health_tracker.report_error( + self.camera_id, + f"Frame reading error: {e}" + ) if self.process: self.process.terminate() self.process = None time.sleep(1.0) - except Exception: + except Exception as e: + stream_health_tracker.report_error( + self.camera_id, + f"Main loop error: {e}" + ) time.sleep(1.0) # Cleanup if self.process: - self.process.terminate() \ No newline at end of file + self.process.terminate() + + # Health monitoring methods + def _send_heartbeat(self, activity: str = "running"): + """Send heartbeat to thread health monitor.""" + self.last_heartbeat = time.time() + thread_health_monitor.heartbeat(activity=activity) + + def _heartbeat_callback(self) -> bool: + """Heartbeat callback for thread responsiveness testing.""" + try: + # Check if thread is responsive by checking recent heartbeat + current_time = time.time() + age = current_time - self.last_heartbeat + + # Thread is responsive if heartbeat is recent + return age < 30.0 # 30 second responsiveness threshold + + except Exception: + return False + + def _handle_restart_recovery(self, component: str, details: Dict[str, Any]) -> bool: + """Handle restart recovery action.""" + try: + log_info(self.camera_id, "Restarting FFmpeg RTSP reader for health recovery") + + # Stop current instance + self.stop() + + # Small delay + time.sleep(2.0) + + # Restart + self.start() + + # Report successful restart + stream_health_tracker.report_reconnect(self.camera_id, "health_recovery_restart") + self.ffmpeg_restart_count += 1 + + return True + + except Exception as e: + log_error(self.camera_id, f"Failed to restart FFmpeg RTSP reader: {e}") + return False + + def _handle_reconnect_recovery(self, component: str, details: Dict[str, Any]) -> bool: + """Handle reconnect recovery action.""" + try: + log_info(self.camera_id, "Reconnecting FFmpeg RTSP reader for health recovery") + + # Force restart FFmpeg process + self._restart_ffmpeg_process() + + # Reset error counters + self.consecutive_errors = 0 + stream_health_tracker.report_reconnect(self.camera_id, "health_recovery_reconnect") + + return True + + except Exception as e: + log_error(self.camera_id, f"Failed to reconnect FFmpeg RTSP reader: {e}") + return False \ No newline at end of file diff --git a/core/streaming/readers/http_snapshot.py b/core/streaming/readers/http_snapshot.py index 5a479db..1aab967 100644 --- a/core/streaming/readers/http_snapshot.py +++ b/core/streaming/readers/http_snapshot.py @@ -1,5 +1,6 @@ """ HTTP snapshot reader optimized for 2560x1440 (2K) high quality images. +Enhanced with comprehensive health monitoring and automatic recovery. 
""" import cv2 import logging @@ -7,10 +8,13 @@ import time import threading import requests import numpy as np -from typing import Optional, Callable +from typing import Optional, Callable, Dict, Any from .base import VideoReader from .utils import log_success, log_warning, log_error, log_info +from ..monitoring.stream_health import stream_health_tracker +from ..monitoring.thread_health import thread_health_monitor +from ..monitoring.recovery import recovery_manager, RecoveryAction logger = logging.getLogger(__name__) @@ -30,6 +34,22 @@ class HTTPSnapshotReader(VideoReader): self.expected_height = 1440 self.max_file_size = 10 * 1024 * 1024 # 10MB max for 2K image + # Health monitoring setup + self.last_heartbeat = time.time() + self.consecutive_errors = 0 + self.connection_test_interval = 300 # Test connection every 5 minutes + self.last_connection_test = None + + # Register recovery handlers + recovery_manager.register_recovery_handler( + RecoveryAction.RESTART_STREAM, + self._handle_restart_recovery + ) + recovery_manager.register_recovery_handler( + RecoveryAction.RECONNECT, + self._handle_reconnect_recovery + ) + @property def is_running(self) -> bool: """Check if the reader is currently running.""" @@ -53,13 +73,24 @@ class HTTPSnapshotReader(VideoReader): self.stop_event.clear() self.thread = threading.Thread(target=self._read_snapshots, daemon=True) self.thread.start() - logger.info(f"Started snapshot reader for camera {self.camera_id}") + + # Register with health monitoring + stream_health_tracker.register_stream(self.camera_id, "http_snapshot", self.snapshot_url) + thread_health_monitor.register_thread(self.thread, self._heartbeat_callback) + + logger.info(f"Started snapshot reader for camera {self.camera_id} with health monitoring") def stop(self): """Stop the snapshot reader thread.""" self.stop_event.set() + + # Unregister from health monitoring if self.thread: + thread_health_monitor.unregister_thread(self.thread.ident) self.thread.join(timeout=5.0) + + stream_health_tracker.unregister_stream(self.camera_id) + logger.info(f"Stopped snapshot reader for camera {self.camera_id}") def _read_snapshots(self): @@ -67,17 +98,29 @@ class HTTPSnapshotReader(VideoReader): retries = 0 frame_count = 0 last_log_time = time.time() + last_connection_test = time.time() interval_seconds = self.interval_ms / 1000.0 logger.info(f"Snapshot interval for camera {self.camera_id}: {interval_seconds}s") while not self.stop_event.is_set(): try: + # Send heartbeat for thread health monitoring + self._send_heartbeat("fetching_snapshot") + start_time = time.time() frame = self._fetch_snapshot() if frame is None: retries += 1 + self.consecutive_errors += 1 + + # Report error to health monitoring + stream_health_tracker.report_error( + self.camera_id, + f"Failed to fetch snapshot (retry {retries}/{self.max_retries})" + ) + logger.warning(f"Failed to fetch snapshot for camera {self.camera_id}, retry {retries}/{self.max_retries}") if self.max_retries != -1 and retries > self.max_retries: @@ -90,21 +133,36 @@ class HTTPSnapshotReader(VideoReader): # Accept any valid image dimensions - don't force specific resolution if frame.shape[1] <= 0 or frame.shape[0] <= 0: logger.warning(f"Camera {self.camera_id}: Invalid frame dimensions {frame.shape[1]}x{frame.shape[0]}") + stream_health_tracker.report_error( + self.camera_id, + f"Invalid frame dimensions: {frame.shape[1]}x{frame.shape[0]}" + ) continue # Reset retry counter on successful fetch retries = 0 + self.consecutive_errors = 0 frame_count += 1 + # Report 
successful frame to health monitoring + frame_size = frame.nbytes + stream_health_tracker.report_frame_received(self.camera_id, frame_size) + # Call frame callback if self.frame_callback: try: self.frame_callback(self.camera_id, frame) except Exception as e: logger.error(f"Camera {self.camera_id}: Frame callback error: {e}") + stream_health_tracker.report_error(self.camera_id, f"Frame callback error: {e}") + + # Periodic connection health test + current_time = time.time() + if current_time - last_connection_test >= self.connection_test_interval: + self._test_connection_health() + last_connection_test = current_time # Log progress every 30 seconds - current_time = time.time() if current_time - last_log_time >= 30: logger.info(f"Camera {self.camera_id}: {frame_count} snapshots processed") last_log_time = current_time @@ -117,6 +175,7 @@ class HTTPSnapshotReader(VideoReader): except Exception as e: logger.error(f"Error in snapshot loop for camera {self.camera_id}: {e}") + stream_health_tracker.report_error(self.camera_id, f"Snapshot loop error: {e}") retries += 1 if self.max_retries != -1 and retries > self.max_retries: break @@ -246,4 +305,74 @@ class HTTPSnapshotReader(VideoReader): right = target_width - new_width - left resized = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0]) - return resized \ No newline at end of file + return resized + + # Health monitoring methods + def _send_heartbeat(self, activity: str = "running"): + """Send heartbeat to thread health monitor.""" + self.last_heartbeat = time.time() + thread_health_monitor.heartbeat(activity=activity) + + def _heartbeat_callback(self) -> bool: + """Heartbeat callback for thread responsiveness testing.""" + try: + # Check if thread is responsive by checking recent heartbeat + current_time = time.time() + age = current_time - self.last_heartbeat + + # Thread is responsive if heartbeat is recent + return age < 30.0 # 30 second responsiveness threshold + + except Exception: + return False + + def _test_connection_health(self): + """Test HTTP connection health.""" + try: + stream_health_tracker.test_http_connection(self.camera_id, self.snapshot_url) + except Exception as e: + logger.error(f"Error testing connection health for {self.camera_id}: {e}") + + def _handle_restart_recovery(self, component: str, details: Dict[str, Any]) -> bool: + """Handle restart recovery action.""" + try: + logger.info(f"Restarting HTTP snapshot reader for {self.camera_id}") + + # Stop current instance + self.stop() + + # Small delay + time.sleep(2.0) + + # Restart + self.start() + + # Report successful restart + stream_health_tracker.report_reconnect(self.camera_id, "health_recovery_restart") + + return True + + except Exception as e: + logger.error(f"Failed to restart HTTP snapshot reader for {self.camera_id}: {e}") + return False + + def _handle_reconnect_recovery(self, component: str, details: Dict[str, Any]) -> bool: + """Handle reconnect recovery action.""" + try: + logger.info(f"Reconnecting HTTP snapshot reader for {self.camera_id}") + + # Test connection first + success = stream_health_tracker.test_http_connection(self.camera_id, self.snapshot_url) + + if success: + # Reset error counters + self.consecutive_errors = 0 + stream_health_tracker.report_reconnect(self.camera_id, "health_recovery_reconnect") + return True + else: + logger.warning(f"Connection test failed during recovery for {self.camera_id}") + return False + + except Exception as e: + logger.error(f"Failed to reconnect HTTP snapshot reader for 
{self.camera_id}: {e}") + return False \ No newline at end of file From eb57de02c37300d57100924596eaf42c794e5a08 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Sat, 27 Sep 2025 14:57:20 +0700 Subject: [PATCH 07/30] fix: update import paths for monitoring modules in FFmpegRTSPReader and HTTPSnapshotReader --- core/streaming/readers/ffmpeg_rtsp.py | 6 +++--- core/streaming/readers/http_snapshot.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/streaming/readers/ffmpeg_rtsp.py b/core/streaming/readers/ffmpeg_rtsp.py index f2fb8d1..7c453f3 100644 --- a/core/streaming/readers/ffmpeg_rtsp.py +++ b/core/streaming/readers/ffmpeg_rtsp.py @@ -12,9 +12,9 @@ from typing import Optional, Callable, Dict, Any from .base import VideoReader from .utils import log_success, log_warning, log_error, log_info -from ..monitoring.stream_health import stream_health_tracker -from ..monitoring.thread_health import thread_health_monitor -from ..monitoring.recovery import recovery_manager, RecoveryAction +from ...monitoring.stream_health import stream_health_tracker +from ...monitoring.thread_health import thread_health_monitor +from ...monitoring.recovery import recovery_manager, RecoveryAction class FFmpegRTSPReader(VideoReader): diff --git a/core/streaming/readers/http_snapshot.py b/core/streaming/readers/http_snapshot.py index 1aab967..bbbf943 100644 --- a/core/streaming/readers/http_snapshot.py +++ b/core/streaming/readers/http_snapshot.py @@ -12,9 +12,9 @@ from typing import Optional, Callable, Dict, Any from .base import VideoReader from .utils import log_success, log_warning, log_error, log_info -from ..monitoring.stream_health import stream_health_tracker -from ..monitoring.thread_health import thread_health_monitor -from ..monitoring.recovery import recovery_manager, RecoveryAction +from ...monitoring.stream_health import stream_health_tracker +from ...monitoring.thread_health import thread_health_monitor +from ...monitoring.recovery import recovery_manager, RecoveryAction logger = logging.getLogger(__name__) From 52ba1ff316fb784102fd0937629f1d704823491d Mon Sep 17 00:00:00 2001 From: ziesorx Date: Mon, 29 Sep 2025 17:43:30 +0700 Subject: [PATCH 08/30] fix: sessionId type mismatch --- core/communication/websocket.py | 2 +- core/streaming/manager.py | 2 ++ core/tracking/integration.py | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/core/communication/websocket.py b/core/communication/websocket.py index 4e40d2a..e53096a 100644 --- a/core/communication/websocket.py +++ b/core/communication/websocket.py @@ -539,7 +539,7 @@ class WebSocketHandler: async def _handle_set_session_id(self, message: SetSessionIdMessage) -> None: """Handle setSessionId message.""" display_identifier = message.payload.displayIdentifier - session_id = message.payload.sessionId + session_id = str(message.payload.sessionId) if message.payload.sessionId is not None else None logger.info(f"[RX Processing] setSessionId for display {display_identifier}: {session_id}") diff --git a/core/streaming/manager.py b/core/streaming/manager.py index 5b4637c..e2f02d9 100644 --- a/core/streaming/manager.py +++ b/core/streaming/manager.py @@ -380,6 +380,8 @@ class StreamManager: def set_session_id(self, display_id: str, session_id: str): """Set session ID for tracking integration.""" + # Ensure session_id is always a string for consistent type handling + session_id = str(session_id) if session_id is not None else None with self._lock: for subscription_info in self._subscriptions.values(): # Check if 
this subscription matches the display_id diff --git a/core/tracking/integration.py b/core/tracking/integration.py index 3f1ebe0..8c96750 100644 --- a/core/tracking/integration.py +++ b/core/tracking/integration.py @@ -474,6 +474,8 @@ class TrackingPipelineIntegration: display_id: Display identifier session_id: Session identifier """ + # Ensure session_id is always a string for consistent type handling + session_id = str(session_id) if session_id is not None else None self.active_sessions[display_id] = session_id logger.info(f"Set session {session_id} for display {display_id}") From ee484b4655c0d5e89fa7a351187d4331ff647973 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Mon, 29 Sep 2025 23:45:20 +0700 Subject: [PATCH 09/30] feat: add min bbox for frontal tracking --- core/tracking/integration.py | 60 +++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/core/tracking/integration.py b/core/tracking/integration.py index 8c96750..d1401ef 100644 --- a/core/tracking/integration.py +++ b/core/tracking/integration.py @@ -71,12 +71,17 @@ class TrackingPipelineIntegration: # Thread pool for pipeline execution self.executor = ThreadPoolExecutor(max_workers=2) + # Min bbox filtering configuration + # TODO: Make this configurable via pipeline.json in the future + self.min_bbox_area_percentage = 4.5 # 4.5% of frame area minimum + # Statistics self.stats = { 'frames_processed': 0, 'vehicles_detected': 0, 'vehicles_validated': 0, - 'pipelines_executed': 0 + 'pipelines_executed': 0, + 'frontals_filtered_small': 0 # Track filtered detections } @@ -202,6 +207,10 @@ class TrackingPipelineIntegration: else: logger.debug(f"No tracking results or detections attribute") + # Filter out small frontal detections (neighboring pumps/distant cars) + if tracking_results and hasattr(tracking_results, 'detections'): + tracking_results = self._filter_small_frontals(tracking_results, frame) + # Process tracking results tracked_vehicles = self.tracker.process_detections( tracking_results, @@ -667,6 +676,55 @@ class TrackingPipelineIntegration: if stage == "car_wait_staff": logger.info(f"Started monitoring session {session_id} for car abandonment") + def _filter_small_frontals(self, tracking_results, frame): + """ + Filter out frontal detections that are smaller than minimum bbox area percentage. + This prevents processing of cars from neighboring pumps that appear in camera view. 
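+        For example, on a 1280x720 frame (921,600 px) the default 4.5%
+        threshold keeps only boxes of at least 41,472 px² (about 204x204 px);
+        smaller frontals are dropped and counted in
+        stats['frontals_filtered_small'].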
+ + Args: + tracking_results: YOLO tracking results with detections + frame: Input frame for calculating frame area + + Returns: + Modified tracking_results with small frontals removed + """ + if not hasattr(tracking_results, 'detections') or not tracking_results.detections: + return tracking_results + + # Calculate frame area and minimum bbox area threshold + frame_area = frame.shape[0] * frame.shape[1] # height * width + min_bbox_area = frame_area * (self.min_bbox_area_percentage / 100.0) + + # Filter detections + filtered_detections = [] + filtered_count = 0 + + for detection in tracking_results.detections: + # Calculate detection bbox area + bbox = detection.bbox # Assuming bbox is [x1, y1, x2, y2] + bbox_area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) + + if bbox_area >= min_bbox_area: + # Keep detection - bbox is large enough + filtered_detections.append(detection) + else: + # Filter out small detection + filtered_count += 1 + area_percentage = (bbox_area / frame_area) * 100 + logger.debug(f"Filtered small frontal: area={bbox_area:.0f}px² ({area_percentage:.1f}% of frame, " + f"min required: {self.min_bbox_area_percentage}%)") + + # Update tracking results with filtered detections + tracking_results.detections = filtered_detections + + # Update statistics + if filtered_count > 0: + self.stats['frontals_filtered_small'] += filtered_count + logger.info(f"Filtered {filtered_count} small frontal detections, " + f"{len(filtered_detections)} remaining (total filtered: {self.stats['frontals_filtered_small']})") + + return tracking_results + def cleanup(self): """Cleanup resources.""" self.executor.shutdown(wait=False) From fa0f865319753d30c499899450117d4094293009 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Tue, 30 Sep 2025 00:53:27 +0700 Subject: [PATCH 10/30] feat: add fallback when cant initially detect but backend start session --- core/tracking/integration.py | 136 +++++++++++++++++++++++++++++------ 1 file changed, 116 insertions(+), 20 deletions(-) diff --git a/core/tracking/integration.py b/core/tracking/integration.py index d1401ef..7d5f3f8 100644 --- a/core/tracking/integration.py +++ b/core/tracking/integration.py @@ -411,27 +411,12 @@ class TrackingPipelineIntegration: logger.info(f"Executing processing phase for session {session_id}, vehicle {vehicle.track_id}") # Capture high-quality snapshot for pipeline processing - frame = None - if self.subscription_info and self.subscription_info.stream_config.snapshot_url: - from ..streaming.readers import HTTPSnapshotReader + logger.info(f"[PROCESSING PHASE] Fetching 2K snapshot for session {session_id}") + frame = self._fetch_snapshot() - logger.info(f"[PROCESSING PHASE] Fetching 2K snapshot for session {session_id}") - snapshot_reader = HTTPSnapshotReader( - camera_id=self.subscription_info.camera_id, - snapshot_url=self.subscription_info.stream_config.snapshot_url, - max_retries=3 - ) - - frame = snapshot_reader.fetch_single_snapshot() - - if frame is not None: - logger.info(f"[PROCESSING PHASE] Successfully fetched {frame.shape[1]}x{frame.shape[0]} snapshot for pipeline") - else: - logger.warning(f"[PROCESSING PHASE] Failed to capture snapshot, falling back to RTSP frame") - # Fall back to RTSP frame if snapshot fails - frame = processing_data['frame'] - else: - logger.warning(f"[PROCESSING PHASE] No snapshot URL available, using RTSP frame") + if frame is None: + logger.warning(f"[PROCESSING PHASE] Failed to capture snapshot, falling back to RTSP frame") + # Fall back to RTSP frame if snapshot fails frame = 
processing_data['frame'] # Extract detected regions from detection phase result if available @@ -527,6 +512,19 @@ class TrackingPipelineIntegration: else: logger.warning(f"No pending processing data found for display {display_id} when setting session {session_id}") + # FALLBACK: Execute pipeline for POS-initiated sessions + logger.info(f"[FALLBACK] Triggering fallback pipeline for session {session_id} on display {display_id}") + + # Create subscription_id for fallback (needed for pipeline execution) + fallback_subscription_id = f"{display_id};fallback" + + # Trigger the fallback pipeline asynchronously + asyncio.create_task(self._execute_fallback_pipeline( + display_id=display_id, + session_id=session_id, + subscription_id=fallback_subscription_id + )) + def clear_session_id(self, session_id: str): """ Clear session ID (post-fueling). @@ -676,6 +674,104 @@ class TrackingPipelineIntegration: if stage == "car_wait_staff": logger.info(f"Started monitoring session {session_id} for car abandonment") + def _fetch_snapshot(self) -> Optional[np.ndarray]: + """ + Fetch high-quality snapshot from camera's snapshot URL. + Reusable method for both processing phase and fallback pipeline. + + Returns: + Snapshot frame or None if unavailable + """ + if not (self.subscription_info and self.subscription_info.stream_config.snapshot_url): + logger.warning("[SNAPSHOT] No subscription info or snapshot URL available") + return None + + try: + from ..streaming.readers import HTTPSnapshotReader + + logger.info(f"[SNAPSHOT] Fetching snapshot for {self.subscription_info.camera_id}") + snapshot_reader = HTTPSnapshotReader( + camera_id=self.subscription_info.camera_id, + snapshot_url=self.subscription_info.stream_config.snapshot_url, + max_retries=3 + ) + + frame = snapshot_reader.fetch_single_snapshot() + + if frame is not None: + logger.info(f"[SNAPSHOT] Successfully fetched {frame.shape[1]}x{frame.shape[0]} snapshot") + return frame + else: + logger.warning("[SNAPSHOT] Failed to fetch snapshot") + return None + + except Exception as e: + logger.error(f"[SNAPSHOT] Error fetching snapshot: {e}", exc_info=True) + return None + + async def _execute_fallback_pipeline(self, display_id: str, session_id: str, subscription_id: str): + """ + Execute fallback pipeline when sessionId is received without prior detection. + This handles POS-initiated sessions where backend starts transaction before car detection. 
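+        Unlike the normal path there is no tracked vehicle yet, so the
+        pipeline runs directly on a fresh snapshot rather than an RTSP frame.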
+ + Args: + display_id: Display identifier + session_id: Session ID from backend + subscription_id: Subscription identifier for pipeline execution + """ + try: + logger.info(f"[FALLBACK PIPELINE] Executing for session {session_id}, display {display_id}") + + # Fetch fresh snapshot from camera + frame = self._fetch_snapshot() + + if frame is None: + logger.error(f"[FALLBACK] Failed to fetch snapshot for session {session_id}, cannot execute pipeline") + return + + logger.info(f"[FALLBACK] Using snapshot frame {frame.shape[1]}x{frame.shape[0]} for session {session_id}") + + # Check if detection pipeline is available + if not self.detection_pipeline: + logger.error(f"[FALLBACK] Detection pipeline not available for session {session_id}") + return + + # Execute detection phase to get detected regions + detection_result = await self.detection_pipeline.execute_detection_phase( + frame=frame, + display_id=display_id, + subscription_id=subscription_id + ) + + logger.info(f"[FALLBACK] Detection phase completed for session {session_id}: " + f"status={detection_result.get('status', 'unknown')}, " + f"regions={list(detection_result.get('detected_regions', {}).keys())}") + + # If detection found regions, execute processing phase + detected_regions = detection_result.get('detected_regions', {}) + if detected_regions: + processing_result = await self.detection_pipeline.execute_processing_phase( + frame=frame, + display_id=display_id, + session_id=session_id, + subscription_id=subscription_id, + detected_regions=detected_regions + ) + + logger.info(f"[FALLBACK] Processing phase completed for session {session_id}: " + f"status={processing_result.get('status', 'unknown')}, " + f"branches={len(processing_result.get('branch_results', {}))}, " + f"actions={len(processing_result.get('actions_executed', []))}") + + # Update statistics + self.stats['pipelines_executed'] += 1 + + else: + logger.warning(f"[FALLBACK] No detections found in snapshot for session {session_id}") + + except Exception as e: + logger.error(f"[FALLBACK] Error executing fallback pipeline for session {session_id}: {e}", exc_info=True) + def _filter_small_frontals(self, tracking_results, frame): """ Filter out frontal detections that are smaller than minimum bbox area percentage. 
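In short, patch 10's fallback path is: fetch a snapshot, run the detection phase, then run the processing phase with whatever regions were found. A minimal sketch of that flow, assuming the TrackingPipelineIntegration methods added above (the placeholder ";fallback" suffix is replaced with the real subscription id in patch 14):

    # Condensed sketch of the fallback added in this patch (not verbatim).
    async def fallback_for_pos_session(integration, display_id, session_id):
        frame = integration._fetch_snapshot()        # 2K snapshot, up to 3 retries
        if frame is None:
            return                                   # nothing to analyze
        subscription_id = f"{display_id};fallback"   # placeholder id (patch 10)
        detection = await integration.detection_pipeline.execute_detection_phase(
            frame=frame, display_id=display_id, subscription_id=subscription_id)
        regions = detection.get('detected_regions', {})
        if regions:
            await integration.detection_pipeline.execute_processing_phase(
                frame=frame, display_id=display_id, session_id=session_id,
                subscription_id=subscription_id, detected_regions=regions)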
From 31bc91d57ba03d0cd2e4d6f8b936ad18d9adfaae Mon Sep 17 00:00:00 2001 From: ziesorx Date: Tue, 30 Sep 2025 12:06:03 +0700 Subject: [PATCH 11/30] fix: add ffmpeg flags fix frame delay --- core/streaming/readers/ffmpeg_rtsp.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/core/streaming/readers/ffmpeg_rtsp.py b/core/streaming/readers/ffmpeg_rtsp.py index 7c453f3..352c28e 100644 --- a/core/streaming/readers/ffmpeg_rtsp.py +++ b/core/streaming/readers/ffmpeg_rtsp.py @@ -115,10 +115,17 @@ class FFmpegRTSPReader(VideoReader): # DO NOT REMOVE '-hwaccel', 'cuda', '-hwaccel_device', '0', + # Real-time input flags + '-fflags', 'nobuffer+genpts+discardcorrupt', + '-flags', 'low_delay', + '-max_delay', '0', # No reordering delay + # RTSP configuration '-rtsp_transport', 'tcp', '-i', self.rtsp_url, + # Output configuration (keeping BMP) '-f', 'image2pipe', # Output images to pipe '-vcodec', 'bmp', # BMP format with header containing dimensions + '-vsync', 'passthrough', # Pass frames as-is # Use native stream resolution and framerate '-an', # No audio '-' # Output to stdout From fed71046a9437be76cc80c2ce6705e4f273405a6 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Tue, 30 Sep 2025 12:20:52 +0700 Subject: [PATCH 12/30] fix: update ffmpeg flags to improve frame handling --- core/streaming/readers/ffmpeg_rtsp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/streaming/readers/ffmpeg_rtsp.py b/core/streaming/readers/ffmpeg_rtsp.py index 352c28e..88f45ae 100644 --- a/core/streaming/readers/ffmpeg_rtsp.py +++ b/core/streaming/readers/ffmpeg_rtsp.py @@ -116,7 +116,7 @@ class FFmpegRTSPReader(VideoReader): '-hwaccel', 'cuda', '-hwaccel_device', '0', # Real-time input flags - '-fflags', 'nobuffer+genpts+discardcorrupt', + '-fflags', 'nobuffer+genpts', '-flags', 'low_delay', '-max_delay', '0', # No reordering delay # RTSP configuration From 8d2a71fcd73daa8f6ddc156f72e20eb09b0bf3de Mon Sep 17 00:00:00 2001 From: ziesorx Date: Tue, 30 Sep 2025 14:21:29 +0700 Subject: [PATCH 13/30] fix: inference in reader thread --- core/streaming/manager.py | 223 +++++++++++++++++++++++++- core/streaming/readers/ffmpeg_rtsp.py | 4 +- 2 files changed, 223 insertions(+), 4 deletions(-) diff --git a/core/streaming/manager.py b/core/streaming/manager.py index e2f02d9..c082e70 100644 --- a/core/streaming/manager.py +++ b/core/streaming/manager.py @@ -5,6 +5,8 @@ Optimized for 1280x720@6fps RTSP and 2560x1440 HTTP snapshots. 
import logging import threading import time +import queue +import asyncio from typing import Dict, Set, Optional, List, Any from dataclasses import dataclass from collections import defaultdict @@ -50,6 +52,64 @@ class StreamManager: self._camera_subscribers: Dict[str, Set[str]] = defaultdict(set) # camera_id -> set of subscription_ids self._lock = threading.RLock() + # Fair tracking queue system - per camera queues + self._tracking_queues: Dict[str, queue.Queue] = {} # camera_id -> queue + self._tracking_workers = [] + self._stop_workers = threading.Event() + self._dropped_frame_counts: Dict[str, int] = {} # per-camera drop counts + + # Round-robin scheduling state + self._camera_list = [] # Ordered list of active cameras + self._camera_round_robin_index = 0 + self._round_robin_lock = threading.Lock() + + # Start worker threads for tracking processing + num_workers = min(4, max_streams // 2 + 1) # Scale with streams + for i in range(num_workers): + worker = threading.Thread( + target=self._tracking_worker_loop, + name=f"TrackingWorker-{i}", + daemon=True + ) + worker.start() + self._tracking_workers.append(worker) + + logger.info(f"Started {num_workers} tracking worker threads") + + def _ensure_camera_queue(self, camera_id: str): + """Ensure a tracking queue exists for the camera.""" + if camera_id not in self._tracking_queues: + self._tracking_queues[camera_id] = queue.Queue(maxsize=10) # 10 frames per camera + self._dropped_frame_counts[camera_id] = 0 + + with self._round_robin_lock: + if camera_id not in self._camera_list: + self._camera_list.append(camera_id) + + logger.info(f"Created tracking queue for camera {camera_id}") + + def _remove_camera_queue(self, camera_id: str): + """Remove tracking queue for a camera that's no longer active.""" + if camera_id in self._tracking_queues: + # Clear any remaining items + while not self._tracking_queues[camera_id].empty(): + try: + self._tracking_queues[camera_id].get_nowait() + except queue.Empty: + break + + del self._tracking_queues[camera_id] + del self._dropped_frame_counts[camera_id] + + with self._round_robin_lock: + if camera_id in self._camera_list: + self._camera_list.remove(camera_id) + # Reset index if needed + if self._camera_round_robin_index >= len(self._camera_list): + self._camera_round_robin_index = 0 + + logger.info(f"Removed tracking queue for camera {camera_id}") + def add_subscription(self, subscription_id: str, stream_config: StreamConfig, crop_coords: Optional[tuple] = None, model_id: Optional[str] = None, @@ -139,6 +199,7 @@ class StreamManager: reader.set_frame_callback(self._frame_callback) reader.start() self._streams[camera_id] = reader + self._ensure_camera_queue(camera_id) # Create tracking queue logger.info(f"\033[92m[RTSP] {camera_id} connected\033[0m") elif stream_config.snapshot_url: @@ -153,6 +214,7 @@ class StreamManager: reader.set_frame_callback(self._frame_callback) reader.start() self._streams[camera_id] = reader + self._ensure_camera_queue(camera_id) # Create tracking queue logger.info(f"\033[92m[HTTP] {camera_id} connected\033[0m") else: @@ -171,6 +233,7 @@ class StreamManager: try: self._streams[camera_id].stop() del self._streams[camera_id] + self._remove_camera_queue(camera_id) # Remove tracking queue # DON'T clear frames - they should persist until replaced # shared_cache_buffer.clear_camera(camera_id) # REMOVED - frames should persist logger.info(f"Stopped stream for camera {camera_id} (frames preserved in buffer)") @@ -193,8 +256,19 @@ class StreamManager: available_cameras = 
shared_cache_buffer.frame_buffer.get_camera_list() logger.info(f"\033[96m[BUFFER] {len(available_cameras)} active cameras: {', '.join(available_cameras)}\033[0m") - # Process tracking for subscriptions with tracking integration - self._process_tracking_for_camera(camera_id, frame) + # Queue for tracking processing (non-blocking) - route to camera-specific queue + if camera_id in self._tracking_queues: + try: + self._tracking_queues[camera_id].put_nowait({ + 'frame': frame, + 'timestamp': time.time() + }) + except queue.Full: + # Drop frame if camera queue is full (maintain real-time) + self._dropped_frame_counts[camera_id] += 1 + + if self._dropped_frame_counts[camera_id] % 50 == 0: + logger.warning(f"Dropped {self._dropped_frame_counts[camera_id]} frames for camera {camera_id} due to full queue") except Exception as e: logger.error(f"Error in frame callback for camera {camera_id}: {e}") @@ -251,6 +325,127 @@ class StreamManager: except Exception as e: logger.error(f"Error processing tracking for camera {camera_id}: {e}") + def _tracking_worker_loop(self): + """Worker thread loop for round-robin processing of camera queues.""" + logger.info(f"Tracking worker {threading.current_thread().name} started") + + consecutive_empty = 0 + max_consecutive_empty = 10 # Sleep if all cameras empty this many times + + while not self._stop_workers.is_set(): + try: + # Get next camera in round-robin fashion + camera_id, item = self._get_next_camera_item() + + if camera_id is None: + # No cameras have items, sleep briefly + consecutive_empty += 1 + if consecutive_empty >= max_consecutive_empty: + time.sleep(0.1) # Sleep 100ms if nothing to process + consecutive_empty = 0 + continue + + consecutive_empty = 0 # Reset counter when we find work + + frame = item['frame'] + timestamp = item['timestamp'] + + # Check if frame is too old (drop if > 1 second old) + age = time.time() - timestamp + if age > 1.0: + logger.debug(f"Dropping old frame for {camera_id} (age: {age:.2f}s)") + continue + + # Process tracking for this camera's frame + self._process_tracking_for_camera_sync(camera_id, frame) + + except Exception as e: + logger.error(f"Error in tracking worker: {e}", exc_info=True) + + logger.info(f"Tracking worker {threading.current_thread().name} stopped") + + def _get_next_camera_item(self): + """Get next item from camera queues using round-robin scheduling.""" + with self._round_robin_lock: + if not self._camera_list: + return None, None + + attempts = 0 + max_attempts = len(self._camera_list) + + while attempts < max_attempts: + # Get current camera + if self._camera_round_robin_index >= len(self._camera_list): + self._camera_round_robin_index = 0 + + camera_id = self._camera_list[self._camera_round_robin_index] + + # Move to next camera for next call + self._camera_round_robin_index = (self._camera_round_robin_index + 1) % len(self._camera_list) + + # Try to get item from this camera's queue + if camera_id in self._tracking_queues: + try: + item = self._tracking_queues[camera_id].get_nowait() + return camera_id, item + except queue.Empty: + pass # Try next camera + + attempts += 1 + + return None, None # All cameras empty + + def _process_tracking_for_camera_sync(self, camera_id: str, frame): + """Synchronous version of tracking processing for worker threads.""" + try: + with self._lock: + subscription_ids = list(self._camera_subscribers.get(camera_id, [])) + + for subscription_id in subscription_ids: + subscription_info = self._subscriptions.get(subscription_id) + + if not subscription_info or not 
subscription_info.tracking_integration: + continue + + display_id = subscription_id.split(';')[0] if ';' in subscription_id else subscription_id + + try: + # Run async tracking in thread's event loop + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + result = loop.run_until_complete( + subscription_info.tracking_integration.process_frame( + frame, display_id, subscription_id + ) + ) + + # Log tracking results + if result: + tracked_count = len(result.get('tracked_vehicles', [])) + validated_vehicle = result.get('validated_vehicle') + pipeline_result = result.get('pipeline_result') + + if tracked_count > 0: + logger.info(f"[Tracking] {camera_id}: {tracked_count} vehicles tracked") + + if validated_vehicle: + logger.info(f"[Tracking] {camera_id}: Vehicle {validated_vehicle['track_id']} " + f"validated as {validated_vehicle['state']} " + f"(confidence: {validated_vehicle['confidence']:.2f})") + + if pipeline_result: + logger.info(f"[Pipeline] {camera_id}: {pipeline_result.get('status', 'unknown')} - " + f"{pipeline_result.get('message', 'no message')}") + finally: + loop.close() + + except Exception as track_e: + logger.error(f"Error in tracking for {subscription_id}: {track_e}") + + except Exception as e: + logger.error(f"Error processing tracking for camera {camera_id}: {e}") + def get_frame(self, camera_id: str, crop_coords: Optional[tuple] = None): """Get the latest frame for a camera with optional cropping.""" return shared_cache_buffer.get_frame(camera_id, crop_coords) @@ -366,6 +561,30 @@ class StreamManager: def stop_all(self): """Stop all streams and clear all subscriptions.""" + # Signal workers to stop + self._stop_workers.set() + + # Clear all camera queues + for camera_id, camera_queue in list(self._tracking_queues.items()): + while not camera_queue.empty(): + try: + camera_queue.get_nowait() + except queue.Empty: + break + + # Wait for workers to finish + for worker in self._tracking_workers: + worker.join(timeout=2.0) + + # Clear queue management structures + self._tracking_queues.clear() + self._dropped_frame_counts.clear() + with self._round_robin_lock: + self._camera_list.clear() + self._camera_round_robin_index = 0 + + logger.info("Stopped all tracking worker threads") + with self._lock: # Stop all streams for camera_id in list(self._streams.keys()): diff --git a/core/streaming/readers/ffmpeg_rtsp.py b/core/streaming/readers/ffmpeg_rtsp.py index 88f45ae..e469c9e 100644 --- a/core/streaming/readers/ffmpeg_rtsp.py +++ b/core/streaming/readers/ffmpeg_rtsp.py @@ -113,8 +113,8 @@ class FFmpegRTSPReader(VideoReader): cmd = [ 'ffmpeg', # DO NOT REMOVE - '-hwaccel', 'cuda', - '-hwaccel_device', '0', + # '-hwaccel', 'cuda', + # '-hwaccel_device', '0', # Real-time input flags '-fflags', 'nobuffer+genpts', '-flags', 'low_delay', From e92efdbe11e6fe9254d2f44581fab2fc92546eb1 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Tue, 30 Sep 2025 15:14:28 +0700 Subject: [PATCH 14/30] fix: custom subscriptionIdentifier --- core/streaming/manager.py | 9 +++++++-- core/tracking/integration.py | 35 +++++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/core/streaming/manager.py b/core/streaming/manager.py index c082e70..497f1b8 100644 --- a/core/streaming/manager.py +++ b/core/streaming/manager.py @@ -606,8 +606,13 @@ class StreamManager: # Check if this subscription matches the display_id subscription_display_id = subscription_info.subscription_id.split(';')[0] if subscription_display_id == display_id and 
subscription_info.tracking_integration: - subscription_info.tracking_integration.set_session_id(display_id, session_id) - logger.debug(f"Set session {session_id} for display {display_id}") + # Pass the full subscription_id (displayId;cameraId) to the tracking integration + subscription_info.tracking_integration.set_session_id( + display_id, + session_id, + subscription_id=subscription_info.subscription_id + ) + logger.debug(f"Set session {session_id} for display {display_id} with subscription {subscription_info.subscription_id}") def clear_session_id(self, session_id: str): """Clear session ID from the specific tracking integration handling this session.""" diff --git a/core/tracking/integration.py b/core/tracking/integration.py index 7d5f3f8..58afcec 100644 --- a/core/tracking/integration.py +++ b/core/tracking/integration.py @@ -61,6 +61,7 @@ class TrackingPipelineIntegration: self.cleared_sessions: Dict[str, float] = {} # session_id -> clear_time self.pending_vehicles: Dict[str, int] = {} # display_id -> track_id (waiting for session ID) self.pending_processing_data: Dict[str, Dict] = {} # display_id -> processing data (waiting for session ID) + self.display_to_subscription: Dict[str, str] = {} # display_id -> subscription_id (for fallback) # Additional validators for enhanced flow control self.permanently_processed: Dict[str, float] = {} # "camera_id:track_id" -> process_time (never process again) @@ -459,7 +460,7 @@ class TrackingPipelineIntegration: self.subscription_info = subscription_info logger.debug(f"Set subscription info with snapshot_url: {subscription_info.stream_config.snapshot_url if subscription_info else None}") - def set_session_id(self, display_id: str, session_id: str): + def set_session_id(self, display_id: str, session_id: str, subscription_id: str = None): """ Set session ID for a display (from backend). This is called when backend sends setSessionId after receiving imageDetection. 
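Concretely, the manager-side call now carries the full "displayId;cameraId" identifier; a minimal sketch with hypothetical values:

    # Sketch (values are illustrative, not from the repo).
    integration.set_session_id(
        "display-001",                            # display_id
        str(session_id),                          # normalized to str (patch 08)
        subscription_id="display-001;cam-42",     # full "displayId;cameraId"
    )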
@@ -467,11 +468,18 @@ class TrackingPipelineIntegration: Args: display_id: Display identifier session_id: Session identifier + subscription_id: Subscription identifier (displayId;cameraId) - needed for fallback """ # Ensure session_id is always a string for consistent type handling session_id = str(session_id) if session_id is not None else None self.active_sessions[display_id] = session_id - logger.info(f"Set session {session_id} for display {display_id}") + + # Store subscription_id for fallback usage + if subscription_id: + self.display_to_subscription[display_id] = subscription_id + logger.info(f"Set session {session_id} for display {display_id} with subscription {subscription_id}") + else: + logger.info(f"Set session {session_id} for display {display_id}") # Check if we have a pending vehicle for this display if display_id in self.pending_vehicles: @@ -513,17 +521,19 @@ class TrackingPipelineIntegration: logger.warning(f"No pending processing data found for display {display_id} when setting session {session_id}") # FALLBACK: Execute pipeline for POS-initiated sessions - logger.info(f"[FALLBACK] Triggering fallback pipeline for session {session_id} on display {display_id}") + # Use stored subscription_id instead of creating fake one + stored_subscription_id = self.display_to_subscription.get(display_id) + if stored_subscription_id: + logger.info(f"[FALLBACK] Triggering fallback pipeline for session {session_id} on display {display_id} with subscription {stored_subscription_id}") - # Create subscription_id for fallback (needed for pipeline execution) - fallback_subscription_id = f"{display_id};fallback" - - # Trigger the fallback pipeline asynchronously - asyncio.create_task(self._execute_fallback_pipeline( - display_id=display_id, - session_id=session_id, - subscription_id=fallback_subscription_id - )) + # Trigger the fallback pipeline asynchronously with real subscription_id + asyncio.create_task(self._execute_fallback_pipeline( + display_id=display_id, + session_id=session_id, + subscription_id=stored_subscription_id + )) + else: + logger.error(f"[FALLBACK] No subscription_id stored for display {display_id}, cannot execute fallback pipeline") def clear_session_id(self, session_id: str): """ @@ -574,6 +584,7 @@ class TrackingPipelineIntegration: self.cleared_sessions.clear() self.pending_vehicles.clear() self.pending_processing_data.clear() + self.display_to_subscription.clear() self.permanently_processed.clear() self.progression_stages.clear() self.last_detection_time.clear() From 354ed9ce3cfae296450b2e747ac77e963d3080a4 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Tue, 30 Sep 2025 15:46:32 +0700 Subject: [PATCH 15/30] fix: fallback when there is sessionId --- core/detection/pipeline.py | 92 ++++++++++++++++++++++++++++-------- core/tracking/integration.py | 26 +++++----- 2 files changed, 88 insertions(+), 30 deletions(-) diff --git a/core/detection/pipeline.py b/core/detection/pipeline.py index 076cdc9..d395f3a 100644 --- a/core/detection/pipeline.py +++ b/core/detection/pipeline.py @@ -64,6 +64,10 @@ class DetectionPipeline: # SessionId to processing results mapping (for combining with license plate results) self.session_processing_results = {} + # Field mappings from parallelActions (e.g., {"car_brand": "{car_brand_cls_v3.brand}"}) + self.field_mappings = {} + self._parse_field_mappings() + # Statistics self.stats = { 'detections_processed': 0, @@ -74,6 +78,25 @@ class DetectionPipeline: logger.info("DetectionPipeline initialized") + def _parse_field_mappings(self): + """ + Parse 
field mappings from parallelActions.postgresql_update_combined.fields. + Extracts mappings like {"car_brand": "{car_brand_cls_v3.brand}"} for dynamic field resolution. + """ + try: + if not self.pipeline_config or not hasattr(self.pipeline_config, 'parallel_actions'): + return + + for action in self.pipeline_config.parallel_actions: + if action.type.value == 'postgresql_update_combined': + fields = action.params.get('fields', {}) + self.field_mappings = fields + logger.info(f"[FIELD MAPPINGS] Parsed from pipeline config: {self.field_mappings}") + break + + except Exception as e: + logger.error(f"Error parsing field mappings: {e}", exc_info=True) + async def initialize(self) -> bool: """ Initialize all pipeline components including models, Redis, and database. @@ -165,6 +188,44 @@ class DetectionPipeline: logger.error(f"Error initializing detection model: {e}", exc_info=True) return False + def _extract_fields_from_branches(self, branch_results: Dict[str, Any]) -> Dict[str, Any]: + """ + Extract fields dynamically from branch results using field mappings. + + Args: + branch_results: Dictionary of branch execution results + + Returns: + Dictionary with extracted field values (e.g., {"car_brand": "Honda", "body_type": "Sedan"}) + """ + extracted = {} + + try: + for db_field_name, template in self.field_mappings.items(): + # Parse template like "{car_brand_cls_v3.brand}" -> branch_id="car_brand_cls_v3", field="brand" + if template.startswith('{') and template.endswith('}'): + var_name = template[1:-1] + if '.' in var_name: + branch_id, field_name = var_name.split('.', 1) + + # Look up value in branch_results + if branch_id in branch_results: + branch_data = branch_results[branch_id] + if isinstance(branch_data, dict) and 'result' in branch_data: + result_data = branch_data['result'] + if isinstance(result_data, dict) and field_name in result_data: + extracted[field_name] = result_data[field_name] + logger.debug(f"[DYNAMIC EXTRACT] {field_name}={result_data[field_name]} from branch {branch_id}") + else: + logger.debug(f"[DYNAMIC EXTRACT] Field '{field_name}' not found in branch {branch_id}") + else: + logger.debug(f"[DYNAMIC EXTRACT] Branch '{branch_id}' not in results") + + except Exception as e: + logger.error(f"Error extracting fields from branches: {e}", exc_info=True) + + return extracted + async def _on_license_plate_result(self, session_id: str, license_data: Dict[str, Any]): """ Callback for handling license plate results from LPR service. 
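The "{branch_id.field}" templates make the extraction a small parsing step; a minimal self-contained sketch (example values assumed, not from the repo):

    field_mappings = {"car_brand": "{car_brand_cls_v3.brand}"}
    branch_results = {"car_brand_cls_v3": {"result": {"brand": "Honda"}}}

    extracted = {}
    for _column, template in field_mappings.items():
        branch_id, field = template.strip("{}").split(".", 1)
        result = branch_results.get(branch_id, {}).get("result", {})
        if field in result:
            extracted[field] = result[field]      # keyed by field name, as above
    # extracted == {"brand": "Honda"}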
@@ -272,12 +333,12 @@ class DetectionPipeline: branch_results = self.session_processing_results[session_id_for_lookup] logger.info(f"[LICENSE PLATE] Retrieved processing results for session {session_id_for_lookup}") - if 'car_brand_cls_v2' in branch_results: - brand_result = branch_results['car_brand_cls_v2'].get('result', {}) - car_brand = brand_result.get('brand') - if 'car_bodytype_cls_v1' in branch_results: - bodytype_result = branch_results['car_bodytype_cls_v1'].get('result', {}) - body_type = bodytype_result.get('body_type') + # Extract fields dynamically using field mappings from pipeline config + extracted_fields = self._extract_fields_from_branches(branch_results) + car_brand = extracted_fields.get('brand') + body_type = extracted_fields.get('body_type') + + logger.info(f"[LICENSE PLATE] Extracted fields: brand={car_brand}, body_type={body_type}") # Clean up stored results after use del self.session_processing_results[session_id_for_lookup] @@ -1003,7 +1064,7 @@ class DetectionPipeline: Resolve field template using branch results and context. Args: - template: Template string like "{car_brand_cls_v2.brand}" + template: Template string like "{car_brand_cls_v3.brand}" branch_results: Dictionary of branch execution results context: Detection context @@ -1015,7 +1076,7 @@ class DetectionPipeline: if template.startswith('{') and template.endswith('}'): var_name = template[1:-1] - # Check for branch result reference (e.g., "car_brand_cls_v2.brand") + # Check for branch result reference (e.g., "car_brand_cls_v3.brand") if '.' in var_name: branch_id, field_name = var_name.split('.', 1) if branch_id in branch_results: @@ -1061,17 +1122,10 @@ class DetectionPipeline: logger.warning("No session_id in context for processing results") return - # Extract car brand from car_brand_cls_v2 results - car_brand = None - if 'car_brand_cls_v2' in branch_results: - brand_result = branch_results['car_brand_cls_v2'].get('result', {}) - car_brand = brand_result.get('brand') - - # Extract body type from car_bodytype_cls_v1 results - body_type = None - if 'car_bodytype_cls_v1' in branch_results: - bodytype_result = branch_results['car_bodytype_cls_v1'].get('result', {}) - body_type = bodytype_result.get('body_type') + # Extract fields dynamically using field mappings from pipeline config + extracted_fields = self._extract_fields_from_branches(branch_results) + car_brand = extracted_fields.get('brand') + body_type = extracted_fields.get('body_type') logger.info(f"[PROCESSING RESULTS] Completed for session {session_id}: " f"brand={car_brand}, bodyType={body_type}") diff --git a/core/tracking/integration.py b/core/tracking/integration.py index 58afcec..8e0d8fa 100644 --- a/core/tracking/integration.py +++ b/core/tracking/integration.py @@ -521,19 +521,23 @@ class TrackingPipelineIntegration: logger.warning(f"No pending processing data found for display {display_id} when setting session {session_id}") # FALLBACK: Execute pipeline for POS-initiated sessions - # Use stored subscription_id instead of creating fake one - stored_subscription_id = self.display_to_subscription.get(display_id) - if stored_subscription_id: - logger.info(f"[FALLBACK] Triggering fallback pipeline for session {session_id} on display {display_id} with subscription {stored_subscription_id}") + # Skip if session_id is None (no car present or car has left) + if session_id is not None: + # Use stored subscription_id instead of creating fake one + stored_subscription_id = self.display_to_subscription.get(display_id) + if stored_subscription_id: 
+ logger.info(f"[FALLBACK] Triggering fallback pipeline for session {session_id} on display {display_id} with subscription {stored_subscription_id}") - # Trigger the fallback pipeline asynchronously with real subscription_id - asyncio.create_task(self._execute_fallback_pipeline( - display_id=display_id, - session_id=session_id, - subscription_id=stored_subscription_id - )) + # Trigger the fallback pipeline asynchronously with real subscription_id + asyncio.create_task(self._execute_fallback_pipeline( + display_id=display_id, + session_id=session_id, + subscription_id=stored_subscription_id + )) + else: + logger.error(f"[FALLBACK] No subscription_id stored for display {display_id}, cannot execute fallback pipeline") else: - logger.error(f"[FALLBACK] No subscription_id stored for display {display_id}, cannot execute fallback pipeline") + logger.debug(f"[FALLBACK] Skipping pipeline execution for session_id=None on display {display_id}") def clear_session_id(self, session_id: str): """ From 793beb15710cb46605a754a83b08abb0e4fe1d92 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Tue, 30 Sep 2025 16:04:24 +0700 Subject: [PATCH 16/30] fix: tracking works but absent not work --- app.py | 9 +++-- core/communication/websocket.py | 10 ++++- core/streaming/manager.py | 71 +++++++++++++++++++++++++-------- 3 files changed, 68 insertions(+), 22 deletions(-) diff --git a/app.py b/app.py index eb1440f..7b82d23 100644 --- a/app.py +++ b/app.py @@ -201,10 +201,11 @@ else: os.makedirs("models", exist_ok=True) logger.info("Ensured models directory exists") -# Initialize stream manager with config value -from core.streaming import initialize_stream_manager -initialize_stream_manager(max_streams=config.get('max_streams', 10)) -logger.info(f"Initialized stream manager with max_streams={config.get('max_streams', 10)}") +# Stream manager already initialized at module level with max_streams=20 +# Calling initialize_stream_manager() creates a NEW instance, breaking references +# from core.streaming import initialize_stream_manager +# initialize_stream_manager(max_streams=config.get('max_streams', 10)) +logger.info(f"Using stream manager with max_streams=20 (module-level initialization)") # Frames are now stored in the shared cache buffer from core.streaming.buffers # latest_frames = {} # Deprecated - using shared_cache_buffer instead diff --git a/core/communication/websocket.py b/core/communication/websocket.py index e53096a..d20ee32 100644 --- a/core/communication/websocket.py +++ b/core/communication/websocket.py @@ -197,18 +197,24 @@ class WebSocketHandler: async def _handle_set_subscription_list(self, message: SetSubscriptionListMessage) -> None: """Handle setSubscriptionList message for declarative subscription management.""" - logger.info(f"[RX Processing] setSubscriptionList with {len(message.subscriptions)} subscriptions") + logger.info(f"🎯 [RX Processing] setSubscriptionList with {len(message.subscriptions)} subscriptions") + for i, sub in enumerate(message.subscriptions): + logger.info(f" 📋 Sub {i+1}: {sub.subscriptionIdentifier} (model: {sub.modelId})") # Update worker state with new subscriptions worker_state.set_subscriptions(message.subscriptions) # Phase 2: Download and manage models + logger.info("📦 Starting model download phase...") await self._ensure_models(message.subscriptions) + logger.info("✅ Model download phase complete") # Phase 3 & 4: Integrate with streaming management and tracking + logger.info("🎬 Starting stream subscription update...") await 
self._update_stream_subscriptions(message.subscriptions) + logger.info("✅ Stream subscription update complete") - logger.info("Subscription list updated successfully") + logger.info("🏁 Subscription list updated successfully") async def _ensure_models(self, subscriptions) -> None: """Ensure all required models are downloaded and available.""" diff --git a/core/streaming/manager.py b/core/streaming/manager.py index 497f1b8..2de86e4 100644 --- a/core/streaming/manager.py +++ b/core/streaming/manager.py @@ -85,8 +85,11 @@ class StreamManager: with self._round_robin_lock: if camera_id not in self._camera_list: self._camera_list.append(camera_id) - - logger.info(f"Created tracking queue for camera {camera_id}") + logger.info(f"✅ Created tracking queue for camera {camera_id}, camera_list now has {len(self._camera_list)} cameras: {self._camera_list}") + else: + logger.warning(f"Camera {camera_id} already in camera_list") + else: + logger.debug(f"Camera {camera_id} already has tracking queue") def _remove_camera_queue(self, camera_id: str): """Remove tracking queue for a camera that's no longer active.""" @@ -153,6 +156,10 @@ class StreamManager: if not success: self._remove_subscription_internal(subscription_id) return False + else: + # Stream already exists, but ensure queue exists too + logger.info(f"Stream already exists for {camera_id}, ensuring queue exists") + self._ensure_camera_queue(camera_id) logger.info(f"Added subscription {subscription_id} for camera {camera_id} " f"({len(self._camera_subscribers[camera_id])} total subscribers)") @@ -188,6 +195,7 @@ class StreamManager: def _start_stream(self, camera_id: str, stream_config: StreamConfig) -> bool: """Start a stream for the given camera.""" try: + logger.info(f"🚀 _start_stream called for {camera_id}") if stream_config.rtsp_url: # RTSP stream using FFmpeg subprocess with CUDA acceleration logger.info(f"\033[94m[RTSP] Starting {camera_id}\033[0m") @@ -199,7 +207,9 @@ class StreamManager: reader.set_frame_callback(self._frame_callback) reader.start() self._streams[camera_id] = reader + logger.info(f"🎬 About to call _ensure_camera_queue for {camera_id}") self._ensure_camera_queue(camera_id) # Create tracking queue + logger.info(f"✅ _ensure_camera_queue completed for {camera_id}") logger.info(f"\033[92m[RTSP] {camera_id} connected\033[0m") elif stream_config.snapshot_url: @@ -214,7 +224,9 @@ class StreamManager: reader.set_frame_callback(self._frame_callback) reader.start() self._streams[camera_id] = reader + logger.info(f"🎬 About to call _ensure_camera_queue for {camera_id}") self._ensure_camera_queue(camera_id) # Create tracking queue + logger.info(f"✅ _ensure_camera_queue completed for {camera_id}") logger.info(f"\033[92m[HTTP] {camera_id} connected\033[0m") else: @@ -334,18 +346,22 @@ class StreamManager: while not self._stop_workers.is_set(): try: + logger.debug(f"Worker {threading.current_thread().name} loop iteration, stop_event={self._stop_workers.is_set()}") + # Get next camera in round-robin fashion camera_id, item = self._get_next_camera_item() if camera_id is None: # No cameras have items, sleep briefly consecutive_empty += 1 + logger.debug(f"Worker {threading.current_thread().name}: All queues empty ({consecutive_empty}/{max_consecutive_empty})") if consecutive_empty >= max_consecutive_empty: time.sleep(0.1) # Sleep 100ms if nothing to process consecutive_empty = 0 continue consecutive_empty = 0 # Reset counter when we find work + logger.info(f"Worker {threading.current_thread().name}: Processing frame from {camera_id}") frame = 
item['frame'] timestamp = item['timestamp'] @@ -353,11 +369,13 @@ class StreamManager: # Check if frame is too old (drop if > 1 second old) age = time.time() - timestamp if age > 1.0: - logger.debug(f"Dropping old frame for {camera_id} (age: {age:.2f}s)") + logger.warning(f"Dropping old frame for {camera_id} (age: {age:.2f}s)") continue + logger.info(f"Worker {threading.current_thread().name}: Calling tracking sync for {camera_id}") # Process tracking for this camera's frame self._process_tracking_for_camera_sync(camera_id, frame) + logger.info(f"Worker {threading.current_thread().name}: Finished tracking sync for {camera_id}") except Exception as e: logger.error(f"Error in tracking worker: {e}", exc_info=True) @@ -367,32 +385,48 @@ class StreamManager: def _get_next_camera_item(self): """Get next item from camera queues using round-robin scheduling.""" with self._round_robin_lock: - if not self._camera_list: + # Get current list of cameras from actual tracking queues (central state) + camera_list = list(self._tracking_queues.keys()) + + # Debug: show ALL state + logger.info(f"🔍 _tracking_queues keys: {list(self._tracking_queues.keys())}") + logger.info(f"🔍 _streams keys: {list(self._streams.keys())}") + logger.info(f"🔍 _subscriptions keys: {list(self._subscriptions.keys())}") + + if not camera_list: + logger.warning("⚠️ _get_next_camera_item: No cameras have tracking queues yet, but streams/subscriptions exist!") return None, None + logger.debug(f"_get_next_camera_item: {len(camera_list)} cameras with queues: {camera_list}") + attempts = 0 - max_attempts = len(self._camera_list) + max_attempts = len(camera_list) while attempts < max_attempts: - # Get current camera - if self._camera_round_robin_index >= len(self._camera_list): + # Get current camera using round-robin index + if self._camera_round_robin_index >= len(camera_list): self._camera_round_robin_index = 0 - camera_id = self._camera_list[self._camera_round_robin_index] + camera_id = camera_list[self._camera_round_robin_index] + logger.debug(f"_get_next_camera_item: Trying camera {camera_id} (attempt {attempts + 1}/{max_attempts})") # Move to next camera for next call - self._camera_round_robin_index = (self._camera_round_robin_index + 1) % len(self._camera_list) + self._camera_round_robin_index = (self._camera_round_robin_index + 1) % len(camera_list) # Try to get item from this camera's queue - if camera_id in self._tracking_queues: - try: - item = self._tracking_queues[camera_id].get_nowait() - return camera_id, item - except queue.Empty: - pass # Try next camera + queue_size = self._tracking_queues[camera_id].qsize() + logger.debug(f"_get_next_camera_item: Camera {camera_id} queue has {queue_size} items") + try: + item = self._tracking_queues[camera_id].get_nowait() + logger.info(f"_get_next_camera_item: Got item from {camera_id}") + return camera_id, item + except queue.Empty: + logger.debug(f"_get_next_camera_item: Camera {camera_id} queue empty") + pass # Try next camera attempts += 1 + logger.debug("_get_next_camera_item: All cameras empty") return None, None # All cameras empty def _process_tracking_for_camera_sync(self, camera_id: str, frame): @@ -404,7 +438,12 @@ class StreamManager: for subscription_id in subscription_ids: subscription_info = self._subscriptions.get(subscription_id) - if not subscription_info or not subscription_info.tracking_integration: + if not subscription_info: + logger.warning(f"No subscription info found for {subscription_id}") + continue + + if not subscription_info.tracking_integration: + 
logger.debug(f"No tracking integration for {subscription_id} (camera {camera_id}), skipping inference") continue display_id = subscription_id.split(';')[0] if ';' in subscription_id else subscription_id From 3ed7a2cd53dbf3fd06055fc189f3b3f1368770d7 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Tue, 30 Sep 2025 16:20:39 +0700 Subject: [PATCH 17/30] fix: abandonment works --- core/communication/websocket.py | 10 ++-------- core/streaming/manager.py | 31 ++----------------------------- core/tracking/integration.py | 11 ++++++++++- 3 files changed, 14 insertions(+), 38 deletions(-) diff --git a/core/communication/websocket.py b/core/communication/websocket.py index d20ee32..e53096a 100644 --- a/core/communication/websocket.py +++ b/core/communication/websocket.py @@ -197,24 +197,18 @@ class WebSocketHandler: async def _handle_set_subscription_list(self, message: SetSubscriptionListMessage) -> None: """Handle setSubscriptionList message for declarative subscription management.""" - logger.info(f"🎯 [RX Processing] setSubscriptionList with {len(message.subscriptions)} subscriptions") - for i, sub in enumerate(message.subscriptions): - logger.info(f" 📋 Sub {i+1}: {sub.subscriptionIdentifier} (model: {sub.modelId})") + logger.info(f"[RX Processing] setSubscriptionList with {len(message.subscriptions)} subscriptions") # Update worker state with new subscriptions worker_state.set_subscriptions(message.subscriptions) # Phase 2: Download and manage models - logger.info("📦 Starting model download phase...") await self._ensure_models(message.subscriptions) - logger.info("✅ Model download phase complete") # Phase 3 & 4: Integrate with streaming management and tracking - logger.info("🎬 Starting stream subscription update...") await self._update_stream_subscriptions(message.subscriptions) - logger.info("✅ Stream subscription update complete") - logger.info("🏁 Subscription list updated successfully") + logger.info("Subscription list updated successfully") async def _ensure_models(self, subscriptions) -> None: """Ensure all required models are downloaded and available.""" diff --git a/core/streaming/manager.py b/core/streaming/manager.py index 2de86e4..c4ebd77 100644 --- a/core/streaming/manager.py +++ b/core/streaming/manager.py @@ -85,9 +85,7 @@ class StreamManager: with self._round_robin_lock: if camera_id not in self._camera_list: self._camera_list.append(camera_id) - logger.info(f"✅ Created tracking queue for camera {camera_id}, camera_list now has {len(self._camera_list)} cameras: {self._camera_list}") - else: - logger.warning(f"Camera {camera_id} already in camera_list") + logger.info(f"Created tracking queue for camera {camera_id}") else: logger.debug(f"Camera {camera_id} already has tracking queue") @@ -195,7 +193,6 @@ class StreamManager: def _start_stream(self, camera_id: str, stream_config: StreamConfig) -> bool: """Start a stream for the given camera.""" try: - logger.info(f"🚀 _start_stream called for {camera_id}") if stream_config.rtsp_url: # RTSP stream using FFmpeg subprocess with CUDA acceleration logger.info(f"\033[94m[RTSP] Starting {camera_id}\033[0m") @@ -207,9 +204,7 @@ class StreamManager: reader.set_frame_callback(self._frame_callback) reader.start() self._streams[camera_id] = reader - logger.info(f"🎬 About to call _ensure_camera_queue for {camera_id}") self._ensure_camera_queue(camera_id) # Create tracking queue - logger.info(f"✅ _ensure_camera_queue completed for {camera_id}") logger.info(f"\033[92m[RTSP] {camera_id} connected\033[0m") elif stream_config.snapshot_url: @@ -224,9 +219,7 
@@ class StreamManager: reader.set_frame_callback(self._frame_callback) reader.start() self._streams[camera_id] = reader - logger.info(f"🎬 About to call _ensure_camera_queue for {camera_id}") self._ensure_camera_queue(camera_id) # Create tracking queue - logger.info(f"✅ _ensure_camera_queue completed for {camera_id}") logger.info(f"\033[92m[HTTP] {camera_id} connected\033[0m") else: @@ -346,22 +339,18 @@ class StreamManager: while not self._stop_workers.is_set(): try: - logger.debug(f"Worker {threading.current_thread().name} loop iteration, stop_event={self._stop_workers.is_set()}") - # Get next camera in round-robin fashion camera_id, item = self._get_next_camera_item() if camera_id is None: # No cameras have items, sleep briefly consecutive_empty += 1 - logger.debug(f"Worker {threading.current_thread().name}: All queues empty ({consecutive_empty}/{max_consecutive_empty})") if consecutive_empty >= max_consecutive_empty: time.sleep(0.1) # Sleep 100ms if nothing to process consecutive_empty = 0 continue consecutive_empty = 0 # Reset counter when we find work - logger.info(f"Worker {threading.current_thread().name}: Processing frame from {camera_id}") frame = item['frame'] timestamp = item['timestamp'] @@ -369,13 +358,11 @@ class StreamManager: # Check if frame is too old (drop if > 1 second old) age = time.time() - timestamp if age > 1.0: - logger.warning(f"Dropping old frame for {camera_id} (age: {age:.2f}s)") + logger.debug(f"Dropping old frame for {camera_id} (age: {age:.2f}s)") continue - logger.info(f"Worker {threading.current_thread().name}: Calling tracking sync for {camera_id}") # Process tracking for this camera's frame self._process_tracking_for_camera_sync(camera_id, frame) - logger.info(f"Worker {threading.current_thread().name}: Finished tracking sync for {camera_id}") except Exception as e: logger.error(f"Error in tracking worker: {e}", exc_info=True) @@ -388,17 +375,9 @@ class StreamManager: # Get current list of cameras from actual tracking queues (central state) camera_list = list(self._tracking_queues.keys()) - # Debug: show ALL state - logger.info(f"🔍 _tracking_queues keys: {list(self._tracking_queues.keys())}") - logger.info(f"🔍 _streams keys: {list(self._streams.keys())}") - logger.info(f"🔍 _subscriptions keys: {list(self._subscriptions.keys())}") - if not camera_list: - logger.warning("⚠️ _get_next_camera_item: No cameras have tracking queues yet, but streams/subscriptions exist!") return None, None - logger.debug(f"_get_next_camera_item: {len(camera_list)} cameras with queues: {camera_list}") - attempts = 0 max_attempts = len(camera_list) @@ -408,25 +387,19 @@ class StreamManager: self._camera_round_robin_index = 0 camera_id = camera_list[self._camera_round_robin_index] - logger.debug(f"_get_next_camera_item: Trying camera {camera_id} (attempt {attempts + 1}/{max_attempts})") # Move to next camera for next call self._camera_round_robin_index = (self._camera_round_robin_index + 1) % len(camera_list) # Try to get item from this camera's queue - queue_size = self._tracking_queues[camera_id].qsize() - logger.debug(f"_get_next_camera_item: Camera {camera_id} queue has {queue_size} items") try: item = self._tracking_queues[camera_id].get_nowait() - logger.info(f"_get_next_camera_item: Got item from {camera_id}") return camera_id, item except queue.Empty: - logger.debug(f"_get_next_camera_item: Camera {camera_id} queue empty") pass # Try next camera attempts += 1 - logger.debug("_get_next_camera_item: All cameras empty") return None, None # All cameras empty def 
_process_tracking_for_camera_sync(self, camera_id: str, frame): diff --git a/core/tracking/integration.py b/core/tracking/integration.py index 8e0d8fa..28e7d3a 100644 --- a/core/tracking/integration.py +++ b/core/tracking/integration.py @@ -220,8 +220,10 @@ class TrackingPipelineIntegration: ) # Update last detection time for abandonment detection + # Update when vehicles ARE detected, so when they leave, timestamp ages if tracked_vehicles: self.last_detection_time[display_id] = time.time() + logger.debug(f"Updated last_detection_time for {display_id}: {len(tracked_vehicles)} vehicles") # Check for car abandonment (vehicle left after getting car_wait_staff stage) await self._check_car_abandonment(display_id, subscription_id) @@ -632,10 +634,16 @@ class TrackingPipelineIntegration: last_detection = self.last_detection_time.get(session_display, 0) time_since_detection = current_time - last_detection + logger.info(f"[ABANDON CHECK] Session {session_id} (display: {session_display}): " + f"time_since_detection={time_since_detection:.1f}s, " + f"timeout={self.abandonment_timeout}s") + if time_since_detection > self.abandonment_timeout: - logger.info(f"Car abandonment detected: session {session_id}, " + logger.warning(f"🚨 Car abandonment detected: session {session_id}, " f"no detection for {time_since_detection:.1f}s") abandoned_sessions.append(session_id) + else: + logger.debug(f"[ABANDON CHECK] Session {session_id} has no associated display") # Send abandonment detection for each abandoned session for session_id in abandoned_sessions: @@ -643,6 +651,7 @@ class TrackingPipelineIntegration: # Remove from progression stages to avoid repeated detection if session_id in self.progression_stages: del self.progression_stages[session_id] + logger.info(f"[ABANDON] Removed session {session_id} from progression_stages after notification") async def _send_abandonment_detection(self, subscription_id: str, session_id: str): """ From 9e5b5a32adf02658b6f699fcdbba1aa98f172bcc Mon Sep 17 00:00:00 2001 From: ziesorx Date: Tue, 30 Sep 2025 16:23:07 +0700 Subject: [PATCH 18/30] fix: bring back gpu usage --- core/streaming/readers/ffmpeg_rtsp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/streaming/readers/ffmpeg_rtsp.py b/core/streaming/readers/ffmpeg_rtsp.py index e469c9e..88f45ae 100644 --- a/core/streaming/readers/ffmpeg_rtsp.py +++ b/core/streaming/readers/ffmpeg_rtsp.py @@ -113,8 +113,8 @@ class FFmpegRTSPReader(VideoReader): cmd = [ 'ffmpeg', # DO NOT REMOVE - # '-hwaccel', 'cuda', - # '-hwaccel_device', '0', + '-hwaccel', 'cuda', + '-hwaccel_device', '0', # Real-time input flags '-fflags', 'nobuffer+genpts', '-flags', 'low_delay', From 402f7732a8aeaa12c3916637798bab2f0d9243a2 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Tue, 30 Sep 2025 17:24:33 +0700 Subject: [PATCH 19/30] fix: change min bbox size for frontal --- core/tracking/integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/tracking/integration.py b/core/tracking/integration.py index 28e7d3a..2fba002 100644 --- a/core/tracking/integration.py +++ b/core/tracking/integration.py @@ -74,7 +74,7 @@ class TrackingPipelineIntegration: # Min bbox filtering configuration # TODO: Make this configurable via pipeline.json in the future - self.min_bbox_area_percentage = 4.5 # 4.5% of frame area minimum + self.min_bbox_area_percentage = 3.5 # 3.5% of frame area minimum # Statistics self.stats = { From b2e7bc499d5edbaab724fc0e596ef8824671b9ac Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Wed, 1 Oct 
2025 01:27:12 +0700 Subject: [PATCH 20/30] feat: add session image retrieval endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add HTTP endpoint to retrieve saved session images by session ID. Images are saved during car_fueling progression stage. - Add GET /session-image/{session_id} endpoint - Search images directory for files matching session ID pattern - Return most recent image if multiple exist - Proper error handling (404 for not found, 500 for errors) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- app.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/app.py b/app.py index 7b82d23..21d89db 100644 --- a/app.py +++ b/app.py @@ -302,6 +302,63 @@ async def get_camera_image(camera_id: str): raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") +@app.get("/session-image/{session_id}") +async def get_session_image(session_id: int): + """ + HTTP endpoint to retrieve the saved session image by session ID. + + Args: + session_id: The session ID to retrieve the image for + + Returns: + JPEG image as binary response + + Raises: + HTTPException: 404 if no image found for the session + HTTPException: 500 if reading image fails + """ + try: + from pathlib import Path + import glob + + # Images directory + images_dir = Path("images") + + if not images_dir.exists(): + logger.warning(f"Images directory does not exist") + raise HTTPException( + status_code=404, + detail=f"No images directory found" + ) + + # Search for files matching session ID pattern: {session_id}_* + pattern = str(images_dir / f"{session_id}_*.jpg") + matching_files = glob.glob(pattern) + + if not matching_files: + logger.warning(f"No image found for session {session_id}") + raise HTTPException( + status_code=404, + detail=f"No image found for session {session_id}" + ) + + # Get the most recent file if multiple exist + most_recent_file = max(matching_files, key=os.path.getmtime) + logger.info(f"Found session image for session {session_id}: {most_recent_file}") + + # Read the image file + image_data = open(most_recent_file, 'rb').read() + + # Return image as binary response + return Response(content=image_data, media_type="image/jpeg") + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error retrieving session image for session {session_id}: {str(e)}", exc_info=True) + raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") + + @app.get("/health") async def health_check(): """Health check endpoint for monitoring.""" From 69671bbc1a693d9f8ffd7b866220fa0f8e931429 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Sun, 19 Oct 2025 15:45:32 +0700 Subject: [PATCH 21/30] fix: docker compose file --- .gitea/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitea/workflows/build.yml b/.gitea/workflows/build.yml index 316c4dc..d44c87b 100644 --- a/.gitea/workflows/build.yml +++ b/.gitea/workflows/build.yml @@ -105,7 +105,7 @@ jobs: echo "Pulling and starting containers on server..." if [ "${{ github.ref_name }}" = "main" ]; then echo "Deploying production stack..." 
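          # Note on the change below: docker compose merges -f files left to
          # right, with later files overriding earlier ones, so the staging
          # compose file serves as the base and the production file overlays it.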
- ssh -i ~/.ssh/id_rsa ${{ vars.DEPLOY_USER_CMS }}@${{ vars.DEPLOY_HOST_CMS }} "cd ~/cms-system-k8s && docker compose -f docker-compose.production.yml pull && docker compose -f docker-compose.production.yml up -d" + ssh -i ~/.ssh/id_rsa ${{ vars.DEPLOY_USER_CMS }}@${{ vars.DEPLOY_HOST_CMS }} "cd ~/cms-system-k8s && docker compose -f docker-compose.staging.yml -f docker-compose.production.yml pull && docker compose -f docker-compose.staging.yml -f docker-compose.production.yml up -d" else echo "Deploying staging stack..." ssh -i ~/.ssh/id_rsa ${{ vars.DEPLOY_USER_CMS }}@${{ vars.DEPLOY_HOST_CMS }} "cd ~/cms-system-k8s && docker compose -f docker-compose.staging.yml pull && docker compose -f docker-compose.staging.yml up -d" From 498b285e8032904b31cf73446211b4d19066c3a5 Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Sun, 19 Oct 2025 15:48:00 +0700 Subject: [PATCH 22/30] fix: staging deployment --- .gitea/workflows/build.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.gitea/workflows/build.yml b/.gitea/workflows/build.yml index d44c87b..dc4f18d 100644 --- a/.gitea/workflows/build.yml +++ b/.gitea/workflows/build.yml @@ -103,10 +103,4 @@ jobs: - name: Deploy stack run: | echo "Pulling and starting containers on server..." - if [ "${{ github.ref_name }}" = "main" ]; then - echo "Deploying production stack..." - ssh -i ~/.ssh/id_rsa ${{ vars.DEPLOY_USER_CMS }}@${{ vars.DEPLOY_HOST_CMS }} "cd ~/cms-system-k8s && docker compose -f docker-compose.staging.yml -f docker-compose.production.yml pull && docker compose -f docker-compose.staging.yml -f docker-compose.production.yml up -d" - else - echo "Deploying staging stack..." - ssh -i ~/.ssh/id_rsa ${{ vars.DEPLOY_USER_CMS }}@${{ vars.DEPLOY_HOST_CMS }} "cd ~/cms-system-k8s && docker compose -f docker-compose.staging.yml pull && docker compose -f docker-compose.staging.yml up -d" - fi + ssh -i ~/.ssh/id_rsa ${{ vars.DEPLOY_USER_CMS }}@${{ vars.DEPLOY_HOST_CMS }} "cd ~/cms-system-k8s && docker compose -f docker-compose.staging.yml -f docker-compose.production.yml pull && docker compose -f docker-compose.staging.yml -f docker-compose.production.yml up -d" From 5e59e00c553c1b3bb18b2845cb280c3281a04ce3 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Mon, 20 Oct 2025 14:52:59 +0700 Subject: [PATCH 23/30] fix: classification top-1, dynamic result field, removed crop filter --- core/detection/branches.py | 113 ++++++++++++++++++++------ core/streaming/readers/ffmpeg_rtsp.py | 4 +- 2 files changed, 88 insertions(+), 29 deletions(-) diff --git a/core/detection/branches.py b/core/detection/branches.py index 247c5f8..9359ea8 100644 --- a/core/detection/branches.py +++ b/core/detection/branches.py @@ -35,6 +35,9 @@ class BranchProcessor: # Branch models cache self.branch_models: Dict[str, YOLOWrapper] = {} + # Dynamic field mapping: branch_id → output_field_name (e.g., {"car_brand_cls_v3": "brand"}) + self.branch_output_fields: Dict[str, str] = {} + # Thread pool for parallel execution self.executor = ThreadPoolExecutor(max_workers=4) @@ -68,6 +71,9 @@ class BranchProcessor: self.redis_manager = redis_manager self.db_manager = db_manager + # Parse field mappings from parallelActions to enable dynamic field extraction + self._parse_branch_output_fields(pipeline_config) + # Pre-load branch models if they exist branches = getattr(pipeline_config, 'branches', []) if branches: @@ -141,6 +147,46 @@ class BranchProcessor: logger.error(f"Error loading branch model {getattr(branch_config, 'model_id', 'unknown')}: {e}") return None + def 
_parse_branch_output_fields(self, pipeline_config: Any) -> None: + """ + Parse parallelActions.fields to determine what output field each branch produces. + Creates dynamic mapping from branch_id to output field name. + + Example: + Input: parallelActions.fields = {"car_brand": "{car_brand_cls_v3.brand}"} + Output: self.branch_output_fields = {"car_brand_cls_v3": "brand"} + + Args: + pipeline_config: Pipeline configuration object + """ + try: + if not pipeline_config or not hasattr(pipeline_config, 'parallel_actions'): + logger.debug("[FIELD MAPPING] No parallelActions found in pipeline config") + return + + for action in pipeline_config.parallel_actions: + if action.type.value == 'postgresql_update_combined': + fields = action.params.get('fields', {}) + + # Parse each field template to extract branch_id and field_name + for db_field_name, template in fields.items(): + # Template format: "{branch_id.field_name}" + if template.startswith('{') and template.endswith('}'): + var_name = template[1:-1] # Remove { } + + if '.' in var_name: + branch_id, field_name = var_name.split('.', 1) + + # Store the mapping + self.branch_output_fields[branch_id] = field_name + + logger.info(f"[FIELD MAPPING] Branch '{branch_id}' → outputs field '{field_name}'") + + logger.info(f"[FIELD MAPPING] Parsed {len(self.branch_output_fields)} branch output field mappings") + + except Exception as e: + logger.error(f"[FIELD MAPPING] Error parsing branch output fields: {e}", exc_info=True) + async def execute_branches(self, frame: np.ndarray, branches: List[Any], @@ -350,10 +396,11 @@ class BranchProcessor: logger.debug(f"[REGION DATA] {branch_id}: '{region_name}' -> bbox={region_data.get('bbox')}, conf={region_data.get('confidence')}") if trigger_classes: - # Check if any parent detection matches our trigger classes + # Check if any parent detection matches our trigger classes (case-insensitive) should_execute = False for trigger_class in trigger_classes: - if trigger_class in detected_regions: + # Case-insensitive comparison for robustness + if trigger_class.lower() in [k.lower() for k in detected_regions.keys()]: should_execute = True logger.info(f"[TRIGGER CHECK] {branch_id}: Found '{trigger_class}' in parent detections - branch will execute") break @@ -410,16 +457,15 @@ class BranchProcessor: region = detected_regions[crop_class] confidence = region.get('confidence', 0.0) - # Only use detections above min_confidence - if confidence >= min_confidence: - bbox = region['bbox'] - area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) # width * height + # Select largest bbox (no confidence filtering - parent already validated it) + bbox = region['bbox'] + area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) # width * height - # Choose biggest bbox among valid detections - if area > best_area: - best_region = region - best_class = crop_class - best_area = area + # Choose biggest bbox among available detections + if area > best_area: + best_region = region + best_class = crop_class + best_area = area if best_region: bbox = best_region['bbox'] @@ -478,17 +524,25 @@ class BranchProcessor: top_indices = probs.top5 # Get top 5 predictions top_conf = probs.top5conf.cpu().numpy() - for idx, conf in zip(top_indices, top_conf): - if conf >= min_confidence: - class_name = model.model.names[int(idx)] - logger.debug(f"[CLASSIFICATION RESULT {len(branch_detections)+1}] {branch_id}: '{class_name}', conf={conf:.3f}") + # For classification: take only TOP-1 prediction (not all top-5) + # This prevents empty results when all top-5 predictions are 
below threshold + if len(top_indices) > 0 and len(top_conf) > 0: + top_idx = top_indices[0] + top_confidence = float(top_conf[0]) + + # Apply minConfidence threshold to top-1 only + if top_confidence >= min_confidence: + class_name = model.model.names[int(top_idx)] + logger.info(f"[CLASSIFICATION TOP-1] {branch_id}: '{class_name}', conf={top_confidence:.3f}") # For classification, use full input frame dimensions as bbox branch_detections.append({ 'class_name': class_name, - 'confidence': float(conf), + 'confidence': top_confidence, 'bbox': [0, 0, input_frame.shape[1], input_frame.shape[0]] }) + else: + logger.warning(f"[CLASSIFICATION FILTERED] {branch_id}: Top prediction conf={top_confidence:.3f} < threshold={min_confidence}") else: logger.warning(f"[UNKNOWN MODEL] {branch_id}: Model results have no .boxes or .probs") @@ -499,22 +553,27 @@ class BranchProcessor: logger.info(f"[FINAL RESULTS] {branch_id}: {len(branch_detections)} detections processed") - # Extract best result for classification models + # Determine output field name from dynamic mapping (parsed from parallelActions.fields) + output_field = self.branch_output_fields.get(branch_id) + + # Always initialize the field (even if None) to ensure it exists for database update + if output_field: + result['result'][output_field] = None + logger.debug(f"[FIELD INIT] {branch_id}: Initialized field '{output_field}' = None") + + # Extract best detection if available if branch_detections: best_detection = max(branch_detections, key=lambda x: x['confidence']) logger.info(f"[BEST DETECTION] {branch_id}: '{best_detection['class_name']}' with confidence {best_detection['confidence']:.3f}") - # Add classification-style results for database operations - if 'brand' in branch_id.lower(): - result['result']['brand'] = best_detection['class_name'] - elif 'body' in branch_id.lower() or 'bodytype' in branch_id.lower(): - result['result']['body_type'] = best_detection['class_name'] - elif 'front_rear' in branch_id.lower(): - result['result']['front_rear'] = best_detection['confidence'] - - logger.info(f"[CLASSIFICATION RESULT] {branch_id}: Extracted classification fields") + # Set the output field value using dynamic mapping + if output_field: + result['result'][output_field] = best_detection['class_name'] + logger.info(f"[FIELD SET] {branch_id}: Set field '{output_field}' = '{best_detection['class_name']}'") + else: + logger.warning(f"[NO MAPPING] {branch_id}: No output field defined in parallelActions.fields") else: - logger.warning(f"[NO RESULTS] {branch_id}: No detections found") + logger.warning(f"[NO RESULTS] {branch_id}: No detections found, field '{output_field}' remains None") # Execute branch actions if this branch found valid detections actions_executed = [] diff --git a/core/streaming/readers/ffmpeg_rtsp.py b/core/streaming/readers/ffmpeg_rtsp.py index 88f45ae..e469c9e 100644 --- a/core/streaming/readers/ffmpeg_rtsp.py +++ b/core/streaming/readers/ffmpeg_rtsp.py @@ -113,8 +113,8 @@ class FFmpegRTSPReader(VideoReader): cmd = [ 'ffmpeg', # DO NOT REMOVE - '-hwaccel', 'cuda', - '-hwaccel_device', '0', + # '-hwaccel', 'cuda', + # '-hwaccel_device', '0', # Real-time input flags '-fflags', 'nobuffer+genpts', '-flags', 'low_delay', From a4cfb264b9bddd305c47e4f42afe517a05a434d3 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Mon, 20 Oct 2025 16:54:27 +0700 Subject: [PATCH 24/30] fix: car detection use wrong image source --- core/detection/branches.py | 54 ++++++++++++++++++++++++++---------- core/detection/pipeline.py | 34 ++++++++++++++++------- 
core/tracking/integration.py | 44 ++++++++++++++++++++++------- 3 files changed, 98 insertions(+), 34 deletions(-) diff --git a/core/detection/branches.py b/core/detection/branches.py index 9359ea8..97c44ff 100644 --- a/core/detection/branches.py +++ b/core/detection/branches.py @@ -393,7 +393,12 @@ class BranchProcessor: trigger_classes = getattr(branch_config, 'trigger_classes', []) logger.info(f"[DETECTED REGIONS] {branch_id}: Available parent detections: {list(detected_regions.keys())}") for region_name, region_data in detected_regions.items(): - logger.debug(f"[REGION DATA] {branch_id}: '{region_name}' -> bbox={region_data.get('bbox')}, conf={region_data.get('confidence')}") + # Handle both list (new) and single dict (backward compat) + if isinstance(region_data, list): + for i, region in enumerate(region_data): + logger.debug(f"[REGION DATA] {branch_id}: '{region_name}[{i}]' -> bbox={region.get('bbox')}, conf={region.get('confidence')}") + else: + logger.debug(f"[REGION DATA] {branch_id}: '{region_name}' -> bbox={region_data.get('bbox')}, conf={region_data.get('confidence')}") if trigger_classes: # Check if any parent detection matches our trigger classes (case-insensitive) @@ -454,18 +459,24 @@ class BranchProcessor: for crop_class in crop_classes: if crop_class in detected_regions: - region = detected_regions[crop_class] - confidence = region.get('confidence', 0.0) + regions = detected_regions[crop_class] - # Select largest bbox (no confidence filtering - parent already validated it) - bbox = region['bbox'] - area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) # width * height + # Handle both list (new) and single dict (backward compat) + if not isinstance(regions, list): + regions = [regions] - # Choose biggest bbox among available detections - if area > best_area: - best_region = region - best_class = crop_class - best_area = area + # Find largest bbox from all detections of this class + for region in regions: + confidence = region.get('confidence', 0.0) + bbox = region['bbox'] + area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) # width * height + + # Choose biggest bbox among all available detections + if area > best_area: + best_region = region + best_class = crop_class + best_area = area + logger.debug(f"[CROP] Selected larger bbox for '{crop_class}': area={area:.0f}px², conf={confidence:.3f}") if best_region: bbox = best_region['bbox'] @@ -483,7 +494,6 @@ class BranchProcessor: logger.info(f"[INFERENCE START] {branch_id}: Running inference on {'cropped' if input_frame is not frame else 'full'} frame " f"({input_frame.shape[1]}x{input_frame.shape[0]}) with confidence={min_confidence}") - # Use .predict() method for both detection and classification models inference_start = time.time() detection_results = model.model.predict(input_frame, conf=min_confidence, verbose=False) @@ -690,10 +700,26 @@ class BranchProcessor: bbox = None if region_name and region_name in detected_regions: # Crop the specified region - bbox = detected_regions[region_name]['bbox'] + # Handle both list (new) and single dict (backward compat) + regions = detected_regions[region_name] + if isinstance(regions, list): + # Multiple detections - select largest bbox + if regions: + best_region = max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1])) + bbox = best_region['bbox'] + else: + bbox = regions['bbox'] elif region_name and region_name.lower() == 'frontal' and 'front_rear' in detected_regions: # Special case: "frontal" region maps to "front_rear" detection - bbox = 
detected_regions['front_rear']['bbox'] + # Handle both list (new) and single dict (backward compat) + regions = detected_regions['front_rear'] + if isinstance(regions, list): + # Multiple detections - select largest bbox + if regions: + best_region = max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1])) + bbox = best_region['bbox'] + else: + bbox = regions['bbox'] if bbox is not None: x1, y1, x2, y2 = [int(coord) for coord in bbox] diff --git a/core/detection/pipeline.py b/core/detection/pipeline.py index d395f3a..ba9ac9a 100644 --- a/core/detection/pipeline.py +++ b/core/detection/pipeline.py @@ -495,11 +495,13 @@ class DetectionPipeline: } valid_detections.append(detection_info) - # Store region for processing phase - detected_regions[class_name] = { + # Store region for processing phase (support multiple detections per class) + if class_name not in detected_regions: + detected_regions[class_name] = [] + detected_regions[class_name].append({ 'bbox': bbox, 'confidence': confidence - } + }) else: logger.warning("[DETECTION PHASE] No boxes found in detection results") @@ -951,14 +953,26 @@ class DetectionPipeline: if region_name and region_name in detected_regions: # Crop the specified region - bbox = detected_regions[region_name]['bbox'] - x1, y1, x2, y2 = [int(coord) for coord in bbox] - cropped = frame[y1:y2, x1:x2] - if cropped.size > 0: - image_to_save = cropped - logger.debug(f"Cropped region '{region_name}' for redis_save_image") + # Handle both list (new) and single dict (backward compat) + regions = detected_regions[region_name] + if isinstance(regions, list): + # Multiple detections - select largest bbox + if regions: + best_region = max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1])) + bbox = best_region['bbox'] + else: + bbox = None else: - logger.warning(f"Empty crop for region '{region_name}', using full frame") + bbox = regions['bbox'] + + if bbox: + x1, y1, x2, y2 = [int(coord) for coord in bbox] + cropped = frame[y1:y2, x1:x2] + if cropped.size > 0: + image_to_save = cropped + logger.debug(f"Cropped region '{region_name}' for redis_save_image") + else: + logger.warning(f"Empty crop for region '{region_name}', using full frame") # Format key with context key = action.params['key'].format(**context) diff --git a/core/tracking/integration.py b/core/tracking/integration.py index 2fba002..1e3fc97 100644 --- a/core/tracking/integration.py +++ b/core/tracking/integration.py @@ -350,10 +350,21 @@ class TrackingPipelineIntegration: 'session_id': session_id } + # Fetch high-quality 2K snapshot for detection phase (not RTSP frame) + # This ensures bbox coordinates match the frame used in processing phase + logger.info(f"[DETECTION PHASE] Fetching 2K snapshot for vehicle {vehicle.track_id}") + snapshot_frame = self._fetch_snapshot() + + if snapshot_frame is None: + logger.warning(f"[DETECTION PHASE] Failed to fetch snapshot, falling back to RTSP frame") + snapshot_frame = frame # Fallback to RTSP if snapshot fails + else: + logger.info(f"[DETECTION PHASE] Using {snapshot_frame.shape[1]}x{snapshot_frame.shape[0]} snapshot for detection") + # Execute only the detection phase (first phase) # This will run detection and send imageDetection message to backend detection_result = await self.detection_pipeline.execute_detection_phase( - frame=frame, + frame=snapshot_frame, # Use 2K snapshot instead of RTSP frame display_id=display_id, subscription_id=subscription_id ) @@ -373,13 +384,13 @@ class 
TrackingPipelineIntegration: if detection_result['message_sent']: # Store for later processing when sessionId is received self.pending_processing_data[display_id] = { - 'frame': frame.copy(), # Store copy of frame for processing phase + 'frame': snapshot_frame.copy(), # Store copy of 2K snapshot (not RTSP frame!) 'vehicle': vehicle, 'subscription_id': subscription_id, 'detection_result': detection_result, 'timestamp': time.time() } - logger.info(f"Stored processing data for {display_id}, waiting for sessionId from backend") + logger.info(f"Stored processing data ({snapshot_frame.shape[1]}x{snapshot_frame.shape[0]} frame) for {display_id}, waiting for sessionId from backend") return detection_result @@ -413,14 +424,27 @@ class TrackingPipelineIntegration: logger.info(f"Executing processing phase for session {session_id}, vehicle {vehicle.track_id}") - # Capture high-quality snapshot for pipeline processing - logger.info(f"[PROCESSING PHASE] Fetching 2K snapshot for session {session_id}") - frame = self._fetch_snapshot() + # Reuse the snapshot from detection phase OR fetch fresh one if detection used RTSP fallback + detection_frame = processing_data['frame'] + frame_height = detection_frame.shape[0] - if frame is None: - logger.warning(f"[PROCESSING PHASE] Failed to capture snapshot, falling back to RTSP frame") - # Fall back to RTSP frame if snapshot fails - frame = processing_data['frame'] + # Check if detection phase used 2K snapshot (height > 1000) or RTSP fallback (height = 720) + if frame_height >= 1000: + # Detection used 2K snapshot - reuse it for consistent coordinates + logger.info(f"[PROCESSING PHASE] Reusing 2K snapshot from detection phase ({detection_frame.shape[1]}x{detection_frame.shape[0]})") + frame = detection_frame + else: + # Detection used RTSP fallback - need to fetch fresh 2K snapshot + logger.warning(f"[PROCESSING PHASE] Detection used RTSP fallback ({detection_frame.shape[1]}x{detection_frame.shape[0]}), fetching fresh 2K snapshot") + frame = self._fetch_snapshot() + + if frame is None: + logger.error(f"[PROCESSING PHASE] Failed to fetch snapshot and detection used RTSP - coordinate mismatch will occur!") + logger.error(f"[PROCESSING PHASE] Cannot proceed with mismatched coordinates. 
Aborting processing phase.") + return # Cannot process safely - bbox coordinates won't match frame resolution + else: + logger.warning(f"[PROCESSING PHASE] Fetched fresh 2K snapshot ({frame.shape[1]}x{frame.shape[0]}), but coordinates may not match exactly") + logger.warning(f"[PROCESSING PHASE] Re-running detection on fresh snapshot is recommended but not implemented yet") # Extract detected regions from detection phase result if available detected_regions = detection_result.get('detected_regions', {}) From d102f1c4de34390ada31abd8e19498e5fcf023e0 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Mon, 20 Oct 2025 17:05:05 +0700 Subject: [PATCH 25/30] fix: send partial results --- core/detection/pipeline.py | 48 ++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/core/detection/pipeline.py b/core/detection/pipeline.py index ba9ac9a..78001da 100644 --- a/core/detection/pipeline.py +++ b/core/detection/pipeline.py @@ -199,6 +199,8 @@ class DetectionPipeline: Dictionary with extracted field values (e.g., {"car_brand": "Honda", "body_type": "Sedan"}) """ extracted = {} + missing_fields = [] + available_fields = [] try: for db_field_name, template in self.field_mappings.items(): @@ -215,12 +217,21 @@ class DetectionPipeline: result_data = branch_data['result'] if isinstance(result_data, dict) and field_name in result_data: extracted[field_name] = result_data[field_name] + available_fields.append(f"{field_name}={result_data[field_name]}") logger.debug(f"[DYNAMIC EXTRACT] {field_name}={result_data[field_name]} from branch {branch_id}") else: + missing_fields.append(f"{field_name} (field not in branch {branch_id})") logger.debug(f"[DYNAMIC EXTRACT] Field '{field_name}' not found in branch {branch_id}") else: + missing_fields.append(f"{field_name} (branch {branch_id} missing)") logger.debug(f"[DYNAMIC EXTRACT] Branch '{branch_id}' not in results") + # Log summary of extraction + if available_fields: + logger.info(f"[FIELD EXTRACTION] Available fields: {', '.join(available_fields)}") + if missing_fields: + logger.warning(f"[FIELD EXTRACTION] Missing fields (will be null): {', '.join(missing_fields)}") + except Exception as e: logger.error(f"Error extracting fields from branches: {e}", exc_info=True) @@ -338,7 +349,17 @@ class DetectionPipeline: car_brand = extracted_fields.get('brand') body_type = extracted_fields.get('body_type') - logger.info(f"[LICENSE PLATE] Extracted fields: brand={car_brand}, body_type={body_type}") + # Log extraction results + fields_status = [] + if car_brand is not None: + fields_status.append(f"brand={car_brand}") + else: + fields_status.append("brand=null") + if body_type is not None: + fields_status.append(f"bodyType={body_type}") + else: + fields_status.append("bodyType=null") + logger.info(f"[LICENSE PLATE] Extracted fields: {', '.join(fields_status)}") # Clean up stored results after use del self.session_processing_results[session_id_for_lookup] @@ -367,7 +388,18 @@ class DetectionPipeline: # Send message await self.message_sender(detection_message) - logger.info(f"[COMBINED MESSAGE] Sent imageDetection with brand='{car_brand}', bodyType='{body_type}', license='{license_text}' to '{subscription_id}'") + + # Log with indication of partial results + null_fields = [] + if car_brand is None: + null_fields.append('brand') + if body_type is None: + null_fields.append('bodyType') + + if null_fields: + logger.info(f"[COMBINED MESSAGE] Sent imageDetection with PARTIAL results (null: {', '.join(null_fields)}) - brand='{car_brand}', 
bodyType='{body_type}', license='{license_text}' to '{subscription_id}'") + else: + logger.info(f"[COMBINED MESSAGE] Sent imageDetection with brand='{car_brand}', bodyType='{body_type}', license='{license_text}' to '{subscription_id}'") except Exception as e: logger.error(f"Error sending license plate imageDetection message: {e}", exc_info=True) @@ -1033,11 +1065,13 @@ class DetectionPipeline: wait_for_branches = action.params.get('waitForBranches', []) branch_results = context.get('branch_results', {}) - # Check if all required branches have completed - for branch_id in wait_for_branches: - if branch_id not in branch_results: - logger.warning(f"Branch {branch_id} result not available for database update") - return {'status': 'error', 'message': f'Missing branch result: {branch_id}'} + # Log missing branches but don't block the update (allow partial results) + missing_branches = [b for b in wait_for_branches if b not in branch_results] + if missing_branches: + logger.warning(f"Some branches missing from results (will use null): {missing_branches}") + available_branches = [b for b in wait_for_branches if b in branch_results] + if available_branches: + logger.info(f"Available branches for database update: {available_branches}") # Prepare fields for database update table = action.params.get('table', 'car_frontal_info') From 10c54bc6e01dde612c382a5fc5d7e47f05f68fd1 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Mon, 20 Oct 2025 17:05:20 +0700 Subject: [PATCH 26/30] chore: bring back cuda --- core/streaming/readers/ffmpeg_rtsp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/streaming/readers/ffmpeg_rtsp.py b/core/streaming/readers/ffmpeg_rtsp.py index e469c9e..88f45ae 100644 --- a/core/streaming/readers/ffmpeg_rtsp.py +++ b/core/streaming/readers/ffmpeg_rtsp.py @@ -113,8 +113,8 @@ class FFmpegRTSPReader(VideoReader): cmd = [ 'ffmpeg', # DO NOT REMOVE - # '-hwaccel', 'cuda', - # '-hwaccel_device', '0', + '-hwaccel', 'cuda', + '-hwaccel_device', '0', # Real-time input flags '-fflags', 'nobuffer+genpts', '-flags', 'low_delay', From 0dd1b9f5c2682964581fe2b027eb639651b59831 Mon Sep 17 00:00:00 2001 From: ziesorx Date: Mon, 20 Oct 2025 17:54:50 +0700 Subject: [PATCH 27/30] fix: sent partial results --- core/detection/branches.py | 95 ++++++++++++++++++++++++++++++-------- core/detection/pipeline.py | 79 +++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+), 19 deletions(-) diff --git a/core/detection/branches.py b/core/detection/branches.py index 97c44ff..61b6dbb 100644 --- a/core/detection/branches.py +++ b/core/detection/branches.py @@ -45,12 +45,17 @@ class BranchProcessor: self.redis_manager = None self.db_manager = None + # Branch execution timeout (seconds) + self.branch_timeout = 30.0 + # Statistics self.stats = { 'branches_processed': 0, 'parallel_executions': 0, 'total_processing_time': 0.0, - 'models_loaded': 0 + 'models_loaded': 0, + 'branches_timed_out': 0, + 'branches_failed': 0 } logger.info("BranchProcessor initialized") @@ -279,22 +284,46 @@ class BranchProcessor: ) future_to_branch[future] = branch - # Collect results as they complete - for future in as_completed(future_to_branch): - branch = future_to_branch[future] - branch_id = getattr(branch, 'model_id', 'unknown') + # Collect results as they complete with timeout + try: + for future in as_completed(future_to_branch, timeout=self.branch_timeout): + branch = future_to_branch[future] + branch_id = getattr(branch, 'model_id', 'unknown') - try: - result = future.result() - results[branch_id] = 
result - logger.info(f"[PARALLEL COMPLETE] {branch_id}: Branch completed successfully") - except Exception as e: - logger.error(f"Error in parallel branch {branch_id}: {e}") - results[branch_id] = { - 'status': 'error', - 'message': str(e), - 'processing_time': 0.0 - } + try: + # Get result with timeout to prevent indefinite hanging + result = future.result(timeout=self.branch_timeout) + results[branch_id] = result + logger.info(f"[PARALLEL COMPLETE] {branch_id}: Branch completed successfully") + except TimeoutError: + logger.error(f"[TIMEOUT] Branch {branch_id} exceeded timeout of {self.branch_timeout}s") + self.stats['branches_timed_out'] += 1 + results[branch_id] = { + 'status': 'timeout', + 'message': f'Branch execution timeout after {self.branch_timeout}s', + 'processing_time': self.branch_timeout + } + except Exception as e: + logger.error(f"[ERROR] Error in parallel branch {branch_id}: {e}", exc_info=True) + self.stats['branches_failed'] += 1 + results[branch_id] = { + 'status': 'error', + 'message': str(e), + 'processing_time': 0.0 + } + except TimeoutError: + # as_completed iterator timed out - mark remaining futures as timed out + logger.error(f"[TIMEOUT] Branch execution timeout after {self.branch_timeout}s - some branches did not complete") + for future, branch in future_to_branch.items(): + branch_id = getattr(branch, 'model_id', 'unknown') + if branch_id not in results: + logger.error(f"[TIMEOUT] Branch {branch_id} did not complete within timeout") + self.stats['branches_timed_out'] += 1 + results[branch_id] = { + 'status': 'timeout', + 'message': f'Branch did not complete within {self.branch_timeout}s timeout', + 'processing_time': self.branch_timeout + } # Flatten nested branch results to top level for database access flattened_results = {} @@ -309,6 +338,24 @@ class BranchProcessor: flattened_results[nested_branch_id] = nested_result logger.info(f"[FLATTEN] Added nested branch {nested_branch_id} to top-level results") + # Log summary of branch execution results + succeeded = [bid for bid, res in results.items() if res.get('status') == 'success'] + failed = [bid for bid, res in results.items() if res.get('status') == 'error'] + timed_out = [bid for bid, res in results.items() if res.get('status') == 'timeout'] + skipped = [bid for bid, res in results.items() if res.get('status') == 'skipped'] + + summary_parts = [] + if succeeded: + summary_parts.append(f"{len(succeeded)} succeeded: {', '.join(succeeded)}") + if failed: + summary_parts.append(f"{len(failed)} FAILED: {', '.join(failed)}") + if timed_out: + summary_parts.append(f"{len(timed_out)} TIMED OUT: {', '.join(timed_out)}") + if skipped: + summary_parts.append(f"{len(skipped)} skipped: {', '.join(skipped)}") + + logger.info(f"[PARALLEL SUMMARY] Branch execution completed: {' | '.join(summary_parts) if summary_parts else 'no branches'}") + return flattened_results async def _execute_sequential_branches(self, @@ -496,9 +543,19 @@ class BranchProcessor: # Use .predict() method for both detection and classification models inference_start = time.time() - detection_results = model.model.predict(input_frame, conf=min_confidence, verbose=False) - inference_time = time.time() - inference_start - logger.info(f"[INFERENCE DONE] {branch_id}: Predict completed in {inference_time:.3f}s using .predict() method") + try: + detection_results = model.model.predict(input_frame, conf=min_confidence, verbose=False) + inference_time = time.time() - inference_start + logger.info(f"[INFERENCE DONE] {branch_id}: Predict completed in 
{inference_time:.3f}s using .predict() method") + except Exception as inference_error: + inference_time = time.time() - inference_start + logger.error(f"[INFERENCE ERROR] {branch_id}: Model inference failed after {inference_time:.3f}s: {inference_error}", exc_info=True) + return { + 'status': 'error', + 'branch_id': branch_id, + 'message': f'Model inference failed: {str(inference_error)}', + 'processing_time': time.time() - start_time + } # Initialize branch_detections outside the conditional branch_detections = [] diff --git a/core/detection/pipeline.py b/core/detection/pipeline.py index 78001da..d71f525 100644 --- a/core/detection/pipeline.py +++ b/core/detection/pipeline.py @@ -445,6 +445,78 @@ class DetectionPipeline: except Exception as e: logger.error(f"Error sending initial detection imageDetection message: {e}", exc_info=True) + async def _send_classification_results(self, subscription_id: str, session_id: str, branch_results: Dict[str, Any]): + """ + Send imageDetection message with classification results (without license plate). + Called after processing phase completes to send partial results immediately. + + Args: + subscription_id: Subscription identifier to send message to + session_id: Session identifier + branch_results: Dictionary of branch execution results + """ + try: + if not self.message_sender: + logger.warning("No message sender configured, cannot send imageDetection") + return + + # Import here to avoid circular imports + from ..communication.models import ImageDetectionMessage, DetectionData + + # Extract classification fields from branch results + extracted_fields = self._extract_fields_from_branches(branch_results) + car_brand = extracted_fields.get('brand') + body_type = extracted_fields.get('body_type') + + # Log what we're sending + fields_status = [] + if car_brand is not None: + fields_status.append(f"brand={car_brand}") + else: + fields_status.append("brand=null") + if body_type is not None: + fields_status.append(f"bodyType={body_type}") + else: + fields_status.append("bodyType=null") + logger.info(f"[CLASSIFICATION] Sending partial results for session {session_id}: {', '.join(fields_status)}") + + # Create detection data with classification results (license plate still pending) + detection_data_obj = DetectionData( + detection={ + "carBrand": car_brand, + "carModel": None, # Not implemented yet + "bodyType": body_type, + "licensePlateText": None, # Will be sent later via license plate callback + "licensePlateConfidence": None + }, + modelId=self.model_id, + modelName=self.pipeline_parser.pipeline_config.model_id if self.pipeline_parser.pipeline_config else "detection_model" + ) + + # Create imageDetection message + detection_message = ImageDetectionMessage( + subscriptionIdentifier=subscription_id, + data=detection_data_obj + ) + + # Send message + await self.message_sender(detection_message) + + # Log with indication of partial results + null_fields = [] + if car_brand is None: + null_fields.append('brand') + if body_type is None: + null_fields.append('bodyType') + + if null_fields: + logger.info(f"[PARTIAL RESULTS] Sent imageDetection with PARTIAL results (null: {', '.join(null_fields)}) - brand='{car_brand}', bodyType='{body_type}' to '{subscription_id}'") + else: + logger.info(f"[CLASSIFICATION COMPLETE] Sent imageDetection with brand='{car_brand}', bodyType='{body_type}' to '{subscription_id}'") + + except Exception as e: + logger.error(f"Error sending classification results imageDetection message: {e}", exc_info=True) + async def 
execute_detection_phase(self,
                                      frame: np.ndarray,
                                      display_id: str,
@@ -693,6 +765,13 @@ class DetectionPipeline:
                     self.session_processing_results[session_id] = result['branch_results']
                     logger.info(f"[PROCESSING RESULTS] Stored results for session {session_id} for later combination")
 
+                    # Send classification results immediately (license plate will come later via callback)
+                    await self._send_classification_results(
+                        subscription_id=subscription_id,
+                        session_id=session_id,
+                        branch_results=result['branch_results']
+                    )
+
             logger.info(f"Processing phase completed for session {session_id}: "
                        f"{len(result['branch_results'])} branches, {len(result['actions_executed'])} actions")
 
From f495b47a9611a4058746212ef577e367ac948ace Mon Sep 17 00:00:00 2001
From: ziesorx
Date: Mon, 20 Oct 2025 18:04:23 +0700
Subject: [PATCH 28/30] fix: increase tracking thread pool size and lower
 tracking min confidence
---
 core/tracking/integration.py |  3 ++-
 core/tracking/validator.py   | 11 ++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/core/tracking/integration.py b/core/tracking/integration.py
index 1e3fc97..6ff2ee7 100644
--- a/core/tracking/integration.py
+++ b/core/tracking/integration.py
@@ -70,7 +70,8 @@ class TrackingPipelineIntegration:
         self.abandonment_timeout = 3.0  # seconds to wait before declaring car abandoned
 
         # Thread pool for pipeline execution
-        self.executor = ThreadPoolExecutor(max_workers=2)
+        # Increased to 8 workers to handle 8 concurrent cameras without queuing
+        self.executor = ThreadPoolExecutor(max_workers=8)
 
         # Min bbox filtering configuration
         # TODO: Make this configurable via pipeline.json in the future
diff --git a/core/tracking/validator.py b/core/tracking/validator.py
index d86a3f6..0c1dca4 100644
--- a/core/tracking/validator.py
+++ b/core/tracking/validator.py
@@ -56,10 +56,15 @@ class StableCarValidator:
         self.config = config or {}
 
         # Validation thresholds
-        self.min_stable_duration = self.config.get('min_stable_duration', 3.0)  # seconds
-        self.min_stable_frames = self.config.get('min_stable_frames', 10)
+        # Optimized for 6 FPS RTSP source with 8 concurrent cameras on GPU
+        # GPU contention reduces effective FPS to ~3-5 per camera
+        # Reduced from 3.0s to 1.5s to achieve ~2.75s total validation time (was ~4.25s)
+        self.min_stable_duration = self.config.get('min_stable_duration', 1.5)  # seconds
+        # Reduced from 10 to 5 to align with tracker requirement and reduce validation time
+        self.min_stable_frames = self.config.get('min_stable_frames', 5)
         self.position_variance_threshold = self.config.get('position_variance_threshold', 25.0)  # pixels
-        self.min_confidence = self.config.get('min_confidence', 0.7)
+        # Reduced from 0.7 to 0.45 to be more permissive under GPU load
+        self.min_confidence = self.config.get('min_confidence', 0.45)
         self.velocity_threshold = self.config.get('velocity_threshold', 5.0)  # pixels/frame
         self.entering_zone_ratio = self.config.get('entering_zone_ratio', 0.3)  # 30% of frame
         self.leaving_zone_ratio = self.config.get('leaving_zone_ratio', 0.3)

From 0348812fcc8b6170bfd848b510b677224dd75ae5 Mon Sep 17 00:00:00 2001
From: ziesorx
Date: Mon, 20 Oct 2025 18:24:19 +0700
Subject: [PATCH 29/30] refactor: improve get_session_image lookup
---
 app.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/app.py b/app.py
index 21d89db..8e17400 100644
--- a/app.py
+++ b/app.py
@@ -319,7 +319,6 @@ async def get_session_image(session_id: int):
     """
     try:
         from pathlib import Path
-        import glob
 
         # Images directory
         images_dir = Path("images")
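The hunk that follows replaces the glob.glob() scan with a single os.scandir() pass that tracks the newest matching file inline. As a minimal standalone sketch of the same prefix-scan, newest-mtime-wins pattern (the helper name find_latest_session_image is illustrative, not part of the patch):

    import os
    from pathlib import Path
    from typing import Optional

    def find_latest_session_image(images_dir: Path, session_id: int) -> Optional[str]:
        """Return the path of the newest '{session_id}_*.jpg' in images_dir, or None."""
        prefix = f"{session_id}_"
        newest_path: Optional[str] = None
        newest_mtime = 0.0
        with os.scandir(images_dir) as entries:
            for entry in entries:
                # DirEntry can answer is_file()/stat() from information cached
                # during the directory scan, avoiding an extra stat() syscall
                # per candidate file
                if entry.is_file() and entry.name.startswith(prefix) and entry.name.endswith('.jpg'):
                    mtime = entry.stat().st_mtime
                    if mtime > newest_mtime:
                        newest_mtime = mtime
                        newest_path = entry.path
        return newest_path

Unlike the glob.glob() plus max() approach it replaces, this keeps only the current best candidate instead of materializing the full list of matches.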
@@ -331,23 +330,34 @@ async def get_session_image(session_id: int):
                 detail=f"No images directory found"
             )
 
-        # Search for files matching session ID pattern: {session_id}_*
-        pattern = str(images_dir / f"{session_id}_*.jpg")
-        matching_files = glob.glob(pattern)
+        # Use os.scandir() for efficient file searching (3-5x faster than glob.glob)
+        # Filter files matching session ID pattern: {session_id}_*.jpg
+        prefix = f"{session_id}_"
+        most_recent_file = None
+        most_recent_mtime = 0
 
-        if not matching_files:
+        with os.scandir(images_dir) as entries:
+            for entry in entries:
+                # Filter: must be a file, start with session_id prefix, and end with .jpg
+                if entry.is_file() and entry.name.startswith(prefix) and entry.name.endswith('.jpg'):
+                    # Use cached stat info from DirEntry (much faster than separate stat calls)
+                    entry_stat = entry.stat()
+                    if entry_stat.st_mtime > most_recent_mtime:
+                        most_recent_mtime = entry_stat.st_mtime
+                        most_recent_file = entry.path
+
+        if not most_recent_file:
             logger.warning(f"No image found for session {session_id}")
             raise HTTPException(
                 status_code=404,
                 detail=f"No image found for session {session_id}"
             )
 
-        # Get the most recent file if multiple exist
-        most_recent_file = max(matching_files, key=os.path.getmtime)
         logger.info(f"Found session image for session {session_id}: {most_recent_file}")
 
         # Read the image file
-        image_data = open(most_recent_file, 'rb').read()
+        with open(most_recent_file, 'rb') as f:
+            image_data = f.read()
 
         # Return image as binary response
         return Response(content=image_data, media_type="image/jpeg")

From eb9bedae67e61559a80a280e475ac6a54eb82ee1 Mon Sep 17 00:00:00 2001
From: ziesorx
Date: Mon, 3 Nov 2025 17:20:19 +0700
Subject: [PATCH 30/30] refactor: remove all PostgreSQL-related code
---
 core/detection/branches.py |  41 ++-
 core/detection/pipeline.py | 232 ++++++------
 core/models/__init__.py    |   4 +-
 core/models/pipeline.py    |  44 ++-
 core/storage/__init__.py   |   9 +-
 core/storage/database.py   | 712 +++++++++++++++++++------------------
 6 files changed, 545 insertions(+), 497 deletions(-)

diff --git a/core/detection/branches.py b/core/detection/branches.py
index 61b6dbb..89881b2 100644
--- a/core/detection/branches.py
+++ b/core/detection/branches.py
@@ -43,7 +43,7 @@ class BranchProcessor:
 
         # Storage managers (set during initialization)
         self.redis_manager = None
-        self.db_manager = None
+        # self.db_manager = None  # Disabled - PostgreSQL operations moved to microservices
 
         # Branch execution timeout (seconds)
         self.branch_timeout = 30.0
@@ -60,21 +60,21 @@ class BranchProcessor:
 
         logger.info("BranchProcessor initialized")
 
-    async def initialize(self, pipeline_config: Any, redis_manager: Any, db_manager: Any) -> bool:
+    async def initialize(self, pipeline_config: Any, redis_manager: Any, db_manager: Any = None) -> bool:
         """
         Initialize branch processor with pipeline configuration.
Args: pipeline_config: Pipeline configuration object redis_manager: Redis manager instance - db_manager: Database manager instance + db_manager: Database manager instance (deprecated, not used) Returns: True if successful, False otherwise """ try: self.redis_manager = redis_manager - self.db_manager = db_manager + # self.db_manager = db_manager # Disabled - PostgreSQL operations moved to microservices # Parse field mappings from parallelActions to enable dynamic field extraction self._parse_branch_output_fields(pipeline_config) @@ -170,22 +170,25 @@ class BranchProcessor: return for action in pipeline_config.parallel_actions: + # Skip PostgreSQL actions - they are disabled if action.type.value == 'postgresql_update_combined': - fields = action.params.get('fields', {}) - - # Parse each field template to extract branch_id and field_name - for db_field_name, template in fields.items(): - # Template format: "{branch_id.field_name}" - if template.startswith('{') and template.endswith('}'): - var_name = template[1:-1] # Remove { } - - if '.' in var_name: - branch_id, field_name = var_name.split('.', 1) - - # Store the mapping - self.branch_output_fields[branch_id] = field_name - - logger.info(f"[FIELD MAPPING] Branch '{branch_id}' → outputs field '{field_name}'") + logger.debug(f"[FIELD MAPPING] Skipping PostgreSQL action (disabled)") + continue # Skip field parsing for disabled PostgreSQL operations + # fields = action.params.get('fields', {}) + # + # # Parse each field template to extract branch_id and field_name + # for db_field_name, template in fields.items(): + # # Template format: "{branch_id.field_name}" + # if template.startswith('{') and template.endswith('}'): + # var_name = template[1:-1] # Remove { } + # + # if '.' in var_name: + # branch_id, field_name = var_name.split('.', 1) + # + # # Store the mapping + # self.branch_output_fields[branch_id] = field_name + # + # logger.info(f"[FIELD MAPPING] Branch '{branch_id}' → outputs field '{field_name}'") logger.info(f"[FIELD MAPPING] Parsed {len(self.branch_output_fields)} branch output field mappings") diff --git a/core/detection/pipeline.py b/core/detection/pipeline.py index d71f525..26654cc 100644 --- a/core/detection/pipeline.py +++ b/core/detection/pipeline.py @@ -15,7 +15,7 @@ from ..models.inference import YOLOWrapper from ..models.pipeline import PipelineParser from .branches import BranchProcessor from ..storage.redis import RedisManager -from ..storage.database import DatabaseManager +# from ..storage.database import DatabaseManager # Disabled - PostgreSQL moved to microservices from ..storage.license_plate import LicensePlateManager logger = logging.getLogger(__name__) @@ -45,7 +45,7 @@ class DetectionPipeline: # Initialize components self.branch_processor = BranchProcessor(model_manager, model_id) self.redis_manager = None - self.db_manager = None + # self.db_manager = None # Disabled - PostgreSQL operations moved to microservices self.license_plate_manager = None # Main detection model @@ -113,16 +113,18 @@ class DetectionPipeline: return False logger.info("Redis connection initialized") - # Initialize database connection - if self.pipeline_parser.postgresql_config: - self.db_manager = DatabaseManager(self.pipeline_parser.postgresql_config.__dict__) - if not self.db_manager.connect(): - logger.error("Failed to initialize database connection") - return False - # Create required tables - if not self.db_manager.create_car_frontal_info_table(): - logger.warning("Failed to create car_frontal_info table") - logger.info("Database 
connection initialized") + # PostgreSQL database connection DISABLED - operations moved to microservices + # Database operations are now handled by backend services via WebSocket + # if self.pipeline_parser.postgresql_config: + # self.db_manager = DatabaseManager(self.pipeline_parser.postgresql_config.__dict__) + # if not self.db_manager.connect(): + # logger.error("Failed to initialize database connection") + # return False + # # Create required tables + # if not self.db_manager.create_car_frontal_info_table(): + # logger.warning("Failed to create car_frontal_info table") + # logger.info("Database connection initialized") + logger.info("PostgreSQL operations disabled - using WebSocket for data communication") # Initialize license plate manager (using same Redis config as main Redis manager) if self.pipeline_parser.redis_config: @@ -138,11 +140,11 @@ class DetectionPipeline: logger.error("Failed to initialize detection model") return False - # Initialize branch processor + # Initialize branch processor (db_manager=None since PostgreSQL is disabled) if not await self.branch_processor.initialize( self.pipeline_config, self.redis_manager, - self.db_manager + db_manager=None # PostgreSQL disabled ): logger.error("Failed to initialize branch processor") return False @@ -283,23 +285,25 @@ class DetectionPipeline: return # Send imageDetection message with license plate data combined with processing results + # This is the PRIMARY data flow to backend - WebSocket is critical, keep this! await self._send_license_plate_message(subscription_id, license_text, confidence, session_id) - # Update database with license plate information if database manager is available - if self.db_manager and license_text: - success = self.db_manager.execute_update( - table='car_frontal_info', - key_field='session_id', - key_value=session_id, - fields={ - 'license_character': license_text, - 'license_type': 'LPR_detected' # Mark as detected by LPR service - } - ) - if success: - logger.info(f"[LICENSE PLATE] Updated database for session {session_id}") - else: - logger.warning(f"[LICENSE PLATE] Failed to update database for session {session_id}") + # PostgreSQL database update DISABLED - backend handles data via WebSocket messages + # if self.db_manager and license_text: + # success = self.db_manager.execute_update( + # table='car_frontal_info', + # key_field='session_id', + # key_value=session_id, + # fields={ + # 'license_character': license_text, + # 'license_type': 'LPR_detected' # Mark as detected by LPR service + # } + # ) + # if success: + # logger.info(f"[LICENSE PLATE] Updated database for session {session_id}") + # else: + # logger.warning(f"[LICENSE PLATE] Failed to update database for session {session_id}") + logger.debug(f"[LICENSE PLATE] Data sent via WebSocket for session {session_id}") except Exception as e: logger.error(f"Error in license plate result callback: {e}", exc_info=True) @@ -710,17 +714,18 @@ class DetectionPipeline: self.session_to_subscription[session_id] = subscription_id logger.info(f"[SESSION MAPPING] Stored mapping: sessionId '{session_id}' -> subscriptionIdentifier '{subscription_id}'") - # Initialize database record with session_id - if session_id and self.db_manager: - success = self.db_manager.insert_initial_detection( - display_id=display_id, - captured_timestamp=detection_context['timestamp'], - session_id=session_id - ) - if success: - logger.info(f"Created initial database record with session {session_id}") - else: - logger.warning(f"Failed to create initial database record for 
session {session_id}") + # PostgreSQL database insert DISABLED - backend handles data via WebSocket + # if session_id and self.db_manager: + # success = self.db_manager.insert_initial_detection( + # display_id=display_id, + # captured_timestamp=detection_context['timestamp'], + # session_id=session_id + # ) + # if success: + # logger.info(f"Created initial database record with session {session_id}") + # else: + # logger.warning(f"Failed to create initial database record for session {session_id}") + logger.debug(f"Session {session_id} will be communicated via WebSocket") # Execute branches in parallel if hasattr(self.pipeline_config, 'branches') and self.pipeline_config.branches: @@ -886,15 +891,16 @@ class DetectionPipeline: if valid_detections: logger.info(f"Found {len(valid_detections)} valid detections for pipeline processing") - # Initialize database record if session_id is provided - if session_id and self.db_manager: - success = self.db_manager.insert_initial_detection( - display_id=display_id, - captured_timestamp=detection_context['timestamp'], - session_id=session_id - ) - if not success: - logger.warning(f"Failed to create initial database record for session {session_id}") + # PostgreSQL database insert DISABLED - backend handles data via WebSocket + # if session_id and self.db_manager: + # success = self.db_manager.insert_initial_detection( + # display_id=display_id, + # captured_timestamp=detection_context['timestamp'], + # session_id=session_id + # ) + # if not success: + # logger.warning(f"Failed to create initial database record for session {session_id}") + logger.debug(f"Detection results for session {session_id} will be sent via WebSocket") # Execute branches in parallel if hasattr(self.pipeline_config, 'branches') and self.pipeline_config.branches: @@ -1025,11 +1031,16 @@ class DetectionPipeline: logger.debug(f"Executing parallel action: {action_type}") if action_type == 'postgresql_update_combined': - result = await self._execute_postgresql_update_combined(action, context) + # PostgreSQL action SKIPPED - database operations disabled + logger.info(f"Skipping PostgreSQL action '{action_type}' (disabled)") + result = {'status': 'skipped', 'message': 'PostgreSQL operations disabled'} - # Update session state with processing results after database update - if result.get('status') == 'success': - await self._update_session_with_processing_results(context) + # Still update session state for WebSocket messaging + await self._update_session_with_processing_results(context) + + # result = await self._execute_postgresql_update_combined(action, context) + # if result.get('status') == 'success': + # await self._update_session_with_processing_results(context) else: logger.warning(f"Unknown parallel action type: {action_type}") result = {'status': 'error', 'message': f'Unknown action type: {action_type}'} @@ -1132,59 +1143,61 @@ class DetectionPipeline: logger.error(f"Error in redis_publish action: {e}", exc_info=True) return {'status': 'error', 'message': str(e)} - async def _execute_postgresql_update_combined(self, - action: Dict, - context: Dict[str, Any]) -> Dict[str, Any]: - """Execute postgresql_update_combined action.""" - if not self.db_manager: - return {'status': 'error', 'message': 'Database not available'} - - try: - # Wait for required branches if specified - wait_for_branches = action.params.get('waitForBranches', []) - branch_results = context.get('branch_results', {}) - - # Log missing branches but don't block the update (allow partial results) - missing_branches = [b 
for b in wait_for_branches if b not in branch_results] - if missing_branches: - logger.warning(f"Some branches missing from results (will use null): {missing_branches}") - available_branches = [b for b in wait_for_branches if b in branch_results] - if available_branches: - logger.info(f"Available branches for database update: {available_branches}") - - # Prepare fields for database update - table = action.params.get('table', 'car_frontal_info') - key_field = action.params.get('key_field', 'session_id') - key_value = action.params.get('key_value', '{session_id}').format(**context) - field_mappings = action.params.get('fields', {}) - - # Resolve field values using branch results - resolved_fields = {} - for field_name, field_template in field_mappings.items(): - try: - # Replace template variables with actual values from branch results - resolved_value = self._resolve_field_template(field_template, branch_results, context) - resolved_fields[field_name] = resolved_value - except Exception as e: - logger.warning(f"Failed to resolve field {field_name}: {e}") - resolved_fields[field_name] = None - - # Execute database update - success = self.db_manager.execute_update( - table=table, - key_field=key_field, - key_value=key_value, - fields=resolved_fields - ) - - if success: - return {'status': 'success', 'table': table, 'key': f'{key_field}={key_value}', 'fields': resolved_fields} - else: - return {'status': 'error', 'message': 'Database update failed'} - - except Exception as e: - logger.error(f"Error in postgresql_update_combined action: {e}", exc_info=True) - return {'status': 'error', 'message': str(e)} + # PostgreSQL update method DISABLED - database operations moved to microservices + # This method is no longer used as data flows via WebSocket messages to backend + # async def _execute_postgresql_update_combined(self, + # action: Dict, + # context: Dict[str, Any]) -> Dict[str, Any]: + # """Execute postgresql_update_combined action.""" + # if not self.db_manager: + # return {'status': 'error', 'message': 'Database not available'} + # + # try: + # # Wait for required branches if specified + # wait_for_branches = action.params.get('waitForBranches', []) + # branch_results = context.get('branch_results', {}) + # + # # Log missing branches but don't block the update (allow partial results) + # missing_branches = [b for b in wait_for_branches if b not in branch_results] + # if missing_branches: + # logger.warning(f"Some branches missing from results (will use null): {missing_branches}") + # available_branches = [b for b in wait_for_branches if b in branch_results] + # if available_branches: + # logger.info(f"Available branches for database update: {available_branches}") + # + # # Prepare fields for database update + # table = action.params.get('table', 'car_frontal_info') + # key_field = action.params.get('key_field', 'session_id') + # key_value = action.params.get('key_value', '{session_id}').format(**context) + # field_mappings = action.params.get('fields', {}) + # + # # Resolve field values using branch results + # resolved_fields = {} + # for field_name, field_template in field_mappings.items(): + # try: + # # Replace template variables with actual values from branch results + # resolved_value = self._resolve_field_template(field_template, branch_results, context) + # resolved_fields[field_name] = resolved_value + # except Exception as e: + # logger.warning(f"Failed to resolve field {field_name}: {e}") + # resolved_fields[field_name] = None + # + # # Execute database update + # success = 
self.db_manager.execute_update( + # table=table, + # key_field=key_field, + # key_value=key_value, + # fields=resolved_fields + # ) + # + # if success: + # return {'status': 'success', 'table': table, 'key': f'{key_field}={key_value}', 'fields': resolved_fields} + # else: + # return {'status': 'error', 'message': 'Database update failed'} + # + # except Exception as e: + # logger.error(f"Error in postgresql_update_combined action: {e}", exc_info=True) + # return {'status': 'error', 'message': str(e)} def _resolve_field_template(self, template: str, branch_results: Dict, context: Dict) -> str: """ @@ -1270,7 +1283,7 @@ class DetectionPipeline: 'branches': branch_stats, 'license_plate': license_stats, 'redis_available': self.redis_manager is not None, - 'database_available': self.db_manager is not None, + # 'database_available': self.db_manager is not None, # PostgreSQL disabled 'detection_model_loaded': self.detection_model is not None } @@ -1282,8 +1295,9 @@ class DetectionPipeline: if self.redis_manager: self.redis_manager.cleanup() - if self.db_manager: - self.db_manager.disconnect() + # PostgreSQL disconnect DISABLED - database operations moved to microservices + # if self.db_manager: + # self.db_manager.disconnect() if self.branch_processor: self.branch_processor.cleanup() diff --git a/core/models/__init__.py b/core/models/__init__.py index c817eb2..fa2c71a 100644 --- a/core/models/__init__.py +++ b/core/models/__init__.py @@ -11,7 +11,7 @@ from .pipeline import ( Action, ActionType, RedisConfig, - PostgreSQLConfig + # PostgreSQLConfig # Disabled - moved to microservices ) from .inference import ( YOLOWrapper, @@ -32,7 +32,7 @@ __all__ = [ 'Action', 'ActionType', 'RedisConfig', - 'PostgreSQLConfig', + # 'PostgreSQLConfig', # Disabled - moved to microservices # Inference 'YOLOWrapper', diff --git a/core/models/pipeline.py b/core/models/pipeline.py index de5667b..3ae7463 100644 --- a/core/models/pipeline.py +++ b/core/models/pipeline.py @@ -16,6 +16,8 @@ class ActionType(Enum): """Supported action types in pipeline""" REDIS_SAVE_IMAGE = "redis_save_image" REDIS_PUBLISH = "redis_publish" + # PostgreSQL actions below are DEPRECATED - kept for backward compatibility only + # These actions will be silently skipped during pipeline execution POSTGRESQL_UPDATE = "postgresql_update" POSTGRESQL_UPDATE_COMBINED = "postgresql_update_combined" POSTGRESQL_INSERT = "postgresql_insert" @@ -41,7 +43,15 @@ class RedisConfig: @dataclass class PostgreSQLConfig: - """PostgreSQL connection configuration""" + """ + PostgreSQL connection configuration - DISABLED + + NOTE: This configuration is kept for backward compatibility with existing + pipeline.json files, but PostgreSQL operations are disabled. All database + operations have been moved to microservices architecture. + + This config will be parsed but not used for any database connections. 
+ """ host: str port: int database: str @@ -50,6 +60,7 @@ class PostgreSQLConfig: @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'PostgreSQLConfig': + """Parse PostgreSQL config from dict (kept for backward compatibility)""" return cls( host=data['host'], port=data.get('port', 5432), @@ -272,17 +283,19 @@ class PipelineParser: if not self._validate_actions(self.pipeline_config): return False - # Validate parallel actions + # Validate parallel actions (PostgreSQL actions are skipped) for action in self.pipeline_config.parallel_actions: if action.type == ActionType.POSTGRESQL_UPDATE_COMBINED: - wait_for = action.params.get('waitForBranches', []) - if wait_for: - # Check that referenced branches exist - branch_ids = self._get_all_branch_ids(self.pipeline_config) - for branch_id in wait_for: - if branch_id not in branch_ids: - logger.error(f"Referenced branch '{branch_id}' in waitForBranches not found") - return False + logger.warning(f"PostgreSQL parallel action {action.type.value} found but will be SKIPPED (PostgreSQL disabled)") + # Skip validation for PostgreSQL actions since they won't be executed + # wait_for = action.params.get('waitForBranches', []) + # if wait_for: + # # Check that referenced branches exist + # branch_ids = self._get_all_branch_ids(self.pipeline_config) + # for branch_id in wait_for: + # if branch_id not in branch_ids: + # logger.error(f"Referenced branch '{branch_id}' in waitForBranches not found") + # return False logger.info("Pipeline configuration validated successfully") return True @@ -305,11 +318,14 @@ class PipelineParser: logger.error(f"Action {action.type} requires Redis configuration") return False - # Validate PostgreSQL actions need PostgreSQL config + # PostgreSQL actions are disabled - log warning instead of failing + # Kept for backward compatibility with existing pipeline.json files if action.type in [ActionType.POSTGRESQL_UPDATE, ActionType.POSTGRESQL_UPDATE_COMBINED, ActionType.POSTGRESQL_INSERT]: - if not self.postgresql_config: - logger.error(f"Action {action.type} requires PostgreSQL configuration") - return False + logger.warning(f"PostgreSQL action {action.type.value} found but will be SKIPPED (PostgreSQL disabled)") + # Do not fail validation - just skip these actions during execution + # if not self.postgresql_config: + # logger.error(f"Action {action.type} requires PostgreSQL configuration") + # return False # Recursively validate branches if hasattr(config, 'branches'): diff --git a/core/storage/__init__.py b/core/storage/__init__.py index 973837a..b2ff324 100644 --- a/core/storage/__init__.py +++ b/core/storage/__init__.py @@ -1,10 +1,13 @@ """ Storage module for the Python Detector Worker. -This module provides Redis and PostgreSQL operations for data persistence +This module provides Redis operations for data persistence and caching in the detection pipeline. + +Note: PostgreSQL operations have been disabled as database functionality +has been moved to microservices architecture. """ from .redis import RedisManager -from .database import DatabaseManager +# from .database import DatabaseManager # Disabled - moved to microservices -__all__ = ['RedisManager', 'DatabaseManager'] \ No newline at end of file +__all__ = ['RedisManager'] # Removed 'DatabaseManager' \ No newline at end of file diff --git a/core/storage/database.py b/core/storage/database.py index a90df97..4715768 100644 --- a/core/storage/database.py +++ b/core/storage/database.py @@ -1,357 +1,369 @@ """ -Database Operations Module. 
-Handles PostgreSQL operations for the detection pipeline. +Database Operations Module - DISABLED + +NOTE: This module has been disabled as PostgreSQL database operations have been +moved to microservices architecture. All database connections, reads, and writes +are now handled by separate backend services. + +The detection worker now communicates results via: +- WebSocket imageDetection messages (primary data flow to backend) +- Redis image storage and pub/sub (temporary storage) + +Original functionality: PostgreSQL operations for the detection pipeline. +Status: Commented out - DO NOT ENABLE without updating architecture """ -import psycopg2 -import psycopg2.extras + +# All PostgreSQL functionality below has been commented out +# import psycopg2 +# import psycopg2.extras from typing import Optional, Dict, Any import logging -import uuid +# import uuid logger = logging.getLogger(__name__) - -class DatabaseManager: - """ - Manages PostgreSQL connections and operations for the detection pipeline. - Handles database operations and schema management. - """ - - def __init__(self, config: Dict[str, Any]): - """ - Initialize database manager with configuration. - - Args: - config: Database configuration dictionary - """ - self.config = config - self.connection: Optional[psycopg2.extensions.connection] = None - - def connect(self) -> bool: - """ - Connect to PostgreSQL database. - - Returns: - True if successful, False otherwise - """ - try: - self.connection = psycopg2.connect( - host=self.config['host'], - port=self.config['port'], - database=self.config['database'], - user=self.config['username'], - password=self.config['password'] - ) - logger.info("PostgreSQL connection established successfully") - return True - except Exception as e: - logger.error(f"Failed to connect to PostgreSQL: {e}") - return False - - def disconnect(self): - """Disconnect from PostgreSQL database.""" - if self.connection: - self.connection.close() - self.connection = None - logger.info("PostgreSQL connection closed") - - def is_connected(self) -> bool: - """ - Check if database connection is active. - - Returns: - True if connected, False otherwise - """ - try: - if self.connection and not self.connection.closed: - cur = self.connection.cursor() - cur.execute("SELECT 1") - cur.fetchone() - cur.close() - return True - except: - pass - return False - - def update_car_info(self, session_id: str, brand: str, model: str, body_type: str) -> bool: - """ - Update car information in the database. 
- - Args: - session_id: Session identifier - brand: Car brand - model: Car model - body_type: Car body type - - Returns: - True if successful, False otherwise - """ - if not self.is_connected(): - if not self.connect(): - return False - - try: - cur = self.connection.cursor() - query = """ - INSERT INTO car_frontal_info (session_id, car_brand, car_model, car_body_type, updated_at) - VALUES (%s, %s, %s, %s, NOW()) - ON CONFLICT (session_id) - DO UPDATE SET - car_brand = EXCLUDED.car_brand, - car_model = EXCLUDED.car_model, - car_body_type = EXCLUDED.car_body_type, - updated_at = NOW() - """ - cur.execute(query, (session_id, brand, model, body_type)) - self.connection.commit() - cur.close() - logger.info(f"Updated car info for session {session_id}: {brand} {model} ({body_type})") - return True - except Exception as e: - logger.error(f"Failed to update car info: {e}") - if self.connection: - self.connection.rollback() - return False - - def execute_update(self, table: str, key_field: str, key_value: str, fields: Dict[str, str]) -> bool: - """ - Execute a dynamic update query on the database. - - Args: - table: Table name - key_field: Primary key field name - key_value: Primary key value - fields: Dictionary of fields to update - - Returns: - True if successful, False otherwise - """ - if not self.is_connected(): - if not self.connect(): - return False - - try: - cur = self.connection.cursor() - - # Build the UPDATE query dynamically - set_clauses = [] - values = [] - - for field, value in fields.items(): - if value == "NOW()": - set_clauses.append(f"{field} = NOW()") - else: - set_clauses.append(f"{field} = %s") - values.append(value) - - # Add schema prefix if table doesn't already have it - full_table_name = table if '.' in table else f"gas_station_1.{table}" - - query = f""" - INSERT INTO {full_table_name} ({key_field}, {', '.join(fields.keys())}) - VALUES (%s, {', '.join(['%s'] * len(fields))}) - ON CONFLICT ({key_field}) - DO UPDATE SET {', '.join(set_clauses)} - """ - - # Add key_value to the beginning of values list - all_values = [key_value] + list(fields.values()) + values - - cur.execute(query, all_values) - self.connection.commit() - cur.close() - logger.info(f"Updated {table} for {key_field}={key_value}") - return True - except Exception as e: - logger.error(f"Failed to execute update on {table}: {e}") - if self.connection: - self.connection.rollback() - return False - - def create_car_frontal_info_table(self) -> bool: - """ - Create the car_frontal_info table in gas_station_1 schema if it doesn't exist. 
- - Returns: - True if successful, False otherwise - """ - if not self.is_connected(): - if not self.connect(): - return False - - try: - # Since the database already exists, just verify connection - cur = self.connection.cursor() - - # Simple verification that the table exists - cur.execute(""" - SELECT EXISTS ( - SELECT FROM information_schema.tables - WHERE table_schema = 'gas_station_1' - AND table_name = 'car_frontal_info' - ) - """) - - table_exists = cur.fetchone()[0] - cur.close() - - if table_exists: - logger.info("Verified car_frontal_info table exists") - return True - else: - logger.error("car_frontal_info table does not exist in the database") - return False - - except Exception as e: - logger.error(f"Failed to create car_frontal_info table: {e}") - if self.connection: - self.connection.rollback() - return False - - def insert_initial_detection(self, display_id: str, captured_timestamp: str, session_id: str = None) -> str: - """ - Insert initial detection record and return the session_id. - - Args: - display_id: Display identifier - captured_timestamp: Timestamp of the detection - session_id: Optional session ID, generates one if not provided - - Returns: - Session ID string or None on error - """ - if not self.is_connected(): - if not self.connect(): - return None - - # Generate session_id if not provided - if not session_id: - session_id = str(uuid.uuid4()) - - try: - # Ensure table exists - if not self.create_car_frontal_info_table(): - logger.error("Failed to create/verify table before insertion") - return None - - cur = self.connection.cursor() - insert_query = """ - INSERT INTO gas_station_1.car_frontal_info - (display_id, captured_timestamp, session_id, license_character, license_type, car_brand, car_model, car_body_type) - VALUES (%s, %s, %s, NULL, 'No model available', NULL, NULL, NULL) - ON CONFLICT (session_id) DO NOTHING - """ - - cur.execute(insert_query, (display_id, captured_timestamp, session_id)) - self.connection.commit() - cur.close() - logger.info(f"Inserted initial detection record with session_id: {session_id}") - return session_id - - except Exception as e: - logger.error(f"Failed to insert initial detection record: {e}") - if self.connection: - self.connection.rollback() - return None - - def get_session_info(self, session_id: str) -> Optional[Dict[str, Any]]: - """ - Get session information from the database. - - Args: - session_id: Session identifier - - Returns: - Dictionary with session data or None if not found - """ - if not self.is_connected(): - if not self.connect(): - return None - - try: - cur = self.connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) - query = "SELECT * FROM gas_station_1.car_frontal_info WHERE session_id = %s" - cur.execute(query, (session_id,)) - result = cur.fetchone() - cur.close() - - if result: - return dict(result) - else: - logger.debug(f"No session info found for session_id: {session_id}") - return None - - except Exception as e: - logger.error(f"Failed to get session info: {e}") - return None - - def delete_session(self, session_id: str) -> bool: - """ - Delete session record from the database. 
- - Args: - session_id: Session identifier - - Returns: - True if successful, False otherwise - """ - if not self.is_connected(): - if not self.connect(): - return False - - try: - cur = self.connection.cursor() - query = "DELETE FROM gas_station_1.car_frontal_info WHERE session_id = %s" - cur.execute(query, (session_id,)) - rows_affected = cur.rowcount - self.connection.commit() - cur.close() - - if rows_affected > 0: - logger.info(f"Deleted session record: {session_id}") - return True - else: - logger.warning(f"No session record found to delete: {session_id}") - return False - - except Exception as e: - logger.error(f"Failed to delete session: {e}") - if self.connection: - self.connection.rollback() - return False - - def get_statistics(self) -> Dict[str, Any]: - """ - Get database statistics. - - Returns: - Dictionary with database statistics - """ - stats = { - 'connected': self.is_connected(), - 'host': self.config.get('host', 'unknown'), - 'port': self.config.get('port', 'unknown'), - 'database': self.config.get('database', 'unknown') - } - - if self.is_connected(): - try: - cur = self.connection.cursor() - - # Get table record count - cur.execute("SELECT COUNT(*) FROM gas_station_1.car_frontal_info") - stats['total_records'] = cur.fetchone()[0] - - # Get recent records count (last hour) - cur.execute(""" - SELECT COUNT(*) FROM gas_station_1.car_frontal_info - WHERE created_at > NOW() - INTERVAL '1 hour' - """) - stats['recent_records'] = cur.fetchone()[0] - - cur.close() - except Exception as e: - logger.warning(f"Failed to get database statistics: {e}") - stats['error'] = str(e) - - return stats \ No newline at end of file +# DatabaseManager class is disabled - all methods commented out +# class DatabaseManager: +# """ +# Manages PostgreSQL connections and operations for the detection pipeline. +# Handles database operations and schema management. +# """ +# +# def __init__(self, config: Dict[str, Any]): +# """ +# Initialize database manager with configuration. +# +# Args: +# config: Database configuration dictionary +# """ +# self.config = config +# self.connection: Optional[psycopg2.extensions.connection] = None +# +# def connect(self) -> bool: +# """ +# Connect to PostgreSQL database. +# +# Returns: +# True if successful, False otherwise +# """ +# try: +# self.connection = psycopg2.connect( +# host=self.config['host'], +# port=self.config['port'], +# database=self.config['database'], +# user=self.config['username'], +# password=self.config['password'] +# ) +# logger.info("PostgreSQL connection established successfully") +# return True +# except Exception as e: +# logger.error(f"Failed to connect to PostgreSQL: {e}") +# return False +# +# def disconnect(self): +# """Disconnect from PostgreSQL database.""" +# if self.connection: +# self.connection.close() +# self.connection = None +# logger.info("PostgreSQL connection closed") +# +# def is_connected(self) -> bool: +# """ +# Check if database connection is active. +# +# Returns: +# True if connected, False otherwise +# """ +# try: +# if self.connection and not self.connection.closed: +# cur = self.connection.cursor() +# cur.execute("SELECT 1") +# cur.fetchone() +# cur.close() +# return True +# except: +# pass +# return False +# +# def update_car_info(self, session_id: str, brand: str, model: str, body_type: str) -> bool: +# """ +# Update car information in the database. 
+# +# Args: +# session_id: Session identifier +# brand: Car brand +# model: Car model +# body_type: Car body type +# +# Returns: +# True if successful, False otherwise +# """ +# if not self.is_connected(): +# if not self.connect(): +# return False +# +# try: +# cur = self.connection.cursor() +# query = """ +# INSERT INTO car_frontal_info (session_id, car_brand, car_model, car_body_type, updated_at) +# VALUES (%s, %s, %s, %s, NOW()) +# ON CONFLICT (session_id) +# DO UPDATE SET +# car_brand = EXCLUDED.car_brand, +# car_model = EXCLUDED.car_model, +# car_body_type = EXCLUDED.car_body_type, +# updated_at = NOW() +# """ +# cur.execute(query, (session_id, brand, model, body_type)) +# self.connection.commit() +# cur.close() +# logger.info(f"Updated car info for session {session_id}: {brand} {model} ({body_type})") +# return True +# except Exception as e: +# logger.error(f"Failed to update car info: {e}") +# if self.connection: +# self.connection.rollback() +# return False +# +# def execute_update(self, table: str, key_field: str, key_value: str, fields: Dict[str, str]) -> bool: +# """ +# Execute a dynamic update query on the database. +# +# Args: +# table: Table name +# key_field: Primary key field name +# key_value: Primary key value +# fields: Dictionary of fields to update +# +# Returns: +# True if successful, False otherwise +# """ +# if not self.is_connected(): +# if not self.connect(): +# return False +# +# try: +# cur = self.connection.cursor() +# +# # Build the UPDATE query dynamically +# set_clauses = [] +# values = [] +# +# for field, value in fields.items(): +# if value == "NOW()": +# set_clauses.append(f"{field} = NOW()") +# else: +# set_clauses.append(f"{field} = %s") +# values.append(value) +# +# # Add schema prefix if table doesn't already have it +# full_table_name = table if '.' in table else f"gas_station_1.{table}" +# +# query = f""" +# INSERT INTO {full_table_name} ({key_field}, {', '.join(fields.keys())}) +# VALUES (%s, {', '.join(['%s'] * len(fields))}) +# ON CONFLICT ({key_field}) +# DO UPDATE SET {', '.join(set_clauses)} +# """ +# +# # Add key_value to the beginning of values list +# all_values = [key_value] + list(fields.values()) + values +# +# cur.execute(query, all_values) +# self.connection.commit() +# cur.close() +# logger.info(f"Updated {table} for {key_field}={key_value}") +# return True +# except Exception as e: +# logger.error(f"Failed to execute update on {table}: {e}") +# if self.connection: +# self.connection.rollback() +# return False +# +# def create_car_frontal_info_table(self) -> bool: +# """ +# Create the car_frontal_info table in gas_station_1 schema if it doesn't exist. 
+# +# Returns: +# True if successful, False otherwise +# """ +# if not self.is_connected(): +# if not self.connect(): +# return False +# +# try: +# # Since the database already exists, just verify connection +# cur = self.connection.cursor() +# +# # Simple verification that the table exists +# cur.execute(""" +# SELECT EXISTS ( +# SELECT FROM information_schema.tables +# WHERE table_schema = 'gas_station_1' +# AND table_name = 'car_frontal_info' +# ) +# """) +# +# table_exists = cur.fetchone()[0] +# cur.close() +# +# if table_exists: +# logger.info("Verified car_frontal_info table exists") +# return True +# else: +# logger.error("car_frontal_info table does not exist in the database") +# return False +# +# except Exception as e: +# logger.error(f"Failed to create car_frontal_info table: {e}") +# if self.connection: +# self.connection.rollback() +# return False +# +# def insert_initial_detection(self, display_id: str, captured_timestamp: str, session_id: str = None) -> str: +# """ +# Insert initial detection record and return the session_id. +# +# Args: +# display_id: Display identifier +# captured_timestamp: Timestamp of the detection +# session_id: Optional session ID, generates one if not provided +# +# Returns: +# Session ID string or None on error +# """ +# if not self.is_connected(): +# if not self.connect(): +# return None +# +# # Generate session_id if not provided +# if not session_id: +# session_id = str(uuid.uuid4()) +# +# try: +# # Ensure table exists +# if not self.create_car_frontal_info_table(): +# logger.error("Failed to create/verify table before insertion") +# return None +# +# cur = self.connection.cursor() +# insert_query = """ +# INSERT INTO gas_station_1.car_frontal_info +# (display_id, captured_timestamp, session_id, license_character, license_type, car_brand, car_model, car_body_type) +# VALUES (%s, %s, %s, NULL, 'No model available', NULL, NULL, NULL) +# ON CONFLICT (session_id) DO NOTHING +# """ +# +# cur.execute(insert_query, (display_id, captured_timestamp, session_id)) +# self.connection.commit() +# cur.close() +# logger.info(f"Inserted initial detection record with session_id: {session_id}") +# return session_id +# +# except Exception as e: +# logger.error(f"Failed to insert initial detection record: {e}") +# if self.connection: +# self.connection.rollback() +# return None +# +# def get_session_info(self, session_id: str) -> Optional[Dict[str, Any]]: +# """ +# Get session information from the database. +# +# Args: +# session_id: Session identifier +# +# Returns: +# Dictionary with session data or None if not found +# """ +# if not self.is_connected(): +# if not self.connect(): +# return None +# +# try: +# cur = self.connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) +# query = "SELECT * FROM gas_station_1.car_frontal_info WHERE session_id = %s" +# cur.execute(query, (session_id,)) +# result = cur.fetchone() +# cur.close() +# +# if result: +# return dict(result) +# else: +# logger.debug(f"No session info found for session_id: {session_id}") +# return None +# +# except Exception as e: +# logger.error(f"Failed to get session info: {e}") +# return None +# +# def delete_session(self, session_id: str) -> bool: +# """ +# Delete session record from the database. 
+# +# Args: +# session_id: Session identifier +# +# Returns: +# True if successful, False otherwise +# """ +# if not self.is_connected(): +# if not self.connect(): +# return False +# +# try: +# cur = self.connection.cursor() +# query = "DELETE FROM gas_station_1.car_frontal_info WHERE session_id = %s" +# cur.execute(query, (session_id,)) +# rows_affected = cur.rowcount +# self.connection.commit() +# cur.close() +# +# if rows_affected > 0: +# logger.info(f"Deleted session record: {session_id}") +# return True +# else: +# logger.warning(f"No session record found to delete: {session_id}") +# return False +# +# except Exception as e: +# logger.error(f"Failed to delete session: {e}") +# if self.connection: +# self.connection.rollback() +# return False +# +# def get_statistics(self) -> Dict[str, Any]: +# """ +# Get database statistics. +# +# Returns: +# Dictionary with database statistics +# """ +# stats = { +# 'connected': self.is_connected(), +# 'host': self.config.get('host', 'unknown'), +# 'port': self.config.get('port', 'unknown'), +# 'database': self.config.get('database', 'unknown') +# } +# +# if self.is_connected(): +# try: +# cur = self.connection.cursor() +# +# # Get table record count +# cur.execute("SELECT COUNT(*) FROM gas_station_1.car_frontal_info") +# stats['total_records'] = cur.fetchone()[0] +# +# # Get recent records count (last hour) +# cur.execute(""" +# SELECT COUNT(*) FROM gas_station_1.car_frontal_info +# WHERE created_at > NOW() - INTERVAL '1 hour' +# """) +# stats['recent_records'] = cur.fetchone()[0] +# +# cur.close() +# except Exception as e: +# logger.warning(f"Failed to get database statistics: {e}") +# stats['error'] = str(e) +# +# return stats
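For reference, a minimal runnable sketch of the two-level timeout pattern that patch 27 adds to parallel branch execution. The run_branch stand-in and branch IDs are hypothetical; only the as_completed()/future.result() timeout structure mirrors the patch:

    import time
    from concurrent.futures import ThreadPoolExecutor, as_completed
    from concurrent.futures import TimeoutError as FuturesTimeout

    BRANCH_TIMEOUT = 30.0  # mirrors self.branch_timeout in BranchProcessor

    def run_branch(branch_id: str, delay: float) -> dict:
        time.sleep(delay)  # stand-in for model inference
        return {'status': 'success', 'branch_id': branch_id}

    branches = {'car_brand_cls_v3': 0.1, 'car_bodytype_cls_v1': 0.2}  # hypothetical IDs
    results = {}

    with ThreadPoolExecutor(max_workers=4) as pool:
        future_to_branch = {pool.submit(run_branch, bid, d): bid
                            for bid, d in branches.items()}
        try:
            # Outer timeout bounds the whole batch via the as_completed() iterator
            for future in as_completed(future_to_branch, timeout=BRANCH_TIMEOUT):
                branch_id = future_to_branch[future]
                try:
                    # Per-result timeout, as in the patch (belt and braces:
                    # futures yielded by as_completed() are already done)
                    results[branch_id] = future.result(timeout=BRANCH_TIMEOUT)
                except FuturesTimeout:
                    results[branch_id] = {'status': 'timeout'}
                except Exception as exc:
                    results[branch_id] = {'status': 'error', 'message': str(exc)}
        except FuturesTimeout:
            # The iterator itself timed out: mark every branch that never
            # finished, matching the patch's handling of incomplete futures
            for future, branch_id in future_to_branch.items():
                if branch_id not in results:
                    results[branch_id] = {'status': 'timeout'}

    print(results)

The outer except clause is what guarantees every branch ID ends up in results with an explicit status, which the [PARALLEL SUMMARY] logging added in the same patch relies on.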