Compare commits
63 commits
dev-refact
...
dev
Author | SHA1 | Date | |
---|---|---|---|
b2e7bc499d | |||
|
402f7732a8 | ||
|
9e5b5a32ad | ||
|
3ed7a2cd53 | ||
|
793beb1571 | ||
|
354ed9ce3c | ||
|
e92efdbe11 | ||
|
8d2a71fcd7 | ||
|
fed71046a9 | ||
|
31bc91d57b | ||
|
fa0f865319 | ||
|
ee484b4655 | ||
|
52ba1ff316 | ||
eb57de02c3 | |||
b08ce27de2 | |||
8c08c815ce | |||
2b382210eb | |||
d8d1b33cd8 | |||
33d738b31b | |||
2808316e94 | |||
|
cd1359f5d2 | ||
|
cc604841d0 | ||
|
9f8372d844 | ||
|
61ac39b4f3 | ||
|
791f611f7d | ||
|
bd201acac1 | ||
|
519e073f7f | ||
|
d43a971d5b | ||
|
83aaf95f59 | ||
|
f5c6da8014 | ||
a12e3efa12 | |||
d663aaa446 | |||
fe0da18d0f | |||
73c3367681 | |||
95c39a008f | |||
2742b86961 | |||
84144a2955 | |||
cb31633cc1 | |||
79a1189675 | |||
c38b58e34c | |||
08cb4eafc4 | |||
65b7573fed | |||
a1e7c42fb3 | |||
c6a4258055 | |||
cb9ff7bc86 | |||
bdbf688946 | |||
fa3ab5c6d2 | |||
6fe4b6ebf0 | |||
e2e5356047 | |||
59e8448f0d | |||
360a4ab890 | |||
719d16ae4d | |||
dc1db635d0 | |||
47d4fa6b8f | |||
|
ff56c1b666 | ||
|
a45f76884f | ||
|
0fc86fb72b | ||
|
6bb679f4d8 | ||
|
5f29392c2f | ||
|
b919a1ebe2 | ||
f9a67935d6 | |||
b6d5aabf22 | |||
1cc46e0663 |
28 changed files with 4686 additions and 1049 deletions
11
.claude/settings.local.json
Normal file
11
.claude/settings.local.json
Normal file
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(dir:*)",
|
||||
"WebSearch",
|
||||
"Bash(mkdir:*)"
|
||||
],
|
||||
"deny": [],
|
||||
"ask": []
|
||||
}
|
||||
}
|
124
Dockerfile.base
124
Dockerfile.base
|
@ -1,21 +1,123 @@
|
|||
# Base image with all ML dependencies
|
||||
# Base image with complete ML and hardware acceleration stack
|
||||
FROM pytorch/pytorch:2.8.0-cuda12.6-cudnn9-runtime
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt update && apt install -y \
|
||||
libgl1 \
|
||||
# Install build dependencies and system libraries
|
||||
RUN apt-get update && apt-get install -y \
|
||||
# Build tools
|
||||
build-essential \
|
||||
cmake \
|
||||
git \
|
||||
pkg-config \
|
||||
wget \
|
||||
unzip \
|
||||
yasm \
|
||||
nasm \
|
||||
# Additional dependencies for FFmpeg/NVIDIA build
|
||||
libtool \
|
||||
libc6 \
|
||||
libc6-dev \
|
||||
libnuma1 \
|
||||
libnuma-dev \
|
||||
# Essential compilation libraries
|
||||
gcc \
|
||||
g++ \
|
||||
libc6-dev \
|
||||
linux-libc-dev \
|
||||
# System libraries
|
||||
libgl1-mesa-glx \
|
||||
libglib2.0-0 \
|
||||
libgstreamer1.0-0 \
|
||||
libgtk-3-0 \
|
||||
libavcodec58 \
|
||||
libavformat58 \
|
||||
libswscale5 \
|
||||
libgomp1 \
|
||||
# Core media libraries (essential ones only)
|
||||
libjpeg-dev \
|
||||
libpng-dev \
|
||||
libx264-dev \
|
||||
libx265-dev \
|
||||
libvpx-dev \
|
||||
libmp3lame-dev \
|
||||
libv4l-dev \
|
||||
# TurboJPEG for fast JPEG encoding
|
||||
libturbojpeg0-dev \
|
||||
# Python development
|
||||
python3-dev \
|
||||
python3-numpy \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy and install base requirements (ML dependencies that rarely change)
|
||||
# Add NVIDIA CUDA repository and install minimal development tools
|
||||
RUN apt-get update && apt-get install -y wget gnupg && \
|
||||
wget -O - https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub | apt-key add - && \
|
||||
echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y \
|
||||
cuda-nvcc-12-6 \
|
||||
cuda-cudart-dev-12-6 \
|
||||
libnpp-dev-12-6 \
|
||||
&& apt-get remove -y wget gnupg && \
|
||||
apt-get autoremove -y && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Ensure CUDA paths are available
|
||||
ENV PATH="/usr/local/cuda/bin:${PATH}"
|
||||
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
|
||||
|
||||
# Install NVIDIA Video Codec SDK headers (official method)
|
||||
RUN cd /tmp && \
|
||||
git clone https://git.videolan.org/git/ffmpeg/nv-codec-headers.git && \
|
||||
cd nv-codec-headers && \
|
||||
make install && \
|
||||
cd / && rm -rf /tmp/*
|
||||
|
||||
# Build FFmpeg from source with NVIDIA CUDA support
|
||||
RUN cd /tmp && \
|
||||
echo "Building FFmpeg with NVIDIA CUDA support..." && \
|
||||
# Download FFmpeg source (official method)
|
||||
git clone https://git.ffmpeg.org/ffmpeg.git ffmpeg/ && \
|
||||
cd ffmpeg && \
|
||||
# Configure with NVIDIA support (simplified to avoid configure issues)
|
||||
./configure \
|
||||
--prefix=/usr/local \
|
||||
--enable-shared \
|
||||
--disable-static \
|
||||
--enable-nonfree \
|
||||
--enable-gpl \
|
||||
--enable-cuda-nvcc \
|
||||
--enable-cuvid \
|
||||
--enable-nvdec \
|
||||
--enable-nvenc \
|
||||
--enable-libnpp \
|
||||
--extra-cflags=-I/usr/local/cuda/include \
|
||||
--extra-ldflags=-L/usr/local/cuda/lib64 \
|
||||
--enable-libx264 \
|
||||
--enable-libx265 \
|
||||
--enable-libvpx \
|
||||
--enable-libmp3lame && \
|
||||
# Build and install
|
||||
make -j$(nproc) && \
|
||||
make install && \
|
||||
ldconfig && \
|
||||
# Verify CUVID decoders are available
|
||||
echo "=== Verifying FFmpeg CUVID Support ===" && \
|
||||
(ffmpeg -hide_banner -decoders 2>/dev/null | grep cuvid || echo "No CUVID decoders found") && \
|
||||
echo "=== Verifying FFmpeg NVENC Support ===" && \
|
||||
(ffmpeg -hide_banner -encoders 2>/dev/null | grep nvenc || echo "No NVENC encoders found") && \
|
||||
cd / && rm -rf /tmp/*
|
||||
|
||||
# Set environment variables for maximum hardware acceleration
|
||||
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/lib:${LD_LIBRARY_PATH}"
|
||||
ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig:${PKG_CONFIG_PATH}"
|
||||
ENV PYTHONPATH="/usr/local/lib/python3.10/dist-packages:${PYTHONPATH}"
|
||||
|
||||
# Optimized environment variables for hardware acceleration
|
||||
ENV OPENCV_FFMPEG_CAPTURE_OPTIONS="rtsp_transport;tcp|hwaccel;cuda|hwaccel_device;0|video_codec;h264_cuvid|hwaccel_output_format;cuda"
|
||||
ENV OPENCV_FFMPEG_WRITER_OPTIONS="video_codec;h264_nvenc|preset;fast|tune;zerolatency|gpu;0"
|
||||
ENV CUDA_VISIBLE_DEVICES=0
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,video,utility
|
||||
|
||||
# Copy and install base requirements (exclude opencv-python since we built from source)
|
||||
COPY requirements.base.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.base.txt
|
||||
RUN grep -v opencv-python requirements.base.txt > requirements.tmp && \
|
||||
mv requirements.tmp requirements.base.txt && \
|
||||
pip install --no-cache-dir -r requirements.base.txt
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
|
433
app.py
433
app.py
|
@ -6,8 +6,10 @@ import json
|
|||
import logging
|
||||
import os
|
||||
import time
|
||||
import cv2
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI, WebSocket, HTTPException, Request
|
||||
from typing import Dict, Any
|
||||
from fastapi import FastAPI, WebSocket, HTTPException
|
||||
from fastapi.responses import Response
|
||||
|
||||
# Import new modular communication system
|
||||
|
@ -27,8 +29,84 @@ logging.basicConfig(
|
|||
logger = logging.getLogger("detector_worker")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
# Store cached frames for REST API access (temporary storage)
|
||||
latest_frames = {}
|
||||
# Frames are now stored in the shared cache buffer from core.streaming.buffers
|
||||
# latest_frames = {} # Deprecated - using shared_cache_buffer instead
|
||||
|
||||
|
||||
# Health monitoring recovery handlers
|
||||
def _handle_stream_restart_recovery(component: str, details: Dict[str, Any]) -> bool:
|
||||
"""Handle stream restart recovery at the application level."""
|
||||
try:
|
||||
from core.streaming.manager import shared_stream_manager
|
||||
|
||||
# Extract camera ID from component name (e.g., "stream_cam-001" -> "cam-001")
|
||||
if component.startswith("stream_"):
|
||||
camera_id = component[7:] # Remove "stream_" prefix
|
||||
else:
|
||||
camera_id = component
|
||||
|
||||
logger.info(f"Attempting stream restart recovery for {camera_id}")
|
||||
|
||||
# Find and restart the subscription
|
||||
subscriptions = shared_stream_manager.get_all_subscriptions()
|
||||
for sub_info in subscriptions:
|
||||
if sub_info.camera_id == camera_id:
|
||||
# Remove and re-add the subscription
|
||||
shared_stream_manager.remove_subscription(sub_info.subscription_id)
|
||||
time.sleep(1.0) # Brief delay
|
||||
|
||||
# Re-add subscription
|
||||
success = shared_stream_manager.add_subscription(
|
||||
sub_info.subscription_id,
|
||||
sub_info.stream_config,
|
||||
sub_info.crop_coords,
|
||||
sub_info.model_id,
|
||||
sub_info.model_url,
|
||||
sub_info.tracking_integration
|
||||
)
|
||||
|
||||
if success:
|
||||
logger.info(f"Stream restart recovery successful for {camera_id}")
|
||||
return True
|
||||
else:
|
||||
logger.error(f"Stream restart recovery failed for {camera_id}")
|
||||
return False
|
||||
|
||||
logger.warning(f"No subscription found for camera {camera_id} during recovery")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in stream restart recovery for {component}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def _handle_stream_reconnect_recovery(component: str, details: Dict[str, Any]) -> bool:
|
||||
"""Handle stream reconnect recovery at the application level."""
|
||||
try:
|
||||
from core.streaming.manager import shared_stream_manager
|
||||
|
||||
# Extract camera ID from component name
|
||||
if component.startswith("stream_"):
|
||||
camera_id = component[7:]
|
||||
else:
|
||||
camera_id = component
|
||||
|
||||
logger.info(f"Attempting stream reconnect recovery for {camera_id}")
|
||||
|
||||
# For reconnect, we just need to trigger the stream's internal reconnect
|
||||
# The stream readers handle their own reconnection logic
|
||||
active_cameras = shared_stream_manager.get_active_cameras()
|
||||
|
||||
if camera_id in active_cameras:
|
||||
logger.info(f"Stream reconnect recovery triggered for {camera_id}")
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"Camera {camera_id} not found in active cameras during reconnect recovery")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in stream reconnect recovery for {component}: {e}")
|
||||
return False
|
||||
|
||||
# Lifespan event handler (modern FastAPI approach)
|
||||
@asynccontextmanager
|
||||
|
@ -36,20 +114,58 @@ async def lifespan(app: FastAPI):
|
|||
"""Application lifespan management."""
|
||||
# Startup
|
||||
logger.info("Detector Worker started successfully")
|
||||
|
||||
# Initialize health monitoring system
|
||||
try:
|
||||
from core.monitoring.health import health_monitor
|
||||
from core.monitoring.stream_health import stream_health_tracker
|
||||
from core.monitoring.thread_health import thread_health_monitor
|
||||
from core.monitoring.recovery import recovery_manager
|
||||
|
||||
# Start health monitoring
|
||||
health_monitor.start()
|
||||
logger.info("Health monitoring system started")
|
||||
|
||||
# Register recovery handlers for stream management
|
||||
from core.streaming.manager import shared_stream_manager
|
||||
recovery_manager.register_recovery_handler(
|
||||
"restart_stream",
|
||||
_handle_stream_restart_recovery
|
||||
)
|
||||
recovery_manager.register_recovery_handler(
|
||||
"reconnect",
|
||||
_handle_stream_reconnect_recovery
|
||||
)
|
||||
|
||||
logger.info("Recovery handlers registered")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize health monitoring: {e}")
|
||||
|
||||
logger.info("WebSocket endpoint available at: ws://0.0.0.0:8001/")
|
||||
logger.info("HTTP camera endpoint available at: http://0.0.0.0:8001/camera/{camera_id}/image")
|
||||
logger.info("Health check available at: http://0.0.0.0:8001/health")
|
||||
logger.info("Detailed health monitoring available at: http://0.0.0.0:8001/health/detailed")
|
||||
logger.info("Ready and waiting for backend WebSocket connections")
|
||||
|
||||
yield
|
||||
|
||||
# Shutdown
|
||||
logger.info("Detector Worker shutting down...")
|
||||
|
||||
# Stop health monitoring
|
||||
try:
|
||||
from core.monitoring.health import health_monitor
|
||||
health_monitor.stop()
|
||||
logger.info("Health monitoring system stopped")
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping health monitoring: {e}")
|
||||
|
||||
# Clear all state
|
||||
worker_state.set_subscriptions([])
|
||||
worker_state.session_ids.clear()
|
||||
worker_state.progression_stages.clear()
|
||||
latest_frames.clear()
|
||||
# latest_frames.clear() # No longer needed - frames are in shared_cache_buffer
|
||||
logger.info("Detector Worker shutdown complete")
|
||||
|
||||
# Create FastAPI application with detailed WebSocket logging
|
||||
|
@ -85,13 +201,14 @@ else:
|
|||
os.makedirs("models", exist_ok=True)
|
||||
logger.info("Ensured models directory exists")
|
||||
|
||||
# Initialize stream manager with config value
|
||||
from core.streaming import initialize_stream_manager
|
||||
initialize_stream_manager(max_streams=config.get('max_streams', 10))
|
||||
logger.info(f"Initialized stream manager with max_streams={config.get('max_streams', 10)}")
|
||||
# Stream manager already initialized at module level with max_streams=20
|
||||
# Calling initialize_stream_manager() creates a NEW instance, breaking references
|
||||
# from core.streaming import initialize_stream_manager
|
||||
# initialize_stream_manager(max_streams=config.get('max_streams', 10))
|
||||
logger.info(f"Using stream manager with max_streams=20 (module-level initialization)")
|
||||
|
||||
# Store cached frames for REST API access (temporary storage)
|
||||
latest_frames = {}
|
||||
# Frames are now stored in the shared cache buffer from core.streaming.buffers
|
||||
# latest_frames = {} # Deprecated - using shared_cache_buffer instead
|
||||
|
||||
logger.info("Starting detector worker application (refactored)")
|
||||
logger.info(f"Configuration: Target FPS: {config.get('target_fps', 10)}, "
|
||||
|
@ -150,31 +267,33 @@ async def get_camera_image(camera_id: str):
|
|||
detail=f"Camera {camera_id} not found or not active"
|
||||
)
|
||||
|
||||
# Check if we have a cached frame for this camera
|
||||
if camera_id not in latest_frames:
|
||||
logger.warning(f"No cached frame available for camera '{camera_id}'")
|
||||
# Extract actual camera_id from subscription identifier (displayId;cameraId)
|
||||
# Frames are stored using just the camera_id part
|
||||
actual_camera_id = camera_id.split(';')[-1] if ';' in camera_id else camera_id
|
||||
|
||||
# Get frame from the shared cache buffer
|
||||
from core.streaming.buffers import shared_cache_buffer
|
||||
|
||||
# Only show buffer debug info if camera not found (to reduce log spam)
|
||||
available_cameras = shared_cache_buffer.frame_buffer.get_camera_list()
|
||||
|
||||
frame = shared_cache_buffer.get_frame(actual_camera_id)
|
||||
if frame is None:
|
||||
logger.warning(f"\033[93m[API] No frame for '{actual_camera_id}' - Available: {available_cameras}\033[0m")
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"No frame available for camera {camera_id}"
|
||||
detail=f"No frame available for camera {actual_camera_id}"
|
||||
)
|
||||
|
||||
frame = latest_frames[camera_id]
|
||||
logger.debug(f"Retrieved cached frame for camera '{camera_id}', shape: {frame.shape}")
|
||||
# Successful frame retrieval - log only occasionally to avoid spam
|
||||
|
||||
# TODO: This import will be replaced in Phase 3 (Streaming System)
|
||||
# For now, we need to handle the case where OpenCV is not available
|
||||
try:
|
||||
import cv2
|
||||
# Encode frame as JPEG
|
||||
success, buffer_img = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
|
||||
if not success:
|
||||
raise HTTPException(status_code=500, detail="Failed to encode image as JPEG")
|
||||
# Encode frame as JPEG
|
||||
success, buffer_img = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
|
||||
if not success:
|
||||
raise HTTPException(status_code=500, detail="Failed to encode image as JPEG")
|
||||
|
||||
# Return image as binary response
|
||||
return Response(content=buffer_img.tobytes(), media_type="image/jpeg")
|
||||
except ImportError:
|
||||
logger.error("OpenCV not available for image encoding")
|
||||
raise HTTPException(status_code=500, detail="Image processing not available")
|
||||
# Return image as binary response
|
||||
return Response(content=buffer_img.tobytes(), media_type="image/jpeg")
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
|
@ -183,6 +302,63 @@ async def get_camera_image(camera_id: str):
|
|||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
|
||||
|
||||
@app.get("/session-image/{session_id}")
|
||||
async def get_session_image(session_id: int):
|
||||
"""
|
||||
HTTP endpoint to retrieve the saved session image by session ID.
|
||||
|
||||
Args:
|
||||
session_id: The session ID to retrieve the image for
|
||||
|
||||
Returns:
|
||||
JPEG image as binary response
|
||||
|
||||
Raises:
|
||||
HTTPException: 404 if no image found for the session
|
||||
HTTPException: 500 if reading image fails
|
||||
"""
|
||||
try:
|
||||
from pathlib import Path
|
||||
import glob
|
||||
|
||||
# Images directory
|
||||
images_dir = Path("images")
|
||||
|
||||
if not images_dir.exists():
|
||||
logger.warning(f"Images directory does not exist")
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"No images directory found"
|
||||
)
|
||||
|
||||
# Search for files matching session ID pattern: {session_id}_*
|
||||
pattern = str(images_dir / f"{session_id}_*.jpg")
|
||||
matching_files = glob.glob(pattern)
|
||||
|
||||
if not matching_files:
|
||||
logger.warning(f"No image found for session {session_id}")
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"No image found for session {session_id}"
|
||||
)
|
||||
|
||||
# Get the most recent file if multiple exist
|
||||
most_recent_file = max(matching_files, key=os.path.getmtime)
|
||||
logger.info(f"Found session image for session {session_id}: {most_recent_file}")
|
||||
|
||||
# Read the image file
|
||||
image_data = open(most_recent_file, 'rb').read()
|
||||
|
||||
# Return image as binary response
|
||||
return Response(content=image_data, media_type="image/jpeg")
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error retrieving session image for session {session_id}: {str(e)}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""Health check endpoint for monitoring."""
|
||||
|
@ -194,6 +370,205 @@ async def health_check():
|
|||
}
|
||||
|
||||
|
||||
@app.get("/health/detailed")
|
||||
async def detailed_health_check():
|
||||
"""Comprehensive health status with detailed monitoring data."""
|
||||
try:
|
||||
from core.monitoring.health import health_monitor
|
||||
from core.monitoring.stream_health import stream_health_tracker
|
||||
from core.monitoring.thread_health import thread_health_monitor
|
||||
from core.monitoring.recovery import recovery_manager
|
||||
|
||||
# Get comprehensive health status
|
||||
overall_health = health_monitor.get_health_status()
|
||||
stream_metrics = stream_health_tracker.get_all_metrics()
|
||||
thread_info = thread_health_monitor.get_all_thread_info()
|
||||
recovery_stats = recovery_manager.get_recovery_stats()
|
||||
|
||||
return {
|
||||
"timestamp": time.time(),
|
||||
"overall_health": overall_health,
|
||||
"stream_metrics": stream_metrics,
|
||||
"thread_health": thread_info,
|
||||
"recovery_stats": recovery_stats,
|
||||
"system_info": {
|
||||
"active_subscriptions": len(worker_state.subscriptions),
|
||||
"active_sessions": len(worker_state.session_ids),
|
||||
"version": "2.0.0"
|
||||
}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating detailed health report: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Health monitoring error: {str(e)}")
|
||||
|
||||
|
||||
@app.get("/health/streams")
|
||||
async def stream_health_status():
|
||||
"""Stream-specific health monitoring."""
|
||||
try:
|
||||
from core.monitoring.stream_health import stream_health_tracker
|
||||
from core.streaming.buffers import shared_cache_buffer
|
||||
|
||||
stream_metrics = stream_health_tracker.get_all_metrics()
|
||||
buffer_stats = shared_cache_buffer.get_stats()
|
||||
|
||||
return {
|
||||
"timestamp": time.time(),
|
||||
"stream_count": len(stream_metrics),
|
||||
"stream_metrics": stream_metrics,
|
||||
"buffer_stats": buffer_stats,
|
||||
"frame_ages": {
|
||||
camera_id: {
|
||||
"age_seconds": time.time() - info["last_frame_time"] if info and info.get("last_frame_time") else None,
|
||||
"total_frames": info.get("frame_count", 0) if info else 0
|
||||
}
|
||||
for camera_id, info in stream_metrics.items()
|
||||
}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating stream health report: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Stream health error: {str(e)}")
|
||||
|
||||
|
||||
@app.get("/health/threads")
|
||||
async def thread_health_status():
|
||||
"""Thread-specific health monitoring."""
|
||||
try:
|
||||
from core.monitoring.thread_health import thread_health_monitor
|
||||
|
||||
thread_info = thread_health_monitor.get_all_thread_info()
|
||||
deadlocks = thread_health_monitor.detect_deadlocks()
|
||||
|
||||
return {
|
||||
"timestamp": time.time(),
|
||||
"thread_count": len(thread_info),
|
||||
"thread_info": thread_info,
|
||||
"potential_deadlocks": deadlocks,
|
||||
"summary": {
|
||||
"responsive_threads": sum(1 for info in thread_info.values() if info.get("is_responsive", False)),
|
||||
"unresponsive_threads": sum(1 for info in thread_info.values() if not info.get("is_responsive", True)),
|
||||
"deadlock_count": len(deadlocks)
|
||||
}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating thread health report: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Thread health error: {str(e)}")
|
||||
|
||||
|
||||
@app.get("/health/recovery")
|
||||
async def recovery_status():
|
||||
"""Recovery system status and history."""
|
||||
try:
|
||||
from core.monitoring.recovery import recovery_manager
|
||||
|
||||
recovery_stats = recovery_manager.get_recovery_stats()
|
||||
|
||||
return {
|
||||
"timestamp": time.time(),
|
||||
"recovery_stats": recovery_stats,
|
||||
"summary": {
|
||||
"total_recoveries_last_hour": recovery_stats.get("total_recoveries_last_hour", 0),
|
||||
"components_with_recovery_state": len(recovery_stats.get("recovery_states", {})),
|
||||
"total_recovery_failures": sum(
|
||||
state.get("failure_count", 0)
|
||||
for state in recovery_stats.get("recovery_states", {}).values()
|
||||
),
|
||||
"total_recovery_successes": sum(
|
||||
state.get("success_count", 0)
|
||||
for state in recovery_stats.get("recovery_states", {}).values()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating recovery status report: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Recovery status error: {str(e)}")
|
||||
|
||||
|
||||
@app.post("/health/recovery/force/{component}")
|
||||
async def force_recovery(component: str, action: str = "restart_stream"):
|
||||
"""Force recovery action for a specific component."""
|
||||
try:
|
||||
from core.monitoring.recovery import recovery_manager, RecoveryAction
|
||||
|
||||
# Validate action
|
||||
try:
|
||||
recovery_action = RecoveryAction(action)
|
||||
except ValueError:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Invalid recovery action: {action}. Valid actions: {[a.value for a in RecoveryAction]}"
|
||||
)
|
||||
|
||||
# Force recovery
|
||||
success = recovery_manager.force_recovery(component, recovery_action, "manual_api_request")
|
||||
|
||||
return {
|
||||
"timestamp": time.time(),
|
||||
"component": component,
|
||||
"action": action,
|
||||
"success": success,
|
||||
"message": f"Recovery {'successful' if success else 'failed'} for component {component}"
|
||||
}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error forcing recovery for {component}: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Recovery error: {str(e)}")
|
||||
|
||||
|
||||
@app.get("/health/metrics")
|
||||
async def health_metrics():
|
||||
"""Performance and health metrics in a format suitable for monitoring systems."""
|
||||
try:
|
||||
from core.monitoring.health import health_monitor
|
||||
from core.monitoring.stream_health import stream_health_tracker
|
||||
from core.streaming.buffers import shared_cache_buffer
|
||||
|
||||
# Get basic metrics
|
||||
overall_health = health_monitor.get_health_status()
|
||||
stream_metrics = stream_health_tracker.get_all_metrics()
|
||||
buffer_stats = shared_cache_buffer.get_stats()
|
||||
|
||||
# Format for monitoring systems (Prometheus-style)
|
||||
metrics = {
|
||||
"detector_worker_up": 1,
|
||||
"detector_worker_streams_total": len(stream_metrics),
|
||||
"detector_worker_subscriptions_total": len(worker_state.subscriptions),
|
||||
"detector_worker_sessions_total": len(worker_state.session_ids),
|
||||
"detector_worker_memory_mb": buffer_stats.get("total_memory_mb", 0),
|
||||
"detector_worker_health_status": {
|
||||
"healthy": 1,
|
||||
"warning": 2,
|
||||
"critical": 3,
|
||||
"unknown": 4
|
||||
}.get(overall_health.get("overall_status", "unknown"), 4)
|
||||
}
|
||||
|
||||
# Add per-stream metrics
|
||||
for camera_id, stream_info in stream_metrics.items():
|
||||
safe_camera_id = camera_id.replace("-", "_").replace(".", "_")
|
||||
metrics.update({
|
||||
f"detector_worker_stream_frames_total{{camera=\"{safe_camera_id}\"}}": stream_info.get("frame_count", 0),
|
||||
f"detector_worker_stream_errors_total{{camera=\"{safe_camera_id}\"}}": stream_info.get("error_count", 0),
|
||||
f"detector_worker_stream_fps{{camera=\"{safe_camera_id}\"}}": stream_info.get("frames_per_second", 0),
|
||||
f"detector_worker_stream_frame_age_seconds{{camera=\"{safe_camera_id}\"}}": stream_info.get("last_frame_age_seconds") or 0
|
||||
})
|
||||
|
||||
return {
|
||||
"timestamp": time.time(),
|
||||
"metrics": metrics
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating health metrics: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Metrics error: {str(e)}")
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -297,31 +297,31 @@ class WebSocketHandler:
|
|||
async def _reconcile_subscriptions_with_tracking(self, target_subscriptions) -> dict:
|
||||
"""Reconcile subscriptions with tracking integration."""
|
||||
try:
|
||||
# First, we need to create tracking integrations for each unique model
|
||||
# Create separate tracking integrations for each subscription (camera isolation)
|
||||
tracking_integrations = {}
|
||||
|
||||
for subscription_payload in target_subscriptions:
|
||||
subscription_id = subscription_payload['subscriptionIdentifier']
|
||||
model_id = subscription_payload['modelId']
|
||||
|
||||
# Create tracking integration if not already created
|
||||
if model_id not in tracking_integrations:
|
||||
# Get pipeline configuration for this model
|
||||
pipeline_parser = model_manager.get_pipeline_config(model_id)
|
||||
if pipeline_parser:
|
||||
# Create tracking integration with message sender
|
||||
tracking_integration = TrackingPipelineIntegration(
|
||||
pipeline_parser, model_manager, model_id, self._send_message
|
||||
)
|
||||
# Create separate tracking integration per subscription for camera isolation
|
||||
# Get pipeline configuration for this model
|
||||
pipeline_parser = model_manager.get_pipeline_config(model_id)
|
||||
if pipeline_parser:
|
||||
# Create tracking integration with message sender (separate instance per camera)
|
||||
tracking_integration = TrackingPipelineIntegration(
|
||||
pipeline_parser, model_manager, model_id, self._send_message
|
||||
)
|
||||
|
||||
# Initialize tracking model
|
||||
success = await tracking_integration.initialize_tracking_model()
|
||||
if success:
|
||||
tracking_integrations[model_id] = tracking_integration
|
||||
logger.info(f"[Tracking] Created tracking integration for model {model_id}")
|
||||
else:
|
||||
logger.warning(f"[Tracking] Failed to initialize tracking for model {model_id}")
|
||||
# Initialize tracking model
|
||||
success = await tracking_integration.initialize_tracking_model()
|
||||
if success:
|
||||
tracking_integrations[subscription_id] = tracking_integration
|
||||
logger.info(f"[Tracking] Created isolated tracking integration for subscription {subscription_id} (model {model_id})")
|
||||
else:
|
||||
logger.warning(f"[Tracking] No pipeline config found for model {model_id}")
|
||||
logger.warning(f"[Tracking] Failed to initialize tracking for subscription {subscription_id} (model {model_id})")
|
||||
else:
|
||||
logger.warning(f"[Tracking] No pipeline config found for model {model_id} in subscription {subscription_id}")
|
||||
|
||||
# Now reconcile with StreamManager, adding tracking integrations
|
||||
current_subscription_ids = set()
|
||||
|
@ -377,8 +377,10 @@ class WebSocketHandler:
|
|||
camera_id = subscription_id.split(';')[-1]
|
||||
model_id = payload['modelId']
|
||||
|
||||
# Get tracking integration for this model
|
||||
tracking_integration = tracking_integrations.get(model_id)
|
||||
logger.info(f"[SUBSCRIPTION_MAPPING] subscription_id='{subscription_id}' → camera_id='{camera_id}'")
|
||||
|
||||
# Get tracking integration for this subscription (camera-isolated)
|
||||
tracking_integration = tracking_integrations.get(subscription_id)
|
||||
|
||||
# Extract crop coordinates if present
|
||||
crop_coords = None
|
||||
|
@ -410,7 +412,7 @@ class WebSocketHandler:
|
|||
)
|
||||
|
||||
if success and tracking_integration:
|
||||
logger.info(f"[Tracking] Subscription {subscription_id} configured with tracking for model {model_id}")
|
||||
logger.info(f"[Tracking] Subscription {subscription_id} configured with isolated tracking for model {model_id}")
|
||||
|
||||
return success
|
||||
|
||||
|
@ -537,7 +539,7 @@ class WebSocketHandler:
|
|||
async def _handle_set_session_id(self, message: SetSessionIdMessage) -> None:
|
||||
"""Handle setSessionId message."""
|
||||
display_identifier = message.payload.displayIdentifier
|
||||
session_id = message.payload.sessionId
|
||||
session_id = str(message.payload.sessionId) if message.payload.sessionId is not None else None
|
||||
|
||||
logger.info(f"[RX Processing] setSessionId for display {display_identifier}: {session_id}")
|
||||
|
||||
|
@ -547,10 +549,6 @@ class WebSocketHandler:
|
|||
# Update tracking integrations with session ID
|
||||
shared_stream_manager.set_session_id(display_identifier, session_id)
|
||||
|
||||
# Save snapshot image after getting sessionId
|
||||
if session_id:
|
||||
await self._save_snapshot(display_identifier, session_id)
|
||||
|
||||
async def _handle_set_progression_stage(self, message: SetProgressionStageMessage) -> None:
|
||||
"""Handle setProgressionStage message."""
|
||||
display_identifier = message.payload.displayIdentifier
|
||||
|
@ -566,6 +564,10 @@ class WebSocketHandler:
|
|||
if session_id:
|
||||
shared_stream_manager.set_progression_stage(session_id, stage)
|
||||
|
||||
# Save snapshot image when progression stage is car_fueling
|
||||
if stage == 'car_fueling' and session_id:
|
||||
await self._save_snapshot(display_identifier, session_id)
|
||||
|
||||
# If stage indicates session is cleared/finished, clear from tracking
|
||||
if stage in ['finished', 'cleared', 'idle']:
|
||||
# Get session ID for this display and clear it
|
||||
|
|
|
@ -64,6 +64,10 @@ class DetectionPipeline:
|
|||
# SessionId to processing results mapping (for combining with license plate results)
|
||||
self.session_processing_results = {}
|
||||
|
||||
# Field mappings from parallelActions (e.g., {"car_brand": "{car_brand_cls_v3.brand}"})
|
||||
self.field_mappings = {}
|
||||
self._parse_field_mappings()
|
||||
|
||||
# Statistics
|
||||
self.stats = {
|
||||
'detections_processed': 0,
|
||||
|
@ -74,6 +78,25 @@ class DetectionPipeline:
|
|||
|
||||
logger.info("DetectionPipeline initialized")
|
||||
|
||||
def _parse_field_mappings(self):
|
||||
"""
|
||||
Parse field mappings from parallelActions.postgresql_update_combined.fields.
|
||||
Extracts mappings like {"car_brand": "{car_brand_cls_v3.brand}"} for dynamic field resolution.
|
||||
"""
|
||||
try:
|
||||
if not self.pipeline_config or not hasattr(self.pipeline_config, 'parallel_actions'):
|
||||
return
|
||||
|
||||
for action in self.pipeline_config.parallel_actions:
|
||||
if action.type.value == 'postgresql_update_combined':
|
||||
fields = action.params.get('fields', {})
|
||||
self.field_mappings = fields
|
||||
logger.info(f"[FIELD MAPPINGS] Parsed from pipeline config: {self.field_mappings}")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing field mappings: {e}", exc_info=True)
|
||||
|
||||
async def initialize(self) -> bool:
|
||||
"""
|
||||
Initialize all pipeline components including models, Redis, and database.
|
||||
|
@ -165,6 +188,44 @@ class DetectionPipeline:
|
|||
logger.error(f"Error initializing detection model: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def _extract_fields_from_branches(self, branch_results: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Extract fields dynamically from branch results using field mappings.
|
||||
|
||||
Args:
|
||||
branch_results: Dictionary of branch execution results
|
||||
|
||||
Returns:
|
||||
Dictionary with extracted field values (e.g., {"car_brand": "Honda", "body_type": "Sedan"})
|
||||
"""
|
||||
extracted = {}
|
||||
|
||||
try:
|
||||
for db_field_name, template in self.field_mappings.items():
|
||||
# Parse template like "{car_brand_cls_v3.brand}" -> branch_id="car_brand_cls_v3", field="brand"
|
||||
if template.startswith('{') and template.endswith('}'):
|
||||
var_name = template[1:-1]
|
||||
if '.' in var_name:
|
||||
branch_id, field_name = var_name.split('.', 1)
|
||||
|
||||
# Look up value in branch_results
|
||||
if branch_id in branch_results:
|
||||
branch_data = branch_results[branch_id]
|
||||
if isinstance(branch_data, dict) and 'result' in branch_data:
|
||||
result_data = branch_data['result']
|
||||
if isinstance(result_data, dict) and field_name in result_data:
|
||||
extracted[field_name] = result_data[field_name]
|
||||
logger.debug(f"[DYNAMIC EXTRACT] {field_name}={result_data[field_name]} from branch {branch_id}")
|
||||
else:
|
||||
logger.debug(f"[DYNAMIC EXTRACT] Field '{field_name}' not found in branch {branch_id}")
|
||||
else:
|
||||
logger.debug(f"[DYNAMIC EXTRACT] Branch '{branch_id}' not in results")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting fields from branches: {e}", exc_info=True)
|
||||
|
||||
return extracted
|
||||
|
||||
async def _on_license_plate_result(self, session_id: str, license_data: Dict[str, Any]):
|
||||
"""
|
||||
Callback for handling license plate results from LPR service.
|
||||
|
@ -272,12 +333,12 @@ class DetectionPipeline:
|
|||
branch_results = self.session_processing_results[session_id_for_lookup]
|
||||
logger.info(f"[LICENSE PLATE] Retrieved processing results for session {session_id_for_lookup}")
|
||||
|
||||
if 'car_brand_cls_v2' in branch_results:
|
||||
brand_result = branch_results['car_brand_cls_v2'].get('result', {})
|
||||
car_brand = brand_result.get('brand')
|
||||
if 'car_bodytype_cls_v1' in branch_results:
|
||||
bodytype_result = branch_results['car_bodytype_cls_v1'].get('result', {})
|
||||
body_type = bodytype_result.get('body_type')
|
||||
# Extract fields dynamically using field mappings from pipeline config
|
||||
extracted_fields = self._extract_fields_from_branches(branch_results)
|
||||
car_brand = extracted_fields.get('brand')
|
||||
body_type = extracted_fields.get('body_type')
|
||||
|
||||
logger.info(f"[LICENSE PLATE] Extracted fields: brand={car_brand}, body_type={body_type}")
|
||||
|
||||
# Clean up stored results after use
|
||||
del self.session_processing_results[session_id_for_lookup]
|
||||
|
@ -1003,7 +1064,7 @@ class DetectionPipeline:
|
|||
Resolve field template using branch results and context.
|
||||
|
||||
Args:
|
||||
template: Template string like "{car_brand_cls_v2.brand}"
|
||||
template: Template string like "{car_brand_cls_v3.brand}"
|
||||
branch_results: Dictionary of branch execution results
|
||||
context: Detection context
|
||||
|
||||
|
@ -1015,7 +1076,7 @@ class DetectionPipeline:
|
|||
if template.startswith('{') and template.endswith('}'):
|
||||
var_name = template[1:-1]
|
||||
|
||||
# Check for branch result reference (e.g., "car_brand_cls_v2.brand")
|
||||
# Check for branch result reference (e.g., "car_brand_cls_v3.brand")
|
||||
if '.' in var_name:
|
||||
branch_id, field_name = var_name.split('.', 1)
|
||||
if branch_id in branch_results:
|
||||
|
@ -1061,17 +1122,10 @@ class DetectionPipeline:
|
|||
logger.warning("No session_id in context for processing results")
|
||||
return
|
||||
|
||||
# Extract car brand from car_brand_cls_v2 results
|
||||
car_brand = None
|
||||
if 'car_brand_cls_v2' in branch_results:
|
||||
brand_result = branch_results['car_brand_cls_v2'].get('result', {})
|
||||
car_brand = brand_result.get('brand')
|
||||
|
||||
# Extract body type from car_bodytype_cls_v1 results
|
||||
body_type = None
|
||||
if 'car_bodytype_cls_v1' in branch_results:
|
||||
bodytype_result = branch_results['car_bodytype_cls_v1'].get('result', {})
|
||||
body_type = bodytype_result.get('body_type')
|
||||
# Extract fields dynamically using field mappings from pipeline config
|
||||
extracted_fields = self._extract_fields_from_branches(branch_results)
|
||||
car_brand = extracted_fields.get('brand')
|
||||
body_type = extracted_fields.get('body_type')
|
||||
|
||||
logger.info(f"[PROCESSING RESULTS] Completed for session {session_id}: "
|
||||
f"brand={car_brand}, bodyType={body_type}")
|
||||
|
|
|
@ -60,6 +60,8 @@ class YOLOWrapper:
|
|||
|
||||
self.model = None
|
||||
self._class_names = []
|
||||
|
||||
|
||||
self._load_model()
|
||||
|
||||
logger.info(f"Initialized YOLO wrapper for {model_id} on {self.device}")
|
||||
|
@ -115,6 +117,7 @@ class YOLOWrapper:
|
|||
logger.error(f"Failed to extract class names: {str(e)}")
|
||||
self._class_names = {}
|
||||
|
||||
|
||||
def infer(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
|
@ -222,55 +225,30 @@ class YOLOWrapper:
|
|||
|
||||
return detections
|
||||
|
||||
|
||||
def track(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
confidence_threshold: float = 0.5,
|
||||
trigger_classes: Optional[List[str]] = None,
|
||||
persist: bool = True
|
||||
persist: bool = True,
|
||||
camera_id: Optional[str] = None
|
||||
) -> InferenceResult:
|
||||
"""
|
||||
Run tracking on an image
|
||||
Run detection (tracking will be handled by external tracker)
|
||||
|
||||
Args:
|
||||
image: Input image as numpy array (BGR format)
|
||||
confidence_threshold: Minimum confidence for detections
|
||||
trigger_classes: List of class names to filter
|
||||
persist: Whether to persist tracks across frames
|
||||
persist: Ignored - tracking handled externally
|
||||
camera_id: Ignored - tracking handled externally
|
||||
|
||||
Returns:
|
||||
InferenceResult containing detections with track IDs
|
||||
InferenceResult containing detections (no track IDs from YOLO)
|
||||
"""
|
||||
if self.model is None:
|
||||
raise RuntimeError(f"Model {self.model_id} not loaded")
|
||||
|
||||
try:
|
||||
import time
|
||||
start_time = time.time()
|
||||
|
||||
# Run tracking
|
||||
results = self.model.track(
|
||||
image,
|
||||
conf=confidence_threshold,
|
||||
persist=persist,
|
||||
verbose=False
|
||||
)
|
||||
|
||||
inference_time = time.time() - start_time
|
||||
|
||||
# Parse results
|
||||
detections = self._parse_results(results[0], trigger_classes)
|
||||
|
||||
return InferenceResult(
|
||||
detections=detections,
|
||||
image_shape=(image.shape[0], image.shape[1]),
|
||||
inference_time=inference_time,
|
||||
model_id=self.model_id
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Tracking failed for model {self.model_id}: {str(e)}", exc_info=True)
|
||||
raise
|
||||
# Just do detection - no YOLO tracking
|
||||
return self.infer(image, confidence_threshold, trigger_classes)
|
||||
|
||||
def predict_classification(
|
||||
self,
|
||||
|
@ -350,6 +328,7 @@ class YOLOWrapper:
|
|||
"""Get the number of classes the model can detect"""
|
||||
return len(self._class_names)
|
||||
|
||||
|
||||
def clear_cache(self) -> None:
|
||||
"""Clear the model cache"""
|
||||
with self._cache_lock:
|
||||
|
|
18
core/monitoring/__init__.py
Normal file
18
core/monitoring/__init__.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
"""
|
||||
Comprehensive health monitoring system for detector worker.
|
||||
Tracks stream health, thread responsiveness, and system performance.
|
||||
"""
|
||||
|
||||
from .health import HealthMonitor, HealthStatus, HealthCheck
|
||||
from .stream_health import StreamHealthTracker
|
||||
from .thread_health import ThreadHealthMonitor
|
||||
from .recovery import RecoveryManager
|
||||
|
||||
__all__ = [
|
||||
'HealthMonitor',
|
||||
'HealthStatus',
|
||||
'HealthCheck',
|
||||
'StreamHealthTracker',
|
||||
'ThreadHealthMonitor',
|
||||
'RecoveryManager'
|
||||
]
|
456
core/monitoring/health.py
Normal file
456
core/monitoring/health.py
Normal file
|
@ -0,0 +1,456 @@
|
|||
"""
|
||||
Core health monitoring system for comprehensive stream and system health tracking.
|
||||
Provides centralized health status, alerting, and recovery coordination.
|
||||
"""
|
||||
import time
|
||||
import threading
|
||||
import logging
|
||||
import psutil
|
||||
from typing import Dict, List, Optional, Any, Callable
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from collections import defaultdict, deque
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class HealthStatus(Enum):
|
||||
"""Health status levels."""
|
||||
HEALTHY = "healthy"
|
||||
WARNING = "warning"
|
||||
CRITICAL = "critical"
|
||||
UNKNOWN = "unknown"
|
||||
|
||||
|
||||
@dataclass
|
||||
class HealthCheck:
|
||||
"""Individual health check result."""
|
||||
name: str
|
||||
status: HealthStatus
|
||||
message: str
|
||||
timestamp: float = field(default_factory=time.time)
|
||||
details: Dict[str, Any] = field(default_factory=dict)
|
||||
recovery_action: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class HealthMetrics:
|
||||
"""Health metrics for a component."""
|
||||
component_id: str
|
||||
last_update: float
|
||||
frame_count: int = 0
|
||||
error_count: int = 0
|
||||
warning_count: int = 0
|
||||
restart_count: int = 0
|
||||
avg_frame_interval: float = 0.0
|
||||
last_frame_time: Optional[float] = None
|
||||
thread_alive: bool = True
|
||||
connection_healthy: bool = True
|
||||
memory_usage_mb: float = 0.0
|
||||
cpu_usage_percent: float = 0.0
|
||||
|
||||
|
||||
class HealthMonitor:
|
||||
"""Comprehensive health monitoring system."""
|
||||
|
||||
def __init__(self, check_interval: float = 30.0):
|
||||
"""
|
||||
Initialize health monitor.
|
||||
|
||||
Args:
|
||||
check_interval: Interval between health checks in seconds
|
||||
"""
|
||||
self.check_interval = check_interval
|
||||
self.running = False
|
||||
self.monitor_thread = None
|
||||
self._lock = threading.RLock()
|
||||
|
||||
# Health data storage
|
||||
self.health_checks: Dict[str, HealthCheck] = {}
|
||||
self.metrics: Dict[str, HealthMetrics] = {}
|
||||
self.alert_history: deque = deque(maxlen=1000)
|
||||
self.recovery_actions: deque = deque(maxlen=500)
|
||||
|
||||
# Thresholds (configurable)
|
||||
self.thresholds = {
|
||||
'frame_stale_warning_seconds': 120, # 2 minutes
|
||||
'frame_stale_critical_seconds': 300, # 5 minutes
|
||||
'thread_unresponsive_seconds': 60, # 1 minute
|
||||
'memory_warning_mb': 500, # 500MB per stream
|
||||
'memory_critical_mb': 1000, # 1GB per stream
|
||||
'cpu_warning_percent': 80, # 80% CPU
|
||||
'cpu_critical_percent': 95, # 95% CPU
|
||||
'error_rate_warning': 0.1, # 10% error rate
|
||||
'error_rate_critical': 0.3, # 30% error rate
|
||||
'restart_threshold': 3 # Max restarts per hour
|
||||
}
|
||||
|
||||
# Health check functions
|
||||
self.health_checkers: List[Callable[[], List[HealthCheck]]] = []
|
||||
self.recovery_callbacks: Dict[str, Callable[[str, HealthCheck], bool]] = {}
|
||||
|
||||
# System monitoring
|
||||
self.process = psutil.Process()
|
||||
self.system_start_time = time.time()
|
||||
|
||||
def start(self):
|
||||
"""Start health monitoring."""
|
||||
if self.running:
|
||||
logger.warning("Health monitor already running")
|
||||
return
|
||||
|
||||
self.running = True
|
||||
self.monitor_thread = threading.Thread(target=self._monitor_loop, daemon=True)
|
||||
self.monitor_thread.start()
|
||||
logger.info(f"Health monitor started (check interval: {self.check_interval}s)")
|
||||
|
||||
def stop(self):
|
||||
"""Stop health monitoring."""
|
||||
self.running = False
|
||||
if self.monitor_thread:
|
||||
self.monitor_thread.join(timeout=5.0)
|
||||
logger.info("Health monitor stopped")
|
||||
|
||||
def register_health_checker(self, checker: Callable[[], List[HealthCheck]]):
|
||||
"""Register a health check function."""
|
||||
self.health_checkers.append(checker)
|
||||
logger.debug(f"Registered health checker: {checker.__name__}")
|
||||
|
||||
def register_recovery_callback(self, component: str, callback: Callable[[str, HealthCheck], bool]):
|
||||
"""Register a recovery callback for a component."""
|
||||
self.recovery_callbacks[component] = callback
|
||||
logger.debug(f"Registered recovery callback for {component}")
|
||||
|
||||
def update_metrics(self, component_id: str, **kwargs):
|
||||
"""Update metrics for a component."""
|
||||
with self._lock:
|
||||
if component_id not in self.metrics:
|
||||
self.metrics[component_id] = HealthMetrics(
|
||||
component_id=component_id,
|
||||
last_update=time.time()
|
||||
)
|
||||
|
||||
metrics = self.metrics[component_id]
|
||||
metrics.last_update = time.time()
|
||||
|
||||
# Update provided metrics
|
||||
for key, value in kwargs.items():
|
||||
if hasattr(metrics, key):
|
||||
setattr(metrics, key, value)
|
||||
|
||||
def report_frame_received(self, component_id: str):
|
||||
"""Report that a frame was received for a component."""
|
||||
current_time = time.time()
|
||||
with self._lock:
|
||||
if component_id not in self.metrics:
|
||||
self.metrics[component_id] = HealthMetrics(
|
||||
component_id=component_id,
|
||||
last_update=current_time
|
||||
)
|
||||
|
||||
metrics = self.metrics[component_id]
|
||||
|
||||
# Update frame metrics
|
||||
if metrics.last_frame_time:
|
||||
interval = current_time - metrics.last_frame_time
|
||||
# Moving average of frame intervals
|
||||
if metrics.avg_frame_interval == 0:
|
||||
metrics.avg_frame_interval = interval
|
||||
else:
|
||||
metrics.avg_frame_interval = (metrics.avg_frame_interval * 0.9) + (interval * 0.1)
|
||||
|
||||
metrics.last_frame_time = current_time
|
||||
metrics.frame_count += 1
|
||||
metrics.last_update = current_time
|
||||
|
||||
def report_error(self, component_id: str, error_type: str = "general"):
|
||||
"""Report an error for a component."""
|
||||
with self._lock:
|
||||
if component_id not in self.metrics:
|
||||
self.metrics[component_id] = HealthMetrics(
|
||||
component_id=component_id,
|
||||
last_update=time.time()
|
||||
)
|
||||
|
||||
self.metrics[component_id].error_count += 1
|
||||
self.metrics[component_id].last_update = time.time()
|
||||
|
||||
logger.debug(f"Error reported for {component_id}: {error_type}")
|
||||
|
||||
def report_warning(self, component_id: str, warning_type: str = "general"):
|
||||
"""Report a warning for a component."""
|
||||
with self._lock:
|
||||
if component_id not in self.metrics:
|
||||
self.metrics[component_id] = HealthMetrics(
|
||||
component_id=component_id,
|
||||
last_update=time.time()
|
||||
)
|
||||
|
||||
self.metrics[component_id].warning_count += 1
|
||||
self.metrics[component_id].last_update = time.time()
|
||||
|
||||
logger.debug(f"Warning reported for {component_id}: {warning_type}")
|
||||
|
||||
def report_restart(self, component_id: str):
|
||||
"""Report that a component was restarted."""
|
||||
with self._lock:
|
||||
if component_id not in self.metrics:
|
||||
self.metrics[component_id] = HealthMetrics(
|
||||
component_id=component_id,
|
||||
last_update=time.time()
|
||||
)
|
||||
|
||||
self.metrics[component_id].restart_count += 1
|
||||
self.metrics[component_id].last_update = time.time()
|
||||
|
||||
# Log recovery action
|
||||
recovery_action = {
|
||||
'timestamp': time.time(),
|
||||
'component': component_id,
|
||||
'action': 'restart',
|
||||
'reason': 'manual_restart'
|
||||
}
|
||||
|
||||
with self._lock:
|
||||
self.recovery_actions.append(recovery_action)
|
||||
|
||||
logger.info(f"Restart reported for {component_id}")
|
||||
|
||||
def get_health_status(self, component_id: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Get comprehensive health status."""
|
||||
with self._lock:
|
||||
if component_id:
|
||||
# Get health for specific component
|
||||
return self._get_component_health(component_id)
|
||||
else:
|
||||
# Get overall health status
|
||||
return self._get_overall_health()
|
||||
|
||||
def _get_component_health(self, component_id: str) -> Dict[str, Any]:
|
||||
"""Get health status for a specific component."""
|
||||
if component_id not in self.metrics:
|
||||
return {
|
||||
'component_id': component_id,
|
||||
'status': HealthStatus.UNKNOWN.value,
|
||||
'message': 'No metrics available',
|
||||
'metrics': {}
|
||||
}
|
||||
|
||||
metrics = self.metrics[component_id]
|
||||
current_time = time.time()
|
||||
|
||||
# Determine health status
|
||||
status = HealthStatus.HEALTHY
|
||||
issues = []
|
||||
|
||||
# Check frame freshness
|
||||
if metrics.last_frame_time:
|
||||
frame_age = current_time - metrics.last_frame_time
|
||||
if frame_age > self.thresholds['frame_stale_critical_seconds']:
|
||||
status = HealthStatus.CRITICAL
|
||||
issues.append(f"Frames stale for {frame_age:.1f}s")
|
||||
elif frame_age > self.thresholds['frame_stale_warning_seconds']:
|
||||
if status == HealthStatus.HEALTHY:
|
||||
status = HealthStatus.WARNING
|
||||
issues.append(f"Frames aging ({frame_age:.1f}s)")
|
||||
|
||||
# Check error rates
|
||||
if metrics.frame_count > 0:
|
||||
error_rate = metrics.error_count / metrics.frame_count
|
||||
if error_rate > self.thresholds['error_rate_critical']:
|
||||
status = HealthStatus.CRITICAL
|
||||
issues.append(f"High error rate ({error_rate:.1%})")
|
||||
elif error_rate > self.thresholds['error_rate_warning']:
|
||||
if status == HealthStatus.HEALTHY:
|
||||
status = HealthStatus.WARNING
|
||||
issues.append(f"Elevated error rate ({error_rate:.1%})")
|
||||
|
||||
# Check restart frequency
|
||||
restart_rate = metrics.restart_count / max(1, (current_time - self.system_start_time) / 3600)
|
||||
if restart_rate > self.thresholds['restart_threshold']:
|
||||
status = HealthStatus.CRITICAL
|
||||
issues.append(f"Frequent restarts ({restart_rate:.1f}/hour)")
|
||||
|
||||
# Check thread health
|
||||
if not metrics.thread_alive:
|
||||
status = HealthStatus.CRITICAL
|
||||
issues.append("Thread not alive")
|
||||
|
||||
# Check connection health
|
||||
if not metrics.connection_healthy:
|
||||
if status == HealthStatus.HEALTHY:
|
||||
status = HealthStatus.WARNING
|
||||
issues.append("Connection unhealthy")
|
||||
|
||||
return {
|
||||
'component_id': component_id,
|
||||
'status': status.value,
|
||||
'message': '; '.join(issues) if issues else 'All checks passing',
|
||||
'metrics': {
|
||||
'frame_count': metrics.frame_count,
|
||||
'error_count': metrics.error_count,
|
||||
'warning_count': metrics.warning_count,
|
||||
'restart_count': metrics.restart_count,
|
||||
'avg_frame_interval': metrics.avg_frame_interval,
|
||||
'last_frame_age': current_time - metrics.last_frame_time if metrics.last_frame_time else None,
|
||||
'thread_alive': metrics.thread_alive,
|
||||
'connection_healthy': metrics.connection_healthy,
|
||||
'memory_usage_mb': metrics.memory_usage_mb,
|
||||
'cpu_usage_percent': metrics.cpu_usage_percent,
|
||||
'uptime_seconds': current_time - self.system_start_time
|
||||
},
|
||||
'last_update': metrics.last_update
|
||||
}
|
||||
|
||||
def _get_overall_health(self) -> Dict[str, Any]:
|
||||
"""Get overall system health status."""
|
||||
current_time = time.time()
|
||||
components = {}
|
||||
overall_status = HealthStatus.HEALTHY
|
||||
|
||||
# Get health for all components
|
||||
for component_id in self.metrics.keys():
|
||||
component_health = self._get_component_health(component_id)
|
||||
components[component_id] = component_health
|
||||
|
||||
# Determine overall status
|
||||
component_status = HealthStatus(component_health['status'])
|
||||
if component_status == HealthStatus.CRITICAL:
|
||||
overall_status = HealthStatus.CRITICAL
|
||||
elif component_status == HealthStatus.WARNING and overall_status == HealthStatus.HEALTHY:
|
||||
overall_status = HealthStatus.WARNING
|
||||
|
||||
# System metrics
|
||||
try:
|
||||
system_memory = self.process.memory_info()
|
||||
system_cpu = self.process.cpu_percent()
|
||||
except Exception:
|
||||
system_memory = None
|
||||
system_cpu = 0.0
|
||||
|
||||
return {
|
||||
'overall_status': overall_status.value,
|
||||
'timestamp': current_time,
|
||||
'uptime_seconds': current_time - self.system_start_time,
|
||||
'total_components': len(self.metrics),
|
||||
'components': components,
|
||||
'system_metrics': {
|
||||
'memory_mb': system_memory.rss / (1024 * 1024) if system_memory else 0,
|
||||
'cpu_percent': system_cpu,
|
||||
'process_id': self.process.pid
|
||||
},
|
||||
'recent_alerts': list(self.alert_history)[-10:], # Last 10 alerts
|
||||
'recent_recoveries': list(self.recovery_actions)[-10:] # Last 10 recovery actions
|
||||
}
|
||||
|
||||
def _monitor_loop(self):
|
||||
"""Main health monitoring loop."""
|
||||
logger.info("Health monitor loop started")
|
||||
|
||||
while self.running:
|
||||
try:
|
||||
start_time = time.time()
|
||||
|
||||
# Run all registered health checks
|
||||
all_checks = []
|
||||
for checker in self.health_checkers:
|
||||
try:
|
||||
checks = checker()
|
||||
all_checks.extend(checks)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in health checker {checker.__name__}: {e}")
|
||||
|
||||
# Process health checks and trigger recovery if needed
|
||||
for check in all_checks:
|
||||
self._process_health_check(check)
|
||||
|
||||
# Update system metrics
|
||||
self._update_system_metrics()
|
||||
|
||||
# Sleep until next check
|
||||
elapsed = time.time() - start_time
|
||||
sleep_time = max(0, self.check_interval - elapsed)
|
||||
if sleep_time > 0:
|
||||
time.sleep(sleep_time)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in health monitor loop: {e}")
|
||||
time.sleep(5.0) # Fallback sleep
|
||||
|
||||
logger.info("Health monitor loop ended")
|
||||
|
||||
def _process_health_check(self, check: HealthCheck):
|
||||
"""Process a health check result and trigger recovery if needed."""
|
||||
with self._lock:
|
||||
# Store health check
|
||||
self.health_checks[check.name] = check
|
||||
|
||||
# Log alerts for non-healthy status
|
||||
if check.status != HealthStatus.HEALTHY:
|
||||
alert = {
|
||||
'timestamp': check.timestamp,
|
||||
'component': check.name,
|
||||
'status': check.status.value,
|
||||
'message': check.message,
|
||||
'details': check.details
|
||||
}
|
||||
self.alert_history.append(alert)
|
||||
|
||||
logger.warning(f"Health alert [{check.status.value.upper()}] {check.name}: {check.message}")
|
||||
|
||||
# Trigger recovery if critical and recovery action available
|
||||
if check.status == HealthStatus.CRITICAL and check.recovery_action:
|
||||
self._trigger_recovery(check.name, check)
|
||||
|
||||
def _trigger_recovery(self, component: str, check: HealthCheck):
|
||||
"""Trigger recovery action for a component."""
|
||||
if component in self.recovery_callbacks:
|
||||
try:
|
||||
logger.info(f"Triggering recovery for {component}: {check.recovery_action}")
|
||||
|
||||
success = self.recovery_callbacks[component](component, check)
|
||||
|
||||
recovery_action = {
|
||||
'timestamp': time.time(),
|
||||
'component': component,
|
||||
'action': check.recovery_action,
|
||||
'reason': check.message,
|
||||
'success': success
|
||||
}
|
||||
|
||||
with self._lock:
|
||||
self.recovery_actions.append(recovery_action)
|
||||
|
||||
if success:
|
||||
logger.info(f"Recovery successful for {component}")
|
||||
else:
|
||||
logger.error(f"Recovery failed for {component}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in recovery callback for {component}: {e}")
|
||||
|
||||
def _update_system_metrics(self):
|
||||
"""Update system-level metrics."""
|
||||
try:
|
||||
# Update process metrics for all components
|
||||
current_time = time.time()
|
||||
|
||||
with self._lock:
|
||||
for component_id, metrics in self.metrics.items():
|
||||
# Update CPU and memory if available
|
||||
try:
|
||||
# This is a simplified approach - in practice you'd want
|
||||
# per-thread or per-component resource tracking
|
||||
metrics.cpu_usage_percent = self.process.cpu_percent() / len(self.metrics)
|
||||
memory_info = self.process.memory_info()
|
||||
metrics.memory_usage_mb = memory_info.rss / (1024 * 1024) / len(self.metrics)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating system metrics: {e}")
|
||||
|
||||
|
||||
# Global health monitor instance
|
||||
health_monitor = HealthMonitor()
|
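A minimal usage sketch (illustrative, not part of this changeset) of the HealthMonitor hooks exercised above; the `detector` component name, its check, and its recovery callback are hypothetical.

from core.monitoring.health import health_monitor, HealthCheck, HealthStatus

def detector_health_checks():
    # Called from the monitor loop; returns zero or more HealthCheck results.
    return [HealthCheck(
        name="detector",
        status=HealthStatus.WARNING,
        message="Inference queue above threshold",
        details={'queue_depth': 42},
    )]

def recover_detector(component, check):
    # Invoked for CRITICAL checks whose name matches the registered key.
    # Real restart logic would go here; return True on success.
    return True

health_monitor.register_health_checker(detector_health_checks)
health_monitor.register_recovery_callback("detector", recover_detector)
health_monitor.update_metrics("detector", thread_alive=True)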
385
core/monitoring/recovery.py
Normal file
@@ -0,0 +1,385 @@
"""
|
||||
Recovery manager for automatic handling of health issues.
|
||||
Provides circuit breaker patterns, automatic restarts, and graceful degradation.
|
||||
"""
|
||||
import time
|
||||
import logging
|
||||
import threading
|
||||
from typing import Dict, List, Optional, Any, Callable
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from collections import defaultdict, deque
|
||||
|
||||
from .health import HealthCheck, HealthStatus, health_monitor
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RecoveryAction(Enum):
|
||||
"""Types of recovery actions."""
|
||||
RESTART_STREAM = "restart_stream"
|
||||
RESTART_THREAD = "restart_thread"
|
||||
CLEAR_BUFFER = "clear_buffer"
|
||||
RECONNECT = "reconnect"
|
||||
THROTTLE = "throttle"
|
||||
DISABLE = "disable"
|
||||
|
||||
|
||||
@dataclass
|
||||
class RecoveryAttempt:
|
||||
"""Record of a recovery attempt."""
|
||||
timestamp: float
|
||||
component: str
|
||||
action: RecoveryAction
|
||||
reason: str
|
||||
success: bool
|
||||
details: Dict[str, Any] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class RecoveryState:
|
||||
"""Recovery state for a component - simplified without circuit breaker."""
|
||||
failure_count: int = 0
|
||||
success_count: int = 0
|
||||
last_failure_time: Optional[float] = None
|
||||
last_success_time: Optional[float] = None
|
||||
|
||||
|
||||
class RecoveryManager:
|
||||
"""Manages automatic recovery actions for health issues."""
|
||||
|
||||
def __init__(self):
|
||||
self.recovery_handlers: Dict[str, Callable[[str, HealthCheck], bool]] = {}
|
||||
self.recovery_states: Dict[str, RecoveryState] = {}
|
||||
self.recovery_history: deque = deque(maxlen=1000)
|
||||
self._lock = threading.RLock()
|
||||
|
||||
# Configuration - simplified without circuit breaker
|
||||
self.recovery_cooldown = 30 # 30 seconds between recovery attempts
|
||||
self.max_attempts_per_hour = 20 # Still limit to prevent spam, but much higher
|
||||
|
||||
# Track recovery attempts per component
|
||||
self.recovery_attempts: Dict[str, deque] = defaultdict(lambda: deque(maxlen=50))
|
||||
|
||||
# Register with health monitor
|
||||
health_monitor.register_recovery_callback("stream", self._handle_stream_recovery)
|
||||
health_monitor.register_recovery_callback("thread", self._handle_thread_recovery)
|
||||
health_monitor.register_recovery_callback("buffer", self._handle_buffer_recovery)
|
||||
|
||||
def register_recovery_handler(self, action: RecoveryAction, handler: Callable[[str, Dict[str, Any]], bool]):
|
||||
"""
|
||||
Register a recovery handler for a specific action.
|
||||
|
||||
Args:
|
||||
action: Type of recovery action
|
||||
handler: Function that performs the recovery
|
||||
"""
|
||||
self.recovery_handlers[action.value] = handler
|
||||
logger.info(f"Registered recovery handler for {action.value}")
|
||||
|
||||
def can_attempt_recovery(self, component: str) -> bool:
|
||||
"""
|
||||
Check if recovery can be attempted for a component.
|
||||
|
||||
Args:
|
||||
component: Component identifier
|
||||
|
||||
Returns:
|
||||
True if recovery can be attempted (always allow with minimal throttling)
|
||||
"""
|
||||
with self._lock:
|
||||
current_time = time.time()
|
||||
|
||||
# Check recovery attempt rate limiting (much more permissive)
|
||||
recent_attempts = [
|
||||
attempt for attempt in self.recovery_attempts[component]
|
||||
if current_time - attempt <= 3600 # Last hour
|
||||
]
|
||||
|
||||
# Only block if truly excessive attempts
|
||||
if len(recent_attempts) >= self.max_attempts_per_hour:
|
||||
logger.warning(f"Recovery rate limit exceeded for {component} "
|
||||
f"({len(recent_attempts)} attempts in last hour)")
|
||||
return False
|
||||
|
||||
# Check cooldown period (shorter cooldown)
|
||||
if recent_attempts:
|
||||
last_attempt = max(recent_attempts)
|
||||
if current_time - last_attempt < self.recovery_cooldown:
|
||||
logger.debug(f"Recovery cooldown active for {component} "
|
||||
f"(last attempt {current_time - last_attempt:.1f}s ago)")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def attempt_recovery(self, component: str, action: RecoveryAction, reason: str,
|
||||
details: Optional[Dict[str, Any]] = None) -> bool:
|
||||
"""
|
||||
Attempt recovery for a component.
|
||||
|
||||
Args:
|
||||
component: Component identifier
|
||||
action: Recovery action to perform
|
||||
reason: Reason for recovery
|
||||
details: Additional details
|
||||
|
||||
Returns:
|
||||
True if recovery was successful
|
||||
"""
|
||||
if not self.can_attempt_recovery(component):
|
||||
return False
|
||||
|
||||
current_time = time.time()
|
||||
|
||||
logger.info(f"Attempting recovery for {component}: {action.value} ({reason})")
|
||||
|
||||
try:
|
||||
# Record recovery attempt
|
||||
with self._lock:
|
||||
self.recovery_attempts[component].append(current_time)
|
||||
|
||||
# Perform recovery action
|
||||
success = self._execute_recovery_action(component, action, details or {})
|
||||
|
||||
# Record recovery result
|
||||
attempt = RecoveryAttempt(
|
||||
timestamp=current_time,
|
||||
component=component,
|
||||
action=action,
|
||||
reason=reason,
|
||||
success=success,
|
||||
details=details
|
||||
)
|
||||
|
||||
with self._lock:
|
||||
self.recovery_history.append(attempt)
|
||||
|
||||
# Update recovery state
|
||||
self._update_recovery_state(component, success)
|
||||
|
||||
if success:
|
||||
logger.info(f"Recovery successful for {component}: {action.value}")
|
||||
else:
|
||||
logger.error(f"Recovery failed for {component}: {action.value}")
|
||||
|
||||
return success
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during recovery for {component}: {e}")
|
||||
self._update_recovery_state(component, False)
|
||||
return False
|
||||
|
||||
def _execute_recovery_action(self, component: str, action: RecoveryAction,
|
||||
details: Dict[str, Any]) -> bool:
|
||||
"""Execute a specific recovery action."""
|
||||
handler_key = action.value
|
||||
|
||||
if handler_key not in self.recovery_handlers:
|
||||
logger.error(f"No recovery handler registered for action: {handler_key}")
|
||||
return False
|
||||
|
||||
try:
|
||||
handler = self.recovery_handlers[handler_key]
|
||||
return handler(component, details)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error executing recovery action {handler_key} for {component}: {e}")
|
||||
return False
|
||||
|
||||
def _update_recovery_state(self, component: str, success: bool):
|
||||
"""Update recovery state based on recovery result."""
|
||||
current_time = time.time()
|
||||
|
||||
with self._lock:
|
||||
if component not in self.recovery_states:
|
||||
self.recovery_states[component] = RecoveryState()
|
||||
|
||||
state = self.recovery_states[component]
|
||||
|
||||
if success:
|
||||
state.success_count += 1
|
||||
state.last_success_time = current_time
|
||||
# Reset failure count on success
|
||||
state.failure_count = max(0, state.failure_count - 1)
|
||||
logger.debug(f"Recovery success for {component} (total successes: {state.success_count})")
|
||||
else:
|
||||
state.failure_count += 1
|
||||
state.last_failure_time = current_time
|
||||
logger.debug(f"Recovery failure for {component} (total failures: {state.failure_count})")
|
||||
|
||||
def _handle_stream_recovery(self, component: str, health_check: HealthCheck) -> bool:
|
||||
"""Handle recovery for stream-related issues."""
|
||||
if "frames" in health_check.name:
|
||||
# Frame-related issue - restart stream
|
||||
return self.attempt_recovery(
|
||||
component,
|
||||
RecoveryAction.RESTART_STREAM,
|
||||
health_check.message,
|
||||
health_check.details
|
||||
)
|
||||
elif "connection" in health_check.name:
|
||||
# Connection issue - reconnect
|
||||
return self.attempt_recovery(
|
||||
component,
|
||||
RecoveryAction.RECONNECT,
|
||||
health_check.message,
|
||||
health_check.details
|
||||
)
|
||||
elif "errors" in health_check.name:
|
||||
# High error rate - throttle or restart
|
||||
return self.attempt_recovery(
|
||||
component,
|
||||
RecoveryAction.THROTTLE,
|
||||
health_check.message,
|
||||
health_check.details
|
||||
)
|
||||
else:
|
||||
# Generic stream issue - restart
|
||||
return self.attempt_recovery(
|
||||
component,
|
||||
RecoveryAction.RESTART_STREAM,
|
||||
health_check.message,
|
||||
health_check.details
|
||||
)
|
||||
|
||||
def _handle_thread_recovery(self, component: str, health_check: HealthCheck) -> bool:
|
||||
"""Handle recovery for thread-related issues."""
|
||||
if "deadlock" in health_check.name:
|
||||
# Deadlock detected - restart thread
|
||||
return self.attempt_recovery(
|
||||
component,
|
||||
RecoveryAction.RESTART_THREAD,
|
||||
health_check.message,
|
||||
health_check.details
|
||||
)
|
||||
elif "responsive" in health_check.name:
|
||||
# Thread unresponsive - restart
|
||||
return self.attempt_recovery(
|
||||
component,
|
||||
RecoveryAction.RESTART_THREAD,
|
||||
health_check.message,
|
||||
health_check.details
|
||||
)
|
||||
else:
|
||||
# Generic thread issue - restart
|
||||
return self.attempt_recovery(
|
||||
component,
|
||||
RecoveryAction.RESTART_THREAD,
|
||||
health_check.message,
|
||||
health_check.details
|
||||
)
|
||||
|
||||
def _handle_buffer_recovery(self, component: str, health_check: HealthCheck) -> bool:
|
||||
"""Handle recovery for buffer-related issues."""
|
||||
# Buffer issues - clear buffer
|
||||
return self.attempt_recovery(
|
||||
component,
|
||||
RecoveryAction.CLEAR_BUFFER,
|
||||
health_check.message,
|
||||
health_check.details
|
||||
)
|
||||
|
||||
def get_recovery_stats(self) -> Dict[str, Any]:
|
||||
"""Get recovery statistics."""
|
||||
current_time = time.time()
|
||||
|
||||
with self._lock:
|
||||
# Calculate stats from history
|
||||
recent_recoveries = [
|
||||
attempt for attempt in self.recovery_history
|
||||
if current_time - attempt.timestamp <= 3600 # Last hour
|
||||
]
|
||||
|
||||
stats_by_component = defaultdict(lambda: {
|
||||
'attempts': 0,
|
||||
'successes': 0,
|
||||
'failures': 0,
|
||||
'last_attempt': None,
|
||||
'last_success': None
|
||||
})
|
||||
|
||||
for attempt in recent_recoveries:
|
||||
stats = stats_by_component[attempt.component]
|
||||
stats['attempts'] += 1
|
||||
|
||||
if attempt.success:
|
||||
stats['successes'] += 1
|
||||
if not stats['last_success'] or attempt.timestamp > stats['last_success']:
|
||||
stats['last_success'] = attempt.timestamp
|
||||
else:
|
||||
stats['failures'] += 1
|
||||
|
||||
if not stats['last_attempt'] or attempt.timestamp > stats['last_attempt']:
|
||||
stats['last_attempt'] = attempt.timestamp
|
||||
|
||||
return {
|
||||
'total_recoveries_last_hour': len(recent_recoveries),
|
||||
'recovery_by_component': dict(stats_by_component),
|
||||
'recovery_states': {
|
||||
component: {
|
||||
'failure_count': state.failure_count,
|
||||
'success_count': state.success_count,
|
||||
'last_failure_time': state.last_failure_time,
|
||||
'last_success_time': state.last_success_time
|
||||
}
|
||||
for component, state in self.recovery_states.items()
|
||||
},
|
||||
'recent_history': [
|
||||
{
|
||||
'timestamp': attempt.timestamp,
|
||||
'component': attempt.component,
|
||||
'action': attempt.action.value,
|
||||
'reason': attempt.reason,
|
||||
'success': attempt.success
|
||||
}
|
||||
for attempt in list(self.recovery_history)[-10:] # Last 10 attempts
|
||||
]
|
||||
}
|
||||
|
||||
def force_recovery(self, component: str, action: RecoveryAction, reason: str = "manual") -> bool:
|
||||
"""
|
||||
Force recovery for a component, bypassing rate limiting.
|
||||
|
||||
Args:
|
||||
component: Component identifier
|
||||
action: Recovery action to perform
|
||||
reason: Reason for forced recovery
|
||||
|
||||
Returns:
|
||||
True if recovery was successful
|
||||
"""
|
||||
logger.info(f"Forcing recovery for {component}: {action.value} ({reason})")
|
||||
|
||||
current_time = time.time()
|
||||
|
||||
try:
|
||||
# Execute recovery action directly
|
||||
success = self._execute_recovery_action(component, action, {})
|
||||
|
||||
# Record forced recovery
|
||||
attempt = RecoveryAttempt(
|
||||
timestamp=current_time,
|
||||
component=component,
|
||||
action=action,
|
||||
reason=f"forced: {reason}",
|
||||
success=success,
|
||||
details={'forced': True}
|
||||
)
|
||||
|
||||
with self._lock:
|
||||
self.recovery_history.append(attempt)
|
||||
self.recovery_attempts[component].append(current_time)
|
||||
|
||||
# Update recovery state
|
||||
self._update_recovery_state(component, success)
|
||||
|
||||
return success
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during forced recovery for {component}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
# Global recovery manager instance
|
||||
recovery_manager = RecoveryManager()
|
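A minimal usage sketch (illustrative, not part of this changeset) of the RecoveryManager API defined above; the restart handler body and the camera identifier are hypothetical.

from core.monitoring.recovery import recovery_manager, RecoveryAction

def restart_stream_handler(component, details):
    # Tear down and recreate the reader for `component`; return True on success.
    return True

recovery_manager.register_recovery_handler(RecoveryAction.RESTART_STREAM, restart_stream_handler)

# Normal path: subject to the 30s cooldown and the 20-attempts-per-hour cap.
recovery_manager.attempt_recovery("camera_001", RecoveryAction.RESTART_STREAM, reason="no frames for 300s")

# Operator path: bypasses rate limiting entirely.
recovery_manager.force_recovery("camera_001", RecoveryAction.RESTART_STREAM, reason="manual")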
351
core/monitoring/stream_health.py
Normal file
@@ -0,0 +1,351 @@
"""
|
||||
Stream-specific health monitoring for video streams.
|
||||
Tracks frame production, connection health, and stream-specific metrics.
|
||||
"""
|
||||
import time
|
||||
import logging
|
||||
import threading
|
||||
import requests
|
||||
from typing import Dict, Optional, List, Any
|
||||
from collections import deque
|
||||
from dataclasses import dataclass
|
||||
|
||||
from .health import HealthCheck, HealthStatus, health_monitor
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class StreamMetrics:
|
||||
"""Metrics for an individual stream."""
|
||||
camera_id: str
|
||||
stream_type: str # 'rtsp', 'http_snapshot'
|
||||
start_time: float
|
||||
last_frame_time: Optional[float] = None
|
||||
frame_count: int = 0
|
||||
error_count: int = 0
|
||||
reconnect_count: int = 0
|
||||
bytes_received: int = 0
|
||||
frames_per_second: float = 0.0
|
||||
connection_attempts: int = 0
|
||||
last_connection_test: Optional[float] = None
|
||||
connection_healthy: bool = True
|
||||
last_error: Optional[str] = None
|
||||
last_error_time: Optional[float] = None
|
||||
|
||||
|
||||
class StreamHealthTracker:
|
||||
"""Tracks health for individual video streams."""
|
||||
|
||||
def __init__(self):
|
||||
self.streams: Dict[str, StreamMetrics] = {}
|
||||
self._lock = threading.RLock()
|
||||
|
||||
# Configuration
|
||||
self.connection_test_interval = 300 # Test connection every 5 minutes
|
||||
self.frame_timeout_warning = 120 # Warn if no frames for 2 minutes
|
||||
self.frame_timeout_critical = 300 # Critical if no frames for 5 minutes
|
||||
self.error_rate_threshold = 0.1 # 10% error rate threshold
|
||||
|
||||
# Register with health monitor
|
||||
health_monitor.register_health_checker(self._perform_health_checks)
|
||||
|
||||
def register_stream(self, camera_id: str, stream_type: str, source_url: Optional[str] = None):
|
||||
"""Register a new stream for monitoring."""
|
||||
with self._lock:
|
||||
if camera_id not in self.streams:
|
||||
self.streams[camera_id] = StreamMetrics(
|
||||
camera_id=camera_id,
|
||||
stream_type=stream_type,
|
||||
start_time=time.time()
|
||||
)
|
||||
logger.info(f"Registered stream for monitoring: {camera_id} ({stream_type})")
|
||||
|
||||
# Update health monitor metrics
|
||||
health_monitor.update_metrics(
|
||||
camera_id,
|
||||
thread_alive=True,
|
||||
connection_healthy=True
|
||||
)
|
||||
|
||||
def unregister_stream(self, camera_id: str):
|
||||
"""Unregister a stream from monitoring."""
|
||||
with self._lock:
|
||||
if camera_id in self.streams:
|
||||
del self.streams[camera_id]
|
||||
logger.info(f"Unregistered stream from monitoring: {camera_id}")
|
||||
|
||||
def report_frame_received(self, camera_id: str, frame_size_bytes: int = 0):
|
||||
"""Report that a frame was received."""
|
||||
current_time = time.time()
|
||||
|
||||
with self._lock:
|
||||
if camera_id not in self.streams:
|
||||
logger.warning(f"Frame received for unregistered stream: {camera_id}")
|
||||
return
|
||||
|
||||
stream = self.streams[camera_id]
|
||||
|
||||
# Update frame metrics
|
||||
if stream.last_frame_time:
|
||||
interval = current_time - stream.last_frame_time
|
||||
# Calculate FPS as moving average
|
||||
if stream.frames_per_second == 0:
|
||||
stream.frames_per_second = 1.0 / interval if interval > 0 else 0
|
||||
else:
|
||||
new_fps = 1.0 / interval if interval > 0 else 0
|
||||
stream.frames_per_second = (stream.frames_per_second * 0.9) + (new_fps * 0.1)
|
||||
|
||||
stream.last_frame_time = current_time
|
||||
stream.frame_count += 1
|
||||
stream.bytes_received += frame_size_bytes
|
||||
|
||||
# Report to health monitor
|
||||
health_monitor.report_frame_received(camera_id)
|
||||
health_monitor.update_metrics(
|
||||
camera_id,
|
||||
frame_count=stream.frame_count,
|
||||
avg_frame_interval=1.0 / stream.frames_per_second if stream.frames_per_second > 0 else 0,
|
||||
last_frame_time=current_time
|
||||
)
|
||||
|
||||
def report_error(self, camera_id: str, error_message: str):
|
||||
"""Report an error for a stream."""
|
||||
current_time = time.time()
|
||||
|
||||
with self._lock:
|
||||
if camera_id not in self.streams:
|
||||
logger.warning(f"Error reported for unregistered stream: {camera_id}")
|
||||
return
|
||||
|
||||
stream = self.streams[camera_id]
|
||||
stream.error_count += 1
|
||||
stream.last_error = error_message
|
||||
stream.last_error_time = current_time
|
||||
|
||||
# Report to health monitor
|
||||
health_monitor.report_error(camera_id, "stream_error")
|
||||
health_monitor.update_metrics(
|
||||
camera_id,
|
||||
error_count=stream.error_count
|
||||
)
|
||||
|
||||
logger.debug(f"Error reported for stream {camera_id}: {error_message}")
|
||||
|
||||
def report_reconnect(self, camera_id: str, reason: str = "unknown"):
|
||||
"""Report that a stream reconnected."""
|
||||
current_time = time.time()
|
||||
|
||||
with self._lock:
|
||||
if camera_id not in self.streams:
|
||||
logger.warning(f"Reconnect reported for unregistered stream: {camera_id}")
|
||||
return
|
||||
|
||||
stream = self.streams[camera_id]
|
||||
stream.reconnect_count += 1
|
||||
|
||||
# Report to health monitor
|
||||
health_monitor.report_restart(camera_id)
|
||||
health_monitor.update_metrics(
|
||||
camera_id,
|
||||
restart_count=stream.reconnect_count
|
||||
)
|
||||
|
||||
logger.info(f"Reconnect reported for stream {camera_id}: {reason}")
|
||||
|
||||
def report_connection_attempt(self, camera_id: str, success: bool):
|
||||
"""Report a connection attempt."""
|
||||
with self._lock:
|
||||
if camera_id not in self.streams:
|
||||
return
|
||||
|
||||
stream = self.streams[camera_id]
|
||||
stream.connection_attempts += 1
|
||||
stream.connection_healthy = success
|
||||
|
||||
# Report to health monitor
|
||||
health_monitor.update_metrics(
|
||||
camera_id,
|
||||
connection_healthy=success
|
||||
)
|
||||
|
||||
def test_http_connection(self, camera_id: str, url: str) -> bool:
|
||||
"""Test HTTP connection health for snapshot streams."""
|
||||
try:
|
||||
# Quick HEAD request to test connectivity
|
||||
response = requests.head(url, timeout=5, verify=False)
|
||||
success = response.status_code in [200, 404] # 404 might be normal for some cameras
|
||||
|
||||
self.report_connection_attempt(camera_id, success)
|
||||
|
||||
if success:
|
||||
logger.debug(f"Connection test passed for {camera_id}")
|
||||
else:
|
||||
logger.warning(f"Connection test failed for {camera_id}: HTTP {response.status_code}")
|
||||
|
||||
return success
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Connection test failed for {camera_id}: {e}")
|
||||
self.report_connection_attempt(camera_id, False)
|
||||
return False
|
||||
|
||||
def get_stream_metrics(self, camera_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get metrics for a specific stream."""
|
||||
with self._lock:
|
||||
if camera_id not in self.streams:
|
||||
return None
|
||||
|
||||
stream = self.streams[camera_id]
|
||||
current_time = time.time()
|
||||
|
||||
# Calculate derived metrics
|
||||
uptime = current_time - stream.start_time
|
||||
frame_age = current_time - stream.last_frame_time if stream.last_frame_time else None
|
||||
error_rate = stream.error_count / max(1, stream.frame_count)
|
||||
|
||||
return {
|
||||
'camera_id': camera_id,
|
||||
'stream_type': stream.stream_type,
|
||||
'uptime_seconds': uptime,
|
||||
'frame_count': stream.frame_count,
|
||||
'frames_per_second': stream.frames_per_second,
|
||||
'bytes_received': stream.bytes_received,
|
||||
'error_count': stream.error_count,
|
||||
'error_rate': error_rate,
|
||||
'reconnect_count': stream.reconnect_count,
|
||||
'connection_attempts': stream.connection_attempts,
|
||||
'connection_healthy': stream.connection_healthy,
|
||||
'last_frame_age_seconds': frame_age,
|
||||
'last_error': stream.last_error,
|
||||
'last_error_time': stream.last_error_time
|
||||
}
|
||||
|
||||
def get_all_metrics(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""Get metrics for all streams."""
|
||||
with self._lock:
|
||||
return {
|
||||
camera_id: self.get_stream_metrics(camera_id)
|
||||
for camera_id in self.streams.keys()
|
||||
}
|
||||
|
||||
def _perform_health_checks(self) -> List[HealthCheck]:
|
||||
"""Perform health checks for all streams."""
|
||||
checks = []
|
||||
current_time = time.time()
|
||||
|
||||
with self._lock:
|
||||
for camera_id, stream in self.streams.items():
|
||||
checks.extend(self._check_stream_health(camera_id, stream, current_time))
|
||||
|
||||
return checks
|
||||
|
||||
def _check_stream_health(self, camera_id: str, stream: StreamMetrics, current_time: float) -> List[HealthCheck]:
|
||||
"""Perform health checks for a single stream."""
|
||||
checks = []
|
||||
|
||||
# Check frame freshness
|
||||
if stream.last_frame_time:
|
||||
frame_age = current_time - stream.last_frame_time
|
||||
|
||||
if frame_age > self.frame_timeout_critical:
|
||||
checks.append(HealthCheck(
|
||||
name=f"stream_{camera_id}_frames",
|
||||
status=HealthStatus.CRITICAL,
|
||||
message=f"No frames for {frame_age:.1f}s (critical threshold: {self.frame_timeout_critical}s)",
|
||||
details={
|
||||
'frame_age': frame_age,
|
||||
'threshold': self.frame_timeout_critical,
|
||||
'last_frame_time': stream.last_frame_time
|
||||
},
|
||||
recovery_action="restart_stream"
|
||||
))
|
||||
elif frame_age > self.frame_timeout_warning:
|
||||
checks.append(HealthCheck(
|
||||
name=f"stream_{camera_id}_frames",
|
||||
status=HealthStatus.WARNING,
|
||||
message=f"Frames aging: {frame_age:.1f}s (warning threshold: {self.frame_timeout_warning}s)",
|
||||
details={
|
||||
'frame_age': frame_age,
|
||||
'threshold': self.frame_timeout_warning,
|
||||
'last_frame_time': stream.last_frame_time
|
||||
}
|
||||
))
|
||||
else:
|
||||
# No frames received yet
|
||||
startup_time = current_time - stream.start_time
|
||||
if startup_time > 60: # Allow 1 minute for initial connection
|
||||
checks.append(HealthCheck(
|
||||
name=f"stream_{camera_id}_startup",
|
||||
status=HealthStatus.CRITICAL,
|
||||
message=f"No frames received since startup {startup_time:.1f}s ago",
|
||||
details={
|
||||
'startup_time': startup_time,
|
||||
'start_time': stream.start_time
|
||||
},
|
||||
recovery_action="restart_stream"
|
||||
))
|
||||
|
||||
# Check error rate
|
||||
if stream.frame_count > 10: # Need sufficient samples
|
||||
error_rate = stream.error_count / stream.frame_count
|
||||
if error_rate > self.error_rate_threshold:
|
||||
checks.append(HealthCheck(
|
||||
name=f"stream_{camera_id}_errors",
|
||||
status=HealthStatus.WARNING,
|
||||
message=f"High error rate: {error_rate:.1%} ({stream.error_count}/{stream.frame_count})",
|
||||
details={
|
||||
'error_rate': error_rate,
|
||||
'error_count': stream.error_count,
|
||||
'frame_count': stream.frame_count,
|
||||
'last_error': stream.last_error
|
||||
}
|
||||
))
|
||||
|
||||
# Check connection health
|
||||
if not stream.connection_healthy:
|
||||
checks.append(HealthCheck(
|
||||
name=f"stream_{camera_id}_connection",
|
||||
status=HealthStatus.WARNING,
|
||||
message="Connection unhealthy (last test failed)",
|
||||
details={
|
||||
'connection_attempts': stream.connection_attempts,
|
||||
'last_connection_test': stream.last_connection_test
|
||||
}
|
||||
))
|
||||
|
||||
# Check excessive reconnects
|
||||
uptime_hours = (current_time - stream.start_time) / 3600
|
||||
if uptime_hours > 1 and stream.reconnect_count > 5: # More than 5 reconnects per hour
|
||||
reconnect_rate = stream.reconnect_count / uptime_hours
|
||||
checks.append(HealthCheck(
|
||||
name=f"stream_{camera_id}_stability",
|
||||
status=HealthStatus.WARNING,
|
||||
message=f"Frequent reconnects: {reconnect_rate:.1f}/hour ({stream.reconnect_count} total)",
|
||||
details={
|
||||
'reconnect_rate': reconnect_rate,
|
||||
'reconnect_count': stream.reconnect_count,
|
||||
'uptime_hours': uptime_hours
|
||||
}
|
||||
))
|
||||
|
||||
# Check frame rate health
|
||||
if stream.last_frame_time and stream.frames_per_second > 0:
|
||||
expected_fps = 6.0 # Expected FPS for streams
|
||||
if stream.frames_per_second < expected_fps * 0.5: # Less than 50% of expected
|
||||
checks.append(HealthCheck(
|
||||
name=f"stream_{camera_id}_framerate",
|
||||
status=HealthStatus.WARNING,
|
||||
message=f"Low frame rate: {stream.frames_per_second:.1f} fps (expected: ~{expected_fps} fps)",
|
||||
details={
|
||||
'current_fps': stream.frames_per_second,
|
||||
'expected_fps': expected_fps
|
||||
}
|
||||
))
|
||||
|
||||
return checks
|
||||
|
||||
|
||||
# Global stream health tracker instance
|
||||
stream_health_tracker = StreamHealthTracker()
|
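A minimal usage sketch (illustrative, not part of this changeset) of how a reader would feed the StreamHealthTracker; the camera id, byte count, and error text are placeholders.

from core.monitoring.stream_health import stream_health_tracker

# On stream start-up:
stream_health_tracker.register_stream("camera_001", "rtsp")

# Per decoded frame (frame_size_bytes is optional):
stream_health_tracker.report_frame_received("camera_001", frame_size_bytes=150_000)

# On failures and reconnects:
stream_health_tracker.report_error("camera_001", "decode timeout")
stream_health_tracker.report_reconnect("camera_001", reason="stale connection")

# Periodic snapshot of derived metrics (fps, error rate, frame age, ...):
metrics = stream_health_tracker.get_stream_metrics("camera_001")

# On shutdown:
stream_health_tracker.unregister_stream("camera_001")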
381
core/monitoring/thread_health.py
Normal file
@@ -0,0 +1,381 @@
"""
|
||||
Thread health monitoring for detecting unresponsive and deadlocked threads.
|
||||
Provides thread liveness detection and responsiveness testing.
|
||||
"""
|
||||
import time
|
||||
import threading
|
||||
import logging
|
||||
import sys
import signal
|
||||
import traceback
|
||||
from typing import Dict, List, Optional, Any, Callable
|
||||
from dataclasses import dataclass
|
||||
from collections import defaultdict
|
||||
|
||||
from .health import HealthCheck, HealthStatus, health_monitor
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ThreadInfo:
|
||||
"""Information about a monitored thread."""
|
||||
thread_id: int
|
||||
thread_name: str
|
||||
start_time: float
|
||||
last_heartbeat: float
|
||||
heartbeat_count: int = 0
|
||||
is_responsive: bool = True
|
||||
last_activity: Optional[str] = None
|
||||
stack_traces: List[str] = None
|
||||
|
||||
|
||||
class ThreadHealthMonitor:
|
||||
"""Monitors thread health and responsiveness."""
|
||||
|
||||
def __init__(self):
|
||||
self.monitored_threads: Dict[int, ThreadInfo] = {}
|
||||
self.heartbeat_callbacks: Dict[int, Callable[[], bool]] = {}
|
||||
self._lock = threading.RLock()
|
||||
|
||||
# Configuration
|
||||
self.heartbeat_timeout = 60.0 # 1 minute without heartbeat = unresponsive
|
||||
self.responsiveness_test_interval = 30.0 # Test responsiveness every 30 seconds
|
||||
self.stack_trace_count = 5 # Keep last 5 stack traces for analysis
|
||||
|
||||
# Register with health monitor
|
||||
health_monitor.register_health_checker(self._perform_health_checks)
|
||||
|
||||
# Enable periodic responsiveness testing
|
||||
self.test_thread = threading.Thread(target=self._responsiveness_test_loop, daemon=True)
|
||||
self.test_thread.start()
|
||||
|
||||
def register_thread(self, thread: threading.Thread, heartbeat_callback: Optional[Callable[[], bool]] = None):
|
||||
"""
|
||||
Register a thread for monitoring.
|
||||
|
||||
Args:
|
||||
thread: Thread to monitor
|
||||
heartbeat_callback: Optional callback to test thread responsiveness
|
||||
"""
|
||||
with self._lock:
|
||||
thread_info = ThreadInfo(
|
||||
thread_id=thread.ident,
|
||||
thread_name=thread.name,
|
||||
start_time=time.time(),
|
||||
last_heartbeat=time.time()
|
||||
)
|
||||
|
||||
self.monitored_threads[thread.ident] = thread_info
|
||||
|
||||
if heartbeat_callback:
|
||||
self.heartbeat_callbacks[thread.ident] = heartbeat_callback
|
||||
|
||||
logger.info(f"Registered thread for monitoring: {thread.name} (ID: {thread.ident})")
|
||||
|
||||
def unregister_thread(self, thread_id: int):
|
||||
"""Unregister a thread from monitoring."""
|
||||
with self._lock:
|
||||
if thread_id in self.monitored_threads:
|
||||
thread_name = self.monitored_threads[thread_id].thread_name
|
||||
del self.monitored_threads[thread_id]
|
||||
|
||||
if thread_id in self.heartbeat_callbacks:
|
||||
del self.heartbeat_callbacks[thread_id]
|
||||
|
||||
logger.info(f"Unregistered thread from monitoring: {thread_name} (ID: {thread_id})")
|
||||
|
||||
def heartbeat(self, thread_id: Optional[int] = None, activity: Optional[str] = None):
|
||||
"""
|
||||
Report thread heartbeat.
|
||||
|
||||
Args:
|
||||
thread_id: Thread ID (uses current thread if None)
|
||||
activity: Description of current activity
|
||||
"""
|
||||
if thread_id is None:
|
||||
thread_id = threading.current_thread().ident
|
||||
|
||||
current_time = time.time()
|
||||
|
||||
with self._lock:
|
||||
if thread_id in self.monitored_threads:
|
||||
thread_info = self.monitored_threads[thread_id]
|
||||
thread_info.last_heartbeat = current_time
|
||||
thread_info.heartbeat_count += 1
|
||||
thread_info.is_responsive = True
|
||||
|
||||
if activity:
|
||||
thread_info.last_activity = activity
|
||||
|
||||
# Report to health monitor
|
||||
health_monitor.update_metrics(
|
||||
f"thread_{thread_info.thread_name}",
|
||||
thread_alive=True,
|
||||
last_frame_time=current_time
|
||||
)
|
||||
|
||||
def get_thread_info(self, thread_id: int) -> Optional[Dict[str, Any]]:
|
||||
"""Get information about a monitored thread."""
|
||||
with self._lock:
|
||||
if thread_id not in self.monitored_threads:
|
||||
return None
|
||||
|
||||
thread_info = self.monitored_threads[thread_id]
|
||||
current_time = time.time()
|
||||
|
||||
return {
|
||||
'thread_id': thread_id,
|
||||
'thread_name': thread_info.thread_name,
|
||||
'uptime_seconds': current_time - thread_info.start_time,
|
||||
'last_heartbeat_age': current_time - thread_info.last_heartbeat,
|
||||
'heartbeat_count': thread_info.heartbeat_count,
|
||||
'is_responsive': thread_info.is_responsive,
|
||||
'last_activity': thread_info.last_activity,
|
||||
'stack_traces': thread_info.stack_traces or []
|
||||
}
|
||||
|
||||
def get_all_thread_info(self) -> Dict[int, Dict[str, Any]]:
|
||||
"""Get information about all monitored threads."""
|
||||
with self._lock:
|
||||
return {
|
||||
thread_id: self.get_thread_info(thread_id)
|
||||
for thread_id in self.monitored_threads.keys()
|
||||
}
|
||||
|
||||
def test_thread_responsiveness(self, thread_id: int) -> bool:
|
||||
"""
|
||||
Test if a thread is responsive by calling its heartbeat callback.
|
||||
|
||||
Args:
|
||||
thread_id: ID of thread to test
|
||||
|
||||
Returns:
|
||||
True if thread responds within timeout
|
||||
"""
|
||||
if thread_id not in self.heartbeat_callbacks:
|
||||
return True # Can't test if no callback provided
|
||||
|
||||
try:
|
||||
# Call the heartbeat callback with a timeout
|
||||
callback = self.heartbeat_callbacks[thread_id]
|
||||
|
||||
# This is a simple approach - in practice you might want to use
|
||||
# threading.Timer or asyncio for more sophisticated timeout handling
|
||||
start_time = time.time()
|
||||
result = callback()
|
||||
response_time = time.time() - start_time
|
||||
|
||||
with self._lock:
|
||||
if thread_id in self.monitored_threads:
|
||||
self.monitored_threads[thread_id].is_responsive = result
|
||||
|
||||
if response_time > 5.0: # Slow response
|
||||
logger.warning(f"Thread {thread_id} slow response: {response_time:.1f}s")
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error testing thread {thread_id} responsiveness: {e}")
|
||||
with self._lock:
|
||||
if thread_id in self.monitored_threads:
|
||||
self.monitored_threads[thread_id].is_responsive = False
|
||||
return False
|
||||
|
||||
def capture_stack_trace(self, thread_id: int) -> Optional[str]:
|
||||
"""
|
||||
Capture stack trace for a thread.
|
||||
|
||||
Args:
|
||||
thread_id: ID of thread to capture
|
||||
|
||||
Returns:
|
||||
Stack trace string or None if not available
|
||||
"""
|
||||
try:
|
||||
# Get all frames for all threads
|
||||
frames = dict(sys._current_frames())  # sys._current_frames() maps thread id -> frame
|
||||
|
||||
if thread_id not in frames:
|
||||
return None
|
||||
|
||||
# Format stack trace
|
||||
frame = frames[thread_id]
|
||||
stack_trace = ''.join(traceback.format_stack(frame))
|
||||
|
||||
# Store in thread info
|
||||
with self._lock:
|
||||
if thread_id in self.monitored_threads:
|
||||
thread_info = self.monitored_threads[thread_id]
|
||||
if thread_info.stack_traces is None:
|
||||
thread_info.stack_traces = []
|
||||
|
||||
thread_info.stack_traces.append(f"{time.time()}: {stack_trace}")
|
||||
|
||||
# Keep only last N stack traces
|
||||
if len(thread_info.stack_traces) > self.stack_trace_count:
|
||||
thread_info.stack_traces = thread_info.stack_traces[-self.stack_trace_count:]
|
||||
|
||||
return stack_trace
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error capturing stack trace for thread {thread_id}: {e}")
|
||||
return None
|
||||
|
||||
def detect_deadlocks(self) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Attempt to detect potential deadlocks by analyzing thread states.
|
||||
|
||||
Returns:
|
||||
List of potential deadlock scenarios
|
||||
"""
|
||||
deadlocks = []
|
||||
current_time = time.time()
|
||||
|
||||
with self._lock:
|
||||
# Look for threads that haven't had heartbeats for a long time
|
||||
# and are supposedly alive
|
||||
for thread_id, thread_info in self.monitored_threads.items():
|
||||
heartbeat_age = current_time - thread_info.last_heartbeat
|
||||
|
||||
if heartbeat_age > self.heartbeat_timeout * 2: # Double the timeout
|
||||
# Check if thread still exists
|
||||
thread_exists = any(
|
||||
t.ident == thread_id and t.is_alive()
|
||||
for t in threading.enumerate()
|
||||
)
|
||||
|
||||
if thread_exists:
|
||||
# Thread exists but not responding - potential deadlock
|
||||
stack_trace = self.capture_stack_trace(thread_id)
|
||||
|
||||
deadlock_info = {
|
||||
'thread_id': thread_id,
|
||||
'thread_name': thread_info.thread_name,
|
||||
'heartbeat_age': heartbeat_age,
|
||||
'last_activity': thread_info.last_activity,
|
||||
'stack_trace': stack_trace,
|
||||
'detection_time': current_time
|
||||
}
|
||||
|
||||
deadlocks.append(deadlock_info)
|
||||
logger.warning(f"Potential deadlock detected in thread {thread_info.thread_name}")
|
||||
|
||||
return deadlocks
|
||||
|
||||
def _responsiveness_test_loop(self):
|
||||
"""Background loop to test thread responsiveness."""
|
||||
logger.info("Thread responsiveness testing started")
|
||||
|
||||
while True:
|
||||
try:
|
||||
time.sleep(self.responsiveness_test_interval)
|
||||
|
||||
with self._lock:
|
||||
thread_ids = list(self.monitored_threads.keys())
|
||||
|
||||
for thread_id in thread_ids:
|
||||
try:
|
||||
self.test_thread_responsiveness(thread_id)
|
||||
except Exception as e:
|
||||
logger.error(f"Error testing thread {thread_id}: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in responsiveness test loop: {e}")
|
||||
time.sleep(10.0) # Fallback sleep
|
||||
|
||||
def _perform_health_checks(self) -> List[HealthCheck]:
|
||||
"""Perform health checks for all monitored threads."""
|
||||
checks = []
|
||||
current_time = time.time()
|
||||
|
||||
with self._lock:
|
||||
for thread_id, thread_info in self.monitored_threads.items():
|
||||
checks.extend(self._check_thread_health(thread_id, thread_info, current_time))
|
||||
|
||||
# Check for deadlocks
|
||||
deadlocks = self.detect_deadlocks()
|
||||
for deadlock in deadlocks:
|
||||
checks.append(HealthCheck(
|
||||
name=f"deadlock_detection_{deadlock['thread_id']}",
|
||||
status=HealthStatus.CRITICAL,
|
||||
message=f"Potential deadlock in thread {deadlock['thread_name']} "
|
||||
f"(unresponsive for {deadlock['heartbeat_age']:.1f}s)",
|
||||
details=deadlock,
|
||||
recovery_action="restart_thread"
|
||||
))
|
||||
|
||||
return checks
|
||||
|
||||
def _check_thread_health(self, thread_id: int, thread_info: ThreadInfo, current_time: float) -> List[HealthCheck]:
|
||||
"""Perform health checks for a single thread."""
|
||||
checks = []
|
||||
|
||||
# Check if thread still exists
|
||||
thread_exists = any(
|
||||
t.ident == thread_id and t.is_alive()
|
||||
for t in threading.enumerate()
|
||||
)
|
||||
|
||||
if not thread_exists:
|
||||
checks.append(HealthCheck(
|
||||
name=f"thread_{thread_info.thread_name}_alive",
|
||||
status=HealthStatus.CRITICAL,
|
||||
message=f"Thread {thread_info.thread_name} is no longer alive",
|
||||
details={
|
||||
'thread_id': thread_id,
|
||||
'uptime': current_time - thread_info.start_time,
|
||||
'last_heartbeat': thread_info.last_heartbeat
|
||||
},
|
||||
recovery_action="restart_thread"
|
||||
))
|
||||
return checks
|
||||
|
||||
# Check heartbeat freshness
|
||||
heartbeat_age = current_time - thread_info.last_heartbeat
|
||||
|
||||
if heartbeat_age > self.heartbeat_timeout:
|
||||
checks.append(HealthCheck(
|
||||
name=f"thread_{thread_info.thread_name}_responsive",
|
||||
status=HealthStatus.CRITICAL,
|
||||
message=f"Thread {thread_info.thread_name} unresponsive for {heartbeat_age:.1f}s",
|
||||
details={
|
||||
'thread_id': thread_id,
|
||||
'heartbeat_age': heartbeat_age,
|
||||
'heartbeat_count': thread_info.heartbeat_count,
|
||||
'last_activity': thread_info.last_activity,
|
||||
'is_responsive': thread_info.is_responsive
|
||||
},
|
||||
recovery_action="restart_thread"
|
||||
))
|
||||
elif heartbeat_age > self.heartbeat_timeout * 0.5: # Warning at 50% of timeout
|
||||
checks.append(HealthCheck(
|
||||
name=f"thread_{thread_info.thread_name}_responsive",
|
||||
status=HealthStatus.WARNING,
|
||||
message=f"Thread {thread_info.thread_name} slow heartbeat: {heartbeat_age:.1f}s",
|
||||
details={
|
||||
'thread_id': thread_id,
|
||||
'heartbeat_age': heartbeat_age,
|
||||
'heartbeat_count': thread_info.heartbeat_count,
|
||||
'last_activity': thread_info.last_activity,
|
||||
'is_responsive': thread_info.is_responsive
|
||||
}
|
||||
))
|
||||
|
||||
# Check responsiveness test results
|
||||
if not thread_info.is_responsive:
|
||||
checks.append(HealthCheck(
|
||||
name=f"thread_{thread_info.thread_name}_callback",
|
||||
status=HealthStatus.WARNING,
|
||||
message=f"Thread {thread_info.thread_name} failed responsiveness test",
|
||||
details={
|
||||
'thread_id': thread_id,
|
||||
'last_activity': thread_info.last_activity
|
||||
}
|
||||
))
|
||||
|
||||
return checks
|
||||
|
||||
|
||||
# Global thread health monitor instance
|
||||
thread_health_monitor = ThreadHealthMonitor()
|
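A minimal usage sketch (illustrative, not part of this changeset) of registering a worker with the ThreadHealthMonitor; the worker loop and its one-second cadence are placeholders.

import threading
import time

from core.monitoring.thread_health import thread_health_monitor

stop_event = threading.Event()

def worker_loop():
    while not stop_event.is_set():
        # Heartbeat from inside the monitored thread; activity is free-form text.
        thread_health_monitor.heartbeat(activity="processing frames")
        time.sleep(1.0)

worker = threading.Thread(target=worker_loop, name="FrameWorker", daemon=True)
worker.start()

# The optional callback is used by the periodic responsiveness probe.
thread_health_monitor.register_thread(worker, heartbeat_callback=worker.is_alive)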
@@ -2,14 +2,14 @@
 Streaming system for RTSP and HTTP camera feeds.
 Provides modular frame readers, buffers, and stream management.
 """
-from .readers import RTSPReader, HTTPSnapshotReader
+from .readers import HTTPSnapshotReader, FFmpegRTSPReader
 from .buffers import FrameBuffer, CacheBuffer, shared_frame_buffer, shared_cache_buffer
 from .manager import StreamManager, StreamConfig, SubscriptionInfo, shared_stream_manager, initialize_stream_manager

 __all__ = [
     # Readers
-    'RTSPReader',
     'HTTPSnapshotReader',
+    'FFmpegRTSPReader',

     # Buffers
     'FrameBuffer',

@@ -9,53 +9,25 @@ import logging
import numpy as np
|
||||
from typing import Optional, Dict, Any, Tuple
|
||||
from collections import defaultdict
|
||||
from enum import Enum
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class StreamType(Enum):
|
||||
"""Stream type enumeration."""
|
||||
RTSP = "rtsp" # 1280x720 @ 6fps
|
||||
HTTP = "http" # 2560x1440 high quality
|
||||
|
||||
|
||||
class FrameBuffer:
|
||||
"""Thread-safe frame buffer optimized for different stream types."""
|
||||
"""Thread-safe frame buffer for all camera streams."""
|
||||
|
||||
def __init__(self, max_age_seconds: int = 5):
|
||||
self.max_age_seconds = max_age_seconds
|
||||
self._frames: Dict[str, Dict[str, Any]] = {}
|
||||
self._stream_types: Dict[str, StreamType] = {}
|
||||
self._lock = threading.RLock()
|
||||
|
||||
# Stream-specific settings
|
||||
self.rtsp_config = {
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
'fps': 6,
|
||||
'max_size_mb': 3 # 1280x720x3 bytes = ~2.6MB
|
||||
}
|
||||
self.http_config = {
|
||||
'width': 2560,
|
||||
'height': 1440,
|
||||
'max_size_mb': 10
|
||||
}
|
||||
|
||||
def put_frame(self, camera_id: str, frame: np.ndarray, stream_type: Optional[StreamType] = None):
|
||||
"""Store a frame for the given camera ID with type-specific validation."""
|
||||
def put_frame(self, camera_id: str, frame: np.ndarray):
|
||||
"""Store a frame for the given camera ID."""
|
||||
with self._lock:
|
||||
# Detect stream type if not provided
|
||||
if stream_type is None:
|
||||
stream_type = self._detect_stream_type(frame)
|
||||
|
||||
# Store stream type
|
||||
self._stream_types[camera_id] = stream_type
|
||||
|
||||
# Validate frame based on stream type
|
||||
if not self._validate_frame(frame, stream_type):
|
||||
logger.warning(f"Frame validation failed for camera {camera_id} ({stream_type.value})")
|
||||
# Validate frame
|
||||
if not self._validate_frame(frame):
|
||||
logger.warning(f"Frame validation failed for camera {camera_id}")
|
||||
return
|
||||
|
||||
self._frames[camera_id] = {
|
||||
|
@@ -63,14 +35,9 @@ class FrameBuffer:
'timestamp': time.time(),
|
||||
'shape': frame.shape,
|
||||
'dtype': str(frame.dtype),
|
||||
'stream_type': stream_type.value,
|
||||
'size_mb': frame.nbytes / (1024 * 1024)
|
||||
}
|
||||
|
||||
# Commented out verbose frame storage logging
|
||||
# logger.debug(f"Stored {stream_type.value} frame for camera {camera_id}: "
|
||||
# f"{frame.shape[1]}x{frame.shape[0]}, {frame.nbytes / (1024 * 1024):.2f}MB")
|
||||
|
||||
def get_frame(self, camera_id: str) -> Optional[np.ndarray]:
|
||||
"""Get the latest frame for the given camera ID."""
|
||||
with self._lock:
|
||||
|
@@ -79,15 +46,7 @@ class FrameBuffer:
|
||||
frame_data = self._frames[camera_id]
|
||||
|
||||
# Check if frame is too old
|
||||
age = time.time() - frame_data['timestamp']
|
||||
if age > self.max_age_seconds:
|
||||
logger.debug(f"Frame for camera {camera_id} is {age:.1f}s old, discarding")
|
||||
del self._frames[camera_id]
|
||||
if camera_id in self._stream_types:
|
||||
del self._stream_types[camera_id]
|
||||
return None
|
||||
|
||||
# Return frame regardless of age - frames persist until replaced
|
||||
return frame_data['frame'].copy()
|
||||
|
||||
def get_frame_info(self, camera_id: str) -> Optional[Dict[str, Any]]:
|
||||
|
@@ -99,18 +58,12 @@ class FrameBuffer:
frame_data = self._frames[camera_id]
|
||||
age = time.time() - frame_data['timestamp']
|
||||
|
||||
if age > self.max_age_seconds:
|
||||
del self._frames[camera_id]
|
||||
if camera_id in self._stream_types:
|
||||
del self._stream_types[camera_id]
|
||||
return None
|
||||
|
||||
# Return frame info regardless of age - frames persist until replaced
|
||||
return {
|
||||
'timestamp': frame_data['timestamp'],
|
||||
'age': age,
|
||||
'shape': frame_data['shape'],
|
||||
'dtype': frame_data['dtype'],
|
||||
'stream_type': frame_data.get('stream_type', 'unknown'),
|
||||
'size_mb': frame_data.get('size_mb', 0)
|
||||
}
|
||||
|
||||
|
@@ -123,8 +76,6 @@ class FrameBuffer:
with self._lock:
|
||||
if camera_id in self._frames:
|
||||
del self._frames[camera_id]
|
||||
if camera_id in self._stream_types:
|
||||
del self._stream_types[camera_id]
|
||||
logger.debug(f"Cleared frames for camera {camera_id}")
|
||||
|
||||
def clear_all(self):
|
||||
|
@@ -132,30 +83,13 @@ class FrameBuffer:
with self._lock:
|
||||
count = len(self._frames)
|
||||
self._frames.clear()
|
||||
self._stream_types.clear()
|
||||
logger.debug(f"Cleared all frames ({count} cameras)")
|
||||
|
||||
def get_camera_list(self) -> list:
|
||||
"""Get list of cameras with valid frames."""
|
||||
"""Get list of cameras with frames - all frames persist until replaced."""
|
||||
with self._lock:
|
||||
current_time = time.time()
|
||||
valid_cameras = []
|
||||
expired_cameras = []
|
||||
|
||||
for camera_id, frame_data in self._frames.items():
|
||||
age = current_time - frame_data['timestamp']
|
||||
if age <= self.max_age_seconds:
|
||||
valid_cameras.append(camera_id)
|
||||
else:
|
||||
expired_cameras.append(camera_id)
|
||||
|
||||
# Clean up expired frames
|
||||
for camera_id in expired_cameras:
|
||||
del self._frames[camera_id]
|
||||
if camera_id in self._stream_types:
|
||||
del self._stream_types[camera_id]
|
||||
|
||||
return valid_cameras
|
||||
# Return all cameras that have frames - no age-based filtering
|
||||
return list(self._frames.keys())
|
||||
|
||||
def get_stats(self) -> Dict[str, Any]:
|
||||
"""Get buffer statistics."""
|
||||
|
@@ -163,104 +97,68 @@ class FrameBuffer:
current_time = time.time()
|
||||
stats = {
|
||||
'total_cameras': len(self._frames),
|
||||
'valid_cameras': 0,
|
||||
'expired_cameras': 0,
|
||||
'rtsp_cameras': 0,
|
||||
'http_cameras': 0,
|
||||
'recent_cameras': 0,
|
||||
'stale_cameras': 0,
|
||||
'total_memory_mb': 0,
|
||||
'cameras': {}
|
||||
}
|
||||
|
||||
for camera_id, frame_data in self._frames.items():
|
||||
age = current_time - frame_data['timestamp']
|
||||
stream_type = frame_data.get('stream_type', 'unknown')
|
||||
size_mb = frame_data.get('size_mb', 0)
|
||||
|
||||
# All frames are valid/available, but categorize by freshness for monitoring
|
||||
if age <= self.max_age_seconds:
|
||||
stats['valid_cameras'] += 1
|
||||
stats['recent_cameras'] += 1
|
||||
else:
|
||||
stats['expired_cameras'] += 1
|
||||
|
||||
if stream_type == StreamType.RTSP.value:
|
||||
stats['rtsp_cameras'] += 1
|
||||
elif stream_type == StreamType.HTTP.value:
|
||||
stats['http_cameras'] += 1
|
||||
stats['stale_cameras'] += 1
|
||||
|
||||
stats['total_memory_mb'] += size_mb
|
||||
|
||||
stats['cameras'][camera_id] = {
|
||||
'age': age,
|
||||
'valid': age <= self.max_age_seconds,
|
||||
'recent': age <= self.max_age_seconds, # Recent but all frames available
|
||||
'shape': frame_data['shape'],
|
||||
'dtype': frame_data['dtype'],
|
||||
'stream_type': stream_type,
|
||||
'size_mb': size_mb
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
def _detect_stream_type(self, frame: np.ndarray) -> StreamType:
|
||||
"""Detect stream type based on frame dimensions."""
|
||||
h, w = frame.shape[:2]
|
||||
|
||||
# Check if it matches RTSP dimensions (1280x720)
|
||||
if w == self.rtsp_config['width'] and h == self.rtsp_config['height']:
|
||||
return StreamType.RTSP
|
||||
|
||||
# Check if it matches HTTP dimensions (2560x1440) or close to it
|
||||
if w >= 2000 and h >= 1000:
|
||||
return StreamType.HTTP
|
||||
|
||||
# Default based on size
|
||||
if w <= 1920 and h <= 1080:
|
||||
return StreamType.RTSP
|
||||
else:
|
||||
return StreamType.HTTP
|
||||
|
||||
def _validate_frame(self, frame: np.ndarray, stream_type: StreamType) -> bool:
|
||||
"""Validate frame based on stream type."""
|
||||
def _validate_frame(self, frame: np.ndarray) -> bool:
|
||||
"""Validate frame - basic validation for any stream type."""
|
||||
if frame is None or frame.size == 0:
|
||||
return False
|
||||
|
||||
h, w = frame.shape[:2]
|
||||
size_mb = frame.nbytes / (1024 * 1024)
|
||||
|
||||
if stream_type == StreamType.RTSP:
|
||||
config = self.rtsp_config
|
||||
# Allow some tolerance for RTSP streams
|
||||
if abs(w - config['width']) > 100 or abs(h - config['height']) > 100:
|
||||
logger.warning(f"RTSP frame size mismatch: {w}x{h} (expected {config['width']}x{config['height']})")
|
||||
if size_mb > config['max_size_mb']:
|
||||
logger.warning(f"RTSP frame too large: {size_mb:.2f}MB (max {config['max_size_mb']}MB)")
|
||||
return False
|
||||
# Basic size validation - reject extremely large frames regardless of type
|
||||
max_size_mb = 50 # Generous limit for any frame type
|
||||
if size_mb > max_size_mb:
|
||||
logger.warning(f"Frame too large: {size_mb:.2f}MB (max {max_size_mb}MB) for {w}x{h}")
|
||||
return False
|
||||
|
||||
elif stream_type == StreamType.HTTP:
|
||||
config = self.http_config
|
||||
# More flexible for HTTP snapshots
|
||||
if size_mb > config['max_size_mb']:
|
||||
logger.warning(f"HTTP snapshot too large: {size_mb:.2f}MB (max {config['max_size_mb']}MB)")
|
||||
return False
|
||||
# Basic dimension validation
|
||||
if w < 100 or h < 100:
|
||||
logger.warning(f"Frame too small: {w}x{h}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
class CacheBuffer:
|
||||
"""Enhanced frame cache with support for cropping and optimized for different formats."""
|
||||
"""Enhanced frame cache with support for cropping."""
|
||||
|
||||
def __init__(self, max_age_seconds: int = 10):
|
||||
self.frame_buffer = FrameBuffer(max_age_seconds)
|
||||
self._crop_cache: Dict[str, Dict[str, Any]] = {}
|
||||
self._cache_lock = threading.RLock()
|
||||
self.jpeg_quality = 95 # High quality for all frames
|
||||
|
||||
# Quality settings for different stream types
|
||||
self.jpeg_quality = {
|
||||
StreamType.RTSP: 90, # Good quality for 720p
|
||||
StreamType.HTTP: 95 # High quality for 2K
|
||||
}
|
||||
|
||||
def put_frame(self, camera_id: str, frame: np.ndarray, stream_type: Optional[StreamType] = None):
|
||||
def put_frame(self, camera_id: str, frame: np.ndarray):
|
||||
"""Store a frame and clear any associated crop cache."""
|
||||
self.frame_buffer.put_frame(camera_id, frame, stream_type)
|
||||
self.frame_buffer.put_frame(camera_id, frame)
|
||||
|
||||
# Clear crop cache for this camera since we have a new frame
|
||||
with self._cache_lock:
|
||||
|
@@ -325,21 +223,15 @@ class CacheBuffer:
|
||||
def get_frame_as_jpeg(self, camera_id: str, crop_coords: Optional[Tuple[int, int, int, int]] = None,
|
||||
quality: Optional[int] = None) -> Optional[bytes]:
|
||||
"""Get frame as JPEG bytes with format-specific quality settings."""
|
||||
"""Get frame as JPEG bytes."""
|
||||
frame = self.get_frame(camera_id, crop_coords)
|
||||
if frame is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Determine quality based on stream type if not specified
|
||||
# Use specified quality or default
|
||||
if quality is None:
|
||||
frame_info = self.frame_buffer.get_frame_info(camera_id)
|
||||
if frame_info:
|
||||
stream_type_str = frame_info.get('stream_type', StreamType.RTSP.value)
|
||||
stream_type = StreamType.RTSP if stream_type_str == StreamType.RTSP.value else StreamType.HTTP
|
||||
quality = self.jpeg_quality[stream_type]
|
||||
else:
|
||||
quality = 90 # Default
|
||||
quality = self.jpeg_quality
|
||||
|
||||
# Encode as JPEG with specified quality
|
||||
encode_params = [cv2.IMWRITE_JPEG_QUALITY, quality]
|
||||
|
|
|
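A minimal usage sketch (illustrative, not part of this changeset) of the simplified buffer API after this change; the module path, camera id, frame size, and the (x1, y1, x2, y2) crop order are assumptions.

import numpy as np

from core.streaming.buffers import shared_cache_buffer

# Frames are stored per camera and now persist until replaced.
frame = np.zeros((720, 1280, 3), dtype=np.uint8)
shared_cache_buffer.put_frame("camera_001", frame)

# Full frame as JPEG at the single default quality (95).
jpeg_bytes = shared_cache_buffer.get_frame_as_jpeg("camera_001")

# Cropped region with an explicit quality override.
crop_jpeg = shared_cache_buffer.get_frame_as_jpeg(
    "camera_001", crop_coords=(100, 100, 400, 400), quality=80
)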
@@ -5,12 +5,14 @@ Optimized for 1280x720@6fps RTSP and 2560x1440 HTTP snapshots.
import logging
|
||||
import threading
|
||||
import time
|
||||
import queue
|
||||
import asyncio
|
||||
from typing import Dict, Set, Optional, List, Any
|
||||
from dataclasses import dataclass
|
||||
from collections import defaultdict
|
||||
|
||||
from .readers import RTSPReader, HTTPSnapshotReader
|
||||
from .buffers import shared_cache_buffer, StreamType
|
||||
from .readers import HTTPSnapshotReader, FFmpegRTSPReader
|
||||
from .buffers import shared_cache_buffer
|
||||
from ..tracking.integration import TrackingPipelineIntegration
|
||||
|
||||
|
||||
|
@@ -50,6 +52,65 @@ class StreamManager:
self._camera_subscribers: Dict[str, Set[str]] = defaultdict(set) # camera_id -> set of subscription_ids
|
||||
self._lock = threading.RLock()
|
||||
|
||||
# Fair tracking queue system - per camera queues
|
||||
self._tracking_queues: Dict[str, queue.Queue] = {} # camera_id -> queue
|
||||
self._tracking_workers = []
|
||||
self._stop_workers = threading.Event()
|
||||
self._dropped_frame_counts: Dict[str, int] = {} # per-camera drop counts
|
||||
|
||||
# Round-robin scheduling state
|
||||
self._camera_list = [] # Ordered list of active cameras
|
||||
self._camera_round_robin_index = 0
|
||||
self._round_robin_lock = threading.Lock()
|
||||
|
||||
# Start worker threads for tracking processing
|
||||
num_workers = min(4, max_streams // 2 + 1) # Scale with streams
|
||||
for i in range(num_workers):
|
||||
worker = threading.Thread(
|
||||
target=self._tracking_worker_loop,
|
||||
name=f"TrackingWorker-{i}",
|
||||
daemon=True
|
||||
)
|
||||
worker.start()
|
||||
self._tracking_workers.append(worker)
|
||||
|
||||
logger.info(f"Started {num_workers} tracking worker threads")
|
||||
|
||||
def _ensure_camera_queue(self, camera_id: str):
|
||||
"""Ensure a tracking queue exists for the camera."""
|
||||
if camera_id not in self._tracking_queues:
|
||||
self._tracking_queues[camera_id] = queue.Queue(maxsize=10) # 10 frames per camera
|
||||
self._dropped_frame_counts[camera_id] = 0
|
||||
|
||||
with self._round_robin_lock:
|
||||
if camera_id not in self._camera_list:
|
||||
self._camera_list.append(camera_id)
|
||||
logger.info(f"Created tracking queue for camera {camera_id}")
|
||||
else:
|
||||
logger.debug(f"Camera {camera_id} already has tracking queue")
|
||||
|
||||
def _remove_camera_queue(self, camera_id: str):
|
||||
"""Remove tracking queue for a camera that's no longer active."""
|
||||
if camera_id in self._tracking_queues:
|
||||
# Clear any remaining items
|
||||
while not self._tracking_queues[camera_id].empty():
|
||||
try:
|
||||
self._tracking_queues[camera_id].get_nowait()
|
||||
except queue.Empty:
|
||||
break
|
||||
|
||||
del self._tracking_queues[camera_id]
|
||||
del self._dropped_frame_counts[camera_id]
|
||||
|
||||
with self._round_robin_lock:
|
||||
if camera_id in self._camera_list:
|
||||
self._camera_list.remove(camera_id)
|
||||
# Reset index if needed
|
||||
if self._camera_round_robin_index >= len(self._camera_list):
|
||||
self._camera_round_robin_index = 0
|
||||
|
||||
logger.info(f"Removed tracking queue for camera {camera_id}")
|
||||
|
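A self-contained sketch (illustrative only; the actual _tracking_worker_loop is outside this hunk) of the round-robin drain that these per-camera queues and the shared index support:

import queue
import threading

tracking_queues = {"cam_a": queue.Queue(maxsize=10), "cam_b": queue.Queue(maxsize=10)}
camera_list = list(tracking_queues.keys())
rr_state = {'index': 0}
rr_lock = threading.Lock()

def next_tracking_item():
    # Visit each camera at most once per call, starting after the last one served,
    # so a busy camera cannot starve the others.
    with rr_lock:
        for _ in range(len(camera_list)):
            camera_id = camera_list[rr_state['index']]
            rr_state['index'] = (rr_state['index'] + 1) % len(camera_list)
            try:
                return camera_id, tracking_queues[camera_id].get_nowait()
            except queue.Empty:
                continue
    return None, None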
||||
def add_subscription(self, subscription_id: str, stream_config: StreamConfig,
|
||||
crop_coords: Optional[tuple] = None,
|
||||
model_id: Optional[str] = None,
|
||||
|
@@ -93,6 +154,10 @@ class StreamManager:
if not success:
|
||||
self._remove_subscription_internal(subscription_id)
|
||||
return False
|
||||
else:
|
||||
# Stream already exists, but ensure queue exists too
|
||||
logger.info(f"Stream already exists for {camera_id}, ensuring queue exists")
|
||||
self._ensure_camera_queue(camera_id)
|
||||
|
||||
logger.info(f"Added subscription {subscription_id} for camera {camera_id} "
|
||||
f"({len(self._camera_subscribers[camera_id])} total subscribers)")
|
||||
|
@@ -129,8 +194,9 @@ class StreamManager:
"""Start a stream for the given camera."""
|
||||
try:
|
||||
if stream_config.rtsp_url:
|
||||
# RTSP stream
|
||||
reader = RTSPReader(
|
||||
# RTSP stream using FFmpeg subprocess with CUDA acceleration
|
||||
logger.info(f"\033[94m[RTSP] Starting {camera_id}\033[0m")
|
||||
reader = FFmpegRTSPReader(
|
||||
camera_id=camera_id,
|
||||
rtsp_url=stream_config.rtsp_url,
|
||||
max_retries=stream_config.max_retries
|
||||
|
@ -138,10 +204,12 @@ class StreamManager:
|
|||
reader.set_frame_callback(self._frame_callback)
|
||||
reader.start()
|
||||
self._streams[camera_id] = reader
|
||||
logger.info(f"Started RTSP stream for camera {camera_id}")
|
||||
self._ensure_camera_queue(camera_id) # Create tracking queue
|
||||
logger.info(f"\033[92m[RTSP] {camera_id} connected\033[0m")
|
||||
|
||||
elif stream_config.snapshot_url:
|
||||
# HTTP snapshot stream
|
||||
logger.info(f"\033[95m[HTTP] Starting {camera_id}\033[0m")
|
||||
reader = HTTPSnapshotReader(
|
||||
camera_id=camera_id,
|
||||
snapshot_url=stream_config.snapshot_url,
|
||||
|
@ -151,7 +219,8 @@ class StreamManager:
|
|||
reader.set_frame_callback(self._frame_callback)
|
||||
reader.start()
|
||||
self._streams[camera_id] = reader
|
||||
logger.info(f"Started HTTP snapshot stream for camera {camera_id}")
|
||||
self._ensure_camera_queue(camera_id) # Create tracking queue
|
||||
logger.info(f"\033[92m[HTTP] {camera_id} connected\033[0m")
|
||||
|
||||
else:
|
||||
logger.error(f"No valid URL provided for camera {camera_id}")
|
||||
|
@ -169,23 +238,42 @@ class StreamManager:
|
|||
try:
|
||||
self._streams[camera_id].stop()
|
||||
del self._streams[camera_id]
|
||||
shared_cache_buffer.clear_camera(camera_id)
|
||||
logger.info(f"Stopped stream for camera {camera_id}")
|
||||
self._remove_camera_queue(camera_id) # Remove tracking queue
|
||||
# DON'T clear frames - they should persist until replaced
|
||||
# shared_cache_buffer.clear_camera(camera_id) # REMOVED - frames should persist
|
||||
logger.info(f"Stopped stream for camera {camera_id} (frames preserved in buffer)")
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping stream for camera {camera_id}: {e}")
|
||||
|
||||
def _frame_callback(self, camera_id: str, frame):
|
||||
"""Callback for when a new frame is available."""
|
||||
try:
|
||||
# Detect stream type based on frame dimensions
|
||||
stream_type = self._detect_stream_type(frame)
|
||||
# Store frame in shared buffer
|
||||
shared_cache_buffer.put_frame(camera_id, frame)
|
||||
# Quieter frame callback logging - only log occasionally
|
||||
if hasattr(self, '_frame_log_count'):
|
||||
self._frame_log_count += 1
|
||||
else:
|
||||
self._frame_log_count = 1
|
||||
|
||||
# Store frame in shared buffer with stream type
|
||||
shared_cache_buffer.put_frame(camera_id, frame, stream_type)
|
||||
# Log every 100 frames to avoid spam
|
||||
if self._frame_log_count % 100 == 0:
|
||||
available_cameras = shared_cache_buffer.frame_buffer.get_camera_list()
|
||||
logger.info(f"\033[96m[BUFFER] {len(available_cameras)} active cameras: {', '.join(available_cameras)}\033[0m")
|
||||
|
||||
# Queue for tracking processing (non-blocking) - route to camera-specific queue
|
||||
if camera_id in self._tracking_queues:
|
||||
try:
|
||||
self._tracking_queues[camera_id].put_nowait({
|
||||
'frame': frame,
|
||||
'timestamp': time.time()
|
||||
})
|
||||
except queue.Full:
|
||||
# Drop frame if camera queue is full (maintain real-time)
|
||||
self._dropped_frame_counts[camera_id] += 1
|
||||
|
||||
# Process tracking for subscriptions with tracking integration
|
||||
self._process_tracking_for_camera(camera_id, frame)
|
||||
if self._dropped_frame_counts[camera_id] % 50 == 0:
|
||||
logger.warning(f"Dropped {self._dropped_frame_counts[camera_id]} frames for camera {camera_id} due to full queue")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in frame callback for camera {camera_id}: {e}")
|
||||
|
@ -242,6 +330,134 @@ class StreamManager:
|
|||
except Exception as e:
|
||||
logger.error(f"Error processing tracking for camera {camera_id}: {e}")
|
||||
|
||||
def _tracking_worker_loop(self):
|
||||
"""Worker thread loop for round-robin processing of camera queues."""
|
||||
logger.info(f"Tracking worker {threading.current_thread().name} started")
|
||||
|
||||
consecutive_empty = 0
|
||||
max_consecutive_empty = 10 # Sleep if all cameras empty this many times
|
||||
|
||||
while not self._stop_workers.is_set():
|
||||
try:
|
||||
# Get next camera in round-robin fashion
|
||||
camera_id, item = self._get_next_camera_item()
|
||||
|
||||
if camera_id is None:
|
||||
# No cameras have items, sleep briefly
|
||||
consecutive_empty += 1
|
||||
if consecutive_empty >= max_consecutive_empty:
|
||||
time.sleep(0.1) # Sleep 100ms if nothing to process
|
||||
consecutive_empty = 0
|
||||
continue
|
||||
|
||||
consecutive_empty = 0 # Reset counter when we find work
|
||||
|
||||
frame = item['frame']
|
||||
timestamp = item['timestamp']
|
||||
|
||||
# Check if frame is too old (drop if > 1 second old)
|
||||
age = time.time() - timestamp
|
||||
if age > 1.0:
|
||||
logger.debug(f"Dropping old frame for {camera_id} (age: {age:.2f}s)")
|
||||
continue
|
||||
|
||||
# Process tracking for this camera's frame
|
||||
self._process_tracking_for_camera_sync(camera_id, frame)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in tracking worker: {e}", exc_info=True)
|
||||
|
||||
logger.info(f"Tracking worker {threading.current_thread().name} stopped")
|
||||
|
||||
def _get_next_camera_item(self):
|
||||
"""Get next item from camera queues using round-robin scheduling."""
|
||||
with self._round_robin_lock:
|
||||
# Get current list of cameras from actual tracking queues (central state)
|
||||
camera_list = list(self._tracking_queues.keys())
|
||||
|
||||
if not camera_list:
|
||||
return None, None
|
||||
|
||||
attempts = 0
|
||||
max_attempts = len(camera_list)
|
||||
|
||||
while attempts < max_attempts:
|
||||
# Get current camera using round-robin index
|
||||
if self._camera_round_robin_index >= len(camera_list):
|
||||
self._camera_round_robin_index = 0
|
||||
|
||||
camera_id = camera_list[self._camera_round_robin_index]
|
||||
|
||||
# Move to next camera for next call
|
||||
self._camera_round_robin_index = (self._camera_round_robin_index + 1) % len(camera_list)
|
||||
|
||||
# Try to get item from this camera's queue
|
||||
try:
|
||||
item = self._tracking_queues[camera_id].get_nowait()
|
||||
return camera_id, item
|
||||
except queue.Empty:
|
||||
pass # Try next camera
|
||||
|
||||
attempts += 1
|
||||
|
||||
return None, None # All cameras empty
|
||||
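Note: concretely, with cameras A, B and C registered and only B holding frames, a single call to _get_next_camera_item skips A, returns B's frame, and leaves the round-robin index pointing at C, so one busy camera cannot starve the others as long as every per-camera queue stays bounded.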
|
||||
def _process_tracking_for_camera_sync(self, camera_id: str, frame):
|
||||
"""Synchronous version of tracking processing for worker threads."""
|
||||
try:
|
||||
with self._lock:
|
||||
subscription_ids = list(self._camera_subscribers.get(camera_id, []))
|
||||
|
||||
for subscription_id in subscription_ids:
|
||||
subscription_info = self._subscriptions.get(subscription_id)
|
||||
|
||||
if not subscription_info:
|
||||
logger.warning(f"No subscription info found for {subscription_id}")
|
||||
continue
|
||||
|
||||
if not subscription_info.tracking_integration:
|
||||
logger.debug(f"No tracking integration for {subscription_id} (camera {camera_id}), skipping inference")
|
||||
continue
|
||||
|
||||
display_id = subscription_id.split(';')[0] if ';' in subscription_id else subscription_id
|
||||
|
||||
try:
|
||||
# Run async tracking in thread's event loop
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
result = loop.run_until_complete(
|
||||
subscription_info.tracking_integration.process_frame(
|
||||
frame, display_id, subscription_id
|
||||
)
|
||||
)
|
||||
|
||||
# Log tracking results
|
||||
if result:
|
||||
tracked_count = len(result.get('tracked_vehicles', []))
|
||||
validated_vehicle = result.get('validated_vehicle')
|
||||
pipeline_result = result.get('pipeline_result')
|
||||
|
||||
if tracked_count > 0:
|
||||
logger.info(f"[Tracking] {camera_id}: {tracked_count} vehicles tracked")
|
||||
|
||||
if validated_vehicle:
|
||||
logger.info(f"[Tracking] {camera_id}: Vehicle {validated_vehicle['track_id']} "
|
||||
f"validated as {validated_vehicle['state']} "
|
||||
f"(confidence: {validated_vehicle['confidence']:.2f})")
|
||||
|
||||
if pipeline_result:
|
||||
logger.info(f"[Pipeline] {camera_id}: {pipeline_result.get('status', 'unknown')} - "
|
||||
f"{pipeline_result.get('message', 'no message')}")
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
except Exception as track_e:
|
||||
logger.error(f"Error in tracking for {subscription_id}: {track_e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing tracking for camera {camera_id}: {e}")
|
||||
|
||||
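Note: the worker threads above bridge into the async tracking API by creating a short-lived event loop per frame. The same bridge in isolation, with a placeholder coroutine standing in for process_frame (hypothetical names, not the repository's API):

import asyncio

async def process_frame(frame, display_id, subscription_id):
    # Stand-in for the tracking integration's async entry point.
    await asyncio.sleep(0)
    return {'tracked_vehicles': []}

def run_tracking_from_worker(frame, display_id, subscription_id):
    # asyncio.run() performs the same new_event_loop / run_until_complete / close
    # sequence used above, creating one fresh loop per call.
    return asyncio.run(process_frame(frame, display_id, subscription_id))

Creating a loop per frame is simple and isolates failures, at the cost of some per-call overhead; a persistent loop per worker thread would be the usual alternative if that overhead shows up in profiling.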
def get_frame(self, camera_id: str, crop_coords: Optional[tuple] = None):
|
||||
"""Get the latest frame for a camera with optional cropping."""
|
||||
return shared_cache_buffer.get_frame(camera_id, crop_coords)
|
||||
|
@ -357,6 +573,30 @@ class StreamManager:
|
|||
|
||||
def stop_all(self):
|
||||
"""Stop all streams and clear all subscriptions."""
|
||||
# Signal workers to stop
|
||||
self._stop_workers.set()
|
||||
|
||||
# Clear all camera queues
|
||||
for camera_id, camera_queue in list(self._tracking_queues.items()):
|
||||
while not camera_queue.empty():
|
||||
try:
|
||||
camera_queue.get_nowait()
|
||||
except queue.Empty:
|
||||
break
|
||||
|
||||
# Wait for workers to finish
|
||||
for worker in self._tracking_workers:
|
||||
worker.join(timeout=2.0)
|
||||
|
||||
# Clear queue management structures
|
||||
self._tracking_queues.clear()
|
||||
self._dropped_frame_counts.clear()
|
||||
with self._round_robin_lock:
|
||||
self._camera_list.clear()
|
||||
self._camera_round_robin_index = 0
|
||||
|
||||
logger.info("Stopped all tracking worker threads")
|
||||
|
||||
with self._lock:
|
||||
# Stop all streams
|
||||
for camera_id in list(self._streams.keys()):
|
||||
|
@ -371,29 +611,67 @@ class StreamManager:
|
|||
|
||||
def set_session_id(self, display_id: str, session_id: str):
|
||||
"""Set session ID for tracking integration."""
|
||||
# Ensure session_id is always a string for consistent type handling
|
||||
session_id = str(session_id) if session_id is not None else None
|
||||
with self._lock:
|
||||
for subscription_info in self._subscriptions.values():
|
||||
# Check if this subscription matches the display_id
|
||||
subscription_display_id = subscription_info.subscription_id.split(';')[0]
|
||||
if subscription_display_id == display_id and subscription_info.tracking_integration:
|
||||
subscription_info.tracking_integration.set_session_id(display_id, session_id)
|
||||
logger.debug(f"Set session {session_id} for display {display_id}")
|
||||
# Pass the full subscription_id (displayId;cameraId) to the tracking integration
|
||||
subscription_info.tracking_integration.set_session_id(
|
||||
display_id,
|
||||
session_id,
|
||||
subscription_id=subscription_info.subscription_id
|
||||
)
|
||||
logger.debug(f"Set session {session_id} for display {display_id} with subscription {subscription_info.subscription_id}")
|
||||
|
||||
def clear_session_id(self, session_id: str):
|
||||
"""Clear session ID from tracking integrations."""
|
||||
"""Clear session ID from the specific tracking integration handling this session."""
|
||||
with self._lock:
|
||||
# Find the subscription that's handling this session
|
||||
session_subscription = None
|
||||
for subscription_info in self._subscriptions.values():
|
||||
if subscription_info.tracking_integration:
|
||||
subscription_info.tracking_integration.clear_session_id(session_id)
|
||||
logger.debug(f"Cleared session {session_id}")
|
||||
# Check if this integration is handling the given session_id
|
||||
integration = subscription_info.tracking_integration
|
||||
if session_id in integration.session_vehicles:
|
||||
session_subscription = subscription_info
|
||||
break
|
||||
|
||||
if session_subscription and session_subscription.tracking_integration:
|
||||
session_subscription.tracking_integration.clear_session_id(session_id)
|
||||
logger.debug(f"Cleared session {session_id} from subscription {session_subscription.subscription_id}")
|
||||
else:
|
||||
logger.warning(f"No tracking integration found for session {session_id}, broadcasting to all subscriptions")
|
||||
# Fallback: broadcast to all (original behavior)
|
||||
for subscription_info in self._subscriptions.values():
|
||||
if subscription_info.tracking_integration:
|
||||
subscription_info.tracking_integration.clear_session_id(session_id)
|
||||
|
||||
def set_progression_stage(self, session_id: str, stage: str):
|
||||
"""Set progression stage for tracking integrations."""
|
||||
"""Set progression stage for the specific tracking integration handling this session."""
|
||||
with self._lock:
|
||||
# Find the subscription that's handling this session
|
||||
session_subscription = None
|
||||
for subscription_info in self._subscriptions.values():
|
||||
if subscription_info.tracking_integration:
|
||||
subscription_info.tracking_integration.set_progression_stage(session_id, stage)
|
||||
logger.debug(f"Set progression stage for session {session_id}: {stage}")
|
||||
# Check if this integration is handling the given session_id
|
||||
# We need to check the integration's active sessions
|
||||
integration = subscription_info.tracking_integration
|
||||
if session_id in integration.session_vehicles:
|
||||
session_subscription = subscription_info
|
||||
break
|
||||
|
||||
if session_subscription and session_subscription.tracking_integration:
|
||||
session_subscription.tracking_integration.set_progression_stage(session_id, stage)
|
||||
logger.debug(f"Set progression stage for session {session_id}: {stage} on subscription {session_subscription.subscription_id}")
|
||||
else:
|
||||
logger.warning(f"No tracking integration found for session {session_id}, broadcasting to all subscriptions")
|
||||
# Fallback: broadcast to all (original behavior)
|
||||
for subscription_info in self._subscriptions.values():
|
||||
if subscription_info.tracking_integration:
|
||||
subscription_info.tracking_integration.set_progression_stage(session_id, stage)
|
||||
|
||||
def get_tracking_stats(self) -> Dict[str, Any]:
|
||||
"""Get tracking statistics from all subscriptions."""
|
||||
|
@ -404,26 +682,6 @@ class StreamManager:
|
|||
stats[subscription_id] = subscription_info.tracking_integration.get_statistics()
|
||||
return stats
|
||||
|
||||
def _detect_stream_type(self, frame) -> StreamType:
|
||||
"""Detect stream type based on frame dimensions."""
|
||||
if frame is None:
|
||||
return StreamType.RTSP # Default
|
||||
|
||||
h, w = frame.shape[:2]
|
||||
|
||||
# RTSP: 1280x720
|
||||
if w == 1280 and h == 720:
|
||||
return StreamType.RTSP
|
||||
|
||||
# HTTP: 2560x1440 or larger
|
||||
if w >= 2000 and h >= 1000:
|
||||
return StreamType.HTTP
|
||||
|
||||
# Default based on size
|
||||
if w <= 1920 and h <= 1080:
|
||||
return StreamType.RTSP
|
||||
else:
|
||||
return StreamType.HTTP
|
||||
|
||||
def get_stats(self) -> Dict[str, Any]:
|
||||
"""Get comprehensive streaming statistics."""
|
||||
|
@ -431,22 +689,11 @@ class StreamManager:
|
|||
buffer_stats = shared_cache_buffer.get_stats()
|
||||
tracking_stats = self.get_tracking_stats()
|
||||
|
||||
# Add stream type information
|
||||
stream_types = {}
|
||||
for camera_id in self._streams.keys():
|
||||
if isinstance(self._streams[camera_id], RTSPReader):
|
||||
stream_types[camera_id] = 'rtsp'
|
||||
elif isinstance(self._streams[camera_id], HTTPSnapshotReader):
|
||||
stream_types[camera_id] = 'http'
|
||||
else:
|
||||
stream_types[camera_id] = 'unknown'
|
||||
|
||||
return {
|
||||
'active_subscriptions': len(self._subscriptions),
|
||||
'active_streams': len(self._streams),
|
||||
'cameras_with_subscribers': len(self._camera_subscribers),
|
||||
'max_streams': self.max_streams,
|
||||
'stream_types': stream_types,
|
||||
'subscriptions_by_camera': {
|
||||
camera_id: len(subscribers)
|
||||
for camera_id, subscribers in self._camera_subscribers.items()
|
||||
|
|
|
@ -1,504 +0,0 @@
|
|||
"""
|
||||
Frame readers for RTSP streams and HTTP snapshots.
|
||||
Optimized for 1280x720@6fps RTSP and 2560x1440 HTTP snapshots.
|
||||
"""
|
||||
import cv2
|
||||
import logging
|
||||
import time
|
||||
import threading
|
||||
import requests
|
||||
import numpy as np
|
||||
import os
|
||||
from typing import Optional, Callable
|
||||
|
||||
# Suppress FFMPEG/H.264 error messages if needed
|
||||
# Set this environment variable to reduce noise from decoder errors
|
||||
os.environ["OPENCV_LOG_LEVEL"] = "ERROR"
|
||||
os.environ["OPENCV_FFMPEG_LOGLEVEL"] = "-8" # Suppress FFMPEG warnings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RTSPReader:
|
||||
"""RTSP stream frame reader optimized for 1280x720 @ 6fps streams."""
|
||||
|
||||
def __init__(self, camera_id: str, rtsp_url: str, max_retries: int = 3):
|
||||
self.camera_id = camera_id
|
||||
self.rtsp_url = rtsp_url
|
||||
self.max_retries = max_retries
|
||||
self.cap = None
|
||||
self.stop_event = threading.Event()
|
||||
self.thread = None
|
||||
self.frame_callback: Optional[Callable] = None
|
||||
|
||||
# Expected stream specifications
|
||||
self.expected_width = 1280
|
||||
self.expected_height = 720
|
||||
self.expected_fps = 6
|
||||
|
||||
# Frame processing parameters
|
||||
self.frame_interval = 1.0 / self.expected_fps # ~167ms for 6fps
|
||||
self.error_recovery_delay = 5.0 # Increased from 2.0 for stability
|
||||
self.max_consecutive_errors = 30 # Increased from 10 to handle network jitter
|
||||
self.stream_timeout = 30.0
|
||||
|
||||
def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]):
|
||||
"""Set callback function to handle captured frames."""
|
||||
self.frame_callback = callback
|
||||
|
||||
def start(self):
|
||||
"""Start the RTSP reader thread."""
|
||||
if self.thread and self.thread.is_alive():
|
||||
logger.warning(f"RTSP reader for {self.camera_id} already running")
|
||||
return
|
||||
|
||||
self.stop_event.clear()
|
||||
self.thread = threading.Thread(target=self._read_frames, daemon=True)
|
||||
self.thread.start()
|
||||
logger.info(f"Started RTSP reader for camera {self.camera_id}")
|
||||
|
||||
def stop(self):
|
||||
"""Stop the RTSP reader thread."""
|
||||
self.stop_event.set()
|
||||
if self.thread:
|
||||
self.thread.join(timeout=5.0)
|
||||
if self.cap:
|
||||
self.cap.release()
|
||||
logger.info(f"Stopped RTSP reader for camera {self.camera_id}")
|
||||
|
||||
def _read_frames(self):
|
||||
"""Main frame reading loop with H.264 error recovery."""
|
||||
consecutive_errors = 0
|
||||
frame_count = 0
|
||||
last_log_time = time.time()
|
||||
last_successful_frame_time = time.time()
|
||||
last_frame_time = 0
|
||||
|
||||
while not self.stop_event.is_set():
|
||||
try:
|
||||
# Initialize/reinitialize capture if needed
|
||||
if not self.cap or not self.cap.isOpened():
|
||||
if not self._initialize_capture():
|
||||
time.sleep(self.error_recovery_delay)
|
||||
continue
|
||||
last_successful_frame_time = time.time()
|
||||
|
||||
# Check for stream timeout
|
||||
if time.time() - last_successful_frame_time > self.stream_timeout:
|
||||
logger.warning(f"Camera {self.camera_id}: Stream timeout, reinitializing")
|
||||
self._reinitialize_capture()
|
||||
last_successful_frame_time = time.time()
|
||||
continue
|
||||
|
||||
# Rate limiting for 6fps
|
||||
current_time = time.time()
|
||||
if current_time - last_frame_time < self.frame_interval:
|
||||
time.sleep(0.01) # Small sleep to avoid busy waiting
|
||||
continue
|
||||
|
||||
ret, frame = self.cap.read()
|
||||
|
||||
if not ret or frame is None:
|
||||
consecutive_errors += 1
|
||||
|
||||
if consecutive_errors >= self.max_consecutive_errors:
|
||||
logger.error(f"Camera {self.camera_id}: Too many consecutive errors, reinitializing")
|
||||
self._reinitialize_capture()
|
||||
consecutive_errors = 0
|
||||
time.sleep(self.error_recovery_delay)
|
||||
else:
|
||||
# Skip corrupted frame and continue with exponential backoff
|
||||
if consecutive_errors <= 5:
|
||||
logger.debug(f"Camera {self.camera_id}: Frame read failed (error {consecutive_errors})")
|
||||
elif consecutive_errors % 10 == 0: # Log every 10th error after 5
|
||||
logger.warning(f"Camera {self.camera_id}: Continuing frame read failures (error {consecutive_errors})")
|
||||
|
||||
# Exponential backoff with cap at 1 second
|
||||
sleep_time = min(0.1 * (1.5 ** min(consecutive_errors, 10)), 1.0)
|
||||
time.sleep(sleep_time)
|
||||
continue
|
||||
|
||||
# Validate frame dimensions
|
||||
if frame.shape[1] != self.expected_width or frame.shape[0] != self.expected_height:
|
||||
logger.warning(f"Camera {self.camera_id}: Unexpected frame dimensions {frame.shape[1]}x{frame.shape[0]}")
|
||||
# Try to resize if dimensions are wrong
|
||||
if frame.shape[1] > 0 and frame.shape[0] > 0:
|
||||
frame = cv2.resize(frame, (self.expected_width, self.expected_height))
|
||||
else:
|
||||
consecutive_errors += 1
|
||||
continue
|
||||
|
||||
# Check for corrupted frames (all black, all white, excessive noise)
|
||||
if self._is_frame_corrupted(frame):
|
||||
logger.debug(f"Camera {self.camera_id}: Corrupted frame detected, skipping")
|
||||
consecutive_errors += 1
|
||||
continue
|
||||
|
||||
# Frame is valid
|
||||
consecutive_errors = 0
|
||||
frame_count += 1
|
||||
last_successful_frame_time = time.time()
|
||||
last_frame_time = current_time
|
||||
|
||||
# Call frame callback
|
||||
if self.frame_callback:
|
||||
try:
|
||||
self.frame_callback(self.camera_id, frame)
|
||||
except Exception as e:
|
||||
logger.error(f"Camera {self.camera_id}: Frame callback error: {e}")
|
||||
|
||||
# Log progress every 30 seconds
|
||||
if current_time - last_log_time >= 30:
|
||||
logger.info(f"Camera {self.camera_id}: {frame_count} frames processed")
|
||||
last_log_time = current_time
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Camera {self.camera_id}: Error in frame reading loop: {e}")
|
||||
consecutive_errors += 1
|
||||
if consecutive_errors >= self.max_consecutive_errors:
|
||||
self._reinitialize_capture()
|
||||
consecutive_errors = 0
|
||||
time.sleep(self.error_recovery_delay)
|
||||
|
||||
# Cleanup
|
||||
if self.cap:
|
||||
self.cap.release()
|
||||
logger.info(f"RTSP reader thread ended for camera {self.camera_id}")
|
||||
|
||||
def _initialize_capture(self) -> bool:
|
||||
"""Initialize video capture with optimized settings for 1280x720@6fps."""
|
||||
try:
|
||||
# Release previous capture if exists
|
||||
if self.cap:
|
||||
self.cap.release()
|
||||
time.sleep(0.5)
|
||||
|
||||
logger.info(f"Initializing capture for camera {self.camera_id}")
|
||||
|
||||
# Create capture with FFMPEG backend and TCP transport for reliability
|
||||
# Use TCP instead of UDP to prevent packet loss
|
||||
rtsp_url_tcp = self.rtsp_url.replace('rtsp://', 'rtsp://')
|
||||
if '?' in rtsp_url_tcp:
|
||||
rtsp_url_tcp += '&tcp'
|
||||
else:
|
||||
rtsp_url_tcp += '?tcp'
|
||||
|
||||
# Alternative: Set environment variable for RTSP transport
|
||||
import os
|
||||
os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'rtsp_transport;tcp'
|
||||
|
||||
self.cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG)
|
||||
|
||||
if not self.cap.isOpened():
|
||||
logger.error(f"Failed to open stream for camera {self.camera_id}")
|
||||
return False
|
||||
|
||||
# Set capture properties for 1280x720@6fps
|
||||
self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.expected_width)
|
||||
self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.expected_height)
|
||||
self.cap.set(cv2.CAP_PROP_FPS, self.expected_fps)
|
||||
|
||||
# Set moderate buffer to handle network jitter while avoiding excessive latency
|
||||
# Buffer of 3 frames provides resilience without major delay
|
||||
self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)
|
||||
|
||||
# Set FFMPEG options for better H.264 handling
|
||||
self.cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'H264'))
|
||||
|
||||
# Verify stream properties
|
||||
actual_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
actual_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
actual_fps = self.cap.get(cv2.CAP_PROP_FPS)
|
||||
|
||||
logger.info(f"Camera {self.camera_id} initialized: {actual_width}x{actual_height} @ {actual_fps}fps")
|
||||
|
||||
# Read and discard first few frames to stabilize stream
|
||||
for _ in range(5):
|
||||
ret, _ = self.cap.read()
|
||||
if not ret:
|
||||
logger.warning(f"Camera {self.camera_id}: Failed to read initial frames")
|
||||
time.sleep(0.1)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error initializing capture for camera {self.camera_id}: {e}")
|
||||
return False
|
||||
|
||||
def _reinitialize_capture(self):
|
||||
"""Reinitialize capture after errors with retry logic."""
|
||||
logger.info(f"Reinitializing capture for camera {self.camera_id}")
|
||||
if self.cap:
|
||||
self.cap.release()
|
||||
self.cap = None
|
||||
|
||||
# Longer delay before reconnection to avoid rapid reconnect loops
|
||||
time.sleep(3.0)
|
||||
|
||||
# Retry initialization up to 3 times
|
||||
for attempt in range(3):
|
||||
if self._initialize_capture():
|
||||
logger.info(f"Successfully reinitialized camera {self.camera_id} on attempt {attempt + 1}")
|
||||
break
|
||||
else:
|
||||
logger.warning(f"Failed to reinitialize camera {self.camera_id} on attempt {attempt + 1}")
|
||||
time.sleep(2.0)
|
||||
|
||||
def _is_frame_corrupted(self, frame: np.ndarray) -> bool:
|
||||
"""Check if frame is corrupted (all black, all white, or excessive noise)."""
|
||||
if frame is None or frame.size == 0:
|
||||
return True
|
||||
|
||||
# Check mean and standard deviation
|
||||
mean = np.mean(frame)
|
||||
std = np.std(frame)
|
||||
|
||||
# All black or all white
|
||||
if mean < 5 or mean > 250:
|
||||
return True
|
||||
|
||||
# No variation (stuck frame)
|
||||
if std < 1:
|
||||
return True
|
||||
|
||||
# Excessive noise (corrupted H.264 decode)
|
||||
# Calculate edge density as corruption indicator
|
||||
edges = cv2.Canny(frame, 50, 150)
|
||||
edge_density = np.sum(edges > 0) / edges.size
|
||||
|
||||
# Too many edges indicate corruption
|
||||
if edge_density > 0.5:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
class HTTPSnapshotReader:
|
||||
"""HTTP snapshot reader optimized for 2560x1440 (2K) high quality images."""
|
||||
|
||||
def __init__(self, camera_id: str, snapshot_url: str, interval_ms: int = 5000, max_retries: int = 3):
|
||||
self.camera_id = camera_id
|
||||
self.snapshot_url = snapshot_url
|
||||
self.interval_ms = interval_ms
|
||||
self.max_retries = max_retries
|
||||
self.stop_event = threading.Event()
|
||||
self.thread = None
|
||||
self.frame_callback: Optional[Callable] = None
|
||||
|
||||
# Expected snapshot specifications
|
||||
self.expected_width = 2560
|
||||
self.expected_height = 1440
|
||||
self.max_file_size = 10 * 1024 * 1024 # 10MB max for 2K image
|
||||
|
||||
def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]):
|
||||
"""Set callback function to handle captured frames."""
|
||||
self.frame_callback = callback
|
||||
|
||||
def start(self):
|
||||
"""Start the snapshot reader thread."""
|
||||
if self.thread and self.thread.is_alive():
|
||||
logger.warning(f"Snapshot reader for {self.camera_id} already running")
|
||||
return
|
||||
|
||||
self.stop_event.clear()
|
||||
self.thread = threading.Thread(target=self._read_snapshots, daemon=True)
|
||||
self.thread.start()
|
||||
logger.info(f"Started snapshot reader for camera {self.camera_id}")
|
||||
|
||||
def stop(self):
|
||||
"""Stop the snapshot reader thread."""
|
||||
self.stop_event.set()
|
||||
if self.thread:
|
||||
self.thread.join(timeout=5.0)
|
||||
logger.info(f"Stopped snapshot reader for camera {self.camera_id}")
|
||||
|
||||
def _read_snapshots(self):
|
||||
"""Main snapshot reading loop for high quality 2K images."""
|
||||
retries = 0
|
||||
frame_count = 0
|
||||
last_log_time = time.time()
|
||||
interval_seconds = self.interval_ms / 1000.0
|
||||
|
||||
logger.info(f"Snapshot interval for camera {self.camera_id}: {interval_seconds}s")
|
||||
|
||||
while not self.stop_event.is_set():
|
||||
try:
|
||||
start_time = time.time()
|
||||
frame = self._fetch_snapshot()
|
||||
|
||||
if frame is None:
|
||||
retries += 1
|
||||
logger.warning(f"Failed to fetch snapshot for camera {self.camera_id}, retry {retries}/{self.max_retries}")
|
||||
|
||||
if self.max_retries != -1 and retries > self.max_retries:
|
||||
logger.error(f"Max retries reached for snapshot camera {self.camera_id}")
|
||||
break
|
||||
|
||||
time.sleep(min(2.0, interval_seconds))
|
||||
continue
|
||||
|
||||
# Validate image dimensions
|
||||
if frame.shape[1] != self.expected_width or frame.shape[0] != self.expected_height:
|
||||
logger.info(f"Camera {self.camera_id}: Snapshot dimensions {frame.shape[1]}x{frame.shape[0]} "
|
||||
f"(expected {self.expected_width}x{self.expected_height})")
|
||||
# Resize if needed (maintaining aspect ratio for high quality)
|
||||
if frame.shape[1] > 0 and frame.shape[0] > 0:
|
||||
# Only resize if significantly different
|
||||
if abs(frame.shape[1] - self.expected_width) > 100:
|
||||
frame = self._resize_maintain_aspect(frame, self.expected_width, self.expected_height)
|
||||
|
||||
# Reset retry counter on successful fetch
|
||||
retries = 0
|
||||
frame_count += 1
|
||||
|
||||
# Call frame callback
|
||||
if self.frame_callback:
|
||||
try:
|
||||
self.frame_callback(self.camera_id, frame)
|
||||
except Exception as e:
|
||||
logger.error(f"Camera {self.camera_id}: Frame callback error: {e}")
|
||||
|
||||
# Log progress every 30 seconds
|
||||
current_time = time.time()
|
||||
if current_time - last_log_time >= 30:
|
||||
logger.info(f"Camera {self.camera_id}: {frame_count} snapshots processed")
|
||||
last_log_time = current_time
|
||||
|
||||
# Wait for next interval
|
||||
elapsed = time.time() - start_time
|
||||
sleep_time = max(0, interval_seconds - elapsed)
|
||||
if sleep_time > 0:
|
||||
self.stop_event.wait(sleep_time)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in snapshot loop for camera {self.camera_id}: {e}")
|
||||
retries += 1
|
||||
if self.max_retries != -1 and retries > self.max_retries:
|
||||
break
|
||||
time.sleep(min(2.0, interval_seconds))
|
||||
|
||||
logger.info(f"Snapshot reader thread ended for camera {self.camera_id}")
|
||||
|
||||
def _fetch_snapshot(self) -> Optional[np.ndarray]:
|
||||
"""Fetch a single high quality snapshot from HTTP URL."""
|
||||
try:
|
||||
# Parse URL for authentication
|
||||
from urllib.parse import urlparse
|
||||
parsed_url = urlparse(self.snapshot_url)
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Python-Detector-Worker/1.0',
|
||||
'Accept': 'image/jpeg, image/png, image/*'
|
||||
}
|
||||
auth = None
|
||||
|
||||
if parsed_url.username and parsed_url.password:
|
||||
from requests.auth import HTTPBasicAuth, HTTPDigestAuth
|
||||
auth = HTTPBasicAuth(parsed_url.username, parsed_url.password)
|
||||
|
||||
# Reconstruct URL without credentials
|
||||
clean_url = f"{parsed_url.scheme}://{parsed_url.hostname}"
|
||||
if parsed_url.port:
|
||||
clean_url += f":{parsed_url.port}"
|
||||
clean_url += parsed_url.path
|
||||
if parsed_url.query:
|
||||
clean_url += f"?{parsed_url.query}"
|
||||
|
||||
# Try Basic Auth first
|
||||
response = requests.get(clean_url, auth=auth, timeout=15, headers=headers,
|
||||
stream=True, verify=False)
|
||||
|
||||
# If Basic Auth fails, try Digest Auth
|
||||
if response.status_code == 401:
|
||||
auth = HTTPDigestAuth(parsed_url.username, parsed_url.password)
|
||||
response = requests.get(clean_url, auth=auth, timeout=15, headers=headers,
|
||||
stream=True, verify=False)
|
||||
else:
|
||||
response = requests.get(self.snapshot_url, timeout=15, headers=headers,
|
||||
stream=True, verify=False)
|
||||
|
||||
if response.status_code == 200:
|
||||
# Check content size
|
||||
content_length = int(response.headers.get('content-length', 0))
|
||||
if content_length > self.max_file_size:
|
||||
logger.warning(f"Snapshot too large for camera {self.camera_id}: {content_length} bytes")
|
||||
return None
|
||||
|
||||
# Read content
|
||||
content = response.content
|
||||
|
||||
# Convert to numpy array
|
||||
image_array = np.frombuffer(content, np.uint8)
|
||||
|
||||
# Decode as high quality image
|
||||
frame = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
|
||||
|
||||
if frame is None:
|
||||
logger.error(f"Failed to decode snapshot for camera {self.camera_id}")
|
||||
return None
|
||||
|
||||
logger.debug(f"Fetched snapshot for camera {self.camera_id}: {frame.shape[1]}x{frame.shape[0]}")
|
||||
return frame
|
||||
else:
|
||||
logger.warning(f"HTTP {response.status_code} from {self.camera_id}")
|
||||
return None
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(f"Request error fetching snapshot for {self.camera_id}: {e}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Error decoding snapshot for {self.camera_id}: {e}")
|
||||
return None
|
||||
|
||||
def fetch_single_snapshot(self) -> Optional[np.ndarray]:
|
||||
"""
|
||||
Fetch a single high-quality snapshot on demand for pipeline processing.
|
||||
This method is for one-time fetch from HTTP URL, not continuous streaming.
|
||||
|
||||
Returns:
|
||||
High quality 2K snapshot frame or None if failed
|
||||
"""
|
||||
logger.info(f"[SNAPSHOT] Fetching snapshot for {self.camera_id} from {self.snapshot_url}")
|
||||
|
||||
# Try to fetch snapshot with retries
|
||||
for attempt in range(self.max_retries):
|
||||
frame = self._fetch_snapshot()
|
||||
|
||||
if frame is not None:
|
||||
logger.info(f"[SNAPSHOT] Successfully fetched {frame.shape[1]}x{frame.shape[0]} snapshot for {self.camera_id}")
|
||||
return frame
|
||||
|
||||
if attempt < self.max_retries - 1:
|
||||
logger.warning(f"[SNAPSHOT] Attempt {attempt + 1}/{self.max_retries} failed for {self.camera_id}, retrying...")
|
||||
time.sleep(0.5)
|
||||
|
||||
logger.error(f"[SNAPSHOT] Failed to fetch snapshot for {self.camera_id} after {self.max_retries} attempts")
|
||||
return None
|
||||
|
||||
def _resize_maintain_aspect(self, frame: np.ndarray, target_width: int, target_height: int) -> np.ndarray:
|
||||
"""Resize image while maintaining aspect ratio for high quality."""
|
||||
h, w = frame.shape[:2]
|
||||
aspect = w / h
|
||||
target_aspect = target_width / target_height
|
||||
|
||||
if aspect > target_aspect:
|
||||
# Image is wider
|
||||
new_width = target_width
|
||||
new_height = int(target_width / aspect)
|
||||
else:
|
||||
# Image is taller
|
||||
new_height = target_height
|
||||
new_width = int(target_height * aspect)
|
||||
|
||||
# Use INTER_LANCZOS4 for high quality downsampling
|
||||
resized = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LANCZOS4)
|
||||
|
||||
# Pad to target size if needed
|
||||
if new_width < target_width or new_height < target_height:
|
||||
top = (target_height - new_height) // 2
|
||||
bottom = target_height - new_height - top
|
||||
left = (target_width - new_width) // 2
|
||||
right = target_width - new_width - left
|
||||
resized = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
|
||||
|
||||
return resized
|
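Note: for a concrete feel of _resize_maintain_aspect above, a hypothetical 2688x1520 snapshot has aspect ratio ~1.768, slightly narrower than the 2560x1440 target's ~1.778, so the height is pinned to 1440, the width becomes int(1440 * 1.768) = 2546, and the remaining 14 columns are split as 7 pixels of black border on each side.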
18
core/streaming/readers/__init__.py
Normal file
@ -0,0 +1,18 @@
"""
Stream readers for RTSP and HTTP camera feeds.
"""
from .base import VideoReader
from .ffmpeg_rtsp import FFmpegRTSPReader
from .http_snapshot import HTTPSnapshotReader
from .utils import log_success, log_warning, log_error, log_info, Colors

__all__ = [
    'VideoReader',
    'FFmpegRTSPReader',
    'HTTPSnapshotReader',
    'log_success',
    'log_warning',
    'log_error',
    'log_info',
    'Colors'
]
65
core/streaming/readers/base.py
Normal file
@ -0,0 +1,65 @@
"""
Abstract base class for video stream readers.
"""
from abc import ABC, abstractmethod
from typing import Optional, Callable
import numpy as np


class VideoReader(ABC):
    """Abstract base class for video stream readers."""

    def __init__(self, camera_id: str, source_url: str, max_retries: int = 3):
        """
        Initialize the video reader.

        Args:
            camera_id: Unique identifier for the camera
            source_url: URL or path to the video source
            max_retries: Maximum number of retry attempts
        """
        self.camera_id = camera_id
        self.source_url = source_url
        self.max_retries = max_retries
        self.frame_callback: Optional[Callable[[str, np.ndarray], None]] = None

    @abstractmethod
    def start(self) -> None:
        """Start the video reader."""
        pass

    @abstractmethod
    def stop(self) -> None:
        """Stop the video reader."""
        pass

    @abstractmethod
    def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]) -> None:
        """
        Set callback function to handle captured frames.

        Args:
            callback: Function that takes (camera_id, frame) as arguments
        """
        pass

    @property
    @abstractmethod
    def is_running(self) -> bool:
        """Check if the reader is currently running."""
        pass

    @property
    @abstractmethod
    def reader_type(self) -> str:
        """Get the type of reader (e.g., 'rtsp', 'http_snapshot')."""
        pass

    def __enter__(self):
        """Context manager entry."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.stop()
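Note: because the VideoReader above is also a context manager, new readers can be exercised without the full streaming stack. A minimal hypothetical subclass (not part of this change) that satisfies the interface:

import numpy as np
from typing import Callable

class DummyReader(VideoReader):
    """Toy reader: emits one black 720p frame on start; for interface illustration only."""

    def __init__(self, camera_id: str):
        super().__init__(camera_id, source_url='dummy://', max_retries=0)
        self._running = False

    def start(self) -> None:
        self._running = True
        if self.frame_callback:
            self.frame_callback(self.camera_id, np.zeros((720, 1280, 3), dtype=np.uint8))

    def stop(self) -> None:
        self._running = False

    def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]) -> None:
        self.frame_callback = callback

    @property
    def is_running(self) -> bool:
        return self._running

    @property
    def reader_type(self) -> str:
        return 'dummy'

# Usage: the base class's __enter__/__exit__ call start()/stop() automatically.
# with DummyReader('cam-1') as reader:
#     assert reader.is_running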
436
core/streaming/readers/ffmpeg_rtsp.py
Normal file
@ -0,0 +1,436 @@
|
|||
"""
|
||||
FFmpeg RTSP stream reader using subprocess piping frames directly to buffer.
|
||||
Enhanced with comprehensive health monitoring and automatic recovery.
|
||||
"""
|
||||
import cv2
|
||||
import time
|
||||
import threading
|
||||
import numpy as np
|
||||
import subprocess
|
||||
import struct
|
||||
from typing import Optional, Callable, Dict, Any
|
||||
|
||||
from .base import VideoReader
|
||||
from .utils import log_success, log_warning, log_error, log_info
|
||||
from ...monitoring.stream_health import stream_health_tracker
|
||||
from ...monitoring.thread_health import thread_health_monitor
|
||||
from ...monitoring.recovery import recovery_manager, RecoveryAction
|
||||
|
||||
|
||||
class FFmpegRTSPReader(VideoReader):
|
||||
"""RTSP stream reader using subprocess FFmpeg piping frames directly to buffer."""
|
||||
|
||||
def __init__(self, camera_id: str, rtsp_url: str, max_retries: int = 3):
|
||||
super().__init__(camera_id, rtsp_url, max_retries)
|
||||
self.rtsp_url = rtsp_url
|
||||
self.process = None
|
||||
self.stop_event = threading.Event()
|
||||
self.thread = None
|
||||
self.stderr_thread = None
|
||||
|
||||
# Expected stream specs (for reference, actual dimensions read from PPM header)
|
||||
self.width = 1280
|
||||
self.height = 720
|
||||
|
||||
# Watchdog timers for stream reliability
|
||||
self.process_start_time = None
|
||||
self.last_frame_time = None
|
||||
self.is_restart = False # Track if this is a restart (shorter timeout)
|
||||
self.first_start_timeout = 30.0 # 30s timeout on first start
|
||||
self.restart_timeout = 15.0 # 15s timeout after restart
|
||||
|
||||
# Health monitoring setup
|
||||
self.last_heartbeat = time.time()
|
||||
self.consecutive_errors = 0
|
||||
self.ffmpeg_restart_count = 0
|
||||
|
||||
# Register recovery handlers
|
||||
recovery_manager.register_recovery_handler(
|
||||
RecoveryAction.RESTART_STREAM,
|
||||
self._handle_restart_recovery
|
||||
)
|
||||
recovery_manager.register_recovery_handler(
|
||||
RecoveryAction.RECONNECT,
|
||||
self._handle_reconnect_recovery
|
||||
)
|
||||
|
||||
@property
|
||||
def is_running(self) -> bool:
|
||||
"""Check if the reader is currently running."""
|
||||
return self.thread is not None and self.thread.is_alive()
|
||||
|
||||
@property
|
||||
def reader_type(self) -> str:
|
||||
"""Get the type of reader."""
|
||||
return "rtsp_ffmpeg"
|
||||
|
||||
def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]):
|
||||
"""Set callback function to handle captured frames."""
|
||||
self.frame_callback = callback
|
||||
|
||||
def start(self):
|
||||
"""Start the FFmpeg subprocess reader."""
|
||||
if self.thread and self.thread.is_alive():
|
||||
log_warning(self.camera_id, "FFmpeg reader already running")
|
||||
return
|
||||
|
||||
self.stop_event.clear()
|
||||
self.thread = threading.Thread(target=self._read_frames, daemon=True)
|
||||
self.thread.start()
|
||||
|
||||
# Register with health monitoring
|
||||
stream_health_tracker.register_stream(self.camera_id, "rtsp_ffmpeg", self.rtsp_url)
|
||||
thread_health_monitor.register_thread(self.thread, self._heartbeat_callback)
|
||||
|
||||
log_success(self.camera_id, "Stream started with health monitoring")
|
||||
|
||||
def stop(self):
|
||||
"""Stop the FFmpeg subprocess reader."""
|
||||
self.stop_event.set()
|
||||
|
||||
# Unregister from health monitoring
|
||||
if self.thread:
|
||||
thread_health_monitor.unregister_thread(self.thread.ident)
|
||||
|
||||
if self.process:
|
||||
self.process.terminate()
|
||||
try:
|
||||
self.process.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.process.kill()
|
||||
|
||||
if self.thread:
|
||||
self.thread.join(timeout=5.0)
|
||||
if self.stderr_thread:
|
||||
self.stderr_thread.join(timeout=2.0)
|
||||
|
||||
stream_health_tracker.unregister_stream(self.camera_id)
|
||||
|
||||
log_info(self.camera_id, "Stream stopped")
|
||||
|
||||
def _start_ffmpeg_process(self):
|
||||
"""Start FFmpeg subprocess outputting BMP frames to stdout pipe."""
|
||||
cmd = [
|
||||
'ffmpeg',
|
||||
# DO NOT REMOVE
|
||||
'-hwaccel', 'cuda',
|
||||
'-hwaccel_device', '0',
|
||||
# Real-time input flags
|
||||
'-fflags', 'nobuffer+genpts',
|
||||
'-flags', 'low_delay',
|
||||
'-max_delay', '0', # No reordering delay
|
||||
# RTSP configuration
|
||||
'-rtsp_transport', 'tcp',
|
||||
'-i', self.rtsp_url,
|
||||
# Output configuration (keeping BMP)
|
||||
'-f', 'image2pipe', # Output images to pipe
|
||||
'-vcodec', 'bmp', # BMP format with header containing dimensions
|
||||
'-vsync', 'passthrough', # Pass frames as-is
|
||||
# Use native stream resolution and framerate
|
||||
'-an', # No audio
|
||||
'-' # Output to stdout
|
||||
]
|
||||
|
||||
try:
|
||||
# Start FFmpeg with stdout pipe to read frames directly
|
||||
self.process = subprocess.Popen(
|
||||
cmd,
|
||||
stdout=subprocess.PIPE, # Capture stdout for frame data
|
||||
stderr=subprocess.PIPE, # Capture stderr for error logging
|
||||
bufsize=0 # Unbuffered for real-time processing
|
||||
)
|
||||
|
||||
# Start stderr reading thread
|
||||
if self.stderr_thread and self.stderr_thread.is_alive():
|
||||
# Stop previous stderr thread
|
||||
try:
|
||||
self.stderr_thread.join(timeout=1.0)
|
||||
except:
|
||||
pass
|
||||
|
||||
self.stderr_thread = threading.Thread(target=self._read_stderr, daemon=True)
|
||||
self.stderr_thread.start()
|
||||
|
||||
# Set process start time for watchdog
|
||||
self.process_start_time = time.time()
|
||||
self.last_frame_time = None # Reset frame time
|
||||
|
||||
# After successful restart, next timeout will be back to 30s
|
||||
if self.is_restart:
|
||||
log_info(self.camera_id, f"FFmpeg restarted successfully, next timeout: {self.first_start_timeout}s")
|
||||
self.is_restart = False
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
log_error(self.camera_id, f"FFmpeg startup failed: {e}")
|
||||
return False
|
||||
|
||||
def _read_bmp_frame(self, pipe):
|
||||
"""Read BMP frame from pipe - BMP header contains dimensions."""
|
||||
try:
|
||||
# Read BMP header (14 bytes file header + 40 bytes info header = 54 bytes minimum)
|
||||
header_data = b''
|
||||
bytes_to_read = 54
|
||||
|
||||
while len(header_data) < bytes_to_read:
|
||||
chunk = pipe.read(bytes_to_read - len(header_data))
|
||||
if not chunk:
|
||||
return None # Silent end of stream
|
||||
header_data += chunk
|
||||
|
||||
# Parse BMP header
|
||||
if header_data[:2] != b'BM':
|
||||
return None # Invalid format, skip frame silently
|
||||
|
||||
# Extract file size from header (bytes 2-5)
|
||||
file_size = struct.unpack('<L', header_data[2:6])[0]
|
||||
|
||||
# Extract width and height from info header (bytes 18-21 and 22-25)
|
||||
width = struct.unpack('<L', header_data[18:22])[0]
|
||||
height = struct.unpack('<L', header_data[22:26])[0]
|
||||
|
||||
# Read remaining file data
|
||||
remaining_size = file_size - 54
|
||||
remaining_data = b''
|
||||
|
||||
while len(remaining_data) < remaining_size:
|
||||
chunk = pipe.read(remaining_size - len(remaining_data))
|
||||
if not chunk:
|
||||
return None # Stream ended silently
|
||||
remaining_data += chunk
|
||||
|
||||
# Complete BMP data
|
||||
bmp_data = header_data + remaining_data
|
||||
|
||||
# Use OpenCV to decode BMP directly from memory
|
||||
frame_array = np.frombuffer(bmp_data, dtype=np.uint8)
|
||||
frame = cv2.imdecode(frame_array, cv2.IMREAD_COLOR)
|
||||
|
||||
if frame is None:
|
||||
return None # Decode failed silently
|
||||
|
||||
return frame
|
||||
|
||||
except Exception:
|
||||
return None # Error reading frame silently
|
||||
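Note: the BMP pipe format works here because the fixed 54-byte header already carries the frame's byte count and dimensions. The same header parsing in isolation, using only the struct module (an illustrative sketch, not the reader's code):

import struct

def parse_bmp_header(header: bytes):
    # Expects the 14-byte file header plus the 40-byte BITMAPINFOHEADER.
    if len(header) < 54 or header[:2] != b'BM':
        return None
    file_size = struct.unpack_from('<I', header, 2)[0]  # total bytes, header included
    width = struct.unpack_from('<i', header, 18)[0]
    height = struct.unpack_from('<i', header, 22)[0]     # negative height = top-down rows
    return file_size, width, abs(height)

The reader above unpacks width and height as unsigned '<L', which is fine for FFmpeg's bottom-up BMP output; the signed form is the spec-general reading, since a negative height marks a top-down bitmap.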
|
||||
def _read_stderr(self):
|
||||
"""Read and log FFmpeg stderr output in background thread."""
|
||||
if not self.process or not self.process.stderr:
|
||||
return
|
||||
|
||||
try:
|
||||
while self.process and self.process.poll() is None:
|
||||
try:
|
||||
line = self.process.stderr.readline()
|
||||
if line:
|
||||
error_msg = line.decode('utf-8', errors='ignore').strip()
|
||||
if error_msg and not self.stop_event.is_set():
|
||||
# Filter out common noise but log actual errors
|
||||
if any(keyword in error_msg.lower() for keyword in ['error', 'failed', 'cannot', 'invalid']):
|
||||
log_error(self.camera_id, f"FFmpeg: {error_msg}")
|
||||
elif 'warning' in error_msg.lower():
|
||||
log_warning(self.camera_id, f"FFmpeg: {error_msg}")
|
||||
except Exception:
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _check_watchdog_timeout(self) -> bool:
|
||||
"""Check if watchdog timeout has been exceeded."""
|
||||
if not self.process_start_time:
|
||||
return False
|
||||
|
||||
current_time = time.time()
|
||||
time_since_start = current_time - self.process_start_time
|
||||
|
||||
# Determine timeout based on whether this is a restart
|
||||
timeout = self.restart_timeout if self.is_restart else self.first_start_timeout
|
||||
|
||||
# If no frames received yet, check against process start time
|
||||
if not self.last_frame_time:
|
||||
if time_since_start > timeout:
|
||||
log_warning(self.camera_id, f"Watchdog timeout: No frames for {time_since_start:.1f}s (limit: {timeout}s)")
|
||||
return True
|
||||
else:
|
||||
# Check time since last frame
|
||||
time_since_frame = current_time - self.last_frame_time
|
||||
if time_since_frame > timeout:
|
||||
log_warning(self.camera_id, f"Watchdog timeout: No frames for {time_since_frame:.1f}s (limit: {timeout}s)")
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _restart_ffmpeg_process(self):
|
||||
"""Restart FFmpeg process due to watchdog timeout."""
|
||||
log_warning(self.camera_id, "Watchdog triggered FFmpeg restart")
|
||||
|
||||
# Terminate current process
|
||||
if self.process:
|
||||
try:
|
||||
self.process.terminate()
|
||||
self.process.wait(timeout=3)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.process.kill()
|
||||
except Exception:
|
||||
pass
|
||||
self.process = None
|
||||
|
||||
# Mark as restart for shorter timeout
|
||||
self.is_restart = True
|
||||
|
||||
# Small delay before restart
|
||||
time.sleep(1.0)
|
||||
|
||||
def _read_frames(self):
|
||||
"""Read frames directly from FFmpeg stdout pipe."""
|
||||
frame_count = 0
|
||||
last_log_time = time.time()
|
||||
|
||||
while not self.stop_event.is_set():
|
||||
try:
|
||||
# Send heartbeat for thread health monitoring
|
||||
self._send_heartbeat("reading_frames")
|
||||
|
||||
# Check watchdog timeout if process is running
|
||||
if self.process and self.process.poll() is None:
|
||||
if self._check_watchdog_timeout():
|
||||
self._restart_ffmpeg_process()
|
||||
continue
|
||||
|
||||
# Start FFmpeg if not running
|
||||
if not self.process or self.process.poll() is not None:
|
||||
if self.process and self.process.poll() is not None:
|
||||
log_warning(self.camera_id, "Stream disconnected, reconnecting...")
|
||||
stream_health_tracker.report_error(
|
||||
self.camera_id,
|
||||
"FFmpeg process disconnected"
|
||||
)
|
||||
|
||||
if not self._start_ffmpeg_process():
|
||||
self.consecutive_errors += 1
|
||||
stream_health_tracker.report_error(
|
||||
self.camera_id,
|
||||
"Failed to start FFmpeg process"
|
||||
)
|
||||
time.sleep(5.0)
|
||||
continue
|
||||
|
||||
# Read frames directly from FFmpeg stdout
|
||||
try:
|
||||
if self.process and self.process.stdout:
|
||||
# Read BMP frame data
|
||||
frame = self._read_bmp_frame(self.process.stdout)
|
||||
if frame is None:
|
||||
continue
|
||||
|
||||
# Update watchdog - we got a frame
|
||||
self.last_frame_time = time.time()
|
||||
|
||||
# Reset error counter on successful frame
|
||||
self.consecutive_errors = 0
|
||||
|
||||
# Report successful frame to health monitoring
|
||||
frame_size = frame.nbytes
|
||||
stream_health_tracker.report_frame_received(self.camera_id, frame_size)
|
||||
|
||||
# Call frame callback
|
||||
if self.frame_callback:
|
||||
try:
|
||||
self.frame_callback(self.camera_id, frame)
|
||||
except Exception as e:
|
||||
stream_health_tracker.report_error(
|
||||
self.camera_id,
|
||||
f"Frame callback error: {e}"
|
||||
)
|
||||
|
||||
frame_count += 1
|
||||
|
||||
# Log progress every 60 seconds (quieter)
|
||||
current_time = time.time()
|
||||
if current_time - last_log_time >= 60:
|
||||
log_success(self.camera_id, f"{frame_count} frames captured ({frame.shape[1]}x{frame.shape[0]})")
|
||||
last_log_time = current_time
|
||||
|
||||
except Exception as e:
|
||||
# Process might have died, let it restart on next iteration
|
||||
stream_health_tracker.report_error(
|
||||
self.camera_id,
|
||||
f"Frame reading error: {e}"
|
||||
)
|
||||
if self.process:
|
||||
self.process.terminate()
|
||||
self.process = None
|
||||
time.sleep(1.0)
|
||||
|
||||
except Exception as e:
|
||||
stream_health_tracker.report_error(
|
||||
self.camera_id,
|
||||
f"Main loop error: {e}"
|
||||
)
|
||||
time.sleep(1.0)
|
||||
|
||||
# Cleanup
|
||||
if self.process:
|
||||
self.process.terminate()
|
||||
|
||||
# Health monitoring methods
|
||||
def _send_heartbeat(self, activity: str = "running"):
|
||||
"""Send heartbeat to thread health monitor."""
|
||||
self.last_heartbeat = time.time()
|
||||
thread_health_monitor.heartbeat(activity=activity)
|
||||
|
||||
def _heartbeat_callback(self) -> bool:
|
||||
"""Heartbeat callback for thread responsiveness testing."""
|
||||
try:
|
||||
# Check if thread is responsive by checking recent heartbeat
|
||||
current_time = time.time()
|
||||
age = current_time - self.last_heartbeat
|
||||
|
||||
# Thread is responsive if heartbeat is recent
|
||||
return age < 30.0 # 30 second responsiveness threshold
|
||||
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def _handle_restart_recovery(self, component: str, details: Dict[str, Any]) -> bool:
|
||||
"""Handle restart recovery action."""
|
||||
try:
|
||||
log_info(self.camera_id, "Restarting FFmpeg RTSP reader for health recovery")
|
||||
|
||||
# Stop current instance
|
||||
self.stop()
|
||||
|
||||
# Small delay
|
||||
time.sleep(2.0)
|
||||
|
||||
# Restart
|
||||
self.start()
|
||||
|
||||
# Report successful restart
|
||||
stream_health_tracker.report_reconnect(self.camera_id, "health_recovery_restart")
|
||||
self.ffmpeg_restart_count += 1
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
log_error(self.camera_id, f"Failed to restart FFmpeg RTSP reader: {e}")
|
||||
return False
|
||||
|
||||
def _handle_reconnect_recovery(self, component: str, details: Dict[str, Any]) -> bool:
|
||||
"""Handle reconnect recovery action."""
|
||||
try:
|
||||
log_info(self.camera_id, "Reconnecting FFmpeg RTSP reader for health recovery")
|
||||
|
||||
# Force restart FFmpeg process
|
||||
self._restart_ffmpeg_process()
|
||||
|
||||
# Reset error counters
|
||||
self.consecutive_errors = 0
|
||||
stream_health_tracker.report_reconnect(self.camera_id, "health_recovery_reconnect")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
log_error(self.camera_id, f"Failed to reconnect FFmpeg RTSP reader: {e}")
|
||||
return False
|
378
core/streaming/readers/http_snapshot.py
Normal file
@ -0,0 +1,378 @@
|
|||
"""
|
||||
HTTP snapshot reader optimized for 2560x1440 (2K) high quality images.
|
||||
Enhanced with comprehensive health monitoring and automatic recovery.
|
||||
"""
|
||||
import cv2
|
||||
import logging
|
||||
import time
|
||||
import threading
|
||||
import requests
|
||||
import numpy as np
|
||||
from typing import Optional, Callable, Dict, Any
|
||||
|
||||
from .base import VideoReader
|
||||
from .utils import log_success, log_warning, log_error, log_info
|
||||
from ...monitoring.stream_health import stream_health_tracker
|
||||
from ...monitoring.thread_health import thread_health_monitor
|
||||
from ...monitoring.recovery import recovery_manager, RecoveryAction
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class HTTPSnapshotReader(VideoReader):
|
||||
"""HTTP snapshot reader optimized for 2560x1440 (2K) high quality images."""
|
||||
|
||||
def __init__(self, camera_id: str, snapshot_url: str, interval_ms: int = 5000, max_retries: int = 3):
|
||||
super().__init__(camera_id, snapshot_url, max_retries)
|
||||
self.snapshot_url = snapshot_url
|
||||
self.interval_ms = interval_ms
|
||||
self.stop_event = threading.Event()
|
||||
self.thread = None
|
||||
|
||||
# Expected snapshot specifications
|
||||
self.expected_width = 2560
|
||||
self.expected_height = 1440
|
||||
self.max_file_size = 10 * 1024 * 1024 # 10MB max for 2K image
|
||||
|
||||
# Health monitoring setup
|
||||
self.last_heartbeat = time.time()
|
||||
self.consecutive_errors = 0
|
||||
self.connection_test_interval = 300 # Test connection every 5 minutes
|
||||
self.last_connection_test = None
|
||||
|
||||
# Register recovery handlers
|
||||
recovery_manager.register_recovery_handler(
|
||||
RecoveryAction.RESTART_STREAM,
|
||||
self._handle_restart_recovery
|
||||
)
|
||||
recovery_manager.register_recovery_handler(
|
||||
RecoveryAction.RECONNECT,
|
||||
self._handle_reconnect_recovery
|
||||
)
|
||||
|
||||
@property
|
||||
def is_running(self) -> bool:
|
||||
"""Check if the reader is currently running."""
|
||||
return self.thread is not None and self.thread.is_alive()
|
||||
|
||||
@property
|
||||
def reader_type(self) -> str:
|
||||
"""Get the type of reader."""
|
||||
return "http_snapshot"
|
||||
|
||||
def set_frame_callback(self, callback: Callable[[str, np.ndarray], None]):
|
||||
"""Set callback function to handle captured frames."""
|
||||
self.frame_callback = callback
|
||||
|
||||
def start(self):
|
||||
"""Start the snapshot reader thread."""
|
||||
if self.thread and self.thread.is_alive():
|
||||
logger.warning(f"Snapshot reader for {self.camera_id} already running")
|
||||
return
|
||||
|
||||
self.stop_event.clear()
|
||||
self.thread = threading.Thread(target=self._read_snapshots, daemon=True)
|
||||
self.thread.start()
|
||||
|
||||
# Register with health monitoring
|
||||
stream_health_tracker.register_stream(self.camera_id, "http_snapshot", self.snapshot_url)
|
||||
thread_health_monitor.register_thread(self.thread, self._heartbeat_callback)
|
||||
|
||||
logger.info(f"Started snapshot reader for camera {self.camera_id} with health monitoring")
|
||||
|
||||
def stop(self):
|
||||
"""Stop the snapshot reader thread."""
|
||||
self.stop_event.set()
|
||||
|
||||
# Unregister from health monitoring
|
||||
if self.thread:
|
||||
thread_health_monitor.unregister_thread(self.thread.ident)
|
||||
self.thread.join(timeout=5.0)
|
||||
|
||||
stream_health_tracker.unregister_stream(self.camera_id)
|
||||
|
||||
logger.info(f"Stopped snapshot reader for camera {self.camera_id}")
|
||||
|
||||
def _read_snapshots(self):
|
||||
"""Main snapshot reading loop for high quality 2K images."""
|
||||
retries = 0
|
||||
frame_count = 0
|
||||
last_log_time = time.time()
|
||||
last_connection_test = time.time()
|
||||
interval_seconds = self.interval_ms / 1000.0
|
||||
|
||||
logger.info(f"Snapshot interval for camera {self.camera_id}: {interval_seconds}s")
|
||||
|
||||
while not self.stop_event.is_set():
|
||||
try:
|
||||
# Send heartbeat for thread health monitoring
|
||||
self._send_heartbeat("fetching_snapshot")
|
||||
|
||||
start_time = time.time()
|
||||
frame = self._fetch_snapshot()
|
||||
|
||||
if frame is None:
|
||||
retries += 1
|
||||
self.consecutive_errors += 1
|
||||
|
||||
# Report error to health monitoring
|
||||
stream_health_tracker.report_error(
|
||||
self.camera_id,
|
||||
f"Failed to fetch snapshot (retry {retries}/{self.max_retries})"
|
||||
)
|
||||
|
||||
logger.warning(f"Failed to fetch snapshot for camera {self.camera_id}, retry {retries}/{self.max_retries}")
|
||||
|
||||
if self.max_retries != -1 and retries > self.max_retries:
|
||||
logger.error(f"Max retries reached for snapshot camera {self.camera_id}")
|
||||
break
|
||||
|
||||
time.sleep(min(2.0, interval_seconds))
|
||||
continue
|
||||
|
||||
# Accept any valid image dimensions - don't force specific resolution
|
||||
if frame.shape[1] <= 0 or frame.shape[0] <= 0:
|
||||
logger.warning(f"Camera {self.camera_id}: Invalid frame dimensions {frame.shape[1]}x{frame.shape[0]}")
|
||||
stream_health_tracker.report_error(
|
||||
self.camera_id,
|
||||
f"Invalid frame dimensions: {frame.shape[1]}x{frame.shape[0]}"
|
||||
)
|
||||
continue
|
||||
|
||||
# Reset retry counter on successful fetch
|
||||
retries = 0
|
||||
self.consecutive_errors = 0
|
||||
frame_count += 1
|
||||
|
||||
# Report successful frame to health monitoring
|
||||
frame_size = frame.nbytes
|
||||
stream_health_tracker.report_frame_received(self.camera_id, frame_size)
|
||||
|
||||
# Call frame callback
|
||||
if self.frame_callback:
|
||||
try:
|
||||
self.frame_callback(self.camera_id, frame)
|
||||
except Exception as e:
|
||||
logger.error(f"Camera {self.camera_id}: Frame callback error: {e}")
|
||||
stream_health_tracker.report_error(self.camera_id, f"Frame callback error: {e}")
|
||||
|
||||
# Periodic connection health test
|
||||
current_time = time.time()
|
||||
if current_time - last_connection_test >= self.connection_test_interval:
|
||||
self._test_connection_health()
|
||||
last_connection_test = current_time
|
||||
|
||||
# Log progress every 30 seconds
|
||||
if current_time - last_log_time >= 30:
|
||||
logger.info(f"Camera {self.camera_id}: {frame_count} snapshots processed")
|
||||
last_log_time = current_time
|
||||
|
||||
# Wait for next interval
|
||||
elapsed = time.time() - start_time
|
||||
sleep_time = max(0, interval_seconds - elapsed)
|
||||
if sleep_time > 0:
|
||||
self.stop_event.wait(sleep_time)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in snapshot loop for camera {self.camera_id}: {e}")
|
||||
stream_health_tracker.report_error(self.camera_id, f"Snapshot loop error: {e}")
|
||||
retries += 1
|
||||
if self.max_retries != -1 and retries > self.max_retries:
|
||||
break
|
||||
time.sleep(min(2.0, interval_seconds))
|
||||
|
||||
logger.info(f"Snapshot reader thread ended for camera {self.camera_id}")
|
||||
|
||||
def _fetch_snapshot(self) -> Optional[np.ndarray]:
|
||||
"""Fetch a single high quality snapshot from HTTP URL."""
|
||||
try:
|
||||
# Parse URL for authentication
|
||||
from urllib.parse import urlparse
|
||||
parsed_url = urlparse(self.snapshot_url)
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Python-Detector-Worker/1.0',
|
||||
'Accept': 'image/jpeg, image/png, image/*'
|
||||
}
|
||||
auth = None
|
||||
|
||||
if parsed_url.username and parsed_url.password:
|
||||
from requests.auth import HTTPBasicAuth, HTTPDigestAuth
|
||||
auth = HTTPBasicAuth(parsed_url.username, parsed_url.password)
|
||||
|
||||
# Reconstruct URL without credentials
|
||||
clean_url = f"{parsed_url.scheme}://{parsed_url.hostname}"
|
||||
if parsed_url.port:
|
||||
clean_url += f":{parsed_url.port}"
|
||||
clean_url += parsed_url.path
|
||||
if parsed_url.query:
|
||||
clean_url += f"?{parsed_url.query}"
|
||||
|
||||
# Try Basic Auth first
|
||||
response = requests.get(clean_url, auth=auth, timeout=15, headers=headers,
|
||||
stream=True, verify=False)
|
||||
|
||||
# If Basic Auth fails, try Digest Auth
|
||||
if response.status_code == 401:
|
||||
auth = HTTPDigestAuth(parsed_url.username, parsed_url.password)
|
||||
response = requests.get(clean_url, auth=auth, timeout=15, headers=headers,
|
||||
stream=True, verify=False)
|
||||
else:
|
||||
response = requests.get(self.snapshot_url, timeout=15, headers=headers,
|
||||
stream=True, verify=False)
|
||||
|
||||
if response.status_code == 200:
|
||||
# Check content size
|
||||
content_length = int(response.headers.get('content-length', 0))
|
||||
if content_length > self.max_file_size:
|
||||
logger.warning(f"Snapshot too large for camera {self.camera_id}: {content_length} bytes")
|
||||
return None
|
||||
|
||||
# Read content
|
||||
content = response.content
|
||||
|
||||
# Convert to numpy array
|
||||
image_array = np.frombuffer(content, np.uint8)
|
||||
|
||||
# Decode as high quality image
|
||||
frame = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
|
||||
|
||||
if frame is None:
|
||||
logger.error(f"Failed to decode snapshot for camera {self.camera_id}")
|
||||
return None
|
||||
|
||||
logger.debug(f"Fetched snapshot for camera {self.camera_id}: {frame.shape[1]}x{frame.shape[0]}")
|
||||
return frame
|
||||
else:
|
||||
logger.warning(f"HTTP {response.status_code} from {self.camera_id}")
|
||||
return None
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(f"Request error fetching snapshot for {self.camera_id}: {e}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Error decoding snapshot for {self.camera_id}: {e}")
|
||||
return None
|
||||
|
||||
def fetch_single_snapshot(self) -> Optional[np.ndarray]:
|
||||
"""
|
||||
Fetch a single high-quality snapshot on demand for pipeline processing.
|
||||
This method is for one-time fetch from HTTP URL, not continuous streaming.
|
||||
|
||||
Returns:
|
||||
High quality 2K snapshot frame or None if failed
|
||||
"""
|
||||
logger.info(f"[SNAPSHOT] Fetching snapshot for {self.camera_id} from {self.snapshot_url}")
|
||||
|
||||
# Try to fetch snapshot with retries
|
||||
for attempt in range(self.max_retries):
|
||||
frame = self._fetch_snapshot()
|
||||
|
||||
if frame is not None:
|
||||
logger.info(f"[SNAPSHOT] Successfully fetched {frame.shape[1]}x{frame.shape[0]} snapshot for {self.camera_id}")
|
||||
return frame
|
||||
|
||||
if attempt < self.max_retries - 1:
|
||||
logger.warning(f"[SNAPSHOT] Attempt {attempt + 1}/{self.max_retries} failed for {self.camera_id}, retrying...")
|
||||
time.sleep(0.5)
|
||||
|
||||
logger.error(f"[SNAPSHOT] Failed to fetch snapshot for {self.camera_id} after {self.max_retries} attempts")
|
||||
return None
|
||||
|
||||
def _resize_maintain_aspect(self, frame: np.ndarray, target_width: int, target_height: int) -> np.ndarray:
|
||||
"""Resize image while maintaining aspect ratio for high quality."""
|
||||
h, w = frame.shape[:2]
|
||||
aspect = w / h
|
||||
target_aspect = target_width / target_height
|
||||
|
||||
if aspect > target_aspect:
|
||||
# Image is wider
|
||||
new_width = target_width
|
||||
new_height = int(target_width / aspect)
|
||||
else:
|
||||
# Image is taller
|
||||
new_height = target_height
|
||||
new_width = int(target_height * aspect)
|
||||
|
||||
# Use INTER_LANCZOS4 for high quality downsampling
|
||||
resized = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LANCZOS4)
|
||||
|
||||
# Pad to target size if needed
|
||||
if new_width < target_width or new_height < target_height:
|
||||
top = (target_height - new_height) // 2
|
||||
bottom = target_height - new_height - top
|
||||
left = (target_width - new_width) // 2
|
||||
right = target_width - new_width - left
|
||||
resized = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
|
||||
|
||||
return resized
|
||||
|
||||
# Health monitoring methods
|
||||
def _send_heartbeat(self, activity: str = "running"):
|
||||
"""Send heartbeat to thread health monitor."""
|
||||
self.last_heartbeat = time.time()
|
||||
thread_health_monitor.heartbeat(activity=activity)
|
||||
|
||||
def _heartbeat_callback(self) -> bool:
|
||||
"""Heartbeat callback for thread responsiveness testing."""
|
||||
try:
|
||||
# Check if thread is responsive by checking recent heartbeat
|
||||
current_time = time.time()
|
||||
age = current_time - self.last_heartbeat
|
||||
|
||||
# Thread is responsive if heartbeat is recent
|
||||
return age < 30.0 # 30 second responsiveness threshold
|
||||
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def _test_connection_health(self):
|
||||
"""Test HTTP connection health."""
|
||||
try:
|
||||
stream_health_tracker.test_http_connection(self.camera_id, self.snapshot_url)
|
||||
except Exception as e:
|
||||
logger.error(f"Error testing connection health for {self.camera_id}: {e}")
|
||||
|
||||
def _handle_restart_recovery(self, component: str, details: Dict[str, Any]) -> bool:
|
||||
"""Handle restart recovery action."""
|
||||
try:
|
||||
logger.info(f"Restarting HTTP snapshot reader for {self.camera_id}")
|
||||
|
||||
# Stop current instance
|
||||
self.stop()
|
||||
|
||||
# Small delay
|
||||
time.sleep(2.0)
|
||||
|
||||
# Restart
|
||||
self.start()
|
||||
|
||||
# Report successful restart
|
||||
stream_health_tracker.report_reconnect(self.camera_id, "health_recovery_restart")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to restart HTTP snapshot reader for {self.camera_id}: {e}")
|
||||
return False
|
||||
|
||||
def _handle_reconnect_recovery(self, component: str, details: Dict[str, Any]) -> bool:
|
||||
"""Handle reconnect recovery action."""
|
||||
try:
|
||||
logger.info(f"Reconnecting HTTP snapshot reader for {self.camera_id}")
|
||||
|
||||
# Test connection first
|
||||
success = stream_health_tracker.test_http_connection(self.camera_id, self.snapshot_url)
|
||||
|
||||
if success:
|
||||
# Reset error counters
|
||||
self.consecutive_errors = 0
|
||||
stream_health_tracker.report_reconnect(self.camera_id, "health_recovery_reconnect")
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"Connection test failed during recovery for {self.camera_id}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to reconnect HTTP snapshot reader for {self.camera_id}: {e}")
|
||||
return False
|
38
core/streaming/readers/utils.py
Normal file
@@ -0,0 +1,38 @@
"""
|
||||
Utility functions for stream readers.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
|
||||
# Keep OpenCV errors visible but allow FFmpeg stderr logging
|
||||
os.environ["OPENCV_LOG_LEVEL"] = "ERROR"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Color codes for pretty logging
|
||||
class Colors:
|
||||
GREEN = '\033[92m'
|
||||
YELLOW = '\033[93m'
|
||||
RED = '\033[91m'
|
||||
BLUE = '\033[94m'
|
||||
PURPLE = '\033[95m'
|
||||
CYAN = '\033[96m'
|
||||
WHITE = '\033[97m'
|
||||
BOLD = '\033[1m'
|
||||
END = '\033[0m'
|
||||
|
||||
def log_success(camera_id: str, message: str):
|
||||
"""Log success messages in green"""
|
||||
logger.info(f"{Colors.GREEN}[{camera_id}] {message}{Colors.END}")
|
||||
|
||||
def log_warning(camera_id: str, message: str):
|
||||
"""Log warnings in yellow"""
|
||||
logger.warning(f"{Colors.YELLOW}[{camera_id}] {message}{Colors.END}")
|
||||
|
||||
def log_error(camera_id: str, message: str):
|
||||
"""Log errors in red"""
|
||||
logger.error(f"{Colors.RED}[{camera_id}] {message}{Colors.END}")
|
||||
|
||||
def log_info(camera_id: str, message: str):
|
||||
"""Log info in cyan"""
|
||||
logger.info(f"{Colors.CYAN}[{camera_id}] {message}{Colors.END}")
|
408
core/tracking/bot_sort_tracker.py
Normal file
@@ -0,0 +1,408 @@
"""
|
||||
BoT-SORT Multi-Object Tracker with Camera Isolation
|
||||
Based on BoT-SORT: Robust Associations Multi-Pedestrian Tracking
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
from dataclasses import dataclass
|
||||
from scipy.optimize import linear_sum_assignment
|
||||
from filterpy.kalman import KalmanFilter
|
||||
import cv2
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class TrackState:
|
||||
"""Track state enumeration"""
|
||||
TENTATIVE = "tentative" # New track, not confirmed yet
|
||||
CONFIRMED = "confirmed" # Confirmed track
|
||||
DELETED = "deleted" # Track to be deleted
|
||||
|
||||
|
||||
class Track:
|
||||
"""
|
||||
Individual track representation with Kalman filter for motion prediction
|
||||
"""
|
||||
|
||||
def __init__(self, detection, track_id: int, camera_id: str):
|
||||
"""
|
||||
Initialize a new track
|
||||
|
||||
Args:
|
||||
detection: Initial detection (bbox, confidence, class)
|
||||
track_id: Unique track identifier within camera
|
||||
camera_id: Camera identifier
|
||||
"""
|
||||
self.track_id = track_id
|
||||
self.camera_id = camera_id
|
||||
self.state = TrackState.TENTATIVE
|
||||
|
||||
# Time tracking
|
||||
self.start_time = time.time()
|
||||
self.last_update_time = time.time()
|
||||
|
||||
# Appearance and motion
|
||||
self.bbox = detection.bbox # [x1, y1, x2, y2]
|
||||
self.confidence = detection.confidence
|
||||
self.class_name = detection.class_name
|
||||
|
||||
# Track management
|
||||
self.hit_streak = 1
|
||||
self.time_since_update = 0
|
||||
self.age = 1
|
||||
|
||||
# Kalman filter for motion prediction
|
||||
self.kf = self._create_kalman_filter()
|
||||
self._update_kalman_filter(detection.bbox)
|
||||
|
||||
# Track history
|
||||
self.history = [detection.bbox]
|
||||
self.max_history = 10
|
||||
|
||||
def _create_kalman_filter(self) -> KalmanFilter:
|
||||
"""Create Kalman filter for bbox tracking (x, y, w, h, vx, vy, vw, vh)"""
|
||||
kf = KalmanFilter(dim_x=8, dim_z=4)
|
||||
|
||||
# State transition matrix (constant velocity model)
|
||||
kf.F = np.array([
|
||||
[1, 0, 0, 0, 1, 0, 0, 0],
|
||||
[0, 1, 0, 0, 0, 1, 0, 0],
|
||||
[0, 0, 1, 0, 0, 0, 1, 0],
|
||||
[0, 0, 0, 1, 0, 0, 0, 1],
|
||||
[0, 0, 0, 0, 1, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 1, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 1, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 1]
|
||||
])
|
||||
|
||||
# Measurement matrix (observe x, y, w, h)
|
||||
kf.H = np.array([
|
||||
[1, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 1, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 1, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 1, 0, 0, 0, 0]
|
||||
])
|
||||
|
||||
# Process noise
|
||||
kf.Q *= 0.01
|
||||
|
||||
# Measurement noise
|
||||
kf.R *= 10
|
||||
|
||||
# Initial covariance
|
||||
kf.P *= 100
|
||||
|
||||
return kf
|
||||
|
||||
def _update_kalman_filter(self, bbox: List[float]):
|
||||
"""Update Kalman filter with new bbox"""
|
||||
# Convert [x1, y1, x2, y2] to [cx, cy, w, h]
|
||||
x1, y1, x2, y2 = bbox
|
||||
cx = (x1 + x2) / 2
|
||||
cy = (y1 + y2) / 2
|
||||
w = x2 - x1
|
||||
h = y2 - y1
|
||||
|
||||
# Properly assign to column vector
|
||||
self.kf.x[:4, 0] = [cx, cy, w, h]
|
||||
|
||||
def predict(self) -> np.ndarray:
|
||||
"""Predict next position using Kalman filter"""
|
||||
self.kf.predict()
|
||||
|
||||
# Convert back to [x1, y1, x2, y2] format
|
||||
cx, cy, w, h = self.kf.x[:4, 0] # Extract from column vector
|
||||
x1 = cx - w/2
|
||||
y1 = cy - h/2
|
||||
x2 = cx + w/2
|
||||
y2 = cy + h/2
|
||||
|
||||
return np.array([x1, y1, x2, y2])
|
||||
|
||||
def update(self, detection):
|
||||
"""Update track with new detection"""
|
||||
self.last_update_time = time.time()
|
||||
self.time_since_update = 0
|
||||
self.hit_streak += 1
|
||||
self.age += 1
|
||||
|
||||
# Update track properties
|
||||
self.bbox = detection.bbox
|
||||
self.confidence = detection.confidence
|
||||
|
||||
# Update Kalman filter
|
||||
x1, y1, x2, y2 = detection.bbox
|
||||
cx = (x1 + x2) / 2
|
||||
cy = (y1 + y2) / 2
|
||||
w = x2 - x1
|
||||
h = y2 - y1
|
||||
|
||||
self.kf.update([cx, cy, w, h])
|
||||
|
||||
# Update history
|
||||
self.history.append(detection.bbox)
|
||||
if len(self.history) > self.max_history:
|
||||
self.history.pop(0)
|
||||
|
||||
# Update state
|
||||
if self.state == TrackState.TENTATIVE and self.hit_streak >= 3:
|
||||
self.state = TrackState.CONFIRMED
|
||||
|
||||
def mark_missed(self):
|
||||
"""Mark track as missed in this frame"""
|
||||
self.time_since_update += 1
|
||||
self.age += 1
|
||||
|
||||
if self.time_since_update > 5: # Delete after 5 missed frames
|
||||
self.state = TrackState.DELETED
|
||||
|
||||
def is_confirmed(self) -> bool:
|
||||
"""Check if track is confirmed"""
|
||||
return self.state == TrackState.CONFIRMED
|
||||
|
||||
def is_deleted(self) -> bool:
|
||||
"""Check if track should be deleted"""
|
||||
return self.state == TrackState.DELETED
|
||||
|
||||
|
||||
class CameraTracker:
|
||||
"""
|
||||
BoT-SORT tracker for a single camera
|
||||
"""
|
||||
|
||||
def __init__(self, camera_id: str, max_disappeared: int = 10):
|
||||
"""
|
||||
Initialize camera tracker
|
||||
|
||||
Args:
|
||||
camera_id: Unique camera identifier
|
||||
max_disappeared: Maximum frames a track can be missed before deletion
|
||||
"""
|
||||
self.camera_id = camera_id
|
||||
self.max_disappeared = max_disappeared
|
||||
|
||||
# Track management
|
||||
self.tracks: Dict[int, Track] = {}
|
||||
self.next_id = 1
|
||||
self.frame_count = 0
|
||||
|
||||
logger.info(f"Initialized BoT-SORT tracker for camera {camera_id}")
|
||||
|
||||
def update(self, detections: List) -> List[Track]:
|
||||
"""
|
||||
Update tracker with new detections
|
||||
|
||||
Args:
|
||||
detections: List of Detection objects
|
||||
|
||||
Returns:
|
||||
List of active confirmed tracks
|
||||
"""
|
||||
self.frame_count += 1
|
||||
|
||||
# Predict all existing tracks
|
||||
for track in self.tracks.values():
|
||||
track.predict()
|
||||
|
||||
# Associate detections to tracks
|
||||
matched_tracks, unmatched_detections, unmatched_tracks = self._associate(detections)
|
||||
|
||||
# Update matched tracks
|
||||
for track_id, detection in matched_tracks:
|
||||
self.tracks[track_id].update(detection)
|
||||
|
||||
# Mark unmatched tracks as missed
|
||||
for track_id in unmatched_tracks:
|
||||
self.tracks[track_id].mark_missed()
|
||||
|
||||
# Create new tracks for unmatched detections
|
||||
for detection in unmatched_detections:
|
||||
track = Track(detection, self.next_id, self.camera_id)
|
||||
self.tracks[self.next_id] = track
|
||||
self.next_id += 1
|
||||
|
||||
# Remove deleted tracks
|
||||
tracks_to_remove = [tid for tid, track in self.tracks.items() if track.is_deleted()]
|
||||
for tid in tracks_to_remove:
|
||||
del self.tracks[tid]
|
||||
|
||||
# Return confirmed tracks
|
||||
confirmed_tracks = [track for track in self.tracks.values() if track.is_confirmed()]
|
||||
|
||||
return confirmed_tracks
|
||||
|
||||
def _associate(self, detections: List) -> Tuple[List[Tuple[int, Any]], List[Any], List[int]]:
|
||||
"""
|
||||
Associate detections to existing tracks using IoU distance
|
||||
|
||||
Returns:
|
||||
(matched_tracks, unmatched_detections, unmatched_tracks)
|
||||
"""
|
||||
if not detections or not self.tracks:
|
||||
return [], detections, list(self.tracks.keys())
|
||||
|
||||
# Calculate IoU distance matrix
|
||||
track_ids = list(self.tracks.keys())
|
||||
cost_matrix = np.zeros((len(track_ids), len(detections)))
|
||||
|
||||
for i, track_id in enumerate(track_ids):
|
||||
track = self.tracks[track_id]
|
||||
predicted_bbox = track.predict()
|
||||
|
||||
for j, detection in enumerate(detections):
|
||||
iou = self._calculate_iou(predicted_bbox, detection.bbox)
|
||||
cost_matrix[i, j] = 1 - iou # Convert IoU to distance
|
||||
|
||||
# Solve assignment problem
|
||||
row_indices, col_indices = linear_sum_assignment(cost_matrix)
|
||||
|
||||
# Filter matches by IoU threshold
|
||||
iou_threshold = 0.3
|
||||
matched_tracks = []
|
||||
matched_detection_indices = set()
|
||||
matched_track_indices = set()
|
||||
|
||||
for row, col in zip(row_indices, col_indices):
|
||||
if cost_matrix[row, col] <= (1 - iou_threshold):
|
||||
track_id = track_ids[row]
|
||||
detection = detections[col]
|
||||
matched_tracks.append((track_id, detection))
|
||||
matched_detection_indices.add(col)
|
||||
matched_track_indices.add(row)
|
||||
|
||||
# Find unmatched detections and tracks
|
||||
unmatched_detections = [detections[i] for i in range(len(detections))
|
||||
if i not in matched_detection_indices]
|
||||
unmatched_tracks = [track_ids[i] for i in range(len(track_ids))
|
||||
if i not in matched_track_indices]
|
||||
|
||||
return matched_tracks, unmatched_detections, unmatched_tracks
|
||||
|
||||
def _calculate_iou(self, bbox1: np.ndarray, bbox2: List[float]) -> float:
|
||||
"""Calculate IoU between two bounding boxes"""
|
||||
x1_1, y1_1, x2_1, y2_1 = bbox1
|
||||
x1_2, y1_2, x2_2, y2_2 = bbox2
|
||||
|
||||
# Calculate intersection area
|
||||
x1_i = max(x1_1, x1_2)
|
||||
y1_i = max(y1_1, y1_2)
|
||||
x2_i = min(x2_1, x2_2)
|
||||
y2_i = min(y2_1, y2_2)
|
||||
|
||||
if x2_i <= x1_i or y2_i <= y1_i:
|
||||
return 0.0
|
||||
|
||||
intersection = (x2_i - x1_i) * (y2_i - y1_i)
|
||||
|
||||
# Calculate union area
|
||||
area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
|
||||
area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
|
||||
union = area1 + area2 - intersection
|
||||
|
||||
return intersection / union if union > 0 else 0.0
|
||||
|
||||
|
||||
class MultiCameraBoTSORT:
|
||||
"""
|
||||
Multi-camera BoT-SORT tracker with complete camera isolation
|
||||
"""
|
||||
|
||||
def __init__(self, trigger_classes: List[str], min_confidence: float = 0.6):
|
||||
"""
|
||||
Initialize multi-camera tracker
|
||||
|
||||
Args:
|
||||
trigger_classes: List of class names to track
|
||||
min_confidence: Minimum detection confidence threshold
|
||||
"""
|
||||
self.trigger_classes = trigger_classes
|
||||
self.min_confidence = min_confidence
|
||||
|
||||
# Camera-specific trackers
|
||||
self.camera_trackers: Dict[str, CameraTracker] = {}
|
||||
|
||||
logger.info(f"Initialized MultiCameraBoTSORT with classes={trigger_classes}, "
|
||||
f"min_confidence={min_confidence}")
|
||||
|
||||
def get_or_create_tracker(self, camera_id: str) -> CameraTracker:
|
||||
"""Get or create tracker for specific camera"""
|
||||
if camera_id not in self.camera_trackers:
|
||||
self.camera_trackers[camera_id] = CameraTracker(camera_id)
|
||||
logger.info(f"Created new tracker for camera {camera_id}")
|
||||
|
||||
return self.camera_trackers[camera_id]
|
||||
|
||||
def update(self, camera_id: str, inference_result) -> List[Dict]:
|
||||
"""
|
||||
Update tracker for specific camera with detections
|
||||
|
||||
Args:
|
||||
camera_id: Camera identifier
|
||||
inference_result: InferenceResult with detections
|
||||
|
||||
Returns:
|
||||
List of track information dictionaries
|
||||
"""
|
||||
# Filter detections by confidence and trigger classes
|
||||
filtered_detections = []
|
||||
|
||||
if hasattr(inference_result, 'detections') and inference_result.detections:
|
||||
for detection in inference_result.detections:
|
||||
if (detection.confidence >= self.min_confidence and
|
||||
detection.class_name in self.trigger_classes):
|
||||
filtered_detections.append(detection)
|
||||
|
||||
# Get camera tracker and update
|
||||
tracker = self.get_or_create_tracker(camera_id)
|
||||
confirmed_tracks = tracker.update(filtered_detections)
|
||||
|
||||
# Convert tracks to output format
|
||||
track_results = []
|
||||
for track in confirmed_tracks:
|
||||
track_results.append({
|
||||
'track_id': track.track_id,
|
||||
'camera_id': track.camera_id,
|
||||
'bbox': track.bbox,
|
||||
'confidence': track.confidence,
|
||||
'class_name': track.class_name,
|
||||
'hit_streak': track.hit_streak,
|
||||
'age': track.age
|
||||
})
|
||||
|
||||
return track_results
|
||||
|
||||
def get_statistics(self) -> Dict[str, Any]:
|
||||
"""Get tracking statistics across all cameras"""
|
||||
stats = {}
|
||||
total_tracks = 0
|
||||
|
||||
for camera_id, tracker in self.camera_trackers.items():
|
||||
camera_stats = {
|
||||
'active_tracks': len([t for t in tracker.tracks.values() if t.is_confirmed()]),
|
||||
'total_tracks': len(tracker.tracks),
|
||||
'frame_count': tracker.frame_count
|
||||
}
|
||||
stats[camera_id] = camera_stats
|
||||
total_tracks += camera_stats['active_tracks']
|
||||
|
||||
stats['summary'] = {
|
||||
'total_cameras': len(self.camera_trackers),
|
||||
'total_active_tracks': total_tracks
|
||||
}
|
||||
|
||||
return stats
|
||||
|
||||
def reset_camera(self, camera_id: str):
|
||||
"""Reset tracking for specific camera"""
|
||||
if camera_id in self.camera_trackers:
|
||||
del self.camera_trackers[camera_id]
|
||||
logger.info(f"Reset tracking for camera {camera_id}")
|
||||
|
||||
def reset_all(self):
|
||||
"""Reset all camera trackers"""
|
||||
self.camera_trackers.clear()
|
||||
logger.info("Reset all camera trackers")
|
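A minimal driving sketch for the new tracker, assuming a simple detection object with the bbox/confidence/class_name fields the Track constructor reads; the Detection dataclass and IDs below are illustrative stand-ins, not the project's real detection type:

from dataclasses import dataclass
from typing import List, Optional

@dataclass
class Detection:  # hypothetical stand-in for the worker's InferenceResult detection type
    bbox: List[float]          # [x1, y1, x2, y2]
    confidence: float
    class_name: str
    track_id: Optional[int] = None

class FakeResult:  # mimics an object exposing a .detections attribute
    detections = [Detection([100, 200, 600, 700], 0.85, "frontal")]

tracker = MultiCameraBoTSORT(trigger_classes=["frontal"], min_confidence=0.6)
tracks = tracker.update("camera-01", FakeResult())
# Each returned dict carries track_id, camera_id, bbox, confidence, hit_streak, age.
# A track only appears once it is confirmed (hit_streak >= 3), so the first call returns [].
print(tracks)

Because each camera_id maps to its own CameraTracker, track IDs never collide across cameras, which is the property the rest of this change relies on.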
@@ -61,9 +61,10 @@ class TrackingPipelineIntegration:
        self.cleared_sessions: Dict[str, float] = {}  # session_id -> clear_time
        self.pending_vehicles: Dict[str, int] = {}  # display_id -> track_id (waiting for session ID)
        self.pending_processing_data: Dict[str, Dict] = {}  # display_id -> processing data (waiting for session ID)
        self.display_to_subscription: Dict[str, str] = {}  # display_id -> subscription_id (for fallback)

        # Additional validators for enhanced flow control
        self.permanently_processed: Dict[int, float] = {}  # track_id -> process_time (never process again)
        self.permanently_processed: Dict[str, float] = {}  # "camera_id:track_id" -> process_time (never process again)
        self.progression_stages: Dict[str, str] = {}  # session_id -> current_stage
        self.last_detection_time: Dict[str, float] = {}  # display_id -> last_detection_timestamp
        self.abandonment_timeout = 3.0  # seconds to wait before declaring car abandoned

@@ -71,12 +72,17 @@ class TrackingPipelineIntegration:
        # Thread pool for pipeline execution
        self.executor = ThreadPoolExecutor(max_workers=2)

        # Min bbox filtering configuration
        # TODO: Make this configurable via pipeline.json in the future
        self.min_bbox_area_percentage = 3.5  # 3.5% of frame area minimum

        # Statistics
        self.stats = {
            'frames_processed': 0,
            'vehicles_detected': 0,
            'vehicles_validated': 0,
            'pipelines_executed': 0
            'pipelines_executed': 0,
            'frontals_filtered_small': 0  # Track filtered detections
        }

@@ -183,7 +189,7 @@ class TrackingPipelineIntegration:

        # Run tracking model
        if self.tracking_model:
            # Run inference with tracking
            # Run detection-only (tracking handled by our own tracker)
            tracking_results = self.tracking_model.track(
                frame,
                confidence_threshold=self.tracker.min_confidence,

@@ -202,6 +208,10 @@ class TrackingPipelineIntegration:
        else:
            logger.debug(f"No tracking results or detections attribute")

        # Filter out small frontal detections (neighboring pumps/distant cars)
        if tracking_results and hasattr(tracking_results, 'detections'):
            tracking_results = self._filter_small_frontals(tracking_results, frame)

        # Process tracking results
        tracked_vehicles = self.tracker.process_detections(
            tracking_results,

@@ -210,8 +220,10 @@ class TrackingPipelineIntegration:
        )

        # Update last detection time for abandonment detection
        # Update when vehicles ARE detected, so when they leave, timestamp ages
        if tracked_vehicles:
            self.last_detection_time[display_id] = time.time()
            logger.debug(f"Updated last_detection_time for {display_id}: {len(tracked_vehicles)} vehicles")

        # Check for car abandonment (vehicle left after getting car_wait_staff stage)
        await self._check_car_abandonment(display_id, subscription_id)

@@ -402,27 +414,12 @@ class TrackingPipelineIntegration:
        logger.info(f"Executing processing phase for session {session_id}, vehicle {vehicle.track_id}")

        # Capture high-quality snapshot for pipeline processing
        frame = None
        if self.subscription_info and self.subscription_info.stream_config.snapshot_url:
            from ..streaming.readers import HTTPSnapshotReader
            logger.info(f"[PROCESSING PHASE] Fetching 2K snapshot for session {session_id}")
            frame = self._fetch_snapshot()

            logger.info(f"[PROCESSING PHASE] Fetching 2K snapshot for session {session_id}")
            snapshot_reader = HTTPSnapshotReader(
                camera_id=self.subscription_info.camera_id,
                snapshot_url=self.subscription_info.stream_config.snapshot_url,
                max_retries=3
            )

            frame = snapshot_reader.fetch_single_snapshot()

            if frame is not None:
                logger.info(f"[PROCESSING PHASE] Successfully fetched {frame.shape[1]}x{frame.shape[0]} snapshot for pipeline")
            else:
                logger.warning(f"[PROCESSING PHASE] Failed to capture snapshot, falling back to RTSP frame")
                # Fall back to RTSP frame if snapshot fails
                frame = processing_data['frame']
        else:
            logger.warning(f"[PROCESSING PHASE] No snapshot URL available, using RTSP frame")
        if frame is None:
            logger.warning(f"[PROCESSING PHASE] Failed to capture snapshot, falling back to RTSP frame")
            # Fall back to RTSP frame if snapshot fails
            frame = processing_data['frame']

        # Extract detected regions from detection phase result if available

@@ -465,7 +462,7 @@ class TrackingPipelineIntegration:
        self.subscription_info = subscription_info
        logger.debug(f"Set subscription info with snapshot_url: {subscription_info.stream_config.snapshot_url if subscription_info else None}")

    def set_session_id(self, display_id: str, session_id: str):
    def set_session_id(self, display_id: str, session_id: str, subscription_id: str = None):
        """
        Set session ID for a display (from backend).
        This is called when backend sends setSessionId after receiving imageDetection.

@@ -473,9 +470,18 @@ class TrackingPipelineIntegration:
        Args:
            display_id: Display identifier
            session_id: Session identifier
            subscription_id: Subscription identifier (displayId;cameraId) - needed for fallback
        """
        # Ensure session_id is always a string for consistent type handling
        session_id = str(session_id) if session_id is not None else None
        self.active_sessions[display_id] = session_id
        logger.info(f"Set session {session_id} for display {display_id}")

        # Store subscription_id for fallback usage
        if subscription_id:
            self.display_to_subscription[display_id] = subscription_id
            logger.info(f"Set session {session_id} for display {display_id} with subscription {subscription_id}")
        else:
            logger.info(f"Set session {session_id} for display {display_id}")

        # Check if we have a pending vehicle for this display
        if display_id in self.pending_vehicles:

@@ -486,7 +492,10 @@ class TrackingPipelineIntegration:
            self.session_vehicles[session_id] = track_id

            # Mark vehicle as permanently processed (won't process again even after session clear)
            self.permanently_processed[track_id] = time.time()
            # Use composite key to distinguish same track IDs across different cameras
            camera_id = display_id  # Using display_id as camera_id for isolation
            permanent_key = f"{camera_id}:{track_id}"
            self.permanently_processed[permanent_key] = time.time()

            # Remove from pending
            del self.pending_vehicles[display_id]

@@ -513,6 +522,25 @@ class TrackingPipelineIntegration:
        else:
            logger.warning(f"No pending processing data found for display {display_id} when setting session {session_id}")

        # FALLBACK: Execute pipeline for POS-initiated sessions
        # Skip if session_id is None (no car present or car has left)
        if session_id is not None:
            # Use stored subscription_id instead of creating fake one
            stored_subscription_id = self.display_to_subscription.get(display_id)
            if stored_subscription_id:
                logger.info(f"[FALLBACK] Triggering fallback pipeline for session {session_id} on display {display_id} with subscription {stored_subscription_id}")

                # Trigger the fallback pipeline asynchronously with real subscription_id
                asyncio.create_task(self._execute_fallback_pipeline(
                    display_id=display_id,
                    session_id=session_id,
                    subscription_id=stored_subscription_id
                ))
            else:
                logger.error(f"[FALLBACK] No subscription_id stored for display {display_id}, cannot execute fallback pipeline")
        else:
            logger.debug(f"[FALLBACK] Skipping pipeline execution for session_id=None on display {display_id}")

    def clear_session_id(self, session_id: str):
        """
        Clear session ID (post-fueling).

@@ -562,6 +590,7 @@ class TrackingPipelineIntegration:
        self.cleared_sessions.clear()
        self.pending_vehicles.clear()
        self.pending_processing_data.clear()
        self.display_to_subscription.clear()
        self.permanently_processed.clear()
        self.progression_stages.clear()
        self.last_detection_time.clear()

@@ -605,10 +634,16 @@ class TrackingPipelineIntegration:
            last_detection = self.last_detection_time.get(session_display, 0)
            time_since_detection = current_time - last_detection

            logger.info(f"[ABANDON CHECK] Session {session_id} (display: {session_display}): "
                        f"time_since_detection={time_since_detection:.1f}s, "
                        f"timeout={self.abandonment_timeout}s")

            if time_since_detection > self.abandonment_timeout:
                logger.info(f"Car abandonment detected: session {session_id}, "
                logger.warning(f"🚨 Car abandonment detected: session {session_id}, "
                               f"no detection for {time_since_detection:.1f}s")
                abandoned_sessions.append(session_id)
        else:
            logger.debug(f"[ABANDON CHECK] Session {session_id} has no associated display")

        # Send abandonment detection for each abandoned session
        for session_id in abandoned_sessions:

@@ -616,6 +651,7 @@ class TrackingPipelineIntegration:
            # Remove from progression stages to avoid repeated detection
            if session_id in self.progression_stages:
                del self.progression_stages[session_id]
                logger.info(f"[ABANDON] Removed session {session_id} from progression_stages after notification")

    async def _send_abandonment_detection(self, subscription_id: str, session_id: str):
        """

@@ -662,11 +698,159 @@ class TrackingPipelineIntegration:
        if stage == "car_wait_staff":
            logger.info(f"Started monitoring session {session_id} for car abandonment")

    def _fetch_snapshot(self) -> Optional[np.ndarray]:
        """
        Fetch high-quality snapshot from camera's snapshot URL.
        Reusable method for both processing phase and fallback pipeline.

        Returns:
            Snapshot frame or None if unavailable
        """
        if not (self.subscription_info and self.subscription_info.stream_config.snapshot_url):
            logger.warning("[SNAPSHOT] No subscription info or snapshot URL available")
            return None

        try:
            from ..streaming.readers import HTTPSnapshotReader

            logger.info(f"[SNAPSHOT] Fetching snapshot for {self.subscription_info.camera_id}")
            snapshot_reader = HTTPSnapshotReader(
                camera_id=self.subscription_info.camera_id,
                snapshot_url=self.subscription_info.stream_config.snapshot_url,
                max_retries=3
            )

            frame = snapshot_reader.fetch_single_snapshot()

            if frame is not None:
                logger.info(f"[SNAPSHOT] Successfully fetched {frame.shape[1]}x{frame.shape[0]} snapshot")
                return frame
            else:
                logger.warning("[SNAPSHOT] Failed to fetch snapshot")
                return None

        except Exception as e:
            logger.error(f"[SNAPSHOT] Error fetching snapshot: {e}", exc_info=True)
            return None

    async def _execute_fallback_pipeline(self, display_id: str, session_id: str, subscription_id: str):
        """
        Execute fallback pipeline when sessionId is received without prior detection.
        This handles POS-initiated sessions where backend starts transaction before car detection.

        Args:
            display_id: Display identifier
            session_id: Session ID from backend
            subscription_id: Subscription identifier for pipeline execution
        """
        try:
            logger.info(f"[FALLBACK PIPELINE] Executing for session {session_id}, display {display_id}")

            # Fetch fresh snapshot from camera
            frame = self._fetch_snapshot()

            if frame is None:
                logger.error(f"[FALLBACK] Failed to fetch snapshot for session {session_id}, cannot execute pipeline")
                return

            logger.info(f"[FALLBACK] Using snapshot frame {frame.shape[1]}x{frame.shape[0]} for session {session_id}")

            # Check if detection pipeline is available
            if not self.detection_pipeline:
                logger.error(f"[FALLBACK] Detection pipeline not available for session {session_id}")
                return

            # Execute detection phase to get detected regions
            detection_result = await self.detection_pipeline.execute_detection_phase(
                frame=frame,
                display_id=display_id,
                subscription_id=subscription_id
            )

            logger.info(f"[FALLBACK] Detection phase completed for session {session_id}: "
                        f"status={detection_result.get('status', 'unknown')}, "
                        f"regions={list(detection_result.get('detected_regions', {}).keys())}")

            # If detection found regions, execute processing phase
            detected_regions = detection_result.get('detected_regions', {})
            if detected_regions:
                processing_result = await self.detection_pipeline.execute_processing_phase(
                    frame=frame,
                    display_id=display_id,
                    session_id=session_id,
                    subscription_id=subscription_id,
                    detected_regions=detected_regions
                )

                logger.info(f"[FALLBACK] Processing phase completed for session {session_id}: "
                            f"status={processing_result.get('status', 'unknown')}, "
                            f"branches={len(processing_result.get('branch_results', {}))}, "
                            f"actions={len(processing_result.get('actions_executed', []))}")

                # Update statistics
                self.stats['pipelines_executed'] += 1

            else:
                logger.warning(f"[FALLBACK] No detections found in snapshot for session {session_id}")

        except Exception as e:
            logger.error(f"[FALLBACK] Error executing fallback pipeline for session {session_id}: {e}", exc_info=True)

    def _filter_small_frontals(self, tracking_results, frame):
        """
        Filter out frontal detections that are smaller than minimum bbox area percentage.
        This prevents processing of cars from neighboring pumps that appear in camera view.

        Args:
            tracking_results: YOLO tracking results with detections
            frame: Input frame for calculating frame area

        Returns:
            Modified tracking_results with small frontals removed
        """
        if not hasattr(tracking_results, 'detections') or not tracking_results.detections:
            return tracking_results

        # Calculate frame area and minimum bbox area threshold
        frame_area = frame.shape[0] * frame.shape[1]  # height * width
        min_bbox_area = frame_area * (self.min_bbox_area_percentage / 100.0)

        # Filter detections
        filtered_detections = []
        filtered_count = 0

        for detection in tracking_results.detections:
            # Calculate detection bbox area
            bbox = detection.bbox  # Assuming bbox is [x1, y1, x2, y2]
            bbox_area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])

            if bbox_area >= min_bbox_area:
                # Keep detection - bbox is large enough
                filtered_detections.append(detection)
            else:
                # Filter out small detection
                filtered_count += 1
                area_percentage = (bbox_area / frame_area) * 100
                logger.debug(f"Filtered small frontal: area={bbox_area:.0f}px² ({area_percentage:.1f}% of frame, "
                             f"min required: {self.min_bbox_area_percentage}%)")

        # Update tracking results with filtered detections
        tracking_results.detections = filtered_detections

        # Update statistics
        if filtered_count > 0:
            self.stats['frontals_filtered_small'] += filtered_count
            logger.info(f"Filtered {filtered_count} small frontal detections, "
                        f"{len(filtered_detections)} remaining (total filtered: {self.stats['frontals_filtered_small']})")

        return tracking_results

    def cleanup(self):
        """Cleanup resources."""
        self.executor.shutdown(wait=False)
        self.reset_tracking()

        # Cleanup detection pipeline
        if self.detection_pipeline:
            self.detection_pipeline.cleanup()
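As a back-of-the-envelope check of the new 3.5% minimum bbox threshold (the 2560x1440 frame size is the 2K snapshot resolution used elsewhere in this change; the example bbox is hypothetical):

# Worked example of the _filter_small_frontals threshold.
frame_w, frame_h = 2560, 1440
frame_area = frame_w * frame_h                   # 3,686,400 px²
min_bbox_area = frame_area * 3.5 / 100.0         # 129,024 px²

bbox = [1200, 600, 1500, 900]                    # hypothetical 300x300 frontal detection
bbox_area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])  # 90,000 px², below threshold
print(bbox_area >= min_bbox_area)                # False -> detection is filtered out

So a frontal needs to cover roughly a 360x360 box (or equivalent area) of the 2K frame before it is allowed into the pipeline, which is what screens out cars at neighboring pumps.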
@@ -1,6 +1,6 @@
"""
Vehicle Tracking Module - Continuous tracking with front_rear_detection model
Implements vehicle identification, persistence, and motion analysis.
Vehicle Tracking Module - BoT-SORT based tracking with camera isolation
Implements vehicle identification, persistence, and motion analysis using external tracker.
"""
import logging
import time

@@ -10,6 +10,8 @@ from dataclasses import dataclass, field
import numpy as np
from threading import Lock

from .bot_sort_tracker import MultiCameraBoTSORT

logger = logging.getLogger(__name__)


@@ -17,6 +19,7 @@ logger = logging.getLogger(__name__)
class TrackedVehicle:
    """Represents a tracked vehicle with all its state information."""
    track_id: int
    camera_id: str
    first_seen: float
    last_seen: float
    session_id: Optional[str] = None

@@ -30,6 +33,8 @@ class TrackedVehicle:
    processed_pipeline: bool = False
    last_position_history: List[Tuple[float, float]] = field(default_factory=list)
    avg_confidence: float = 0.0
    hit_streak: int = 0
    age: int = 0

    def update_position(self, bbox: Tuple[int, int, int, int], confidence: float):
        """Update vehicle position and confidence."""

@@ -73,7 +78,7 @@ class TrackedVehicle:

class VehicleTracker:
    """
    Main vehicle tracking implementation using YOLO tracking capabilities.
    Main vehicle tracking implementation using BoT-SORT with camera isolation.
    Manages continuous tracking, vehicle identification, and state persistence.
    """


@@ -88,18 +93,19 @@ class VehicleTracker:
        self.trigger_classes = self.config.get('trigger_classes', self.config.get('triggerClasses', ['frontal']))
        self.min_confidence = self.config.get('minConfidence', 0.6)

        # Tracking state
        self.tracked_vehicles: Dict[int, TrackedVehicle] = {}
        self.next_track_id = 1
        # BoT-SORT multi-camera tracker
        self.bot_sort = MultiCameraBoTSORT(self.trigger_classes, self.min_confidence)

        # Tracking state - maintain compatibility with existing code
        self.tracked_vehicles: Dict[str, Dict[int, TrackedVehicle]] = {}  # camera_id -> {track_id: vehicle}
        self.lock = Lock()

        # Tracking parameters
        self.stability_threshold = 0.7
        self.min_stable_frames = 5
        self.position_tolerance = 50  # pixels
        self.timeout_seconds = 2.0

        logger.info(f"VehicleTracker initialized with trigger_classes={self.trigger_classes}, "
        logger.info(f"VehicleTracker initialized with BoT-SORT: trigger_classes={self.trigger_classes}, "
                    f"min_confidence={self.min_confidence}")

    def process_detections(self,

@@ -107,10 +113,10 @@ class VehicleTracker:
                           display_id: str,
                           frame: np.ndarray) -> List[TrackedVehicle]:
        """
        Process YOLO detection results and update tracking state.
        Process detection results using BoT-SORT tracking.

        Args:
            results: YOLO detection results with tracking
            results: Detection results (InferenceResult)
            display_id: Display identifier for this stream
            frame: Current frame being processed


@@ -118,108 +124,67 @@ class VehicleTracker:
            List of currently tracked vehicles
        """
        current_time = time.time()
        active_tracks = []

        # Extract camera_id from display_id for tracking isolation
        camera_id = display_id  # Using display_id as camera_id for isolation

        with self.lock:
            # Clean up expired tracks
            expired_ids = [
                track_id for track_id, vehicle in self.tracked_vehicles.items()
                if vehicle.is_expired(self.timeout_seconds)
            ]
            for track_id in expired_ids:
                logger.debug(f"Removing expired track {track_id}")
                del self.tracked_vehicles[track_id]
            # Update BoT-SORT tracker
            track_results = self.bot_sort.update(camera_id, results)

            # Process new detections from InferenceResult
            if hasattr(results, 'detections') and results.detections:
                # Process detections from InferenceResult
                for detection in results.detections:
                    # Skip if confidence is too low
                    if detection.confidence < self.min_confidence:
                        continue
            # Ensure camera tracking dict exists
            if camera_id not in self.tracked_vehicles:
                self.tracked_vehicles[camera_id] = {}

                    # Check if class is in trigger classes
                    if detection.class_name not in self.trigger_classes:
                        continue
            # Update tracked vehicles based on BoT-SORT results
            current_tracks = {}
            active_tracks = []

                    # Use track_id if available, otherwise generate one
                    track_id = detection.track_id if detection.track_id is not None else self.next_track_id
                    if detection.track_id is None:
                        self.next_track_id += 1
            for track_result in track_results:
                track_id = track_result['track_id']

                    # Get bounding box from Detection object
                    x1, y1, x2, y2 = detection.bbox
                    bbox = (int(x1), int(y1), int(x2), int(y2))
                # Create or update TrackedVehicle
                if track_id in self.tracked_vehicles[camera_id]:
                    # Update existing vehicle
                    vehicle = self.tracked_vehicles[camera_id][track_id]
                    vehicle.update_position(track_result['bbox'], track_result['confidence'])
                    vehicle.hit_streak = track_result['hit_streak']
                    vehicle.age = track_result['age']

                    # Update or create tracked vehicle
                    confidence = detection.confidence
                    if track_id in self.tracked_vehicles:
                        # Update existing track
                        vehicle = self.tracked_vehicles[track_id]
                        vehicle.update_position(bbox, confidence)
                        vehicle.display_id = display_id
                    # Update stability based on hit_streak
                    if vehicle.hit_streak >= self.min_stable_frames:
                        vehicle.is_stable = True
                        vehicle.stable_frames = vehicle.hit_streak

                        # Check stability
                        stability = vehicle.calculate_stability()
                        if stability > self.stability_threshold:
                            vehicle.stable_frames += 1
                            if vehicle.stable_frames >= self.min_stable_frames:
                                vehicle.is_stable = True
                        else:
                            vehicle.stable_frames = max(0, vehicle.stable_frames - 1)
                            if vehicle.stable_frames < self.min_stable_frames:
                                vehicle.is_stable = False
                    logger.debug(f"Updated track {track_id}: conf={vehicle.confidence:.2f}, "
                                 f"stable={vehicle.is_stable}, hit_streak={vehicle.hit_streak}")
                else:
                    # Create new vehicle
                    x1, y1, x2, y2 = track_result['bbox']
                    vehicle = TrackedVehicle(
                        track_id=track_id,
                        camera_id=camera_id,
                        first_seen=current_time,
                        last_seen=current_time,
                        display_id=display_id,
                        confidence=track_result['confidence'],
                        bbox=tuple(track_result['bbox']),
                        center=((x1 + x2) / 2, (y1 + y2) / 2),
                        total_frames=1,
                        hit_streak=track_result['hit_streak'],
                        age=track_result['age']
                    )
                    vehicle.last_position_history.append(vehicle.center)
                    logger.info(f"New vehicle tracked: ID={track_id}, camera={camera_id}, display={display_id}")

                        logger.debug(f"Updated track {track_id}: conf={confidence:.2f}, "
                                     f"stable={vehicle.is_stable}, stability={stability:.2f}")
                    else:
                        # Create new track
                        vehicle = TrackedVehicle(
                            track_id=track_id,
                            first_seen=current_time,
                            last_seen=current_time,
                            display_id=display_id,
                            confidence=confidence,
                            bbox=bbox,
                            center=((x1 + x2) / 2, (y1 + y2) / 2),
                            total_frames=1
                        )
                        vehicle.last_position_history.append(vehicle.center)
                        self.tracked_vehicles[track_id] = vehicle
                        logger.info(f"New vehicle tracked: ID={track_id}, display={display_id}")
                current_tracks[track_id] = vehicle
                active_tracks.append(vehicle)

                    active_tracks.append(self.tracked_vehicles[track_id])
            # Update the camera's tracked vehicles
            self.tracked_vehicles[camera_id] = current_tracks

        return active_tracks

    def _find_closest_track(self, center: Tuple[float, float]) -> Optional[TrackedVehicle]:
        """
        Find the closest existing track to a given position.

        Args:
            center: Center position to match

        Returns:
            Closest tracked vehicle if within tolerance, None otherwise
        """
        min_distance = float('inf')
        closest_track = None

        for vehicle in self.tracked_vehicles.values():
            if vehicle.is_expired(0.5):  # Shorter timeout for matching
                continue

            distance = np.sqrt(
                (center[0] - vehicle.center[0]) ** 2 +
                (center[1] - vehicle.center[1]) ** 2
            )

            if distance < min_distance and distance < self.position_tolerance:
                min_distance = distance
                closest_track = vehicle

        return closest_track

    def get_stable_vehicles(self, display_id: Optional[str] = None) -> List[TrackedVehicle]:
        """
        Get all stable vehicles, optionally filtered by display.


@@ -231,11 +196,15 @@ class VehicleTracker:
            List of stable tracked vehicles
        """
        with self.lock:
            stable = [
                v for v in self.tracked_vehicles.values()
                if v.is_stable and not v.is_expired(self.timeout_seconds)
                and (display_id is None or v.display_id == display_id)
            ]
            stable = []
            camera_id = display_id  # Using display_id as camera_id

            if camera_id in self.tracked_vehicles:
                for vehicle in self.tracked_vehicles[camera_id].values():
                    if (vehicle.is_stable and not vehicle.is_expired(self.timeout_seconds) and
                            (display_id is None or vehicle.display_id == display_id)):
                        stable.append(vehicle)

            return stable

    def get_vehicle_by_session(self, session_id: str) -> Optional[TrackedVehicle]:

@@ -249,9 +218,11 @@ class VehicleTracker:
            Tracked vehicle if found, None otherwise
        """
        with self.lock:
            for vehicle in self.tracked_vehicles.values():
                if vehicle.session_id == session_id:
                    return vehicle
            # Search across all cameras
            for camera_vehicles in self.tracked_vehicles.values():
                for vehicle in camera_vehicles.values():
                    if vehicle.session_id == session_id:
                        return vehicle
            return None

    def mark_processed(self, track_id: int, session_id: str):

@@ -263,11 +234,14 @@ class VehicleTracker:
            session_id: Session ID assigned to this vehicle
        """
        with self.lock:
            if track_id in self.tracked_vehicles:
                vehicle = self.tracked_vehicles[track_id]
                vehicle.processed_pipeline = True
                vehicle.session_id = session_id
                logger.info(f"Marked vehicle {track_id} as processed with session {session_id}")
            # Search across all cameras for the track_id
            for camera_vehicles in self.tracked_vehicles.values():
                if track_id in camera_vehicles:
                    vehicle = camera_vehicles[track_id]
                    vehicle.processed_pipeline = True
                    vehicle.session_id = session_id
                    logger.info(f"Marked vehicle {track_id} as processed with session {session_id}")
                    return

    def clear_session(self, session_id: str):
        """

@@ -277,30 +251,43 @@ class VehicleTracker:
            session_id: Session ID to clear
        """
        with self.lock:
            for vehicle in self.tracked_vehicles.values():
                if vehicle.session_id == session_id:
                    logger.info(f"Clearing session {session_id} from vehicle {vehicle.track_id}")
                    vehicle.session_id = None
                    # Keep processed_pipeline=True to prevent re-processing
            # Search across all cameras
            for camera_vehicles in self.tracked_vehicles.values():
                for vehicle in camera_vehicles.values():
                    if vehicle.session_id == session_id:
                        logger.info(f"Clearing session {session_id} from vehicle {vehicle.track_id}")
                        vehicle.session_id = None
                        # Keep processed_pipeline=True to prevent re-processing

    def reset_tracking(self):
        """Reset all tracking state."""
        with self.lock:
            self.tracked_vehicles.clear()
            self.next_track_id = 1
            self.bot_sort.reset_all()
            logger.info("Vehicle tracking state reset")

    def get_statistics(self) -> Dict:
        """Get tracking statistics."""
        with self.lock:
            total = len(self.tracked_vehicles)
            stable = sum(1 for v in self.tracked_vehicles.values() if v.is_stable)
            processed = sum(1 for v in self.tracked_vehicles.values() if v.processed_pipeline)
            total = 0
            stable = 0
            processed = 0
            all_confidences = []

            # Aggregate stats across all cameras
            for camera_vehicles in self.tracked_vehicles.values():
                total += len(camera_vehicles)
                for vehicle in camera_vehicles.values():
                    if vehicle.is_stable:
                        stable += 1
                    if vehicle.processed_pipeline:
                        processed += 1
                    all_confidences.append(vehicle.avg_confidence)

            return {
                'total_tracked': total,
                'stable_vehicles': stable,
                'processed_vehicles': processed,
                'avg_confidence': np.mean([v.avg_confidence for v in self.tracked_vehicles.values()])
                if self.tracked_vehicles else 0.0
                'avg_confidence': np.mean(all_confidences) if all_confidences else 0.0,
                'bot_sort_stats': self.bot_sort.get_statistics()
            }
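A small sketch of the camera-isolated bookkeeping this refactor introduces, with placeholder IDs; the composite "camera_id:track_id" key mirrors what set_session_id now stores in permanently_processed, and the nested dict mirrors VehicleTracker.tracked_vehicles:

# Per-camera track storage: the same numeric track ID can exist on two cameras.
tracked_vehicles = {
    "camera-01": {7: "TrackedVehicle(...)"},
    "camera-02": {7: "TrackedVehicle(...)"},
}

camera_id, track_id = "camera-01", 7
permanent_key = f"{camera_id}:{track_id}"          # "camera-01:7"
permanently_processed = {permanent_key: 1718000000.0}

# Track 7 on camera-02 is not blocked by camera-01's entry:
print(f"camera-02:{track_id}" in permanently_processed)  # False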
@ -36,8 +36,14 @@ class ValidationResult:
|
|||
|
||||
class StableCarValidator:
|
||||
"""
|
||||
Validates whether a tracked vehicle is stable (fueling) or just passing by.
|
||||
Uses multiple criteria including position stability, duration, and movement patterns.
|
||||
Validates whether a tracked vehicle should be processed through the pipeline.
|
||||
|
||||
Updated for BoT-SORT integration: Trusts the sophisticated BoT-SORT tracking algorithm
|
||||
for stability determination and focuses on business logic validation:
|
||||
- Duration requirements for processing
|
||||
- Confidence thresholds
|
||||
- Session management and cooldowns
|
||||
- Camera isolation with composite keys
|
||||
"""
|
||||
|
||||
def __init__(self, config: Optional[Dict] = None):
|
||||
|
@ -169,7 +175,10 @@ class StableCarValidator:

    def _determine_vehicle_state(self, vehicle: TrackedVehicle) -> VehicleState:
        """
        Determine the current state of the vehicle based on movement patterns.
        Determine the current state of the vehicle based on BoT-SORT tracking results.

        BoT-SORT provides sophisticated tracking, so we trust its stability determination
        and focus on business logic validation.

        Args:
            vehicle: The tracked vehicle
@ -177,53 +186,44 @@ class StableCarValidator:
        Returns:
            Current vehicle state
        """
        # Not enough data
        if len(vehicle.last_position_history) < 3:
            return VehicleState.UNKNOWN

        # Calculate velocity
        velocity = self._calculate_velocity(vehicle)

        # Get position zones
        x_position = vehicle.center[0] / self.frame_width
        y_position = vehicle.center[1] / self.frame_height

        # Check if vehicle is stable
        stability = vehicle.calculate_stability()
        if stability > 0.7 and velocity < self.velocity_threshold:
            # Check if it's been stable long enough
        # Trust BoT-SORT's stability determination
        if vehicle.is_stable:
            # Check if it's been stable long enough for processing
            duration = time.time() - vehicle.first_seen
            if duration > self.min_stable_duration and vehicle.stable_frames >= self.min_stable_frames:
            if duration >= self.min_stable_duration:
                return VehicleState.STABLE
            else:
                return VehicleState.ENTERING

        # Check if vehicle is entering or leaving
        # For non-stable vehicles, use simplified state determination
        if len(vehicle.last_position_history) < 2:
            return VehicleState.UNKNOWN

        # Calculate velocity for movement classification
        velocity = self._calculate_velocity(vehicle)

        # Basic movement classification
        if velocity > self.velocity_threshold:
            # Determine direction based on position history
            positions = np.array(vehicle.last_position_history)
            if len(positions) >= 2:
                direction = positions[-1] - positions[0]
            # Vehicle is moving - classify as passing by or entering/leaving
            x_position = vehicle.center[0] / self.frame_width

                # Entering: moving towards center
                if x_position < self.entering_zone_ratio or x_position > (1 - self.entering_zone_ratio):
                    if abs(direction[0]) > abs(direction[1]):  # Horizontal movement
                        if (x_position < 0.5 and direction[0] > 0) or (x_position > 0.5 and direction[0] < 0):
                            return VehicleState.ENTERING
            # Simple heuristic: vehicles near edges are entering/leaving, center vehicles are passing
            if x_position < 0.2 or x_position > 0.8:
                return VehicleState.ENTERING
            else:
                return VehicleState.PASSING_BY

                # Leaving: moving away from center
                if 0.3 < x_position < 0.7:  # In center zone
                    if abs(direction[0]) > abs(direction[1]):  # Horizontal movement
                        if abs(direction[0]) > 10:  # Significant movement
                            return VehicleState.LEAVING

                return VehicleState.PASSING_BY

        return VehicleState.UNKNOWN
        # Low velocity but not marked stable by tracker - likely entering
        return VehicleState.ENTERING

    def _validate_stable_vehicle(self, vehicle: TrackedVehicle) -> ValidationResult:
        """
        Perform detailed validation of a stable vehicle.
        Perform business logic validation of a stable vehicle.

        Since BoT-SORT already determined the vehicle is stable, we focus on:
        - Duration requirements for processing
        - Confidence thresholds
        - Business logic constraints

        Args:
            vehicle: The stable vehicle to validate
@ -231,7 +231,7 @@ class StableCarValidator:
        Returns:
            Detailed validation result
        """
        # Check duration
        # Check duration (business requirement)
        duration = time.time() - vehicle.first_seen
        if duration < self.min_stable_duration:
            return ValidationResult(
@ -243,18 +243,7 @@ class StableCarValidator:
                track_id=vehicle.track_id
            )

        # Check frame count
        if vehicle.stable_frames < self.min_stable_frames:
            return ValidationResult(
                is_valid=False,
                state=VehicleState.STABLE,
                confidence=0.6,
                reason=f"Not enough stable frames ({vehicle.stable_frames} < {self.min_stable_frames})",
                should_process=False,
                track_id=vehicle.track_id
            )

        # Check confidence
        # Check confidence (business requirement)
        if vehicle.avg_confidence < self.min_confidence:
            return ValidationResult(
                is_valid=False,
@ -265,28 +254,19 @@ class StableCarValidator:
                track_id=vehicle.track_id
            )

        # Check position variance
        variance = self._calculate_position_variance(vehicle)
        if variance > self.position_variance_threshold:
            return ValidationResult(
                is_valid=False,
                state=VehicleState.STABLE,
                confidence=0.7,
                reason=f"Position variance too high ({variance:.1f} > {self.position_variance_threshold})",
                should_process=False,
                track_id=vehicle.track_id
            )
        # Trust BoT-SORT's stability determination - skip position variance check
        # BoT-SORT's sophisticated tracking already ensures consistent positioning

        # Check state history consistency
        # Simplified state history check - just ensure recent stability
        if vehicle.track_id in self.validation_history:
            history = self.validation_history[vehicle.track_id][-5:]  # Last 5 states
            history = self.validation_history[vehicle.track_id][-3:]  # Last 3 states
            stable_count = sum(1 for s in history if s == VehicleState.STABLE)
            if stable_count < 3:
            if len(history) >= 2 and stable_count == 0:  # Only fail if clear instability
                return ValidationResult(
                    is_valid=False,
                    state=VehicleState.STABLE,
                    confidence=0.7,
                    reason="Inconsistent state history",
                    reason="Recent state history shows instability",
                    should_process=False,
                    track_id=vehicle.track_id
                )
@ -298,7 +278,7 @@ class StableCarValidator:
            is_valid=True,
            state=VehicleState.STABLE,
            confidence=vehicle.avg_confidence,
            reason="Vehicle is stable and ready for processing",
            reason="Vehicle is stable and ready for processing (BoT-SORT validated)",
            should_process=True,
            track_id=vehicle.track_id
        )
@ -354,25 +334,28 @@ class StableCarValidator:
    def should_skip_same_car(self,
                             vehicle: TrackedVehicle,
                             session_cleared: bool = False,
                             permanently_processed: Dict[int, float] = None) -> bool:
                             permanently_processed: Dict[str, float] = None) -> bool:
        """
        Determine if we should skip processing for the same car after session clear.

        Args:
            vehicle: The tracked vehicle
            session_cleared: Whether the session was recently cleared
            permanently_processed: Dict of permanently processed vehicles
            permanently_processed: Dict of permanently processed vehicles (camera_id:track_id -> time)

        Returns:
            True if we should skip this vehicle
        """
        # Check if this vehicle was permanently processed (never process again)
        if permanently_processed and vehicle.track_id in permanently_processed:
            process_time = permanently_processed[vehicle.track_id]
            time_since = time.time() - process_time
            logger.debug(f"Skipping permanently processed vehicle {vehicle.track_id} "
                         f"(processed {time_since:.1f}s ago)")
            return True
        if permanently_processed:
            # Create composite key using camera_id and track_id
            permanent_key = f"{vehicle.camera_id}:{vehicle.track_id}"
            if permanent_key in permanently_processed:
                process_time = permanently_processed[permanent_key]
                time_since = time.time() - process_time
                logger.debug(f"Skipping permanently processed vehicle {vehicle.track_id} on camera {vehicle.camera_id} "
                             f"(processed {time_since:.1f}s ago)")
                return True

        # If vehicle has a session_id but it was cleared, skip for a period
        if vehicle.session_id is None and vehicle.processed_pipeline and session_cleared:
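The composite-key change above ties "permanently processed" bookkeeping to both camera and track, so the same track ID on different cameras no longer collides. A minimal sketch, with hypothetical caller-side names, of how such a map might be maintained alongside should_skip_same_car:

from typing import Dict
import time

# Hypothetical caller-side bookkeeping: maps "camera_id:track_id" -> time processed
permanently_processed: Dict[str, float] = {}

def mark_permanently_processed(camera_id: str, track_id: int) -> None:
    # Same composite key format the validator expects
    permanently_processed[f"{camera_id}:{track_id}"] = time.time()

# Later, before running the pipeline on a tracked vehicle:
# if validator.should_skip_same_car(vehicle, session_cleared, permanently_processed):
#     continue  # this car on this camera was already processed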
214  core/utils/ffmpeg_detector.py  Normal file

@ -0,0 +1,214 @@
"""
FFmpeg hardware acceleration detection and configuration
"""

import subprocess
import logging
import re
from typing import Dict, List, Optional

logger = logging.getLogger("detector_worker")


class FFmpegCapabilities:
    """Detect and configure FFmpeg hardware acceleration capabilities."""

    def __init__(self):
        """Initialize FFmpeg capabilities detector."""
        self.hwaccels = []
        self.codecs = {}
        self.nvidia_support = False
        self.vaapi_support = False
        self.qsv_support = False

        self._detect_capabilities()

    def _detect_capabilities(self):
        """Detect available hardware acceleration methods."""
        try:
            # Get hardware accelerators
            result = subprocess.run(
                ['ffmpeg', '-hide_banner', '-hwaccels'],
                capture_output=True, text=True, timeout=10
            )
            if result.returncode == 0:
                self.hwaccels = [line.strip() for line in result.stdout.strip().split('\n')[1:] if line.strip()]
                logger.info(f"Available FFmpeg hardware accelerators: {', '.join(self.hwaccels)}")

                # Check for NVIDIA support
                self.nvidia_support = any(hw in self.hwaccels for hw in ['cuda', 'cuvid', 'nvdec'])
                self.vaapi_support = 'vaapi' in self.hwaccels
                self.qsv_support = 'qsv' in self.hwaccels

                # Get decoder information
                self._detect_decoders()

                # Log capabilities
                if self.nvidia_support:
                    logger.info("NVIDIA hardware acceleration available (CUDA/CUVID/NVDEC)")
                    logger.info(f"Detected hardware codecs: {self.codecs}")
                if self.vaapi_support:
                    logger.info("VAAPI hardware acceleration available")
                if self.qsv_support:
                    logger.info("Intel QuickSync hardware acceleration available")

        except Exception as e:
            logger.warning(f"Failed to detect FFmpeg capabilities: {e}")

    def _detect_decoders(self):
        """Detect available hardware decoders."""
        try:
            result = subprocess.run(
                ['ffmpeg', '-hide_banner', '-decoders'],
                capture_output=True, text=True, timeout=10
            )
            if result.returncode == 0:
                # Parse decoder output to find hardware decoders
                for line in result.stdout.split('\n'):
                    if 'cuvid' in line or 'nvdec' in line:
                        match = re.search(r'(\w+)\s+.*?(\w+(?:_cuvid|_nvdec))', line)
                        if match:
                            codec_type, decoder = match.groups()
                            if 'h264' in decoder:
                                self.codecs['h264_hw'] = decoder
                            elif 'hevc' in decoder or 'h265' in decoder:
                                self.codecs['h265_hw'] = decoder
                    elif 'vaapi' in line:
                        match = re.search(r'(\w+)\s+.*?(\w+_vaapi)', line)
                        if match:
                            codec_type, decoder = match.groups()
                            if 'h264' in decoder:
                                self.codecs['h264_vaapi'] = decoder

        except Exception as e:
            logger.debug(f"Failed to detect decoders: {e}")

    def get_optimal_capture_options(self, codec: str = 'h264') -> Dict[str, str]:
        """
        Get optimal FFmpeg capture options for the given codec.

        Args:
            codec: Video codec (h264, h265, etc.)

        Returns:
            Dictionary of FFmpeg options
        """
        options = {
            'rtsp_transport': 'tcp',
            'buffer_size': '1024k',
            'max_delay': '500000',  # 500ms
            'fflags': '+genpts',
            'flags': '+low_delay',
            'probesize': '32',
            'analyzeduration': '0'
        }

        # Add hardware acceleration if available
        if self.nvidia_support:
            # Force enable CUDA hardware acceleration for H.264 if CUDA is available
            if codec == 'h264':
                options.update({
                    'hwaccel': 'cuda',
                    'hwaccel_device': '0'
                })
                logger.info("Using NVIDIA NVDEC hardware acceleration for H.264")
            elif codec == 'h265':
                options.update({
                    'hwaccel': 'cuda',
                    'hwaccel_device': '0',
                    'video_codec': 'hevc_cuvid',
                    'hwaccel_output_format': 'cuda'
                })
                logger.info("Using NVIDIA CUVID hardware acceleration for H.265")

        elif self.vaapi_support:
            if codec == 'h264':
                options.update({
                    'hwaccel': 'vaapi',
                    'hwaccel_device': '/dev/dri/renderD128',
                    'video_codec': 'h264_vaapi'
                })
                logger.debug("Using VAAPI hardware acceleration")

        return options

    def format_opencv_options(self, options: Dict[str, str]) -> str:
        """
        Format options for OpenCV FFmpeg backend.

        Args:
            options: Dictionary of FFmpeg options

        Returns:
            Formatted options string for OpenCV
        """
        return '|'.join(f"{key};{value}" for key, value in options.items())

    def get_hardware_encoder_options(self, codec: str = 'h264', quality: str = 'fast') -> Dict[str, str]:
        """
        Get optimal hardware encoding options.

        Args:
            codec: Video codec for encoding
            quality: Quality preset (fast, medium, slow)

        Returns:
            Dictionary of encoding options
        """
        options = {}

        if self.nvidia_support:
            if codec == 'h264':
                options.update({
                    'video_codec': 'h264_nvenc',
                    'preset': quality,
                    'tune': 'zerolatency',
                    'gpu': '0',
                    'rc': 'cbr_hq',
                    'surfaces': '64'
                })
            elif codec == 'h265':
                options.update({
                    'video_codec': 'hevc_nvenc',
                    'preset': quality,
                    'tune': 'zerolatency',
                    'gpu': '0'
                })

        elif self.vaapi_support:
            if codec == 'h264':
                options.update({
                    'video_codec': 'h264_vaapi',
                    'vaapi_device': '/dev/dri/renderD128'
                })

        return options


# Global instance
_ffmpeg_caps = None


def get_ffmpeg_capabilities() -> FFmpegCapabilities:
    """Get or create the global FFmpeg capabilities instance."""
    global _ffmpeg_caps
    if _ffmpeg_caps is None:
        _ffmpeg_caps = FFmpegCapabilities()
    return _ffmpeg_caps


def get_optimal_rtsp_options(rtsp_url: str) -> str:
    """
    Get optimal OpenCV FFmpeg options for RTSP streaming.

    Args:
        rtsp_url: RTSP stream URL

    Returns:
        Formatted options string for cv2.VideoCapture
    """
    caps = get_ffmpeg_capabilities()

    # Detect codec from URL or assume H.264
    codec = 'h265' if any(x in rtsp_url.lower() for x in ['h265', 'hevc']) else 'h264'

    options = caps.get_optimal_capture_options(codec)
    return caps.format_opencv_options(options)
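The string returned by format_opencv_options uses OpenCV's "key;value|key;value" convention, which the FFmpeg backend reads from the OPENCV_FFMPEG_CAPTURE_OPTIONS environment variable. A minimal sketch of how a capture module might consume it; the URL is a placeholder and the actual wiring in the stream readers may differ:

import os
import cv2
from core.utils.ffmpeg_detector import get_optimal_rtsp_options

rtsp_url = "rtsp://camera.local/stream"  # placeholder URL
# OpenCV's FFmpeg backend picks these capture options up from the environment
os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = get_optimal_rtsp_options(rtsp_url)
cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)
ok, frame = cap.read()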
173  core/utils/hardware_encoder.py  Normal file

@ -0,0 +1,173 @@
"""
Hardware-accelerated image encoding using NVIDIA NVENC or Intel QuickSync
"""

import cv2
import numpy as np
import logging
from typing import Optional, Tuple
import os

logger = logging.getLogger("detector_worker")


class HardwareEncoder:
    """Hardware-accelerated JPEG encoder using GPU."""

    def __init__(self):
        """Initialize hardware encoder."""
        self.nvenc_available = False
        self.vaapi_available = False
        self.turbojpeg_available = False

        # Check for TurboJPEG (fastest CPU-based option)
        try:
            from turbojpeg import TurboJPEG
            self.turbojpeg = TurboJPEG()
            self.turbojpeg_available = True
            logger.info("TurboJPEG accelerated encoding available")
        except ImportError:
            logger.debug("TurboJPEG not available")

        # Check for NVIDIA NVENC support
        try:
            # Test if we can create an NVENC encoder
            test_frame = np.zeros((720, 1280, 3), dtype=np.uint8)
            fourcc = cv2.VideoWriter_fourcc(*'H264')
            test_writer = cv2.VideoWriter(
                "test.mp4",
                fourcc,
                30,
                (1280, 720),
                [cv2.CAP_PROP_HW_ACCELERATION, cv2.VIDEO_ACCELERATION_ANY]
            )
            if test_writer.isOpened():
                self.nvenc_available = True
                logger.info("NVENC hardware encoding available")
            test_writer.release()
            if os.path.exists("test.mp4"):
                os.remove("test.mp4")
        except Exception as e:
            logger.debug(f"NVENC not available: {e}")

    def encode_jpeg(self, frame: np.ndarray, quality: int = 85) -> Optional[bytes]:
        """
        Encode frame to JPEG using the fastest available method.

        Args:
            frame: BGR image frame
            quality: JPEG quality (1-100)

        Returns:
            Encoded JPEG bytes or None on failure
        """
        try:
            # Method 1: TurboJPEG (3-5x faster than cv2.imencode)
            if self.turbojpeg_available:
                # Convert BGR to RGB for TurboJPEG
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                encoded = self.turbojpeg.encode(rgb_frame, quality=quality)
                return encoded

            # Method 2: Hardware-accelerated encoding via GStreamer (if available)
            if self.nvenc_available:
                return self._encode_with_nvenc(frame, quality)

            # Fallback: Standard OpenCV encoding
            encode_params = [cv2.IMWRITE_JPEG_QUALITY, quality]
            success, encoded = cv2.imencode('.jpg', frame, encode_params)
            if success:
                return encoded.tobytes()

            return None

        except Exception as e:
            logger.error(f"Failed to encode frame: {e}")
            return None

    def _encode_with_nvenc(self, frame: np.ndarray, quality: int) -> Optional[bytes]:
        """
        Encode using NVIDIA NVENC hardware encoder.

        This is complex to implement directly, so we'll use a GStreamer pipeline
        if available.
        """
        try:
            # Create a GStreamer pipeline for hardware encoding
            height, width = frame.shape[:2]
            gst_pipeline = (
                f"appsrc ! "
                f"video/x-raw,format=BGR,width={width},height={height},framerate=30/1 ! "
                f"videoconvert ! "
                f"nvvideoconvert ! "  # GPU color conversion
                f"nvjpegenc quality={quality} ! "  # Hardware JPEG encoder
                f"appsink"
            )

            # This would require GStreamer Python bindings
            # For now, fall back to TurboJPEG or standard encoding
            logger.debug("NVENC JPEG encoding not fully implemented, using fallback")
            encode_params = [cv2.IMWRITE_JPEG_QUALITY, quality]
            success, encoded = cv2.imencode('.jpg', frame, encode_params)
            if success:
                return encoded.tobytes()

            return None

        except Exception as e:
            logger.error(f"NVENC encoding failed: {e}")
            return None

    def encode_batch(self, frames: list, quality: int = 85) -> list:
        """
        Batch encode multiple frames for better GPU utilization.

        Args:
            frames: List of BGR frames
            quality: JPEG quality

        Returns:
            List of encoded JPEG bytes
        """
        encoded_frames = []

        if self.turbojpeg_available:
            # TurboJPEG can handle batch encoding efficiently
            for frame in frames:
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                encoded = self.turbojpeg.encode(rgb_frame, quality=quality)
                encoded_frames.append(encoded)
        else:
            # Fallback to sequential encoding
            for frame in frames:
                encoded = self.encode_jpeg(frame, quality)
                encoded_frames.append(encoded)

        return encoded_frames


# Global encoder instance
_hardware_encoder = None


def get_hardware_encoder() -> HardwareEncoder:
    """Get or create the global hardware encoder instance."""
    global _hardware_encoder
    if _hardware_encoder is None:
        _hardware_encoder = HardwareEncoder()
    return _hardware_encoder


def encode_frame_hardware(frame: np.ndarray, quality: int = 85) -> Optional[bytes]:
    """
    Convenience function to encode a frame using hardware acceleration.

    Args:
        frame: BGR image frame
        quality: JPEG quality (1-100)

    Returns:
        Encoded JPEG bytes or None on failure
    """
    encoder = get_hardware_encoder()
    return encoder.encode_jpeg(frame, quality)
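Callers that just need JPEG bytes can use the module-level helper directly. A small usage sketch with a placeholder frame and consumer; in the worker the frames would come from the RTSP readers:

import numpy as np
from core.utils.hardware_encoder import encode_frame_hardware

# Placeholder BGR frame standing in for a decoded stream frame
frame = np.zeros((720, 1280, 3), dtype=np.uint8)
jpeg_bytes = encode_frame_hardware(frame, quality=85)
if jpeg_bytes is not None:
    with open("frame.jpg", "wb") as f:  # placeholder consumer
        f.write(jpeg_bytes)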
@ -6,4 +6,7 @@ scipy
filterpy
psycopg2-binary
lap>=0.5.12
pynvml
pynvml
PyTurboJPEG
PyNvVideoCodec
cupy-cuda12x

@ -5,4 +5,5 @@ fastapi[standard]
redis
urllib3<2.0.0
numpy
requests
requests
watchdog