"""
Multiprocessing-based RTSP stream management for scalability.
Handles multiple camera streams using separate processes to bypass GIL limitations.
"""

import multiprocessing as mp
import time
import logging
import cv2
import numpy as np
import queue
import threading
import os
import psutil
from typing import Dict, Optional, Tuple, Any, Callable
from dataclasses import dataclass
from multiprocessing import Process, Queue, Lock, Value, Array, Manager
from multiprocessing.shared_memory import SharedMemory
import signal
import sys

# Ensure proper multiprocessing context for uvicorn compatibility
try:
    mp.set_start_method('spawn', force=True)
except RuntimeError:
    pass  # Already set

logger = logging.getLogger("detector_worker.process_manager")

# Frame dimensions (1280x720 RGB)
FRAME_WIDTH = 1280
FRAME_HEIGHT = 720
FRAME_CHANNELS = 3
FRAME_SIZE = FRAME_WIDTH * FRAME_HEIGHT * FRAME_CHANNELS
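# FRAME_SIZE works out to 1280 * 720 * 3 = 2,764,800 bytes (~2.6 MiB) per buffer;
# each SharedFrameBuffer below allocates two of these for double buffering,
# i.e. roughly 5.3 MiB of shared memory per camera.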


@dataclass
class ProcessConfig:
    """Configuration for camera process."""
    camera_id: str
    rtsp_url: str
    expected_fps: int = 6
    buffer_size: int = 3
    max_retries: int = 30
    reconnect_delay: float = 5.0


class SharedFrameBuffer:
    """Thread-safe shared memory frame buffer with double buffering."""

    def __init__(self, camera_id: str):
        self.camera_id = camera_id
        self.lock = mp.Lock()

        # Double buffering for lock-free reads
        self.buffer_a = mp.Array('B', FRAME_SIZE, lock=False)
        self.buffer_b = mp.Array('B', FRAME_SIZE, lock=False)

        # Atomic index for current read buffer (0 or 1)
        self.read_buffer_idx = mp.Value('i', 0)

        # Frame metadata (atomic access)
        self.timestamp = mp.Value('d', 0.0)
        self.frame_number = mp.Value('L', 0)
        self.is_valid = mp.Value('b', False)

        # Statistics
        self.frames_written = mp.Value('L', 0)
        self.frames_dropped = mp.Value('L', 0)

    def write_frame(self, frame: np.ndarray, timestamp: float) -> bool:
        """Write frame to buffer with atomic swap."""
        if frame is None or frame.size == 0:
            return False

        # Resize if needed
        if frame.shape != (FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNELS):
            frame = cv2.resize(frame, (FRAME_WIDTH, FRAME_HEIGHT))

        # Get write buffer (opposite of read buffer)
        write_idx = 1 - self.read_buffer_idx.value
        write_buffer = self.buffer_a if write_idx == 0 else self.buffer_b

        try:
            # Write to buffer without lock (safe because of double buffering)
            frame_flat = frame.flatten()
            write_buffer[:] = frame_flat.astype(np.uint8)

            # Update metadata
            self.timestamp.value = timestamp
            self.frame_number.value += 1

            # Atomic swap of buffers
            with self.lock:
                self.read_buffer_idx.value = write_idx
                self.is_valid.value = True
                self.frames_written.value += 1

            return True

        except Exception as e:
            logger.error(f"Error writing frame for {self.camera_id}: {e}")
            self.frames_dropped.value += 1
            return False

    def read_frame(self) -> Optional[Tuple[np.ndarray, float]]:
        """Read frame from buffer without blocking writers."""
        if not self.is_valid.value:
            return None

        # Get current read buffer index (atomic read)
        read_idx = self.read_buffer_idx.value
        read_buffer = self.buffer_a if read_idx == 0 else self.buffer_b

        # Read timestamp (atomic)
        timestamp = self.timestamp.value

        # Copy frame data (no lock needed for read)
        try:
            frame_data = np.array(read_buffer, dtype=np.uint8)
            frame = frame_data.reshape((FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNELS))
            return frame.copy(), timestamp
        except Exception as e:
            logger.error(f"Error reading frame for {self.camera_id}: {e}")
            return None

    def get_stats(self) -> Dict[str, int]:
        """Get buffer statistics."""
        return {
            'frames_written': self.frames_written.value,
            'frames_dropped': self.frames_dropped.value,
            'frame_number': self.frame_number.value,
            'is_valid': self.is_valid.value
        }
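

# A minimal sketch of how SharedFrameBuffer is meant to be used from a single
# process; the camera id and synthetic frame below are illustrative only, and
# this helper is not called anywhere in the module.
def _example_frame_buffer_roundtrip() -> None:
    """Illustrative write/read round trip through SharedFrameBuffer."""
    buffer = SharedFrameBuffer("example-camera")

    # Writer side: a solid grey frame at the expected resolution, so no resize
    # is needed inside write_frame().
    fake_frame = np.full((FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNELS), 128, dtype=np.uint8)
    buffer.write_frame(fake_frame, time.time())

    # Reader side: read_frame() returns a copy of the most recently swapped-in
    # frame together with its capture timestamp, or None if nothing was written.
    result = buffer.read_frame()
    if result is not None:
        frame, timestamp = result
        logger.debug(f"Read back {frame.shape} frame written at {timestamp}")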


def camera_worker_process(
    config: ProcessConfig,
    frame_buffer: SharedFrameBuffer,
    command_queue: Queue,
    status_queue: Queue,
    stop_event: mp.Event
):
    """
    Worker process for individual camera stream.
    Runs in separate process to bypass GIL.
    """
    # Set process name for debugging
    mp.current_process().name = f"Camera-{config.camera_id}"

    # Configure logging for subprocess
    logging.basicConfig(
        level=logging.INFO,
        format=f'%(asctime)s [%(levelname)s] Camera-{config.camera_id}: %(message)s'
    )

    logger.info(f"Starting camera worker for {config.camera_id}")

    cap = None
    consecutive_errors = 0
    frame_interval = 1.0 / config.expected_fps
    last_frame_time = 0

    def initialize_capture():
        """Initialize OpenCV capture with optimized settings."""
        nonlocal cap

        try:
            # Set RTSP transport to TCP for reliability
            os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'rtsp_transport;tcp'

            # Create capture
            cap = cv2.VideoCapture(config.rtsp_url, cv2.CAP_FFMPEG)

            if not cap.isOpened():
                logger.error(f"Failed to open RTSP stream")
                return False

            # Set capture properties
            cap.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT)
            cap.set(cv2.CAP_PROP_FPS, config.expected_fps)
            cap.set(cv2.CAP_PROP_BUFFERSIZE, config.buffer_size)

            # Read initial frames to stabilize
            for _ in range(3):
                ret, _ = cap.read()
                if not ret:
                    logger.warning("Failed to read initial frames")
                time.sleep(0.1)

            logger.info(f"Successfully initialized capture")
            return True

        except Exception as e:
            logger.error(f"Error initializing capture: {e}")
            return False

    # Main processing loop
    while not stop_event.is_set():
        try:
            # Check for commands (non-blocking)
            try:
                command = command_queue.get_nowait()
                if command == "reinit":
                    logger.info("Received reinit command")
                    if cap:
                        cap.release()
                    cap = None
                    consecutive_errors = 0
            except queue.Empty:
                pass

            # Initialize capture if needed
            if cap is None or not cap.isOpened():
                if not initialize_capture():
                    time.sleep(config.reconnect_delay)
                    consecutive_errors += 1
                    if consecutive_errors > config.max_retries and config.max_retries > 0:
                        logger.error("Max retries reached, exiting")
                        break
                    continue
                else:
                    consecutive_errors = 0

            # Read frame with timing control
            current_time = time.time()
            if current_time - last_frame_time < frame_interval:
                time.sleep(0.01)  # Small sleep to prevent busy waiting
                continue

            ret, frame = cap.read()

            if not ret or frame is None:
                consecutive_errors += 1

                if consecutive_errors >= config.max_retries:
                    logger.error(f"Too many consecutive errors ({consecutive_errors}), reinitializing")
                    if cap:
                        cap.release()
                    cap = None
                    consecutive_errors = 0
                    time.sleep(config.reconnect_delay)
                else:
                    if consecutive_errors <= 5:
                        logger.debug(f"Frame read failed (error {consecutive_errors})")
                    elif consecutive_errors % 10 == 0:
                        logger.warning(f"Continuing frame failures (error {consecutive_errors})")

                    # Exponential backoff
                    sleep_time = min(0.1 * (1.5 ** min(consecutive_errors, 10)), 1.0)
                    time.sleep(sleep_time)
                continue

            # Frame read successful
            consecutive_errors = 0
            last_frame_time = current_time

            # Write to shared buffer
            if frame_buffer.write_frame(frame, current_time):
                # Send status update periodically
                if frame_buffer.frame_number.value % 30 == 0:  # Every 30 frames
                    status_queue.put({
                        'camera_id': config.camera_id,
                        'status': 'running',
                        'frames': frame_buffer.frame_number.value,
                        'timestamp': current_time
                    })

        except KeyboardInterrupt:
            logger.info("Received interrupt signal")
            break
        except Exception as e:
            logger.error(f"Error in camera worker: {e}")
            consecutive_errors += 1
            time.sleep(1.0)

    # Cleanup
    if cap:
        cap.release()

    logger.info(f"Camera worker stopped")
    status_queue.put({
        'camera_id': config.camera_id,
        'status': 'stopped',
        'frames': frame_buffer.frame_number.value
    })
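

# camera_worker_process above is the entry point that RTSPProcessManager.add_camera
# passes to mp.Process; each camera gets its own process, command queue,
# stop event, and SharedFrameBuffer.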


class RTSPProcessManager:
    """
    Manages multiple camera processes with health monitoring and auto-restart.
    """

    def __init__(self, max_processes: int = None):
        self.max_processes = max_processes or (mp.cpu_count() - 2)
        self.processes: Dict[str, Process] = {}
        self.frame_buffers: Dict[str, SharedFrameBuffer] = {}
        self.command_queues: Dict[str, Queue] = {}
        self.status_queue = mp.Queue()
        self.stop_events: Dict[str, mp.Event] = {}
        self.configs: Dict[str, ProcessConfig] = {}

        # Manager for shared objects
        self.manager = Manager()
        self.process_stats = self.manager.dict()

        # Health monitoring thread
        self.monitor_thread = None
        self.monitor_stop = threading.Event()

        logger.info(f"RTSPProcessManager initialized with max_processes={self.max_processes}")

    def add_camera(self, config: ProcessConfig) -> bool:
        """Add a new camera stream."""
        if config.camera_id in self.processes:
            logger.warning(f"Camera {config.camera_id} already exists")
            return False

        if len(self.processes) >= self.max_processes:
            logger.error(f"Max processes ({self.max_processes}) reached")
            return False

        try:
            # Create shared resources
            frame_buffer = SharedFrameBuffer(config.camera_id)
            command_queue = mp.Queue()
            stop_event = mp.Event()

            # Store resources
            self.frame_buffers[config.camera_id] = frame_buffer
            self.command_queues[config.camera_id] = command_queue
            self.stop_events[config.camera_id] = stop_event
            self.configs[config.camera_id] = config

            # Start process
            process = mp.Process(
                target=camera_worker_process,
                args=(config, frame_buffer, command_queue, self.status_queue, stop_event),
                name=f"Camera-{config.camera_id}"
            )
            process.start()
            self.processes[config.camera_id] = process

            logger.info(f"Started process for camera {config.camera_id} (PID: {process.pid})")
            return True

        except Exception as e:
            logger.error(f"Error adding camera {config.camera_id}: {e}")
            self._cleanup_camera(config.camera_id)
            return False

    def remove_camera(self, camera_id: str) -> bool:
        """Remove a camera stream."""
        if camera_id not in self.processes:
            return False

        logger.info(f"Removing camera {camera_id}")

        # Signal stop
        if camera_id in self.stop_events:
            self.stop_events[camera_id].set()

        # Wait for process to stop
        process = self.processes.get(camera_id)
        if process and process.is_alive():
            process.join(timeout=5.0)
            if process.is_alive():
                logger.warning(f"Force terminating process for {camera_id}")
                process.terminate()
                process.join(timeout=2.0)

        # Cleanup
        self._cleanup_camera(camera_id)
        return True

    def _cleanup_camera(self, camera_id: str):
        """Clean up camera resources."""
        for collection in [self.processes, self.frame_buffers,
                           self.command_queues, self.stop_events, self.configs]:
            collection.pop(camera_id, None)

    def get_frame(self, camera_id: str) -> Optional[Tuple[np.ndarray, float]]:
        """Get latest frame from camera."""
        buffer = self.frame_buffers.get(camera_id)
        if buffer:
            return buffer.read_frame()
        return None

    def get_stats(self) -> Dict[str, Any]:
        """Get statistics for all cameras."""
        stats = {}
        for camera_id, buffer in self.frame_buffers.items():
            process = self.processes.get(camera_id)
            stats[camera_id] = {
                'buffer_stats': buffer.get_stats(),
                'process_alive': process.is_alive() if process else False,
                'process_pid': process.pid if process else None
            }
        return stats

    def start_monitoring(self):
        """Start health monitoring thread."""
        if self.monitor_thread and self.monitor_thread.is_alive():
            return

        self.monitor_stop.clear()
        self.monitor_thread = threading.Thread(target=self._monitor_processes)
        self.monitor_thread.start()
        logger.info("Started process monitoring")

    def _monitor_processes(self):
        """Monitor process health and restart if needed."""
        while not self.monitor_stop.is_set():
            try:
                # Check status queue
                try:
                    while True:
                        status = self.status_queue.get_nowait()
                        self.process_stats[status['camera_id']] = status
                except queue.Empty:
                    pass

                # Check process health
                for camera_id in list(self.processes.keys()):
                    process = self.processes.get(camera_id)
                    if process and not process.is_alive():
                        logger.warning(f"Process for {camera_id} died, restarting")
                        config = self.configs.get(camera_id)
                        if config:
                            self.remove_camera(camera_id)
                            time.sleep(1.0)
                            self.add_camera(config)

                time.sleep(5.0)  # Check every 5 seconds

            except Exception as e:
                logger.error(f"Error in monitor thread: {e}")
                time.sleep(5.0)

    def stop_all(self):
        """Stop all camera processes."""
        logger.info("Stopping all camera processes")

        # Stop monitoring
        if self.monitor_thread:
            self.monitor_stop.set()
            self.monitor_thread.join(timeout=5.0)

        # Stop all cameras
        for camera_id in list(self.processes.keys()):
            self.remove_camera(camera_id)

        logger.info("All processes stopped")
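

if __name__ == "__main__":
    # Minimal usage sketch for running this module directly. The camera id and
    # RTSP URL are placeholders for illustration, not real endpoints.
    manager = RTSPProcessManager(max_processes=4)
    manager.start_monitoring()

    example_config = ProcessConfig(
        camera_id="cam-001",
        rtsp_url="rtsp://example.invalid/stream",  # placeholder URL
    )
    manager.add_camera(example_config)

    try:
        # Poll the latest frame for a few seconds, then shut everything down.
        for _ in range(10):
            result = manager.get_frame("cam-001")
            if result is not None:
                frame, timestamp = result
                print(f"Got frame {frame.shape} captured at {timestamp}")
            time.sleep(1.0)
        print(manager.get_stats())
    finally:
        manager.stop_all()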