refactor: half way to process per session
All checks were successful
Build Worker Base and Application Images / check-base-changes (push) Successful in 7s
Build Worker Base and Application Images / build-base (push) Has been skipped
Build Worker Base and Application Images / build-docker (push) Successful in 2m52s
Build Worker Base and Application Images / deploy-stack (push) Successful in 9s

This commit is contained in:
ziesorx 2025-09-25 20:52:26 +07:00
parent 2e5316ca01
commit 34d1982e9e
12 changed files with 2771 additions and 92 deletions

View file

@ -0,0 +1,3 @@
"""
Session Process Management Module
"""

View file

@ -0,0 +1,317 @@
"""
Inter-Process Communication (IPC) system for session processes.
Defines message types and protocols for main session communication.
"""
import time
from enum import Enum
from typing import Dict, Any, Optional, Union
from dataclasses import dataclass, field
import numpy as np
class MessageType(Enum):
"""Message types for IPC communication."""
# Commands: Main → Session
INITIALIZE = "initialize"
PROCESS_FRAME = "process_frame"
SET_SESSION_ID = "set_session_id"
SHUTDOWN = "shutdown"
HEALTH_CHECK = "health_check"
# Responses: Session → Main
INITIALIZED = "initialized"
DETECTION_RESULT = "detection_result"
SESSION_SET = "session_set"
SHUTDOWN_COMPLETE = "shutdown_complete"
HEALTH_RESPONSE = "health_response"
ERROR = "error"
@dataclass
class IPCMessage:
"""Base class for all IPC messages."""
type: MessageType
session_id: str
timestamp: float = field(default_factory=time.time)
message_id: str = field(default_factory=lambda: str(int(time.time() * 1000000)))
@dataclass
class InitializeCommand(IPCMessage):
"""Initialize session process with configuration."""
subscription_config: Dict[str, Any] = field(default_factory=dict)
model_config: Dict[str, Any] = field(default_factory=dict)
@dataclass
class ProcessFrameCommand(IPCMessage):
"""Process a frame through the detection pipeline."""
frame: Optional[np.ndarray] = None
display_id: str = ""
subscription_identifier: str = ""
frame_timestamp: float = 0.0
@dataclass
class SetSessionIdCommand(IPCMessage):
"""Set the session ID for the current session."""
backend_session_id: str = ""
display_id: str = ""
@dataclass
class ShutdownCommand(IPCMessage):
"""Shutdown the session process gracefully."""
@dataclass
class HealthCheckCommand(IPCMessage):
"""Check health status of session process."""
@dataclass
class InitializedResponse(IPCMessage):
"""Response indicating successful initialization."""
success: bool = False
error_message: Optional[str] = None
@dataclass
class DetectionResultResponse(IPCMessage):
"""Detection results from session process."""
detections: Dict[str, Any] = field(default_factory=dict)
processing_time: float = 0.0
phase: str = "" # "detection" or "processing"
@dataclass
class SessionSetResponse(IPCMessage):
"""Response confirming session ID was set."""
success: bool = False
backend_session_id: str = ""
@dataclass
class ShutdownCompleteResponse(IPCMessage):
"""Response confirming graceful shutdown."""
@dataclass
class HealthResponse(IPCMessage):
"""Health status response."""
status: str = "unknown" # "healthy", "degraded", "unhealthy"
memory_usage_mb: float = 0.0
cpu_percent: float = 0.0
gpu_memory_mb: Optional[float] = None
uptime_seconds: float = 0.0
processed_frames: int = 0
@dataclass
class ErrorResponse(IPCMessage):
"""Error message from session process."""
error_type: str = ""
error_message: str = ""
traceback: Optional[str] = None
# Type aliases for message unions
CommandMessage = Union[
InitializeCommand,
ProcessFrameCommand,
SetSessionIdCommand,
ShutdownCommand,
HealthCheckCommand
]
ResponseMessage = Union[
InitializedResponse,
DetectionResultResponse,
SessionSetResponse,
ShutdownCompleteResponse,
HealthResponse,
ErrorResponse
]
IPCMessageUnion = Union[CommandMessage, ResponseMessage]
class MessageSerializer:
"""Handles serialization/deserialization of IPC messages."""
@staticmethod
def serialize_message(message: IPCMessageUnion) -> Dict[str, Any]:
"""
Serialize message to dictionary for queue transport.
Args:
message: Message to serialize
Returns:
Dictionary representation of message
"""
result = {
'type': message.type.value,
'session_id': message.session_id,
'timestamp': message.timestamp,
'message_id': message.message_id,
}
# Add specific fields based on message type
if isinstance(message, InitializeCommand):
result.update({
'subscription_config': message.subscription_config,
'model_config': message.model_config
})
elif isinstance(message, ProcessFrameCommand):
result.update({
'frame': message.frame,
'display_id': message.display_id,
'subscription_identifier': message.subscription_identifier,
'frame_timestamp': message.frame_timestamp
})
elif isinstance(message, SetSessionIdCommand):
result.update({
'backend_session_id': message.backend_session_id,
'display_id': message.display_id
})
elif isinstance(message, InitializedResponse):
result.update({
'success': message.success,
'error_message': message.error_message
})
elif isinstance(message, DetectionResultResponse):
result.update({
'detections': message.detections,
'processing_time': message.processing_time,
'phase': message.phase
})
elif isinstance(message, SessionSetResponse):
result.update({
'success': message.success,
'backend_session_id': message.backend_session_id
})
elif isinstance(message, HealthResponse):
result.update({
'status': message.status,
'memory_usage_mb': message.memory_usage_mb,
'cpu_percent': message.cpu_percent,
'gpu_memory_mb': message.gpu_memory_mb,
'uptime_seconds': message.uptime_seconds,
'processed_frames': message.processed_frames
})
elif isinstance(message, ErrorResponse):
result.update({
'error_type': message.error_type,
'error_message': message.error_message,
'traceback': message.traceback
})
return result
@staticmethod
def deserialize_message(data: Dict[str, Any]) -> IPCMessageUnion:
"""
Deserialize dictionary back to message object.
Args:
data: Dictionary representation
Returns:
Deserialized message object
"""
msg_type = MessageType(data['type'])
session_id = data['session_id']
timestamp = data['timestamp']
message_id = data['message_id']
base_kwargs = {
'session_id': session_id,
'timestamp': timestamp,
'message_id': message_id
}
if msg_type == MessageType.INITIALIZE:
return InitializeCommand(
type=msg_type,
subscription_config=data['subscription_config'],
model_config=data['model_config'],
**base_kwargs
)
elif msg_type == MessageType.PROCESS_FRAME:
return ProcessFrameCommand(
type=msg_type,
frame=data['frame'],
display_id=data['display_id'],
subscription_identifier=data['subscription_identifier'],
frame_timestamp=data['frame_timestamp'],
**base_kwargs
)
elif msg_type == MessageType.SET_SESSION_ID:
return SetSessionIdCommand(
backend_session_id=data['backend_session_id'],
display_id=data['display_id'],
**base_kwargs
)
elif msg_type == MessageType.SHUTDOWN:
return ShutdownCommand(**base_kwargs)
elif msg_type == MessageType.HEALTH_CHECK:
return HealthCheckCommand(**base_kwargs)
elif msg_type == MessageType.INITIALIZED:
return InitializedResponse(
type=msg_type,
success=data['success'],
error_message=data.get('error_message'),
**base_kwargs
)
elif msg_type == MessageType.DETECTION_RESULT:
return DetectionResultResponse(
type=msg_type,
detections=data['detections'],
processing_time=data['processing_time'],
phase=data['phase'],
**base_kwargs
)
elif msg_type == MessageType.SESSION_SET:
return SessionSetResponse(
type=msg_type,
success=data['success'],
backend_session_id=data['backend_session_id'],
**base_kwargs
)
elif msg_type == MessageType.SHUTDOWN_COMPLETE:
return ShutdownCompleteResponse(type=msg_type, **base_kwargs)
elif msg_type == MessageType.HEALTH_RESPONSE:
return HealthResponse(
type=msg_type,
status=data['status'],
memory_usage_mb=data['memory_usage_mb'],
cpu_percent=data['cpu_percent'],
gpu_memory_mb=data.get('gpu_memory_mb'),
uptime_seconds=data.get('uptime_seconds', 0.0),
processed_frames=data.get('processed_frames', 0),
**base_kwargs
)
elif msg_type == MessageType.ERROR:
return ErrorResponse(
type=msg_type,
error_type=data['error_type'],
error_message=data['error_message'],
traceback=data.get('traceback'),
**base_kwargs
)
else:
raise ValueError(f"Unknown message type: {msg_type}")

View file

@ -0,0 +1,464 @@
"""
Session Process Manager - Manages lifecycle of session processes.
Handles process spawning, monitoring, cleanup, and health checks.
"""
import time
import logging
import asyncio
import multiprocessing as mp
from typing import Dict, Optional, Any, Callable
from dataclasses import dataclass
from concurrent.futures import ThreadPoolExecutor
import threading
from .communication import (
MessageSerializer, MessageType,
InitializeCommand, ProcessFrameCommand, SetSessionIdCommand,
ShutdownCommand, HealthCheckCommand,
InitializedResponse, DetectionResultResponse, SessionSetResponse,
ShutdownCompleteResponse, HealthResponse, ErrorResponse
)
from .session_worker import session_worker_main
logger = logging.getLogger(__name__)
@dataclass
class SessionProcessInfo:
"""Information about a running session process."""
session_id: str
subscription_identifier: str
process: mp.Process
command_queue: mp.Queue
response_queue: mp.Queue
created_at: float
last_health_check: float = 0.0
is_initialized: bool = False
processed_frames: int = 0
class SessionProcessManager:
"""
Manages lifecycle of session processes.
Each session gets its own dedicated process for complete isolation.
"""
def __init__(self, max_concurrent_sessions: int = 20, health_check_interval: int = 30):
"""
Initialize session process manager.
Args:
max_concurrent_sessions: Maximum number of concurrent session processes
health_check_interval: Interval in seconds between health checks
"""
self.max_concurrent_sessions = max_concurrent_sessions
self.health_check_interval = health_check_interval
# Active session processes
self.sessions: Dict[str, SessionProcessInfo] = {}
self.subscription_to_session: Dict[str, str] = {}
# Thread pool for response processing
self.response_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ResponseProcessor")
# Health check task
self.health_check_task = None
self.is_running = False
# Message callbacks
self.detection_result_callback: Optional[Callable] = None
self.error_callback: Optional[Callable] = None
# Store main event loop for async operations from threads
self.main_event_loop = None
logger.info(f"SessionProcessManager initialized (max_sessions={max_concurrent_sessions})")
async def start(self):
"""Start the session process manager."""
if self.is_running:
return
self.is_running = True
# Store the main event loop for use in threads
self.main_event_loop = asyncio.get_running_loop()
logger.info("Starting session process manager")
# Start health check task
self.health_check_task = asyncio.create_task(self._health_check_loop())
# Start response processing for existing sessions
for session_info in self.sessions.values():
self._start_response_processing(session_info)
async def stop(self):
"""Stop the session process manager and cleanup all sessions."""
if not self.is_running:
return
logger.info("Stopping session process manager")
self.is_running = False
# Cancel health check task
if self.health_check_task:
self.health_check_task.cancel()
try:
await self.health_check_task
except asyncio.CancelledError:
pass
# Shutdown all sessions
shutdown_tasks = []
for session_id in list(self.sessions.keys()):
task = asyncio.create_task(self.remove_session(session_id))
shutdown_tasks.append(task)
if shutdown_tasks:
await asyncio.gather(*shutdown_tasks, return_exceptions=True)
# Cleanup thread pool
self.response_executor.shutdown(wait=True)
logger.info("Session process manager stopped")
async def create_session(self, subscription_identifier: str, subscription_config: Dict[str, Any]) -> bool:
"""
Create a new session process for a subscription.
Args:
subscription_identifier: Unique subscription identifier
subscription_config: Subscription configuration
Returns:
True if session was created successfully
"""
try:
# Check if we're at capacity
if len(self.sessions) >= self.max_concurrent_sessions:
logger.warning(f"Cannot create session: at max capacity ({self.max_concurrent_sessions})")
return False
# Check if subscription already has a session
if subscription_identifier in self.subscription_to_session:
existing_session_id = self.subscription_to_session[subscription_identifier]
logger.info(f"Subscription {subscription_identifier} already has session {existing_session_id}")
return True
# Generate unique session ID
session_id = f"session_{int(time.time() * 1000)}_{subscription_identifier.replace(';', '_')}"
logger.info(f"Creating session process for subscription {subscription_identifier}")
logger.info(f"Session ID: {session_id}")
# Create communication queues
command_queue = mp.Queue()
response_queue = mp.Queue()
# Create and start process
process = mp.Process(
target=session_worker_main,
args=(session_id, command_queue, response_queue),
name=f"SessionWorker-{session_id}"
)
process.start()
# Store session information
session_info = SessionProcessInfo(
session_id=session_id,
subscription_identifier=subscription_identifier,
process=process,
command_queue=command_queue,
response_queue=response_queue,
created_at=time.time()
)
self.sessions[session_id] = session_info
self.subscription_to_session[subscription_identifier] = session_id
# Start response processing for this session
self._start_response_processing(session_info)
logger.info(f"Session process created: {session_id} (PID: {process.pid})")
# Initialize the session with configuration
model_config = {
'modelId': subscription_config.get('modelId'),
'modelUrl': subscription_config.get('modelUrl'),
'modelName': subscription_config.get('modelName')
}
init_command = InitializeCommand(
type=MessageType.INITIALIZE,
session_id=session_id,
subscription_config=subscription_config,
model_config=model_config
)
await self._send_command(session_id, init_command)
return True
except Exception as e:
logger.error(f"Failed to create session for {subscription_identifier}: {e}", exc_info=True)
# Cleanup on failure
if session_id in self.sessions:
await self._cleanup_session(session_id)
return False
async def remove_session(self, subscription_identifier: str) -> bool:
"""
Remove a session process for a subscription.
Args:
subscription_identifier: Subscription identifier to remove
Returns:
True if session was removed successfully
"""
try:
session_id = self.subscription_to_session.get(subscription_identifier)
if not session_id:
logger.warning(f"No session found for subscription {subscription_identifier}")
return False
logger.info(f"Removing session {session_id} for subscription {subscription_identifier}")
session_info = self.sessions.get(session_id)
if session_info:
# Send shutdown command
shutdown_command = ShutdownCommand(session_id=session_id)
await self._send_command(session_id, shutdown_command)
# Wait for graceful shutdown (with timeout)
try:
await asyncio.wait_for(self._wait_for_shutdown(session_info), timeout=10.0)
except asyncio.TimeoutError:
logger.warning(f"Session {session_id} did not shutdown gracefully, terminating")
# Cleanup session
await self._cleanup_session(session_id)
return True
except Exception as e:
logger.error(f"Failed to remove session for {subscription_identifier}: {e}", exc_info=True)
return False
async def process_frame(self, subscription_identifier: str, frame: Any, display_id: str, frame_timestamp: float) -> bool:
"""
Send a frame to the session process for processing.
Args:
subscription_identifier: Subscription identifier
frame: Frame to process
display_id: Display identifier
frame_timestamp: Timestamp of the frame
Returns:
True if frame was sent successfully
"""
try:
session_id = self.subscription_to_session.get(subscription_identifier)
if not session_id:
logger.warning(f"No session found for subscription {subscription_identifier}")
return False
session_info = self.sessions.get(session_id)
if not session_info or not session_info.is_initialized:
logger.warning(f"Session {session_id} not initialized")
return False
# Create process frame command
process_command = ProcessFrameCommand(
session_id=session_id,
frame=frame,
display_id=display_id,
subscription_identifier=subscription_identifier,
frame_timestamp=frame_timestamp
)
await self._send_command(session_id, process_command)
return True
except Exception as e:
logger.error(f"Failed to process frame for {subscription_identifier}: {e}", exc_info=True)
return False
async def set_session_id(self, subscription_identifier: str, backend_session_id: str, display_id: str) -> bool:
"""
Set the backend session ID for a session.
Args:
subscription_identifier: Subscription identifier
backend_session_id: Backend session ID
display_id: Display identifier
Returns:
True if session ID was set successfully
"""
try:
session_id = self.subscription_to_session.get(subscription_identifier)
if not session_id:
logger.warning(f"No session found for subscription {subscription_identifier}")
return False
# Create set session ID command
set_command = SetSessionIdCommand(
session_id=session_id,
backend_session_id=backend_session_id,
display_id=display_id
)
await self._send_command(session_id, set_command)
return True
except Exception as e:
logger.error(f"Failed to set session ID for {subscription_identifier}: {e}", exc_info=True)
return False
def set_detection_result_callback(self, callback: Callable):
"""Set callback for handling detection results."""
self.detection_result_callback = callback
def set_error_callback(self, callback: Callable):
"""Set callback for handling errors."""
self.error_callback = callback
def get_session_count(self) -> int:
"""Get the number of active sessions."""
return len(self.sessions)
def get_session_info(self, subscription_identifier: str) -> Optional[Dict[str, Any]]:
"""Get information about a session."""
session_id = self.subscription_to_session.get(subscription_identifier)
if not session_id:
return None
session_info = self.sessions.get(session_id)
if not session_info:
return None
return {
'session_id': session_id,
'subscription_identifier': subscription_identifier,
'created_at': session_info.created_at,
'is_initialized': session_info.is_initialized,
'processed_frames': session_info.processed_frames,
'process_pid': session_info.process.pid if session_info.process.is_alive() else None,
'is_alive': session_info.process.is_alive()
}
async def _send_command(self, session_id: str, command):
"""Send command to session process."""
session_info = self.sessions.get(session_id)
if not session_info:
raise ValueError(f"Session {session_id} not found")
serialized = MessageSerializer.serialize_message(command)
session_info.command_queue.put(serialized)
def _start_response_processing(self, session_info: SessionProcessInfo):
"""Start processing responses from a session process."""
def process_responses():
while session_info.session_id in self.sessions and session_info.process.is_alive():
try:
if not session_info.response_queue.empty():
response_data = session_info.response_queue.get(timeout=1.0)
response = MessageSerializer.deserialize_message(response_data)
if self.main_event_loop:
asyncio.run_coroutine_threadsafe(
self._handle_response(session_info.session_id, response),
self.main_event_loop
)
else:
time.sleep(0.01)
except Exception as e:
logger.error(f"Error processing response from {session_info.session_id}: {e}")
self.response_executor.submit(process_responses)
async def _handle_response(self, session_id: str, response):
"""Handle response from session process."""
try:
session_info = self.sessions.get(session_id)
if not session_info:
return
if response.type == MessageType.INITIALIZED:
session_info.is_initialized = response.success
if response.success:
logger.info(f"Session {session_id} initialized successfully")
else:
logger.error(f"Session {session_id} initialization failed: {response.error_message}")
elif response.type == MessageType.DETECTION_RESULT:
session_info.processed_frames += 1
if self.detection_result_callback:
await self.detection_result_callback(session_info.subscription_identifier, response)
elif response.type == MessageType.SESSION_SET:
logger.info(f"Session ID set for {session_id}: {response.backend_session_id}")
elif response.type == MessageType.HEALTH_RESPONSE:
session_info.last_health_check = time.time()
logger.debug(f"Health check for {session_id}: {response.status}")
elif response.type == MessageType.ERROR:
logger.error(f"Error from session {session_id}: {response.error_message}")
if self.error_callback:
await self.error_callback(session_info.subscription_identifier, response)
except Exception as e:
logger.error(f"Error handling response from {session_id}: {e}", exc_info=True)
async def _wait_for_shutdown(self, session_info: SessionProcessInfo):
"""Wait for session process to shutdown gracefully."""
while session_info.process.is_alive():
await asyncio.sleep(0.1)
async def _cleanup_session(self, session_id: str):
"""Cleanup session process and resources."""
try:
session_info = self.sessions.get(session_id)
if not session_info:
return
# Terminate process if still alive
if session_info.process.is_alive():
session_info.process.terminate()
# Wait a bit for graceful termination
await asyncio.sleep(1.0)
if session_info.process.is_alive():
session_info.process.kill()
# Remove from tracking
del self.sessions[session_id]
if session_info.subscription_identifier in self.subscription_to_session:
del self.subscription_to_session[session_info.subscription_identifier]
logger.info(f"Session {session_id} cleaned up")
except Exception as e:
logger.error(f"Error cleaning up session {session_id}: {e}", exc_info=True)
async def _health_check_loop(self):
"""Periodic health check of all session processes."""
while self.is_running:
try:
for session_id in list(self.sessions.keys()):
session_info = self.sessions.get(session_id)
if session_info and session_info.is_initialized:
# Send health check
health_command = HealthCheckCommand(session_id=session_id)
await self._send_command(session_id, health_command)
await asyncio.sleep(self.health_check_interval)
except asyncio.CancelledError:
break
except Exception as e:
logger.error(f"Error in health check loop: {e}", exc_info=True)
await asyncio.sleep(5.0) # Brief pause before retrying

View file

@ -0,0 +1,813 @@
"""
Session Worker Process - Individual process that handles one session completely.
Each camera/session gets its own dedicated worker process for complete isolation.
"""
import asyncio
import multiprocessing as mp
import time
import logging
import sys
import os
import traceback
import psutil
import threading
import cv2
import requests
from typing import Dict, Any, Optional, Tuple
from pathlib import Path
import numpy as np
from queue import Queue, Empty
# Import core modules
from ..models.manager import ModelManager
from ..detection.pipeline import DetectionPipeline
from ..models.pipeline import PipelineParser
from ..logging.session_logger import PerSessionLogger
from .communication import (
MessageSerializer, MessageType, IPCMessageUnion,
InitializeCommand, ProcessFrameCommand, SetSessionIdCommand,
ShutdownCommand, HealthCheckCommand,
InitializedResponse, DetectionResultResponse, SessionSetResponse,
ShutdownCompleteResponse, HealthResponse, ErrorResponse
)
class IntegratedStreamReader:
"""
Integrated RTSP/HTTP stream reader for session worker processes.
Handles both RTSP streams and HTTP snapshots with automatic failover.
"""
def __init__(self, session_id: str, subscription_config: Dict[str, Any], logger: logging.Logger):
self.session_id = session_id
self.subscription_config = subscription_config
self.logger = logger
# Stream configuration
self.rtsp_url = subscription_config.get('rtspUrl')
self.snapshot_url = subscription_config.get('snapshotUrl')
self.snapshot_interval = subscription_config.get('snapshotInterval', 2000) / 1000.0 # Convert to seconds
# Stream state
self.is_running = False
self.rtsp_cap = None
self.stream_thread = None
self.stop_event = threading.Event()
# Frame buffer - single latest frame only
self.frame_queue = Queue(maxsize=1)
self.last_frame_time = 0
# Stream health monitoring
self.consecutive_errors = 0
self.max_consecutive_errors = 30
self.reconnect_delay = 5.0
self.frame_timeout = 10.0 # Seconds without frame before considered dead
# Crop coordinates if present
self.crop_coords = None
if subscription_config.get('cropX1') is not None:
self.crop_coords = (
subscription_config['cropX1'],
subscription_config['cropY1'],
subscription_config['cropX2'],
subscription_config['cropY2']
)
def start(self) -> bool:
"""Start the stream reading in background thread."""
if self.is_running:
return True
try:
self.is_running = True
self.stop_event.clear()
# Start background thread for stream reading
self.stream_thread = threading.Thread(
target=self._stream_loop,
name=f"StreamReader-{self.session_id}",
daemon=True
)
self.stream_thread.start()
self.logger.info(f"Stream reader started for {self.session_id}")
return True
except Exception as e:
self.logger.error(f"Failed to start stream reader: {e}")
self.is_running = False
return False
def stop(self):
"""Stop the stream reading."""
if not self.is_running:
return
self.logger.info(f"Stopping stream reader for {self.session_id}")
self.is_running = False
self.stop_event.set()
# Close RTSP connection
if self.rtsp_cap:
try:
self.rtsp_cap.release()
except:
pass
self.rtsp_cap = None
# Wait for thread to finish
if self.stream_thread and self.stream_thread.is_alive():
self.stream_thread.join(timeout=3.0)
def get_latest_frame(self) -> Optional[Tuple[np.ndarray, str, float]]:
"""Get the latest frame if available. Returns (frame, display_id, timestamp) or None."""
try:
# Non-blocking get - return None if no frame available
frame_data = self.frame_queue.get_nowait()
return frame_data
except Empty:
return None
def _stream_loop(self):
"""Main stream reading loop - runs in background thread."""
self.logger.info(f"Stream loop started for {self.session_id}")
while self.is_running and not self.stop_event.is_set():
try:
if self.rtsp_url:
# Try RTSP first
self._read_rtsp_stream()
elif self.snapshot_url:
# Fallback to HTTP snapshots
self._read_http_snapshots()
else:
self.logger.error("No stream URL configured")
break
except Exception as e:
self.logger.error(f"Error in stream loop: {e}")
self._handle_stream_error()
self.logger.info(f"Stream loop ended for {self.session_id}")
def _read_rtsp_stream(self):
"""Read frames from RTSP stream."""
if not self.rtsp_cap:
self._connect_rtsp()
if not self.rtsp_cap:
return
try:
ret, frame = self.rtsp_cap.read()
if ret and frame is not None:
# Process the frame
processed_frame = self._process_frame(frame)
if processed_frame is not None:
# Extract display ID from subscription identifier
display_id = self.subscription_config['subscriptionIdentifier'].split(';')[-1]
timestamp = time.time()
# Put frame in queue (replace if full)
try:
# Clear queue and put new frame
try:
self.frame_queue.get_nowait()
except Empty:
pass
self.frame_queue.put((processed_frame, display_id, timestamp), timeout=0.1)
self.last_frame_time = timestamp
self.consecutive_errors = 0
except:
pass # Queue full, skip frame
else:
self._handle_stream_error()
except Exception as e:
self.logger.error(f"Error reading RTSP frame: {e}")
self._handle_stream_error()
def _read_http_snapshots(self):
"""Read frames from HTTP snapshot URL."""
try:
response = requests.get(self.snapshot_url, timeout=10)
response.raise_for_status()
# Convert response to numpy array
img_array = np.asarray(bytearray(response.content), dtype=np.uint8)
frame = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
if frame is not None:
# Process the frame
processed_frame = self._process_frame(frame)
if processed_frame is not None:
# Extract display ID from subscription identifier
display_id = self.subscription_config['subscriptionIdentifier'].split(';')[-1]
timestamp = time.time()
# Put frame in queue (replace if full)
try:
# Clear queue and put new frame
try:
self.frame_queue.get_nowait()
except Empty:
pass
self.frame_queue.put((processed_frame, display_id, timestamp), timeout=0.1)
self.last_frame_time = timestamp
self.consecutive_errors = 0
except:
pass # Queue full, skip frame
# Wait for next snapshot interval
time.sleep(self.snapshot_interval)
except Exception as e:
self.logger.error(f"Error reading HTTP snapshot: {e}")
self._handle_stream_error()
def _connect_rtsp(self):
"""Connect to RTSP stream."""
try:
self.logger.info(f"Connecting to RTSP: {self.rtsp_url}")
# Create VideoCapture with optimized settings
self.rtsp_cap = cv2.VideoCapture(self.rtsp_url)
# Set buffer size to 1 to reduce latency
self.rtsp_cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
# Check if connection successful
if self.rtsp_cap.isOpened():
# Test read a frame
ret, frame = self.rtsp_cap.read()
if ret and frame is not None:
self.logger.info(f"RTSP connection successful for {self.session_id}")
self.consecutive_errors = 0
return True
# Connection failed
if self.rtsp_cap:
self.rtsp_cap.release()
self.rtsp_cap = None
except Exception as e:
self.logger.error(f"Failed to connect RTSP: {e}")
return False
def _process_frame(self, frame: np.ndarray) -> Optional[np.ndarray]:
"""Process frame - apply cropping if configured."""
if frame is None:
return None
try:
# Apply crop if configured
if self.crop_coords:
x1, y1, x2, y2 = self.crop_coords
if x1 < x2 and y1 < y2:
frame = frame[y1:y2, x1:x2]
return frame
except Exception as e:
self.logger.error(f"Error processing frame: {e}")
return None
def _handle_stream_error(self):
"""Handle stream errors with reconnection logic."""
self.consecutive_errors += 1
if self.consecutive_errors >= self.max_consecutive_errors:
self.logger.error(f"Too many consecutive errors ({self.consecutive_errors}), stopping stream")
self.stop()
return
# Close current connection
if self.rtsp_cap:
try:
self.rtsp_cap.release()
except:
pass
self.rtsp_cap = None
# Wait before reconnecting
self.logger.warning(f"Stream error #{self.consecutive_errors}, reconnecting in {self.reconnect_delay}s")
time.sleep(self.reconnect_delay)
def is_healthy(self) -> bool:
"""Check if stream is healthy (receiving frames)."""
if not self.is_running:
return False
# Check if we've received a frame recently
if self.last_frame_time > 0:
time_since_frame = time.time() - self.last_frame_time
return time_since_frame < self.frame_timeout
return False
class SessionWorkerProcess:
"""
Individual session worker process that handles one camera/session completely.
Runs in its own process with isolated memory, models, and state.
"""
def __init__(self, session_id: str, command_queue: mp.Queue, response_queue: mp.Queue):
"""
Initialize session worker process.
Args:
session_id: Unique session identifier
command_queue: Queue to receive commands from main process
response_queue: Queue to send responses back to main process
"""
self.session_id = session_id
self.command_queue = command_queue
self.response_queue = response_queue
# Process information
self.process = None
self.start_time = time.time()
self.processed_frames = 0
# Session components (will be initialized in process)
self.model_manager = None
self.detection_pipeline = None
self.pipeline_parser = None
self.logger = None
self.session_logger = None
self.stream_reader = None
# Session state
self.subscription_config = None
self.model_config = None
self.backend_session_id = None
self.display_id = None
self.is_initialized = False
self.should_shutdown = False
# Frame processing
self.frame_processing_enabled = False
async def run(self):
"""
Main entry point for the worker process.
This method runs in the separate process.
"""
try:
# Set process name for debugging
mp.current_process().name = f"SessionWorker-{self.session_id}"
# Setup basic logging first (enhanced after we get subscription config)
self._setup_basic_logging()
self.logger.info(f"Session worker process started for session {self.session_id}")
self.logger.info(f"Process ID: {os.getpid()}")
# Main message processing loop with integrated frame processing
while not self.should_shutdown:
try:
# Process pending messages
await self._process_pending_messages()
# Process frames if enabled and initialized
if self.frame_processing_enabled and self.is_initialized and self.stream_reader:
await self._process_stream_frames()
# Brief sleep to prevent busy waiting
await asyncio.sleep(0.01)
except Exception as e:
self.logger.error(f"Error in main processing loop: {e}", exc_info=True)
self._send_error_response("main_loop_error", str(e), traceback.format_exc())
except Exception as e:
# Critical error in main run loop
if self.logger:
self.logger.error(f"Critical error in session worker: {e}", exc_info=True)
else:
print(f"Critical error in session worker {self.session_id}: {e}")
finally:
# Cleanup stream reader
if self.stream_reader:
self.stream_reader.stop()
if self.session_logger:
self.session_logger.log_session_end()
if self.session_logger:
self.session_logger.cleanup()
if self.logger:
self.logger.info(f"Session worker process {self.session_id} shutting down")
async def _handle_message(self, message: IPCMessageUnion):
"""
Handle incoming messages from main process.
Args:
message: Deserialized message object
"""
try:
if message.type == MessageType.INITIALIZE:
await self._handle_initialize(message)
elif message.type == MessageType.PROCESS_FRAME:
await self._handle_process_frame(message)
elif message.type == MessageType.SET_SESSION_ID:
await self._handle_set_session_id(message)
elif message.type == MessageType.SHUTDOWN:
await self._handle_shutdown(message)
elif message.type == MessageType.HEALTH_CHECK:
await self._handle_health_check(message)
else:
self.logger.warning(f"Unknown message type: {message.type}")
except Exception as e:
self.logger.error(f"Error handling message {message.type}: {e}", exc_info=True)
self._send_error_response(f"handle_{message.type.value}_error", str(e), traceback.format_exc())
async def _handle_initialize(self, message: InitializeCommand):
"""
Initialize the session with models and pipeline.
Args:
message: Initialize command message
"""
try:
self.logger.info(f"Initializing session {self.session_id}")
self.logger.info(f"Subscription config: {message.subscription_config}")
self.logger.info(f"Model config: {message.model_config}")
# Store configuration
self.subscription_config = message.subscription_config
self.model_config = message.model_config
# Setup enhanced logging now that we have subscription config
self._setup_enhanced_logging()
# Initialize model manager (isolated for this process)
self.model_manager = ModelManager("models")
self.logger.info("Model manager initialized")
# Download and prepare model if needed
model_id = self.model_config.get('modelId')
model_url = self.model_config.get('modelUrl')
model_name = self.model_config.get('modelName', f'Model-{model_id}')
if model_id and model_url:
model_path = self.model_manager.ensure_model(model_id, model_url, model_name)
if not model_path:
raise RuntimeError(f"Failed to download/prepare model {model_id}")
self.logger.info(f"Model {model_id} prepared at {model_path}")
# Log model loading
if self.session_logger:
self.session_logger.log_model_loading(model_id, model_name, str(model_path))
# Load pipeline configuration
self.pipeline_parser = self.model_manager.get_pipeline_config(model_id)
if not self.pipeline_parser:
raise RuntimeError(f"Failed to load pipeline config for model {model_id}")
self.logger.info(f"Pipeline configuration loaded for model {model_id}")
# Initialize detection pipeline (isolated for this session)
self.detection_pipeline = DetectionPipeline(
pipeline_parser=self.pipeline_parser,
model_manager=self.model_manager,
model_id=model_id,
message_sender=None # Will be set to send via IPC
)
# Initialize pipeline components
if not await self.detection_pipeline.initialize():
raise RuntimeError("Failed to initialize detection pipeline")
self.logger.info("Detection pipeline initialized successfully")
# Initialize integrated stream reader
self.logger.info("Initializing integrated stream reader")
self.stream_reader = IntegratedStreamReader(
self.session_id,
self.subscription_config,
self.logger
)
# Start stream reading
if self.stream_reader.start():
self.logger.info("Stream reader started successfully")
self.frame_processing_enabled = True
else:
self.logger.error("Failed to start stream reader")
self.is_initialized = True
# Send success response
response = InitializedResponse(
type=MessageType.INITIALIZED,
session_id=self.session_id,
success=True
)
self._send_response(response)
else:
raise ValueError("Missing required model configuration (modelId, modelUrl)")
except Exception as e:
self.logger.error(f"Failed to initialize session: {e}", exc_info=True)
response = InitializedResponse(
type=MessageType.INITIALIZED,
session_id=self.session_id,
success=False,
error_message=str(e)
)
self._send_response(response)
async def _handle_process_frame(self, message: ProcessFrameCommand):
"""
Process a frame through the detection pipeline.
Args:
message: Process frame command message
"""
if not self.is_initialized:
self._send_error_response("not_initialized", "Session not initialized", None)
return
try:
self.logger.debug(f"Processing frame for display {message.display_id}")
# Process frame through detection pipeline
if self.backend_session_id:
# Processing phase (after session ID is set)
result = await self.detection_pipeline.execute_processing_phase(
frame=message.frame,
display_id=message.display_id,
session_id=self.backend_session_id,
subscription_id=message.subscription_identifier
)
phase = "processing"
else:
# Detection phase (before session ID is set)
result = await self.detection_pipeline.execute_detection_phase(
frame=message.frame,
display_id=message.display_id,
subscription_id=message.subscription_identifier
)
phase = "detection"
self.processed_frames += 1
# Send result back to main process
response = DetectionResultResponse(
session_id=self.session_id,
detections=result,
processing_time=result.get('processing_time', 0.0),
phase=phase
)
self._send_response(response)
except Exception as e:
self.logger.error(f"Error processing frame: {e}", exc_info=True)
self._send_error_response("frame_processing_error", str(e), traceback.format_exc())
async def _handle_set_session_id(self, message: SetSessionIdCommand):
"""
Set the backend session ID for this session.
Args:
message: Set session ID command message
"""
try:
self.logger.info(f"Setting backend session ID: {message.backend_session_id}")
self.backend_session_id = message.backend_session_id
self.display_id = message.display_id
response = SessionSetResponse(
session_id=self.session_id,
success=True,
backend_session_id=message.backend_session_id
)
self._send_response(response)
except Exception as e:
self.logger.error(f"Error setting session ID: {e}", exc_info=True)
self._send_error_response("set_session_id_error", str(e), traceback.format_exc())
async def _handle_shutdown(self, message: ShutdownCommand):
"""
Handle graceful shutdown request.
Args:
message: Shutdown command message
"""
try:
self.logger.info("Received shutdown request")
self.should_shutdown = True
# Cleanup resources
if self.detection_pipeline:
# Add cleanup method to pipeline if needed
pass
response = ShutdownCompleteResponse(session_id=self.session_id)
self._send_response(response)
except Exception as e:
self.logger.error(f"Error during shutdown: {e}", exc_info=True)
async def _handle_health_check(self, message: HealthCheckCommand):
"""
Handle health check request.
Args:
message: Health check command message
"""
try:
# Get process metrics
process = psutil.Process()
memory_info = process.memory_info()
memory_mb = memory_info.rss / (1024 * 1024) # Convert to MB
cpu_percent = process.cpu_percent()
# GPU memory (if available)
gpu_memory_mb = None
try:
import torch
if torch.cuda.is_available():
gpu_memory_mb = torch.cuda.memory_allocated() / (1024 * 1024)
except ImportError:
pass
# Determine health status
status = "healthy"
if memory_mb > 2048: # More than 2GB
status = "degraded"
if memory_mb > 4096: # More than 4GB
status = "unhealthy"
response = HealthResponse(
session_id=self.session_id,
status=status,
memory_usage_mb=memory_mb,
cpu_percent=cpu_percent,
gpu_memory_mb=gpu_memory_mb,
uptime_seconds=time.time() - self.start_time,
processed_frames=self.processed_frames
)
self._send_response(response)
except Exception as e:
self.logger.error(f"Error checking health: {e}", exc_info=True)
self._send_error_response("health_check_error", str(e), traceback.format_exc())
def _send_response(self, response: IPCMessageUnion):
"""
Send response message to main process.
Args:
response: Response message to send
"""
try:
serialized = MessageSerializer.serialize_message(response)
self.response_queue.put(serialized)
except Exception as e:
if self.logger:
self.logger.error(f"Failed to send response: {e}")
def _send_error_response(self, error_type: str, error_message: str, traceback_str: Optional[str]):
"""
Send error response to main process.
Args:
error_type: Type of error
error_message: Error message
traceback_str: Optional traceback string
"""
error_response = ErrorResponse(
type=MessageType.ERROR,
session_id=self.session_id,
error_type=error_type,
error_message=error_message,
traceback=traceback_str
)
self._send_response(error_response)
def _setup_basic_logging(self):
"""
Setup basic logging for this process before we have subscription config.
"""
logging.basicConfig(
level=logging.INFO,
format=f"%(asctime)s [%(levelname)s] SessionWorker-{self.session_id}: %(message)s",
handlers=[
logging.StreamHandler(sys.stdout)
]
)
self.logger = logging.getLogger(f"session_worker_{self.session_id}")
def _setup_enhanced_logging(self):
"""
Setup per-session logging with dedicated log file after we have subscription config.
Phase 2: Enhanced logging with file rotation and session context.
"""
if not self.subscription_config:
return
# Initialize per-session logger
subscription_id = self.subscription_config.get('subscriptionIdentifier', self.session_id)
self.session_logger = PerSessionLogger(
session_id=self.session_id,
subscription_identifier=subscription_id,
log_dir="logs",
max_size_mb=100,
backup_count=5
)
# Get the configured logger (replaces basic logger)
self.logger = self.session_logger.get_logger()
# Log session start
self.session_logger.log_session_start(os.getpid())
async def _process_pending_messages(self):
"""Process pending IPC messages from main process."""
try:
# Process all pending messages
while not self.command_queue.empty():
message_data = self.command_queue.get_nowait()
message = MessageSerializer.deserialize_message(message_data)
await self._handle_message(message)
except Exception as e:
if not self.command_queue.empty():
# Only log error if there was actually a message to process
self.logger.error(f"Error processing messages: {e}", exc_info=True)
async def _process_stream_frames(self):
"""Process frames from the integrated stream reader."""
try:
if not self.stream_reader or not self.stream_reader.is_running:
return
# Get latest frame from stream
frame_data = self.stream_reader.get_latest_frame()
if frame_data is None:
return
frame, display_id, timestamp = frame_data
# Process frame through detection pipeline
subscription_identifier = self.subscription_config['subscriptionIdentifier']
if self.backend_session_id:
# Processing phase (after session ID is set)
result = await self.detection_pipeline.execute_processing_phase(
frame=frame,
display_id=display_id,
session_id=self.backend_session_id,
subscription_id=subscription_identifier
)
phase = "processing"
else:
# Detection phase (before session ID is set)
result = await self.detection_pipeline.execute_detection_phase(
frame=frame,
display_id=display_id,
subscription_id=subscription_identifier
)
phase = "detection"
self.processed_frames += 1
# Send result back to main process
response = DetectionResultResponse(
type=MessageType.DETECTION_RESULT,
session_id=self.session_id,
detections=result,
processing_time=result.get('processing_time', 0.0),
phase=phase
)
self._send_response(response)
# Log frame processing (debug level to avoid spam)
self.logger.debug(f"Processed frame #{self.processed_frames} from {display_id} (phase: {phase})")
except Exception as e:
self.logger.error(f"Error processing stream frame: {e}", exc_info=True)
def session_worker_main(session_id: str, command_queue: mp.Queue, response_queue: mp.Queue):
"""
Main entry point for session worker process.
This function is called when the process is spawned.
"""
# Create worker instance
worker = SessionWorkerProcess(session_id, command_queue, response_queue)
# Run the worker
asyncio.run(worker.run())