refactor: half way to process per session
All checks were successful
Build Worker Base and Application Images / check-base-changes (push) Successful in 7s
Build Worker Base and Application Images / build-base (push) Has been skipped
Build Worker Base and Application Images / build-docker (push) Successful in 2m52s
Build Worker Base and Application Images / deploy-stack (push) Successful in 9s
All checks were successful
Build Worker Base and Application Images / check-base-changes (push) Successful in 7s
Build Worker Base and Application Images / build-base (push) Has been skipped
Build Worker Base and Application Images / build-docker (push) Successful in 2m52s
Build Worker Base and Application Images / deploy-stack (push) Successful in 9s
This commit is contained in:
parent
2e5316ca01
commit
34d1982e9e
12 changed files with 2771 additions and 92 deletions
3
core/processes/__init__.py
Normal file
3
core/processes/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
|||
"""
|
||||
Session Process Management Module
|
||||
"""
|
317
core/processes/communication.py
Normal file
317
core/processes/communication.py
Normal file
|
@ -0,0 +1,317 @@
|
|||
"""
|
||||
Inter-Process Communication (IPC) system for session processes.
|
||||
Defines message types and protocols for main ↔ session communication.
|
||||
"""
|
||||
|
||||
import time
|
||||
from enum import Enum
|
||||
from typing import Dict, Any, Optional, Union
|
||||
from dataclasses import dataclass, field
|
||||
import numpy as np
|
||||
|
||||
|
||||
class MessageType(Enum):
|
||||
"""Message types for IPC communication."""
|
||||
|
||||
# Commands: Main → Session
|
||||
INITIALIZE = "initialize"
|
||||
PROCESS_FRAME = "process_frame"
|
||||
SET_SESSION_ID = "set_session_id"
|
||||
SHUTDOWN = "shutdown"
|
||||
HEALTH_CHECK = "health_check"
|
||||
|
||||
# Responses: Session → Main
|
||||
INITIALIZED = "initialized"
|
||||
DETECTION_RESULT = "detection_result"
|
||||
SESSION_SET = "session_set"
|
||||
SHUTDOWN_COMPLETE = "shutdown_complete"
|
||||
HEALTH_RESPONSE = "health_response"
|
||||
ERROR = "error"
|
||||
|
||||
|
||||
@dataclass
|
||||
class IPCMessage:
|
||||
"""Base class for all IPC messages."""
|
||||
type: MessageType
|
||||
session_id: str
|
||||
timestamp: float = field(default_factory=time.time)
|
||||
message_id: str = field(default_factory=lambda: str(int(time.time() * 1000000)))
|
||||
|
||||
|
||||
@dataclass
|
||||
class InitializeCommand(IPCMessage):
|
||||
"""Initialize session process with configuration."""
|
||||
subscription_config: Dict[str, Any] = field(default_factory=dict)
|
||||
model_config: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class ProcessFrameCommand(IPCMessage):
|
||||
"""Process a frame through the detection pipeline."""
|
||||
frame: Optional[np.ndarray] = None
|
||||
display_id: str = ""
|
||||
subscription_identifier: str = ""
|
||||
frame_timestamp: float = 0.0
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class SetSessionIdCommand(IPCMessage):
|
||||
"""Set the session ID for the current session."""
|
||||
backend_session_id: str = ""
|
||||
display_id: str = ""
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class ShutdownCommand(IPCMessage):
|
||||
"""Shutdown the session process gracefully."""
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class HealthCheckCommand(IPCMessage):
|
||||
"""Check health status of session process."""
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class InitializedResponse(IPCMessage):
|
||||
"""Response indicating successful initialization."""
|
||||
success: bool = False
|
||||
error_message: Optional[str] = None
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class DetectionResultResponse(IPCMessage):
|
||||
"""Detection results from session process."""
|
||||
detections: Dict[str, Any] = field(default_factory=dict)
|
||||
processing_time: float = 0.0
|
||||
phase: str = "" # "detection" or "processing"
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class SessionSetResponse(IPCMessage):
|
||||
"""Response confirming session ID was set."""
|
||||
success: bool = False
|
||||
backend_session_id: str = ""
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class ShutdownCompleteResponse(IPCMessage):
|
||||
"""Response confirming graceful shutdown."""
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class HealthResponse(IPCMessage):
|
||||
"""Health status response."""
|
||||
status: str = "unknown" # "healthy", "degraded", "unhealthy"
|
||||
memory_usage_mb: float = 0.0
|
||||
cpu_percent: float = 0.0
|
||||
gpu_memory_mb: Optional[float] = None
|
||||
uptime_seconds: float = 0.0
|
||||
processed_frames: int = 0
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class ErrorResponse(IPCMessage):
|
||||
"""Error message from session process."""
|
||||
error_type: str = ""
|
||||
error_message: str = ""
|
||||
traceback: Optional[str] = None
|
||||
|
||||
|
||||
|
||||
# Type aliases for message unions
|
||||
CommandMessage = Union[
|
||||
InitializeCommand,
|
||||
ProcessFrameCommand,
|
||||
SetSessionIdCommand,
|
||||
ShutdownCommand,
|
||||
HealthCheckCommand
|
||||
]
|
||||
|
||||
ResponseMessage = Union[
|
||||
InitializedResponse,
|
||||
DetectionResultResponse,
|
||||
SessionSetResponse,
|
||||
ShutdownCompleteResponse,
|
||||
HealthResponse,
|
||||
ErrorResponse
|
||||
]
|
||||
|
||||
IPCMessageUnion = Union[CommandMessage, ResponseMessage]
|
||||
|
||||
|
||||
class MessageSerializer:
|
||||
"""Handles serialization/deserialization of IPC messages."""
|
||||
|
||||
@staticmethod
|
||||
def serialize_message(message: IPCMessageUnion) -> Dict[str, Any]:
|
||||
"""
|
||||
Serialize message to dictionary for queue transport.
|
||||
|
||||
Args:
|
||||
message: Message to serialize
|
||||
|
||||
Returns:
|
||||
Dictionary representation of message
|
||||
"""
|
||||
result = {
|
||||
'type': message.type.value,
|
||||
'session_id': message.session_id,
|
||||
'timestamp': message.timestamp,
|
||||
'message_id': message.message_id,
|
||||
}
|
||||
|
||||
# Add specific fields based on message type
|
||||
if isinstance(message, InitializeCommand):
|
||||
result.update({
|
||||
'subscription_config': message.subscription_config,
|
||||
'model_config': message.model_config
|
||||
})
|
||||
elif isinstance(message, ProcessFrameCommand):
|
||||
result.update({
|
||||
'frame': message.frame,
|
||||
'display_id': message.display_id,
|
||||
'subscription_identifier': message.subscription_identifier,
|
||||
'frame_timestamp': message.frame_timestamp
|
||||
})
|
||||
elif isinstance(message, SetSessionIdCommand):
|
||||
result.update({
|
||||
'backend_session_id': message.backend_session_id,
|
||||
'display_id': message.display_id
|
||||
})
|
||||
elif isinstance(message, InitializedResponse):
|
||||
result.update({
|
||||
'success': message.success,
|
||||
'error_message': message.error_message
|
||||
})
|
||||
elif isinstance(message, DetectionResultResponse):
|
||||
result.update({
|
||||
'detections': message.detections,
|
||||
'processing_time': message.processing_time,
|
||||
'phase': message.phase
|
||||
})
|
||||
elif isinstance(message, SessionSetResponse):
|
||||
result.update({
|
||||
'success': message.success,
|
||||
'backend_session_id': message.backend_session_id
|
||||
})
|
||||
elif isinstance(message, HealthResponse):
|
||||
result.update({
|
||||
'status': message.status,
|
||||
'memory_usage_mb': message.memory_usage_mb,
|
||||
'cpu_percent': message.cpu_percent,
|
||||
'gpu_memory_mb': message.gpu_memory_mb,
|
||||
'uptime_seconds': message.uptime_seconds,
|
||||
'processed_frames': message.processed_frames
|
||||
})
|
||||
elif isinstance(message, ErrorResponse):
|
||||
result.update({
|
||||
'error_type': message.error_type,
|
||||
'error_message': message.error_message,
|
||||
'traceback': message.traceback
|
||||
})
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def deserialize_message(data: Dict[str, Any]) -> IPCMessageUnion:
|
||||
"""
|
||||
Deserialize dictionary back to message object.
|
||||
|
||||
Args:
|
||||
data: Dictionary representation
|
||||
|
||||
Returns:
|
||||
Deserialized message object
|
||||
"""
|
||||
msg_type = MessageType(data['type'])
|
||||
session_id = data['session_id']
|
||||
timestamp = data['timestamp']
|
||||
message_id = data['message_id']
|
||||
|
||||
base_kwargs = {
|
||||
'session_id': session_id,
|
||||
'timestamp': timestamp,
|
||||
'message_id': message_id
|
||||
}
|
||||
|
||||
if msg_type == MessageType.INITIALIZE:
|
||||
return InitializeCommand(
|
||||
type=msg_type,
|
||||
subscription_config=data['subscription_config'],
|
||||
model_config=data['model_config'],
|
||||
**base_kwargs
|
||||
)
|
||||
elif msg_type == MessageType.PROCESS_FRAME:
|
||||
return ProcessFrameCommand(
|
||||
type=msg_type,
|
||||
frame=data['frame'],
|
||||
display_id=data['display_id'],
|
||||
subscription_identifier=data['subscription_identifier'],
|
||||
frame_timestamp=data['frame_timestamp'],
|
||||
**base_kwargs
|
||||
)
|
||||
elif msg_type == MessageType.SET_SESSION_ID:
|
||||
return SetSessionIdCommand(
|
||||
backend_session_id=data['backend_session_id'],
|
||||
display_id=data['display_id'],
|
||||
**base_kwargs
|
||||
)
|
||||
elif msg_type == MessageType.SHUTDOWN:
|
||||
return ShutdownCommand(**base_kwargs)
|
||||
elif msg_type == MessageType.HEALTH_CHECK:
|
||||
return HealthCheckCommand(**base_kwargs)
|
||||
elif msg_type == MessageType.INITIALIZED:
|
||||
return InitializedResponse(
|
||||
type=msg_type,
|
||||
success=data['success'],
|
||||
error_message=data.get('error_message'),
|
||||
**base_kwargs
|
||||
)
|
||||
elif msg_type == MessageType.DETECTION_RESULT:
|
||||
return DetectionResultResponse(
|
||||
type=msg_type,
|
||||
detections=data['detections'],
|
||||
processing_time=data['processing_time'],
|
||||
phase=data['phase'],
|
||||
**base_kwargs
|
||||
)
|
||||
elif msg_type == MessageType.SESSION_SET:
|
||||
return SessionSetResponse(
|
||||
type=msg_type,
|
||||
success=data['success'],
|
||||
backend_session_id=data['backend_session_id'],
|
||||
**base_kwargs
|
||||
)
|
||||
elif msg_type == MessageType.SHUTDOWN_COMPLETE:
|
||||
return ShutdownCompleteResponse(type=msg_type, **base_kwargs)
|
||||
elif msg_type == MessageType.HEALTH_RESPONSE:
|
||||
return HealthResponse(
|
||||
type=msg_type,
|
||||
status=data['status'],
|
||||
memory_usage_mb=data['memory_usage_mb'],
|
||||
cpu_percent=data['cpu_percent'],
|
||||
gpu_memory_mb=data.get('gpu_memory_mb'),
|
||||
uptime_seconds=data.get('uptime_seconds', 0.0),
|
||||
processed_frames=data.get('processed_frames', 0),
|
||||
**base_kwargs
|
||||
)
|
||||
elif msg_type == MessageType.ERROR:
|
||||
return ErrorResponse(
|
||||
type=msg_type,
|
||||
error_type=data['error_type'],
|
||||
error_message=data['error_message'],
|
||||
traceback=data.get('traceback'),
|
||||
**base_kwargs
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown message type: {msg_type}")
|
464
core/processes/session_manager.py
Normal file
464
core/processes/session_manager.py
Normal file
|
@ -0,0 +1,464 @@
|
|||
"""
|
||||
Session Process Manager - Manages lifecycle of session processes.
|
||||
Handles process spawning, monitoring, cleanup, and health checks.
|
||||
"""
|
||||
|
||||
import time
|
||||
import logging
|
||||
import asyncio
|
||||
import multiprocessing as mp
|
||||
from typing import Dict, Optional, Any, Callable
|
||||
from dataclasses import dataclass
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import threading
|
||||
|
||||
from .communication import (
|
||||
MessageSerializer, MessageType,
|
||||
InitializeCommand, ProcessFrameCommand, SetSessionIdCommand,
|
||||
ShutdownCommand, HealthCheckCommand,
|
||||
InitializedResponse, DetectionResultResponse, SessionSetResponse,
|
||||
ShutdownCompleteResponse, HealthResponse, ErrorResponse
|
||||
)
|
||||
from .session_worker import session_worker_main
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SessionProcessInfo:
|
||||
"""Information about a running session process."""
|
||||
session_id: str
|
||||
subscription_identifier: str
|
||||
process: mp.Process
|
||||
command_queue: mp.Queue
|
||||
response_queue: mp.Queue
|
||||
created_at: float
|
||||
last_health_check: float = 0.0
|
||||
is_initialized: bool = False
|
||||
processed_frames: int = 0
|
||||
|
||||
|
||||
class SessionProcessManager:
|
||||
"""
|
||||
Manages lifecycle of session processes.
|
||||
Each session gets its own dedicated process for complete isolation.
|
||||
"""
|
||||
|
||||
def __init__(self, max_concurrent_sessions: int = 20, health_check_interval: int = 30):
|
||||
"""
|
||||
Initialize session process manager.
|
||||
|
||||
Args:
|
||||
max_concurrent_sessions: Maximum number of concurrent session processes
|
||||
health_check_interval: Interval in seconds between health checks
|
||||
"""
|
||||
self.max_concurrent_sessions = max_concurrent_sessions
|
||||
self.health_check_interval = health_check_interval
|
||||
|
||||
# Active session processes
|
||||
self.sessions: Dict[str, SessionProcessInfo] = {}
|
||||
self.subscription_to_session: Dict[str, str] = {}
|
||||
|
||||
# Thread pool for response processing
|
||||
self.response_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ResponseProcessor")
|
||||
|
||||
# Health check task
|
||||
self.health_check_task = None
|
||||
self.is_running = False
|
||||
|
||||
# Message callbacks
|
||||
self.detection_result_callback: Optional[Callable] = None
|
||||
self.error_callback: Optional[Callable] = None
|
||||
|
||||
# Store main event loop for async operations from threads
|
||||
self.main_event_loop = None
|
||||
|
||||
logger.info(f"SessionProcessManager initialized (max_sessions={max_concurrent_sessions})")
|
||||
|
||||
async def start(self):
|
||||
"""Start the session process manager."""
|
||||
if self.is_running:
|
||||
return
|
||||
|
||||
self.is_running = True
|
||||
|
||||
# Store the main event loop for use in threads
|
||||
self.main_event_loop = asyncio.get_running_loop()
|
||||
|
||||
logger.info("Starting session process manager")
|
||||
|
||||
# Start health check task
|
||||
self.health_check_task = asyncio.create_task(self._health_check_loop())
|
||||
|
||||
# Start response processing for existing sessions
|
||||
for session_info in self.sessions.values():
|
||||
self._start_response_processing(session_info)
|
||||
|
||||
async def stop(self):
|
||||
"""Stop the session process manager and cleanup all sessions."""
|
||||
if not self.is_running:
|
||||
return
|
||||
|
||||
logger.info("Stopping session process manager")
|
||||
self.is_running = False
|
||||
|
||||
# Cancel health check task
|
||||
if self.health_check_task:
|
||||
self.health_check_task.cancel()
|
||||
try:
|
||||
await self.health_check_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
# Shutdown all sessions
|
||||
shutdown_tasks = []
|
||||
for session_id in list(self.sessions.keys()):
|
||||
task = asyncio.create_task(self.remove_session(session_id))
|
||||
shutdown_tasks.append(task)
|
||||
|
||||
if shutdown_tasks:
|
||||
await asyncio.gather(*shutdown_tasks, return_exceptions=True)
|
||||
|
||||
# Cleanup thread pool
|
||||
self.response_executor.shutdown(wait=True)
|
||||
|
||||
logger.info("Session process manager stopped")
|
||||
|
||||
async def create_session(self, subscription_identifier: str, subscription_config: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
Create a new session process for a subscription.
|
||||
|
||||
Args:
|
||||
subscription_identifier: Unique subscription identifier
|
||||
subscription_config: Subscription configuration
|
||||
|
||||
Returns:
|
||||
True if session was created successfully
|
||||
"""
|
||||
try:
|
||||
# Check if we're at capacity
|
||||
if len(self.sessions) >= self.max_concurrent_sessions:
|
||||
logger.warning(f"Cannot create session: at max capacity ({self.max_concurrent_sessions})")
|
||||
return False
|
||||
|
||||
# Check if subscription already has a session
|
||||
if subscription_identifier in self.subscription_to_session:
|
||||
existing_session_id = self.subscription_to_session[subscription_identifier]
|
||||
logger.info(f"Subscription {subscription_identifier} already has session {existing_session_id}")
|
||||
return True
|
||||
|
||||
# Generate unique session ID
|
||||
session_id = f"session_{int(time.time() * 1000)}_{subscription_identifier.replace(';', '_')}"
|
||||
|
||||
logger.info(f"Creating session process for subscription {subscription_identifier}")
|
||||
logger.info(f"Session ID: {session_id}")
|
||||
|
||||
# Create communication queues
|
||||
command_queue = mp.Queue()
|
||||
response_queue = mp.Queue()
|
||||
|
||||
# Create and start process
|
||||
process = mp.Process(
|
||||
target=session_worker_main,
|
||||
args=(session_id, command_queue, response_queue),
|
||||
name=f"SessionWorker-{session_id}"
|
||||
)
|
||||
process.start()
|
||||
|
||||
# Store session information
|
||||
session_info = SessionProcessInfo(
|
||||
session_id=session_id,
|
||||
subscription_identifier=subscription_identifier,
|
||||
process=process,
|
||||
command_queue=command_queue,
|
||||
response_queue=response_queue,
|
||||
created_at=time.time()
|
||||
)
|
||||
|
||||
self.sessions[session_id] = session_info
|
||||
self.subscription_to_session[subscription_identifier] = session_id
|
||||
|
||||
# Start response processing for this session
|
||||
self._start_response_processing(session_info)
|
||||
|
||||
logger.info(f"Session process created: {session_id} (PID: {process.pid})")
|
||||
|
||||
# Initialize the session with configuration
|
||||
model_config = {
|
||||
'modelId': subscription_config.get('modelId'),
|
||||
'modelUrl': subscription_config.get('modelUrl'),
|
||||
'modelName': subscription_config.get('modelName')
|
||||
}
|
||||
|
||||
init_command = InitializeCommand(
|
||||
type=MessageType.INITIALIZE,
|
||||
session_id=session_id,
|
||||
subscription_config=subscription_config,
|
||||
model_config=model_config
|
||||
)
|
||||
|
||||
await self._send_command(session_id, init_command)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create session for {subscription_identifier}: {e}", exc_info=True)
|
||||
# Cleanup on failure
|
||||
if session_id in self.sessions:
|
||||
await self._cleanup_session(session_id)
|
||||
return False
|
||||
|
||||
async def remove_session(self, subscription_identifier: str) -> bool:
|
||||
"""
|
||||
Remove a session process for a subscription.
|
||||
|
||||
Args:
|
||||
subscription_identifier: Subscription identifier to remove
|
||||
|
||||
Returns:
|
||||
True if session was removed successfully
|
||||
"""
|
||||
try:
|
||||
session_id = self.subscription_to_session.get(subscription_identifier)
|
||||
if not session_id:
|
||||
logger.warning(f"No session found for subscription {subscription_identifier}")
|
||||
return False
|
||||
|
||||
logger.info(f"Removing session {session_id} for subscription {subscription_identifier}")
|
||||
|
||||
session_info = self.sessions.get(session_id)
|
||||
if session_info:
|
||||
# Send shutdown command
|
||||
shutdown_command = ShutdownCommand(session_id=session_id)
|
||||
await self._send_command(session_id, shutdown_command)
|
||||
|
||||
# Wait for graceful shutdown (with timeout)
|
||||
try:
|
||||
await asyncio.wait_for(self._wait_for_shutdown(session_info), timeout=10.0)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Session {session_id} did not shutdown gracefully, terminating")
|
||||
|
||||
# Cleanup session
|
||||
await self._cleanup_session(session_id)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to remove session for {subscription_identifier}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
async def process_frame(self, subscription_identifier: str, frame: Any, display_id: str, frame_timestamp: float) -> bool:
|
||||
"""
|
||||
Send a frame to the session process for processing.
|
||||
|
||||
Args:
|
||||
subscription_identifier: Subscription identifier
|
||||
frame: Frame to process
|
||||
display_id: Display identifier
|
||||
frame_timestamp: Timestamp of the frame
|
||||
|
||||
Returns:
|
||||
True if frame was sent successfully
|
||||
"""
|
||||
try:
|
||||
session_id = self.subscription_to_session.get(subscription_identifier)
|
||||
if not session_id:
|
||||
logger.warning(f"No session found for subscription {subscription_identifier}")
|
||||
return False
|
||||
|
||||
session_info = self.sessions.get(session_id)
|
||||
if not session_info or not session_info.is_initialized:
|
||||
logger.warning(f"Session {session_id} not initialized")
|
||||
return False
|
||||
|
||||
# Create process frame command
|
||||
process_command = ProcessFrameCommand(
|
||||
session_id=session_id,
|
||||
frame=frame,
|
||||
display_id=display_id,
|
||||
subscription_identifier=subscription_identifier,
|
||||
frame_timestamp=frame_timestamp
|
||||
)
|
||||
|
||||
await self._send_command(session_id, process_command)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process frame for {subscription_identifier}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
async def set_session_id(self, subscription_identifier: str, backend_session_id: str, display_id: str) -> bool:
|
||||
"""
|
||||
Set the backend session ID for a session.
|
||||
|
||||
Args:
|
||||
subscription_identifier: Subscription identifier
|
||||
backend_session_id: Backend session ID
|
||||
display_id: Display identifier
|
||||
|
||||
Returns:
|
||||
True if session ID was set successfully
|
||||
"""
|
||||
try:
|
||||
session_id = self.subscription_to_session.get(subscription_identifier)
|
||||
if not session_id:
|
||||
logger.warning(f"No session found for subscription {subscription_identifier}")
|
||||
return False
|
||||
|
||||
# Create set session ID command
|
||||
set_command = SetSessionIdCommand(
|
||||
session_id=session_id,
|
||||
backend_session_id=backend_session_id,
|
||||
display_id=display_id
|
||||
)
|
||||
|
||||
await self._send_command(session_id, set_command)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to set session ID for {subscription_identifier}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def set_detection_result_callback(self, callback: Callable):
|
||||
"""Set callback for handling detection results."""
|
||||
self.detection_result_callback = callback
|
||||
|
||||
def set_error_callback(self, callback: Callable):
|
||||
"""Set callback for handling errors."""
|
||||
self.error_callback = callback
|
||||
|
||||
def get_session_count(self) -> int:
|
||||
"""Get the number of active sessions."""
|
||||
return len(self.sessions)
|
||||
|
||||
def get_session_info(self, subscription_identifier: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get information about a session."""
|
||||
session_id = self.subscription_to_session.get(subscription_identifier)
|
||||
if not session_id:
|
||||
return None
|
||||
|
||||
session_info = self.sessions.get(session_id)
|
||||
if not session_info:
|
||||
return None
|
||||
|
||||
return {
|
||||
'session_id': session_id,
|
||||
'subscription_identifier': subscription_identifier,
|
||||
'created_at': session_info.created_at,
|
||||
'is_initialized': session_info.is_initialized,
|
||||
'processed_frames': session_info.processed_frames,
|
||||
'process_pid': session_info.process.pid if session_info.process.is_alive() else None,
|
||||
'is_alive': session_info.process.is_alive()
|
||||
}
|
||||
|
||||
async def _send_command(self, session_id: str, command):
|
||||
"""Send command to session process."""
|
||||
session_info = self.sessions.get(session_id)
|
||||
if not session_info:
|
||||
raise ValueError(f"Session {session_id} not found")
|
||||
|
||||
serialized = MessageSerializer.serialize_message(command)
|
||||
session_info.command_queue.put(serialized)
|
||||
|
||||
def _start_response_processing(self, session_info: SessionProcessInfo):
|
||||
"""Start processing responses from a session process."""
|
||||
def process_responses():
|
||||
while session_info.session_id in self.sessions and session_info.process.is_alive():
|
||||
try:
|
||||
if not session_info.response_queue.empty():
|
||||
response_data = session_info.response_queue.get(timeout=1.0)
|
||||
response = MessageSerializer.deserialize_message(response_data)
|
||||
if self.main_event_loop:
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
self._handle_response(session_info.session_id, response),
|
||||
self.main_event_loop
|
||||
)
|
||||
else:
|
||||
time.sleep(0.01)
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing response from {session_info.session_id}: {e}")
|
||||
|
||||
self.response_executor.submit(process_responses)
|
||||
|
||||
async def _handle_response(self, session_id: str, response):
|
||||
"""Handle response from session process."""
|
||||
try:
|
||||
session_info = self.sessions.get(session_id)
|
||||
if not session_info:
|
||||
return
|
||||
|
||||
if response.type == MessageType.INITIALIZED:
|
||||
session_info.is_initialized = response.success
|
||||
if response.success:
|
||||
logger.info(f"Session {session_id} initialized successfully")
|
||||
else:
|
||||
logger.error(f"Session {session_id} initialization failed: {response.error_message}")
|
||||
|
||||
elif response.type == MessageType.DETECTION_RESULT:
|
||||
session_info.processed_frames += 1
|
||||
if self.detection_result_callback:
|
||||
await self.detection_result_callback(session_info.subscription_identifier, response)
|
||||
|
||||
elif response.type == MessageType.SESSION_SET:
|
||||
logger.info(f"Session ID set for {session_id}: {response.backend_session_id}")
|
||||
|
||||
elif response.type == MessageType.HEALTH_RESPONSE:
|
||||
session_info.last_health_check = time.time()
|
||||
logger.debug(f"Health check for {session_id}: {response.status}")
|
||||
|
||||
elif response.type == MessageType.ERROR:
|
||||
logger.error(f"Error from session {session_id}: {response.error_message}")
|
||||
if self.error_callback:
|
||||
await self.error_callback(session_info.subscription_identifier, response)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error handling response from {session_id}: {e}", exc_info=True)
|
||||
|
||||
async def _wait_for_shutdown(self, session_info: SessionProcessInfo):
|
||||
"""Wait for session process to shutdown gracefully."""
|
||||
while session_info.process.is_alive():
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
async def _cleanup_session(self, session_id: str):
|
||||
"""Cleanup session process and resources."""
|
||||
try:
|
||||
session_info = self.sessions.get(session_id)
|
||||
if not session_info:
|
||||
return
|
||||
|
||||
# Terminate process if still alive
|
||||
if session_info.process.is_alive():
|
||||
session_info.process.terminate()
|
||||
# Wait a bit for graceful termination
|
||||
await asyncio.sleep(1.0)
|
||||
if session_info.process.is_alive():
|
||||
session_info.process.kill()
|
||||
|
||||
# Remove from tracking
|
||||
del self.sessions[session_id]
|
||||
if session_info.subscription_identifier in self.subscription_to_session:
|
||||
del self.subscription_to_session[session_info.subscription_identifier]
|
||||
|
||||
logger.info(f"Session {session_id} cleaned up")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id}: {e}", exc_info=True)
|
||||
|
||||
async def _health_check_loop(self):
|
||||
"""Periodic health check of all session processes."""
|
||||
while self.is_running:
|
||||
try:
|
||||
for session_id in list(self.sessions.keys()):
|
||||
session_info = self.sessions.get(session_id)
|
||||
if session_info and session_info.is_initialized:
|
||||
# Send health check
|
||||
health_command = HealthCheckCommand(session_id=session_id)
|
||||
await self._send_command(session_id, health_command)
|
||||
|
||||
await asyncio.sleep(self.health_check_interval)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Error in health check loop: {e}", exc_info=True)
|
||||
await asyncio.sleep(5.0) # Brief pause before retrying
|
813
core/processes/session_worker.py
Normal file
813
core/processes/session_worker.py
Normal file
|
@ -0,0 +1,813 @@
|
|||
"""
|
||||
Session Worker Process - Individual process that handles one session completely.
|
||||
Each camera/session gets its own dedicated worker process for complete isolation.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import multiprocessing as mp
|
||||
import time
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
import traceback
|
||||
import psutil
|
||||
import threading
|
||||
import cv2
|
||||
import requests
|
||||
from typing import Dict, Any, Optional, Tuple
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
from queue import Queue, Empty
|
||||
|
||||
# Import core modules
|
||||
from ..models.manager import ModelManager
|
||||
from ..detection.pipeline import DetectionPipeline
|
||||
from ..models.pipeline import PipelineParser
|
||||
from ..logging.session_logger import PerSessionLogger
|
||||
from .communication import (
|
||||
MessageSerializer, MessageType, IPCMessageUnion,
|
||||
InitializeCommand, ProcessFrameCommand, SetSessionIdCommand,
|
||||
ShutdownCommand, HealthCheckCommand,
|
||||
InitializedResponse, DetectionResultResponse, SessionSetResponse,
|
||||
ShutdownCompleteResponse, HealthResponse, ErrorResponse
|
||||
)
|
||||
|
||||
|
||||
class IntegratedStreamReader:
|
||||
"""
|
||||
Integrated RTSP/HTTP stream reader for session worker processes.
|
||||
Handles both RTSP streams and HTTP snapshots with automatic failover.
|
||||
"""
|
||||
|
||||
def __init__(self, session_id: str, subscription_config: Dict[str, Any], logger: logging.Logger):
|
||||
self.session_id = session_id
|
||||
self.subscription_config = subscription_config
|
||||
self.logger = logger
|
||||
|
||||
# Stream configuration
|
||||
self.rtsp_url = subscription_config.get('rtspUrl')
|
||||
self.snapshot_url = subscription_config.get('snapshotUrl')
|
||||
self.snapshot_interval = subscription_config.get('snapshotInterval', 2000) / 1000.0 # Convert to seconds
|
||||
|
||||
# Stream state
|
||||
self.is_running = False
|
||||
self.rtsp_cap = None
|
||||
self.stream_thread = None
|
||||
self.stop_event = threading.Event()
|
||||
|
||||
# Frame buffer - single latest frame only
|
||||
self.frame_queue = Queue(maxsize=1)
|
||||
self.last_frame_time = 0
|
||||
|
||||
# Stream health monitoring
|
||||
self.consecutive_errors = 0
|
||||
self.max_consecutive_errors = 30
|
||||
self.reconnect_delay = 5.0
|
||||
self.frame_timeout = 10.0 # Seconds without frame before considered dead
|
||||
|
||||
# Crop coordinates if present
|
||||
self.crop_coords = None
|
||||
if subscription_config.get('cropX1') is not None:
|
||||
self.crop_coords = (
|
||||
subscription_config['cropX1'],
|
||||
subscription_config['cropY1'],
|
||||
subscription_config['cropX2'],
|
||||
subscription_config['cropY2']
|
||||
)
|
||||
|
||||
def start(self) -> bool:
|
||||
"""Start the stream reading in background thread."""
|
||||
if self.is_running:
|
||||
return True
|
||||
|
||||
try:
|
||||
self.is_running = True
|
||||
self.stop_event.clear()
|
||||
|
||||
# Start background thread for stream reading
|
||||
self.stream_thread = threading.Thread(
|
||||
target=self._stream_loop,
|
||||
name=f"StreamReader-{self.session_id}",
|
||||
daemon=True
|
||||
)
|
||||
self.stream_thread.start()
|
||||
|
||||
self.logger.info(f"Stream reader started for {self.session_id}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to start stream reader: {e}")
|
||||
self.is_running = False
|
||||
return False
|
||||
|
||||
def stop(self):
|
||||
"""Stop the stream reading."""
|
||||
if not self.is_running:
|
||||
return
|
||||
|
||||
self.logger.info(f"Stopping stream reader for {self.session_id}")
|
||||
self.is_running = False
|
||||
self.stop_event.set()
|
||||
|
||||
# Close RTSP connection
|
||||
if self.rtsp_cap:
|
||||
try:
|
||||
self.rtsp_cap.release()
|
||||
except:
|
||||
pass
|
||||
self.rtsp_cap = None
|
||||
|
||||
# Wait for thread to finish
|
||||
if self.stream_thread and self.stream_thread.is_alive():
|
||||
self.stream_thread.join(timeout=3.0)
|
||||
|
||||
def get_latest_frame(self) -> Optional[Tuple[np.ndarray, str, float]]:
|
||||
"""Get the latest frame if available. Returns (frame, display_id, timestamp) or None."""
|
||||
try:
|
||||
# Non-blocking get - return None if no frame available
|
||||
frame_data = self.frame_queue.get_nowait()
|
||||
return frame_data
|
||||
except Empty:
|
||||
return None
|
||||
|
||||
def _stream_loop(self):
|
||||
"""Main stream reading loop - runs in background thread."""
|
||||
self.logger.info(f"Stream loop started for {self.session_id}")
|
||||
|
||||
while self.is_running and not self.stop_event.is_set():
|
||||
try:
|
||||
if self.rtsp_url:
|
||||
# Try RTSP first
|
||||
self._read_rtsp_stream()
|
||||
elif self.snapshot_url:
|
||||
# Fallback to HTTP snapshots
|
||||
self._read_http_snapshots()
|
||||
else:
|
||||
self.logger.error("No stream URL configured")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error in stream loop: {e}")
|
||||
self._handle_stream_error()
|
||||
|
||||
self.logger.info(f"Stream loop ended for {self.session_id}")
|
||||
|
||||
def _read_rtsp_stream(self):
|
||||
"""Read frames from RTSP stream."""
|
||||
if not self.rtsp_cap:
|
||||
self._connect_rtsp()
|
||||
|
||||
if not self.rtsp_cap:
|
||||
return
|
||||
|
||||
try:
|
||||
ret, frame = self.rtsp_cap.read()
|
||||
|
||||
if ret and frame is not None:
|
||||
# Process the frame
|
||||
processed_frame = self._process_frame(frame)
|
||||
if processed_frame is not None:
|
||||
# Extract display ID from subscription identifier
|
||||
display_id = self.subscription_config['subscriptionIdentifier'].split(';')[-1]
|
||||
timestamp = time.time()
|
||||
|
||||
# Put frame in queue (replace if full)
|
||||
try:
|
||||
# Clear queue and put new frame
|
||||
try:
|
||||
self.frame_queue.get_nowait()
|
||||
except Empty:
|
||||
pass
|
||||
self.frame_queue.put((processed_frame, display_id, timestamp), timeout=0.1)
|
||||
self.last_frame_time = timestamp
|
||||
self.consecutive_errors = 0
|
||||
except:
|
||||
pass # Queue full, skip frame
|
||||
else:
|
||||
self._handle_stream_error()
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error reading RTSP frame: {e}")
|
||||
self._handle_stream_error()
|
||||
|
||||
def _read_http_snapshots(self):
|
||||
"""Read frames from HTTP snapshot URL."""
|
||||
try:
|
||||
response = requests.get(self.snapshot_url, timeout=10)
|
||||
response.raise_for_status()
|
||||
|
||||
# Convert response to numpy array
|
||||
img_array = np.asarray(bytearray(response.content), dtype=np.uint8)
|
||||
frame = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
|
||||
|
||||
if frame is not None:
|
||||
# Process the frame
|
||||
processed_frame = self._process_frame(frame)
|
||||
if processed_frame is not None:
|
||||
# Extract display ID from subscription identifier
|
||||
display_id = self.subscription_config['subscriptionIdentifier'].split(';')[-1]
|
||||
timestamp = time.time()
|
||||
|
||||
# Put frame in queue (replace if full)
|
||||
try:
|
||||
# Clear queue and put new frame
|
||||
try:
|
||||
self.frame_queue.get_nowait()
|
||||
except Empty:
|
||||
pass
|
||||
self.frame_queue.put((processed_frame, display_id, timestamp), timeout=0.1)
|
||||
self.last_frame_time = timestamp
|
||||
self.consecutive_errors = 0
|
||||
except:
|
||||
pass # Queue full, skip frame
|
||||
|
||||
# Wait for next snapshot interval
|
||||
time.sleep(self.snapshot_interval)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error reading HTTP snapshot: {e}")
|
||||
self._handle_stream_error()
|
||||
|
||||
def _connect_rtsp(self):
|
||||
"""Connect to RTSP stream."""
|
||||
try:
|
||||
self.logger.info(f"Connecting to RTSP: {self.rtsp_url}")
|
||||
|
||||
# Create VideoCapture with optimized settings
|
||||
self.rtsp_cap = cv2.VideoCapture(self.rtsp_url)
|
||||
|
||||
# Set buffer size to 1 to reduce latency
|
||||
self.rtsp_cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
|
||||
|
||||
# Check if connection successful
|
||||
if self.rtsp_cap.isOpened():
|
||||
# Test read a frame
|
||||
ret, frame = self.rtsp_cap.read()
|
||||
if ret and frame is not None:
|
||||
self.logger.info(f"RTSP connection successful for {self.session_id}")
|
||||
self.consecutive_errors = 0
|
||||
return True
|
||||
|
||||
# Connection failed
|
||||
if self.rtsp_cap:
|
||||
self.rtsp_cap.release()
|
||||
self.rtsp_cap = None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to connect RTSP: {e}")
|
||||
|
||||
return False
|
||||
|
||||
def _process_frame(self, frame: np.ndarray) -> Optional[np.ndarray]:
|
||||
"""Process frame - apply cropping if configured."""
|
||||
if frame is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Apply crop if configured
|
||||
if self.crop_coords:
|
||||
x1, y1, x2, y2 = self.crop_coords
|
||||
if x1 < x2 and y1 < y2:
|
||||
frame = frame[y1:y2, x1:x2]
|
||||
|
||||
return frame
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error processing frame: {e}")
|
||||
return None
|
||||
|
||||
def _handle_stream_error(self):
|
||||
"""Handle stream errors with reconnection logic."""
|
||||
self.consecutive_errors += 1
|
||||
|
||||
if self.consecutive_errors >= self.max_consecutive_errors:
|
||||
self.logger.error(f"Too many consecutive errors ({self.consecutive_errors}), stopping stream")
|
||||
self.stop()
|
||||
return
|
||||
|
||||
# Close current connection
|
||||
if self.rtsp_cap:
|
||||
try:
|
||||
self.rtsp_cap.release()
|
||||
except:
|
||||
pass
|
||||
self.rtsp_cap = None
|
||||
|
||||
# Wait before reconnecting
|
||||
self.logger.warning(f"Stream error #{self.consecutive_errors}, reconnecting in {self.reconnect_delay}s")
|
||||
time.sleep(self.reconnect_delay)
|
||||
|
||||
def is_healthy(self) -> bool:
|
||||
"""Check if stream is healthy (receiving frames)."""
|
||||
if not self.is_running:
|
||||
return False
|
||||
|
||||
# Check if we've received a frame recently
|
||||
if self.last_frame_time > 0:
|
||||
time_since_frame = time.time() - self.last_frame_time
|
||||
return time_since_frame < self.frame_timeout
|
||||
|
||||
return False
|
||||
|
||||
|
||||
class SessionWorkerProcess:
|
||||
"""
|
||||
Individual session worker process that handles one camera/session completely.
|
||||
Runs in its own process with isolated memory, models, and state.
|
||||
"""
|
||||
|
||||
def __init__(self, session_id: str, command_queue: mp.Queue, response_queue: mp.Queue):
|
||||
"""
|
||||
Initialize session worker process.
|
||||
|
||||
Args:
|
||||
session_id: Unique session identifier
|
||||
command_queue: Queue to receive commands from main process
|
||||
response_queue: Queue to send responses back to main process
|
||||
"""
|
||||
self.session_id = session_id
|
||||
self.command_queue = command_queue
|
||||
self.response_queue = response_queue
|
||||
|
||||
# Process information
|
||||
self.process = None
|
||||
self.start_time = time.time()
|
||||
self.processed_frames = 0
|
||||
|
||||
# Session components (will be initialized in process)
|
||||
self.model_manager = None
|
||||
self.detection_pipeline = None
|
||||
self.pipeline_parser = None
|
||||
self.logger = None
|
||||
self.session_logger = None
|
||||
self.stream_reader = None
|
||||
|
||||
# Session state
|
||||
self.subscription_config = None
|
||||
self.model_config = None
|
||||
self.backend_session_id = None
|
||||
self.display_id = None
|
||||
self.is_initialized = False
|
||||
self.should_shutdown = False
|
||||
|
||||
# Frame processing
|
||||
self.frame_processing_enabled = False
|
||||
|
||||
async def run(self):
|
||||
"""
|
||||
Main entry point for the worker process.
|
||||
This method runs in the separate process.
|
||||
"""
|
||||
try:
|
||||
# Set process name for debugging
|
||||
mp.current_process().name = f"SessionWorker-{self.session_id}"
|
||||
|
||||
# Setup basic logging first (enhanced after we get subscription config)
|
||||
self._setup_basic_logging()
|
||||
|
||||
self.logger.info(f"Session worker process started for session {self.session_id}")
|
||||
self.logger.info(f"Process ID: {os.getpid()}")
|
||||
|
||||
# Main message processing loop with integrated frame processing
|
||||
while not self.should_shutdown:
|
||||
try:
|
||||
# Process pending messages
|
||||
await self._process_pending_messages()
|
||||
|
||||
# Process frames if enabled and initialized
|
||||
if self.frame_processing_enabled and self.is_initialized and self.stream_reader:
|
||||
await self._process_stream_frames()
|
||||
|
||||
# Brief sleep to prevent busy waiting
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error in main processing loop: {e}", exc_info=True)
|
||||
self._send_error_response("main_loop_error", str(e), traceback.format_exc())
|
||||
|
||||
except Exception as e:
|
||||
# Critical error in main run loop
|
||||
if self.logger:
|
||||
self.logger.error(f"Critical error in session worker: {e}", exc_info=True)
|
||||
else:
|
||||
print(f"Critical error in session worker {self.session_id}: {e}")
|
||||
|
||||
finally:
|
||||
# Cleanup stream reader
|
||||
if self.stream_reader:
|
||||
self.stream_reader.stop()
|
||||
|
||||
if self.session_logger:
|
||||
self.session_logger.log_session_end()
|
||||
if self.session_logger:
|
||||
self.session_logger.cleanup()
|
||||
if self.logger:
|
||||
self.logger.info(f"Session worker process {self.session_id} shutting down")
|
||||
|
||||
async def _handle_message(self, message: IPCMessageUnion):
|
||||
"""
|
||||
Handle incoming messages from main process.
|
||||
|
||||
Args:
|
||||
message: Deserialized message object
|
||||
"""
|
||||
try:
|
||||
if message.type == MessageType.INITIALIZE:
|
||||
await self._handle_initialize(message)
|
||||
elif message.type == MessageType.PROCESS_FRAME:
|
||||
await self._handle_process_frame(message)
|
||||
elif message.type == MessageType.SET_SESSION_ID:
|
||||
await self._handle_set_session_id(message)
|
||||
elif message.type == MessageType.SHUTDOWN:
|
||||
await self._handle_shutdown(message)
|
||||
elif message.type == MessageType.HEALTH_CHECK:
|
||||
await self._handle_health_check(message)
|
||||
else:
|
||||
self.logger.warning(f"Unknown message type: {message.type}")
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error handling message {message.type}: {e}", exc_info=True)
|
||||
self._send_error_response(f"handle_{message.type.value}_error", str(e), traceback.format_exc())
|
||||
|
||||
async def _handle_initialize(self, message: InitializeCommand):
|
||||
"""
|
||||
Initialize the session with models and pipeline.
|
||||
|
||||
Args:
|
||||
message: Initialize command message
|
||||
"""
|
||||
try:
|
||||
self.logger.info(f"Initializing session {self.session_id}")
|
||||
self.logger.info(f"Subscription config: {message.subscription_config}")
|
||||
self.logger.info(f"Model config: {message.model_config}")
|
||||
|
||||
# Store configuration
|
||||
self.subscription_config = message.subscription_config
|
||||
self.model_config = message.model_config
|
||||
|
||||
# Setup enhanced logging now that we have subscription config
|
||||
self._setup_enhanced_logging()
|
||||
|
||||
# Initialize model manager (isolated for this process)
|
||||
self.model_manager = ModelManager("models")
|
||||
self.logger.info("Model manager initialized")
|
||||
|
||||
# Download and prepare model if needed
|
||||
model_id = self.model_config.get('modelId')
|
||||
model_url = self.model_config.get('modelUrl')
|
||||
model_name = self.model_config.get('modelName', f'Model-{model_id}')
|
||||
|
||||
if model_id and model_url:
|
||||
model_path = self.model_manager.ensure_model(model_id, model_url, model_name)
|
||||
if not model_path:
|
||||
raise RuntimeError(f"Failed to download/prepare model {model_id}")
|
||||
|
||||
self.logger.info(f"Model {model_id} prepared at {model_path}")
|
||||
|
||||
# Log model loading
|
||||
if self.session_logger:
|
||||
self.session_logger.log_model_loading(model_id, model_name, str(model_path))
|
||||
|
||||
# Load pipeline configuration
|
||||
self.pipeline_parser = self.model_manager.get_pipeline_config(model_id)
|
||||
if not self.pipeline_parser:
|
||||
raise RuntimeError(f"Failed to load pipeline config for model {model_id}")
|
||||
|
||||
self.logger.info(f"Pipeline configuration loaded for model {model_id}")
|
||||
|
||||
# Initialize detection pipeline (isolated for this session)
|
||||
self.detection_pipeline = DetectionPipeline(
|
||||
pipeline_parser=self.pipeline_parser,
|
||||
model_manager=self.model_manager,
|
||||
model_id=model_id,
|
||||
message_sender=None # Will be set to send via IPC
|
||||
)
|
||||
|
||||
# Initialize pipeline components
|
||||
if not await self.detection_pipeline.initialize():
|
||||
raise RuntimeError("Failed to initialize detection pipeline")
|
||||
|
||||
self.logger.info("Detection pipeline initialized successfully")
|
||||
|
||||
# Initialize integrated stream reader
|
||||
self.logger.info("Initializing integrated stream reader")
|
||||
self.stream_reader = IntegratedStreamReader(
|
||||
self.session_id,
|
||||
self.subscription_config,
|
||||
self.logger
|
||||
)
|
||||
|
||||
# Start stream reading
|
||||
if self.stream_reader.start():
|
||||
self.logger.info("Stream reader started successfully")
|
||||
self.frame_processing_enabled = True
|
||||
else:
|
||||
self.logger.error("Failed to start stream reader")
|
||||
|
||||
self.is_initialized = True
|
||||
|
||||
# Send success response
|
||||
response = InitializedResponse(
|
||||
type=MessageType.INITIALIZED,
|
||||
session_id=self.session_id,
|
||||
success=True
|
||||
)
|
||||
self._send_response(response)
|
||||
|
||||
else:
|
||||
raise ValueError("Missing required model configuration (modelId, modelUrl)")
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to initialize session: {e}", exc_info=True)
|
||||
response = InitializedResponse(
|
||||
type=MessageType.INITIALIZED,
|
||||
session_id=self.session_id,
|
||||
success=False,
|
||||
error_message=str(e)
|
||||
)
|
||||
self._send_response(response)
|
||||
|
||||
async def _handle_process_frame(self, message: ProcessFrameCommand):
|
||||
"""
|
||||
Process a frame through the detection pipeline.
|
||||
|
||||
Args:
|
||||
message: Process frame command message
|
||||
"""
|
||||
if not self.is_initialized:
|
||||
self._send_error_response("not_initialized", "Session not initialized", None)
|
||||
return
|
||||
|
||||
try:
|
||||
self.logger.debug(f"Processing frame for display {message.display_id}")
|
||||
|
||||
# Process frame through detection pipeline
|
||||
if self.backend_session_id:
|
||||
# Processing phase (after session ID is set)
|
||||
result = await self.detection_pipeline.execute_processing_phase(
|
||||
frame=message.frame,
|
||||
display_id=message.display_id,
|
||||
session_id=self.backend_session_id,
|
||||
subscription_id=message.subscription_identifier
|
||||
)
|
||||
phase = "processing"
|
||||
else:
|
||||
# Detection phase (before session ID is set)
|
||||
result = await self.detection_pipeline.execute_detection_phase(
|
||||
frame=message.frame,
|
||||
display_id=message.display_id,
|
||||
subscription_id=message.subscription_identifier
|
||||
)
|
||||
phase = "detection"
|
||||
|
||||
self.processed_frames += 1
|
||||
|
||||
# Send result back to main process
|
||||
response = DetectionResultResponse(
|
||||
session_id=self.session_id,
|
||||
detections=result,
|
||||
processing_time=result.get('processing_time', 0.0),
|
||||
phase=phase
|
||||
)
|
||||
self._send_response(response)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error processing frame: {e}", exc_info=True)
|
||||
self._send_error_response("frame_processing_error", str(e), traceback.format_exc())
|
||||
|
||||
async def _handle_set_session_id(self, message: SetSessionIdCommand):
|
||||
"""
|
||||
Set the backend session ID for this session.
|
||||
|
||||
Args:
|
||||
message: Set session ID command message
|
||||
"""
|
||||
try:
|
||||
self.logger.info(f"Setting backend session ID: {message.backend_session_id}")
|
||||
self.backend_session_id = message.backend_session_id
|
||||
self.display_id = message.display_id
|
||||
|
||||
response = SessionSetResponse(
|
||||
session_id=self.session_id,
|
||||
success=True,
|
||||
backend_session_id=message.backend_session_id
|
||||
)
|
||||
self._send_response(response)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error setting session ID: {e}", exc_info=True)
|
||||
self._send_error_response("set_session_id_error", str(e), traceback.format_exc())
|
||||
|
||||
async def _handle_shutdown(self, message: ShutdownCommand):
|
||||
"""
|
||||
Handle graceful shutdown request.
|
||||
|
||||
Args:
|
||||
message: Shutdown command message
|
||||
"""
|
||||
try:
|
||||
self.logger.info("Received shutdown request")
|
||||
self.should_shutdown = True
|
||||
|
||||
# Cleanup resources
|
||||
if self.detection_pipeline:
|
||||
# Add cleanup method to pipeline if needed
|
||||
pass
|
||||
|
||||
response = ShutdownCompleteResponse(session_id=self.session_id)
|
||||
self._send_response(response)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error during shutdown: {e}", exc_info=True)
|
||||
|
||||
async def _handle_health_check(self, message: HealthCheckCommand):
|
||||
"""
|
||||
Handle health check request.
|
||||
|
||||
Args:
|
||||
message: Health check command message
|
||||
"""
|
||||
try:
|
||||
# Get process metrics
|
||||
process = psutil.Process()
|
||||
memory_info = process.memory_info()
|
||||
memory_mb = memory_info.rss / (1024 * 1024) # Convert to MB
|
||||
cpu_percent = process.cpu_percent()
|
||||
|
||||
# GPU memory (if available)
|
||||
gpu_memory_mb = None
|
||||
try:
|
||||
import torch
|
||||
if torch.cuda.is_available():
|
||||
gpu_memory_mb = torch.cuda.memory_allocated() / (1024 * 1024)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# Determine health status
|
||||
status = "healthy"
|
||||
if memory_mb > 2048: # More than 2GB
|
||||
status = "degraded"
|
||||
if memory_mb > 4096: # More than 4GB
|
||||
status = "unhealthy"
|
||||
|
||||
response = HealthResponse(
|
||||
session_id=self.session_id,
|
||||
status=status,
|
||||
memory_usage_mb=memory_mb,
|
||||
cpu_percent=cpu_percent,
|
||||
gpu_memory_mb=gpu_memory_mb,
|
||||
uptime_seconds=time.time() - self.start_time,
|
||||
processed_frames=self.processed_frames
|
||||
)
|
||||
self._send_response(response)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error checking health: {e}", exc_info=True)
|
||||
self._send_error_response("health_check_error", str(e), traceback.format_exc())
|
||||
|
||||
def _send_response(self, response: IPCMessageUnion):
|
||||
"""
|
||||
Send response message to main process.
|
||||
|
||||
Args:
|
||||
response: Response message to send
|
||||
"""
|
||||
try:
|
||||
serialized = MessageSerializer.serialize_message(response)
|
||||
self.response_queue.put(serialized)
|
||||
except Exception as e:
|
||||
if self.logger:
|
||||
self.logger.error(f"Failed to send response: {e}")
|
||||
|
||||
def _send_error_response(self, error_type: str, error_message: str, traceback_str: Optional[str]):
|
||||
"""
|
||||
Send error response to main process.
|
||||
|
||||
Args:
|
||||
error_type: Type of error
|
||||
error_message: Error message
|
||||
traceback_str: Optional traceback string
|
||||
"""
|
||||
error_response = ErrorResponse(
|
||||
type=MessageType.ERROR,
|
||||
session_id=self.session_id,
|
||||
error_type=error_type,
|
||||
error_message=error_message,
|
||||
traceback=traceback_str
|
||||
)
|
||||
self._send_response(error_response)
|
||||
|
||||
def _setup_basic_logging(self):
|
||||
"""
|
||||
Setup basic logging for this process before we have subscription config.
|
||||
"""
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format=f"%(asctime)s [%(levelname)s] SessionWorker-{self.session_id}: %(message)s",
|
||||
handlers=[
|
||||
logging.StreamHandler(sys.stdout)
|
||||
]
|
||||
)
|
||||
self.logger = logging.getLogger(f"session_worker_{self.session_id}")
|
||||
|
||||
def _setup_enhanced_logging(self):
|
||||
"""
|
||||
Setup per-session logging with dedicated log file after we have subscription config.
|
||||
Phase 2: Enhanced logging with file rotation and session context.
|
||||
"""
|
||||
if not self.subscription_config:
|
||||
return
|
||||
|
||||
# Initialize per-session logger
|
||||
subscription_id = self.subscription_config.get('subscriptionIdentifier', self.session_id)
|
||||
|
||||
self.session_logger = PerSessionLogger(
|
||||
session_id=self.session_id,
|
||||
subscription_identifier=subscription_id,
|
||||
log_dir="logs",
|
||||
max_size_mb=100,
|
||||
backup_count=5
|
||||
)
|
||||
|
||||
# Get the configured logger (replaces basic logger)
|
||||
self.logger = self.session_logger.get_logger()
|
||||
|
||||
# Log session start
|
||||
self.session_logger.log_session_start(os.getpid())
|
||||
|
||||
async def _process_pending_messages(self):
|
||||
"""Process pending IPC messages from main process."""
|
||||
try:
|
||||
# Process all pending messages
|
||||
while not self.command_queue.empty():
|
||||
message_data = self.command_queue.get_nowait()
|
||||
message = MessageSerializer.deserialize_message(message_data)
|
||||
await self._handle_message(message)
|
||||
except Exception as e:
|
||||
if not self.command_queue.empty():
|
||||
# Only log error if there was actually a message to process
|
||||
self.logger.error(f"Error processing messages: {e}", exc_info=True)
|
||||
|
||||
async def _process_stream_frames(self):
|
||||
"""Process frames from the integrated stream reader."""
|
||||
try:
|
||||
if not self.stream_reader or not self.stream_reader.is_running:
|
||||
return
|
||||
|
||||
# Get latest frame from stream
|
||||
frame_data = self.stream_reader.get_latest_frame()
|
||||
if frame_data is None:
|
||||
return
|
||||
|
||||
frame, display_id, timestamp = frame_data
|
||||
|
||||
# Process frame through detection pipeline
|
||||
subscription_identifier = self.subscription_config['subscriptionIdentifier']
|
||||
|
||||
if self.backend_session_id:
|
||||
# Processing phase (after session ID is set)
|
||||
result = await self.detection_pipeline.execute_processing_phase(
|
||||
frame=frame,
|
||||
display_id=display_id,
|
||||
session_id=self.backend_session_id,
|
||||
subscription_id=subscription_identifier
|
||||
)
|
||||
phase = "processing"
|
||||
else:
|
||||
# Detection phase (before session ID is set)
|
||||
result = await self.detection_pipeline.execute_detection_phase(
|
||||
frame=frame,
|
||||
display_id=display_id,
|
||||
subscription_id=subscription_identifier
|
||||
)
|
||||
phase = "detection"
|
||||
|
||||
self.processed_frames += 1
|
||||
|
||||
# Send result back to main process
|
||||
response = DetectionResultResponse(
|
||||
type=MessageType.DETECTION_RESULT,
|
||||
session_id=self.session_id,
|
||||
detections=result,
|
||||
processing_time=result.get('processing_time', 0.0),
|
||||
phase=phase
|
||||
)
|
||||
self._send_response(response)
|
||||
|
||||
# Log frame processing (debug level to avoid spam)
|
||||
self.logger.debug(f"Processed frame #{self.processed_frames} from {display_id} (phase: {phase})")
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error processing stream frame: {e}", exc_info=True)
|
||||
|
||||
|
||||
def session_worker_main(session_id: str, command_queue: mp.Queue, response_queue: mp.Queue):
|
||||
"""
|
||||
Main entry point for session worker process.
|
||||
This function is called when the process is spawned.
|
||||
"""
|
||||
# Create worker instance
|
||||
worker = SessionWorkerProcess(session_id, command_queue, response_queue)
|
||||
|
||||
# Run the worker
|
||||
asyncio.run(worker.run())
|
Loading…
Add table
Add a link
Reference in a new issue