python-detector-worker/core/processes/session_manager.py
ziesorx 34d1982e9e
All checks were successful
Build Worker Base and Application Images / check-base-changes (push) Successful in 7s
Build Worker Base and Application Images / build-base (push) Has been skipped
Build Worker Base and Application Images / build-docker (push) Successful in 2m52s
Build Worker Base and Application Images / deploy-stack (push) Successful in 9s
refactor: half way to process per session
2025-09-25 20:52:26 +07:00

464 lines
No EOL
18 KiB
Python

"""
Session Process Manager - Manages lifecycle of session processes.
Handles process spawning, monitoring, cleanup, and health checks.
"""
import time
import logging
import asyncio
import multiprocessing as mp
from typing import Dict, Optional, Any, Callable
from dataclasses import dataclass
from concurrent.futures import ThreadPoolExecutor
import threading
from .communication import (
MessageSerializer, MessageType,
InitializeCommand, ProcessFrameCommand, SetSessionIdCommand,
ShutdownCommand, HealthCheckCommand,
InitializedResponse, DetectionResultResponse, SessionSetResponse,
ShutdownCompleteResponse, HealthResponse, ErrorResponse
)
from .session_worker import session_worker_main
logger = logging.getLogger(__name__)
@dataclass
class SessionProcessInfo:
    """Bookkeeping record the manager keeps for one session worker process."""

    # --- identity ---
    # Manager-generated ID; used as the key of SessionProcessManager.sessions.
    session_id: str
    # Subscription this session serves.
    subscription_identifier: str

    # --- process handle and IPC channels ---
    process: mp.Process
    # Serialized commands are put here by the manager.
    command_queue: mp.Queue
    # Serialized responses are read from here by the manager.
    response_queue: mp.Queue

    # --- state ---
    # time.time() value taken when the record was created.
    created_at: float
    # time.time() of the most recent HEALTH_RESPONSE; 0.0 until one arrives.
    last_health_check: float = 0.0
    # Set from the `success` flag of the worker's INITIALIZED response.
    is_initialized: bool = False
    # Incremented once per DETECTION_RESULT response.
    processed_frames: int = 0
class SessionProcessManager:
    """
    Manages lifecycle of session processes.

    Each session gets its own dedicated process for complete isolation.
    The manager talks to a worker through two multiprocessing queues
    (commands in, responses out). Responses are read by one long-running
    reader per session running on ``response_executor`` and are handed back
    to the asyncio event loop captured in ``start()``.
    """

    def __init__(self, max_concurrent_sessions: int = 20, health_check_interval: int = 30):
        """
        Initialize session process manager.

        Args:
            max_concurrent_sessions: Maximum number of concurrent session processes
            health_check_interval: Interval in seconds between health checks
        """
        self.max_concurrent_sessions = max_concurrent_sessions
        self.health_check_interval = health_check_interval

        # Active session processes, keyed by session ID, plus the reverse
        # lookup from subscription identifier to session ID.
        self.sessions: Dict[str, "SessionProcessInfo"] = {}
        self.subscription_to_session: Dict[str, str] = {}

        # Session IDs that currently have a response reader submitted, so the
        # same session never gets two readers.
        self._reader_session_ids: set = set()

        # Thread pool for response processing. Every session occupies one
        # worker for its whole lifetime, so the pool needs at least one worker
        # per allowed session; with fewer, the extra sessions would never have
        # their responses read. Threads are only created on demand.
        self.response_executor = ThreadPoolExecutor(
            max_workers=max(4, max_concurrent_sessions),
            thread_name_prefix="ResponseProcessor",
        )

        # Health check task
        self.health_check_task = None
        self.is_running = False

        # Message callbacks (awaited from _handle_response)
        self.detection_result_callback: Optional[Callable] = None
        self.error_callback: Optional[Callable] = None

        # Store main event loop for async operations from threads
        self.main_event_loop = None

        logger.info(f"SessionProcessManager initialized (max_sessions={max_concurrent_sessions})")

    async def start(self):
        """Start the session process manager (no-op if already running)."""
        if self.is_running:
            return
        self.is_running = True

        # Store the main event loop for use in threads
        self.main_event_loop = asyncio.get_running_loop()
        logger.info("Starting session process manager")

        # Start health check task
        self.health_check_task = asyncio.create_task(self._health_check_loop())

        # Make sure every existing session has a reader. The call is a no-op
        # for sessions whose reader is already active.
        for session_info in self.sessions.values():
            self._start_response_processing(session_info)

    async def stop(self):
        """Stop the session process manager and cleanup all sessions."""
        if not self.is_running:
            return
        logger.info("Stopping session process manager")
        self.is_running = False

        # Cancel health check task
        if self.health_check_task:
            self.health_check_task.cancel()
            try:
                await self.health_check_task
            except asyncio.CancelledError:
                pass

        # Shutdown all sessions. remove_session() is keyed by subscription
        # identifier, so iterate those (not the session IDs).
        shutdown_tasks = [
            asyncio.create_task(self.remove_session(subscription_identifier))
            for subscription_identifier in list(self.subscription_to_session)
        ]
        if shutdown_tasks:
            await asyncio.gather(*shutdown_tasks, return_exceptions=True)

        # Anything still tracked failed its graceful removal. Force it out so
        # no worker outlives the manager and the reader threads can exit.
        for session_id in list(self.sessions):
            logger.warning(f"Force-cleaning session {session_id} left behind during stop")
            await self._cleanup_session(session_id)

        # Cleanup thread pool. Readers exit once their session is untracked.
        self.response_executor.shutdown(wait=True)
        logger.info("Session process manager stopped")

    async def create_session(self, subscription_identifier: str, subscription_config: Dict[str, Any]) -> bool:
        """
        Create a new session process for a subscription.

        Args:
            subscription_identifier: Unique subscription identifier
            subscription_config: Subscription configuration

        Returns:
            True if the session was created or the subscription already has
            one; False at capacity or on failure.
        """
        # Bound before the try block so the error handler can always test it.
        session_id = None
        try:
            # An already-served subscription succeeds even when at capacity.
            if subscription_identifier in self.subscription_to_session:
                existing_session_id = self.subscription_to_session[subscription_identifier]
                logger.info(f"Subscription {subscription_identifier} already has session {existing_session_id}")
                return True

            # Check if we're at capacity
            if len(self.sessions) >= self.max_concurrent_sessions:
                logger.warning(f"Cannot create session: at max capacity ({self.max_concurrent_sessions})")
                return False

            # Generate unique session ID
            session_id = f"session_{int(time.time() * 1000)}_{subscription_identifier.replace(';', '_')}"
            logger.info(f"Creating session process for subscription {subscription_identifier}")
            logger.info(f"Session ID: {session_id}")

            # Create communication queues
            command_queue = mp.Queue()
            response_queue = mp.Queue()

            # Create and start process
            process = mp.Process(
                target=session_worker_main,
                args=(session_id, command_queue, response_queue),
                name=f"SessionWorker-{session_id}"
            )
            process.start()

            # Store session information
            session_info = SessionProcessInfo(
                session_id=session_id,
                subscription_identifier=subscription_identifier,
                process=process,
                command_queue=command_queue,
                response_queue=response_queue,
                created_at=time.time()
            )
            self.sessions[session_id] = session_info
            self.subscription_to_session[subscription_identifier] = session_id

            # Start response processing for this session
            self._start_response_processing(session_info)
            logger.info(f"Session process created: {session_id} (PID: {process.pid})")

            # Initialize the session with configuration
            model_config = {
                'modelId': subscription_config.get('modelId'),
                'modelUrl': subscription_config.get('modelUrl'),
                'modelName': subscription_config.get('modelName')
            }
            init_command = InitializeCommand(
                type=MessageType.INITIALIZE,
                session_id=session_id,
                subscription_config=subscription_config,
                model_config=model_config
            )
            await self._send_command(session_id, init_command)
            return True
        except Exception as e:
            logger.error(f"Failed to create session for {subscription_identifier}: {e}", exc_info=True)
            # Cleanup on failure (only if the session got as far as being tracked)
            if session_id is not None and session_id in self.sessions:
                await self._cleanup_session(session_id)
            return False

    async def remove_session(self, subscription_identifier: str) -> bool:
        """
        Remove a session process for a subscription.

        Sends a shutdown command, waits up to 10 seconds for the worker to
        exit, then cleans up (terminating the worker if it is still alive).

        Args:
            subscription_identifier: Subscription identifier to remove

        Returns:
            True if session was removed successfully
        """
        try:
            session_id = self.subscription_to_session.get(subscription_identifier)
            if not session_id:
                logger.warning(f"No session found for subscription {subscription_identifier}")
                return False

            logger.info(f"Removing session {session_id} for subscription {subscription_identifier}")
            session_info = self.sessions.get(session_id)
            if session_info:
                # Send shutdown command
                shutdown_command = ShutdownCommand(session_id=session_id)
                await self._send_command(session_id, shutdown_command)

                # Wait for graceful shutdown (with timeout)
                try:
                    await asyncio.wait_for(self._wait_for_shutdown(session_info), timeout=10.0)
                except asyncio.TimeoutError:
                    logger.warning(f"Session {session_id} did not shutdown gracefully, terminating")

            # Cleanup session
            await self._cleanup_session(session_id)
            return True
        except Exception as e:
            logger.error(f"Failed to remove session for {subscription_identifier}: {e}", exc_info=True)
            return False

    async def process_frame(self, subscription_identifier: str, frame: Any, display_id: str, frame_timestamp: float) -> bool:
        """
        Send a frame to the session process for processing.

        Args:
            subscription_identifier: Subscription identifier
            frame: Frame to process
            display_id: Display identifier
            frame_timestamp: Timestamp of the frame

        Returns:
            True if frame was sent successfully; False if the subscription
            has no session, the session is not initialized yet, or sending
            failed.
        """
        try:
            session_id = self.subscription_to_session.get(subscription_identifier)
            if not session_id:
                logger.warning(f"No session found for subscription {subscription_identifier}")
                return False

            session_info = self.sessions.get(session_id)
            if not session_info or not session_info.is_initialized:
                logger.warning(f"Session {session_id} not initialized")
                return False

            # Create process frame command
            process_command = ProcessFrameCommand(
                session_id=session_id,
                frame=frame,
                display_id=display_id,
                subscription_identifier=subscription_identifier,
                frame_timestamp=frame_timestamp
            )
            await self._send_command(session_id, process_command)
            return True
        except Exception as e:
            logger.error(f"Failed to process frame for {subscription_identifier}: {e}", exc_info=True)
            return False

    async def set_session_id(self, subscription_identifier: str, backend_session_id: str, display_id: str) -> bool:
        """
        Set the backend session ID for a session.

        Args:
            subscription_identifier: Subscription identifier
            backend_session_id: Backend session ID
            display_id: Display identifier

        Returns:
            True if session ID was set successfully
        """
        try:
            session_id = self.subscription_to_session.get(subscription_identifier)
            if not session_id:
                logger.warning(f"No session found for subscription {subscription_identifier}")
                return False

            # Create set session ID command
            set_command = SetSessionIdCommand(
                session_id=session_id,
                backend_session_id=backend_session_id,
                display_id=display_id
            )
            await self._send_command(session_id, set_command)
            return True
        except Exception as e:
            logger.error(f"Failed to set session ID for {subscription_identifier}: {e}", exc_info=True)
            return False

    def set_detection_result_callback(self, callback: Callable):
        """Set callback for handling detection results (it is awaited with the subscription identifier and the response)."""
        self.detection_result_callback = callback

    def set_error_callback(self, callback: Callable):
        """Set callback for handling errors (it is awaited with the subscription identifier and the response)."""
        self.error_callback = callback

    def get_session_count(self) -> int:
        """Get the number of active sessions."""
        return len(self.sessions)

    def get_session_info(self, subscription_identifier: str) -> Optional[Dict[str, Any]]:
        """
        Get information about a session.

        Returns:
            A dict describing the session, or None when the subscription has
            no tracked session. ``process_pid`` is None when the worker is
            not alive.
        """
        session_id = self.subscription_to_session.get(subscription_identifier)
        if not session_id:
            return None
        session_info = self.sessions.get(session_id)
        if not session_info:
            return None

        # Sample liveness once so 'process_pid' and 'is_alive' always agree.
        is_alive = session_info.process.is_alive()
        return {
            'session_id': session_id,
            'subscription_identifier': subscription_identifier,
            'created_at': session_info.created_at,
            'is_initialized': session_info.is_initialized,
            'processed_frames': session_info.processed_frames,
            'process_pid': session_info.process.pid if is_alive else None,
            'is_alive': is_alive
        }

    async def _send_command(self, session_id: str, command):
        """
        Send command to session process.

        Raises:
            ValueError: If the session is not tracked.
        """
        session_info = self.sessions.get(session_id)
        if not session_info:
            raise ValueError(f"Session {session_id} not found")
        serialized = MessageSerializer.serialize_message(command)
        session_info.command_queue.put(serialized)

    def _start_response_processing(self, session_info: "SessionProcessInfo"):
        """
        Start processing responses from a session process.

        Submits one reader to ``response_executor``. The reader runs until
        the session is no longer tracked or its process is no longer alive.
        Calling this again while a reader for the session is active does
        nothing.
        """
        session_id = session_info.session_id
        if session_id in self._reader_session_ids:
            return
        self._reader_session_ids.add(session_id)

        def process_responses():
            try:
                while session_id in self.sessions and session_info.process.is_alive():
                    try:
                        loop = self.main_event_loop
                        # Without an event loop there is nowhere to deliver a
                        # response, so leave it queued instead of dropping it.
                        if loop is not None and not session_info.response_queue.empty():
                            response_data = session_info.response_queue.get(timeout=1.0)
                            response = MessageSerializer.deserialize_message(response_data)
                            asyncio.run_coroutine_threadsafe(
                                self._handle_response(session_id, response),
                                loop
                            )
                        else:
                            time.sleep(0.01)
                    except Exception as e:
                        logger.error(f"Error processing response from {session_id}: {e}")
            finally:
                # Allow a new reader to be started for this session later.
                self._reader_session_ids.discard(session_id)

        try:
            self.response_executor.submit(process_responses)
        except Exception:
            # The reader never started; do not leave the guard set behind.
            self._reader_session_ids.discard(session_id)
            raise

    async def _handle_response(self, session_id: str, response):
        """
        Handle response from session process.

        Updates the session record according to the response type and
        forwards detection results and errors to the registered callbacks.
        Responses for sessions that are no longer tracked are ignored.
        """
        try:
            session_info = self.sessions.get(session_id)
            if not session_info:
                return

            if response.type == MessageType.INITIALIZED:
                session_info.is_initialized = response.success
                if response.success:
                    logger.info(f"Session {session_id} initialized successfully")
                else:
                    logger.error(f"Session {session_id} initialization failed: {response.error_message}")
            elif response.type == MessageType.DETECTION_RESULT:
                session_info.processed_frames += 1
                if self.detection_result_callback:
                    await self.detection_result_callback(session_info.subscription_identifier, response)
            elif response.type == MessageType.SESSION_SET:
                logger.info(f"Session ID set for {session_id}: {response.backend_session_id}")
            elif response.type == MessageType.HEALTH_RESPONSE:
                session_info.last_health_check = time.time()
                logger.debug(f"Health check for {session_id}: {response.status}")
            elif response.type == MessageType.ERROR:
                logger.error(f"Error from session {session_id}: {response.error_message}")
                if self.error_callback:
                    await self.error_callback(session_info.subscription_identifier, response)
        except Exception as e:
            logger.error(f"Error handling response from {session_id}: {e}", exc_info=True)

    async def _wait_for_shutdown(self, session_info: "SessionProcessInfo"):
        """Wait for session process to shutdown gracefully (polls every 0.1 s; callers bound it with a timeout)."""
        while session_info.process.is_alive():
            await asyncio.sleep(0.1)

    async def _cleanup_session(self, session_id: str):
        """
        Cleanup session process and resources.

        Terminates the worker if it is still alive, escalating to kill after
        one second, and always removes the session from tracking, even when
        stopping the process fails.
        """
        session_info = self.sessions.get(session_id)
        if not session_info:
            return
        try:
            process = session_info.process
            # Terminate process if still alive
            if process.is_alive():
                process.terminate()
                # Give it up to a second, but return as soon as it has exited.
                try:
                    await asyncio.wait_for(self._wait_for_shutdown(session_info), timeout=1.0)
                except asyncio.TimeoutError:
                    process.kill()
                    # Reap the killed process; bounded so the loop is not held for long.
                    process.join(timeout=1.0)
        except Exception as e:
            logger.error(f"Error cleaning up session {session_id}: {e}", exc_info=True)
        else:
            logger.info(f"Session {session_id} cleaned up")
        finally:
            # Remove from tracking no matter what happened above, so a failed
            # terminate cannot leave a stale session occupying capacity.
            self.sessions.pop(session_id, None)
            self.subscription_to_session.pop(session_info.subscription_identifier, None)

    async def _health_check_loop(self):
        """Periodic health check of all session processes (sends a health command to every initialized session)."""
        while self.is_running:
            try:
                for session_id in list(self.sessions.keys()):
                    session_info = self.sessions.get(session_id)
                    if session_info and session_info.is_initialized:
                        # Send health check
                        health_command = HealthCheckCommand(session_id=session_id)
                        await self._send_command(session_id, health_command)
                await asyncio.sleep(self.health_check_interval)
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in health check loop: {e}", exc_info=True)
                await asyncio.sleep(5.0)  # Brief pause before retrying