refactor: half way to process per session
All checks were successful
Build Worker Base and Application Images / check-base-changes (push) Successful in 7s
Build Worker Base and Application Images / build-base (push) Has been skipped
Build Worker Base and Application Images / build-docker (push) Successful in 2m52s
Build Worker Base and Application Images / deploy-stack (push) Successful in 9s
All checks were successful
Build Worker Base and Application Images / check-base-changes (push) Successful in 7s
Build Worker Base and Application Images / build-base (push) Has been skipped
Build Worker Base and Application Images / build-docker (push) Successful in 2m52s
Build Worker Base and Application Images / deploy-stack (push) Successful in 9s
This commit is contained in:
parent
2e5316ca01
commit
34d1982e9e
12 changed files with 2771 additions and 92 deletions
464
core/processes/session_manager.py
Normal file
464
core/processes/session_manager.py
Normal file
|
@ -0,0 +1,464 @@
|
|||
"""
|
||||
Session Process Manager - Manages lifecycle of session processes.
|
||||
Handles process spawning, monitoring, cleanup, and health checks.
|
||||
"""
|
||||
|
||||
import time
|
||||
import logging
|
||||
import asyncio
|
||||
import multiprocessing as mp
|
||||
from typing import Dict, Optional, Any, Callable
|
||||
from dataclasses import dataclass
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import threading
|
||||
|
||||
from .communication import (
|
||||
MessageSerializer, MessageType,
|
||||
InitializeCommand, ProcessFrameCommand, SetSessionIdCommand,
|
||||
ShutdownCommand, HealthCheckCommand,
|
||||
InitializedResponse, DetectionResultResponse, SessionSetResponse,
|
||||
ShutdownCompleteResponse, HealthResponse, ErrorResponse
|
||||
)
|
||||
from .session_worker import session_worker_main
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class SessionProcessInfo:
    """Bookkeeping record the manager keeps for one running session process.

    The first six fields are required and supplied when the process is
    spawned; the remaining fields start at their defaults and are updated
    by the manager as responses arrive from the worker.
    """

    # Identifier generated by the manager for this session.
    session_id: str
    # Subscription this session was created for.
    subscription_identifier: str
    # Handle of the worker process running the session.
    process: mp.Process
    # Manager -> worker channel carrying serialized commands.
    command_queue: mp.Queue
    # Worker -> manager channel carrying serialized responses.
    response_queue: mp.Queue
    # Wall-clock time (time.time()) at which the session was created.
    created_at: float
    # time.time() of the most recent health response; 0.0 until one arrives.
    last_health_check: float = 0.0
    # Set from the `success` flag of the worker's INITIALIZED response.
    is_initialized: bool = False
    # Number of detection results received from the worker so far.
    processed_frames: int = 0
|
||||
|
||||
|
||||
class SessionProcessManager:
    """
    Manages lifecycle of session processes.

    Each session gets its own dedicated process for complete isolation.
    The manager talks to a worker through a pair of multiprocessing queues:
    serialized commands go out on ``command_queue`` and serialized responses
    come back on ``response_queue``. One background thread per session reads
    the response queue and forwards each message to the asyncio event loop
    captured in :meth:`start`.
    """

    def __init__(self, max_concurrent_sessions: int = 20, health_check_interval: int = 30):
        """
        Initialize session process manager.

        Args:
            max_concurrent_sessions: Maximum number of concurrent session processes
            health_check_interval: Interval in seconds between health checks
        """
        self.max_concurrent_sessions = max_concurrent_sessions
        self.health_check_interval = health_check_interval

        # Active session processes, keyed by session ID, plus the reverse
        # lookup from subscription identifier to session ID.
        self.sessions: Dict[str, "SessionProcessInfo"] = {}
        self.subscription_to_session: Dict[str, str] = {}

        # Each session occupies one worker thread for as long as it lives
        # (see _start_response_processing), so the pool must be able to hold
        # one thread per allowed session. Never go below the previous fixed
        # size of 4.
        self.response_executor = ThreadPoolExecutor(
            max_workers=max(4, max_concurrent_sessions),
            thread_name_prefix="ResponseProcessor",
        )
        # Future of the response loop submitted for each session ID; used to
        # avoid starting a second consumer for the same queue.
        self._response_futures: Dict[str, Any] = {}

        # Health check task
        self.health_check_task = None
        self.is_running = False

        # Message callbacks (awaited from _handle_response)
        self.detection_result_callback: Optional[Callable] = None
        self.error_callback: Optional[Callable] = None

        # Store main event loop for async operations from threads
        self.main_event_loop = None

        logger.info(f"SessionProcessManager initialized (max_sessions={max_concurrent_sessions})")

    async def start(self):
        """Start the session process manager.

        Captures the running event loop, launches the periodic health check
        task and makes sure every already-registered session has a response
        loop. Calling it while already running is a no-op.
        """
        if self.is_running:
            return

        self.is_running = True

        # Store the main event loop for use in threads
        self.main_event_loop = asyncio.get_running_loop()

        logger.info("Starting session process manager")

        # Start health check task
        self.health_check_task = asyncio.create_task(self._health_check_loop())

        # Start response processing for existing sessions; sessions that
        # already have a live response loop are skipped by the callee.
        for session_info in self.sessions.values():
            self._start_response_processing(session_info)

    async def stop(self):
        """Stop the session process manager and cleanup all sessions."""
        if not self.is_running:
            return

        logger.info("Stopping session process manager")
        self.is_running = False

        # Cancel health check task
        if self.health_check_task:
            self.health_check_task.cancel()
            try:
                await self.health_check_task
            except asyncio.CancelledError:
                pass

        # Shutdown all sessions. remove_session() is keyed by subscription
        # identifier, so iterate the subscription map rather than session IDs.
        shutdown_tasks = [
            asyncio.create_task(self.remove_session(subscription_identifier))
            for subscription_identifier in list(self.subscription_to_session)
        ]
        if shutdown_tasks:
            await asyncio.gather(*shutdown_tasks, return_exceptions=True)

        # Anything still tracked (removal failed or no subscription mapping)
        # is cleaned up directly; a session left behind would keep its
        # response loop alive and make the executor shutdown below hang.
        for session_id in list(self.sessions):
            await self._cleanup_session(session_id)

        # Cleanup thread pool
        self.response_executor.shutdown(wait=True)

        logger.info("Session process manager stopped")

    async def create_session(self, subscription_identifier: str, subscription_config: Dict[str, Any]) -> bool:
        """
        Create a new session process for a subscription.

        Args:
            subscription_identifier: Unique subscription identifier
            subscription_config: Subscription configuration

        Returns:
            True if session was created successfully (or the subscription
            already has a session), False at capacity or on any failure
        """
        # Bound before the try block so the failure handler can always test it.
        session_id = None
        try:
            # Check if we're at capacity
            if len(self.sessions) >= self.max_concurrent_sessions:
                logger.warning(f"Cannot create session: at max capacity ({self.max_concurrent_sessions})")
                return False

            # Check if subscription already has a session
            if subscription_identifier in self.subscription_to_session:
                existing_session_id = self.subscription_to_session[subscription_identifier]
                logger.info(f"Subscription {subscription_identifier} already has session {existing_session_id}")
                return True

            # Generate unique session ID
            session_id = f"session_{int(time.time() * 1000)}_{subscription_identifier.replace(';', '_')}"

            logger.info(f"Creating session process for subscription {subscription_identifier}")
            logger.info(f"Session ID: {session_id}")

            # Create communication queues
            command_queue = mp.Queue()
            response_queue = mp.Queue()

            # Create and start process
            process = mp.Process(
                target=session_worker_main,
                args=(session_id, command_queue, response_queue),
                name=f"SessionWorker-{session_id}"
            )
            process.start()

            # Store session information
            session_info = SessionProcessInfo(
                session_id=session_id,
                subscription_identifier=subscription_identifier,
                process=process,
                command_queue=command_queue,
                response_queue=response_queue,
                created_at=time.time()
            )

            self.sessions[session_id] = session_info
            self.subscription_to_session[subscription_identifier] = session_id

            # Start response processing for this session
            self._start_response_processing(session_info)

            logger.info(f"Session process created: {session_id} (PID: {process.pid})")

            # Initialize the session with configuration
            model_config = {
                'modelId': subscription_config.get('modelId'),
                'modelUrl': subscription_config.get('modelUrl'),
                'modelName': subscription_config.get('modelName')
            }

            init_command = InitializeCommand(
                type=MessageType.INITIALIZE,
                session_id=session_id,
                subscription_config=subscription_config,
                model_config=model_config
            )

            await self._send_command(session_id, init_command)

            return True

        except Exception as e:
            logger.error(f"Failed to create session for {subscription_identifier}: {e}", exc_info=True)
            # Cleanup on failure (only if the session got as far as being registered)
            if session_id is not None and session_id in self.sessions:
                await self._cleanup_session(session_id)
            return False

    async def remove_session(self, subscription_identifier: str) -> bool:
        """
        Remove a session process for a subscription.

        Sends a shutdown command, waits up to 10 seconds for the process to
        exit on its own, then cleans up (terminating the process if needed).

        Args:
            subscription_identifier: Subscription identifier to remove

        Returns:
            True if session was removed successfully
        """
        try:
            session_id = self.subscription_to_session.get(subscription_identifier)
            if not session_id:
                logger.warning(f"No session found for subscription {subscription_identifier}")
                return False

            logger.info(f"Removing session {session_id} for subscription {subscription_identifier}")

            session_info = self.sessions.get(session_id)
            if session_info:
                # Send shutdown command
                shutdown_command = ShutdownCommand(session_id=session_id)
                await self._send_command(session_id, shutdown_command)

                # Wait for graceful shutdown (with timeout)
                try:
                    await asyncio.wait_for(self._wait_for_shutdown(session_info), timeout=10.0)
                except asyncio.TimeoutError:
                    logger.warning(f"Session {session_id} did not shutdown gracefully, terminating")

            # Cleanup session
            await self._cleanup_session(session_id)

            return True

        except Exception as e:
            logger.error(f"Failed to remove session for {subscription_identifier}: {e}", exc_info=True)
            return False

    async def process_frame(self, subscription_identifier: str, frame: Any, display_id: str, frame_timestamp: float) -> bool:
        """
        Send a frame to the session process for processing.

        Args:
            subscription_identifier: Subscription identifier
            frame: Frame to process
            display_id: Display identifier
            frame_timestamp: Timestamp of the frame

        Returns:
            True if frame was sent successfully; False if the subscription
            has no session, the session is not initialized yet, or sending failed
        """
        try:
            session_id = self.subscription_to_session.get(subscription_identifier)
            if not session_id:
                logger.warning(f"No session found for subscription {subscription_identifier}")
                return False

            session_info = self.sessions.get(session_id)
            if not session_info or not session_info.is_initialized:
                logger.warning(f"Session {session_id} not initialized")
                return False

            # Create process frame command
            process_command = ProcessFrameCommand(
                session_id=session_id,
                frame=frame,
                display_id=display_id,
                subscription_identifier=subscription_identifier,
                frame_timestamp=frame_timestamp
            )

            await self._send_command(session_id, process_command)
            return True

        except Exception as e:
            logger.error(f"Failed to process frame for {subscription_identifier}: {e}", exc_info=True)
            return False

    async def set_session_id(self, subscription_identifier: str, backend_session_id: str, display_id: str) -> bool:
        """
        Set the backend session ID for a session.

        Args:
            subscription_identifier: Subscription identifier
            backend_session_id: Backend session ID
            display_id: Display identifier

        Returns:
            True if session ID was set successfully
        """
        try:
            session_id = self.subscription_to_session.get(subscription_identifier)
            if not session_id:
                logger.warning(f"No session found for subscription {subscription_identifier}")
                return False

            # Create set session ID command
            set_command = SetSessionIdCommand(
                session_id=session_id,
                backend_session_id=backend_session_id,
                display_id=display_id
            )

            await self._send_command(session_id, set_command)
            return True

        except Exception as e:
            logger.error(f"Failed to set session ID for {subscription_identifier}: {e}", exc_info=True)
            return False

    def set_detection_result_callback(self, callback: Callable):
        """Set callback for handling detection results.

        The callback is awaited with ``(subscription_identifier, response)``.
        """
        self.detection_result_callback = callback

    def set_error_callback(self, callback: Callable):
        """Set callback for handling errors.

        The callback is awaited with ``(subscription_identifier, response)``.
        """
        self.error_callback = callback

    def get_session_count(self) -> int:
        """Get the number of active sessions."""
        return len(self.sessions)

    def get_session_info(self, subscription_identifier: str) -> Optional[Dict[str, Any]]:
        """Get information about a session.

        Returns:
            A dict snapshot of the session state, or None when the
            subscription has no tracked session.
        """
        session_id = self.subscription_to_session.get(subscription_identifier)
        if not session_id:
            return None

        session_info = self.sessions.get(session_id)
        if not session_info:
            return None

        # Query liveness once so 'process_pid' and 'is_alive' cannot disagree.
        alive = session_info.process.is_alive()
        return {
            'session_id': session_id,
            'subscription_identifier': subscription_identifier,
            'created_at': session_info.created_at,
            'is_initialized': session_info.is_initialized,
            'processed_frames': session_info.processed_frames,
            'process_pid': session_info.process.pid if alive else None,
            'is_alive': alive
        }

    async def _send_command(self, session_id: str, command):
        """Send command to session process.

        Raises:
            ValueError: If the session ID is not tracked.
        """
        session_info = self.sessions.get(session_id)
        if not session_info:
            raise ValueError(f"Session {session_id} not found")

        serialized = MessageSerializer.serialize_message(command)
        session_info.command_queue.put(serialized)

    def _start_response_processing(self, session_info: "SessionProcessInfo"):
        """Start processing responses from a session process.

        Submits a loop to the response executor that reads the session's
        response queue until the session is no longer tracked or its process
        has exited. Does nothing if a loop for this session is still pending
        or running.
        """
        # Local import: the module's import block does not bring in `queue`.
        from queue import Empty

        session_id = session_info.session_id

        existing = self._response_futures.get(session_id)
        if existing is not None and not existing.done():
            return

        def process_responses():
            while session_id in self.sessions and session_info.process.is_alive():
                try:
                    # Block briefly instead of polling empty(); the timeout
                    # bounds how long the loop takes to notice that the
                    # session was removed or the process died.
                    response_data = session_info.response_queue.get(timeout=0.5)
                    response = MessageSerializer.deserialize_message(response_data)
                    if self.main_event_loop:
                        asyncio.run_coroutine_threadsafe(
                            self._handle_response(session_info.session_id, response),
                            self.main_event_loop
                        )
                except Empty:
                    continue
                except Exception as e:
                    logger.error(f"Error processing response from {session_info.session_id}: {e}")
                    # Avoid a tight error loop if the failure is persistent.
                    time.sleep(0.01)

        self._response_futures[session_id] = self.response_executor.submit(process_responses)

    async def _handle_response(self, session_id: str, response):
        """Handle response from session process.

        Updates the session record according to the response type and
        awaits the registered callbacks for detection results and errors.
        Responses for sessions that are no longer tracked are ignored.
        """
        try:
            session_info = self.sessions.get(session_id)
            if not session_info:
                return

            if response.type == MessageType.INITIALIZED:
                session_info.is_initialized = response.success
                if response.success:
                    logger.info(f"Session {session_id} initialized successfully")
                else:
                    logger.error(f"Session {session_id} initialization failed: {response.error_message}")

            elif response.type == MessageType.DETECTION_RESULT:
                session_info.processed_frames += 1
                if self.detection_result_callback:
                    await self.detection_result_callback(session_info.subscription_identifier, response)

            elif response.type == MessageType.SESSION_SET:
                logger.info(f"Session ID set for {session_id}: {response.backend_session_id}")

            elif response.type == MessageType.HEALTH_RESPONSE:
                session_info.last_health_check = time.time()
                logger.debug(f"Health check for {session_id}: {response.status}")

            elif response.type == MessageType.ERROR:
                logger.error(f"Error from session {session_id}: {response.error_message}")
                if self.error_callback:
                    await self.error_callback(session_info.subscription_identifier, response)

        except Exception as e:
            logger.error(f"Error handling response from {session_id}: {e}", exc_info=True)

    async def _wait_for_shutdown(self, session_info: "SessionProcessInfo"):
        """Wait for session process to shutdown gracefully."""
        while session_info.process.is_alive():
            await asyncio.sleep(0.1)

    async def _cleanup_session(self, session_id: str):
        """Cleanup session process and resources.

        Terminates the process if it is still alive (escalating to kill
        after one second) and always removes the session from tracking,
        even if stopping the process raised.
        """
        session_info = self.sessions.get(session_id)
        if not session_info:
            return

        try:
            # Terminate process if still alive
            if session_info.process.is_alive():
                session_info.process.terminate()
                # Wait a bit for graceful termination
                await asyncio.sleep(1.0)
                if session_info.process.is_alive():
                    session_info.process.kill()
                    # Reap the killed child so it does not linger as a zombie.
                    session_info.process.join(timeout=0.5)

        except Exception as e:
            logger.error(f"Error cleaning up session {session_id}: {e}", exc_info=True)

        finally:
            # Remove from tracking
            self.sessions.pop(session_id, None)
            self.subscription_to_session.pop(session_info.subscription_identifier, None)
            self._response_futures.pop(session_id, None)

            logger.info(f"Session {session_id} cleaned up")

    async def _health_check_loop(self):
        """Periodic health check of all session processes.

        Sends a health check command to every initialized session, then
        sleeps for ``health_check_interval`` seconds. Runs until the manager
        stops or the task is cancelled.
        """
        while self.is_running:
            try:
                for session_id in list(self.sessions.keys()):
                    session_info = self.sessions.get(session_id)
                    if session_info and session_info.is_initialized:
                        # Send health check
                        health_command = HealthCheckCommand(session_id=session_id)
                        await self._send_command(session_id, health_command)

                await asyncio.sleep(self.health_check_interval)

            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in health check loop: {e}", exc_info=True)
                await asyncio.sleep(5.0)  # Brief pause before retrying
|
Loading…
Add table
Add a link
Reference in a new issue