All checks were successful
		
		
	
	Build Worker Base and Application Images / check-base-changes (push) Successful in 7s
				
			Build Worker Base and Application Images / build-base (push) Has been skipped
				
			Build Worker Base and Application Images / build-docker (push) Successful in 3m53s
				
			Build Worker Base and Application Images / deploy-stack (push) Successful in 14s
				
			
		
			
				
	
	
		
			586 lines
		
	
	
		
			No EOL
		
	
	
		
			22 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			586 lines
		
	
	
		
			No EOL
		
	
	
		
			22 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
"""
 | 
						|
Detector Worker - Main FastAPI Application
 | 
						|
Refactored modular architecture for computer vision pipeline processing.
 | 
						|
"""
 | 
						|
import json
 | 
						|
import logging
 | 
						|
import os
 | 
						|
import time
 | 
						|
import cv2
 | 
						|
from contextlib import asynccontextmanager
 | 
						|
from typing import Dict, Any
 | 
						|
from fastapi import FastAPI, WebSocket, HTTPException
 | 
						|
from fastapi.responses import Response
 | 
						|
 | 
						|
# Import new modular communication system
 | 
						|
from core.communication.websocket import websocket_endpoint
 | 
						|
from core.communication.state import worker_state
 | 
						|
 | 
						|
# Logging setup: records at DEBUG and above are written both to a log file
# in the working directory and to a stream handler.
_log_handlers = [
    logging.FileHandler("detector_worker.log"),
    logging.StreamHandler(),
]
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.DEBUG,
    handlers=_log_handlers,
)

# Logger used by everything in this module.
logger = logging.getLogger("detector_worker")
logger.setLevel(logging.DEBUG)
 | 
						|
 | 
						|
# Frames are now stored in the shared cache buffer from core.streaming.buffers
 | 
						|
# latest_frames = {}  # Deprecated - using shared_cache_buffer instead
 | 
						|
 | 
						|
 | 
						|
# Health monitoring recovery handlers
 | 
						|
def _handle_stream_restart_recovery(component: str, details: Dict[str, Any]) -> bool:
    """
    Application-level recovery handler that restarts a camera stream.

    The subscription belonging to the camera is removed from the shared stream
    manager and, after a one second pause, added again with the same settings.

    Args:
        component: Component name. A leading "stream_" is stripped to obtain
            the camera ID (e.g., "stream_cam-001" -> "cam-001").
        details: Additional recovery details supplied by the caller (unused here).

    Returns:
        True if the subscription was re-added successfully. False if re-adding
        failed, no subscription matched the camera, or any exception occurred.
    """
    stream_prefix = "stream_"
    try:
        from core.streaming.manager import shared_stream_manager

        camera_id = component[len(stream_prefix):] if component.startswith(stream_prefix) else component
        logger.info(f"Attempting stream restart recovery for {camera_id}")

        # Only the first subscription that matches the camera is restarted.
        target = next(
            (
                candidate
                for candidate in shared_stream_manager.get_all_subscriptions()
                if candidate.camera_id == camera_id
            ),
            None,
        )
        if target is None:
            logger.warning(f"No subscription found for camera {camera_id} during recovery")
            return False

        # Drop the subscription, pause briefly, then register it again
        # with exactly the same parameters.
        shared_stream_manager.remove_subscription(target.subscription_id)
        time.sleep(1.0)
        restarted = shared_stream_manager.add_subscription(
            target.subscription_id,
            target.stream_config,
            target.crop_coords,
            target.model_id,
            target.model_url,
            target.tracking_integration,
        )

        if not restarted:
            logger.error(f"Stream restart recovery failed for {camera_id}")
            return False

        logger.info(f"Stream restart recovery successful for {camera_id}")
        return True

    except Exception as e:
        # Any failure (including a failed import) is reported as an unsuccessful recovery.
        logger.error(f"Error in stream restart recovery for {component}: {e}")
        return False
 | 
						|
 | 
						|
 | 
						|
def _handle_stream_reconnect_recovery(component: str, details: Dict[str, Any]) -> bool:
    """
    Application-level recovery handler for stream reconnects.

    No reconnect call is issued here: the handler only checks whether the
    camera is currently listed as active by the shared stream manager.
    NOTE(review): the original author's comment states that the stream readers
    handle their own reconnection logic - confirm against core.streaming.

    Args:
        component: Component name. A leading "stream_" is stripped to obtain
            the camera ID.
        details: Additional recovery details supplied by the caller (unused here).

    Returns:
        True if the camera is among the active cameras. False if it is not,
        or if any exception occurred.
    """
    try:
        from core.streaming.manager import shared_stream_manager

        camera_id = component
        if component.startswith("stream_"):
            camera_id = component[7:]  # drop the 7-character "stream_" prefix

        logger.info(f"Attempting stream reconnect recovery for {camera_id}")

        if camera_id not in shared_stream_manager.get_active_cameras():
            logger.warning(f"Camera {camera_id} not found in active cameras during reconnect recovery")
            return False

        logger.info(f"Stream reconnect recovery triggered for {camera_id}")
        return True

    except Exception as e:
        # Any failure (including a failed import) is reported as an unsuccessful recovery.
        logger.error(f"Error in stream reconnect recovery for {component}: {e}")
        return False
 | 
						|
 | 
						|
# Lifespan event handler (modern FastAPI approach)
 | 
						|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Manage application startup and shutdown.

    Startup starts the health monitor, registers the stream recovery handlers
    and logs the available endpoints. A failure while initializing health
    monitoring is logged and does not prevent the application from starting.

    Shutdown (the code after ``yield``) stops the health monitor and clears
    the worker state.
    """
    # ---- Startup ----
    logger.info("Detector Worker started successfully")

    try:
        from core.monitoring.health import health_monitor
        from core.monitoring.stream_health import stream_health_tracker
        from core.monitoring.thread_health import thread_health_monitor
        from core.monitoring.recovery import recovery_manager

        health_monitor.start()
        logger.info("Health monitoring system started")

        from core.streaming.manager import shared_stream_manager

        # Map each recovery action name to its application-level handler.
        for action_name, handler in (
            ("restart_stream", _handle_stream_restart_recovery),
            ("reconnect", _handle_stream_reconnect_recovery),
        ):
            recovery_manager.register_recovery_handler(action_name, handler)

        logger.info("Recovery handlers registered")

    except Exception as e:
        logger.error(f"Failed to initialize health monitoring: {e}")

    for startup_message in (
        "WebSocket endpoint available at: ws://0.0.0.0:8001/",
        "HTTP camera endpoint available at: http://0.0.0.0:8001/camera/{camera_id}/image",
        "Health check available at: http://0.0.0.0:8001/health",
        "Detailed health monitoring available at: http://0.0.0.0:8001/health/detailed",
        "Ready and waiting for backend WebSocket connections",
    ):
        logger.info(startup_message)

    yield

    # ---- Shutdown ----
    logger.info("Detector Worker shutting down...")

    try:
        from core.monitoring.health import health_monitor
        health_monitor.stop()
        logger.info("Health monitoring system stopped")
    except Exception as e:
        logger.error(f"Error stopping health monitoring: {e}")

    # Reset worker state. Frames live in the shared cache buffer, so there is
    # no per-module frame store to clear here.
    worker_state.set_subscriptions([])
    worker_state.session_ids.clear()
    worker_state.progression_stages.clear()
    logger.info("Detector Worker shutdown complete")
 | 
						|
 | 
						|
# Create FastAPI application; startup and shutdown work is handled by `lifespan`
app = FastAPI(title="Detector Worker", version="2.0.0", lifespan=lifespan)


# Add middleware to log all requests
@app.middleware("http")
async def log_requests(request, call_next):
    """
    Log every HTTP request at DEBUG level with method, URL, status and duration.

    Args:
        request: The incoming HTTP request.
        call_next: Callable that passes the request on and returns the response.

    Returns:
        The response produced by ``call_next``, unchanged.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or become negative) if the system clock is adjusted during the request.
    start_time = time.perf_counter()
    response = await call_next(request)
    process_time = time.perf_counter() - start_time
    logger.debug(f"HTTP {request.method} {request.url} - {response.status_code} ({process_time:.3f}s)")
    return response
 | 
						|
 | 
						|
# Load configuration from config.json in the working directory, falling back
# to built-in defaults when the file is absent.
config_path = "config.json"
if os.path.exists(config_path):
    # Explicit UTF-8 so decoding does not depend on the platform's locale encoding.
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)
    logger.info(f"Loaded configuration from {config_path}")
else:
    # Default configuration
    config = {
        "poll_interval_ms": 100,
        "reconnect_interval_sec": 5,
        "target_fps": 10,
        "max_streams": 20,
        "max_retries": 3,
    }
    logger.warning(f"Configuration file {config_path} not found, using defaults")

# Ensure models directory exists
os.makedirs("models", exist_ok=True)
logger.info("Ensured models directory exists")

# The stream manager is already initialized at module level with max_streams=20.
# Calling initialize_stream_manager() here would create a NEW instance and break
# existing references, so it is deliberately not re-initialized from config.
logger.info("Using stream manager with max_streams=20 (module-level initialization)")
if config.get("max_streams", 20) != 20:
    logger.warning(
        f"Configured max_streams={config.get('max_streams')} differs from the stream manager's "
        "module-level max_streams=20; the stream manager is not re-initialized from config here"
    )

# Frames are stored in the shared cache buffer from core.streaming.buffers,
# so no module-level frame dictionary is kept here.

logger.info("Starting detector worker application (refactored)")
# Fallback values mirror the default configuration above.
logger.info(f"Configuration: Target FPS: {config.get('target_fps', 10)}, "
            f"Max streams: {config.get('max_streams', 20)}, "
            f"Max retries: {config.get('max_retries', 3)}")
 | 
						|
 | 
						|
 | 
						|
@app.websocket("/")
async def websocket_handler(websocket: WebSocket):
    """
    Main WebSocket endpoint for backend communication.

    Logs the connecting peer and hands the connection to ``websocket_endpoint``.
    Any exception raised by that call is logged with its traceback and not
    re-raised.
    """
    peer = websocket.client
    client_info = "unknown" if not peer else f"{peer.host}:{peer.port}"
    logger.info(f"[RX ← Backend] New WebSocket connection request from {client_info}")

    try:
        await websocket_endpoint(websocket)
    except Exception as e:
        logger.error(f"WebSocket handler error for {client_info}: {e}", exc_info=True)
 | 
						|
 | 
						|
 | 
						|
@app.get("/camera/{camera_id}/image")
async def get_camera_image(camera_id: str):
    """
    HTTP endpoint to retrieve the latest frame from a camera as JPEG image.

    This endpoint is preserved for backward compatibility with existing systems.

    Args:
        camera_id: The subscription identifier (e.g., "display-001;cam-001").
            It is URL-decoded before being looked up.

    Returns:
        JPEG image as binary response

    Raises:
        HTTPException: 404 if camera not found or no frame available
        HTTPException: 500 if encoding fails or an unexpected error occurs
    """
    try:
        from urllib.parse import unquote

        # URL decode the camera_id to handle encoded characters
        original_camera_id = camera_id
        camera_id = unquote(camera_id)
        logger.debug(f"REST API request: original='{original_camera_id}', decoded='{camera_id}'")

        # Check if camera is in active subscriptions
        subscription = worker_state.get_subscription(camera_id)
        if not subscription:
            logger.warning(f"Camera ID '{camera_id}' not found in active subscriptions")
            available_cameras = list(worker_state.subscriptions.keys())
            logger.debug(f"Available cameras: {available_cameras}")
            raise HTTPException(
                status_code=404,
                detail=f"Camera {camera_id} not found or not active"
            )

        # Frames are stored under the camera part of "displayId;cameraId".
        # split() returns the whole string when there is no ';', so no
        # separate membership check is needed.
        actual_camera_id = camera_id.split(';')[-1]

        # Get frame from the shared cache buffer
        from core.streaming.buffers import shared_cache_buffer

        frame = shared_cache_buffer.get_frame(actual_camera_id)
        if frame is None:
            # The camera list is only needed for this diagnostic message, so it
            # is fetched on the failure path rather than on every request.
            available_cameras = shared_cache_buffer.frame_buffer.get_camera_list()
            logger.warning(f"\033[93m[API] No frame for '{actual_camera_id}' - Available: {available_cameras}\033[0m")
            raise HTTPException(
                status_code=404,
                detail=f"No frame available for camera {actual_camera_id}"
            )

        # Encode frame as JPEG (quality 85)
        success, buffer_img = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
        if not success:
            raise HTTPException(status_code=500, detail="Failed to encode image as JPEG")

        # Return image as binary response
        return Response(content=buffer_img.tobytes(), media_type="image/jpeg")

    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Error retrieving image for camera {camera_id}: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 | 
						|
 | 
						|
 | 
						|
@app.get("/session-image/{session_id}")
async def get_session_image(session_id: int):
    """
    HTTP endpoint to retrieve the saved session image by session ID.

    Looks in the "images" directory for files named ``{session_id}_*.jpg`` and
    returns the one with the most recent modification time.

    Args:
        session_id: The session ID to retrieve the image for

    Returns:
        JPEG image as binary response

    Raises:
        HTTPException: 404 if the images directory or a matching image is missing
        HTTPException: 500 if reading image fails
    """
    try:
        from pathlib import Path

        images_dir = Path("images")

        # is_dir() also rejects a regular file named "images", which would
        # otherwise make os.scandir() fail and surface as a 500.
        if not images_dir.is_dir():
            logger.warning("Images directory does not exist")
            raise HTTPException(
                status_code=404,
                detail="No images directory found"
            )

        # Collect (mtime, path) for every file matching {session_id}_*.jpg
        prefix = f"{session_id}_"
        with os.scandir(images_dir) as entries:
            candidates = [
                (entry.stat().st_mtime, entry.path)
                for entry in entries
                if entry.is_file() and entry.name.startswith(prefix) and entry.name.endswith('.jpg')
            ]

        if not candidates:
            logger.warning(f"No image found for session {session_id}")
            raise HTTPException(
                status_code=404,
                detail=f"No image found for session {session_id}"
            )

        # Select by mtime without a numeric sentinel, so a file whose mtime is
        # zero or negative is still eligible. max() keeps the first of several
        # equally recent files.
        _, most_recent_file = max(candidates, key=lambda candidate: candidate[0])

        logger.info(f"Found session image for session {session_id}: {most_recent_file}")

        # Read the image file
        with open(most_recent_file, 'rb') as f:
            image_data = f.read()

        # Return image as binary response
        return Response(content=image_data, media_type="image/jpeg")

    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Error retrieving session image for session {session_id}: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 | 
						|
 | 
						|
 | 
						|
@app.get("/health")
async def health_check():
    """
    Basic health check endpoint for monitoring.

    Always reports the status "healthy" together with the application version
    and the current number of subscriptions and sessions held in worker state.
    """
    subscription_count = len(worker_state.subscriptions)
    session_count = len(worker_state.session_ids)
    return dict(
        status="healthy",
        version="2.0.0",
        active_subscriptions=subscription_count,
        active_sessions=session_count,
    )
 | 
						|
 | 
						|
 | 
						|
@app.get("/health/detailed")
async def detailed_health_check():
    """
    Comprehensive health status with detailed monitoring data.

    Combines the overall health status, per-stream metrics, thread information
    and recovery statistics with basic worker-state counts.

    Raises:
        HTTPException: 500 if any monitoring component fails to import or report
    """
    try:
        from core.monitoring.health import health_monitor
        from core.monitoring.stream_health import stream_health_tracker
        from core.monitoring.thread_health import thread_health_monitor
        from core.monitoring.recovery import recovery_manager

        # Query every monitoring component before assembling the report.
        overall_health = health_monitor.get_health_status()
        stream_metrics = stream_health_tracker.get_all_metrics()
        thread_info = thread_health_monitor.get_all_thread_info()
        recovery_stats = recovery_manager.get_recovery_stats()

        report = {"timestamp": time.time()}
        report["overall_health"] = overall_health
        report["stream_metrics"] = stream_metrics
        report["thread_health"] = thread_info
        report["recovery_stats"] = recovery_stats
        report["system_info"] = {
            "active_subscriptions": len(worker_state.subscriptions),
            "active_sessions": len(worker_state.session_ids),
            "version": "2.0.0",
        }
        return report

    except Exception as e:
        logger.error(f"Error generating detailed health report: {e}")
        raise HTTPException(status_code=500, detail=f"Health monitoring error: {str(e)}")
 | 
						|
 | 
						|
 | 
						|
@app.get("/health/streams")
async def stream_health_status():
    """
    Stream-specific health monitoring.

    Returns the per-stream metrics and buffer statistics, plus a "frame_ages"
    summary giving, for each camera, the seconds since its last frame (None
    when no last-frame time is recorded) and its total frame count.

    Raises:
        HTTPException: 500 if the metrics cannot be collected
    """
    try:
        from core.monitoring.stream_health import stream_health_tracker
        from core.streaming.buffers import shared_cache_buffer

        stream_metrics = stream_health_tracker.get_all_metrics()
        buffer_stats = shared_cache_buffer.get_stats()

        # Read the clock once so every frame age and the reported timestamp
        # refer to the same instant.
        now = time.time()

        frame_ages = {}
        for camera_id, info in stream_metrics.items():
            last_frame_time = info.get("last_frame_time") if info else None
            frame_ages[camera_id] = {
                "age_seconds": now - last_frame_time if last_frame_time else None,
                "total_frames": info.get("frame_count", 0) if info else 0,
            }

        return {
            "timestamp": now,
            "stream_count": len(stream_metrics),
            "stream_metrics": stream_metrics,
            "buffer_stats": buffer_stats,
            "frame_ages": frame_ages,
        }

    except Exception as e:
        logger.error(f"Error generating stream health report: {e}")
        raise HTTPException(status_code=500, detail=f"Stream health error: {str(e)}")
 | 
						|
 | 
						|
 | 
						|
@app.get("/health/threads")
async def thread_health_status():
    """
    Thread-specific health monitoring.

    Returns the thread information and potential deadlocks reported by the
    thread health monitor, with a summary of responsive/unresponsive counts.
    An entry without an "is_responsive" key is counted in neither bucket.

    Raises:
        HTTPException: 500 if the thread information cannot be collected
    """
    try:
        from core.monitoring.thread_health import thread_health_monitor

        thread_info = thread_health_monitor.get_all_thread_info()
        deadlocks = thread_health_monitor.detect_deadlocks()

        # Missing keys default to False here and to True below, so such
        # entries are not counted as responsive or as unresponsive.
        responsive_total = len(
            [entry for entry in thread_info.values() if entry.get("is_responsive", False)]
        )
        unresponsive_total = len(
            [entry for entry in thread_info.values() if not entry.get("is_responsive", True)]
        )

        summary = {
            "responsive_threads": responsive_total,
            "unresponsive_threads": unresponsive_total,
            "deadlock_count": len(deadlocks),
        }
        return {
            "timestamp": time.time(),
            "thread_count": len(thread_info),
            "thread_info": thread_info,
            "potential_deadlocks": deadlocks,
            "summary": summary,
        }

    except Exception as e:
        logger.error(f"Error generating thread health report: {e}")
        raise HTTPException(status_code=500, detail=f"Thread health error: {str(e)}")
 | 
						|
 | 
						|
 | 
						|
@app.get("/health/recovery")
async def recovery_status():
    """
    Recovery system status and history.

    Returns the raw statistics from the recovery manager plus a summary with
    the recoveries in the last hour, the number of components that have a
    recovery state, and the failure and success counts summed over those states.

    Raises:
        HTTPException: 500 if the recovery statistics cannot be collected
    """
    try:
        from core.monitoring.recovery import recovery_manager

        recovery_stats = recovery_manager.get_recovery_stats()
        states = recovery_stats.get("recovery_states", {})

        # Sum the per-component counters; a missing counter counts as 0.
        failure_total = 0
        success_total = 0
        for state in states.values():
            failure_total += state.get("failure_count", 0)
        for state in states.values():
            success_total += state.get("success_count", 0)

        return {
            "timestamp": time.time(),
            "recovery_stats": recovery_stats,
            "summary": {
                "total_recoveries_last_hour": recovery_stats.get("total_recoveries_last_hour", 0),
                "components_with_recovery_state": len(states),
                "total_recovery_failures": failure_total,
                "total_recovery_successes": success_total,
            },
        }

    except Exception as e:
        logger.error(f"Error generating recovery status report: {e}")
        raise HTTPException(status_code=500, detail=f"Recovery status error: {str(e)}")
 | 
						|
 | 
						|
 | 
						|
@app.post("/health/recovery/force/{component}")
async def force_recovery(component: str, action: str = "restart_stream"):
    """
    Force a recovery action for a specific component.

    Args:
        component: Name of the component to recover (taken from the URL path).
        action: Recovery action value; must be a valid ``RecoveryAction``.

    Returns:
        A dict with the timestamp, component, action, the success flag returned
        by the recovery manager and a human-readable message.

    Raises:
        HTTPException: 400 if ``action`` is not a valid recovery action
        HTTPException: 500 if the recovery call itself raises
    """
    try:
        from core.monitoring.recovery import recovery_manager, RecoveryAction

        # Convert the raw string into the enum; unknown values become a 400.
        try:
            recovery_action = RecoveryAction(action)
        except ValueError:
            valid_actions = [a.value for a in RecoveryAction]
            raise HTTPException(
                status_code=400,
                detail=f"Invalid recovery action: {action}. Valid actions: {valid_actions}"
            )

        success = recovery_manager.force_recovery(component, recovery_action, "manual_api_request")
        outcome = "successful" if success else "failed"

        return {
            "timestamp": time.time(),
            "component": component,
            "action": action,
            "success": success,
            "message": f"Recovery {outcome} for component {component}",
        }

    except HTTPException:
        # Deliberate HTTP errors (the 400 above) pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Error forcing recovery for {component}: {e}")
        raise HTTPException(status_code=500, detail=f"Recovery error: {str(e)}")
 | 
						|
 | 
						|
 | 
						|
@app.get("/health/metrics")
async def health_metrics():
    """
    Performance and health metrics in a format suitable for monitoring systems.

    Returns a JSON object with a timestamp and a flat "metrics" mapping. Metric
    names follow a Prometheus-like naming scheme; per-stream entries carry the
    camera as a ``{camera="..."}`` suffix in the key, with "-" and "." in the
    camera ID replaced by "_".

    Raises:
        HTTPException: 500 if the metrics cannot be collected
    """
    try:
        from core.monitoring.health import health_monitor
        from core.monitoring.stream_health import stream_health_tracker
        from core.streaming.buffers import shared_cache_buffer

        overall_health = health_monitor.get_health_status()
        stream_metrics = stream_health_tracker.get_all_metrics()
        buffer_stats = shared_cache_buffer.get_stats()

        # Numeric encoding of the overall status; anything unrecognized maps to 4.
        status_codes = {
            "healthy": 1,
            "warning": 2,
            "critical": 3,
            "unknown": 4,
        }

        metrics = {
            "detector_worker_up": 1,
            "detector_worker_streams_total": len(stream_metrics),
            "detector_worker_subscriptions_total": len(worker_state.subscriptions),
            "detector_worker_sessions_total": len(worker_state.session_ids),
            "detector_worker_memory_mb": buffer_stats.get("total_memory_mb", 0),
            "detector_worker_health_status": status_codes.get(
                overall_health.get("overall_status", "unknown"), 4
            ),
        }

        # Per-stream metrics, keyed by metric name plus camera label.
        for camera_id, stream_info in stream_metrics.items():
            safe_camera_id = camera_id.replace("-", "_").replace(".", "_")
            label = f'{{camera="{safe_camera_id}"}}'
            metrics[f"detector_worker_stream_frames_total{label}"] = stream_info.get("frame_count", 0)
            metrics[f"detector_worker_stream_errors_total{label}"] = stream_info.get("error_count", 0)
            metrics[f"detector_worker_stream_fps{label}"] = stream_info.get("frames_per_second", 0)
            # A missing or None frame age is reported as 0.
            metrics[f"detector_worker_stream_frame_age_seconds{label}"] = (
                stream_info.get("last_frame_age_seconds") or 0
            )

        return {"timestamp": time.time(), "metrics": metrics}

    except Exception as e:
        logger.error(f"Error generating health metrics: {e}")
        raise HTTPException(status_code=500, detail=f"Metrics error: {str(e)}")
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
if __name__ == "__main__":
    # uvicorn is imported only when this file is executed directly.
    from uvicorn import run as serve

    # Listen on all interfaces, port 8001.
    serve(app, host="0.0.0.0", port=8001)