"""
|
|
Shared Model Registry for Memory Optimization
|
|
|
|
This module implements a global shared model registry to prevent duplicate model loading
|
|
in memory when multiple cameras use the same model. This significantly reduces RAM and
|
|
GPU VRAM usage by ensuring only one instance of each unique model is loaded.
|
|
|
|
Key Features:
|
|
- Thread-safe model loading and access
|
|
- Reference counting for proper cleanup
|
|
- Automatic model lifecycle management
|
|
- Maintains compatibility with existing pipeline system
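
Example usage (illustrative sketch; the import path is inferred from the logger
name below, and the model id and weights path are placeholders, not project files):

    from detector_worker.model_registry import get_shared_model, release_shared_model

    model = get_shared_model("front_gate_yolo", "models/front_gate_yolo.pt")
    results = model("frame.jpg")  # behaves like any Ultralytics YOLO instance
    release_shared_model("front_gate_yolo")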
"""

import os
import threading
import logging
from typing import Dict, Any, Optional, Set
import torch
from ultralytics import YOLO

# Create a logger for this module
logger = logging.getLogger("detector_worker.model_registry")

class ModelRegistry:
    """
    Singleton class for managing shared YOLO models across multiple cameras.

    This registry ensures that each unique model is loaded only once in memory,
    dramatically reducing RAM and GPU VRAM usage when multiple cameras use the
    same model.
    """

    _instance = None
    _lock = threading.Lock()

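    # Double-checked locking: the unlocked check is the fast path once the
    # singleton exists; the second check inside the lock guards against races.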
    def __new__(cls):
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super(ModelRegistry, cls).__new__(cls)
                    cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return

        self._initialized = True

        # Thread-safe storage for loaded models
        self._models: Dict[str, YOLO] = {}           # modelId -> YOLO model instance
        self._model_files: Dict[str, str] = {}       # modelId -> file path
        self._reference_counts: Dict[str, int] = {}  # modelId -> reference count
        self._model_lock = threading.RLock()         # Reentrant lock for nested calls

        logger.info("🏭 Shared Model Registry initialized - ready for memory-optimized model loading")

    def get_model(self, model_id: str, model_file_path: str) -> YOLO:
        """
        Get or load a YOLO model. Returns the shared instance if already loaded.

        Args:
            model_id: Unique identifier for the model
            model_file_path: Path to the model file

        Returns:
            YOLO model instance (shared across all callers)
        """
        with self._model_lock:
            if model_id in self._models:
                # Model already loaded - increment reference count and return
                self._reference_counts[model_id] += 1
                logger.info(f"📖 Model '{model_id}' reused (ref_count: {self._reference_counts[model_id]}) - SAVED MEMORY!")
                return self._models[model_id]

            # Model not loaded yet - load it
            logger.info(f"🔄 Loading NEW model '{model_id}' from {model_file_path}")

            if not os.path.exists(model_file_path):
                raise FileNotFoundError(f"Model file {model_file_path} not found")

            try:
                # Load the YOLO model
                model = YOLO(model_file_path)

                # Move to GPU if available
                if torch.cuda.is_available():
                    logger.info(f"🚀 CUDA available. Moving model '{model_id}' to GPU VRAM")
                    model.to("cuda")
                else:
                    logger.info(f"💻 CUDA not available. Using CPU for model '{model_id}'")

                # Store in registry
                self._models[model_id] = model
                self._model_files[model_id] = model_file_path
                self._reference_counts[model_id] = 1

                logger.info(f"✅ Model '{model_id}' loaded and registered (ref_count: 1)")
                self._log_registry_status()

                return model

            except Exception as e:
                logger.error(f"❌ Failed to load model '{model_id}' from {model_file_path}: {e}")
                raise

    def release_model(self, model_id: str) -> None:
        """
        Release a reference to a model. If the reference count reaches zero,
        the model may be unloaded to free memory.

        Args:
            model_id: Unique identifier for the model to release
        """
        with self._model_lock:
            if model_id not in self._reference_counts:
                logger.warning(f"⚠️ Attempted to release unknown model '{model_id}'")
                return

            self._reference_counts[model_id] -= 1
            logger.info(f"📉 Model '{model_id}' reference count decreased to {self._reference_counts[model_id]}")

            # For now, keep models in memory even when the ref count reaches 0.
            # This prevents reload overhead if the same model is needed again soon.
            # In the future, we could implement an LRU eviction policy:
            # if self._reference_counts[model_id] <= 0:
            #     logger.info(f"💤 Model '{model_id}' has 0 references but keeping in memory for reuse")
            #     # Optionally: self._unload_model(model_id)

    def _unload_model(self, model_id: str) -> None:
        """
        Internal method to unload a model from memory.
        Currently not used, to prevent reload overhead.
        """
        with self._model_lock:
            if model_id in self._models:
                logger.info(f"🗑️ Unloading model '{model_id}' from memory")

                # Clear GPU memory if model was on GPU
                model = self._models[model_id]
                if hasattr(model, 'model') and hasattr(model.model, 'cuda'):
                    try:
                        # Move model to CPU before deletion to free GPU memory
                        model.to('cpu')
                    except Exception as e:
                        logger.warning(f"⚠️ Failed to move model '{model_id}' to CPU: {e}")

                # Remove from registry
                del self._models[model_id]
                del self._model_files[model_id]
                del self._reference_counts[model_id]

                # Force garbage collection
                import gc
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

                logger.info(f"✅ Model '{model_id}' unloaded and memory freed")
                self._log_registry_status()

    def get_registry_status(self) -> Dict[str, Any]:
        """
        Get current status of the model registry.

        Returns:
            Dictionary with registry statistics
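
        Example shape (illustrative values):
            {
                "total_models": 1,
                "models": {"model-a": {"file_path": "/models/a.pt", "reference_count": 2}},
                "total_references": 2
            }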
        """
        with self._model_lock:
            return {
                "total_models": len(self._models),
                "models": {
                    model_id: {
                        "file_path": self._model_files[model_id],
                        "reference_count": self._reference_counts[model_id]
                    }
                    for model_id in self._models
                },
                "total_references": sum(self._reference_counts.values())
            }

    def _log_registry_status(self) -> None:
        """Log current registry status for debugging."""
        status = self.get_registry_status()
        logger.info(f"📊 Model Registry Status: {status['total_models']} unique models, {status['total_references']} total references")
        for model_id, info in status['models'].items():
            logger.debug(f"  📋 '{model_id}': refs={info['reference_count']}, file={os.path.basename(info['file_path'])}")

    def cleanup_all(self) -> None:
        """
        Clean up all models from the registry. Used during shutdown.
        """
        with self._model_lock:
            model_ids = list(self._models.keys())
            logger.info(f"🧹 Cleaning up {len(model_ids)} models from registry")

            for model_id in model_ids:
                self._unload_model(model_id)

            logger.info("✅ Model registry cleanup complete")


# Global singleton instance
_registry = ModelRegistry()

def get_shared_model(model_id: str, model_file_path: str) -> YOLO:
    """
    Convenience function to get a shared model instance.

    Args:
        model_id: Unique identifier for the model
        model_file_path: Path to the model file

    Returns:
        YOLO model instance (shared across all callers)
    """
    return _registry.get_model(model_id, model_file_path)

def release_shared_model(model_id: str) -> None:
    """
    Convenience function to release a shared model reference.

    Args:
        model_id: Unique identifier for the model to release
    """
    _registry.release_model(model_id)

def get_registry_status() -> Dict[str, Any]:
    """
    Convenience function to get registry status.

    Returns:
        Dictionary with registry statistics
    """
    return _registry.get_registry_status()

def cleanup_registry() -> None:
    """
    Convenience function to clean up the entire registry.
    """
    _registry.cleanup_all()
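

# Minimal smoke-test sketch (illustrative; not part of the worker runtime). The
# weights path below is a placeholder - point it at any local Ultralytics YOLO
# .pt file before running this module directly.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    weights = "models/example.pt"  # placeholder path, not shipped with this project
    if not os.path.exists(weights):
        print(f"Skipping smoke test: {weights} not found")
    else:
        # Two "cameras" requesting the same model id should share one instance.
        model_a = get_shared_model("example-model", weights)
        model_b = get_shared_model("example-model", weights)
        assert model_a is model_b

        print(get_registry_status())

        release_shared_model("example-model")
        release_shared_model("example-model")
        cleanup_registry()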