""" Shared Model Registry for Memory Optimization This module implements a global shared model registry to prevent duplicate model loading in memory when multiple cameras use the same model. This significantly reduces RAM and GPU VRAM usage by ensuring only one instance of each unique model is loaded. Key Features: - Thread-safe model loading and access - Reference counting for proper cleanup - Automatic model lifecycle management - Maintains compatibility with existing pipeline system """ import os import threading import logging from typing import Dict, Any, Optional, Set import torch from ultralytics import YOLO # Create a logger for this module logger = logging.getLogger("detector_worker.model_registry") class ModelRegistry: """ Singleton class for managing shared YOLO models across multiple cameras. This registry ensures that each unique model is loaded only once in memory, dramatically reducing RAM and GPU VRAM usage when multiple cameras use the same model. """ _instance = None _lock = threading.Lock() def __new__(cls): if cls._instance is None: with cls._lock: if cls._instance is None: cls._instance = super(ModelRegistry, cls).__new__(cls) cls._instance._initialized = False return cls._instance def __init__(self): if self._initialized: return self._initialized = True # Thread-safe storage for loaded models self._models: Dict[str, YOLO] = {} # modelId -> YOLO model instance self._model_files: Dict[str, str] = {} # modelId -> file path self._reference_counts: Dict[str, int] = {} # modelId -> reference count self._model_lock = threading.RLock() # Reentrant lock for nested calls logger.info("๐Ÿญ Shared Model Registry initialized - ready for memory-optimized model loading") def get_model(self, model_id: str, model_file_path: str) -> YOLO: """ Get or load a YOLO model. Returns shared instance if already loaded. Args: model_id: Unique identifier for the model model_file_path: Path to the model file Returns: YOLO model instance (shared across all callers) """ with self._model_lock: if model_id in self._models: # Model already loaded - increment reference count and return self._reference_counts[model_id] += 1 logger.info(f"๐Ÿ“– Model '{model_id}' reused (ref_count: {self._reference_counts[model_id]}) - SAVED MEMORY!") return self._models[model_id] # Model not loaded yet - load it logger.info(f"๐Ÿ”„ Loading NEW model '{model_id}' from {model_file_path}") if not os.path.exists(model_file_path): raise FileNotFoundError(f"Model file {model_file_path} not found") try: # Load the YOLO model model = YOLO(model_file_path) # Move to GPU if available if torch.cuda.is_available(): logger.info(f"๐Ÿš€ CUDA available. Moving model '{model_id}' to GPU VRAM") model.to("cuda") else: logger.info(f"๐Ÿ’ป CUDA not available. Using CPU for model '{model_id}'") # Store in registry self._models[model_id] = model self._model_files[model_id] = model_file_path self._reference_counts[model_id] = 1 logger.info(f"โœ… Model '{model_id}' loaded and registered (ref_count: 1)") self._log_registry_status() return model except Exception as e: logger.error(f"โŒ Failed to load model '{model_id}' from {model_file_path}: {e}") raise def release_model(self, model_id: str) -> None: """ Release a reference to a model. If reference count reaches zero, the model may be unloaded to free memory. 
Args: model_id: Unique identifier for the model to release """ with self._model_lock: if model_id not in self._reference_counts: logger.warning(f"โš ๏ธ Attempted to release unknown model '{model_id}'") return self._reference_counts[model_id] -= 1 logger.info(f"๐Ÿ“‰ Model '{model_id}' reference count decreased to {self._reference_counts[model_id]}") # For now, keep models in memory even when ref count reaches 0 # This prevents reload overhead if the same model is needed again soon # In the future, we could implement LRU eviction policy # if self._reference_counts[model_id] <= 0: # logger.info(f"๐Ÿ’ค Model '{model_id}' has 0 references but keeping in memory for reuse") # Optionally: self._unload_model(model_id) def _unload_model(self, model_id: str) -> None: """ Internal method to unload a model from memory. Currently not used to prevent reload overhead. """ with self._model_lock: if model_id in self._models: logger.info(f"๐Ÿ—‘๏ธ Unloading model '{model_id}' from memory") # Clear GPU memory if model was on GPU model = self._models[model_id] if hasattr(model, 'model') and hasattr(model.model, 'cuda'): try: # Move model to CPU before deletion to free GPU memory model.to('cpu') except Exception as e: logger.warning(f"โš ๏ธ Failed to move model '{model_id}' to CPU: {e}") # Remove from registry del self._models[model_id] del self._model_files[model_id] del self._reference_counts[model_id] # Force garbage collection import gc gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() logger.info(f"โœ… Model '{model_id}' unloaded and memory freed") self._log_registry_status() def get_registry_status(self) -> Dict[str, Any]: """ Get current status of the model registry. Returns: Dictionary with registry statistics """ with self._model_lock: return { "total_models": len(self._models), "models": { model_id: { "file_path": self._model_files[model_id], "reference_count": self._reference_counts[model_id] } for model_id in self._models }, "total_references": sum(self._reference_counts.values()) } def _log_registry_status(self) -> None: """Log current registry status for debugging.""" status = self.get_registry_status() logger.info(f"๐Ÿ“Š Model Registry Status: {status['total_models']} unique models, {status['total_references']} total references") for model_id, info in status['models'].items(): logger.debug(f" ๐Ÿ“‹ '{model_id}': refs={info['reference_count']}, file={os.path.basename(info['file_path'])}") def cleanup_all(self) -> None: """ Clean up all models from the registry. Used during shutdown. """ with self._model_lock: model_ids = list(self._models.keys()) logger.info(f"๐Ÿงน Cleaning up {len(model_ids)} models from registry") for model_id in model_ids: self._unload_model(model_id) logger.info("โœ… Model registry cleanup complete") # Global singleton instance _registry = ModelRegistry() def get_shared_model(model_id: str, model_file_path: str) -> YOLO: """ Convenience function to get a shared model instance. Args: model_id: Unique identifier for the model model_file_path: Path to the model file Returns: YOLO model instance (shared across all callers) """ return _registry.get_model(model_id, model_file_path) def release_shared_model(model_id: str) -> None: """ Convenience function to release a shared model reference. Args: model_id: Unique identifier for the model to release """ _registry.release_model(model_id) def get_registry_status() -> Dict[str, Any]: """ Convenience function to get registry status. 
Returns: Dictionary with registry statistics """ return _registry.get_registry_status() def cleanup_registry() -> None: """ Convenience function to cleanup the entire registry. """ _registry.cleanup_all()
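

# ----------------------------------------------------------------------------
# Illustrative usage sketch (not part of the pipeline itself).
# The model id "car_detector" and the path "models/car_detector.pt" below are
# hypothetical placeholders; in practice both come from the camera/pipeline
# configuration, and a valid YOLO weights file must exist at that path.
# ----------------------------------------------------------------------------
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Two "cameras" requesting the same model id share one in-memory instance.
    model_a = get_shared_model("car_detector", "models/car_detector.pt")
    model_b = get_shared_model("car_detector", "models/car_detector.pt")
    assert model_a is model_b  # same object; the weights were loaded only once

    # Inspect how many unique models are loaded and how many references exist.
    print(get_registry_status())

    # Pair every get_shared_model() call with a release when the camera stops.
    release_shared_model("car_detector")
    release_shared_model("car_detector")

    # On worker shutdown, unload everything and free GPU memory.
    cleanup_registry()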