feat: optimize model loading in RAM via shared registry
All checks were successful
Build Worker Base and Application Images / check-base-changes (push) Successful in 8s
Build Worker Base and Application Images / build-base (push) Has been skipped
Build Worker Base and Application Images / build-docker (push) Successful in 2m47s
Build Worker Base and Application Images / deploy-stack (push) Successful in 10s
parent c715b26a2a
commit ac85caca39
4 changed files with 679 additions and 216 deletions
siwatsystem/model_registry.py (new file, 242 lines)

@@ -0,0 +1,242 @@
"""
|
||||
Shared Model Registry for Memory Optimization
|
||||
|
||||
This module implements a global shared model registry to prevent duplicate model loading
|
||||
in memory when multiple cameras use the same model. This significantly reduces RAM and
|
||||
GPU VRAM usage by ensuring only one instance of each unique model is loaded.
|
||||
|
||||
Key Features:
|
||||
- Thread-safe model loading and access
|
||||
- Reference counting for proper cleanup
|
||||
- Automatic model lifecycle management
|
||||
- Maintains compatibility with existing pipeline system
|
||||
"""
|
||||
|
||||
import os
|
||||
import threading
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, Set
|
||||
import torch
|
||||
from ultralytics import YOLO
|
||||
|
||||
# Create a logger for this module
|
||||
logger = logging.getLogger("detector_worker.model_registry")
|
||||
|
||||
class ModelRegistry:
    """
    Singleton class for managing shared YOLO models across multiple cameras.

    This registry ensures that each unique model is loaded only once in memory,
    dramatically reducing RAM and GPU VRAM usage when multiple cameras use the
    same model.
    """

    _instance = None
    _lock = threading.Lock()

    def __new__(cls):
        # Double-checked locking: the first check avoids taking the lock on
        # every call; the second check prevents two threads from racing to
        # create the singleton.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super(ModelRegistry, cls).__new__(cls)
                    cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return

        self._initialized = True

        # Thread-safe storage for loaded models
        self._models: Dict[str, YOLO] = {}            # modelId -> YOLO model instance
        self._model_files: Dict[str, str] = {}        # modelId -> file path
        self._reference_counts: Dict[str, int] = {}   # modelId -> reference count
        self._model_lock = threading.RLock()          # Reentrant lock for nested calls

        logger.info("🏭 Shared Model Registry initialized - ready for memory-optimized model loading")

    def get_model(self, model_id: str, model_file_path: str) -> YOLO:
        """
        Get or load a YOLO model. Returns shared instance if already loaded.

        Args:
            model_id: Unique identifier for the model
            model_file_path: Path to the model file

        Returns:
            YOLO model instance (shared across all callers)
        """
        with self._model_lock:
            if model_id in self._models:
                # Model already loaded - increment reference count and return
                self._reference_counts[model_id] += 1
                logger.info(f"📖 Model '{model_id}' reused (ref_count: {self._reference_counts[model_id]}) - SAVED MEMORY!")
                return self._models[model_id]

            # Model not loaded yet - load it
            logger.info(f"🔄 Loading NEW model '{model_id}' from {model_file_path}")

            if not os.path.exists(model_file_path):
                raise FileNotFoundError(f"Model file {model_file_path} not found")

            try:
                # Load the YOLO model
                model = YOLO(model_file_path)

                # Move to GPU if available
                if torch.cuda.is_available():
                    logger.info(f"🚀 CUDA available. Moving model '{model_id}' to GPU VRAM")
                    model.to("cuda")
                else:
                    logger.info(f"💻 CUDA not available. Using CPU for model '{model_id}'")

                # Store in registry
                self._models[model_id] = model
                self._model_files[model_id] = model_file_path
                self._reference_counts[model_id] = 1

                logger.info(f"✅ Model '{model_id}' loaded and registered (ref_count: 1)")
                self._log_registry_status()

                return model

            except Exception as e:
                logger.error(f"❌ Failed to load model '{model_id}' from {model_file_path}: {e}")
                raise

    def release_model(self, model_id: str) -> None:
        """
        Release a reference to a model. If reference count reaches zero,
        the model may be unloaded to free memory.

        Args:
            model_id: Unique identifier for the model to release
        """
        with self._model_lock:
            if model_id not in self._reference_counts:
                logger.warning(f"⚠️ Attempted to release unknown model '{model_id}'")
                return

            self._reference_counts[model_id] -= 1
            logger.info(f"📉 Model '{model_id}' reference count decreased to {self._reference_counts[model_id]}")

            # For now, keep models in memory even when ref count reaches 0.
            # This prevents reload overhead if the same model is needed again soon.
            # In the future, we could implement an LRU eviction policy.
            # if self._reference_counts[model_id] <= 0:
            #     logger.info(f"💤 Model '{model_id}' has 0 references but keeping in memory for reuse")
            #     # Optionally: self._unload_model(model_id)

    def _unload_model(self, model_id: str) -> None:
        """
        Internal method to unload a model from memory.

        Not called when a reference count reaches zero (to avoid reload
        overhead); used by cleanup_all() during shutdown.
        """
        with self._model_lock:
            if model_id in self._models:
                logger.info(f"🗑️ Unloading model '{model_id}' from memory")

                # Clear GPU memory if model was on GPU
                model = self._models[model_id]
                if hasattr(model, 'model') and hasattr(model.model, 'cuda'):
                    try:
                        # Move model to CPU before deletion to free GPU memory
                        model.to('cpu')
                    except Exception as e:
                        logger.warning(f"⚠️ Failed to move model '{model_id}' to CPU: {e}")

                # Remove from registry
                del self._models[model_id]
                del self._model_files[model_id]
                del self._reference_counts[model_id]

                # Force garbage collection
                import gc
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

                logger.info(f"✅ Model '{model_id}' unloaded and memory freed")
                self._log_registry_status()

    def get_registry_status(self) -> Dict[str, Any]:
        """
        Get current status of the model registry.

        Returns:
            Dictionary with registry statistics
        """
        with self._model_lock:
            return {
                "total_models": len(self._models),
                "models": {
                    model_id: {
                        "file_path": self._model_files[model_id],
                        "reference_count": self._reference_counts[model_id]
                    }
                    for model_id in self._models
                },
                "total_references": sum(self._reference_counts.values())
            }

    def _log_registry_status(self) -> None:
        """Log current registry status for debugging."""
        status = self.get_registry_status()
        logger.info(f"📊 Model Registry Status: {status['total_models']} unique models, {status['total_references']} total references")
        for model_id, info in status['models'].items():
            logger.debug(f"  📋 '{model_id}': refs={info['reference_count']}, file={os.path.basename(info['file_path'])}")

    def cleanup_all(self) -> None:
        """
        Clean up all models from the registry. Used during shutdown.
        """
        with self._model_lock:
            model_ids = list(self._models.keys())
            logger.info(f"🧹 Cleaning up {len(model_ids)} models from registry")

            for model_id in model_ids:
                self._unload_model(model_id)

            logger.info("✅ Model registry cleanup complete")


# Global singleton instance
_registry = ModelRegistry()


def get_shared_model(model_id: str, model_file_path: str) -> YOLO:
    """
    Convenience function to get a shared model instance.

    Args:
        model_id: Unique identifier for the model
        model_file_path: Path to the model file

    Returns:
        YOLO model instance (shared across all callers)
    """
    return _registry.get_model(model_id, model_file_path)


def release_shared_model(model_id: str) -> None:
    """
    Convenience function to release a shared model reference.

    Args:
        model_id: Unique identifier for the model to release
    """
    _registry.release_model(model_id)


def get_registry_status() -> Dict[str, Any]:
    """
    Convenience function to get registry status.

    Returns:
        Dictionary with registry statistics
    """
    return _registry.get_registry_status()


def cleanup_registry() -> None:
    """
    Convenience function to cleanup the entire registry.
    """
    _registry.cleanup_all()
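
For reference, a minimal usage sketch of the new API from a caller's point of view (the model ID and file path below are illustrative placeholders, not values from this commit):

from siwatsystem.model_registry import (
    get_shared_model,
    release_shared_model,
    get_registry_status,
)

# Two cameras requesting the same model ID share one YOLO instance,
# so the weights occupy RAM/VRAM only once.
camera1_model = get_shared_model("yolov8n", "models/yolov8n.pt")  # loads, ref_count=1
camera2_model = get_shared_model("yolov8n", "models/yolov8n.pt")  # reused, ref_count=2
assert camera1_model is camera2_model

print(get_registry_status())
# {'total_models': 1,
#  'models': {'yolov8n': {'file_path': 'models/yolov8n.pt', 'reference_count': 2}},
#  'total_references': 2}

# Each camera releases its reference on shutdown; under the current policy
# the model stays resident even at ref_count 0 to avoid reload overhead.
release_shared_model("yolov8n")
release_shared_model("yolov8n")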