feat: optimize model loading in RAM via shared registry
All checks were successful
Build Worker Base and Application Images / check-base-changes (push) Successful in 8s
Build Worker Base and Application Images / build-base (push) Has been skipped
Build Worker Base and Application Images / build-docker (push) Successful in 2m47s
Build Worker Base and Application Images / deploy-stack (push) Successful in 10s
parent c715b26a2a
commit ac85caca39
4 changed files with 679 additions and 216 deletions
siwatsystem/model_registry.py (new file, 242 lines)

@@ -0,0 +1,242 @@
"""
|
||||
Shared Model Registry for Memory Optimization
|
||||
|
||||
This module implements a global shared model registry to prevent duplicate model loading
|
||||
in memory when multiple cameras use the same model. This significantly reduces RAM and
|
||||
GPU VRAM usage by ensuring only one instance of each unique model is loaded.
|
||||
|
||||
Key Features:
|
||||
- Thread-safe model loading and access
|
||||
- Reference counting for proper cleanup
|
||||
- Automatic model lifecycle management
|
||||
- Maintains compatibility with existing pipeline system
|
||||
"""
|
||||
|
||||
import os
|
||||
import threading
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, Set
|
||||
import torch
|
||||
from ultralytics import YOLO
|
||||
|
||||
# Create a logger for this module
|
||||
logger = logging.getLogger("detector_worker.model_registry")
|
||||
|
||||
class ModelRegistry:
    """
    Singleton class for managing shared YOLO models across multiple cameras.

    This registry ensures that each unique model is loaded only once in memory,
    dramatically reducing RAM and GPU VRAM usage when multiple cameras use the
    same model.
    """

    _instance = None
    _lock = threading.Lock()

    def __new__(cls):
        # Double-checked locking: the first check avoids taking the lock on
        # every call; the second check prevents two threads from racing to
        # create the singleton.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super(ModelRegistry, cls).__new__(cls)
                    cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return

        self._initialized = True

        # Thread-safe storage for loaded models
        self._models: Dict[str, YOLO] = {}            # modelId -> YOLO model instance
        self._model_files: Dict[str, str] = {}        # modelId -> file path
        self._reference_counts: Dict[str, int] = {}   # modelId -> reference count
        self._model_lock = threading.RLock()          # Reentrant lock for nested calls

        logger.info("🏭 Shared Model Registry initialized - ready for memory-optimized model loading")

    def get_model(self, model_id: str, model_file_path: str) -> YOLO:
        """
        Get or load a YOLO model. Returns shared instance if already loaded.

        Args:
            model_id: Unique identifier for the model
            model_file_path: Path to the model file

        Returns:
            YOLO model instance (shared across all callers)
        """
        with self._model_lock:
            if model_id in self._models:
                # Model already loaded - increment reference count and return
                self._reference_counts[model_id] += 1
                logger.info(f"📖 Model '{model_id}' reused (ref_count: {self._reference_counts[model_id]}) - SAVED MEMORY!")
                return self._models[model_id]

            # Model not loaded yet - load it
            logger.info(f"🔄 Loading NEW model '{model_id}' from {model_file_path}")

            if not os.path.exists(model_file_path):
                raise FileNotFoundError(f"Model file {model_file_path} not found")

            try:
                # Load the YOLO model
                model = YOLO(model_file_path)

                # Move to GPU if available
                if torch.cuda.is_available():
                    logger.info(f"🚀 CUDA available. Moving model '{model_id}' to GPU VRAM")
                    model.to("cuda")
                else:
                    logger.info(f"💻 CUDA not available. Using CPU for model '{model_id}'")

                # Store in registry
                self._models[model_id] = model
                self._model_files[model_id] = model_file_path
                self._reference_counts[model_id] = 1

                logger.info(f"✅ Model '{model_id}' loaded and registered (ref_count: 1)")
                self._log_registry_status()

                return model

            except Exception as e:
                logger.error(f"❌ Failed to load model '{model_id}' from {model_file_path}: {e}")
                raise

    def release_model(self, model_id: str) -> None:
        """
        Release a reference to a model. If reference count reaches zero,
        the model may be unloaded to free memory.

        Args:
            model_id: Unique identifier for the model to release
        """
        with self._model_lock:
            if model_id not in self._reference_counts:
                logger.warning(f"⚠️ Attempted to release unknown model '{model_id}'")
                return

            self._reference_counts[model_id] -= 1
            logger.info(f"📉 Model '{model_id}' reference count decreased to {self._reference_counts[model_id]}")

            # For now, keep models in memory even when ref count reaches 0.
            # This prevents reload overhead if the same model is needed again soon.
            # In the future, we could implement an LRU eviction policy.
            # if self._reference_counts[model_id] <= 0:
            #     logger.info(f"💤 Model '{model_id}' has 0 references but keeping in memory for reuse")
            #     # Optionally: self._unload_model(model_id)

    def _unload_model(self, model_id: str) -> None:
        """
        Internal method to unload a model from memory.

        Not called when a reference count reaches zero (to avoid reload
        overhead); used by cleanup_all() during shutdown.
        """
        with self._model_lock:
            if model_id in self._models:
                logger.info(f"🗑️ Unloading model '{model_id}' from memory")

                # Clear GPU memory if model was on GPU
                model = self._models[model_id]
                if hasattr(model, 'model') and hasattr(model.model, 'cuda'):
                    try:
                        # Move model to CPU before deletion to free GPU memory
                        model.to('cpu')
                    except Exception as e:
                        logger.warning(f"⚠️ Failed to move model '{model_id}' to CPU: {e}")

                # Remove from registry
                del self._models[model_id]
                del self._model_files[model_id]
                del self._reference_counts[model_id]

                # Force garbage collection
                import gc
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

                logger.info(f"✅ Model '{model_id}' unloaded and memory freed")
                self._log_registry_status()

    def get_registry_status(self) -> Dict[str, Any]:
        """
        Get current status of the model registry.

        Returns:
            Dictionary with registry statistics
        """
        with self._model_lock:
            return {
                "total_models": len(self._models),
                "models": {
                    model_id: {
                        "file_path": self._model_files[model_id],
                        "reference_count": self._reference_counts[model_id]
                    }
                    for model_id in self._models
                },
                "total_references": sum(self._reference_counts.values())
            }

    def _log_registry_status(self) -> None:
        """Log current registry status for debugging."""
        status = self.get_registry_status()
        logger.info(f"📊 Model Registry Status: {status['total_models']} unique models, {status['total_references']} total references")
        for model_id, info in status['models'].items():
            logger.debug(f"  📋 '{model_id}': refs={info['reference_count']}, file={os.path.basename(info['file_path'])}")

    def cleanup_all(self) -> None:
        """
        Clean up all models from the registry. Used during shutdown.
        """
        with self._model_lock:
            model_ids = list(self._models.keys())
            logger.info(f"🧹 Cleaning up {len(model_ids)} models from registry")

            for model_id in model_ids:
                self._unload_model(model_id)

            logger.info("✅ Model registry cleanup complete")


# Global singleton instance
_registry = ModelRegistry()


def get_shared_model(model_id: str, model_file_path: str) -> YOLO:
    """
    Convenience function to get a shared model instance.

    Args:
        model_id: Unique identifier for the model
        model_file_path: Path to the model file

    Returns:
        YOLO model instance (shared across all callers)
    """
    return _registry.get_model(model_id, model_file_path)


def release_shared_model(model_id: str) -> None:
    """
    Convenience function to release a shared model reference.

    Args:
        model_id: Unique identifier for the model to release
    """
    _registry.release_model(model_id)


def get_registry_status() -> Dict[str, Any]:
    """
    Convenience function to get registry status.

    Returns:
        Dictionary with registry statistics
    """
    return _registry.get_registry_status()


def cleanup_registry() -> None:
    """
    Convenience function to cleanup the entire registry.
    """
    _registry.cleanup_all()
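
For reference, a minimal usage sketch of the new API from a caller's point of view (the model ID and file path below are illustrative placeholders, not values from this commit):

from siwatsystem.model_registry import (
    get_shared_model,
    release_shared_model,
    get_registry_status,
)

# Two cameras requesting the same model ID share one YOLO instance,
# so the weights occupy RAM/VRAM only once.
camera1_model = get_shared_model("yolov8n", "models/yolov8n.pt")  # loads, ref_count=1
camera2_model = get_shared_model("yolov8n", "models/yolov8n.pt")  # reused, ref_count=2
assert camera1_model is camera2_model

print(get_registry_status())
# {'total_models': 1,
#  'models': {'yolov8n': {'file_path': 'models/yolov8n.pt', 'reference_count': 2}},
#  'total_references': 2}

# Each camera releases its reference on shutdown; under the current policy
# the model stays resident even at ref_count 0 to avoid reload overhead.
release_shared_model("yolov8n")
release_shared_model("yolov8n")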