From 5065e43837ad4c3e876e5ee451c7db53f3fa3663 Mon Sep 17 00:00:00 2001
From: ziesorx
Date: Thu, 25 Sep 2025 01:26:19 +0700
Subject: [PATCH] feat: update pynvml in linux

---
 .gitignore                  |  2 ++
 core/communication/state.py | 31 +++++++++++++++++++++++--------
 requirements.base.txt       |  3 ++-
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/.gitignore b/.gitignore
index ff8c99d..2da89cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@
 app.log
 *.pt
 
+images
+
 # All pycache directories
 __pycache__/
 .mptacache
diff --git a/core/communication/state.py b/core/communication/state.py
index b60f341..9016c07 100644
--- a/core/communication/state.py
+++ b/core/communication/state.py
@@ -10,7 +10,7 @@ from .models import CameraConnection, SubscriptionObject
 
 logger = logging.getLogger(__name__)
 
-# Try to import torch for GPU monitoring
+# Try to import torch and pynvml for GPU monitoring
 try:
     import torch
     TORCH_AVAILABLE = True
@@ -18,6 +18,18 @@ except ImportError:
     TORCH_AVAILABLE = False
     logger.warning("PyTorch not available, GPU metrics will not be collected")
 
+try:
+    import pynvml
+    PYNVML_AVAILABLE = True
+    pynvml.nvmlInit()
+    logger.info("NVIDIA ML Python (pynvml) initialized successfully")
+except ImportError:
+    PYNVML_AVAILABLE = False
+    logger.debug("pynvml not available, falling back to PyTorch GPU monitoring")
+except Exception as e:
+    PYNVML_AVAILABLE = False
+    logger.warning(f"Failed to initialize pynvml: {e}")
+
 
 @dataclass
 class WorkerState:
@@ -180,21 +192,24 @@ class SystemMetrics:
     @staticmethod
     def get_gpu_usage() -> Optional[float]:
         """Get current GPU usage percentage."""
-        if not TORCH_AVAILABLE:
-            return None
-
         try:
-            if torch.cuda.is_available():
-                # PyTorch doesn't provide direct GPU utilization
-                # This is a placeholder - real implementation might use nvidia-ml-py
+            # Prefer pynvml for accurate GPU utilization
+            if PYNVML_AVAILABLE:
+                handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # First GPU
+                utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
+                return float(utilization.gpu)
+
+            # Fallback to PyTorch memory-based estimation
+            elif TORCH_AVAILABLE and torch.cuda.is_available():
                 if hasattr(torch.cuda, 'utilization'):
                     return torch.cuda.utilization()
                 else:
-                    # Fallback: estimate based on memory usage
+                    # Estimate based on memory usage
                     allocated = torch.cuda.memory_allocated()
                     reserved = torch.cuda.memory_reserved()
                     if reserved > 0:
                         return (allocated / reserved) * 100
+
+            return None
         except Exception as e:
             logger.error(f"Failed to get GPU usage: {e}")
diff --git a/requirements.base.txt b/requirements.base.txt
index 094f332..04e90ba 100644
--- a/requirements.base.txt
+++ b/requirements.base.txt
@@ -5,4 +5,5 @@ opencv-python
 scipy
 filterpy
 psycopg2-binary
-lap>=0.5.12
\ No newline at end of file
+lap>=0.5.12
+pynvml
\ No newline at end of file