From 5065e43837ad4c3e876e5ee451c7db53f3fa3663 Mon Sep 17 00:00:00 2001
From: ziesorx
Date: Thu, 25 Sep 2025 01:26:19 +0700
Subject: [PATCH] feat: update pynvml in linux

---
 .gitignore                  |  2 ++
 core/communication/state.py | 31 +++++++++++++++++++++++--------
 requirements.base.txt       |  3 ++-
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/.gitignore b/.gitignore
index ff8c99d..2da89cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@
 app.log
 *.pt
 
+images
+
 # All pycache directories
 __pycache__/
 .mptacache
diff --git a/core/communication/state.py b/core/communication/state.py
index b60f341..9016c07 100644
--- a/core/communication/state.py
+++ b/core/communication/state.py
@@ -10,7 +10,7 @@ from .models import CameraConnection, SubscriptionObject
 
 logger = logging.getLogger(__name__)
 
-# Try to import torch for GPU monitoring
+# Try to import torch and pynvml for GPU monitoring
 try:
     import torch
     TORCH_AVAILABLE = True
@@ -18,6 +18,18 @@ except ImportError:
     TORCH_AVAILABLE = False
     logger.warning("PyTorch not available, GPU metrics will not be collected")
 
+try:
+    import pynvml
+    PYNVML_AVAILABLE = True
+    pynvml.nvmlInit()
+    logger.info("NVIDIA ML Python (pynvml) initialized successfully")
+except ImportError:
+    PYNVML_AVAILABLE = False
+    logger.debug("pynvml not available, falling back to PyTorch GPU monitoring")
+except Exception as e:
+    PYNVML_AVAILABLE = False
+    logger.warning(f"Failed to initialize pynvml: {e}")
+
 
 @dataclass
 class WorkerState:
@@ -180,21 +192,24 @@ class SystemMetrics:
     @staticmethod
     def get_gpu_usage() -> Optional[float]:
         """Get current GPU usage percentage."""
-        if not TORCH_AVAILABLE:
-            return None
-
         try:
-            if torch.cuda.is_available():
-                # PyTorch doesn't provide direct GPU utilization
-                # This is a placeholder - real implementation might use nvidia-ml-py
+            # Prefer pynvml for accurate GPU utilization
+            if PYNVML_AVAILABLE:
+                handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # First GPU
+                utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
+                return float(utilization.gpu)
+
+            # Fallback to PyTorch memory-based estimation
+            elif TORCH_AVAILABLE and torch.cuda.is_available():
                 if hasattr(torch.cuda, 'utilization'):
                     return torch.cuda.utilization()
                 else:
-                    # Fallback: estimate based on memory usage
+                    # Estimate based on memory usage
                     allocated = torch.cuda.memory_allocated()
                     reserved = torch.cuda.memory_reserved()
                     if reserved > 0:
                         return (allocated / reserved) * 100
+
+            return None
         except Exception as e:
             logger.error(f"Failed to get GPU usage: {e}")
diff --git a/requirements.base.txt b/requirements.base.txt
index 094f332..04e90ba 100644
--- a/requirements.base.txt
+++ b/requirements.base.txt
@@ -5,4 +5,5 @@ opencv-python
 scipy
 filterpy
 psycopg2-binary
-lap>=0.5.12
\ No newline at end of file
+lap>=0.5.12
+pynvml
\ No newline at end of file