feat: update pynvml in linux
All checks were successful
Build Worker Base and Application Images / check-base-changes (push) Successful in 12s
Build Worker Base and Application Images / build-base (push) Successful in 4m44s
Build Worker Base and Application Images / build-docker (push) Successful in 3m3s
Build Worker Base and Application Images / deploy-stack (push) Successful in 24s

ziesorx 2025-09-25 01:26:19 +07:00
parent 67096d4141
commit 5065e43837
3 changed files with 27 additions and 9 deletions

.gitignore

@@ -2,6 +2,8 @@
 app.log
 *.pt
 images
+# All pycache directories
+__pycache__/
 .mptacache


@@ -10,7 +10,7 @@ from .models import CameraConnection, SubscriptionObject

 logger = logging.getLogger(__name__)

-# Try to import torch for GPU monitoring
+# Try to import torch and pynvml for GPU monitoring
 try:
     import torch
     TORCH_AVAILABLE = True
@@ -18,6 +18,18 @@ except ImportError:
     TORCH_AVAILABLE = False
     logger.warning("PyTorch not available, GPU metrics will not be collected")

+try:
+    import pynvml
+    PYNVML_AVAILABLE = True
+    pynvml.nvmlInit()
+    logger.info("NVIDIA ML Python (pynvml) initialized successfully")
+except ImportError:
+    PYNVML_AVAILABLE = False
+    logger.debug("pynvml not available, falling back to PyTorch GPU monitoring")
+except Exception as e:
+    PYNVML_AVAILABLE = False
+    logger.warning(f"Failed to initialize pynvml: {e}")
+

 @dataclass
 class WorkerState:
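For reviewers unfamiliar with NVML: once nvmlInit() succeeds, per-device handles expose utilization and memory counters directly, which is what the guarded import above makes available. A minimal standalone sketch, not part of this diff (the device loop and print formatting are illustrative; the worker code only queries index 0):

import pynvml

pynvml.nvmlInit()
try:
    # Illustrative only: report utilization and memory for every visible GPU,
    # not just device 0 as the worker code does.
    for i in range(pynvml.nvmlDeviceGetCount()):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        util = pynvml.nvmlDeviceGetUtilizationRates(handle)  # .gpu / .memory, in percent
        mem = pynvml.nvmlDeviceGetMemoryInfo(handle)         # .total / .used / .free, in bytes
        print(f"GPU {i}: {util.gpu}% busy, {mem.used / mem.total:.0%} memory used")
finally:
    pynvml.nvmlShutdown()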
@@ -180,21 +192,24 @@ class SystemMetrics:
     @staticmethod
     def get_gpu_usage() -> Optional[float]:
         """Get current GPU usage percentage."""
-        if not TORCH_AVAILABLE:
-            return None
-
         try:
-            if torch.cuda.is_available():
-                # PyTorch doesn't provide direct GPU utilization
-                # This is a placeholder - real implementation might use nvidia-ml-py
+            # Prefer pynvml for accurate GPU utilization
+            if PYNVML_AVAILABLE:
+                handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # First GPU
+                utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
+                return float(utilization.gpu)
+            # Fallback to PyTorch memory-based estimation
+            elif TORCH_AVAILABLE and torch.cuda.is_available():
                 if hasattr(torch.cuda, 'utilization'):
                     return torch.cuda.utilization()
                 else:
-                    # Fallback: estimate based on memory usage
+                    # Estimate based on memory usage
                     allocated = torch.cuda.memory_allocated()
                     reserved = torch.cuda.memory_reserved()
                     if reserved > 0:
                         return (allocated / reserved) * 100
             return None
         except Exception as e:
             logger.error(f"Failed to get GPU usage: {e}")


@@ -6,3 +6,4 @@ scipy
 filterpy
 psycopg2-binary
 lap>=0.5.12
+pynvml
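One packaging note: both the pynvml distribution added here and NVIDIA's nvidia-ml-py provide the importable pynvml module, and the requirement is left unpinned. A quick smoke test for the rebuilt image, assuming an NVIDIA driver is visible in the container (on older pynvml releases the version string is returned as bytes rather than str):

# Smoke test sketch, not part of this commit.
import pynvml

pynvml.nvmlInit()
print(pynvml.nvmlSystemGetDriverVersion())  # installed NVIDIA driver version
pynvml.nvmlShutdown()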