feat: update pynvml in linux
All checks were successful
Build Worker Base and Application Images / check-base-changes (push) Successful in 12s
Build Worker Base and Application Images / build-base (push) Successful in 4m44s
Build Worker Base and Application Images / build-docker (push) Successful in 3m3s
Build Worker Base and Application Images / deploy-stack (push) Successful in 24s
parent 67096d4141
commit 5065e43837

3 changed files with 27 additions and 9 deletions
.gitignore (vendored): 2 additions

@@ -2,6 +2,8 @@
 app.log
 *.pt
 
 images
 
+# All pycache directories
+__pycache__/
 .mptacache
@@ -10,7 +10,7 @@ from .models import CameraConnection, SubscriptionObject
 
 logger = logging.getLogger(__name__)
 
-# Try to import torch for GPU monitoring
+# Try to import torch and pynvml for GPU monitoring
 try:
     import torch
     TORCH_AVAILABLE = True
@@ -18,6 +18,18 @@ except ImportError:
     TORCH_AVAILABLE = False
     logger.warning("PyTorch not available, GPU metrics will not be collected")
 
+try:
+    import pynvml
+    PYNVML_AVAILABLE = True
+    pynvml.nvmlInit()
+    logger.info("NVIDIA ML Python (pynvml) initialized successfully")
+except ImportError:
+    PYNVML_AVAILABLE = False
+    logger.debug("pynvml not available, falling back to PyTorch GPU monitoring")
+except Exception as e:
+    PYNVML_AVAILABLE = False
+    logger.warning(f"Failed to initialize pynvml: {e}")
+
 
 @dataclass
 class WorkerState:
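Aside (not part of the commit): a minimal sketch of the pynvml calls this initialization and the new query path rely on, assuming a single NVIDIA GPU at index 0; the explicit nvmlShutdown() is illustrative and does not appear in this diff.

# Sketch: query GPU utilization directly with pynvml
import pynvml

pynvml.nvmlInit()
try:
    handle = pynvml.nvmlDeviceGetHandleByIndex(0)          # first GPU (assumed index)
    rates = pynvml.nvmlDeviceGetUtilizationRates(handle)   # struct with .gpu / .memory percentages
    print(f"GPU {rates.gpu}% busy, memory controller {rates.memory}% busy")
finally:
    pynvml.nvmlShutdown()                                  # release NVML when done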
@@ -180,21 +192,24 @@ class SystemMetrics:
     @staticmethod
     def get_gpu_usage() -> Optional[float]:
         """Get current GPU usage percentage."""
-        if not TORCH_AVAILABLE:
-            return None
-
         try:
-            if torch.cuda.is_available():
-                # PyTorch doesn't provide direct GPU utilization
-                # This is a placeholder - real implementation might use nvidia-ml-py
+            # Prefer pynvml for accurate GPU utilization
+            if PYNVML_AVAILABLE:
+                handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # First GPU
+                utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
+                return float(utilization.gpu)
+
+            # Fallback to PyTorch memory-based estimation
+            elif TORCH_AVAILABLE and torch.cuda.is_available():
                 if hasattr(torch.cuda, 'utilization'):
                     return torch.cuda.utilization()
                 else:
-                    # Fallback: estimate based on memory usage
+                    # Estimate based on memory usage
                     allocated = torch.cuda.memory_allocated()
                     reserved = torch.cuda.memory_reserved()
                     if reserved > 0:
                         return (allocated / reserved) * 100
+
             return None
         except Exception as e:
             logger.error(f"Failed to get GPU usage: {e}")
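Aside (not part of the commit): when pynvml is unavailable, the fallback above derives a percentage from CUDA memory counters. A hedged standalone sketch of that estimate follows; it reflects caching-allocator pressure rather than actual GPU utilization, and the function name is hypothetical.

# Sketch: the PyTorch memory-based estimate used by the fallback path
from typing import Optional

import torch

def estimated_gpu_usage() -> Optional[float]:
    """Rough estimate from allocator counters; None if nothing can be measured."""
    if not torch.cuda.is_available():
        return None
    allocated = torch.cuda.memory_allocated()   # bytes held by live tensors
    reserved = torch.cuda.memory_reserved()     # bytes reserved by the caching allocator
    if reserved > 0:
        return (allocated / reserved) * 100     # allocator pressure, not SM utilization
    return None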
@@ -6,3 +6,4 @@ scipy
 filterpy
 psycopg2-binary
 lap>=0.5.12
+pynvml