fix: gpu memory leaks
This commit is contained in:
parent
3a47920186
commit
593611cdb7
13 changed files with 420 additions and 166 deletions
|
|
@@ -12,26 +12,31 @@ from .jpeg_encoder import encode_frame_to_jpeg
|
|||
|
||||
class FrameReference:
|
||||
"""
|
||||
CPU-side reference object for a GPU frame.
|
||||
Reference-counted frame wrapper for zero-copy memory management.
|
||||
|
||||
This object holds a cloned RGB tensor that is independent of PyNvVideoCodec's
|
||||
DecodedFrame lifecycle. We don't keep the DecodedFrame to avoid conflicts
|
||||
with PyNvVideoCodec's internal frame pool management.
|
||||
This allows multiple parts of the pipeline to hold references to the same
|
||||
cloned frame, and tracks when all references are released so the decoder
|
||||
knows when buffer slots can be reused.
|
||||
"""
|
||||
def __init__(self, rgb_tensor: torch.Tensor, buffer_index: int, decoder):
|
||||
self.rgb_tensor = rgb_tensor # Cloned RGB tensor (independent copy)
|
||||
self.rgb_tensor = rgb_tensor # Cloned RGB tensor (one clone per frame)
|
||||
self.buffer_index = buffer_index
|
||||
self.decoder = decoder # Reference to decoder for marking as free
|
||||
self.decoder = decoder
|
||||
self._freed = False
|
||||
|
||||
def free(self):
|
||||
"""Mark this frame as no longer in use"""
|
||||
"""Mark this reference as freed - called by the last user of the frame"""
|
||||
if not self._freed:
|
||||
self._freed = True
|
||||
|
||||
# Release GPU memory immediately
|
||||
if self.rgb_tensor is not None:
|
||||
del self.rgb_tensor
|
||||
self.rgb_tensor = None
|
||||
self.decoder._mark_frame_free(self.buffer_index)
|
||||
|
||||
def is_freed(self) -> bool:
|
||||
"""Check if this frame has been freed"""
|
||||
"""Check if this reference has been freed"""
|
||||
return self._freed
|
||||
|
||||
def __del__(self):
|
||||
|
|
@@ -212,13 +217,10 @@ class StreamDecoder:
|
|||
self.status = ConnectionStatus.DISCONNECTED
|
||||
self._status_lock = threading.Lock()
|
||||
|
||||
# Frame buffer (ring buffer) - stores FrameReference objects
|
||||
# Frame buffer (ring buffer) - stores FrameReference objects, each wrapping a cloned RGB tensor
|
||||
self.frame_buffer = deque(maxlen=buffer_size)
|
||||
self._buffer_lock = threading.RLock()
|
||||
|
||||
# Track which buffer slots are in use (list of FrameReference objects)
|
||||
self._in_use_frames = [] # List of FrameReference objects currently held by callbacks
|
||||
|
||||
# Decoder and container instances
|
||||
self.decoder = None
|
||||
self.container = None
|
||||
|
|
@@ -236,6 +238,10 @@ class StreamDecoder:
|
|||
self._frame_callbacks = []
|
||||
self._callback_lock = threading.Lock()
|
||||
|
||||
# Track frames currently in use (referenced by callbacks/pipeline)
|
||||
self._in_use_frames = [] # List of FrameReference objects
|
||||
self._frame_index_counter = 0 # Monotonically increasing frame index
|
||||
|
||||
def register_frame_callback(self, callback: Callable):
|
||||
"""
|
||||
Register a callback to be called when a new frame is decoded.
|
||||
|
|
@@ -396,19 +402,7 @@ class StreamDecoder:
|
|||
# Add frames to ring buffer and fire callbacks
|
||||
with self._buffer_lock:
|
||||
for frame in decoded_frames:
|
||||
# Check for buffer overflow - discard oldest if needed
|
||||
if len(self.frame_buffer) >= self.buffer_size:
|
||||
# Check if oldest frame is still in use
|
||||
if len(self._in_use_frames) > 0:
|
||||
oldest_ref = self.frame_buffer[0] if len(self.frame_buffer) > 0 else None
|
||||
if oldest_ref and not oldest_ref.is_freed():
|
||||
# Force free the oldest frame to prevent overflow
|
||||
print(f"[WARNING] Buffer overflow, force-freeing oldest frame (buffer_index={oldest_ref.buffer_index})")
|
||||
oldest_ref.free()
|
||||
|
||||
# Deque will automatically remove oldest when at maxlen
|
||||
|
||||
# Convert to tensor
|
||||
# Convert to tensor immediately after NVDEC
|
||||
try:
|
||||
# Convert DecodedFrame to PyTorch tensor using DLPack (zero-copy)
|
||||
nv12_tensor = torch.from_dlpack(frame)
|
||||
|
|
@@ -417,32 +411,32 @@ class StreamDecoder:
|
|||
if self.frame_height is not None and self.frame_width is not None:
|
||||
rgb_tensor = nv12_to_rgb_gpu(nv12_tensor, self.frame_height, self.frame_width)
|
||||
|
||||
# CRITICAL: Clone the RGB tensor to break CUDA memory dependency
|
||||
# The nv12_to_rgb_gpu creates a new tensor, but it still references
|
||||
# the same CUDA context/stream. We need an independent copy.
|
||||
rgb_tensor_cloned = rgb_tensor.clone()
|
||||
# CLONE ONCE into our post-decode buffer
|
||||
# This breaks the dependency on PyNvVideoCodec's DecodedFrame
|
||||
# After this, the tensor is fully ours and can be used throughout the pipeline
|
||||
rgb_cloned = rgb_tensor.clone()
|
||||
|
||||
# Create FrameReference object for C++-style memory management
|
||||
# We don't keep the DecodedFrame to avoid conflicts with PyNvVideoCodec's
|
||||
# internal frame pool - the clone is fully independent
|
||||
buffer_index = self.frame_count
|
||||
# Create FrameReference for reference counting
|
||||
frame_ref = FrameReference(
|
||||
rgb_tensor=rgb_tensor_cloned, # Independent cloned tensor
|
||||
buffer_index=buffer_index,
|
||||
rgb_tensor=rgb_cloned,
|
||||
buffer_index=self._frame_index_counter,
|
||||
decoder=self
|
||||
)
|
||||
self._frame_index_counter += 1
|
||||
|
||||
# Add to buffer and in-use tracking
|
||||
# Add FrameReference to ring buffer (deque automatically removes oldest when full)
|
||||
self.frame_buffer.append(frame_ref)
|
||||
self._in_use_frames.append(frame_ref)
|
||||
self.frame_count += 1
|
||||
|
||||
# Fire callbacks with the cloned RGB tensor from FrameReference
|
||||
# The tensor is now independent of the DecodedFrame lifecycle
|
||||
# Track this frame as in-use
|
||||
self._in_use_frames.append(frame_ref)
|
||||
|
||||
# Fire callbacks with the FrameReference
|
||||
# The callback receivers should call .free() when done
|
||||
with self._callback_lock:
|
||||
for callback in self._frame_callbacks:
|
||||
try:
|
||||
callback(frame_ref.rgb_tensor)
|
||||
callback(frame_ref)
|
||||
except Exception as e:
|
||||
print(f"Error in frame callback: {e}")
|
||||
except Exception as e:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue