decoder example
This commit is contained in:
parent
16842186c7
commit
1432eb4b97
2 changed files with 130 additions and 39 deletions
|
|
@ -1,12 +1,14 @@
|
||||||
import threading
|
import threading
|
||||||
from typing import Optional, Callable
|
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
import torch
|
from typing import Callable, Optional
|
||||||
import PyNvVideoCodec as nvc
|
|
||||||
import av
|
import av
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import PyNvVideoCodec as nvc
|
||||||
|
import torch
|
||||||
from cuda.bindings import driver as cuda_driver
|
from cuda.bindings import driver as cuda_driver
|
||||||
|
|
||||||
from .jpeg_encoder import encode_frame_to_jpeg
|
from .jpeg_encoder import encode_frame_to_jpeg
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -18,6 +20,7 @@ class FrameReference:
|
||||||
cloned frame, and tracks when all references are released so the decoder
|
cloned frame, and tracks when all references are released so the decoder
|
||||||
knows when buffer slots can be reused.
|
knows when buffer slots can be reused.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, rgb_tensor: torch.Tensor, buffer_index: int, decoder):
|
def __init__(self, rgb_tensor: torch.Tensor, buffer_index: int, decoder):
|
||||||
self.rgb_tensor = rgb_tensor # Cloned RGB tensor (one clone per frame)
|
self.rgb_tensor = rgb_tensor # Cloned RGB tensor (one clone per frame)
|
||||||
self.buffer_index = buffer_index
|
self.buffer_index = buffer_index
|
||||||
|
|
@ -75,19 +78,27 @@ def nv12_to_rgb_gpu(nv12_tensor: torch.Tensor, height: int, width: int) -> torch
|
||||||
v_plane = uv_plane[:, :, 1] # (H/2, W/2)
|
v_plane = uv_plane[:, :, 1] # (H/2, W/2)
|
||||||
|
|
||||||
# Upsample U and V to full resolution using bilinear interpolation
|
# Upsample U and V to full resolution using bilinear interpolation
|
||||||
u_upsampled = torch.nn.functional.interpolate(
|
u_upsampled = (
|
||||||
u_plane.unsqueeze(0).unsqueeze(0), # (1, 1, H/2, W/2)
|
torch.nn.functional.interpolate(
|
||||||
size=(height, width),
|
u_plane.unsqueeze(0).unsqueeze(0), # (1, 1, H/2, W/2)
|
||||||
mode='bilinear',
|
size=(height, width),
|
||||||
align_corners=False
|
mode="bilinear",
|
||||||
).squeeze(0).squeeze(0) # (H, W)
|
align_corners=False,
|
||||||
|
)
|
||||||
|
.squeeze(0)
|
||||||
|
.squeeze(0)
|
||||||
|
) # (H, W)
|
||||||
|
|
||||||
v_upsampled = torch.nn.functional.interpolate(
|
v_upsampled = (
|
||||||
v_plane.unsqueeze(0).unsqueeze(0), # (1, 1, H/2, W/2)
|
torch.nn.functional.interpolate(
|
||||||
size=(height, width),
|
v_plane.unsqueeze(0).unsqueeze(0), # (1, 1, H/2, W/2)
|
||||||
mode='bilinear',
|
size=(height, width),
|
||||||
align_corners=False
|
mode="bilinear",
|
||||||
).squeeze(0).squeeze(0) # (H, W)
|
align_corners=False,
|
||||||
|
)
|
||||||
|
.squeeze(0)
|
||||||
|
.squeeze(0)
|
||||||
|
) # (H, W)
|
||||||
|
|
||||||
# YUV to RGB conversion using BT.601 standard
|
# YUV to RGB conversion using BT.601 standard
|
||||||
# R = Y + 1.402 * (V - 128)
|
# R = Y + 1.402 * (V - 128)
|
||||||
|
|
@ -145,7 +156,7 @@ class StreamDecoderFactory:
|
||||||
self.gpu_id = gpu_id
|
self.gpu_id = gpu_id
|
||||||
|
|
||||||
# Initialize CUDA and get device
|
# Initialize CUDA and get device
|
||||||
err, = cuda_driver.cuInit(0)
|
(err,) = cuda_driver.cuInit(0)
|
||||||
if err != cuda_driver.CUresult.CUDA_SUCCESS:
|
if err != cuda_driver.CUresult.CUDA_SUCCESS:
|
||||||
raise RuntimeError(f"Failed to initialize CUDA: {err}")
|
raise RuntimeError(f"Failed to initialize CUDA: {err}")
|
||||||
|
|
||||||
|
|
@ -160,10 +171,13 @@ class StreamDecoderFactory:
|
||||||
raise RuntimeError(f"Failed to retain CUDA primary context: {err}")
|
raise RuntimeError(f"Failed to retain CUDA primary context: {err}")
|
||||||
|
|
||||||
self._initialized = True
|
self._initialized = True
|
||||||
print(f"StreamDecoderFactory initialized with shared CUDA context on GPU {gpu_id}")
|
print(
|
||||||
|
f"StreamDecoderFactory initialized with shared CUDA context on GPU {gpu_id}"
|
||||||
|
)
|
||||||
|
|
||||||
def create_decoder(self, rtsp_url: str, buffer_size: int = 30,
|
def create_decoder(
|
||||||
codec: str = "h264") -> 'StreamDecoder':
|
self, rtsp_url: str, buffer_size: int = 30, codec: str = "h264"
|
||||||
|
) -> "StreamDecoder":
|
||||||
"""
|
"""
|
||||||
Create a new StreamDecoder instance with shared CUDA context.
|
Create a new StreamDecoder instance with shared CUDA context.
|
||||||
|
|
||||||
|
|
@ -180,12 +194,12 @@ class StreamDecoderFactory:
|
||||||
cuda_context=self.cuda_context,
|
cuda_context=self.cuda_context,
|
||||||
gpu_id=self.gpu_id,
|
gpu_id=self.gpu_id,
|
||||||
buffer_size=buffer_size,
|
buffer_size=buffer_size,
|
||||||
codec=codec
|
codec=codec,
|
||||||
)
|
)
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
"""Cleanup shared CUDA context on factory destruction"""
|
"""Cleanup shared CUDA context on factory destruction"""
|
||||||
if hasattr(self, 'cuda_device') and hasattr(self, 'gpu_id'):
|
if hasattr(self, "cuda_device") and hasattr(self, "gpu_id"):
|
||||||
cuda_driver.cuDevicePrimaryCtxRelease(self.cuda_device)
|
cuda_driver.cuDevicePrimaryCtxRelease(self.cuda_device)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -195,8 +209,14 @@ class StreamDecoder:
|
||||||
Thread-safe for concurrent read/write operations.
|
Thread-safe for concurrent read/write operations.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, rtsp_url: str, cuda_context, gpu_id: int,
|
def __init__(
|
||||||
buffer_size: int = 30, codec: str = "h264"):
|
self,
|
||||||
|
rtsp_url: str,
|
||||||
|
cuda_context,
|
||||||
|
gpu_id: int,
|
||||||
|
buffer_size: int = 30,
|
||||||
|
codec: str = "h264",
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Initialize StreamDecoder.
|
Initialize StreamDecoder.
|
||||||
|
|
||||||
|
|
@ -275,7 +295,9 @@ class StreamDecoder:
|
||||||
"""
|
"""
|
||||||
with self._buffer_lock:
|
with self._buffer_lock:
|
||||||
# Remove from in-use tracking
|
# Remove from in-use tracking
|
||||||
self._in_use_frames = [f for f in self._in_use_frames if f.buffer_index != buffer_index]
|
self._in_use_frames = [
|
||||||
|
f for f in self._in_use_frames if f.buffer_index != buffer_index
|
||||||
|
]
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
"""Start the RTSP stream decoding in background thread"""
|
"""Start the RTSP stream decoding in background thread"""
|
||||||
|
|
@ -313,10 +335,10 @@ class StreamDecoder:
|
||||||
|
|
||||||
# Open RTSP stream with PyAV
|
# Open RTSP stream with PyAV
|
||||||
options = {
|
options = {
|
||||||
'rtsp_transport': 'tcp',
|
"rtsp_transport": "tcp",
|
||||||
'max_delay': '500000', # 500ms
|
"max_delay": "500000", # 500ms
|
||||||
'rtsp_flags': 'prefer_tcp',
|
"rtsp_flags": "prefer_tcp",
|
||||||
'timeout': '5000000', # 5 seconds
|
"timeout": "5000000", # 5 seconds
|
||||||
}
|
}
|
||||||
|
|
||||||
self.container = av.open(self.rtsp_url, options=options)
|
self.container = av.open(self.rtsp_url, options=options)
|
||||||
|
|
@ -330,9 +352,9 @@ class StreamDecoder:
|
||||||
|
|
||||||
# Map codec name to PyNvVideoCodec codec enum
|
# Map codec name to PyNvVideoCodec codec enum
|
||||||
codec_map = {
|
codec_map = {
|
||||||
'h264': nvc.cudaVideoCodec.H264,
|
"h264": nvc.cudaVideoCodec.H264,
|
||||||
'hevc': nvc.cudaVideoCodec.HEVC,
|
"hevc": nvc.cudaVideoCodec.HEVC,
|
||||||
'h265': nvc.cudaVideoCodec.HEVC,
|
"h265": nvc.cudaVideoCodec.HEVC,
|
||||||
}
|
}
|
||||||
|
|
||||||
codec_id = codec_map.get(self.codec.lower(), nvc.cudaVideoCodec.H264)
|
codec_id = codec_map.get(self.codec.lower(), nvc.cudaVideoCodec.H264)
|
||||||
|
|
@ -342,7 +364,7 @@ class StreamDecoder:
|
||||||
gpuid=self.gpu_id,
|
gpuid=self.gpu_id,
|
||||||
codec=codec_id,
|
codec=codec_id,
|
||||||
cudacontext=self.cuda_context,
|
cudacontext=self.cuda_context,
|
||||||
usedevicememory=True
|
usedevicememory=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._set_status(ConnectionStatus.CONNECTED)
|
self._set_status(ConnectionStatus.CONNECTED)
|
||||||
|
|
@ -408,8 +430,13 @@ class StreamDecoder:
|
||||||
nv12_tensor = torch.from_dlpack(frame)
|
nv12_tensor = torch.from_dlpack(frame)
|
||||||
|
|
||||||
# Convert NV12 to RGB on GPU
|
# Convert NV12 to RGB on GPU
|
||||||
if self.frame_height is not None and self.frame_width is not None:
|
if (
|
||||||
rgb_tensor = nv12_to_rgb_gpu(nv12_tensor, self.frame_height, self.frame_width)
|
self.frame_height is not None
|
||||||
|
and self.frame_width is not None
|
||||||
|
):
|
||||||
|
rgb_tensor = nv12_to_rgb_gpu(
|
||||||
|
nv12_tensor, self.frame_height, self.frame_width
|
||||||
|
)
|
||||||
|
|
||||||
# CLONE ONCE into our post-decode buffer
|
# CLONE ONCE into our post-decode buffer
|
||||||
# This breaks the dependency on PyNvVideoCodec's DecodedFrame
|
# This breaks the dependency on PyNvVideoCodec's DecodedFrame
|
||||||
|
|
@ -420,7 +447,7 @@ class StreamDecoder:
|
||||||
frame_ref = FrameReference(
|
frame_ref = FrameReference(
|
||||||
rgb_tensor=rgb_cloned,
|
rgb_tensor=rgb_cloned,
|
||||||
buffer_index=self._frame_index_counter,
|
buffer_index=self._frame_index_counter,
|
||||||
decoder=self
|
decoder=self,
|
||||||
)
|
)
|
||||||
self._frame_index_counter += 1
|
self._frame_index_counter += 1
|
||||||
|
|
||||||
|
|
@ -480,7 +507,9 @@ class StreamDecoder:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if not rgb:
|
if not rgb:
|
||||||
print("Warning: NV12 format not supported with FrameReference, only RGB")
|
print(
|
||||||
|
"Warning: NV12 format not supported with FrameReference, only RGB"
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -620,6 +649,8 @@ class StreamDecoder:
|
||||||
return encode_frame_to_jpeg(rgb_frame, quality=quality)
|
return encode_frame_to_jpeg(rgb_frame, quality=quality)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return (f"StreamDecoder(url={self.rtsp_url}, status={self.status.value}, "
|
return (
|
||||||
f"buffer={self.get_buffer_size()}/{self.buffer_size}, "
|
f"StreamDecoder(url={self.rtsp_url}, status={self.status.value}, "
|
||||||
f"frames_decoded={self.frame_count})")
|
f"buffer={self.get_buffer_size()}/{self.buffer_size}, "
|
||||||
|
f"frames_decoded={self.frame_count})"
|
||||||
|
)
|
||||||
|
|
|
||||||
60
stream_decoder_test.py
Normal file
60
stream_decoder_test.py
Normal file
|
|
@ -0,0 +1,60 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Simple example: Decode 4 RTSP streams and display with OpenCV using callbacks
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python stream_decoder_test.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from services import StreamDecoderFactory
|
||||||
|
|
||||||
|
# Pull camera settings from a local .env file into the process environment.
load_dotenv()

# Latest BGR frame per camera id; None until that camera delivers a frame.
frames = {cam_id: None for cam_id in range(1, 5)}
|
||||||
|
|
||||||
|
|
||||||
|
def make_callback(cam_id):
    """Create a frame callback bound to one camera.

    Args:
        cam_id: Key in the module-level ``frames`` dict under which the
            latest frame of this camera is stored.

    Returns:
        A function taking a single frame reference. It stores the frame as a
        BGR NumPy array in ``frames[cam_id]`` and then releases the reference.
    """

    def callback(frame_ref):
        try:
            # Transfer to CPU and convert RGB to BGR.
            # NOTE(review): permute(1, 2, 0) assumes a 3-D channel-first
            # tensor (C, H, W) -> (H, W, C); confirm against the decoder.
            frame = frame_ref.rgb_tensor.cpu().permute(1, 2, 0).numpy()
            frames[cam_id] = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        finally:
            # Always release the reference, even if the conversion fails;
            # otherwise the decoder never learns this frame is free again.
            frame_ref.free()

    return callback
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Decode the configured RTSP streams and show each in an OpenCV window.

    Camera URLs are read from the ``CAMERA_URL_1`` .. ``CAMERA_URL_4``
    environment variables. Cameras whose variable is unset or empty are
    skipped with a message instead of handing ``None`` to the decoder.

    Every decoder that was started is stopped and all windows are closed on
    the way out, whether the loop ends with 'q', Ctrl+C, or an exception.
    """
    # Initialize factory and decoders
    factory = StreamDecoderFactory(gpu_id=0)
    decoders = []

    try:
        for i in range(1, 5):
            url = os.getenv(f"CAMERA_URL_{i}")
            if not url:
                print(f"Camera {i} skipped: CAMERA_URL_{i} is not set")
                continue

            decoder = factory.create_decoder(url, buffer_size=5)
            decoder.register_frame_callback(make_callback(i))
            decoder.start()
            decoders.append(decoder)
            print(f"Camera {i} started")

        if not decoders:
            # Without any stream no window is ever created, so 'q' could
            # never be received and the loop below would spin forever.
            print("No camera URLs configured; nothing to display")
            return

        # Display loop
        print("Press 'q' to quit")
        while True:
            # Show each camera in separate window
            for cam_id, frame in frames.items():
                if frame is not None:
                    cv2.imshow(f"Camera {cam_id}", frame)

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    except KeyboardInterrupt:
        # Ctrl+C is the only way out while no window has focus; treat it as
        # a normal shutdown request.
        print("Interrupted")
    finally:
        # Cleanup
        for decoder in decoders:
            decoder.stop()
        cv2.destroyAllWindows()


# Only run when executed as a script: the file name matches pytest's
# ``*_test.py`` pattern, so a bare import must not open streams or block.
if __name__ == "__main__":
    main()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue