feat: inference subsystem and optimization to decoder

2025-11-09 00:57:08 +07:00 · 2025-11-09 00:57:08 +07:00 · 3c83a57e44
commit 3c83a57e44
19 changed files with 3897 additions and 0 deletions
--- a/services/jpeg_encoder.py
+++ b/services/jpeg_encoder.py
@ -0,0 +1,91 @@
+"""
+JPEG Encoder wrapper for GPU-accelerated JPEG encoding using nvImageCodec/nvJPEG.
+Provides a shared encoder instance that can be used across multiple streams.
+"""
+
+import threading
+from typing import Optional
+
+import torch
+import nvidia.nvimgcodec as nvimgcodec
+
+
+class JPEGEncoderFactory:
+    """
+    Factory for creating and managing a shared JPEG encoder instance.
+
+    Singleton: the first construction (or the first call to get_encoder())
+    creates one nvimgcodec.Encoder that every later call reuses. First-time
+    creation is serialized with a lock so concurrent first calls cannot build
+    two encoders, and a failed creation leaves the factory uninitialized so
+    the next call retries instead of handing out None.
+    """
+
+    _instance = None
+    _encoder = None
+    # Guards first-time creation of _instance / _encoder.
+    _lock = threading.Lock()
+
+    def __new__(cls):
+        if cls._instance is None:
+            with cls._lock:
+                # Re-check under the lock: another thread may have finished
+                # initialization while this one was waiting.
+                if cls._instance is None:
+                    # Build the encoder BEFORE publishing anything. If
+                    # Encoder() raises, both attributes stay None and a
+                    # later call can try again.
+                    encoder = nvimgcodec.Encoder()
+                    instance = super().__new__(cls)
+                    # Publish the encoder first so that any reader that sees
+                    # a non-None _instance also sees a non-None _encoder.
+                    cls._encoder = encoder
+                    cls._instance = instance
+                    print("JPEGEncoderFactory initialized with shared nvJPEG encoder")
+        return cls._instance
+
+    @classmethod
+    def get_encoder(cls):
+        """Return the shared encoder, creating it on first use.
+
+        Raises whatever nvimgcodec.Encoder() raises if creation fails.
+        """
+        if cls._encoder is None:
+            cls()  # Initialize if not already done
+        return cls._encoder
+
+
+def encode_frame_to_jpeg(rgb_frame: torch.Tensor, quality: int = 95) -> Optional[bytes]:
+    """
+    Encode an RGB frame to JPEG with nvImageCodec and return the JPEG bytes.
+
+    This function:
+    1. Normalizes the tensor to a contiguous (H, W, 3) layout
+    2. Wraps it as an nvImageCodec image via as_image()
+    3. Encodes it to JPEG with the shared encoder from JPEGEncoderFactory
+    4. Converts the encoder output to a bytes object
+
+    Args:
+        rgb_frame: RGB tensor, shape (3, H, W) or (H, W, 3). A tensor whose
+            first dimension is 3 is always treated as (3, H, W). Intended to
+            be a uint8 tensor on the GPU; device and dtype are not checked
+            here.
+        quality: JPEG quality, passed to the encoder as quality_value
+            (0-100, default 95)
+
+    Returns:
+        JPEG encoded bytes, or None if rgb_frame is None, has an unsupported
+        shape, or encoding fails for any reason (the error is printed).
+    """
+    if rgb_frame is None:
+        return None
+
+    try:
+        if rgb_frame.dim() != 3:
+            raise ValueError(f"Expected 3D tensor, got shape {rgb_frame.shape}")
+
+        # Ensure we have (H, W, C) format and contiguous memory
+        if rgb_frame.shape[0] == 3:
+            # Convert from (C, H, W) to (H, W, C)
+            rgb_hwc = rgb_frame.permute(1, 2, 0).contiguous()
+        elif rgb_frame.shape[2] == 3:
+            # Already (H, W, C)
+            rgb_hwc = rgb_frame.contiguous()
+        else:
+            # Neither layout has 3 channels (e.g. (1, H, W) or (4, H, W)).
+            # Passing it on would encode a nonsensical image, so reject it.
+            raise ValueError(
+                "Expected (3, H, W) or (H, W, 3) tensor, "
+                f"got shape {tuple(rgb_frame.shape)}"
+            )
+
+        # Get shared encoder
+        encoder = JPEGEncoderFactory.get_encoder()
+
+        # Create encode parameters with quality
+        # Quality is set via quality_value (0-100 scale)
+        jpeg_params = nvimgcodec.JpegEncodeParams(optimized_huffman=True)
+        encode_params = nvimgcodec.EncodeParams(
+            quality_value=float(quality),
+            jpeg_encode_params=jpeg_params
+        )
+
+        # Wrap the tensor as an nvImageCodec image. Per the nvImageCodec
+        # docs this goes through __cuda_array_interface__ without copying.
+        # NOTE(review): encode() is called without an explicit CUDA stream;
+        # confirm the stream that produced rgb_frame is synchronized with
+        # the one nvimgcodec uses.
+        nv_image = nvimgcodec.as_image(rgb_hwc)
+
+        # Encode to JPEG with the shared encoder
+        jpeg_data = encoder.encode(nv_image, "jpeg", encode_params)
+
+        return bytes(jpeg_data)
+
+    except Exception as e:
+        # Deliberate best-effort: callers get None instead of an exception.
+        print(f"Error encoding frame to JPEG: {e}")
+        return None