feat: inference subsystem and decoder optimizations

Siwat Sirichai 2025-11-09 00:57:08 +07:00
commit 3c83a57e44
19 changed files with 3897 additions and 0 deletions

test_vram_process.py Normal file

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""
VRAM scaling test - measures Python process memory usage for 1, 2, 3, and 4 streams.
"""
import os
import time
import subprocess
from dotenv import load_dotenv
from services import StreamDecoderFactory
# Load environment variables from .env file
load_dotenv()
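
# Expected .env entries (illustrative examples, not real endpoints):
#   CAMERA_URL_1=rtsp://user:pass@camera-1.local:554/stream1
#   CAMERA_URL_2=rtsp://user:pass@camera-2.local:554/stream1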
# Load camera URLs from environment
camera_urls = []
i = 1
while True:
    url = os.getenv(f'CAMERA_URL_{i}')
    if url:
        camera_urls.append(url)
        i += 1
    else:
        break

if not camera_urls:
    print("Error: No camera URLs found in .env file")
    print("Please add CAMERA_URL_1, CAMERA_URL_2, etc. to your .env file")
    exit(1)

def get_python_gpu_memory():
    """Get this Python process's GPU memory usage in MB via nvidia-smi."""
    try:
        pid = os.getpid()
        result = subprocess.run(
            ['nvidia-smi', '--query-compute-apps=pid,used_memory', '--format=csv,noheader,nounits'],
            capture_output=True, text=True, check=True
        )
        # Each line is "pid, used_memory" (in MB); report the entry for our own PID
        for line in result.stdout.strip().split('\n'):
            if line:
                parts = line.split(',')
                if len(parts) >= 2 and int(parts[0].strip()) == pid:
                    return int(parts[1].strip())
        return 0
    except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
        return 0
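
# Sample rows from the nvidia-smi query above, for reference (PIDs and sizes
# are illustrative): with csv,noheader,nounits each row is "<pid>, <mb>", e.g.
#   12345, 286
#   23456, 1114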

def test_n_streams(n, wait_time=15):
    """Test with n streams"""
    print(f"\n{'='*80}")
    print(f"Testing with {n} stream(s)")
    print('='*80)

    mem_before = get_python_gpu_memory()
    print(f"Python process VRAM before: {mem_before} MB")

    # Create factory
    factory = StreamDecoderFactory(gpu_id=0)
    time.sleep(1)
    mem_after_factory = get_python_gpu_memory()
    print(f"After factory: {mem_after_factory} MB (+{mem_after_factory - mem_before} MB)")

    # Create decoders; reuse URLs cyclically if fewer than n are configured
    decoders = []
    for i in range(n):
        decoder = factory.create_decoder(camera_urls[i % len(camera_urls)], buffer_size=30)
        decoders.append(decoder)
    time.sleep(1)
    mem_after_create = get_python_gpu_memory()
    print(f"After creating {n} decoder(s): {mem_after_create} MB (+{mem_after_create - mem_after_factory} MB)")

    # Start decoders
    for decoder in decoders:
        decoder.start()
    time.sleep(2)
    mem_after_start = get_python_gpu_memory()
    print(f"After starting {n} decoder(s): {mem_after_start} MB (+{mem_after_start - mem_after_create} MB)")

    # Wait for connection
    print(f"Waiting {wait_time}s for streams to connect and stabilize...")
    time.sleep(wait_time)

    # Check connection status
    connected = sum(1 for d in decoders if d.is_connected())
    mem_stable = get_python_gpu_memory()
    print(f"Connected: {connected}/{n} streams")
    print(f"Python process VRAM (stable): {mem_stable} MB")

    # Get frame stats
    for i, decoder in enumerate(decoders):
        print(f"  Stream {i+1}: {decoder.get_status().value:10s} "
              f"Buffer: {decoder.get_buffer_size()}/30 "
              f"Frames: {decoder.frame_count}")

    # Cleanup
    for decoder in decoders:
        decoder.stop()
    time.sleep(2)
    mem_after_cleanup = get_python_gpu_memory()
    print(f"After cleanup: {mem_after_cleanup} MB")

    return mem_stable

if __name__ == '__main__':
    print("Python VRAM Scaling Test")
    print(f"PID: {os.getpid()}")

    baseline = get_python_gpu_memory()
    print(f"Baseline Python process VRAM: {baseline} MB\n")

    results = {}
    for n in [1, 2, 3, 4]:
        mem = test_n_streams(n, wait_time=15)
        results[n] = mem
        print(f"\n{n} stream(s): {mem} MB (process total)")

        # Give time between tests
        if n < 4:
            print("\nWaiting 5s before next test...")
            time.sleep(5)

    # Summary
    print("\n" + "="*80)
    print("Python Process VRAM Scaling Summary")
    print("="*80)
    print(f"Baseline: {baseline:4d} MB")
    for n in [1, 2, 3, 4]:
        total = results[n]
        overhead = total - baseline
        per_stream = overhead / n if n > 0 else 0
        print(f"{n} stream(s): {total:4d} MB (+{overhead:3d} MB total, {per_stream:5.1f} MB per stream)")

    # Calculate marginal cost
    print("\nMarginal cost per additional stream:")
    for n in [2, 3, 4]:
        marginal = results[n] - results[n-1]
        print(f"  Stream {n}: +{marginal} MB")
    print("="*80)