feat: inference subsystem and decoder optimizations

Siwat Sirichai 2025-11-09 00:57:08 +07:00
commit 3c83a57e44
19 changed files with 3897 additions and 0 deletions

test_vram_process.py Normal file

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""
VRAM scaling test - measures Python process memory usage for 1, 2, 3, and 4 streams.
"""
import os
import time
import subprocess
from dotenv import load_dotenv
from services import StreamDecoderFactory
# Load environment variables from .env file
load_dotenv()
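
# Expected .env entries (illustrative examples, not real endpoints):
#   CAMERA_URL_1=rtsp://user:pass@camera-1.local:554/stream1
#   CAMERA_URL_2=rtsp://user:pass@camera-2.local:554/stream1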
# Load camera URLs from environment
camera_urls = []
i = 1
while True:
    url = os.getenv(f'CAMERA_URL_{i}')
    if url:
        camera_urls.append(url)
        i += 1
    else:
        break

if not camera_urls:
    print("Error: No camera URLs found in .env file")
    print("Please add CAMERA_URL_1, CAMERA_URL_2, etc. to your .env file")
    exit(1)

def get_python_gpu_memory():
    """Get this Python process's GPU memory usage in MB via nvidia-smi."""
    try:
        pid = os.getpid()
        result = subprocess.run(
            ['nvidia-smi', '--query-compute-apps=pid,used_memory', '--format=csv,noheader,nounits'],
            capture_output=True, text=True, check=True
        )
        # Each line is "pid, used_memory" (in MB); report the entry for our own PID
        for line in result.stdout.strip().split('\n'):
            if line:
                parts = line.split(',')
                if len(parts) >= 2 and int(parts[0].strip()) == pid:
                    return int(parts[1].strip())
        return 0
    except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
        return 0
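
# Sample rows from the nvidia-smi query above, for reference (PIDs and sizes
# are illustrative): with csv,noheader,nounits each row is "<pid>, <mb>", e.g.
#   12345, 286
#   23456, 1114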

def test_n_streams(n, wait_time=15):
    """Test with n streams"""
    print(f"\n{'='*80}")
    print(f"Testing with {n} stream(s)")
    print('='*80)

    mem_before = get_python_gpu_memory()
    print(f"Python process VRAM before: {mem_before} MB")

    # Create factory
    factory = StreamDecoderFactory(gpu_id=0)
    time.sleep(1)
    mem_after_factory = get_python_gpu_memory()
    print(f"After factory: {mem_after_factory} MB (+{mem_after_factory - mem_before} MB)")

    # Create decoders; reuse URLs cyclically if fewer than n are configured
    decoders = []
    for i in range(n):
        decoder = factory.create_decoder(camera_urls[i % len(camera_urls)], buffer_size=30)
        decoders.append(decoder)
    time.sleep(1)
    mem_after_create = get_python_gpu_memory()
    print(f"After creating {n} decoder(s): {mem_after_create} MB (+{mem_after_create - mem_after_factory} MB)")

    # Start decoders
    for decoder in decoders:
        decoder.start()
    time.sleep(2)
    mem_after_start = get_python_gpu_memory()
    print(f"After starting {n} decoder(s): {mem_after_start} MB (+{mem_after_start - mem_after_create} MB)")

    # Wait for connection
    print(f"Waiting {wait_time}s for streams to connect and stabilize...")
    time.sleep(wait_time)

    # Check connection status
    connected = sum(1 for d in decoders if d.is_connected())
    mem_stable = get_python_gpu_memory()
    print(f"Connected: {connected}/{n} streams")
    print(f"Python process VRAM (stable): {mem_stable} MB")

    # Get frame stats
    for i, decoder in enumerate(decoders):
        print(f"  Stream {i+1}: {decoder.get_status().value:10s} "
              f"Buffer: {decoder.get_buffer_size()}/30 "
              f"Frames: {decoder.frame_count}")

    # Cleanup
    for decoder in decoders:
        decoder.stop()
    time.sleep(2)
    mem_after_cleanup = get_python_gpu_memory()
    print(f"After cleanup: {mem_after_cleanup} MB")

    return mem_stable

if __name__ == '__main__':
    print("Python VRAM Scaling Test")
    print(f"PID: {os.getpid()}")

    baseline = get_python_gpu_memory()
    print(f"Baseline Python process VRAM: {baseline} MB\n")

    results = {}
    for n in [1, 2, 3, 4]:
        mem = test_n_streams(n, wait_time=15)
        results[n] = mem
        print(f"\n{n} stream(s): {mem} MB (process total)")

        # Give time between tests
        if n < 4:
            print("\nWaiting 5s before next test...")
            time.sleep(5)

    # Summary
    print("\n" + "="*80)
    print("Python Process VRAM Scaling Summary")
    print("="*80)
    print(f"Baseline: {baseline:4d} MB")
    for n in [1, 2, 3, 4]:
        total = results[n]
        overhead = total - baseline
        per_stream = overhead / n if n > 0 else 0
        print(f"{n} stream(s): {total:4d} MB (+{overhead:3d} MB total, {per_stream:5.1f} MB per stream)")

    # Calculate marginal cost
    print("\nMarginal cost per additional stream:")
    for n in [2, 3, 4]:
        marginal = results[n] - results[n-1]
        print(f"  Stream {n}: +{marginal} MB")
    print("="*80)