feat: inference subsystem and optimization to decoder

commit 3c83a57e44
19 changed files with 3897 additions and 0 deletions

test_vram_process.py (new file, 143 lines)
@@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""
VRAM scaling test - measures Python process memory usage for 1, 2, 3, and 4 streams.
"""

import os
import time
import subprocess

from dotenv import load_dotenv
from services import StreamDecoderFactory

# Load environment variables from .env file
load_dotenv()

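# Expected .env entries (the URLs below are hypothetical examples, not real endpoints):
#   CAMERA_URL_1=rtsp://user:pass@192.168.1.10:554/stream1
#   CAMERA_URL_2=rtsp://user:pass@192.168.1.11:554/stream1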
# Load camera URLs from environment
camera_urls = []
i = 1
while True:
    url = os.getenv(f'CAMERA_URL_{i}')
    if url:
        camera_urls.append(url)
        i += 1
    else:
        break

if not camera_urls:
    print("Error: No camera URLs found in .env file")
    print("Please add CAMERA_URL_1, CAMERA_URL_2, etc. to your .env file")
    exit(1)

def get_python_gpu_memory():
    """Get this Python process's GPU memory usage in MB."""
    try:
        pid = os.getpid()
        result = subprocess.run(
            ['nvidia-smi', '--query-compute-apps=pid,used_memory', '--format=csv,noheader,nounits'],
            capture_output=True, text=True, check=True
        )
        for line in result.stdout.strip().split('\n'):
            if line:
                parts = line.split(',')
                if len(parts) >= 2 and int(parts[0].strip()) == pid:
                    return int(parts[1].strip())
        return 0
    except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
        # A missing nvidia-smi binary or unparseable output is treated as
        # "no GPU memory" rather than crashing the test.
        return 0

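# Note: --query-compute-apps lists CUDA compute contexts only, so memory
# allocated by child processes or through graphics APIs is not attributed
# to this PID. The measurement assumes all decoding happens in-process.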
def test_n_streams(n, wait_time=15):
    """Test with n streams"""
    print(f"\n{'='*80}")
    print(f"Testing with {n} stream(s)")
    print('='*80)

    mem_before = get_python_gpu_memory()
    print(f"Python process VRAM before: {mem_before} MB")

    # Create factory
    factory = StreamDecoderFactory(gpu_id=0)
    time.sleep(1)
    mem_after_factory = get_python_gpu_memory()
    print(f"After factory: {mem_after_factory} MB (+{mem_after_factory - mem_before} MB)")

    # Create decoders
    decoders = []
    for i in range(n):
        decoder = factory.create_decoder(camera_urls[i], buffer_size=30)
        decoders.append(decoder)

    time.sleep(1)
    mem_after_create = get_python_gpu_memory()
    print(f"After creating {n} decoder(s): {mem_after_create} MB (+{mem_after_create - mem_after_factory} MB)")

    # Start decoders
    for decoder in decoders:
        decoder.start()

    time.sleep(2)
    mem_after_start = get_python_gpu_memory()
    print(f"After starting {n} decoder(s): {mem_after_start} MB (+{mem_after_start - mem_after_create} MB)")

    # Wait for connection
    print(f"Waiting {wait_time}s for streams to connect and stabilize...")
    time.sleep(wait_time)

    # Check connection status
    connected = sum(1 for d in decoders if d.is_connected())
    mem_stable = get_python_gpu_memory()

    print(f"Connected: {connected}/{n} streams")
    print(f"Python process VRAM (stable): {mem_stable} MB")

    # Get frame stats
    for i, decoder in enumerate(decoders):
        print(f"  Stream {i+1}: {decoder.get_status().value:10s} "
              f"Buffer: {decoder.get_buffer_size()}/30 "
              f"Frames: {decoder.frame_count}")

    # Cleanup
    for decoder in decoders:
        decoder.stop()

    time.sleep(2)
    mem_after_cleanup = get_python_gpu_memory()
    print(f"After cleanup: {mem_after_cleanup} MB")

    return mem_stable

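# Example invocation, assuming nvidia-smi is on PATH and .env is populated:
#   python3 test_vram_process.py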
if __name__ == '__main__':
    print("Python VRAM Scaling Test")
    print(f"PID: {os.getpid()}")

    baseline = get_python_gpu_memory()
    print(f"Baseline Python process VRAM: {baseline} MB\n")

    results = {}
    for n in [1, 2, 3, 4]:
        mem = test_n_streams(n, wait_time=15)
        results[n] = mem
        print(f"\n→ {n} stream(s): {mem} MB (process total)")

        # Give time between tests
        if n < 4:
            print("\nWaiting 5s before next test...")
            time.sleep(5)

    # Summary
    print("\n" + "="*80)
    print("Python Process VRAM Scaling Summary")
    print("="*80)
    print(f"Baseline: {baseline:4d} MB")
    for n in [1, 2, 3, 4]:
        total = results[n]
        overhead = total - baseline
        per_stream = overhead / n if n > 0 else 0
        print(f"{n} stream(s): {total:4d} MB (+{overhead:3d} MB total, {per_stream:5.1f} MB per stream)")

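    # Marginal cost (results[n] - results[n-1]) separates one-time overhead
    # such as CUDA context creation from the true per-stream cost. Illustrative
    # (not measured) example: totals of 500/700/900/1100 MB would imply a flat
    # +200 MB marginal cost per added stream.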
    # Calculate marginal cost
    print("\nMarginal cost per additional stream:")
    for n in [2, 3, 4]:
        marginal = results[n] - results[n-1]
        print(f"  Stream {n}: +{marginal} MB")

    print("="*80)