255 lines
9.4 KiB
Python
Executable file
255 lines
9.4 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
Multi-stream test script to verify CUDA context sharing efficiency.
|
|
Tests multiple RTSP streams simultaneously and monitors VRAM usage.
|
|
"""
|
|
|
|
import argparse
|
|
import time
|
|
import sys
|
|
import subprocess
|
|
import os
|
|
from pathlib import Path
|
|
from dotenv import load_dotenv
|
|
from services import StreamDecoderFactory, ConnectionStatus
|
|
|
|
# Populate os.environ from the project's .env file so CAMERA_URL_* lookups work.
load_dotenv()
|
def get_gpu_memory_usage(gpu_id: int = 0) -> int:
    """Return the GPU's currently used VRAM in MB, or 0 if it cannot be read.

    Shells out to nvidia-smi for the given device.  Any failure (missing
    binary, invalid GPU id, non-zero exit, unparsable output) is reported
    as a warning and collapses to 0 so callers' arithmetic still works.
    """
    query = [
        'nvidia-smi',
        '--query-gpu=memory.used',
        '--format=csv,noheader,nounits',
        f'--id={gpu_id}',
    ]
    try:
        completed = subprocess.run(query, capture_output=True, text=True, check=True)
        return int(completed.stdout.strip())
    except Exception as e:
        # Best-effort probe: a monitoring failure must not abort the test.
        print(f"Warning: Could not get GPU memory usage: {e}")
        return 0
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description='Test multi-stream decoding with context sharing')
|
|
parser.add_argument(
|
|
'--gpu-id',
|
|
type=int,
|
|
default=0,
|
|
help='GPU device ID'
|
|
)
|
|
parser.add_argument(
|
|
'--duration',
|
|
type=int,
|
|
default=20,
|
|
help='Test duration in seconds'
|
|
)
|
|
parser.add_argument(
|
|
'--capture-snapshots',
|
|
action='store_true',
|
|
help='Capture JPEG snapshots during test'
|
|
)
|
|
parser.add_argument(
|
|
'--output-dir',
|
|
type=str,
|
|
default='./multi_stream_snapshots',
|
|
help='Output directory for snapshots'
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
# Load camera URLs from environment
|
|
camera_urls = []
|
|
i = 1
|
|
while True:
|
|
url = os.getenv(f'CAMERA_URL_{i}')
|
|
if url:
|
|
camera_urls.append(url)
|
|
i += 1
|
|
else:
|
|
break
|
|
|
|
if not camera_urls:
|
|
print("Error: No camera URLs found in .env file")
|
|
print("Please add CAMERA_URL_1, CAMERA_URL_2, etc. to your .env file")
|
|
sys.exit(1)
|
|
|
|
# Create output directory if capturing snapshots
|
|
if args.capture_snapshots:
|
|
output_dir = Path(args.output_dir)
|
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
print("=" * 80)
|
|
print("Multi-Stream RTSP Decoder Test - Context Sharing Verification")
|
|
print("=" * 80)
|
|
print(f"Number of Streams: {len(camera_urls)}")
|
|
print(f"GPU ID: {args.gpu_id}")
|
|
print(f"Test Duration: {args.duration} seconds")
|
|
print(f"Capture Snapshots: {args.capture_snapshots}")
|
|
print("=" * 80)
|
|
print()
|
|
|
|
try:
|
|
# Get baseline GPU memory
|
|
print("[Baseline] Measuring initial GPU memory usage...")
|
|
baseline_memory = get_gpu_memory_usage(args.gpu_id)
|
|
print(f"✓ Baseline VRAM: {baseline_memory} MB\n")
|
|
|
|
# Initialize factory (shared CUDA context)
|
|
print("[1/4] Initializing StreamDecoderFactory with shared CUDA context...")
|
|
factory = StreamDecoderFactory(gpu_id=args.gpu_id)
|
|
|
|
factory_memory = get_gpu_memory_usage(args.gpu_id)
|
|
factory_overhead = factory_memory - baseline_memory
|
|
print(f"✓ Factory initialized")
|
|
print(f" VRAM after factory: {factory_memory} MB (+{factory_overhead} MB)\n")
|
|
|
|
# Create all decoders
|
|
print(f"[2/4] Creating {len(camera_urls)} StreamDecoder instances...")
|
|
decoders = []
|
|
for i, url in enumerate(camera_urls):
|
|
decoder = factory.create_decoder(
|
|
rtsp_url=url,
|
|
buffer_size=30,
|
|
codec='h264'
|
|
)
|
|
decoders.append(decoder)
|
|
print(f" ✓ Decoder {i+1} created for camera {url.split('@')[1].split('/')[0]}")
|
|
|
|
decoders_memory = get_gpu_memory_usage(args.gpu_id)
|
|
decoders_overhead = decoders_memory - factory_memory
|
|
print(f"\n VRAM after creating {len(decoders)} decoders: {decoders_memory} MB (+{decoders_overhead} MB)")
|
|
print(f" Average per decoder: {decoders_overhead / len(decoders):.1f} MB\n")
|
|
|
|
# Start all decoders
|
|
print(f"[3/4] Starting all {len(decoders)} decoders...")
|
|
for i, decoder in enumerate(decoders):
|
|
decoder.start()
|
|
print(f" ✓ Decoder {i+1} started")
|
|
|
|
started_memory = get_gpu_memory_usage(args.gpu_id)
|
|
started_overhead = started_memory - decoders_memory
|
|
print(f"\n VRAM after starting decoders: {started_memory} MB (+{started_overhead} MB)")
|
|
print(f" Average per running decoder: {started_overhead / len(decoders):.1f} MB\n")
|
|
|
|
# Wait for all streams to connect
|
|
print("[4/4] Waiting for all streams to connect...")
|
|
max_wait = 15
|
|
for wait_time in range(max_wait):
|
|
connected = sum(1 for d in decoders if d.is_connected())
|
|
print(f" Connected: {connected}/{len(decoders)} streams", end='\r')
|
|
|
|
if connected == len(decoders):
|
|
print(f"\n✓ All {len(decoders)} streams connected!\n")
|
|
break
|
|
|
|
time.sleep(1)
|
|
else:
|
|
connected = sum(1 for d in decoders if d.is_connected())
|
|
print(f"\n⚠ Only {connected}/{len(decoders)} streams connected after {max_wait}s\n")
|
|
|
|
connected_memory = get_gpu_memory_usage(args.gpu_id)
|
|
connected_overhead = connected_memory - started_memory
|
|
print(f" VRAM after connection: {connected_memory} MB (+{connected_overhead} MB)\n")
|
|
|
|
# Monitor streams
|
|
print(f"Monitoring streams for {args.duration} seconds...")
|
|
print("=" * 80)
|
|
print(f"{'Time':<8} {'VRAM':<10} {'Stream 1':<12} {'Stream 2':<12} {'Stream 3':<12} {'Stream 4':<12}")
|
|
print("-" * 80)
|
|
|
|
start_time = time.time()
|
|
snapshot_interval = args.duration // 3 if args.capture_snapshots else 0
|
|
last_snapshot = 0
|
|
|
|
while time.time() - start_time < args.duration:
|
|
elapsed = time.time() - start_time
|
|
current_memory = get_gpu_memory_usage(args.gpu_id)
|
|
|
|
# Get stats for each decoder
|
|
stats = []
|
|
for decoder in decoders:
|
|
status = decoder.get_status().value[:8]
|
|
buffer = decoder.get_buffer_size()
|
|
frames = decoder.frame_count
|
|
stats.append(f"{status:8s} {buffer:2d}/30 {frames:4d}")
|
|
|
|
print(f"{elapsed:6.1f}s {current_memory:6d}MB {stats[0]:<12} {stats[1]:<12} {stats[2]:<12} {stats[3]:<12}")
|
|
|
|
# Capture snapshots
|
|
if args.capture_snapshots and snapshot_interval > 0:
|
|
if elapsed - last_snapshot >= snapshot_interval:
|
|
print("\n → Capturing snapshots from all streams...")
|
|
for i, decoder in enumerate(decoders):
|
|
jpeg_bytes = decoder.get_frame_as_jpeg(quality=85)
|
|
if jpeg_bytes:
|
|
filename = output_dir / f"camera_{i+1}_t{int(elapsed)}s.jpg"
|
|
with open(filename, 'wb') as f:
|
|
f.write(jpeg_bytes)
|
|
print(f" Saved {filename.name} ({len(jpeg_bytes)/1024:.1f} KB)")
|
|
print()
|
|
last_snapshot = elapsed
|
|
|
|
time.sleep(1)
|
|
|
|
print("=" * 80)
|
|
|
|
# Final memory analysis
|
|
final_memory = get_gpu_memory_usage(args.gpu_id)
|
|
total_overhead = final_memory - baseline_memory
|
|
|
|
print("\n" + "=" * 80)
|
|
print("Memory Usage Analysis")
|
|
print("=" * 80)
|
|
print(f"Baseline VRAM: {baseline_memory:6d} MB")
|
|
print(f"After Factory Init: {factory_memory:6d} MB (+{factory_overhead:4d} MB)")
|
|
print(f"After Creating {len(decoders)} Decoders: {decoders_memory:6d} MB (+{decoders_overhead:4d} MB)")
|
|
print(f"After Starting Decoders: {started_memory:6d} MB (+{started_overhead:4d} MB)")
|
|
print(f"After Connection: {connected_memory:6d} MB (+{connected_overhead:4d} MB)")
|
|
print(f"Final (after {args.duration}s): {final_memory:6d} MB (+{total_overhead:4d} MB total)")
|
|
print("-" * 80)
|
|
print(f"Average VRAM per stream: {total_overhead / len(decoders):6.1f} MB")
|
|
print(f"Context sharing efficiency: {'EXCELLENT' if total_overhead < 500 else 'GOOD' if total_overhead < 800 else 'POOR'}")
|
|
print("=" * 80)
|
|
|
|
# Final stats
|
|
print("\nFinal Stream Statistics:")
|
|
print("-" * 80)
|
|
for i, decoder in enumerate(decoders):
|
|
status = decoder.get_status().value
|
|
buffer = decoder.get_buffer_size()
|
|
frames = decoder.frame_count
|
|
fps = frames / args.duration if args.duration > 0 else 0
|
|
print(f"Stream {i+1}: {status:12s} | Buffer: {buffer:2d}/{decoder.buffer_size} | "
|
|
f"Frames: {frames:5d} | Avg FPS: {fps:5.2f}")
|
|
print("=" * 80)
|
|
|
|
except KeyboardInterrupt:
|
|
print("\n\n✗ Interrupted by user")
|
|
sys.exit(1)
|
|
|
|
except Exception as e:
|
|
print(f"\n\n✗ Error: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|
|
sys.exit(1)
|
|
|
|
finally:
|
|
# Cleanup
|
|
if 'decoders' in locals():
|
|
print("\nCleaning up...")
|
|
for i, decoder in enumerate(decoders):
|
|
decoder.stop()
|
|
print(f" ✓ Decoder {i+1} stopped")
|
|
|
|
cleanup_memory = get_gpu_memory_usage(args.gpu_id)
|
|
print(f"\nVRAM after cleanup: {cleanup_memory} MB")
|
|
|
|
print("\n✓ Multi-stream test completed successfully")
|
|
sys.exit(0)
|
|
|
|
|
|
# Script entry point: run the test only when executed directly.
if __name__ == '__main__':
    main()
|