#!/usr/bin/env python3
"""
Multi-stream test script to verify CUDA context sharing efficiency.

Tests multiple RTSP streams simultaneously and monitors VRAM usage at each
lifecycle stage (factory init, decoder creation, start, connection, steady
state) to confirm that decoders share a single CUDA context.
"""

import argparse
import os
import subprocess
import sys
import time
from pathlib import Path

from dotenv import load_dotenv

from services import StreamDecoderFactory, ConnectionStatus

# Load environment variables from .env file
load_dotenv()


def get_gpu_memory_usage(gpu_id: int = 0) -> int:
    """Return current GPU memory usage in MB as reported by nvidia-smi.

    Args:
        gpu_id: GPU device index, forwarded to nvidia-smi via ``--id``.

    Returns:
        Used VRAM in MB, or 0 if nvidia-smi is missing or fails — a warning
        is printed so the test can continue without memory telemetry.
    """
    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=memory.used',
             '--format=csv,noheader,nounits', f'--id={gpu_id}'],
            capture_output=True, text=True, check=True
        )
        return int(result.stdout.strip())
    except Exception as e:
        print(f"Warning: Could not get GPU memory usage: {e}")
        return 0


def _camera_label(url: str) -> str:
    """Return a best-effort short label (host part) for an RTSP URL.

    The previous inline expression ``url.split('@')[1].split('/')[0]`` assumed
    credentials were always embedded and raised IndexError otherwise; this
    handles both forms and falls back to the raw URL on anything unexpected.
    """
    try:
        # rtsp://user:pass@host:port/path -> host:port ; rtsp://host/path -> host
        tail = url.split('@', 1)[1] if '@' in url else url.split('://', 1)[-1]
        return tail.split('/', 1)[0]
    except Exception:
        return url


def _load_camera_urls() -> list:
    """Collect CAMERA_URL_1, CAMERA_URL_2, ... from the environment.

    Stops at the first missing index, so the variables must be numbered
    contiguously starting at 1.
    """
    urls = []
    i = 1
    while True:
        url = os.getenv(f'CAMERA_URL_{i}')
        if not url:
            return urls
        urls.append(url)
        i += 1


def main():
    parser = argparse.ArgumentParser(description='Test multi-stream decoding with context sharing')
    parser.add_argument('--gpu-id', type=int, default=0, help='GPU device ID')
    parser.add_argument('--duration', type=int, default=20, help='Test duration in seconds')
    parser.add_argument('--capture-snapshots', action='store_true',
                        help='Capture JPEG snapshots during test')
    parser.add_argument('--output-dir', type=str, default='./multi_stream_snapshots',
                        help='Output directory for snapshots')
    args = parser.parse_args()

    # Load camera URLs from environment
    camera_urls = _load_camera_urls()
    if not camera_urls:
        print("Error: No camera URLs found in .env file")
        print("Please add CAMERA_URL_1, CAMERA_URL_2, etc. to your .env file")
        sys.exit(1)

    # Create output directory if capturing snapshots
    if args.capture_snapshots:
        output_dir = Path(args.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

    print("=" * 80)
    print("Multi-Stream RTSP Decoder Test - Context Sharing Verification")
    print("=" * 80)
    print(f"Number of Streams: {len(camera_urls)}")
    print(f"GPU ID: {args.gpu_id}")
    print(f"Test Duration: {args.duration} seconds")
    print(f"Capture Snapshots: {args.capture_snapshots}")
    print("=" * 80)
    print()

    try:
        # Get baseline GPU memory
        print("[Baseline] Measuring initial GPU memory usage...")
        baseline_memory = get_gpu_memory_usage(args.gpu_id)
        print(f"✓ Baseline VRAM: {baseline_memory} MB\n")

        # Initialize factory (shared CUDA context)
        print("[1/4] Initializing StreamDecoderFactory with shared CUDA context...")
        factory = StreamDecoderFactory(gpu_id=args.gpu_id)
        factory_memory = get_gpu_memory_usage(args.gpu_id)
        factory_overhead = factory_memory - baseline_memory
        print("✓ Factory initialized")
        print(f"  VRAM after factory: {factory_memory} MB (+{factory_overhead} MB)\n")

        # Create all decoders (no streams started yet)
        print(f"[2/4] Creating {len(camera_urls)} StreamDecoder instances...")
        decoders = []
        for i, url in enumerate(camera_urls):
            decoder = factory.create_decoder(rtsp_url=url, buffer_size=30, codec='h264')
            decoders.append(decoder)
            print(f"  ✓ Decoder {i+1} created for camera {_camera_label(url)}")

        decoders_memory = get_gpu_memory_usage(args.gpu_id)
        decoders_overhead = decoders_memory - factory_memory
        print(f"\n  VRAM after creating {len(decoders)} decoders: "
              f"{decoders_memory} MB (+{decoders_overhead} MB)")
        print(f"  Average per decoder: {decoders_overhead / len(decoders):.1f} MB\n")

        # Start all decoders
        print(f"[3/4] Starting all {len(decoders)} decoders...")
        for i, decoder in enumerate(decoders):
            decoder.start()
            print(f"  ✓ Decoder {i+1} started")

        started_memory = get_gpu_memory_usage(args.gpu_id)
        started_overhead = started_memory - decoders_memory
        print(f"\n  VRAM after starting decoders: {started_memory} MB (+{started_overhead} MB)")
        print(f"  Average per running decoder: {started_overhead / len(decoders):.1f} MB\n")

        # Wait for all streams to connect (poll once per second, up to max_wait)
        print("[4/4] Waiting for all streams to connect...")
        max_wait = 15
        for _ in range(max_wait):
            connected = sum(1 for d in decoders if d.is_connected())
            print(f"  Connected: {connected}/{len(decoders)} streams", end='\r')
            if connected == len(decoders):
                print(f"\n✓ All {len(decoders)} streams connected!\n")
                break
            time.sleep(1)
        else:
            # for/else: runs only if we never broke out, i.e. not all connected
            connected = sum(1 for d in decoders if d.is_connected())
            print(f"\n⚠ Only {connected}/{len(decoders)} streams connected after {max_wait}s\n")

        connected_memory = get_gpu_memory_usage(args.gpu_id)
        connected_overhead = connected_memory - started_memory
        print(f"  VRAM after connection: {connected_memory} MB (+{connected_overhead} MB)\n")

        # Monitor streams.
        # FIX: the original hard-coded exactly four "Stream N" columns and
        # indexed stats[0]..stats[3], raising IndexError for any camera count
        # other than 4.  Columns are now derived from len(decoders).
        print(f"Monitoring streams for {args.duration} seconds...")
        print("=" * 80)
        stream_headers = " ".join(f"{f'Stream {i+1}':<20}" for i in range(len(decoders)))
        print(f"{'Time':<8} {'VRAM':<10} {stream_headers}")
        print("-" * 80)

        start_time = time.time()
        # One snapshot roughly every third of the run; 0 disables snapshots
        snapshot_interval = args.duration // 3 if args.capture_snapshots else 0
        last_snapshot = 0

        while time.time() - start_time < args.duration:
            elapsed = time.time() - start_time
            current_memory = get_gpu_memory_usage(args.gpu_id)

            # Get stats for each decoder: status / buffer fill / decoded frames
            stats = []
            for decoder in decoders:
                status = decoder.get_status().value[:8]
                buffer = decoder.get_buffer_size()
                frames = decoder.frame_count
                stats.append(f"{status:8s} {buffer:2d}/30 {frames:4d}")

            stats_row = " ".join(f"{s:<20}" for s in stats)
            print(f"{elapsed:6.1f}s {current_memory:6d}MB {stats_row}")

            # Capture snapshots at the configured interval
            if args.capture_snapshots and snapshot_interval > 0:
                if elapsed - last_snapshot >= snapshot_interval:
                    print("\n  → Capturing snapshots from all streams...")
                    for i, decoder in enumerate(decoders):
                        jpeg_bytes = decoder.get_frame_as_jpeg(quality=85)
                        if jpeg_bytes:
                            filename = output_dir / f"camera_{i+1}_t{int(elapsed)}s.jpg"
                            with open(filename, 'wb') as f:
                                f.write(jpeg_bytes)
                            print(f"    Saved {filename.name} ({len(jpeg_bytes)/1024:.1f} KB)")
                    print()
                    last_snapshot = elapsed

            time.sleep(1)

        print("=" * 80)

        # Final memory analysis
        final_memory = get_gpu_memory_usage(args.gpu_id)
        total_overhead = final_memory - baseline_memory

        print("\n" + "=" * 80)
        print("Memory Usage Analysis")
        print("=" * 80)
        print(f"Baseline VRAM:                 {baseline_memory:6d} MB")
        print(f"After Factory Init:            {factory_memory:6d} MB (+{factory_overhead:4d} MB)")
        print(f"After Creating {len(decoders)} Decoders:     {decoders_memory:6d} MB (+{decoders_overhead:4d} MB)")
        print(f"After Starting Decoders:       {started_memory:6d} MB (+{started_overhead:4d} MB)")
        print(f"After Connection:              {connected_memory:6d} MB (+{connected_overhead:4d} MB)")
        print(f"Final (after {args.duration}s):           {final_memory:6d} MB (+{total_overhead:4d} MB total)")
        print("-" * 80)
        print(f"Average VRAM per stream: {total_overhead / len(decoders):6.1f} MB")
        print(f"Context sharing efficiency: "
              f"{'EXCELLENT' if total_overhead < 500 else 'GOOD' if total_overhead < 800 else 'POOR'}")
        print("=" * 80)

        # Final per-stream stats
        print("\nFinal Stream Statistics:")
        print("-" * 80)
        for i, decoder in enumerate(decoders):
            status = decoder.get_status().value
            buffer = decoder.get_buffer_size()
            frames = decoder.frame_count
            fps = frames / args.duration if args.duration > 0 else 0
            print(f"Stream {i+1}: {status:12s} | Buffer: {buffer:2d}/{decoder.buffer_size} | "
                  f"Frames: {frames:5d} | Avg FPS: {fps:5.2f}")
        print("=" * 80)

    except KeyboardInterrupt:
        print("\n\n✗ Interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n\n✗ Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
    finally:
        # Cleanup: stop every decoder that was created, even if an earlier
        # stage failed.  FIX: a failing stop() no longer aborts the loop and
        # leaves the remaining decoders running.
        if 'decoders' in locals():
            print("\nCleaning up...")
            for i, decoder in enumerate(decoders):
                try:
                    decoder.stop()
                    print(f"  ✓ Decoder {i+1} stopped")
                except Exception as stop_err:
                    print(f"  ⚠ Decoder {i+1} failed to stop: {stop_err}")
            cleanup_memory = get_gpu_memory_usage(args.gpu_id)
            print(f"\nVRAM after cleanup: {cleanup_memory} MB")

    print("\n✓ Multi-stream test completed successfully")
    sys.exit(0)


if __name__ == '__main__':
    main()