feat: add inference subsystem and decoder optimizations
This commit is contained in:
commit
3c83a57e44
19 changed files with 3897 additions and 0 deletions
255
test_multi_stream.py
Executable file
255
test_multi_stream.py
Executable file
|
|
@ -0,0 +1,255 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Multi-stream test script to verify CUDA context sharing efficiency.
|
||||
Tests multiple RTSP streams simultaneously and monitors VRAM usage.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import time
|
||||
import sys
|
||||
import subprocess
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
from services import StreamDecoderFactory, ConnectionStatus
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
||||
|
||||
def get_gpu_memory_usage(gpu_id: int = 0) -> int:
    """Return the current GPU memory usage in MB, as reported by nvidia-smi.

    Args:
        gpu_id: Index of the GPU device to query.

    Returns:
        Used VRAM in megabytes, or 0 if the query fails (nvidia-smi not
        installed, invalid GPU id, hung driver, or unparseable output).
    """
    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=memory.used', '--format=csv,noheader,nounits', f'--id={gpu_id}'],
            capture_output=True,
            text=True,
            check=True,
            timeout=10,  # don't hang the whole test if the driver is wedged
        )
        return int(result.stdout.strip())
    # Catch only the expected failure modes instead of a blanket Exception:
    # missing binary, non-zero exit status, timeout, or non-numeric output.
    except (FileNotFoundError, subprocess.CalledProcessError,
            subprocess.TimeoutExpired, ValueError) as e:
        print(f"Warning: Could not get GPU memory usage: {e}")
        return 0
|
||||
|
||||
|
||||
def _load_camera_urls() -> list:
    """Collect CAMERA_URL_1, CAMERA_URL_2, ... from the environment.

    Stops at the first missing index, so the variables must be numbered
    consecutively starting at 1.
    """
    urls = []
    i = 1
    while True:
        url = os.getenv(f'CAMERA_URL_{i}')
        if not url:
            break
        urls.append(url)
        i += 1
    return urls


def _camera_label(url: str) -> str:
    """Extract a short display label (host[:port]) from an RTSP URL.

    The previous inline code assumed a 'user:pass@host/...' form and raised
    IndexError on URLs without credentials; fall back to the raw URL instead.
    """
    if '@' in url:
        return url.split('@')[1].split('/')[0]
    return url


def main():
    """Run the multi-stream decode test and report VRAM usage at each stage.

    Exits 0 on success and 1 on error or user interrupt. (The previous
    version called sys.exit(0) inside the finally block, which silently
    overrode the sys.exit(1) raised by the except handlers, so the script
    always exited 0.)
    """
    parser = argparse.ArgumentParser(description='Test multi-stream decoding with context sharing')
    parser.add_argument('--gpu-id', type=int, default=0, help='GPU device ID')
    parser.add_argument('--duration', type=int, default=20, help='Test duration in seconds')
    parser.add_argument('--capture-snapshots', action='store_true',
                        help='Capture JPEG snapshots during test')
    parser.add_argument('--output-dir', type=str, default='./multi_stream_snapshots',
                        help='Output directory for snapshots')
    args = parser.parse_args()

    camera_urls = _load_camera_urls()
    if not camera_urls:
        print("Error: No camera URLs found in .env file")
        print("Please add CAMERA_URL_1, CAMERA_URL_2, etc. to your .env file")
        sys.exit(1)

    # Only create the snapshot directory when snapshots were requested.
    output_dir = None
    if args.capture_snapshots:
        output_dir = Path(args.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

    print("=" * 80)
    print("Multi-Stream RTSP Decoder Test - Context Sharing Verification")
    print("=" * 80)
    print(f"Number of Streams: {len(camera_urls)}")
    print(f"GPU ID: {args.gpu_id}")
    print(f"Test Duration: {args.duration} seconds")
    print(f"Capture Snapshots: {args.capture_snapshots}")
    print("=" * 80)
    print()

    exit_code = 0
    decoders = []
    try:
        # Baseline VRAM before any CUDA work, so every later stage can be
        # reported as a delta against it.
        print("[Baseline] Measuring initial GPU memory usage...")
        baseline_memory = get_gpu_memory_usage(args.gpu_id)
        print(f"✓ Baseline VRAM: {baseline_memory} MB\n")

        # One factory == one shared CUDA context for all decoders.
        print("[1/4] Initializing StreamDecoderFactory with shared CUDA context...")
        factory = StreamDecoderFactory(gpu_id=args.gpu_id)

        factory_memory = get_gpu_memory_usage(args.gpu_id)
        factory_overhead = factory_memory - baseline_memory
        print(f"✓ Factory initialized")
        print(f"  VRAM after factory: {factory_memory} MB (+{factory_overhead} MB)\n")

        # Create one decoder per configured camera.
        print(f"[2/4] Creating {len(camera_urls)} StreamDecoder instances...")
        for i, url in enumerate(camera_urls):
            decoder = factory.create_decoder(
                rtsp_url=url,
                buffer_size=30,
                codec='h264'
            )
            decoders.append(decoder)
            print(f"  ✓ Decoder {i+1} created for camera {_camera_label(url)}")

        decoders_memory = get_gpu_memory_usage(args.gpu_id)
        decoders_overhead = decoders_memory - factory_memory
        print(f"\n  VRAM after creating {len(decoders)} decoders: {decoders_memory} MB (+{decoders_overhead} MB)")
        print(f"  Average per decoder: {decoders_overhead / len(decoders):.1f} MB\n")

        print(f"[3/4] Starting all {len(decoders)} decoders...")
        for i, decoder in enumerate(decoders):
            decoder.start()
            print(f"  ✓ Decoder {i+1} started")

        started_memory = get_gpu_memory_usage(args.gpu_id)
        started_overhead = started_memory - decoders_memory
        print(f"\n  VRAM after starting decoders: {started_memory} MB (+{started_overhead} MB)")
        print(f"  Average per running decoder: {started_overhead / len(decoders):.1f} MB\n")

        # Poll until every stream reports connected, or give up after max_wait.
        print("[4/4] Waiting for all streams to connect...")
        max_wait = 15
        for _ in range(max_wait):
            connected = sum(1 for d in decoders if d.is_connected())
            print(f"  Connected: {connected}/{len(decoders)} streams", end='\r')
            if connected == len(decoders):
                print(f"\n✓ All {len(decoders)} streams connected!\n")
                break
            time.sleep(1)
        else:
            # for/else: loop exhausted without the break firing.
            connected = sum(1 for d in decoders if d.is_connected())
            print(f"\n⚠ Only {connected}/{len(decoders)} streams connected after {max_wait}s\n")

        connected_memory = get_gpu_memory_usage(args.gpu_id)
        connected_overhead = connected_memory - started_memory
        print(f"  VRAM after connection: {connected_memory} MB (+{connected_overhead} MB)\n")

        # --- Monitoring loop ------------------------------------------------
        print(f"Monitoring streams for {args.duration} seconds...")
        print("=" * 80)
        # Build the table header for however many streams actually exist
        # (the old code hard-coded exactly four 'Stream N' columns and then
        # indexed stats[0]..stats[3], crashing for any other camera count).
        stream_header = " ".join(f"{f'Stream {i+1}':<12}" for i in range(len(decoders)))
        print(f"{'Time':<8} {'VRAM':<10} {stream_header}")
        print("-" * 80)

        start_time = time.time()
        snapshot_interval = args.duration // 3 if args.capture_snapshots else 0
        last_snapshot = 0

        while time.time() - start_time < args.duration:
            elapsed = time.time() - start_time
            current_memory = get_gpu_memory_usage(args.gpu_id)

            # One status cell per decoder: status, buffer fill, frame count.
            stats = []
            for decoder in decoders:
                status = decoder.get_status().value[:8]
                buffer = decoder.get_buffer_size()
                frames = decoder.frame_count
                stats.append(f"{status:8s} {buffer:2d}/30 {frames:4d}")

            stat_cols = " ".join(f"{s:<12}" for s in stats)
            print(f"{elapsed:6.1f}s {current_memory:6d}MB {stat_cols}")

            # Periodic JPEG snapshots (output_dir is guaranteed non-None here
            # because snapshot_interval > 0 implies --capture-snapshots).
            if args.capture_snapshots and snapshot_interval > 0:
                if elapsed - last_snapshot >= snapshot_interval:
                    print("\n  → Capturing snapshots from all streams...")
                    for i, decoder in enumerate(decoders):
                        jpeg_bytes = decoder.get_frame_as_jpeg(quality=85)
                        if jpeg_bytes:
                            filename = output_dir / f"camera_{i+1}_t{int(elapsed)}s.jpg"
                            with open(filename, 'wb') as f:
                                f.write(jpeg_bytes)
                            print(f"    Saved {filename.name} ({len(jpeg_bytes)/1024:.1f} KB)")
                    print()
                    last_snapshot = elapsed

            time.sleep(1)

        print("=" * 80)

        # --- Memory analysis ------------------------------------------------
        final_memory = get_gpu_memory_usage(args.gpu_id)
        total_overhead = final_memory - baseline_memory

        print("\n" + "=" * 80)
        print("Memory Usage Analysis")
        print("=" * 80)
        print(f"Baseline VRAM:               {baseline_memory:6d} MB")
        print(f"After Factory Init:          {factory_memory:6d} MB (+{factory_overhead:4d} MB)")
        print(f"After Creating {len(decoders)} Decoders:   {decoders_memory:6d} MB (+{decoders_overhead:4d} MB)")
        print(f"After Starting Decoders:     {started_memory:6d} MB (+{started_overhead:4d} MB)")
        print(f"After Connection:            {connected_memory:6d} MB (+{connected_overhead:4d} MB)")
        print(f"Final (after {args.duration}s):          {final_memory:6d} MB (+{total_overhead:4d} MB total)")
        print("-" * 80)
        print(f"Average VRAM per stream: {total_overhead / len(decoders):6.1f} MB")
        print(f"Context sharing efficiency: "
              f"{'EXCELLENT' if total_overhead < 500 else 'GOOD' if total_overhead < 800 else 'POOR'}")
        print("=" * 80)

        # --- Per-stream statistics ------------------------------------------
        print("\nFinal Stream Statistics:")
        print("-" * 80)
        for i, decoder in enumerate(decoders):
            status = decoder.get_status().value
            buffer = decoder.get_buffer_size()
            frames = decoder.frame_count
            fps = frames / args.duration if args.duration > 0 else 0
            print(f"Stream {i+1}: {status:12s} | Buffer: {buffer:2d}/{decoder.buffer_size} | "
                  f"Frames: {frames:5d} | Avg FPS: {fps:5.2f}")
        print("=" * 80)

    except KeyboardInterrupt:
        print("\n\n✗ Interrupted by user")
        exit_code = 1

    except Exception as e:
        print(f"\n\n✗ Error: {e}")
        import traceback
        traceback.print_exc()
        exit_code = 1

    finally:
        # Cleanup always runs, but deliberately does NOT decide the exit
        # code: the old sys.exit(0) here swallowed the failure exits above.
        if decoders:
            print("\nCleaning up...")
            for i, decoder in enumerate(decoders):
                decoder.stop()
                print(f"  ✓ Decoder {i+1} stopped")

            cleanup_memory = get_gpu_memory_usage(args.gpu_id)
            print(f"\nVRAM after cleanup: {cleanup_memory} MB")

    if exit_code == 0:
        print("\n✓ Multi-stream test completed successfully")
    sys.exit(exit_code)


if __name__ == '__main__':
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue