remove redundant examples

Siwat Sirichai 2025-11-09 19:42:18 +07:00
parent dd57b5a246
commit d3dbf9a580
8 changed files with 0 additions and 1648 deletions


@@ -1,340 +0,0 @@
"""
FPS Benchmark Test for Single vs Multi-Camera Tracking
This script benchmarks the FPS performance of:
1. Single camera tracking
2. Multi-camera tracking (2+ cameras)
Usage:
python test_fps_benchmark.py
"""
import time
import os
from dotenv import load_dotenv
from services import (
StreamDecoderFactory,
TensorRTModelRepository,
TrackingFactory,
YOLOv8Utils,
COCO_CLASSES,
)
load_dotenv()
def benchmark_single_camera(duration=30):
"""
Benchmark single camera tracking performance.
Args:
duration: Test duration in seconds
Returns:
Dictionary with FPS statistics
"""
print("\n" + "=" * 80)
print("SINGLE CAMERA BENCHMARK")
print("=" * 80)
GPU_ID = 0
MODEL_PATH = "models/yolov8n.trt"
RTSP_URL = os.getenv('CAMERA_URL_1', 'rtsp://localhost:8554/test')
# Initialize components
print("\nInitializing...")
model_repo = TensorRTModelRepository(gpu_id=GPU_ID, default_num_contexts=4)
model_repo.load_model("detector", MODEL_PATH, num_contexts=4)
tracking_factory = TrackingFactory(gpu_id=GPU_ID)
controller = tracking_factory.create_controller(
model_repository=model_repo,
model_id="detector",
tracker_type="iou",
max_age=30,
min_confidence=0.5,
iou_threshold=0.3,
class_names=COCO_CLASSES
)
stream_factory = StreamDecoderFactory(gpu_id=GPU_ID)
decoder = stream_factory.create_decoder(RTSP_URL, buffer_size=30)
decoder.start()
print("Waiting for stream connection...")
time.sleep(5)
if not decoder.is_connected():
print("⚠ Stream not connected, results may be inaccurate")
# Benchmark
print(f"\nRunning benchmark for {duration} seconds...")
frame_count = 0
start_time = time.time()
fps_samples = []
sample_start = time.time()
sample_frames = 0
try:
while time.time() - start_time < duration:
frame_gpu = decoder.get_latest_frame(rgb=True)
if frame_gpu is None:
time.sleep(0.001)
continue
# Run tracking
tracked_objects = controller.track(
frame_gpu,
preprocess_fn=YOLOv8Utils.preprocess,
postprocess_fn=YOLOv8Utils.postprocess
)
frame_count += 1
sample_frames += 1
# Sample FPS every second
if time.time() - sample_start >= 1.0:
fps = sample_frames / (time.time() - sample_start)
fps_samples.append(fps)
sample_frames = 0
sample_start = time.time()
print(f" Current FPS: {fps:.2f}")
except KeyboardInterrupt:
print("\nBenchmark interrupted")
# Calculate statistics
total_time = time.time() - start_time
avg_fps = frame_count / total_time
# Cleanup
decoder.stop()
stats = {
'total_frames': frame_count,
'total_time': total_time,
'avg_fps': avg_fps,
'min_fps': min(fps_samples) if fps_samples else 0,
'max_fps': max(fps_samples) if fps_samples else 0,
'samples': fps_samples
}
print("\n" + "-" * 80)
print(f"Total Frames: {stats['total_frames']}")
print(f"Total Time: {stats['total_time']:.2f} seconds")
print(f"Average FPS: {stats['avg_fps']:.2f}")
print(f"Min FPS: {stats['min_fps']:.2f}")
print(f"Max FPS: {stats['max_fps']:.2f}")
print("-" * 80)
return stats
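# Hedged sketch (not part of the original script): the per-second FPS sampling done inline
# above could be factored into a small helper shared by both benchmarks. "FPSMeter" is a
# hypothetical name; it only relies on the module-level "import time".
class FPSMeter:
    """Accumulates frame counts and reports FPS once per sampling window."""

    def __init__(self, window=1.0):
        self.window = window
        self.samples = []
        self._frames = 0
        self._window_start = time.time()

    def tick(self):
        """Call once per processed frame; returns the window FPS when a window closes, else None."""
        self._frames += 1
        elapsed = time.time() - self._window_start
        if elapsed >= self.window:
            fps = self._frames / elapsed
            self.samples.append(fps)
            self._frames = 0
            self._window_start = time.time()
            return fps
        return None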
def benchmark_multi_camera(duration=30):
"""
Benchmark multi-camera tracking performance.
Args:
duration: Test duration in seconds
Returns:
Dictionary with FPS statistics per camera
"""
print("\n" + "=" * 80)
print("MULTI-CAMERA BENCHMARK")
print("=" * 80)
GPU_ID = 0
MODEL_PATH = "models/yolov8n.trt"
# Load camera URLs
camera_urls = []
i = 1
while True:
url = os.getenv(f'CAMERA_URL_{i}')
if url:
camera_urls.append(url)
i += 1
else:
break
if len(camera_urls) < 2:
print("⚠ Need at least 2 cameras for multi-camera test")
print(f" Found only {len(camera_urls)} camera(s) in .env")
return None
print(f"\nTesting with {len(camera_urls)} cameras")
# Initialize components
print("\nInitializing...")
model_repo = TensorRTModelRepository(gpu_id=GPU_ID, default_num_contexts=8)
model_repo.load_model("detector", MODEL_PATH, num_contexts=8)
tracking_factory = TrackingFactory(gpu_id=GPU_ID)
stream_factory = StreamDecoderFactory(gpu_id=GPU_ID)
decoders = []
controllers = []
for i, url in enumerate(camera_urls):
# Create decoder
decoder = stream_factory.create_decoder(url, buffer_size=30)
decoder.start()
decoders.append(decoder)
# Create controller
controller = tracking_factory.create_controller(
model_repository=model_repo,
model_id="detector",
tracker_type="iou",
max_age=30,
min_confidence=0.5,
iou_threshold=0.3,
class_names=COCO_CLASSES
)
controllers.append(controller)
print(f" Camera {i+1}: {url}")
print("\nWaiting for streams to connect...")
time.sleep(10)
# Benchmark
print(f"\nRunning benchmark for {duration} seconds...")
frame_counts = [0] * len(camera_urls)
fps_samples = [[] for _ in camera_urls]
sample_starts = [time.time()] * len(camera_urls)
sample_frames = [0] * len(camera_urls)
start_time = time.time()
try:
while time.time() - start_time < duration:
for i, (decoder, controller) in enumerate(zip(decoders, controllers)):
frame_gpu = decoder.get_latest_frame(rgb=True)
if frame_gpu is None:
continue
# Run tracking
tracked_objects = controller.track(
frame_gpu,
preprocess_fn=YOLOv8Utils.preprocess,
postprocess_fn=YOLOv8Utils.postprocess
)
frame_counts[i] += 1
sample_frames[i] += 1
# Sample FPS every second
if time.time() - sample_starts[i] >= 1.0:
fps = sample_frames[i] / (time.time() - sample_starts[i])
fps_samples[i].append(fps)
sample_frames[i] = 0
sample_starts[i] = time.time()
except KeyboardInterrupt:
print("\nBenchmark interrupted")
# Calculate statistics
total_time = time.time() - start_time
# Cleanup
for decoder in decoders:
decoder.stop()
# Compile results
results = {}
total_frames = 0
print("\n" + "-" * 80)
for i in range(len(camera_urls)):
avg_fps = frame_counts[i] / total_time if total_time > 0 else 0
total_frames += frame_counts[i]
cam_stats = {
'total_frames': frame_counts[i],
'avg_fps': avg_fps,
'min_fps': min(fps_samples[i]) if fps_samples[i] else 0,
'max_fps': max(fps_samples[i]) if fps_samples[i] else 0,
}
results[f'camera_{i+1}'] = cam_stats
print(f"Camera {i+1}:")
print(f" Total Frames: {cam_stats['total_frames']}")
print(f" Average FPS: {cam_stats['avg_fps']:.2f}")
print(f" Min FPS: {cam_stats['min_fps']:.2f}")
print(f" Max FPS: {cam_stats['max_fps']:.2f}")
print()
# Combined stats
combined_avg_fps = total_frames / total_time if total_time > 0 else 0
print("-" * 80)
print(f"COMBINED:")
print(f" Total Frames (all cameras): {total_frames}")
print(f" Total Time: {total_time:.2f} seconds")
print(f" Combined Throughput: {combined_avg_fps:.2f} FPS")
print(f" Per-Camera Average: {combined_avg_fps / len(camera_urls):.2f} FPS")
print("-" * 80)
results['combined'] = {
'total_frames': total_frames,
'total_time': total_time,
'combined_fps': combined_avg_fps,
'per_camera_avg': combined_avg_fps / len(camera_urls)
}
return results
def main():
"""Run both benchmarks and compare."""
print("=" * 80)
print("FPS BENCHMARK: Single vs Multi-Camera Tracking")
print("=" * 80)
# Run single camera benchmark
single_stats = benchmark_single_camera(duration=30)
# Run multi-camera benchmark
multi_stats = benchmark_multi_camera(duration=30)
# Comparison
if multi_stats:
print("\n" + "=" * 80)
print("COMPARISON")
print("=" * 80)
print(f"\nSingle Camera Performance:")
print(f" Average FPS: {single_stats['avg_fps']:.2f}")
print(f"\nMulti-Camera Performance:")
print(f" Per-Camera Average: {multi_stats['combined']['per_camera_avg']:.2f} FPS")
print(f" Combined Throughput: {multi_stats['combined']['combined_fps']:.2f} FPS")
# Calculate performance drop (guard against a zero single-camera FPS,
# e.g. when the stream never connected)
fps_drop = 0.0
if single_stats['avg_fps'] > 0:
    fps_drop = ((single_stats['avg_fps'] - multi_stats['combined']['per_camera_avg'])
                / single_stats['avg_fps'] * 100)
print(f"\nPerformance Analysis:")
print(f" FPS Drop per Camera: {fps_drop:.1f}%")
if fps_drop < 10:
print(" ✓ Excellent - Minimal performance impact")
elif fps_drop < 25:
print(" ✓ Good - Acceptable performance scaling")
elif fps_drop < 50:
print(" ⚠ Moderate - Some performance degradation")
else:
print(" ⚠ Significant - Consider optimizations")
print("=" * 80)
if __name__ == "__main__":
main()


@@ -1,189 +0,0 @@
import time
import torch
import os
from dotenv import load_dotenv
from services.model_repository import TensorRTModelRepository
from services.stream_decoder import StreamDecoderFactory
import numpy as np
# COCO class names for YOLOv8
COCO_CLASSES = [
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
def postprocess(output, confidence_threshold=0.25, iou_threshold=0.45):
"""
Post-processes the output of a YOLOv8 model to extract bounding boxes, scores, and class IDs.
"""
# output shape: (batch_size, 84, 8400)
# 84 = 4 (bbox) + 80 (classes)
# Transpose the output to (batch_size, 8400, 84)
output = output.transpose(1, 2)
boxes = []
scores = []
class_ids = []
for detection in output[0]:
# First 4 values are bbox (cx, cy, w, h)
# The rest are class scores
class_scores = detection[4:]
max_score, max_class_id = torch.max(class_scores, 0)
if max_score > confidence_threshold:
cx, cy, w, h = detection[:4]
# Convert from center-width-height to x1-y1-x2-y2
x1 = cx - w / 2
y1 = cy - h / 2
x2 = cx + w / 2
y2 = cy + h / 2
boxes.append([x1.item(), y1.item(), x2.item(), y2.item()])
scores.append(max_score.item())
class_ids.append(max_class_id.item())
if not boxes:
return [], [], []
# Perform Non-Maximum Suppression (NMS).
# This is a simplified, hand-rolled version; for production, prefer a library routine such as
# torchvision.ops.nms (a sketch of that variant follows after this function).
indices = []
boxes_np = np.array(boxes)
scores_np = np.array(scores)
order = scores_np.argsort()[::-1]
while order.size > 0:
i = order[0]
indices.append(i)
xx1 = np.maximum(boxes_np[i, 0], boxes_np[order[1:], 0])
yy1 = np.maximum(boxes_np[i, 1], boxes_np[order[1:], 1])
xx2 = np.minimum(boxes_np[i, 2], boxes_np[order[1:], 2])
yy2 = np.minimum(boxes_np[i, 3], boxes_np[order[1:], 3])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / ((boxes_np[i, 2] - boxes_np[i, 0] + 1) * (boxes_np[i, 3] - boxes_np[i, 1] + 1) + \
(boxes_np[order[1:], 2] - boxes_np[order[1:], 0] + 1) * \
(boxes_np[order[1:], 3] - boxes_np[order[1:], 1] + 1) - inter)
inds = np.where(ovr <= iou_threshold)[0]
order = order[inds + 1]
final_boxes = [boxes[i] for i in indices]
final_scores = [scores[i] for i in indices]
final_class_ids = [class_ids[i] for i in indices]
return final_boxes, final_scores, final_class_ids
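# Hedged sketch (not part of the original script): the same post-processing with NMS delegated
# to torchvision.ops.nms, as suggested in the comment above. Assumes torchvision is installed;
# "postprocess_torchvision" is a hypothetical name and mirrors the class-agnostic behaviour of
# the hand-rolled version.
def postprocess_torchvision(output, confidence_threshold=0.25, iou_threshold=0.45):
    import torchvision

    detections = output.transpose(1, 2)[0]  # (batch, 84, 8400) -> (8400, 84), first image only
    scores, class_ids = detections[:, 4:].max(dim=1)
    keep = scores > confidence_threshold
    if not keep.any():
        return [], [], []
    detections, scores, class_ids = detections[keep], scores[keep], class_ids[keep]
    # Convert (cx, cy, w, h) to (x1, y1, x2, y2) as expected by torchvision.ops.nms
    cx, cy, w, h = detections[:, 0], detections[:, 1], detections[:, 2], detections[:, 3]
    boxes = torch.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2], dim=1)
    idx = torchvision.ops.nms(boxes, scores, iou_threshold)
    return boxes[idx].tolist(), scores[idx].tolist(), class_ids[idx].tolist()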
def test_rtsp_stream_with_inference():
"""
Decodes an RTSP stream and runs inference, printing bounding boxes and class names.
"""
load_dotenv()
rtsp_url = os.getenv("CAMERA_URL_1")
if not rtsp_url:
print("Error: CAMERA_URL_1 not found in .env file.")
return
print("=" * 80)
print("RTSP Stream + TensorRT Inference")
print("=" * 80)
# Initialize components
decoder_factory = StreamDecoderFactory(gpu_id=0)
model_repo = TensorRTModelRepository(gpu_id=0, default_num_contexts=1)
# Setup camera stream
decoder = decoder_factory.create_decoder(rtsp_url, buffer_size=1)
decoder.start()
# Load inference model
model_path = "models/yolov8n.trt"
try:
model_repo.load_model(
model_id="camera_main",
file_path=model_path
)
except Exception as e:
print(f"Error loading model: {e}")
print(f"Please ensure '{model_path}' exists.")
decoder.stop()
return
print("\nWaiting for stream to buffer frames...")
time.sleep(3)
try:
while True:
frame_gpu = decoder.get_latest_frame(rgb=True)
if frame_gpu is None:
time.sleep(0.1)
continue
# Preprocess frame for YOLOv8: add batch dimension, resize to 640x640, normalize to [0, 1].
# Note: this is a plain stretch-resize; an aspect-preserving letterbox variant is sketched
# after this function.
frame_float = frame_gpu.unsqueeze(0).float()  # (1, 3, H, W), float32
frame_resized = torch.nn.functional.interpolate(
    frame_float, size=(640, 640), mode='bilinear', align_corners=False
)
frame_normalized = frame_resized / 255.0
# Run inference
try:
outputs = model_repo.infer(
model_id="camera_main",
inputs={"images": frame_normalized},
synchronize=True
)
# Post-process the output
output_tensor = outputs['output0']
boxes, scores, class_ids = postprocess(output_tensor)
# Print results
print(f"\n--- Frame at {time.time():.2f} ---")
if boxes:
for box, score, class_id in zip(boxes, scores, class_ids):
class_name = COCO_CLASSES[class_id]
print(
f" Detected: {class_name} "
f"(confidence: {score:.2f}) at "
f"bbox: [{box[0]:.0f}, {box[1]:.0f}, {box[2]:.0f}, {box[3]:.0f}]"
)
else:
print(" No objects detected.")
except Exception as e:
print(f"Inference failed: {e}")
time.sleep(0.03) # ~30 FPS
except KeyboardInterrupt:
print("\nStopping...")
finally:
# Cleanup
decoder.stop()
model_repo.unload_model("camera_main")
print("Stream and model unloaded.")
if __name__ == "__main__":
test_rtsp_stream_with_inference()


@@ -1,174 +0,0 @@
#!/usr/bin/env python3
"""
Test script for JPEG encoding with nvImageCodec
Tests GPU-accelerated JPEG encoding from RTSP stream frames
"""
import argparse
import sys
import time
import os
from pathlib import Path
from dotenv import load_dotenv
from services import StreamDecoderFactory
# Load environment variables from .env file
load_dotenv()
def main():
parser = argparse.ArgumentParser(description='Test JPEG encoding from RTSP stream')
parser.add_argument(
'--rtsp-url',
type=str,
default=None,
help='RTSP stream URL (defaults to CAMERA_URL_1 from .env)'
)
parser.add_argument(
'--output-dir',
type=str,
default='./snapshots',
help='Output directory for JPEG files'
)
parser.add_argument(
'--num-frames',
type=int,
default=10,
help='Number of frames to capture'
)
parser.add_argument(
'--interval',
type=float,
default=1.0,
help='Interval between captures in seconds'
)
parser.add_argument(
'--quality',
type=int,
default=95,
help='JPEG quality (0-100)'
)
parser.add_argument(
'--gpu-id',
type=int,
default=0,
help='GPU device ID'
)
args = parser.parse_args()
# Get RTSP URL from command line or environment
rtsp_url = args.rtsp_url
if not rtsp_url:
rtsp_url = os.getenv('CAMERA_URL_1')
if not rtsp_url:
print("Error: No RTSP URL provided")
print("Please either:")
print(" 1. Use --rtsp-url argument, or")
print(" 2. Add CAMERA_URL_1 to your .env file")
sys.exit(1)
# Create output directory
output_dir = Path(args.output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
print("=" * 80)
print("RTSP Stream JPEG Encoding Test")
print("=" * 80)
print(f"RTSP URL: {rtsp_url}")
print(f"Output Directory: {output_dir}")
print(f"Number of Frames: {args.num_frames}")
print(f"Capture Interval: {args.interval}s")
print(f"JPEG Quality: {args.quality}")
print(f"GPU ID: {args.gpu_id}")
print("=" * 80)
print()
try:
# Initialize factory and decoder
print("[1/3] Initializing StreamDecoderFactory...")
factory = StreamDecoderFactory(gpu_id=args.gpu_id)
print("✓ Factory initialized\n")
print("[2/3] Creating and starting decoder...")
decoder = factory.create_decoder(
rtsp_url=rtsp_url,
buffer_size=30
)
decoder.start()
print("✓ Decoder started\n")
# Wait for connection
print("[3/3] Waiting for stream to connect...")
max_wait = 10
for i in range(max_wait):
if decoder.is_connected():
print("✓ Stream connected\n")
break
time.sleep(1)
print(f" Waiting... {i+1}/{max_wait}s")
else:
print("✗ Failed to connect to stream")
sys.exit(1)
# Capture frames
print(f"Capturing {args.num_frames} frames...")
print("-" * 80)
captured = 0
for i in range(args.num_frames):
# Get frame as JPEG
start_time = time.time()
jpeg_bytes = decoder.get_frame_as_jpeg(quality=args.quality)
encode_time = (time.time() - start_time) * 1000 # ms
if jpeg_bytes:
# Save to file
filename = output_dir / f"frame_{i:04d}.jpg"
with open(filename, 'wb') as f:
f.write(jpeg_bytes)
size_kb = len(jpeg_bytes) / 1024
print(f"[{i+1}/{args.num_frames}] Saved {filename.name} "
f"({size_kb:.1f} KB, encoded in {encode_time:.2f}ms)")
captured += 1
else:
print(f"[{i+1}/{args.num_frames}] Failed to get frame")
# Wait before next capture (except for last frame)
if i < args.num_frames - 1:
time.sleep(args.interval)
print("-" * 80)
# Summary
print("\n" + "=" * 80)
print("Capture Complete")
print("=" * 80)
print(f"Successfully captured: {captured}/{args.num_frames} frames")
print(f"Output directory: {output_dir.absolute()}")
print("=" * 80)
except KeyboardInterrupt:
print("\n\n✗ Interrupted by user")
sys.exit(1)
except Exception as e:
print(f"\n\n✗ Error: {e}")
import traceback
traceback.print_exc()
sys.exit(1)
finally:
# Cleanup
if 'decoder' in locals():
print("\nCleaning up...")
decoder.stop()
print("✓ Decoder stopped")
print("\n✓ Test completed successfully")
sys.exit(0)
if __name__ == '__main__':
main()


@@ -1,310 +0,0 @@
"""
Test script for TensorRT Model Repository with multi-camera inference.
This demonstrates:
1. Loading the same model for multiple cameras (deduplication)
2. Context pool load balancing
3. GPU-to-GPU inference from RTSP streams
4. Memory efficiency with shared engines
"""
import time
import torch
from services.model_repository import TensorRTModelRepository
from services.stream_decoder import StreamDecoderFactory
def test_multi_camera_inference():
"""
Simulate multi-camera inference scenario.
Example: 100 cameras, all using the same YOLOv8 model
- Without pooling: 100 engines + 100 contexts in VRAM
- With pooling: 1 engine + 4 contexts in VRAM (huge savings!)
"""
# Initialize model repository with context pooling
repo = TensorRTModelRepository(gpu_id=0, default_num_contexts=4)
# Camera configurations (simulated)
camera_configs = [
{"id": "camera_1", "rtsp_url": "rtsp://camera1.local/stream"},
{"id": "camera_2", "rtsp_url": "rtsp://camera2.local/stream"},
{"id": "camera_3", "rtsp_url": "rtsp://camera3.local/stream"},
# ... imagine 100 cameras here
]
# Load the same model for all cameras
model_file = "models/yolov8n.trt" # Same file for all cameras
print("=" * 80)
print("LOADING MODELS FOR MULTIPLE CAMERAS")
print("=" * 80)
for config in camera_configs:
try:
# Each camera gets its own model_id, but shares the same engine!
metadata = repo.load_model(
model_id=config["id"],
file_path=model_file,
num_contexts=4 # 4 contexts shared across all cameras
)
print(f"\n✓ Loaded model for {config['id']}")
except Exception as e:
print(f"\n✗ Failed to load model for {config['id']}: {e}")
# Show repository stats
print("\n" + "=" * 80)
print("REPOSITORY STATISTICS")
print("=" * 80)
stats = repo.get_stats()
print(f"Total model IDs: {stats['total_model_ids']}")
print(f"Unique engines in VRAM: {stats['unique_engines']}")
print(f"Total contexts: {stats['total_contexts']}")
print(f"Memory efficiency: {stats['memory_efficiency']}")
# Get detailed info for one camera
print("\n" + "=" * 80)
print("DETAILED MODEL INFO (camera_1)")
print("=" * 80)
info = repo.get_model_info("camera_1")
if info:
print(f"Model ID: {info['model_id']}")
print(f"File: {info['file_path']}")
print(f"File hash: {info['file_hash']}")
print(f"Engine references: {info['engine_references']}")
print(f"Context pool size: {info['context_pool_size']}")
print(f"Shared with: {info['shared_with_model_ids']}")
print(f"\nInputs:")
for name, spec in info['inputs'].items():
print(f" {name}: {spec['shape']} ({spec['dtype']})")
print(f"\nOutputs:")
for name, spec in info['outputs'].items():
print(f" {name}: {spec['shape']} ({spec['dtype']})")
# Simulate inference from multiple cameras
print("\n" + "=" * 80)
print("RUNNING INFERENCE (GPU-to-GPU)")
print("=" * 80)
# Create dummy input tensors (simulating frames from cameras)
# In real scenario, these come from StreamDecoder.get_frame()
batch_size = 1
channels = 3
height = 640
width = 640
for config in camera_configs:
try:
# Simulate getting frame from camera (already on GPU)
input_tensor = torch.rand(
batch_size, channels, height, width,
dtype=torch.float32,
device='cuda:0'
)
# Run inference (stays in GPU)
start = time.time()
outputs = repo.infer(
model_id=config["id"],
inputs={"images": input_tensor}, # Adjust input name based on your model
synchronize=True,
timeout=5.0
)
elapsed = (time.time() - start) * 1000 # Convert to ms
print(f"\n{config['id']}: Inference completed in {elapsed:.2f}ms")
for name, tensor in outputs.items():
print(f" Output '{name}': {tensor.shape} on {tensor.device}")
except Exception as e:
print(f"\n{config['id']}: Inference failed: {e}")
# Cleanup
print("\n" + "=" * 80)
print("CLEANUP")
print("=" * 80)
for config in camera_configs:
repo.unload_model(config["id"])
print("\nAll models unloaded.")
def test_rtsp_stream_with_inference():
"""
Real-world example: Decode RTSP stream and run inference.
Everything stays in GPU memory (zero CPU transfers).
"""
print("=" * 80)
print("RTSP STREAM + TENSORRT INFERENCE (GPU-to-GPU)")
print("=" * 80)
# Initialize components
decoder_factory = StreamDecoderFactory(gpu_id=0)
model_repo = TensorRTModelRepository(gpu_id=0, default_num_contexts=4)
# Setup camera stream
rtsp_url = "rtsp://your-camera-ip/stream"
decoder = decoder_factory.create_decoder(rtsp_url, buffer_size=30)
decoder.start()
# Load inference model
try:
model_repo.load_model(
model_id="camera_main",
file_path="models/yolov8n.trt"
)
except FileNotFoundError:
print("\n⚠ Model file not found. Please export your model to TensorRT:")
print(" Example: yolo export model=yolov8n.pt format=engine device=0")
return
print("\nWaiting for stream to buffer frames...")
time.sleep(3)
# Process frames
for i in range(10):
# Get frame from decoder (already on GPU)
frame_gpu = decoder.get_latest_frame(rgb=True) # Returns torch.Tensor on CUDA
if frame_gpu is None:
print(f"Frame {i}: No frame available")
continue
# Preprocess if needed (stays on GPU)
# For YOLOv8: normalize, resize, etc.
# Example preprocessing (adjust for your model):
frame_gpu = frame_gpu.float() / 255.0 # Normalize to [0, 1]
frame_gpu = frame_gpu.unsqueeze(0) # Add batch dimension: (1, 3, H, W)
# Run inference (GPU-to-GPU, zero copy)
try:
outputs = model_repo.infer(
model_id="camera_main",
inputs={"images": frame_gpu},
synchronize=True
)
print(f"\nFrame {i}: Inference successful")
for name, tensor in outputs.items():
print(f" {name}: {tensor.shape} on {tensor.device}")
# Post-process results (can stay on GPU or move to CPU as needed)
# Example: NMS, bounding box extraction, etc.
except Exception as e:
print(f"\nFrame {i}: Inference failed: {e}")
time.sleep(0.1) # Simulate processing interval
# Cleanup
decoder.stop()
model_repo.unload_model("camera_main")
print("\n✓ Test completed successfully")
def test_concurrent_inference():
"""
Test concurrent inference from multiple threads.
Demonstrates context pool load balancing.
"""
import threading
print("=" * 80)
print("CONCURRENT INFERENCE TEST (Context Pool Load Balancing)")
print("=" * 80)
repo = TensorRTModelRepository(gpu_id=0, default_num_contexts=4)
# Load model
try:
repo.load_model("shared_model", "models/yolov8n.trt", num_contexts=4)
except Exception as e:
print(f"Failed to load model: {e}")
return
def worker(worker_id: int, num_inferences: int):
"""Worker thread performing inference"""
for i in range(num_inferences):
try:
# Create dummy input
input_tensor = torch.rand(1, 3, 640, 640, device='cuda:0', dtype=torch.float32)
# Acquire context from pool, run inference, release context
outputs = repo.infer(
model_id="shared_model",
inputs={"images": input_tensor},
timeout=10.0
)
print(f"Worker {worker_id}, Inference {i}: SUCCESS")
except Exception as e:
print(f"Worker {worker_id}, Inference {i}: FAILED - {e}")
time.sleep(0.01) # Small delay
# Launch multiple worker threads (more workers than contexts!)
threads = []
num_workers = 10 # 10 workers sharing 4 contexts
inferences_per_worker = 5
print(f"\nLaunching {num_workers} workers (only 4 contexts available)")
print("Contexts will be borrowed/returned automatically\n")
start_time = time.time()
for worker_id in range(num_workers):
t = threading.Thread(target=worker, args=(worker_id, inferences_per_worker))
threads.append(t)
t.start()
# Wait for all workers
for t in threads:
t.join()
elapsed = time.time() - start_time
total_inferences = num_workers * inferences_per_worker
print(f"\n✓ Completed {total_inferences} inferences in {elapsed:.2f}s")
print(f" Throughput: {total_inferences / elapsed:.2f} inferences/sec")
print(f" With only 4 contexts for {num_workers} workers!")
repo.unload_model("shared_model")
if __name__ == "__main__":
print("\n" + "=" * 80)
print("TENSORRT MODEL REPOSITORY - TEST SUITE")
print("=" * 80)
# Test 1: Multi-camera model loading
print("\n\nTEST 1: Multi-Camera Model Loading with Deduplication")
print("-" * 80)
try:
test_multi_camera_inference()
except Exception as e:
print(f"Test 1 failed: {e}")
# Test 2: RTSP stream + inference (commented out by default)
# Uncomment if you have a real RTSP stream
# print("\n\nTEST 2: RTSP Stream + Inference")
# print("-" * 80)
# try:
# test_rtsp_stream_with_inference()
# except Exception as e:
# print(f"Test 2 failed: {e}")
# Test 3: Concurrent inference
print("\n\nTEST 3: Concurrent Inference with Context Pooling")
print("-" * 80)
try:
test_concurrent_inference()
except Exception as e:
print(f"Test 3 failed: {e}")
print("\n" + "=" * 80)
print("ALL TESTS COMPLETED")
print("=" * 80)


@@ -1,255 +0,0 @@
#!/usr/bin/env python3
"""
Multi-stream test script to verify CUDA context sharing efficiency.
Tests multiple RTSP streams simultaneously and monitors VRAM usage.
"""
import argparse
import time
import sys
import subprocess
import os
from pathlib import Path
from dotenv import load_dotenv
from services import StreamDecoderFactory, ConnectionStatus
# Load environment variables from .env file
load_dotenv()
def get_gpu_memory_usage(gpu_id: int = 0) -> int:
"""Get current GPU memory usage in MB using nvidia-smi"""
try:
result = subprocess.run(
['nvidia-smi', '--query-gpu=memory.used', '--format=csv,noheader,nounits', f'--id={gpu_id}'],
capture_output=True,
text=True,
check=True
)
return int(result.stdout.strip())
except Exception as e:
print(f"Warning: Could not get GPU memory usage: {e}")
return 0
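# Hedged sketch (not part of the original script): the same measurement through the NVML
# bindings instead of shelling out to nvidia-smi. Assumes the nvidia-ml-py package (imported
# as pynvml) is installed; "get_gpu_memory_usage_nvml" is a hypothetical name.
def get_gpu_memory_usage_nvml(gpu_id: int = 0) -> int:
    """Get current GPU memory usage in MB using NVML."""
    import pynvml
    pynvml.nvmlInit()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
        return int(pynvml.nvmlDeviceGetMemoryInfo(handle).used // (1024 * 1024))
    finally:
        pynvml.nvmlShutdown()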
def main():
parser = argparse.ArgumentParser(description='Test multi-stream decoding with context sharing')
parser.add_argument(
'--gpu-id',
type=int,
default=0,
help='GPU device ID'
)
parser.add_argument(
'--duration',
type=int,
default=20,
help='Test duration in seconds'
)
parser.add_argument(
'--capture-snapshots',
action='store_true',
help='Capture JPEG snapshots during test'
)
parser.add_argument(
'--output-dir',
type=str,
default='./multi_stream_snapshots',
help='Output directory for snapshots'
)
args = parser.parse_args()
# Load camera URLs from environment
camera_urls = []
i = 1
while True:
url = os.getenv(f'CAMERA_URL_{i}')
if url:
camera_urls.append(url)
i += 1
else:
break
if not camera_urls:
print("Error: No camera URLs found in .env file")
print("Please add CAMERA_URL_1, CAMERA_URL_2, etc. to your .env file")
sys.exit(1)
# Create output directory if capturing snapshots
if args.capture_snapshots:
output_dir = Path(args.output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
print("=" * 80)
print("Multi-Stream RTSP Decoder Test - Context Sharing Verification")
print("=" * 80)
print(f"Number of Streams: {len(camera_urls)}")
print(f"GPU ID: {args.gpu_id}")
print(f"Test Duration: {args.duration} seconds")
print(f"Capture Snapshots: {args.capture_snapshots}")
print("=" * 80)
print()
try:
# Get baseline GPU memory
print("[Baseline] Measuring initial GPU memory usage...")
baseline_memory = get_gpu_memory_usage(args.gpu_id)
print(f"✓ Baseline VRAM: {baseline_memory} MB\n")
# Initialize factory (shared CUDA context)
print("[1/4] Initializing StreamDecoderFactory with shared CUDA context...")
factory = StreamDecoderFactory(gpu_id=args.gpu_id)
factory_memory = get_gpu_memory_usage(args.gpu_id)
factory_overhead = factory_memory - baseline_memory
print(f"✓ Factory initialized")
print(f" VRAM after factory: {factory_memory} MB (+{factory_overhead} MB)\n")
# Create all decoders
print(f"[2/4] Creating {len(camera_urls)} StreamDecoder instances...")
decoders = []
for i, url in enumerate(camera_urls):
decoder = factory.create_decoder(
rtsp_url=url,
buffer_size=30,
codec='h264'
)
decoders.append(decoder)
print(f" ✓ Decoder {i+1} created for camera {url.split('@')[1].split('/')[0]}")
decoders_memory = get_gpu_memory_usage(args.gpu_id)
decoders_overhead = decoders_memory - factory_memory
print(f"\n VRAM after creating {len(decoders)} decoders: {decoders_memory} MB (+{decoders_overhead} MB)")
print(f" Average per decoder: {decoders_overhead / len(decoders):.1f} MB\n")
# Start all decoders
print(f"[3/4] Starting all {len(decoders)} decoders...")
for i, decoder in enumerate(decoders):
decoder.start()
print(f" ✓ Decoder {i+1} started")
started_memory = get_gpu_memory_usage(args.gpu_id)
started_overhead = started_memory - decoders_memory
print(f"\n VRAM after starting decoders: {started_memory} MB (+{started_overhead} MB)")
print(f" Average per running decoder: {started_overhead / len(decoders):.1f} MB\n")
# Wait for all streams to connect
print("[4/4] Waiting for all streams to connect...")
max_wait = 15
for wait_time in range(max_wait):
connected = sum(1 for d in decoders if d.is_connected())
print(f" Connected: {connected}/{len(decoders)} streams", end='\r')
if connected == len(decoders):
print(f"\n✓ All {len(decoders)} streams connected!\n")
break
time.sleep(1)
else:
connected = sum(1 for d in decoders if d.is_connected())
print(f"\n⚠ Only {connected}/{len(decoders)} streams connected after {max_wait}s\n")
connected_memory = get_gpu_memory_usage(args.gpu_id)
connected_overhead = connected_memory - started_memory
print(f" VRAM after connection: {connected_memory} MB (+{connected_overhead} MB)\n")
# Monitor streams
print(f"Monitoring streams for {args.duration} seconds...")
print("=" * 80)
print(f"{'Time':<8} {'VRAM':<10} {'Stream 1':<12} {'Stream 2':<12} {'Stream 3':<12} {'Stream 4':<12}")
print("-" * 80)
start_time = time.time()
snapshot_interval = args.duration // 3 if args.capture_snapshots else 0
last_snapshot = 0
while time.time() - start_time < args.duration:
elapsed = time.time() - start_time
current_memory = get_gpu_memory_usage(args.gpu_id)
# Get stats for each decoder
stats = []
for decoder in decoders:
status = decoder.get_status().value[:8]
buffer = decoder.get_buffer_size()
frames = decoder.frame_count
stats.append(f"{status:8s} {buffer:2d}/30 {frames:4d}")
print(f"{elapsed:6.1f}s {current_memory:6d}MB {stats[0]:<12} {stats[1]:<12} {stats[2]:<12} {stats[3]:<12}")
# Capture snapshots
if args.capture_snapshots and snapshot_interval > 0:
if elapsed - last_snapshot >= snapshot_interval:
print("\n → Capturing snapshots from all streams...")
for i, decoder in enumerate(decoders):
jpeg_bytes = decoder.get_frame_as_jpeg(quality=85)
if jpeg_bytes:
filename = output_dir / f"camera_{i+1}_t{int(elapsed)}s.jpg"
with open(filename, 'wb') as f:
f.write(jpeg_bytes)
print(f" Saved {filename.name} ({len(jpeg_bytes)/1024:.1f} KB)")
print()
last_snapshot = elapsed
time.sleep(1)
print("=" * 80)
# Final memory analysis
final_memory = get_gpu_memory_usage(args.gpu_id)
total_overhead = final_memory - baseline_memory
print("\n" + "=" * 80)
print("Memory Usage Analysis")
print("=" * 80)
print(f"Baseline VRAM: {baseline_memory:6d} MB")
print(f"After Factory Init: {factory_memory:6d} MB (+{factory_overhead:4d} MB)")
print(f"After Creating {len(decoders)} Decoders: {decoders_memory:6d} MB (+{decoders_overhead:4d} MB)")
print(f"After Starting Decoders: {started_memory:6d} MB (+{started_overhead:4d} MB)")
print(f"After Connection: {connected_memory:6d} MB (+{connected_overhead:4d} MB)")
print(f"Final (after {args.duration}s): {final_memory:6d} MB (+{total_overhead:4d} MB total)")
print("-" * 80)
print(f"Average VRAM per stream: {total_overhead / len(decoders):6.1f} MB")
print(f"Context sharing efficiency: {'EXCELLENT' if total_overhead < 500 else 'GOOD' if total_overhead < 800 else 'POOR'}")
print("=" * 80)
# Final stats
print("\nFinal Stream Statistics:")
print("-" * 80)
for i, decoder in enumerate(decoders):
status = decoder.get_status().value
buffer = decoder.get_buffer_size()
frames = decoder.frame_count
fps = frames / args.duration if args.duration > 0 else 0
print(f"Stream {i+1}: {status:12s} | Buffer: {buffer:2d}/{decoder.buffer_size} | "
f"Frames: {frames:5d} | Avg FPS: {fps:5.2f}")
print("=" * 80)
except KeyboardInterrupt:
print("\n\n✗ Interrupted by user")
sys.exit(1)
except Exception as e:
print(f"\n\n✗ Error: {e}")
import traceback
traceback.print_exc()
sys.exit(1)
finally:
# Cleanup
if 'decoders' in locals():
print("\nCleaning up...")
for i, decoder in enumerate(decoders):
decoder.stop()
print(f" ✓ Decoder {i+1} stopped")
cleanup_memory = get_gpu_memory_usage(args.gpu_id)
print(f"\nVRAM after cleanup: {cleanup_memory} MB")
print("\n✓ Multi-stream test completed successfully")
sys.exit(0)
if __name__ == '__main__':
main()


@@ -1,152 +0,0 @@
#!/usr/bin/env python3
"""
CLI test script for StreamDecoder
Tests RTSP stream decoding with NVDEC hardware acceleration
"""
import argparse
import time
import sys
from services.stream_decoder import StreamDecoderFactory, ConnectionStatus
def main():
parser = argparse.ArgumentParser(description='Test RTSP stream decoder with NVDEC')
parser.add_argument(
'--rtsp-url',
type=str,
required=True,
help='RTSP stream URL (e.g., rtsp://user:pass@host/path)'
)
parser.add_argument(
'--gpu-id',
type=int,
default=0,
help='GPU device ID'
)
parser.add_argument(
'--buffer-size',
type=int,
default=30,
help='Frame buffer size'
)
parser.add_argument(
'--duration',
type=int,
default=30,
help='Test duration in seconds'
)
parser.add_argument(
'--check-interval',
type=float,
default=1.0,
help='Status check interval in seconds'
)
args = parser.parse_args()
print("=" * 80)
print("RTSP Stream Decoder Test")
print("=" * 80)
print(f"RTSP URL: {args.rtsp_url}")
print(f"GPU ID: {args.gpu_id}")
print(f"Buffer Size: {args.buffer_size} frames")
print(f"Test Duration: {args.duration} seconds")
print("=" * 80)
print()
try:
# Create factory with shared CUDA context
print("[1/4] Initializing StreamDecoderFactory...")
factory = StreamDecoderFactory(gpu_id=args.gpu_id)
print("✓ Factory initialized with shared CUDA context\n")
# Create decoder
print("[2/4] Creating StreamDecoder...")
decoder = factory.create_decoder(
rtsp_url=args.rtsp_url,
buffer_size=args.buffer_size,
codec='h264'
)
print(f"✓ Decoder created: {decoder}\n")
# Start decoding
print("[3/4] Starting decoder thread...")
decoder.start()
print("✓ Decoder thread started\n")
# Monitor for specified duration
print(f"[4/4] Monitoring stream for {args.duration} seconds...")
print("-" * 80)
start_time = time.time()
last_frame_count = 0
while time.time() - start_time < args.duration:
time.sleep(args.check_interval)
# Get status
status = decoder.get_status()
buffer_size = decoder.get_buffer_size()
frame_count = decoder.frame_count
fps = (frame_count - last_frame_count) / args.check_interval
last_frame_count = frame_count
# Print status
elapsed = time.time() - start_time
print(f"[{elapsed:6.1f}s] Status: {status.value:12s} | "
f"Buffer: {buffer_size:2d}/{args.buffer_size:2d} | "
f"Frames: {frame_count:5d} | "
f"FPS: {fps:5.1f}")
# Try to get latest frame
if status == ConnectionStatus.CONNECTED:
frame = decoder.get_latest_frame()
if frame is not None:
print(f" Frame shape: {frame.shape}, dtype: {frame.dtype}, "
f"device: {frame.device}")
# Check for errors
if status == ConnectionStatus.ERROR:
print("\n✗ ERROR: Stream connection failed!")
break
print("-" * 80)
# Final statistics
print("\n" + "=" * 80)
print("Test Complete - Final Statistics")
print("=" * 80)
print(f"Total Frames Decoded: {decoder.frame_count}")
print(f"Average FPS: {decoder.frame_count / args.duration:.2f}")
print(f"Final Status: {decoder.get_status().value}")
print(f"Buffer Utilization: {decoder.get_buffer_size()}/{args.buffer_size}")
if decoder.frame_width and decoder.frame_height:
print(f"Frame Resolution: {decoder.frame_width}x{decoder.frame_height}")
print("=" * 80)
except KeyboardInterrupt:
print("\n\n✗ Interrupted by user")
sys.exit(1)
except Exception as e:
print(f"\n\n✗ Error: {e}")
import traceback
traceback.print_exc()
sys.exit(1)
finally:
# Cleanup
if 'decoder' in locals():
print("\nCleaning up...")
decoder.stop()
print("✓ Decoder stopped")
print("\n✓ Test completed successfully")
sys.exit(0)
if __name__ == '__main__':
main()


@@ -1,143 +0,0 @@
#!/usr/bin/env python3
"""
VRAM scaling test - measures Python process memory usage for 1, 2, 3, and 4 streams.
"""
import os
import time
import subprocess
from dotenv import load_dotenv
from services import StreamDecoderFactory
# Load environment variables from .env file
load_dotenv()
# Load camera URLs from environment
camera_urls = []
i = 1
while True:
url = os.getenv(f'CAMERA_URL_{i}')
if url:
camera_urls.append(url)
i += 1
else:
break
if not camera_urls:
print("Error: No camera URLs found in .env file")
print("Please add CAMERA_URL_1, CAMERA_URL_2, etc. to your .env file")
exit(1)
def get_python_gpu_memory():
"""Get Python process GPU memory usage in MB"""
try:
pid = os.getpid()
result = subprocess.run(
['nvidia-smi', '--query-compute-apps=pid,used_memory', '--format=csv,noheader,nounits'],
capture_output=True, text=True, check=True
)
for line in result.stdout.strip().split('\n'):
if line:
parts = line.split(',')
if len(parts) >= 2 and int(parts[0].strip()) == pid:
return int(parts[1].strip())
return 0
except Exception:
    return 0
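# Hedged note (not part of the original script): torch.cuda.memory_allocated(0) / 2**20 would
# report only PyTorch-managed allocations for this process and misses NVDEC/driver overhead,
# so the per-process nvidia-smi query above remains the more complete measure here.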
def test_n_streams(n, wait_time=15):
"""Test with n streams"""
print(f"\n{'='*80}")
print(f"Testing with {n} stream(s)")
print('='*80)
mem_before = get_python_gpu_memory()
print(f"Python process VRAM before: {mem_before} MB")
# Create factory
factory = StreamDecoderFactory(gpu_id=0)
time.sleep(1)
mem_after_factory = get_python_gpu_memory()
print(f"After factory: {mem_after_factory} MB (+{mem_after_factory - mem_before} MB)")
# Create decoders
decoders = []
for i in range(n):
decoder = factory.create_decoder(camera_urls[i], buffer_size=30)
decoders.append(decoder)
time.sleep(1)
mem_after_create = get_python_gpu_memory()
print(f"After creating {n} decoder(s): {mem_after_create} MB (+{mem_after_create - mem_after_factory} MB)")
# Start decoders
for decoder in decoders:
decoder.start()
time.sleep(2)
mem_after_start = get_python_gpu_memory()
print(f"After starting {n} decoder(s): {mem_after_start} MB (+{mem_after_start - mem_after_create} MB)")
# Wait for connection
print(f"Waiting {wait_time}s for streams to connect and stabilize...")
time.sleep(wait_time)
# Check connection status
connected = sum(1 for d in decoders if d.is_connected())
mem_stable = get_python_gpu_memory()
print(f"Connected: {connected}/{n} streams")
print(f"Python process VRAM (stable): {mem_stable} MB")
# Get frame stats
for i, decoder in enumerate(decoders):
print(f" Stream {i+1}: {decoder.get_status().value:10s} "
f"Buffer: {decoder.get_buffer_size()}/30 "
f"Frames: {decoder.frame_count}")
# Cleanup
for decoder in decoders:
decoder.stop()
time.sleep(2)
mem_after_cleanup = get_python_gpu_memory()
print(f"After cleanup: {mem_after_cleanup} MB")
return mem_stable
if __name__ == '__main__':
print("Python VRAM Scaling Test")
print(f"PID: {os.getpid()}")
baseline = get_python_gpu_memory()
print(f"Baseline Python process VRAM: {baseline} MB\n")
results = {}
for n in [1, 2, 3, 4]:
mem = test_n_streams(n, wait_time=15)
results[n] = mem
print(f"\n{n} stream(s): {mem} MB (process total)")
# Give time between tests
if n < 4:
print("\nWaiting 5s before next test...")
time.sleep(5)
# Summary
print("\n" + "="*80)
print("Python Process VRAM Scaling Summary")
print("="*80)
print(f"Baseline: {baseline:4d} MB")
for n in [1, 2, 3, 4]:
total = results[n]
overhead = total - baseline
per_stream = overhead / n if n > 0 else 0
print(f"{n} stream(s): {total:4d} MB (+{overhead:3d} MB total, {per_stream:5.1f} MB per stream)")
# Calculate marginal cost
print("\nMarginal cost per additional stream:")
for n in [2, 3, 4]:
marginal = results[n] - results[n-1]
print(f" Stream {n}: +{marginal} MB")
print("="*80)


@@ -1,85 +0,0 @@
#!/usr/bin/env python3
"""
Quick verification script for TensorRT model
"""
import torch
from services.model_repository import TensorRTModelRepository
def verify_model():
print("=" * 80)
print("TensorRT Model Verification")
print("=" * 80)
# Initialize repository
repo = TensorRTModelRepository(gpu_id=0, default_num_contexts=2)
# Load the model
print("\nLoading YOLOv8n TensorRT engine...")
try:
metadata = repo.load_model(
model_id="yolov8n_test",
file_path="models/yolov8n.trt",
num_contexts=2
)
print("✓ Model loaded successfully!")
except Exception as e:
print(f"✗ Failed to load model: {e}")
return
# Get model info
print("\n" + "=" * 80)
print("Model Information")
print("=" * 80)
info = repo.get_model_info("yolov8n_test")
if info:
print(f"Model ID: {info['model_id']}")
print(f"File: {info['file_path']}")
print(f"File hash: {info['file_hash']}")
print(f"\nInputs:")
for name, spec in info['inputs'].items():
print(f" {name}: {spec['shape']} ({spec['dtype']})")
print(f"\nOutputs:")
for name, spec in info['outputs'].items():
print(f" {name}: {spec['shape']} ({spec['dtype']})")
# Run test inference
print("\n" + "=" * 80)
print("Running Test Inference")
print("=" * 80)
try:
# Create dummy input (simulating a 640x640 image)
input_tensor = torch.rand(1, 3, 640, 640, dtype=torch.float32, device='cuda:0')
print(f"Input tensor: {input_tensor.shape} on {input_tensor.device}")
# Run inference
outputs = repo.infer(
model_id="yolov8n_test",
inputs={"images": input_tensor},
synchronize=True
)
print("\n✓ Inference successful!")
print("\nOutputs:")
for name, tensor in outputs.items():
print(f" {name}: {tensor.shape} on {tensor.device} ({tensor.dtype})")
except Exception as e:
print(f"\n✗ Inference failed: {e}")
import traceback
traceback.print_exc()
# Cleanup
print("\n" + "=" * 80)
print("Cleanup")
print("=" * 80)
repo.unload_model("yolov8n_test")
print("✓ Model unloaded")
print("\n" + "=" * 80)
print("Verification Complete!")
print("=" * 80)
if __name__ == "__main__":
verify_model()