remove redundant examples
parent dd57b5a246
commit d3dbf9a580
8 changed files with 0 additions and 1648 deletions
@@ -1,340 +0,0 @@
"""
FPS Benchmark Test for Single vs Multi-Camera Tracking

This script benchmarks the FPS performance of:
1. Single camera tracking
2. Multi-camera tracking (2+ cameras)

Usage:
    python test_fps_benchmark.py
"""

import time
import os
from dotenv import load_dotenv
from services import (
    StreamDecoderFactory,
    TensorRTModelRepository,
    TrackingFactory,
    YOLOv8Utils,
    COCO_CLASSES,
)

load_dotenv()
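# The scripts in this commit read CAMERA_URL_1, CAMERA_URL_2, ... from .env.
# Example .env (hypothetical values, for illustration only):
#   CAMERA_URL_1=rtsp://user:pass@192.168.1.10:554/stream1
#   CAMERA_URL_2=rtsp://user:pass@192.168.1.11:554/stream1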

def benchmark_single_camera(duration=30):
    """
    Benchmark single camera tracking performance.

    Args:
        duration: Test duration in seconds

    Returns:
        Dictionary with FPS statistics
    """
    print("\n" + "=" * 80)
    print("SINGLE CAMERA BENCHMARK")
    print("=" * 80)

    GPU_ID = 0
    MODEL_PATH = "models/yolov8n.trt"
    RTSP_URL = os.getenv('CAMERA_URL_1', 'rtsp://localhost:8554/test')

    # Initialize components
    print("\nInitializing...")
    model_repo = TensorRTModelRepository(gpu_id=GPU_ID, default_num_contexts=4)
    model_repo.load_model("detector", MODEL_PATH, num_contexts=4)

    tracking_factory = TrackingFactory(gpu_id=GPU_ID)
    controller = tracking_factory.create_controller(
        model_repository=model_repo,
        model_id="detector",
        tracker_type="iou",
        max_age=30,
        min_confidence=0.5,
        iou_threshold=0.3,
        class_names=COCO_CLASSES
    )

    stream_factory = StreamDecoderFactory(gpu_id=GPU_ID)
    decoder = stream_factory.create_decoder(RTSP_URL, buffer_size=30)
    decoder.start()

    print("Waiting for stream connection...")
    time.sleep(5)

    if not decoder.is_connected():
        print("⚠ Stream not connected, results may be inaccurate")

    # Benchmark
    print(f"\nRunning benchmark for {duration} seconds...")
    frame_count = 0
    start_time = time.time()

    fps_samples = []
    sample_start = time.time()
    sample_frames = 0

    try:
        while time.time() - start_time < duration:
            frame_gpu = decoder.get_latest_frame(rgb=True)

            if frame_gpu is None:
                time.sleep(0.001)
                continue

            # Run tracking
            tracked_objects = controller.track(
                frame_gpu,
                preprocess_fn=YOLOv8Utils.preprocess,
                postprocess_fn=YOLOv8Utils.postprocess
            )

            frame_count += 1
            sample_frames += 1

            # Sample FPS every second
            if time.time() - sample_start >= 1.0:
                fps = sample_frames / (time.time() - sample_start)
                fps_samples.append(fps)
                sample_frames = 0
                sample_start = time.time()
                print(f"  Current FPS: {fps:.2f}")

    except KeyboardInterrupt:
        print("\nBenchmark interrupted")

    # Calculate statistics
    total_time = time.time() - start_time
    avg_fps = frame_count / total_time

    # Cleanup
    decoder.stop()

    stats = {
        'total_frames': frame_count,
        'total_time': total_time,
        'avg_fps': avg_fps,
        'min_fps': min(fps_samples) if fps_samples else 0,
        'max_fps': max(fps_samples) if fps_samples else 0,
        'samples': fps_samples
    }

    print("\n" + "-" * 80)
    print(f"Total Frames: {stats['total_frames']}")
    print(f"Total Time: {stats['total_time']:.2f} seconds")
    print(f"Average FPS: {stats['avg_fps']:.2f}")
    print(f"Min FPS: {stats['min_fps']:.2f}")
    print(f"Max FPS: {stats['max_fps']:.2f}")
    print("-" * 80)

    return stats


def benchmark_multi_camera(duration=30):
    """
    Benchmark multi-camera tracking performance.

    Args:
        duration: Test duration in seconds

    Returns:
        Dictionary with FPS statistics per camera
    """
    print("\n" + "=" * 80)
    print("MULTI-CAMERA BENCHMARK")
    print("=" * 80)

    GPU_ID = 0
    MODEL_PATH = "models/yolov8n.trt"

    # Load camera URLs
    camera_urls = []
    i = 1
    while True:
        url = os.getenv(f'CAMERA_URL_{i}')
        if url:
            camera_urls.append(url)
            i += 1
        else:
            break

    if len(camera_urls) < 2:
        print("⚠ Need at least 2 cameras for multi-camera test")
        print(f"  Found only {len(camera_urls)} camera(s) in .env")
        return None

    print(f"\nTesting with {len(camera_urls)} cameras")

    # Initialize components
    print("\nInitializing...")
    model_repo = TensorRTModelRepository(gpu_id=GPU_ID, default_num_contexts=8)
    model_repo.load_model("detector", MODEL_PATH, num_contexts=8)

    tracking_factory = TrackingFactory(gpu_id=GPU_ID)
    stream_factory = StreamDecoderFactory(gpu_id=GPU_ID)

    decoders = []
    controllers = []

    for i, url in enumerate(camera_urls):
        # Create decoder
        decoder = stream_factory.create_decoder(url, buffer_size=30)
        decoder.start()
        decoders.append(decoder)

        # Create controller
        controller = tracking_factory.create_controller(
            model_repository=model_repo,
            model_id="detector",
            tracker_type="iou",
            max_age=30,
            min_confidence=0.5,
            iou_threshold=0.3,
            class_names=COCO_CLASSES
        )
        controllers.append(controller)

        print(f"  Camera {i+1}: {url}")

    print("\nWaiting for streams to connect...")
    time.sleep(10)

    # Benchmark
    print(f"\nRunning benchmark for {duration} seconds...")

    frame_counts = [0] * len(camera_urls)
    fps_samples = [[] for _ in camera_urls]
    sample_starts = [time.time()] * len(camera_urls)
    sample_frames = [0] * len(camera_urls)

    start_time = time.time()

    try:
        while time.time() - start_time < duration:
            for i, (decoder, controller) in enumerate(zip(decoders, controllers)):
                frame_gpu = decoder.get_latest_frame(rgb=True)

                if frame_gpu is None:
                    continue

                # Run tracking
                tracked_objects = controller.track(
                    frame_gpu,
                    preprocess_fn=YOLOv8Utils.preprocess,
                    postprocess_fn=YOLOv8Utils.postprocess
                )

                frame_counts[i] += 1
                sample_frames[i] += 1

                # Sample FPS every second
                if time.time() - sample_starts[i] >= 1.0:
                    fps = sample_frames[i] / (time.time() - sample_starts[i])
                    fps_samples[i].append(fps)
                    sample_frames[i] = 0
                    sample_starts[i] = time.time()

    except KeyboardInterrupt:
        print("\nBenchmark interrupted")

    # Calculate statistics
    total_time = time.time() - start_time

    # Cleanup
    for decoder in decoders:
        decoder.stop()

    # Compile results
    results = {}
    total_frames = 0

    print("\n" + "-" * 80)
    for i in range(len(camera_urls)):
        avg_fps = frame_counts[i] / total_time if total_time > 0 else 0
        total_frames += frame_counts[i]

        cam_stats = {
            'total_frames': frame_counts[i],
            'avg_fps': avg_fps,
            'min_fps': min(fps_samples[i]) if fps_samples[i] else 0,
            'max_fps': max(fps_samples[i]) if fps_samples[i] else 0,
        }

        results[f'camera_{i+1}'] = cam_stats

        print(f"Camera {i+1}:")
        print(f"  Total Frames: {cam_stats['total_frames']}")
        print(f"  Average FPS: {cam_stats['avg_fps']:.2f}")
        print(f"  Min FPS: {cam_stats['min_fps']:.2f}")
        print(f"  Max FPS: {cam_stats['max_fps']:.2f}")
        print()

    # Combined stats
    combined_avg_fps = total_frames / total_time if total_time > 0 else 0

    print("-" * 80)
    print("COMBINED:")
    print(f"  Total Frames (all cameras): {total_frames}")
    print(f"  Total Time: {total_time:.2f} seconds")
    print(f"  Combined Throughput: {combined_avg_fps:.2f} FPS")
    print(f"  Per-Camera Average: {combined_avg_fps / len(camera_urls):.2f} FPS")
    print("-" * 80)

    results['combined'] = {
        'total_frames': total_frames,
        'total_time': total_time,
        'combined_fps': combined_avg_fps,
        'per_camera_avg': combined_avg_fps / len(camera_urls)
    }

    return results


def main():
    """Run both benchmarks and compare."""
    print("=" * 80)
    print("FPS BENCHMARK: Single vs Multi-Camera Tracking")
    print("=" * 80)

    # Run single camera benchmark
    single_stats = benchmark_single_camera(duration=30)

    # Run multi-camera benchmark
    multi_stats = benchmark_multi_camera(duration=30)

    # Comparison (guard against a zero-FPS single-camera run, which would
    # otherwise divide by zero below)
    if multi_stats and single_stats['avg_fps'] > 0:
        print("\n" + "=" * 80)
        print("COMPARISON")
        print("=" * 80)

        print("\nSingle Camera Performance:")
        print(f"  Average FPS: {single_stats['avg_fps']:.2f}")

        print("\nMulti-Camera Performance:")
        print(f"  Per-Camera Average: {multi_stats['combined']['per_camera_avg']:.2f} FPS")
        print(f"  Combined Throughput: {multi_stats['combined']['combined_fps']:.2f} FPS")

        # Calculate performance drop
        fps_drop = ((single_stats['avg_fps'] - multi_stats['combined']['per_camera_avg'])
                    / single_stats['avg_fps'] * 100)

        print("\nPerformance Analysis:")
        print(f"  FPS Drop per Camera: {fps_drop:.1f}%")

        if fps_drop < 10:
            print("  ✓ Excellent - Minimal performance impact")
        elif fps_drop < 25:
            print("  ✓ Good - Acceptable performance scaling")
        elif fps_drop < 50:
            print("  ⚠ Moderate - Some performance degradation")
        else:
            print("  ⚠ Significant - Consider optimizations")

        print("=" * 80)


if __name__ == "__main__":
    main()
@@ -1,189 +0,0 @@

import time
import torch
import os
from dotenv import load_dotenv
from services.model_repository import TensorRTModelRepository
from services.stream_decoder import StreamDecoderFactory
import numpy as np

# COCO class names for YOLOv8
COCO_CLASSES = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

def postprocess(output, confidence_threshold=0.25, iou_threshold=0.45):
    """
    Post-processes the output of a YOLOv8 model to extract bounding boxes, scores, and class IDs.
    """
    # output shape: (batch_size, 84, 8400)
    # 84 = 4 (bbox) + 80 (classes)

    # Transpose the output to (batch_size, 8400, 84)
    output = output.transpose(1, 2)

    boxes = []
    scores = []
    class_ids = []

    for detection in output[0]:
        # First 4 values are bbox (cx, cy, w, h)
        # The rest are class scores
        class_scores = detection[4:]
        max_score, max_class_id = torch.max(class_scores, 0)

        if max_score > confidence_threshold:
            cx, cy, w, h = detection[:4]

            # Convert from center-width-height to x1-y1-x2-y2
            x1 = cx - w / 2
            y1 = cy - h / 2
            x2 = cx + w / 2
            y2 = cy + h / 2

            boxes.append([x1.item(), y1.item(), x2.item(), y2.item()])
            scores.append(max_score.item())
            class_ids.append(max_class_id.item())

    if not boxes:
        return [], [], []

    # Perform Non-Maximum Suppression (NMS)
    # This is a simplified version. For production, use a library like torchvision.ops.nms
    indices = []
    boxes_np = np.array(boxes)
    scores_np = np.array(scores)

    order = scores_np.argsort()[::-1]

    while order.size > 0:
        i = order[0]
        indices.append(i)

        xx1 = np.maximum(boxes_np[i, 0], boxes_np[order[1:], 0])
        yy1 = np.maximum(boxes_np[i, 1], boxes_np[order[1:], 1])
        xx2 = np.minimum(boxes_np[i, 2], boxes_np[order[1:], 2])
        yy2 = np.minimum(boxes_np[i, 3], boxes_np[order[1:], 3])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h

        # IoU = intersection / (area_i + area_others - intersection)
        ovr = inter / ((boxes_np[i, 2] - boxes_np[i, 0] + 1) * (boxes_np[i, 3] - boxes_np[i, 1] + 1) +
                       (boxes_np[order[1:], 2] - boxes_np[order[1:], 0] + 1) *
                       (boxes_np[order[1:], 3] - boxes_np[order[1:], 1] + 1) - inter)

        inds = np.where(ovr <= iou_threshold)[0]
        order = order[inds + 1]

    final_boxes = [boxes[i] for i in indices]
    final_scores = [scores[i] for i in indices]
    final_class_ids = [class_ids[i] for i in indices]

    return final_boxes, final_scores, final_class_ids
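# The same NMS via torchvision, as the comment above suggests (a sketch;
# assumes torchvision is installed and boxes/scores are kept as tensors
# named boxes_t/scores_t instead of Python lists):
#
#   import torchvision
#   keep = torchvision.ops.nms(boxes_t, scores_t, iou_threshold)  # boxes_t: (N, 4) in xyxy
#   final_boxes = boxes_t[keep]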

def test_rtsp_stream_with_inference():
    """
    Decodes an RTSP stream and runs inference, printing bounding boxes and class names.
    """
    load_dotenv()
    rtsp_url = os.getenv("CAMERA_URL_1")
    if not rtsp_url:
        print("Error: CAMERA_URL_1 not found in .env file.")
        return

    print("=" * 80)
    print("RTSP Stream + TensorRT Inference")
    print("=" * 80)

    # Initialize components
    decoder_factory = StreamDecoderFactory(gpu_id=0)
    model_repo = TensorRTModelRepository(gpu_id=0, default_num_contexts=1)

    # Setup camera stream
    decoder = decoder_factory.create_decoder(rtsp_url, buffer_size=1)
    decoder.start()

    # Load inference model
    model_path = "models/yolov8n.trt"
    try:
        model_repo.load_model(
            model_id="camera_main",
            file_path=model_path
        )
    except Exception as e:
        print(f"Error loading model: {e}")
        print(f"Please ensure '{model_path}' exists.")
        decoder.stop()
        return

    print("\nWaiting for stream to buffer frames...")
    time.sleep(3)

    try:
        while True:
            frame_gpu = decoder.get_latest_frame(rgb=True)

            if frame_gpu is None:
                time.sleep(0.1)
                continue

            # Preprocess frame for YOLOv8:
            # resize to 640x640, normalize to [0, 1], and add a batch dimension
            frame_float = frame_gpu.unsqueeze(0).float()  # interpolate() requires float input
            frame_resized = torch.nn.functional.interpolate(
                frame_float, size=(640, 640), mode='bilinear', align_corners=False
            )
            frame_normalized = frame_resized / 255.0

            # Run inference
            try:
                outputs = model_repo.infer(
                    model_id="camera_main",
                    inputs={"images": frame_normalized},
                    synchronize=True
                )

                # Post-process the output
                output_tensor = outputs['output0']
                boxes, scores, class_ids = postprocess(output_tensor)

                # Print results
                print(f"\n--- Frame at {time.time():.2f} ---")
                if boxes:
                    for box, score, class_id in zip(boxes, scores, class_ids):
                        class_name = COCO_CLASSES[class_id]
                        print(
                            f"  Detected: {class_name} "
                            f"(confidence: {score:.2f}) at "
                            f"bbox: [{box[0]:.0f}, {box[1]:.0f}, {box[2]:.0f}, {box[3]:.0f}]"
                        )
                else:
                    print("  No objects detected.")

            except Exception as e:
                print(f"Inference failed: {e}")

            time.sleep(0.03)  # ~30 FPS

    except KeyboardInterrupt:
        print("\nStopping...")
    finally:
        # Cleanup
        decoder.stop()
        model_repo.unload_model("camera_main")
        print("Stream and model unloaded.")

if __name__ == "__main__":
    test_rtsp_stream_with_inference()
@@ -1,174 +0,0 @@
#!/usr/bin/env python3
"""
Test script for JPEG encoding with nvImageCodec
Tests GPU-accelerated JPEG encoding from RTSP stream frames
"""

import argparse
import sys
import time
import os
from pathlib import Path
from dotenv import load_dotenv
from services import StreamDecoderFactory

# Load environment variables from .env file
load_dotenv()


def main():
    parser = argparse.ArgumentParser(description='Test JPEG encoding from RTSP stream')
    parser.add_argument(
        '--rtsp-url',
        type=str,
        default=None,
        help='RTSP stream URL (defaults to CAMERA_URL_1 from .env)'
    )
    parser.add_argument(
        '--output-dir',
        type=str,
        default='./snapshots',
        help='Output directory for JPEG files'
    )
    parser.add_argument(
        '--num-frames',
        type=int,
        default=10,
        help='Number of frames to capture'
    )
    parser.add_argument(
        '--interval',
        type=float,
        default=1.0,
        help='Interval between captures in seconds'
    )
    parser.add_argument(
        '--quality',
        type=int,
        default=95,
        help='JPEG quality (0-100)'
    )
    parser.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='GPU device ID'
    )

    args = parser.parse_args()

    # Get RTSP URL from command line or environment
    rtsp_url = args.rtsp_url
    if not rtsp_url:
        rtsp_url = os.getenv('CAMERA_URL_1')
        if not rtsp_url:
            print("Error: No RTSP URL provided")
            print("Please either:")
            print("  1. Use --rtsp-url argument, or")
            print("  2. Add CAMERA_URL_1 to your .env file")
            sys.exit(1)

    # Create output directory
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    print("=" * 80)
    print("RTSP Stream JPEG Encoding Test")
    print("=" * 80)
    print(f"RTSP URL: {rtsp_url}")
    print(f"Output Directory: {output_dir}")
    print(f"Number of Frames: {args.num_frames}")
    print(f"Capture Interval: {args.interval}s")
    print(f"JPEG Quality: {args.quality}")
    print(f"GPU ID: {args.gpu_id}")
    print("=" * 80)
    print()

    try:
        # Initialize factory and decoder
        print("[1/3] Initializing StreamDecoderFactory...")
        factory = StreamDecoderFactory(gpu_id=args.gpu_id)
        print("✓ Factory initialized\n")

        print("[2/3] Creating and starting decoder...")
        decoder = factory.create_decoder(
            rtsp_url=rtsp_url,
            buffer_size=30
        )
        decoder.start()
        print("✓ Decoder started\n")

        # Wait for connection
        print("[3/3] Waiting for stream to connect...")
        max_wait = 10
        for i in range(max_wait):
            if decoder.is_connected():
                print("✓ Stream connected\n")
                break
            time.sleep(1)
            print(f"  Waiting... {i+1}/{max_wait}s")
        else:
            print("✗ Failed to connect to stream")
            sys.exit(1)

        # Capture frames
        print(f"Capturing {args.num_frames} frames...")
        print("-" * 80)

        captured = 0
        for i in range(args.num_frames):
            # Get frame as JPEG
            start_time = time.time()
            jpeg_bytes = decoder.get_frame_as_jpeg(quality=args.quality)
            encode_time = (time.time() - start_time) * 1000  # ms

            if jpeg_bytes:
                # Save to file
                filename = output_dir / f"frame_{i:04d}.jpg"
                with open(filename, 'wb') as f:
                    f.write(jpeg_bytes)

                size_kb = len(jpeg_bytes) / 1024
                print(f"[{i+1}/{args.num_frames}] Saved {filename.name} "
                      f"({size_kb:.1f} KB, encoded in {encode_time:.2f}ms)")
                captured += 1
            else:
                print(f"[{i+1}/{args.num_frames}] Failed to get frame")

            # Wait before next capture (except for last frame)
            if i < args.num_frames - 1:
                time.sleep(args.interval)

        print("-" * 80)

        # Summary
        print("\n" + "=" * 80)
        print("Capture Complete")
        print("=" * 80)
        print(f"Successfully captured: {captured}/{args.num_frames} frames")
        print(f"Output directory: {output_dir.absolute()}")
        print("=" * 80)

    except KeyboardInterrupt:
        print("\n\n✗ Interrupted by user")
        sys.exit(1)

    except Exception as e:
        print(f"\n\n✗ Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

    finally:
        # Cleanup only; do not sys.exit(0) here, or the exit codes raised in
        # the except blocks above would be silently overridden
        if 'decoder' in locals():
            print("\nCleaning up...")
            decoder.stop()
            print("✓ Decoder stopped")

    print("\n✓ Test completed successfully")


if __name__ == '__main__':
    main()
@@ -1,310 +0,0 @@
"""
Test script for TensorRT Model Repository with multi-camera inference.

This demonstrates:
1. Loading the same model for multiple cameras (deduplication)
2. Context pool load balancing
3. GPU-to-GPU inference from RTSP streams
4. Memory efficiency with shared engines
"""

import time
import torch
from services.model_repository import TensorRTModelRepository
from services.stream_decoder import StreamDecoderFactory


def test_multi_camera_inference():
    """
    Simulate multi-camera inference scenario.

    Example: 100 cameras, all using the same YOLOv8 model
    - Without pooling: 100 engines + 100 contexts in VRAM
    - With pooling: 1 engine + 4 contexts in VRAM (huge savings!)
    """

    # Initialize model repository with context pooling
    repo = TensorRTModelRepository(gpu_id=0, default_num_contexts=4)

    # Camera configurations (simulated)
    camera_configs = [
        {"id": "camera_1", "rtsp_url": "rtsp://camera1.local/stream"},
        {"id": "camera_2", "rtsp_url": "rtsp://camera2.local/stream"},
        {"id": "camera_3", "rtsp_url": "rtsp://camera3.local/stream"},
        # ... imagine 100 cameras here
    ]

    # Load the same model for all cameras
    model_file = "models/yolov8n.trt"  # Same file for all cameras

    print("=" * 80)
    print("LOADING MODELS FOR MULTIPLE CAMERAS")
    print("=" * 80)

    for config in camera_configs:
        try:
            # Each camera gets its own model_id, but shares the same engine!
            metadata = repo.load_model(
                model_id=config["id"],
                file_path=model_file,
                num_contexts=4  # 4 contexts shared across all cameras
            )
            print(f"\n✓ Loaded model for {config['id']}")
        except Exception as e:
            print(f"\n✗ Failed to load model for {config['id']}: {e}")

    # Show repository stats
    print("\n" + "=" * 80)
    print("REPOSITORY STATISTICS")
    print("=" * 80)
    stats = repo.get_stats()
    print(f"Total model IDs: {stats['total_model_ids']}")
    print(f"Unique engines in VRAM: {stats['unique_engines']}")
    print(f"Total contexts: {stats['total_contexts']}")
    print(f"Memory efficiency: {stats['memory_efficiency']}")

    # Get detailed info for one camera
    print("\n" + "=" * 80)
    print("DETAILED MODEL INFO (camera_1)")
    print("=" * 80)
    info = repo.get_model_info("camera_1")
    if info:
        print(f"Model ID: {info['model_id']}")
        print(f"File: {info['file_path']}")
        print(f"File hash: {info['file_hash']}")
        print(f"Engine references: {info['engine_references']}")
        print(f"Context pool size: {info['context_pool_size']}")
        print(f"Shared with: {info['shared_with_model_ids']}")
        print("\nInputs:")
        for name, spec in info['inputs'].items():
            print(f"  {name}: {spec['shape']} ({spec['dtype']})")
        print("\nOutputs:")
        for name, spec in info['outputs'].items():
            print(f"  {name}: {spec['shape']} ({spec['dtype']})")

    # Simulate inference from multiple cameras
    print("\n" + "=" * 80)
    print("RUNNING INFERENCE (GPU-to-GPU)")
    print("=" * 80)

    # Create dummy input tensors (simulating frames from cameras)
    # In a real scenario, these come from StreamDecoder.get_frame()
    batch_size = 1
    channels = 3
    height = 640
    width = 640

    for config in camera_configs:
        try:
            # Simulate getting a frame from the camera (already on GPU)
            input_tensor = torch.rand(
                batch_size, channels, height, width,
                dtype=torch.float32,
                device='cuda:0'
            )

            # Run inference (stays on GPU)
            start = time.time()
            outputs = repo.infer(
                model_id=config["id"],
                inputs={"images": input_tensor},  # Adjust input name based on your model
                synchronize=True,
                timeout=5.0
            )
            elapsed = (time.time() - start) * 1000  # Convert to ms

            print(f"\n{config['id']}: Inference completed in {elapsed:.2f}ms")
            for name, tensor in outputs.items():
                print(f"  Output '{name}': {tensor.shape} on {tensor.device}")

        except Exception as e:
            print(f"\n{config['id']}: Inference failed: {e}")

    # Cleanup
    print("\n" + "=" * 80)
    print("CLEANUP")
    print("=" * 80)

    for config in camera_configs:
        repo.unload_model(config["id"])

    print("\nAll models unloaded.")


def test_rtsp_stream_with_inference():
    """
    Real-world example: Decode RTSP stream and run inference.
    Everything stays in GPU memory (zero CPU transfers).
    """

    print("=" * 80)
    print("RTSP STREAM + TENSORRT INFERENCE (GPU-to-GPU)")
    print("=" * 80)

    # Initialize components
    decoder_factory = StreamDecoderFactory(gpu_id=0)
    model_repo = TensorRTModelRepository(gpu_id=0, default_num_contexts=4)

    # Setup camera stream
    rtsp_url = "rtsp://your-camera-ip/stream"
    decoder = decoder_factory.create_decoder(rtsp_url, buffer_size=30)
    decoder.start()

    # Load inference model
    try:
        model_repo.load_model(
            model_id="camera_main",
            file_path="models/yolov8n.trt"
        )
    except FileNotFoundError:
        print("\n⚠ Model file not found. Please export your model to TensorRT:")
        print("  Example: yolo export model=yolov8n.pt format=engine device=0")
        return

    print("\nWaiting for stream to buffer frames...")
    time.sleep(3)

    # Process frames
    for i in range(10):
        # Get frame from decoder (already on GPU)
        frame_gpu = decoder.get_latest_frame(rgb=True)  # Returns torch.Tensor on CUDA

        if frame_gpu is None:
            print(f"Frame {i}: No frame available")
            continue

        # Preprocess if needed (stays on GPU)
        # For YOLOv8: normalize, resize, etc.
        # Example preprocessing (adjust for your model):
        frame_gpu = frame_gpu.float() / 255.0  # Normalize to [0, 1]
        frame_gpu = frame_gpu.unsqueeze(0)  # Add batch dimension: (1, 3, H, W)

        # Run inference (GPU-to-GPU, zero copy)
        try:
            outputs = model_repo.infer(
                model_id="camera_main",
                inputs={"images": frame_gpu},
                synchronize=True
            )

            print(f"\nFrame {i}: Inference successful")
            for name, tensor in outputs.items():
                print(f"  {name}: {tensor.shape} on {tensor.device}")

            # Post-process results (can stay on GPU or move to CPU as needed)
            # Example: NMS, bounding box extraction, etc.

        except Exception as e:
            print(f"\nFrame {i}: Inference failed: {e}")

        time.sleep(0.1)  # Simulate processing interval

    # Cleanup
    decoder.stop()
    model_repo.unload_model("camera_main")
    print("\n✓ Test completed successfully")


def test_concurrent_inference():
    """
    Test concurrent inference from multiple threads.
    Demonstrates context pool load balancing.
    """
    import threading

    print("=" * 80)
    print("CONCURRENT INFERENCE TEST (Context Pool Load Balancing)")
    print("=" * 80)

    repo = TensorRTModelRepository(gpu_id=0, default_num_contexts=4)

    # Load model
    try:
        repo.load_model("shared_model", "models/yolov8n.trt", num_contexts=4)
    except Exception as e:
        print(f"Failed to load model: {e}")
        return

    def worker(worker_id: int, num_inferences: int):
        """Worker thread performing inference"""
        for i in range(num_inferences):
            try:
                # Create dummy input
                input_tensor = torch.rand(1, 3, 640, 640, device='cuda:0', dtype=torch.float32)

                # Acquire context from pool, run inference, release context
                outputs = repo.infer(
                    model_id="shared_model",
                    inputs={"images": input_tensor},
                    timeout=10.0
                )

                print(f"Worker {worker_id}, Inference {i}: SUCCESS")

            except Exception as e:
                print(f"Worker {worker_id}, Inference {i}: FAILED - {e}")

            time.sleep(0.01)  # Small delay

    # Launch multiple worker threads (more workers than contexts!)
    threads = []
    num_workers = 10  # 10 workers sharing 4 contexts
    inferences_per_worker = 5

    print(f"\nLaunching {num_workers} workers (only 4 contexts available)")
    print("Contexts will be borrowed/returned automatically\n")

    start_time = time.time()

    for worker_id in range(num_workers):
        t = threading.Thread(target=worker, args=(worker_id, inferences_per_worker))
        threads.append(t)
        t.start()

    # Wait for all workers
    for t in threads:
        t.join()

    elapsed = time.time() - start_time
    total_inferences = num_workers * inferences_per_worker

    print(f"\n✓ Completed {total_inferences} inferences in {elapsed:.2f}s")
    print(f"  Throughput: {total_inferences / elapsed:.2f} inferences/sec")
    print(f"  With only 4 contexts for {num_workers} workers!")

    repo.unload_model("shared_model")


if __name__ == "__main__":
    print("\n" + "=" * 80)
    print("TENSORRT MODEL REPOSITORY - TEST SUITE")
    print("=" * 80)

    # Test 1: Multi-camera model loading
    print("\n\nTEST 1: Multi-Camera Model Loading with Deduplication")
    print("-" * 80)
    try:
        test_multi_camera_inference()
    except Exception as e:
        print(f"Test 1 failed: {e}")

    # Test 2: RTSP stream + inference (commented out by default)
    # Uncomment if you have a real RTSP stream
    # print("\n\nTEST 2: RTSP Stream + Inference")
    # print("-" * 80)
    # try:
    #     test_rtsp_stream_with_inference()
    # except Exception as e:
    #     print(f"Test 2 failed: {e}")

    # Test 3: Concurrent inference
    print("\n\nTEST 3: Concurrent Inference with Context Pooling")
    print("-" * 80)
    try:
        test_concurrent_inference()
    except Exception as e:
        print(f"Test 3 failed: {e}")

    print("\n" + "=" * 80)
    print("ALL TESTS COMPLETED")
    print("=" * 80)
@@ -1,255 +0,0 @@
#!/usr/bin/env python3
"""
Multi-stream test script to verify CUDA context sharing efficiency.
Tests multiple RTSP streams simultaneously and monitors VRAM usage.
"""

import argparse
import time
import sys
import subprocess
import os
from pathlib import Path
from dotenv import load_dotenv
from services import StreamDecoderFactory, ConnectionStatus

# Load environment variables from .env file
load_dotenv()


def get_gpu_memory_usage(gpu_id: int = 0) -> int:
    """Get current GPU memory usage in MB using nvidia-smi"""
    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=memory.used', '--format=csv,noheader,nounits', f'--id={gpu_id}'],
            capture_output=True,
            text=True,
            check=True
        )
        return int(result.stdout.strip())
    except Exception as e:
        print(f"Warning: Could not get GPU memory usage: {e}")
        return 0


def main():
    parser = argparse.ArgumentParser(description='Test multi-stream decoding with context sharing')
    parser.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='GPU device ID'
    )
    parser.add_argument(
        '--duration',
        type=int,
        default=20,
        help='Test duration in seconds'
    )
    parser.add_argument(
        '--capture-snapshots',
        action='store_true',
        help='Capture JPEG snapshots during test'
    )
    parser.add_argument(
        '--output-dir',
        type=str,
        default='./multi_stream_snapshots',
        help='Output directory for snapshots'
    )

    args = parser.parse_args()

    # Load camera URLs from environment
    camera_urls = []
    i = 1
    while True:
        url = os.getenv(f'CAMERA_URL_{i}')
        if url:
            camera_urls.append(url)
            i += 1
        else:
            break

    if not camera_urls:
        print("Error: No camera URLs found in .env file")
        print("Please add CAMERA_URL_1, CAMERA_URL_2, etc. to your .env file")
        sys.exit(1)

    # Create output directory if capturing snapshots
    if args.capture_snapshots:
        output_dir = Path(args.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

    print("=" * 80)
    print("Multi-Stream RTSP Decoder Test - Context Sharing Verification")
    print("=" * 80)
    print(f"Number of Streams: {len(camera_urls)}")
    print(f"GPU ID: {args.gpu_id}")
    print(f"Test Duration: {args.duration} seconds")
    print(f"Capture Snapshots: {args.capture_snapshots}")
    print("=" * 80)
    print()

    try:
        # Get baseline GPU memory
        print("[Baseline] Measuring initial GPU memory usage...")
        baseline_memory = get_gpu_memory_usage(args.gpu_id)
        print(f"✓ Baseline VRAM: {baseline_memory} MB\n")

        # Initialize factory (shared CUDA context)
        print("[1/4] Initializing StreamDecoderFactory with shared CUDA context...")
        factory = StreamDecoderFactory(gpu_id=args.gpu_id)

        factory_memory = get_gpu_memory_usage(args.gpu_id)
        factory_overhead = factory_memory - baseline_memory
        print("✓ Factory initialized")
        print(f"  VRAM after factory: {factory_memory} MB (+{factory_overhead} MB)\n")

        # Create all decoders
        print(f"[2/4] Creating {len(camera_urls)} StreamDecoder instances...")
        decoders = []
        for i, url in enumerate(camera_urls):
            decoder = factory.create_decoder(
                rtsp_url=url,
                buffer_size=30,
                codec='h264'
            )
            decoders.append(decoder)
            # split('@')[-1] keeps this from crashing on URLs without credentials
            host = url.split('@')[-1].split('/')[0]
            print(f"  ✓ Decoder {i+1} created for camera {host}")

        decoders_memory = get_gpu_memory_usage(args.gpu_id)
        decoders_overhead = decoders_memory - factory_memory
        print(f"\n  VRAM after creating {len(decoders)} decoders: {decoders_memory} MB (+{decoders_overhead} MB)")
        print(f"  Average per decoder: {decoders_overhead / len(decoders):.1f} MB\n")

        # Start all decoders
        print(f"[3/4] Starting all {len(decoders)} decoders...")
        for i, decoder in enumerate(decoders):
            decoder.start()
            print(f"  ✓ Decoder {i+1} started")

        started_memory = get_gpu_memory_usage(args.gpu_id)
        started_overhead = started_memory - decoders_memory
        print(f"\n  VRAM after starting decoders: {started_memory} MB (+{started_overhead} MB)")
        print(f"  Average per running decoder: {started_overhead / len(decoders):.1f} MB\n")

        # Wait for all streams to connect
        print("[4/4] Waiting for all streams to connect...")
        max_wait = 15
        for wait_time in range(max_wait):
            connected = sum(1 for d in decoders if d.is_connected())
            print(f"  Connected: {connected}/{len(decoders)} streams", end='\r')

            if connected == len(decoders):
                print(f"\n✓ All {len(decoders)} streams connected!\n")
                break

            time.sleep(1)
        else:
            connected = sum(1 for d in decoders if d.is_connected())
            print(f"\n⚠ Only {connected}/{len(decoders)} streams connected after {max_wait}s\n")

        connected_memory = get_gpu_memory_usage(args.gpu_id)
        connected_overhead = connected_memory - started_memory
        print(f"  VRAM after connection: {connected_memory} MB (+{connected_overhead} MB)\n")

        # Monitor streams (columns are built from the actual stream count
        # instead of assuming exactly 4 streams, which raised IndexError)
        print(f"Monitoring streams for {args.duration} seconds...")
        print("=" * 80)
        header = f"{'Time':<8} {'VRAM':<10} " + " ".join(f"{f'Stream {i+1}':<12}" for i in range(len(decoders)))
        print(header)
        print("-" * 80)

        start_time = time.time()
        snapshot_interval = args.duration // 3 if args.capture_snapshots else 0
        last_snapshot = 0

        while time.time() - start_time < args.duration:
            elapsed = time.time() - start_time
            current_memory = get_gpu_memory_usage(args.gpu_id)

            # Get stats for each decoder
            stats = []
            for decoder in decoders:
                status = decoder.get_status().value[:8]
                buffer = decoder.get_buffer_size()
                frames = decoder.frame_count
                stats.append(f"{status:8s} {buffer:2d}/30 {frames:4d}")

            print(f"{elapsed:6.1f}s  {current_memory:6d}MB  " + " ".join(f"{s:<12}" for s in stats))

            # Capture snapshots
            if args.capture_snapshots and snapshot_interval > 0:
                if elapsed - last_snapshot >= snapshot_interval:
                    print("\n  → Capturing snapshots from all streams...")
                    for i, decoder in enumerate(decoders):
                        jpeg_bytes = decoder.get_frame_as_jpeg(quality=85)
                        if jpeg_bytes:
                            filename = output_dir / f"camera_{i+1}_t{int(elapsed)}s.jpg"
                            with open(filename, 'wb') as f:
                                f.write(jpeg_bytes)
                            print(f"    Saved {filename.name} ({len(jpeg_bytes)/1024:.1f} KB)")
                    print()
                    last_snapshot = elapsed

            time.sleep(1)

        print("=" * 80)

        # Final memory analysis
        final_memory = get_gpu_memory_usage(args.gpu_id)
        total_overhead = final_memory - baseline_memory

        print("\n" + "=" * 80)
        print("Memory Usage Analysis")
        print("=" * 80)
        print(f"Baseline VRAM:                  {baseline_memory:6d} MB")
        print(f"After Factory Init:             {factory_memory:6d} MB (+{factory_overhead:4d} MB)")
        print(f"After Creating {len(decoders)} Decoders:     {decoders_memory:6d} MB (+{decoders_overhead:4d} MB)")
        print(f"After Starting Decoders:        {started_memory:6d} MB (+{started_overhead:4d} MB)")
        print(f"After Connection:               {connected_memory:6d} MB (+{connected_overhead:4d} MB)")
        print(f"Final (after {args.duration}s):          {final_memory:6d} MB (+{total_overhead:4d} MB total)")
        print("-" * 80)
        print(f"Average VRAM per stream: {total_overhead / len(decoders):6.1f} MB")
        print(f"Context sharing efficiency: {'EXCELLENT' if total_overhead < 500 else 'GOOD' if total_overhead < 800 else 'POOR'}")
        print("=" * 80)

        # Final stats
        print("\nFinal Stream Statistics:")
        print("-" * 80)
        for i, decoder in enumerate(decoders):
            status = decoder.get_status().value
            buffer = decoder.get_buffer_size()
            frames = decoder.frame_count
            fps = frames / args.duration if args.duration > 0 else 0
            print(f"Stream {i+1}: {status:12s} | Buffer: {buffer:2d}/{decoder.buffer_size} | "
                  f"Frames: {frames:5d} | Avg FPS: {fps:5.2f}")
        print("=" * 80)

    except KeyboardInterrupt:
        print("\n\n✗ Interrupted by user")
        sys.exit(1)

    except Exception as e:
        print(f"\n\n✗ Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

    finally:
        # Cleanup only; exiting here would mask the failure exit codes above
        if 'decoders' in locals():
            print("\nCleaning up...")
            for i, decoder in enumerate(decoders):
                decoder.stop()
                print(f"  ✓ Decoder {i+1} stopped")

            cleanup_memory = get_gpu_memory_usage(args.gpu_id)
            print(f"\nVRAM after cleanup: {cleanup_memory} MB")

    print("\n✓ Multi-stream test completed successfully")


if __name__ == '__main__':
    main()
test_stream.py
@@ -1,152 +0,0 @@
#!/usr/bin/env python3
"""
CLI test script for StreamDecoder
Tests RTSP stream decoding with NVDEC hardware acceleration
"""

import argparse
import time
import sys
from services.stream_decoder import StreamDecoderFactory, ConnectionStatus


def main():
    parser = argparse.ArgumentParser(description='Test RTSP stream decoder with NVDEC')
    parser.add_argument(
        '--rtsp-url',
        type=str,
        required=True,
        help='RTSP stream URL (e.g., rtsp://user:pass@host/path)'
    )
    parser.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='GPU device ID'
    )
    parser.add_argument(
        '--buffer-size',
        type=int,
        default=30,
        help='Frame buffer size'
    )
    parser.add_argument(
        '--duration',
        type=int,
        default=30,
        help='Test duration in seconds'
    )
    parser.add_argument(
        '--check-interval',
        type=float,
        default=1.0,
        help='Status check interval in seconds'
    )

    args = parser.parse_args()

    print("=" * 80)
    print("RTSP Stream Decoder Test")
    print("=" * 80)
    print(f"RTSP URL: {args.rtsp_url}")
    print(f"GPU ID: {args.gpu_id}")
    print(f"Buffer Size: {args.buffer_size} frames")
    print(f"Test Duration: {args.duration} seconds")
    print("=" * 80)
    print()

    try:
        # Create factory with shared CUDA context
        print("[1/4] Initializing StreamDecoderFactory...")
        factory = StreamDecoderFactory(gpu_id=args.gpu_id)
        print("✓ Factory initialized with shared CUDA context\n")

        # Create decoder
        print("[2/4] Creating StreamDecoder...")
        decoder = factory.create_decoder(
            rtsp_url=args.rtsp_url,
            buffer_size=args.buffer_size,
            codec='h264'
        )
        print(f"✓ Decoder created: {decoder}\n")

        # Start decoding
        print("[3/4] Starting decoder thread...")
        decoder.start()
        print("✓ Decoder thread started\n")

        # Monitor for specified duration
        print(f"[4/4] Monitoring stream for {args.duration} seconds...")
        print("-" * 80)

        start_time = time.time()
        last_frame_count = 0

        while time.time() - start_time < args.duration:
            time.sleep(args.check_interval)

            # Get status
            status = decoder.get_status()
            buffer_size = decoder.get_buffer_size()
            frame_count = decoder.frame_count
            fps = (frame_count - last_frame_count) / args.check_interval
            last_frame_count = frame_count

            # Print status
            elapsed = time.time() - start_time
            print(f"[{elapsed:6.1f}s] Status: {status.value:12s} | "
                  f"Buffer: {buffer_size:2d}/{args.buffer_size:2d} | "
                  f"Frames: {frame_count:5d} | "
                  f"FPS: {fps:5.1f}")

            # Try to get latest frame
            if status == ConnectionStatus.CONNECTED:
                frame = decoder.get_latest_frame()
                if frame is not None:
                    print(f"  Frame shape: {frame.shape}, dtype: {frame.dtype}, "
                          f"device: {frame.device}")

            # Check for errors
            if status == ConnectionStatus.ERROR:
                print("\n✗ ERROR: Stream connection failed!")
                break

        print("-" * 80)

        # Final statistics
        print("\n" + "=" * 80)
        print("Test Complete - Final Statistics")
        print("=" * 80)
        print(f"Total Frames Decoded: {decoder.frame_count}")
        print(f"Average FPS: {decoder.frame_count / args.duration:.2f}")
        print(f"Final Status: {decoder.get_status().value}")
        print(f"Buffer Utilization: {decoder.get_buffer_size()}/{args.buffer_size}")

        if decoder.frame_width and decoder.frame_height:
            print(f"Frame Resolution: {decoder.frame_width}x{decoder.frame_height}")

        print("=" * 80)

    except KeyboardInterrupt:
        print("\n\n✗ Interrupted by user")
        sys.exit(1)

    except Exception as e:
        print(f"\n\n✗ Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

    finally:
        # Cleanup only; do not exit here, or the failure exit codes above
        # would be overridden
        if 'decoder' in locals():
            print("\nCleaning up...")
            decoder.stop()
            print("✓ Decoder stopped")

    print("\n✓ Test completed successfully")


if __name__ == '__main__':
    main()
@@ -1,143 +0,0 @@
#!/usr/bin/env python3
"""
VRAM scaling test - measures Python process memory usage for 1, 2, 3, and 4 streams.
"""

import os
import time
import subprocess
from dotenv import load_dotenv
from services import StreamDecoderFactory

# Load environment variables from .env file
load_dotenv()

# Load camera URLs from environment
camera_urls = []
i = 1
while True:
    url = os.getenv(f'CAMERA_URL_{i}')
    if url:
        camera_urls.append(url)
        i += 1
    else:
        break

if not camera_urls:
    print("Error: No camera URLs found in .env file")
    print("Please add CAMERA_URL_1, CAMERA_URL_2, etc. to your .env file")
    exit(1)


def get_python_gpu_memory():
    """Get Python process GPU memory usage in MB"""
    try:
        pid = os.getpid()
        result = subprocess.run(
            ['nvidia-smi', '--query-compute-apps=pid,used_memory', '--format=csv,noheader,nounits'],
            capture_output=True, text=True, check=True
        )
        for line in result.stdout.strip().split('\n'):
            if line:
                parts = line.split(',')
                if len(parts) >= 2 and int(parts[0].strip()) == pid:
                    return int(parts[1].strip())
        return 0
    except Exception:
        return 0


def test_n_streams(n, wait_time=15):
    """Test with n streams"""
    print(f"\n{'='*80}")
    print(f"Testing with {n} stream(s)")
    print('='*80)

    mem_before = get_python_gpu_memory()
    print(f"Python process VRAM before: {mem_before} MB")

    # Create factory
    factory = StreamDecoderFactory(gpu_id=0)
    time.sleep(1)
    mem_after_factory = get_python_gpu_memory()
    print(f"After factory: {mem_after_factory} MB (+{mem_after_factory - mem_before} MB)")

    # Create decoders
    decoders = []
    for i in range(n):
        decoder = factory.create_decoder(camera_urls[i], buffer_size=30)
        decoders.append(decoder)

    time.sleep(1)
    mem_after_create = get_python_gpu_memory()
    print(f"After creating {n} decoder(s): {mem_after_create} MB (+{mem_after_create - mem_after_factory} MB)")

    # Start decoders
    for decoder in decoders:
        decoder.start()

    time.sleep(2)
    mem_after_start = get_python_gpu_memory()
    print(f"After starting {n} decoder(s): {mem_after_start} MB (+{mem_after_start - mem_after_create} MB)")

    # Wait for connection
    print(f"Waiting {wait_time}s for streams to connect and stabilize...")
    time.sleep(wait_time)

    # Check connection status
    connected = sum(1 for d in decoders if d.is_connected())
    mem_stable = get_python_gpu_memory()

    print(f"Connected: {connected}/{n} streams")
    print(f"Python process VRAM (stable): {mem_stable} MB")

    # Get frame stats
    for i, decoder in enumerate(decoders):
        print(f"  Stream {i+1}: {decoder.get_status().value:10s} "
              f"Buffer: {decoder.get_buffer_size()}/30 "
              f"Frames: {decoder.frame_count}")

    # Cleanup
    for decoder in decoders:
        decoder.stop()

    time.sleep(2)
    mem_after_cleanup = get_python_gpu_memory()
    print(f"After cleanup: {mem_after_cleanup} MB")

    return mem_stable


if __name__ == '__main__':
    print("Python VRAM Scaling Test")
    print(f"PID: {os.getpid()}")

    baseline = get_python_gpu_memory()
    print(f"Baseline Python process VRAM: {baseline} MB\n")

    # Only test stream counts we actually have URLs for (avoids an IndexError
    # when .env defines fewer than 4 cameras)
    stream_counts = [n for n in (1, 2, 3, 4) if n <= len(camera_urls)]

    results = {}
    for n in stream_counts:
        mem = test_n_streams(n, wait_time=15)
        results[n] = mem
        print(f"\n→ {n} stream(s): {mem} MB (process total)")

        # Give time between tests
        if n < stream_counts[-1]:
            print("\nWaiting 5s before next test...")
            time.sleep(5)

    # Summary
    print("\n" + "="*80)
    print("Python Process VRAM Scaling Summary")
    print("="*80)
    print(f"Baseline: {baseline:4d} MB")
    for n in stream_counts:
        total = results[n]
        overhead = total - baseline
        per_stream = overhead / n if n > 0 else 0
        print(f"{n} stream(s): {total:4d} MB (+{overhead:3d} MB total, {per_stream:5.1f} MB per stream)")

    # Calculate marginal cost
    print("\nMarginal cost per additional stream:")
    for n in stream_counts[1:]:
        marginal = results[n] - results[n - 1]
        print(f"  Stream {n}: +{marginal} MB")

    print("="*80)
@@ -1,85 +0,0 @@
#!/usr/bin/env python3
"""
Quick verification script for TensorRT model
"""

import torch
from services.model_repository import TensorRTModelRepository


def verify_model():
    print("=" * 80)
    print("TensorRT Model Verification")
    print("=" * 80)

    # Initialize repository
    repo = TensorRTModelRepository(gpu_id=0, default_num_contexts=2)

    # Load the model
    print("\nLoading YOLOv8n TensorRT engine...")
    try:
        metadata = repo.load_model(
            model_id="yolov8n_test",
            file_path="models/yolov8n.trt",
            num_contexts=2
        )
        print("✓ Model loaded successfully!")
    except Exception as e:
        print(f"✗ Failed to load model: {e}")
        return

    # Get model info
    print("\n" + "=" * 80)
    print("Model Information")
    print("=" * 80)
    info = repo.get_model_info("yolov8n_test")
    if info:
        print(f"Model ID: {info['model_id']}")
        print(f"File: {info['file_path']}")
        print(f"File hash: {info['file_hash']}")
        print("\nInputs:")
        for name, spec in info['inputs'].items():
            print(f"  {name}: {spec['shape']} ({spec['dtype']})")
        print("\nOutputs:")
        for name, spec in info['outputs'].items():
            print(f"  {name}: {spec['shape']} ({spec['dtype']})")

    # Run test inference
    print("\n" + "=" * 80)
    print("Running Test Inference")
    print("=" * 80)

    try:
        # Create dummy input (simulating a 640x640 image)
        input_tensor = torch.rand(1, 3, 640, 640, dtype=torch.float32, device='cuda:0')
        print(f"Input tensor: {input_tensor.shape} on {input_tensor.device}")

        # Run inference
        outputs = repo.infer(
            model_id="yolov8n_test",
            inputs={"images": input_tensor},
            synchronize=True
        )

        print("\n✓ Inference successful!")
        print("\nOutputs:")
        for name, tensor in outputs.items():
            print(f"  {name}: {tensor.shape} on {tensor.device} ({tensor.dtype})")

    except Exception as e:
        print(f"\n✗ Inference failed: {e}")
        import traceback
        traceback.print_exc()

    # Cleanup
    print("\n" + "=" * 80)
    print("Cleanup")
    print("=" * 80)
    repo.unload_model("yolov8n_test")
    print("✓ Model unloaded")

    print("\n" + "=" * 80)
    print("Verification Complete!")
    print("=" * 80)


if __name__ == "__main__":
    verify_model()