python-detector-worker/tests/performance/test_detection_performance.py

"""
Performance tests for detection pipeline components.
These tests benchmark the performance of key detection pipeline
components to ensure they meet performance requirements.
"""
import pytest
import time
import asyncio
import statistics
from unittest.mock import Mock, patch
import numpy as np
import psutil
import gc
from detector_worker.detection.yolo_detector import YOLODetector
from detector_worker.detection.tracking_manager import TrackingManager
from detector_worker.detection.stability_validator import StabilityValidator
from detector_worker.pipeline.pipeline_executor import PipelineExecutor
from detector_worker.models.model_manager import ModelManager
from detector_worker.streams.stream_manager import StreamManager


@pytest.fixture
def sample_frame():
    """Create a sample frame for performance testing."""
    return np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)


@pytest.fixture
def large_frame():
    """Create a large frame for stress testing."""
    return np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8)


@pytest.fixture
def performance_config():
    """Configuration for performance tests."""
    return {
        "target_fps": 30,
        "max_detection_time_ms": 100,
        "max_tracking_time_ms": 50,
        "max_pipeline_time_ms": 500,
        "memory_limit_mb": 1024
    }
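

# Note on the budget numbers above: sustaining "target_fps" implies a per-frame
# latency budget of 1000 / target_fps milliseconds (about 33 ms at 30 FPS),
# which is tighter than "max_detection_time_ms". The helper below is a minimal
# sketch of that arithmetic only; it is not used by the tests and the name
# frame_budget_ms is hypothetical.
def frame_budget_ms(target_fps):
    """Return the per-frame time budget in milliseconds for a target FPS."""
    return 1000.0 / target_fps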


class TestDetectionPerformance:
    """Test detection performance benchmarks."""

    def test_yolo_detection_speed(self, sample_frame, performance_config):
        """Benchmark YOLO detection speed."""
        detector = YOLODetector()

        with patch('torch.load') as mock_torch_load:
            # Setup fast mock model
            mock_model = Mock()
            mock_result = Mock()
            mock_result.boxes = Mock()
            mock_result.boxes.xyxy = Mock()
            mock_result.boxes.conf = Mock()
            mock_result.boxes.cls = Mock()
            mock_result.names = {0: "car", 1: "person"}

            # Mock detection results
            mock_result.boxes.xyxy.cpu.return_value.numpy.return_value = np.array([
                [100, 200, 300, 400],
                [150, 250, 350, 450]
            ])
            mock_result.boxes.conf.cpu.return_value.numpy.return_value = np.array([0.9, 0.8])
            mock_result.boxes.cls.cpu.return_value.numpy.return_value = np.array([0, 1])
            mock_model.return_value = mock_result
            mock_torch_load.return_value = mock_model

            # Warm up
            for _ in range(5):
                detector.detect(sample_frame, confidence_threshold=0.5)

            # Benchmark detection speed
            detection_times = []
            num_iterations = 100

            for _ in range(num_iterations):
                start_time = time.perf_counter()
                detections = detector.detect(sample_frame, confidence_threshold=0.5)
                end_time = time.perf_counter()

                detection_time_ms = (end_time - start_time) * 1000
                detection_times.append(detection_time_ms)

            # Calculate statistics
            avg_detection_time = statistics.mean(detection_times)
            median_detection_time = statistics.median(detection_times)
            max_detection_time = max(detection_times)
            min_detection_time = min(detection_times)

            # Performance assertions
            assert avg_detection_time < performance_config["max_detection_time_ms"]
            assert median_detection_time < performance_config["max_detection_time_ms"]

            # Calculate theoretical FPS
            theoretical_fps = 1000 / avg_detection_time
            assert theoretical_fps >= performance_config["target_fps"]

            print(f"\nDetection Performance Metrics:")
            print(f"Average detection time: {avg_detection_time:.2f} ms")
            print(f"Median detection time: {median_detection_time:.2f} ms")
            print(f"Min detection time: {min_detection_time:.2f} ms")
            print(f"Max detection time: {max_detection_time:.2f} ms")
            print(f"Theoretical FPS: {theoretical_fps:.1f}")

    def test_tracking_performance(self, sample_frame, performance_config):
        """Benchmark object tracking performance."""
        tracking_manager = TrackingManager()

        # Create mock detections
        detections = [
            {"class": "car", "confidence": 0.9, "bbox": [100, 200, 300, 400]},
            {"class": "car", "confidence": 0.8, "bbox": [150, 250, 350, 450]},
            {"class": "person", "confidence": 0.7, "bbox": [200, 300, 250, 400]}
        ]

        # Warm up tracking
        for i in range(10):
            tracking_manager.update_tracks(detections, frame_id=i)

        # Benchmark tracking speed
        tracking_times = []
        num_iterations = 100

        for i in range(num_iterations):
            # Simulate moving detections
            moving_detections = []
            for det in detections:
                moved_det = det.copy()
                # Add small random movement
                bbox = moved_det["bbox"]
                moved_det["bbox"] = [
                    bbox[0] + np.random.randint(-5, 5),
                    bbox[1] + np.random.randint(-5, 5),
                    bbox[2] + np.random.randint(-5, 5),
                    bbox[3] + np.random.randint(-5, 5)
                ]
                moving_detections.append(moved_det)

            start_time = time.perf_counter()
            tracks = tracking_manager.update_tracks(moving_detections, frame_id=i + 10)
            end_time = time.perf_counter()

            tracking_time_ms = (end_time - start_time) * 1000
            tracking_times.append(tracking_time_ms)

        # Calculate statistics
        avg_tracking_time = statistics.mean(tracking_times)
        max_tracking_time = max(tracking_times)

        # Performance assertions
        assert avg_tracking_time < performance_config["max_tracking_time_ms"]
        assert max_tracking_time < performance_config["max_tracking_time_ms"] * 2

        print(f"\nTracking Performance Metrics:")
        print(f"Average tracking time: {avg_tracking_time:.2f} ms")
        print(f"Max tracking time: {max_tracking_time:.2f} ms")

    def test_stability_validation_performance(self, performance_config):
        """Benchmark stability validation performance."""
        validator = StabilityValidator()

        # Create stable detections sequence
        base_detection = {
            "class": "car",
            "confidence": 0.9,
            "bbox": [100, 200, 300, 400],
            "track_id": 1001
        }

        # Add sequence of stable detections
        for i in range(20):
            detection = base_detection.copy()
            # Add small variations to simulate real detection noise
            detection["confidence"] = 0.9 + np.random.normal(0, 0.02)
            bbox = detection["bbox"]
            detection["bbox"] = [
                bbox[0] + np.random.normal(0, 2),
                bbox[1] + np.random.normal(0, 2),
                bbox[2] + np.random.normal(0, 2),
                bbox[3] + np.random.normal(0, 2)
            ]
            validator.add_detection(detection, frame_id=i)

        # Benchmark validation performance
        validation_times = []
        num_iterations = 1000

        for i in range(num_iterations):
            test_detection = base_detection.copy()
            test_detection["confidence"] = 0.85 + np.random.normal(0, 0.05)

            start_time = time.perf_counter()
            is_stable = validator.is_detection_stable(
                test_detection,
                stability_frames=10,
                confidence_threshold=0.8
            )
            end_time = time.perf_counter()

            validation_time_ms = (end_time - start_time) * 1000
            validation_times.append(validation_time_ms)

        avg_validation_time = statistics.mean(validation_times)
        max_validation_time = max(validation_times)

        # Should be very fast (< 1ms typically)
        assert avg_validation_time < 1.0
        assert max_validation_time < 5.0

        print(f"\nStability Validation Performance Metrics:")
        print(f"Average validation time: {avg_validation_time:.3f} ms")
        print(f"Max validation time: {max_validation_time:.3f} ms")

    @pytest.mark.asyncio
    async def test_pipeline_executor_performance(self, sample_frame, performance_config):
        """Benchmark complete pipeline execution performance."""
        pipeline_executor = PipelineExecutor()

        # Simple pipeline configuration
        pipeline_config = {
            "modelId": "fast_detection_model",
            "modelFile": "fast_model.pt",
            "expectedClasses": ["car"],
            "minConfidence": 0.5,
            "actions": [],
            "branches": []
        }

        detection_context = {
            "camera_id": "perf_camera",
            "display_id": "perf_display",
            "frame": sample_frame,
            "timestamp": int(time.time() * 1000),
            "session_id": "perf_session"
        }

        with patch('torch.load') as mock_torch_load, \
             patch('os.path.exists', return_value=True):
            # Setup fast mock model
            mock_model = Mock()
            mock_result = Mock()
            mock_result.boxes = Mock()
            mock_result.boxes.xyxy = Mock()
            mock_result.boxes.conf = Mock()
            mock_result.boxes.cls = Mock()
            mock_result.names = {0: "car"}
            mock_result.boxes.xyxy.cpu.return_value.numpy.return_value = np.array([[100, 200, 300, 400]])
            mock_result.boxes.conf.cpu.return_value.numpy.return_value = np.array([0.9])
            mock_result.boxes.cls.cpu.return_value.numpy.return_value = np.array([0])
            mock_model.return_value = mock_result
            mock_torch_load.return_value = mock_model

            # Warm up
            for _ in range(3):
                await pipeline_executor.execute_pipeline(pipeline_config, detection_context)

            # Benchmark pipeline execution
            pipeline_times = []
            num_iterations = 50

            for _ in range(num_iterations):
                start_time = time.perf_counter()
                result = await pipeline_executor.execute_pipeline(pipeline_config, detection_context)
                end_time = time.perf_counter()

                pipeline_time_ms = (end_time - start_time) * 1000
                pipeline_times.append(pipeline_time_ms)

                # Ensure result is valid
                assert result is not None

            avg_pipeline_time = statistics.mean(pipeline_times)
            max_pipeline_time = max(pipeline_times)

            # Performance assertions
            assert avg_pipeline_time < performance_config["max_pipeline_time_ms"]

            print(f"\nPipeline Execution Performance Metrics:")
            print(f"Average pipeline time: {avg_pipeline_time:.2f} ms")
            print(f"Max pipeline time: {max_pipeline_time:.2f} ms")

    def test_memory_usage_detection(self, sample_frame, performance_config):
        """Test memory usage during detection operations."""
        detector = YOLODetector()

        with patch('torch.load') as mock_torch_load:
            # Setup mock model
            mock_model = Mock()
            mock_result = Mock()
            mock_result.boxes = Mock()
            mock_result.boxes.xyxy = Mock()
            mock_result.boxes.conf = Mock()
            mock_result.boxes.cls = Mock()
            mock_result.names = {0: "car"}
            mock_result.boxes.xyxy.cpu.return_value.numpy.return_value = np.array([[100, 200, 300, 400]])
            mock_result.boxes.conf.cpu.return_value.numpy.return_value = np.array([0.9])
            mock_result.boxes.cls.cpu.return_value.numpy.return_value = np.array([0])
            mock_model.return_value = mock_result
            mock_torch_load.return_value = mock_model

            # Measure memory usage
            gc.collect()  # Clean up before measurement
            initial_memory = psutil.Process().memory_info().rss / 1024 / 1024  # MB

            # Run detections and monitor memory
            memory_measurements = []

            for i in range(100):
                detections = detector.detect(sample_frame, confidence_threshold=0.5)

                if i % 10 == 0:  # Measure every 10 iterations
                    current_memory = psutil.Process().memory_info().rss / 1024 / 1024
                    memory_measurements.append(current_memory - initial_memory)

            # Final memory measurement
            gc.collect()
            final_memory = psutil.Process().memory_info().rss / 1024 / 1024
            memory_increase = final_memory - initial_memory

            # Memory should not grow significantly
            assert memory_increase < 100  # Less than 100MB increase

            # Memory should be relatively stable (not constantly growing)
            if len(memory_measurements) > 1:
                memory_trend = memory_measurements[-1] - memory_measurements[0]
                assert memory_trend < 50  # Less than 50MB trend growth

            print(f"\nMemory Usage Metrics:")
            print(f"Initial memory: {initial_memory:.1f} MB")
            print(f"Final memory: {final_memory:.1f} MB")
            print(f"Memory increase: {memory_increase:.1f} MB")

    def test_concurrent_detection_performance(self, sample_frame):
        """Test performance with concurrent detection operations."""
        with patch('torch.load') as mock_torch_load:
            # Setup mock model
            mock_model = Mock()
            mock_result = Mock()
            mock_result.boxes = Mock()
            mock_result.boxes.xyxy = Mock()
            mock_result.boxes.conf = Mock()
            mock_result.boxes.cls = Mock()
            mock_result.names = {0: "car"}
            mock_result.boxes.xyxy.cpu.return_value.numpy.return_value = np.array([[100, 200, 300, 400]])
            mock_result.boxes.conf.cpu.return_value.numpy.return_value = np.array([0.9])
            mock_result.boxes.cls.cpu.return_value.numpy.return_value = np.array([0])
            mock_model.return_value = mock_result
            mock_torch_load.return_value = mock_model

            # Create multiple detectors
            detectors = [YOLODetector() for _ in range(4)]

            import concurrent.futures

            def run_detection(detector, frame, iterations=25):
                """Run detection iterations."""
                times = []
                for _ in range(iterations):
                    start_time = time.perf_counter()
                    detections = detector.detect(frame, confidence_threshold=0.5)
                    end_time = time.perf_counter()
                    times.append((end_time - start_time) * 1000)
                return times

            # Run concurrent detections
            start_time = time.perf_counter()

            with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
                futures = [
                    executor.submit(run_detection, detector, sample_frame)
                    for detector in detectors
                ]
                results = [future.result() for future in concurrent.futures.as_completed(futures)]

            end_time = time.perf_counter()
            total_time = end_time - start_time

            # Analyze results
            all_times = [time_ms for result in results for time_ms in result]
            total_detections = len(all_times)
            avg_detection_time = statistics.mean(all_times)

            # Calculate effective throughput
            effective_fps = total_detections / total_time

            print(f"\nConcurrent Detection Performance:")
            print(f"Total detections: {total_detections}")
            print(f"Total time: {total_time:.2f} seconds")
            print(f"Average detection time: {avg_detection_time:.2f} ms")
            print(f"Effective throughput: {effective_fps:.1f} FPS")

            # Should maintain reasonable performance under load
            assert avg_detection_time < 200  # Less than 200ms average
            assert effective_fps > 20  # More than 20 effective FPS

    def test_large_frame_performance(self, large_frame):
        """Test detection performance with large frames."""
        detector = YOLODetector()

        with patch('torch.load') as mock_torch_load:
            # Setup mock model
            mock_model = Mock()
            mock_result = Mock()
            mock_result.boxes = Mock()
            mock_result.boxes.xyxy = Mock()
            mock_result.boxes.conf = Mock()
            mock_result.boxes.cls = Mock()
            mock_result.names = {0: "car", 1: "person"}

            # Larger frame might have more detections
            mock_result.boxes.xyxy.cpu.return_value.numpy.return_value = np.array([
                [100, 200, 300, 400],
                [500, 600, 700, 800],
                [1000, 200, 1200, 400]
            ])
            mock_result.boxes.conf.cpu.return_value.numpy.return_value = np.array([0.9, 0.8, 0.7])
            mock_result.boxes.cls.cpu.return_value.numpy.return_value = np.array([0, 1, 0])
            mock_model.return_value = mock_result
            mock_torch_load.return_value = mock_model

            # Benchmark large frame detection
            detection_times = []
            num_iterations = 20  # Fewer iterations for large frames

            for _ in range(num_iterations):
                start_time = time.perf_counter()
                detections = detector.detect(large_frame, confidence_threshold=0.5)
                end_time = time.perf_counter()

                detection_time_ms = (end_time - start_time) * 1000
                detection_times.append(detection_time_ms)

            avg_detection_time = statistics.mean(detection_times)
            max_detection_time = max(detection_times)

            print(f"\nLarge Frame Detection Performance:")
            print(f"Frame size: {large_frame.shape}")
            print(f"Average detection time: {avg_detection_time:.2f} ms")
            print(f"Max detection time: {max_detection_time:.2f} ms")

            # Large frames should still be processed in reasonable time
            assert avg_detection_time < 300  # Less than 300ms for large frames
            assert max_detection_time < 500  # Less than 500ms max
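

# The benchmarks above report mean/median/min/max latencies. A tail-latency
# figure such as the 95th percentile is often more informative for real-time
# pipelines; the helper below is an optional, illustrative sketch using only
# the standard library (statistics.quantiles requires Python 3.8+ and at least
# two samples). It is not referenced by the tests and the name
# summarize_latencies is hypothetical.
def summarize_latencies(times_ms):
    """Return mean, median, p95, and max for a list of latencies in milliseconds."""
    return {
        "mean_ms": statistics.mean(times_ms),
        "median_ms": statistics.median(times_ms),
        # With n=20, the last cut point is the 95th percentile.
        "p95_ms": statistics.quantiles(times_ms, n=20)[-1],
        "max_ms": max(times_ms),
    }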


class TestStreamPerformance:
    """Test stream management performance."""

    @pytest.mark.asyncio
    async def test_stream_creation_performance(self):
        """Test performance of stream creation and management."""
        stream_manager = StreamManager()

        with patch('cv2.VideoCapture') as mock_video_cap:
            # Setup fast mock
            mock_cap_instance = Mock()
            mock_video_cap.return_value = mock_cap_instance
            mock_cap_instance.isOpened.return_value = True
            mock_cap_instance.read.return_value = (True, np.ones((480, 640, 3), dtype=np.uint8))

            # Benchmark stream creation
            creation_times = []
            num_streams = 20

            try:
                for i in range(num_streams):
                    from detector_worker.streams.stream_manager import StreamConfig
                    config = StreamConfig(
                        stream_url=f"rtsp://test{i}.example.com/stream",
                        stream_type="rtsp"
                    )

                    start_time = time.perf_counter()
                    await stream_manager.create_stream(f"camera_{i}", config, f"sub_{i}")
                    end_time = time.perf_counter()

                    creation_time_ms = (end_time - start_time) * 1000
                    creation_times.append(creation_time_ms)

                avg_creation_time = statistics.mean(creation_times)
                max_creation_time = max(creation_times)

                # Stream creation should be fast
                assert avg_creation_time < 100  # Less than 100ms average
                assert max_creation_time < 500  # Less than 500ms max

                print(f"\nStream Creation Performance:")
                print(f"Streams created: {num_streams}")
                print(f"Average creation time: {avg_creation_time:.2f} ms")
                print(f"Max creation time: {max_creation_time:.2f} ms")
            finally:
                await stream_manager.stop_all_streams()

    @pytest.mark.asyncio
    async def test_frame_retrieval_performance(self, sample_frame):
        """Test performance of frame retrieval operations."""
        stream_manager = StreamManager()

        with patch('cv2.VideoCapture') as mock_video_cap:
            mock_cap_instance = Mock()
            mock_video_cap.return_value = mock_cap_instance
            mock_cap_instance.isOpened.return_value = True
            mock_cap_instance.read.return_value = (True, sample_frame)

            try:
                # Create test stream
                from detector_worker.streams.stream_manager import StreamConfig
                config = StreamConfig(
                    stream_url="rtsp://perf.example.com/stream",
                    stream_type="rtsp"
                )
                await stream_manager.create_stream("perf_camera", config, "perf_sub")

                # Let stream capture some frames
                await asyncio.sleep(0.1)

                # Benchmark frame retrieval
                retrieval_times = []
                num_retrievals = 1000

                for _ in range(num_retrievals):
                    start_time = time.perf_counter()
                    frame = stream_manager.get_latest_frame("perf_camera")
                    end_time = time.perf_counter()

                    retrieval_time_ms = (end_time - start_time) * 1000
                    retrieval_times.append(retrieval_time_ms)

                avg_retrieval_time = statistics.mean(retrieval_times)
                max_retrieval_time = max(retrieval_times)

                # Frame retrieval should be very fast
                assert avg_retrieval_time < 1.0  # Less than 1ms average
                assert max_retrieval_time < 10.0  # Less than 10ms max

                print(f"\nFrame Retrieval Performance:")
                print(f"Frame retrievals: {num_retrievals}")
                print(f"Average retrieval time: {avg_retrieval_time:.3f} ms")
                print(f"Max retrieval time: {max_retrieval_time:.3f} ms")
            finally:
                await stream_manager.stop_all_streams()
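

# Every benchmark in this module repeats the same time.perf_counter() pattern
# around the call being measured. The context manager below is a small sketch
# of how that pattern could be factored out; it is illustrative only, is not
# used by the tests above, and the name timed_ms is hypothetical.
# Usage example:
#     with timed_ms(detection_times):
#         detector.detect(frame, confidence_threshold=0.5)
import contextlib


@contextlib.contextmanager
def timed_ms(samples):
    """Append the elapsed wall-clock time in milliseconds to `samples`."""
    start = time.perf_counter()
    try:
        yield
    finally:
        samples.append((time.perf_counter() - start) * 1000)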


class TestModelPerformance:
    """Test model management performance."""

    def test_model_loading_performance(self):
        """Test performance of model loading operations."""
        model_manager = ModelManager()

        with patch('torch.load') as mock_torch_load, \
             patch('os.path.exists', return_value=True):
            # Setup mock model
            def create_mock_model():
                model = Mock()
                # Mock model parameters for memory estimation
                param = Mock()
                param.numel.return_value = 1000000  # 1M parameters
                param.element_size.return_value = 4  # 4 bytes each
                model.parameters.return_value = [param]
                return model

            mock_torch_load.side_effect = lambda *args, **kwargs: create_mock_model()

            # Benchmark model loading
            loading_times = []
            num_models = 10

            for i in range(num_models):
                from detector_worker.models.model_manager import ModelConfig
                config = ModelConfig(
                    model_id=f"perf_model_{i}",
                    model_path=f"/fake/path/model_{i}.pt",
                    model_type="detection",
                    device="cpu"
                )

                start_time = time.perf_counter()
                model = model_manager.load_model(config)
                end_time = time.perf_counter()

                loading_time_ms = (end_time - start_time) * 1000
                loading_times.append(loading_time_ms)

            avg_loading_time = statistics.mean(loading_times)
            max_loading_time = max(loading_times)

            print(f"\nModel Loading Performance:")
            print(f"Models loaded: {num_models}")
            print(f"Average loading time: {avg_loading_time:.2f} ms")
            print(f"Max loading time: {max_loading_time:.2f} ms")

            # Model loading should be reasonable
            assert avg_loading_time < 200  # Less than 200ms average

    def test_model_cache_performance(self):
        """Test performance of model cache operations."""
        model_manager = ModelManager()

        with patch('torch.load') as mock_torch_load, \
             patch('os.path.exists', return_value=True):
            mock_torch_load.return_value = Mock()

            # Load model first
            from detector_worker.models.model_manager import ModelConfig
            config = ModelConfig(
                model_id="cache_perf_model",
                model_path="/fake/path/model.pt",
                model_type="detection",
                device="cpu"
            )

            # Initial load
            model_manager.load_model(config)

            # Benchmark cache retrieval
            cache_times = []
            num_retrievals = 10000

            for _ in range(num_retrievals):
                start_time = time.perf_counter()
                model = model_manager.get_model("cache_perf_model")
                end_time = time.perf_counter()

                cache_time_ms = (end_time - start_time) * 1000
                cache_times.append(cache_time_ms)

            avg_cache_time = statistics.mean(cache_times)
            max_cache_time = max(cache_times)

            print(f"\nModel Cache Performance:")
            print(f"Cache retrievals: {num_retrievals}")
            print(f"Average cache time: {avg_cache_time:.4f} ms")
            print(f"Max cache time: {max_cache_time:.4f} ms")

            # Cache should be very fast
            assert avg_cache_time < 0.1  # Less than 0.1ms average
            assert max_cache_time < 1.0  # Less than 1ms max
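

# These benchmarks print their metrics, so run them with pytest's -s flag to
# see the output (for example: pytest -s tests/performance/). The guard below
# is an optional convenience for running this file directly; it is an
# assumption about local workflow, not part of the original test suite.
if __name__ == "__main__":
    pytest.main(["-s", __file__])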