""" Performance tests for detection pipeline components. These tests benchmark the performance of key detection pipeline components to ensure they meet performance requirements. """ import pytest import time import asyncio import statistics from unittest.mock import Mock, patch import numpy as np import psutil import gc from detector_worker.detection.yolo_detector import YOLODetector from detector_worker.detection.tracking_manager import TrackingManager from detector_worker.detection.stability_validator import StabilityValidator from detector_worker.pipeline.pipeline_executor import PipelineExecutor from detector_worker.models.model_manager import ModelManager from detector_worker.streams.stream_manager import StreamManager @pytest.fixture def sample_frame(): """Create a sample frame for performance testing.""" return np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8) @pytest.fixture def large_frame(): """Create a large frame for stress testing.""" return np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8) @pytest.fixture def performance_config(): """Configuration for performance tests.""" return { "target_fps": 30, "max_detection_time_ms": 100, "max_tracking_time_ms": 50, "max_pipeline_time_ms": 500, "memory_limit_mb": 1024 } class TestDetectionPerformance: """Test detection performance benchmarks.""" def test_yolo_detection_speed(self, sample_frame, performance_config): """Benchmark YOLO detection speed.""" detector = YOLODetector() with patch('torch.load') as mock_torch_load: # Setup fast mock model mock_model = Mock() mock_result = Mock() mock_result.boxes = Mock() mock_result.boxes.xyxy = Mock() mock_result.boxes.conf = Mock() mock_result.boxes.cls = Mock() mock_result.names = {0: "car", 1: "person"} # Mock detection results mock_result.boxes.xyxy.cpu.return_value.numpy.return_value = np.array([ [100, 200, 300, 400], [150, 250, 350, 450] ]) mock_result.boxes.conf.cpu.return_value.numpy.return_value = np.array([0.9, 0.8]) mock_result.boxes.cls.cpu.return_value.numpy.return_value = np.array([0, 1]) mock_model.return_value = mock_result mock_torch_load.return_value = mock_model # Warm up for _ in range(5): detector.detect(sample_frame, confidence_threshold=0.5) # Benchmark detection speed detection_times = [] num_iterations = 100 for _ in range(num_iterations): start_time = time.perf_counter() detections = detector.detect(sample_frame, confidence_threshold=0.5) end_time = time.perf_counter() detection_time_ms = (end_time - start_time) * 1000 detection_times.append(detection_time_ms) # Calculate statistics avg_detection_time = statistics.mean(detection_times) median_detection_time = statistics.median(detection_times) max_detection_time = max(detection_times) min_detection_time = min(detection_times) # Performance assertions assert avg_detection_time < performance_config["max_detection_time_ms"] assert median_detection_time < performance_config["max_detection_time_ms"] # Calculate theoretical FPS theoretical_fps = 1000 / avg_detection_time assert theoretical_fps >= performance_config["target_fps"] print(f"\nDetection Performance Metrics:") print(f"Average detection time: {avg_detection_time:.2f} ms") print(f"Median detection time: {median_detection_time:.2f} ms") print(f"Min detection time: {min_detection_time:.2f} ms") print(f"Max detection time: {max_detection_time:.2f} ms") print(f"Theoretical FPS: {theoretical_fps:.1f}") def test_tracking_performance(self, sample_frame, performance_config): """Benchmark object tracking performance.""" tracking_manager = TrackingManager() # Create mock detections detections = [ {"class": "car", "confidence": 0.9, "bbox": [100, 200, 300, 400]}, {"class": "car", "confidence": 0.8, "bbox": [150, 250, 350, 450]}, {"class": "person", "confidence": 0.7, "bbox": [200, 300, 250, 400]} ] # Warm up tracking for i in range(10): tracking_manager.update_tracks(detections, frame_id=i) # Benchmark tracking speed tracking_times = [] num_iterations = 100 for i in range(num_iterations): # Simulate moving detections moving_detections = [] for det in detections: moved_det = det.copy() # Add small random movement bbox = moved_det["bbox"] moved_det["bbox"] = [ bbox[0] + np.random.randint(-5, 5), bbox[1] + np.random.randint(-5, 5), bbox[2] + np.random.randint(-5, 5), bbox[3] + np.random.randint(-5, 5) ] moving_detections.append(moved_det) start_time = time.perf_counter() tracks = tracking_manager.update_tracks(moving_detections, frame_id=i + 10) end_time = time.perf_counter() tracking_time_ms = (end_time - start_time) * 1000 tracking_times.append(tracking_time_ms) # Calculate statistics avg_tracking_time = statistics.mean(tracking_times) max_tracking_time = max(tracking_times) # Performance assertions assert avg_tracking_time < performance_config["max_tracking_time_ms"] assert max_tracking_time < performance_config["max_tracking_time_ms"] * 2 print(f"\nTracking Performance Metrics:") print(f"Average tracking time: {avg_tracking_time:.2f} ms") print(f"Max tracking time: {max_tracking_time:.2f} ms") def test_stability_validation_performance(self, performance_config): """Benchmark stability validation performance.""" validator = StabilityValidator() # Create stable detections sequence base_detection = { "class": "car", "confidence": 0.9, "bbox": [100, 200, 300, 400], "track_id": 1001 } # Add sequence of stable detections for i in range(20): detection = base_detection.copy() # Add small variations to simulate real detection noise detection["confidence"] = 0.9 + np.random.normal(0, 0.02) bbox = detection["bbox"] detection["bbox"] = [ bbox[0] + np.random.normal(0, 2), bbox[1] + np.random.normal(0, 2), bbox[2] + np.random.normal(0, 2), bbox[3] + np.random.normal(0, 2) ] validator.add_detection(detection, frame_id=i) # Benchmark validation performance validation_times = [] num_iterations = 1000 for i in range(num_iterations): test_detection = base_detection.copy() test_detection["confidence"] = 0.85 + np.random.normal(0, 0.05) start_time = time.perf_counter() is_stable = validator.is_detection_stable( test_detection, stability_frames=10, confidence_threshold=0.8 ) end_time = time.perf_counter() validation_time_ms = (end_time - start_time) * 1000 validation_times.append(validation_time_ms) avg_validation_time = statistics.mean(validation_times) max_validation_time = max(validation_times) # Should be very fast (< 1ms typically) assert avg_validation_time < 1.0 assert max_validation_time < 5.0 print(f"\nStability Validation Performance Metrics:") print(f"Average validation time: {avg_validation_time:.3f} ms") print(f"Max validation time: {max_validation_time:.3f} ms") @pytest.mark.asyncio async def test_pipeline_executor_performance(self, sample_frame, performance_config): """Benchmark complete pipeline execution performance.""" pipeline_executor = PipelineExecutor() # Simple pipeline configuration pipeline_config = { "modelId": "fast_detection_model", "modelFile": "fast_model.pt", "expectedClasses": ["car"], "minConfidence": 0.5, "actions": [], "branches": [] } detection_context = { "camera_id": "perf_camera", "display_id": "perf_display", "frame": sample_frame, "timestamp": int(time.time() * 1000), "session_id": "perf_session" } with patch('torch.load') as mock_torch_load, \ patch('os.path.exists', return_value=True): # Setup fast mock model mock_model = Mock() mock_result = Mock() mock_result.boxes = Mock() mock_result.boxes.xyxy = Mock() mock_result.boxes.conf = Mock() mock_result.boxes.cls = Mock() mock_result.names = {0: "car"} mock_result.boxes.xyxy.cpu.return_value.numpy.return_value = np.array([[100, 200, 300, 400]]) mock_result.boxes.conf.cpu.return_value.numpy.return_value = np.array([0.9]) mock_result.boxes.cls.cpu.return_value.numpy.return_value = np.array([0]) mock_model.return_value = mock_result mock_torch_load.return_value = mock_model # Warm up for _ in range(3): await pipeline_executor.execute_pipeline(pipeline_config, detection_context) # Benchmark pipeline execution pipeline_times = [] num_iterations = 50 for _ in range(num_iterations): start_time = time.perf_counter() result = await pipeline_executor.execute_pipeline(pipeline_config, detection_context) end_time = time.perf_counter() pipeline_time_ms = (end_time - start_time) * 1000 pipeline_times.append(pipeline_time_ms) # Ensure result is valid assert result is not None avg_pipeline_time = statistics.mean(pipeline_times) max_pipeline_time = max(pipeline_times) # Performance assertions assert avg_pipeline_time < performance_config["max_pipeline_time_ms"] print(f"\nPipeline Execution Performance Metrics:") print(f"Average pipeline time: {avg_pipeline_time:.2f} ms") print(f"Max pipeline time: {max_pipeline_time:.2f} ms") def test_memory_usage_detection(self, sample_frame, performance_config): """Test memory usage during detection operations.""" detector = YOLODetector() with patch('torch.load') as mock_torch_load: # Setup mock model mock_model = Mock() mock_result = Mock() mock_result.boxes = Mock() mock_result.boxes.xyxy = Mock() mock_result.boxes.conf = Mock() mock_result.boxes.cls = Mock() mock_result.names = {0: "car"} mock_result.boxes.xyxy.cpu.return_value.numpy.return_value = np.array([[100, 200, 300, 400]]) mock_result.boxes.conf.cpu.return_value.numpy.return_value = np.array([0.9]) mock_result.boxes.cls.cpu.return_value.numpy.return_value = np.array([0]) mock_model.return_value = mock_result mock_torch_load.return_value = mock_model # Measure memory usage gc.collect() # Clean up before measurement initial_memory = psutil.Process().memory_info().rss / 1024 / 1024 # MB # Run detections and monitor memory memory_measurements = [] for i in range(100): detections = detector.detect(sample_frame, confidence_threshold=0.5) if i % 10 == 0: # Measure every 10 iterations current_memory = psutil.Process().memory_info().rss / 1024 / 1024 memory_measurements.append(current_memory - initial_memory) # Final memory measurement gc.collect() final_memory = psutil.Process().memory_info().rss / 1024 / 1024 memory_increase = final_memory - initial_memory # Memory should not grow significantly assert memory_increase < 100 # Less than 100MB increase # Memory should be relatively stable (not constantly growing) if len(memory_measurements) > 1: memory_trend = memory_measurements[-1] - memory_measurements[0] assert memory_trend < 50 # Less than 50MB trend growth print(f"\nMemory Usage Metrics:") print(f"Initial memory: {initial_memory:.1f} MB") print(f"Final memory: {final_memory:.1f} MB") print(f"Memory increase: {memory_increase:.1f} MB") def test_concurrent_detection_performance(self, sample_frame): """Test performance with concurrent detection operations.""" with patch('torch.load') as mock_torch_load: # Setup mock model mock_model = Mock() mock_result = Mock() mock_result.boxes = Mock() mock_result.boxes.xyxy = Mock() mock_result.boxes.conf = Mock() mock_result.boxes.cls = Mock() mock_result.names = {0: "car"} mock_result.boxes.xyxy.cpu.return_value.numpy.return_value = np.array([[100, 200, 300, 400]]) mock_result.boxes.conf.cpu.return_value.numpy.return_value = np.array([0.9]) mock_result.boxes.cls.cpu.return_value.numpy.return_value = np.array([0]) mock_model.return_value = mock_result mock_torch_load.return_value = mock_model # Create multiple detectors detectors = [YOLODetector() for _ in range(4)] import threading import concurrent.futures def run_detection(detector, frame, iterations=25): """Run detection iterations.""" times = [] for _ in range(iterations): start_time = time.perf_counter() detections = detector.detect(frame, confidence_threshold=0.5) end_time = time.perf_counter() times.append((end_time - start_time) * 1000) return times # Run concurrent detections start_time = time.perf_counter() with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: futures = [ executor.submit(run_detection, detector, sample_frame) for detector in detectors ] results = [future.result() for future in concurrent.futures.as_completed(futures)] end_time = time.perf_counter() total_time = end_time - start_time # Analyze results all_times = [time_ms for result in results for time_ms in result] total_detections = len(all_times) avg_detection_time = statistics.mean(all_times) # Calculate effective throughput effective_fps = total_detections / total_time print(f"\nConcurrent Detection Performance:") print(f"Total detections: {total_detections}") print(f"Total time: {total_time:.2f} seconds") print(f"Average detection time: {avg_detection_time:.2f} ms") print(f"Effective throughput: {effective_fps:.1f} FPS") # Should maintain reasonable performance under load assert avg_detection_time < 200 # Less than 200ms average assert effective_fps > 20 # More than 20 effective FPS def test_large_frame_performance(self, large_frame): """Test detection performance with large frames.""" detector = YOLODetector() with patch('torch.load') as mock_torch_load: # Setup mock model mock_model = Mock() mock_result = Mock() mock_result.boxes = Mock() mock_result.boxes.xyxy = Mock() mock_result.boxes.conf = Mock() mock_result.boxes.cls = Mock() mock_result.names = {0: "car", 1: "person"} # Larger frame might have more detections mock_result.boxes.xyxy.cpu.return_value.numpy.return_value = np.array([ [100, 200, 300, 400], [500, 600, 700, 800], [1000, 200, 1200, 400] ]) mock_result.boxes.conf.cpu.return_value.numpy.return_value = np.array([0.9, 0.8, 0.7]) mock_result.boxes.cls.cpu.return_value.numpy.return_value = np.array([0, 1, 0]) mock_model.return_value = mock_result mock_torch_load.return_value = mock_model # Benchmark large frame detection detection_times = [] num_iterations = 20 # Fewer iterations for large frames for _ in range(num_iterations): start_time = time.perf_counter() detections = detector.detect(large_frame, confidence_threshold=0.5) end_time = time.perf_counter() detection_time_ms = (end_time - start_time) * 1000 detection_times.append(detection_time_ms) avg_detection_time = statistics.mean(detection_times) max_detection_time = max(detection_times) print(f"\nLarge Frame Detection Performance:") print(f"Frame size: {large_frame.shape}") print(f"Average detection time: {avg_detection_time:.2f} ms") print(f"Max detection time: {max_detection_time:.2f} ms") # Large frames should still be processed in reasonable time assert avg_detection_time < 300 # Less than 300ms for large frames assert max_detection_time < 500 # Less than 500ms max class TestStreamPerformance: """Test stream management performance.""" @pytest.mark.asyncio async def test_stream_creation_performance(self): """Test performance of stream creation and management.""" stream_manager = StreamManager() with patch('cv2.VideoCapture') as mock_video_cap: # Setup fast mock mock_cap_instance = Mock() mock_video_cap.return_value = mock_cap_instance mock_cap_instance.isOpened.return_value = True mock_cap_instance.read.return_value = (True, np.ones((480, 640, 3), dtype=np.uint8)) # Benchmark stream creation creation_times = [] num_streams = 20 try: for i in range(num_streams): from detector_worker.streams.stream_manager import StreamConfig config = StreamConfig( stream_url=f"rtsp://test{i}.example.com/stream", stream_type="rtsp" ) start_time = time.perf_counter() await stream_manager.create_stream(f"camera_{i}", config, f"sub_{i}") end_time = time.perf_counter() creation_time_ms = (end_time - start_time) * 1000 creation_times.append(creation_time_ms) avg_creation_time = statistics.mean(creation_times) max_creation_time = max(creation_times) # Stream creation should be fast assert avg_creation_time < 100 # Less than 100ms average assert max_creation_time < 500 # Less than 500ms max print(f"\nStream Creation Performance:") print(f"Streams created: {num_streams}") print(f"Average creation time: {avg_creation_time:.2f} ms") print(f"Max creation time: {max_creation_time:.2f} ms") finally: await stream_manager.stop_all_streams() @pytest.mark.asyncio async def test_frame_retrieval_performance(self, sample_frame): """Test performance of frame retrieval operations.""" stream_manager = StreamManager() with patch('cv2.VideoCapture') as mock_video_cap: mock_cap_instance = Mock() mock_video_cap.return_value = mock_cap_instance mock_cap_instance.isOpened.return_value = True mock_cap_instance.read.return_value = (True, sample_frame) try: # Create test stream from detector_worker.streams.stream_manager import StreamConfig config = StreamConfig( stream_url="rtsp://perf.example.com/stream", stream_type="rtsp" ) await stream_manager.create_stream("perf_camera", config, "perf_sub") # Let stream capture some frames await asyncio.sleep(0.1) # Benchmark frame retrieval retrieval_times = [] num_retrievals = 1000 for _ in range(num_retrievals): start_time = time.perf_counter() frame = stream_manager.get_latest_frame("perf_camera") end_time = time.perf_counter() retrieval_time_ms = (end_time - start_time) * 1000 retrieval_times.append(retrieval_time_ms) avg_retrieval_time = statistics.mean(retrieval_times) max_retrieval_time = max(retrieval_times) # Frame retrieval should be very fast assert avg_retrieval_time < 1.0 # Less than 1ms average assert max_retrieval_time < 10.0 # Less than 10ms max print(f"\nFrame Retrieval Performance:") print(f"Frame retrievals: {num_retrievals}") print(f"Average retrieval time: {avg_retrieval_time:.3f} ms") print(f"Max retrieval time: {max_retrieval_time:.3f} ms") finally: await stream_manager.stop_all_streams() class TestModelPerformance: """Test model management performance.""" def test_model_loading_performance(self): """Test performance of model loading operations.""" model_manager = ModelManager() with patch('torch.load') as mock_torch_load, \ patch('os.path.exists', return_value=True): # Setup mock model def create_mock_model(): model = Mock() # Mock model parameters for memory estimation param = Mock() param.numel.return_value = 1000000 # 1M parameters param.element_size.return_value = 4 # 4 bytes each model.parameters.return_value = [param] return model mock_torch_load.side_effect = lambda *args, **kwargs: create_mock_model() # Benchmark model loading loading_times = [] num_models = 10 for i in range(num_models): from detector_worker.models.model_manager import ModelConfig config = ModelConfig( model_id=f"perf_model_{i}", model_path=f"/fake/path/model_{i}.pt", model_type="detection", device="cpu" ) start_time = time.perf_counter() model = model_manager.load_model(config) end_time = time.perf_counter() loading_time_ms = (end_time - start_time) * 1000 loading_times.append(loading_time_ms) avg_loading_time = statistics.mean(loading_times) max_loading_time = max(loading_times) print(f"\nModel Loading Performance:") print(f"Models loaded: {num_models}") print(f"Average loading time: {avg_loading_time:.2f} ms") print(f"Max loading time: {max_loading_time:.2f} ms") # Model loading should be reasonable assert avg_loading_time < 200 # Less than 200ms average def test_model_cache_performance(self): """Test performance of model cache operations.""" model_manager = ModelManager() with patch('torch.load') as mock_torch_load, \ patch('os.path.exists', return_value=True): mock_torch_load.return_value = Mock() # Load model first from detector_worker.models.model_manager import ModelConfig config = ModelConfig( model_id="cache_perf_model", model_path="/fake/path/model.pt", model_type="detection", device="cpu" ) # Initial load model_manager.load_model(config) # Benchmark cache retrieval cache_times = [] num_retrievals = 10000 for _ in range(num_retrievals): start_time = time.perf_counter() model = model_manager.get_model("cache_perf_model") end_time = time.perf_counter() cache_time_ms = (end_time - start_time) * 1000 cache_times.append(cache_time_ms) avg_cache_time = statistics.mean(cache_times) max_cache_time = max(cache_times) print(f"\nModel Cache Performance:") print(f"Cache retrievals: {num_retrievals}") print(f"Average cache time: {avg_cache_time:.4f} ms") print(f"Max cache time: {max_cache_time:.4f} ms") # Cache should be very fast assert avg_cache_time < 0.1 # Less than 0.1ms average assert max_cache_time < 1.0 # Less than 1ms max