"""
Performance tests for detection pipeline components.

These tests benchmark the performance of key detection pipeline
components to ensure they meet performance requirements.
"""
import pytest
import time
import asyncio
import statistics
from unittest.mock import Mock, patch
import numpy as np
import psutil
import gc

from detector_worker.detection.yolo_detector import YOLODetector
from detector_worker.detection.tracking_manager import TrackingManager
from detector_worker.detection.stability_validator import StabilityValidator
from detector_worker.pipeline.pipeline_executor import PipelineExecutor
from detector_worker.models.model_manager import ModelManager
from detector_worker.streams.stream_manager import StreamManager


@pytest.fixture
def sample_frame():
    """Create a random 480x640 3-channel uint8 frame for performance testing.

    Returns:
        A ``numpy`` array of shape ``(480, 640, 3)`` and dtype ``uint8``.
    """
    # The upper bound of np.random.randint is exclusive, so 256 is required
    # for the frame to cover the full uint8 range (0..255).
    return np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)


@pytest.fixture
def large_frame():
    """Create a random 1080x1920 3-channel uint8 frame for stress testing.

    Returns:
        A ``numpy`` array of shape ``(1080, 1920, 3)`` and dtype ``uint8``.
    """
    # The upper bound of np.random.randint is exclusive, so 256 is required
    # for the frame to cover the full uint8 range (0..255).
    return np.random.randint(0, 256, (1080, 1920, 3), dtype=np.uint8)


@pytest.fixture
def performance_config():
    """Provide the thresholds the performance tests compare against.

    Returns:
        A dict with the target frame rate, the per-stage time budgets in
        milliseconds, and a memory limit in megabytes.
    """
    thresholds = dict(
        target_fps=30,              # frames per second
        max_detection_time_ms=100,  # budget for one detection call
        max_tracking_time_ms=50,    # budget for one tracking update
        max_pipeline_time_ms=500,   # budget for one pipeline execution
        memory_limit_mb=1024,
    )
    return thresholds


class TestDetectionPerformance:
    """Test detection performance benchmarks.

    The detector and pipeline benchmarks patch ``torch.load`` so that it
    returns a canned ``Mock`` model built by ``_build_mock_model``.
    NOTE(review): this presumes the components obtain their model through
    ``torch.load`` -- confirm against the detector implementation.
    """

    @staticmethod
    def _build_mock_model(boxes, confidences, class_ids, names):
        """Build a ``Mock`` model whose call returns one canned result.

        Args:
            boxes: Values returned by ``result.boxes.xyxy.cpu().numpy()``.
            confidences: Values returned by ``result.boxes.conf.cpu().numpy()``.
            class_ids: Values returned by ``result.boxes.cls.cpu().numpy()``.
            names: Mapping stored on ``result.names``.

        Returns:
            A ``Mock`` that returns the canned result object when called.
        """
        mock_result = Mock()
        mock_result.boxes = Mock()
        mock_result.boxes.xyxy = Mock()
        mock_result.boxes.conf = Mock()
        mock_result.boxes.cls = Mock()
        mock_result.names = names

        mock_result.boxes.xyxy.cpu.return_value.numpy.return_value = np.array(boxes)
        mock_result.boxes.conf.cpu.return_value.numpy.return_value = np.array(confidences)
        mock_result.boxes.cls.cpu.return_value.numpy.return_value = np.array(class_ids)

        mock_model = Mock()
        mock_model.return_value = mock_result
        return mock_model

    def test_yolo_detection_speed(self, sample_frame, performance_config):
        """Benchmark YOLO detection speed.

        Runs 5 warm-up calls and then times 100 ``detect`` calls. Asserts that
        the mean and median latency stay below ``max_detection_time_ms`` and
        that the frame rate implied by the mean latency reaches ``target_fps``.
        """
        detector = YOLODetector()

        with patch('torch.load') as mock_torch_load:
            # Setup fast mock model
            mock_torch_load.return_value = self._build_mock_model(
                boxes=[[100, 200, 300, 400], [150, 250, 350, 450]],
                confidences=[0.9, 0.8],
                class_ids=[0, 1],
                names={0: "car", 1: "person"},
            )

            # Warm up
            for _ in range(5):
                detector.detect(sample_frame, confidence_threshold=0.5)

            # Benchmark detection speed
            detection_times = []
            num_iterations = 100

            for _ in range(num_iterations):
                start_time = time.perf_counter()
                detector.detect(sample_frame, confidence_threshold=0.5)
                end_time = time.perf_counter()

                detection_times.append((end_time - start_time) * 1000)

            # Calculate statistics
            avg_detection_time = statistics.mean(detection_times)
            median_detection_time = statistics.median(detection_times)
            max_detection_time = max(detection_times)
            min_detection_time = min(detection_times)

            # Performance assertions
            assert avg_detection_time < performance_config["max_detection_time_ms"]
            assert median_detection_time < performance_config["max_detection_time_ms"]

            # Calculate theoretical FPS; a zero mean (timer resolution too
            # coarse for the mocked call) counts as unbounded throughput
            # instead of raising ZeroDivisionError.
            if avg_detection_time > 0:
                theoretical_fps = 1000 / avg_detection_time
            else:
                theoretical_fps = float("inf")
            assert theoretical_fps >= performance_config["target_fps"]

            print("\nDetection Performance Metrics:")
            print(f"Average detection time: {avg_detection_time:.2f} ms")
            print(f"Median detection time: {median_detection_time:.2f} ms")
            print(f"Min detection time: {min_detection_time:.2f} ms")
            print(f"Max detection time: {max_detection_time:.2f} ms")
            print(f"Theoretical FPS: {theoretical_fps:.1f}")

    def test_tracking_performance(self, sample_frame, performance_config):
        """Benchmark object tracking performance.

        Feeds the same three detections for 10 warm-up frames, then times 100
        ``update_tracks`` calls with randomly jittered boxes. Asserts that the
        mean stays below ``max_tracking_time_ms`` and the maximum below twice
        that budget.
        """
        tracking_manager = TrackingManager()

        # Create mock detections
        detections = [
            {"class": "car", "confidence": 0.9, "bbox": [100, 200, 300, 400]},
            {"class": "car", "confidence": 0.8, "bbox": [150, 250, 350, 450]},
            {"class": "person", "confidence": 0.7, "bbox": [200, 300, 250, 400]}
        ]

        # Warm up tracking
        for i in range(10):
            tracking_manager.update_tracks(detections, frame_id=i)

        # Benchmark tracking speed
        tracking_times = []
        num_iterations = 100

        for i in range(num_iterations):
            # Simulate moving detections: every coordinate gets its own small
            # random offset, the other fields are copied unchanged.
            moving_detections = [
                {
                    **det,
                    "bbox": [coord + np.random.randint(-5, 5) for coord in det["bbox"]],
                }
                for det in detections
            ]

            start_time = time.perf_counter()
            # Frame ids continue after the 10 warm-up frames.
            tracking_manager.update_tracks(moving_detections, frame_id=i + 10)
            end_time = time.perf_counter()

            tracking_times.append((end_time - start_time) * 1000)

        # Calculate statistics
        avg_tracking_time = statistics.mean(tracking_times)
        max_tracking_time = max(tracking_times)

        # Performance assertions
        assert avg_tracking_time < performance_config["max_tracking_time_ms"]
        assert max_tracking_time < performance_config["max_tracking_time_ms"] * 2

        print("\nTracking Performance Metrics:")
        print(f"Average tracking time: {avg_tracking_time:.2f} ms")
        print(f"Max tracking time: {max_tracking_time:.2f} ms")

    def test_stability_validation_performance(self, performance_config):
        """Benchmark stability validation performance.

        Seeds the validator with 20 noisy copies of one detection, then times
        1000 ``is_detection_stable`` calls. Asserts a mean below 1 ms and a
        maximum below 5 ms; the stability verdict itself is not checked.
        """
        validator = StabilityValidator()

        # Create stable detections sequence
        base_detection = {
            "class": "car",
            "confidence": 0.9,
            "bbox": [100, 200, 300, 400],
            "track_id": 1001
        }

        # Add sequence of stable detections with small variations to
        # simulate real detection noise
        for i in range(20):
            detection = {
                **base_detection,
                "confidence": 0.9 + np.random.normal(0, 0.02),
                "bbox": [
                    coord + np.random.normal(0, 2)
                    for coord in base_detection["bbox"]
                ],
            }
            validator.add_detection(detection, frame_id=i)

        # Benchmark validation performance
        validation_times = []
        num_iterations = 1000

        for _ in range(num_iterations):
            test_detection = base_detection.copy()
            test_detection["confidence"] = 0.85 + np.random.normal(0, 0.05)

            start_time = time.perf_counter()
            validator.is_detection_stable(
                test_detection,
                stability_frames=10,
                confidence_threshold=0.8
            )
            end_time = time.perf_counter()

            validation_times.append((end_time - start_time) * 1000)

        avg_validation_time = statistics.mean(validation_times)
        max_validation_time = max(validation_times)

        # Should be very fast (< 1ms typically)
        assert avg_validation_time < 1.0
        assert max_validation_time < 5.0

        print("\nStability Validation Performance Metrics:")
        print(f"Average validation time: {avg_validation_time:.3f} ms")
        print(f"Max validation time: {max_validation_time:.3f} ms")

    @pytest.mark.asyncio
    async def test_pipeline_executor_performance(self, sample_frame, performance_config):
        """Benchmark complete pipeline execution performance.

        Runs 3 warm-up executions and then times 50 ``execute_pipeline``
        calls. Asserts that every result is not ``None`` and that the mean
        stays below ``max_pipeline_time_ms``.
        """
        pipeline_executor = PipelineExecutor()

        # Simple pipeline configuration
        pipeline_config = {
            "modelId": "fast_detection_model",
            "modelFile": "fast_model.pt",
            "expectedClasses": ["car"],
            "minConfidence": 0.5,
            "actions": [],
            "branches": []
        }

        detection_context = {
            "camera_id": "perf_camera",
            "display_id": "perf_display",
            "frame": sample_frame,
            "timestamp": int(time.time() * 1000),
            "session_id": "perf_session"
        }

        with patch('torch.load') as mock_torch_load, \
             patch('os.path.exists', return_value=True):

            # Setup fast mock model
            mock_torch_load.return_value = self._build_mock_model(
                boxes=[[100, 200, 300, 400]],
                confidences=[0.9],
                class_ids=[0],
                names={0: "car"},
            )

            # Warm up
            for _ in range(3):
                await pipeline_executor.execute_pipeline(pipeline_config, detection_context)

            # Benchmark pipeline execution
            pipeline_times = []
            num_iterations = 50

            for _ in range(num_iterations):
                start_time = time.perf_counter()
                result = await pipeline_executor.execute_pipeline(pipeline_config, detection_context)
                end_time = time.perf_counter()

                pipeline_times.append((end_time - start_time) * 1000)

                # Ensure result is valid
                assert result is not None

            avg_pipeline_time = statistics.mean(pipeline_times)
            max_pipeline_time = max(pipeline_times)

            # Performance assertions
            assert avg_pipeline_time < performance_config["max_pipeline_time_ms"]

            print("\nPipeline Execution Performance Metrics:")
            print(f"Average pipeline time: {avg_pipeline_time:.2f} ms")
            print(f"Max pipeline time: {max_pipeline_time:.2f} ms")

    def test_memory_usage_detection(self, sample_frame, performance_config):
        """Test memory usage during detection operations.

        Runs 100 detections while sampling the process RSS every 10th
        iteration. Asserts that RSS grows by less than 100 MB overall and by
        less than 50 MB between the first and last sample.
        """
        detector = YOLODetector()

        with patch('torch.load') as mock_torch_load:
            # Setup mock model
            mock_torch_load.return_value = self._build_mock_model(
                boxes=[[100, 200, 300, 400]],
                confidences=[0.9],
                class_ids=[0],
                names={0: "car"},
            )

            # One handle for the current process is enough; memory_info()
            # reads fresh values on every call.
            process = psutil.Process()

            def rss_mb():
                """Return the resident set size of this process in MB."""
                return process.memory_info().rss / 1024 / 1024

            # Measure memory usage
            gc.collect()  # Clean up before measurement
            initial_memory = rss_mb()

            # Run detections and monitor memory
            memory_measurements = []
            for i in range(100):
                detector.detect(sample_frame, confidence_threshold=0.5)

                if i % 10 == 0:  # Measure every 10 iterations
                    memory_measurements.append(rss_mb() - initial_memory)

            # Final memory measurement
            gc.collect()
            final_memory = rss_mb()
            memory_increase = final_memory - initial_memory

            # Memory should not grow significantly
            assert memory_increase < 100  # Less than 100MB increase

            # Memory should be relatively stable (not constantly growing)
            if len(memory_measurements) > 1:
                memory_trend = memory_measurements[-1] - memory_measurements[0]
                assert memory_trend < 50  # Less than 50MB trend growth

            print("\nMemory Usage Metrics:")
            print(f"Initial memory: {initial_memory:.1f} MB")
            print(f"Final memory: {final_memory:.1f} MB")
            print(f"Memory increase: {memory_increase:.1f} MB")

    def test_concurrent_detection_performance(self, sample_frame):
        """Test performance with concurrent detection operations.

        Four detectors each run 25 detections on a 4-worker thread pool.
        Asserts a mean per-call latency below 200 ms and an overall
        throughput above 20 detections per second.
        """
        import concurrent.futures

        with patch('torch.load') as mock_torch_load:
            # Setup mock model
            mock_torch_load.return_value = self._build_mock_model(
                boxes=[[100, 200, 300, 400]],
                confidences=[0.9],
                class_ids=[0],
                names={0: "car"},
            )

            # Create multiple detectors
            detectors = [YOLODetector() for _ in range(4)]

            def run_detection(detector, frame, iterations=25):
                """Run detection iterations and return per-call times in ms."""
                times = []
                for _ in range(iterations):
                    call_start = time.perf_counter()
                    detector.detect(frame, confidence_threshold=0.5)
                    call_end = time.perf_counter()
                    times.append((call_end - call_start) * 1000)
                return times

            # Run concurrent detections
            start_time = time.perf_counter()

            with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
                futures = [
                    executor.submit(run_detection, detector, sample_frame)
                    for detector in detectors
                ]

                # Completion order is irrelevant: only aggregates are used.
                results = [future.result() for future in concurrent.futures.as_completed(futures)]

            end_time = time.perf_counter()
            total_time = end_time - start_time

            # Analyze results
            all_times = [time_ms for result in results for time_ms in result]
            total_detections = len(all_times)
            avg_detection_time = statistics.mean(all_times)

            # Calculate effective throughput; a zero wall-clock time counts
            # as unbounded throughput instead of raising ZeroDivisionError.
            if total_time > 0:
                effective_fps = total_detections / total_time
            else:
                effective_fps = float("inf")

            print("\nConcurrent Detection Performance:")
            print(f"Total detections: {total_detections}")
            print(f"Total time: {total_time:.2f} seconds")
            print(f"Average detection time: {avg_detection_time:.2f} ms")
            print(f"Effective throughput: {effective_fps:.1f} FPS")

            # Should maintain reasonable performance under load
            assert avg_detection_time < 200  # Less than 200ms average
            assert effective_fps > 20  # More than 20 effective FPS

    def test_large_frame_performance(self, large_frame):
        """Test detection performance with large frames.

        Times 20 ``detect`` calls on the large frame (no warm-up). Asserts a
        mean below 300 ms and a maximum below 500 ms.
        """
        detector = YOLODetector()

        with patch('torch.load') as mock_torch_load:
            # Setup mock model; larger frame might have more detections
            mock_torch_load.return_value = self._build_mock_model(
                boxes=[
                    [100, 200, 300, 400],
                    [500, 600, 700, 800],
                    [1000, 200, 1200, 400],
                ],
                confidences=[0.9, 0.8, 0.7],
                class_ids=[0, 1, 0],
                names={0: "car", 1: "person"},
            )

            # Benchmark large frame detection
            detection_times = []
            num_iterations = 20  # Fewer iterations for large frames

            for _ in range(num_iterations):
                start_time = time.perf_counter()
                detector.detect(large_frame, confidence_threshold=0.5)
                end_time = time.perf_counter()

                detection_times.append((end_time - start_time) * 1000)

            avg_detection_time = statistics.mean(detection_times)
            max_detection_time = max(detection_times)

            print("\nLarge Frame Detection Performance:")
            print(f"Frame size: {large_frame.shape}")
            print(f"Average detection time: {avg_detection_time:.2f} ms")
            print(f"Max detection time: {max_detection_time:.2f} ms")

            # Large frames should still be processed in reasonable time
            assert avg_detection_time < 300  # Less than 300ms for large frames
            assert max_detection_time < 500  # Less than 500ms max


class TestStreamPerformance:
    """Test stream management performance.

    Both benchmarks patch ``cv2.VideoCapture`` with a ``Mock`` that reports an
    opened capture and returns a fixed frame from ``read``.
    """

    @pytest.mark.asyncio
    async def test_stream_creation_performance(self):
        """Test performance of stream creation and management.

        Times 20 ``create_stream`` calls. Asserts a mean below 100 ms and a
        maximum below 500 ms, and always stops all streams afterwards.
        """
        # Imported once up front rather than on every loop iteration.
        from detector_worker.streams.stream_manager import StreamConfig

        stream_manager = StreamManager()

        with patch('cv2.VideoCapture') as mock_video_cap:
            # Setup fast mock
            mock_cap_instance = Mock()
            mock_video_cap.return_value = mock_cap_instance
            mock_cap_instance.isOpened.return_value = True
            mock_cap_instance.read.return_value = (True, np.ones((480, 640, 3), dtype=np.uint8))

            # Benchmark stream creation
            creation_times = []
            num_streams = 20

            try:
                for i in range(num_streams):
                    # Config construction is kept outside the timed region.
                    config = StreamConfig(
                        stream_url=f"rtsp://test{i}.example.com/stream",
                        stream_type="rtsp"
                    )

                    start_time = time.perf_counter()
                    await stream_manager.create_stream(f"camera_{i}", config, f"sub_{i}")
                    end_time = time.perf_counter()

                    creation_times.append((end_time - start_time) * 1000)

                avg_creation_time = statistics.mean(creation_times)
                max_creation_time = max(creation_times)

                # Stream creation should be fast
                assert avg_creation_time < 100  # Less than 100ms average
                assert max_creation_time < 500  # Less than 500ms max

                print("\nStream Creation Performance:")
                print(f"Streams created: {num_streams}")
                print(f"Average creation time: {avg_creation_time:.2f} ms")
                print(f"Max creation time: {max_creation_time:.2f} ms")

            finally:
                # Runs even when an assertion fails.
                await stream_manager.stop_all_streams()

    @pytest.mark.asyncio
    async def test_frame_retrieval_performance(self, sample_frame):
        """Test performance of frame retrieval operations.

        Creates one stream, waits 0.1 s, then times 1000 ``get_latest_frame``
        calls. Asserts a mean below 1 ms and a maximum below 10 ms; the
        returned frame itself is not checked.
        """
        from detector_worker.streams.stream_manager import StreamConfig

        stream_manager = StreamManager()

        with patch('cv2.VideoCapture') as mock_video_cap:
            mock_cap_instance = Mock()
            mock_video_cap.return_value = mock_cap_instance
            mock_cap_instance.isOpened.return_value = True
            mock_cap_instance.read.return_value = (True, sample_frame)

            try:
                # Create test stream
                config = StreamConfig(
                    stream_url="rtsp://perf.example.com/stream",
                    stream_type="rtsp"
                )

                await stream_manager.create_stream("perf_camera", config, "perf_sub")

                # Let stream capture some frames
                await asyncio.sleep(0.1)

                # Benchmark frame retrieval
                retrieval_times = []
                num_retrievals = 1000

                for _ in range(num_retrievals):
                    start_time = time.perf_counter()
                    stream_manager.get_latest_frame("perf_camera")
                    end_time = time.perf_counter()

                    retrieval_times.append((end_time - start_time) * 1000)

                avg_retrieval_time = statistics.mean(retrieval_times)
                max_retrieval_time = max(retrieval_times)

                # Frame retrieval should be very fast
                assert avg_retrieval_time < 1.0  # Less than 1ms average
                assert max_retrieval_time < 10.0  # Less than 10ms max

                print("\nFrame Retrieval Performance:")
                print(f"Frame retrievals: {num_retrievals}")
                print(f"Average retrieval time: {avg_retrieval_time:.3f} ms")
                print(f"Max retrieval time: {max_retrieval_time:.3f} ms")

            finally:
                # Runs even when an assertion fails.
                await stream_manager.stop_all_streams()


class TestModelPerformance:
    """Test model management performance.

    Both benchmarks patch ``torch.load`` and make ``os.path.exists`` return
    ``True``, so the fake model paths are never read from disk by those calls.
    """

    def test_model_loading_performance(self):
        """Test performance of model loading operations.

        Times ``load_model`` for 10 distinct model configs, each receiving a
        fresh ``Mock`` model from the patched ``torch.load``. Asserts a mean
        below 200 ms.
        """
        # Imported once up front rather than on every loop iteration.
        from detector_worker.models.model_manager import ModelConfig

        model_manager = ModelManager()

        with patch('torch.load') as mock_torch_load, \
             patch('os.path.exists', return_value=True):

            # Setup mock model
            def create_mock_model():
                """Return a Mock model exposing one fake parameter tensor."""
                model = Mock()
                # Mock model parameters for memory estimation
                param = Mock()
                param.numel.return_value = 1000000  # 1M parameters
                param.element_size.return_value = 4  # 4 bytes each
                model.parameters.return_value = [param]
                return model

            # side_effect yields a new mock per load instead of a shared one.
            mock_torch_load.side_effect = lambda *args, **kwargs: create_mock_model()

            # Benchmark model loading
            loading_times = []
            num_models = 10

            for i in range(num_models):
                # Config construction is kept outside the timed region.
                config = ModelConfig(
                    model_id=f"perf_model_{i}",
                    model_path=f"/fake/path/model_{i}.pt",
                    model_type="detection",
                    device="cpu"
                )

                start_time = time.perf_counter()
                model_manager.load_model(config)
                end_time = time.perf_counter()

                loading_times.append((end_time - start_time) * 1000)

            avg_loading_time = statistics.mean(loading_times)
            max_loading_time = max(loading_times)

            print("\nModel Loading Performance:")
            print(f"Models loaded: {num_models}")
            print(f"Average loading time: {avg_loading_time:.2f} ms")
            print(f"Max loading time: {max_loading_time:.2f} ms")

            # Model loading should be reasonable
            assert avg_loading_time < 200  # Less than 200ms average

    def test_model_cache_performance(self):
        """Test performance of model cache operations.

        Loads one model, then times 10000 ``get_model`` calls for its id.
        Asserts a mean below 0.1 ms and a maximum below 1 ms; the returned
        model itself is not checked.
        """
        from detector_worker.models.model_manager import ModelConfig

        model_manager = ModelManager()

        with patch('torch.load') as mock_torch_load, \
             patch('os.path.exists', return_value=True):

            mock_torch_load.return_value = Mock()

            # Load model first
            config = ModelConfig(
                model_id="cache_perf_model",
                model_path="/fake/path/model.pt",
                model_type="detection",
                device="cpu"
            )

            # Initial load
            model_manager.load_model(config)

            # Benchmark cache retrieval
            cache_times = []
            num_retrievals = 10000

            for _ in range(num_retrievals):
                start_time = time.perf_counter()
                model_manager.get_model("cache_perf_model")
                end_time = time.perf_counter()

                cache_times.append((end_time - start_time) * 1000)

            avg_cache_time = statistics.mean(cache_times)
            max_cache_time = max(cache_times)

            print("\nModel Cache Performance:")
            print(f"Cache retrievals: {num_retrievals}")
            print(f"Average cache time: {avg_cache_time:.4f} ms")
            print(f"Max cache time: {max_cache_time:.4f} ms")

            # Cache should be very fast
            assert avg_cache_time < 0.1  # Less than 0.1ms average
            assert max_cache_time < 1.0  # Less than 1ms max