fix: car detection use wrong image source
All checks were successful
Build Worker Base and Application Images / check-base-changes (push) Successful in 6s
Build Worker Base and Application Images / build-base (push) Has been skipped
Build Worker Base and Application Images / build-docker (push) Successful in 3m34s
Build Worker Base and Application Images / deploy-stack (push) Successful in 15s

ziesorx 2025-10-20 16:54:27 +07:00
parent 5e59e00c55
commit a4cfb264b9
3 changed files with 98 additions and 34 deletions


@@ -393,7 +393,12 @@ class BranchProcessor:
         trigger_classes = getattr(branch_config, 'trigger_classes', [])
         logger.info(f"[DETECTED REGIONS] {branch_id}: Available parent detections: {list(detected_regions.keys())}")
         for region_name, region_data in detected_regions.items():
-            logger.debug(f"[REGION DATA] {branch_id}: '{region_name}' -> bbox={region_data.get('bbox')}, conf={region_data.get('confidence')}")
+            # Handle both list (new) and single dict (backward compat)
+            if isinstance(region_data, list):
+                for i, region in enumerate(region_data):
+                    logger.debug(f"[REGION DATA] {branch_id}: '{region_name}[{i}]' -> bbox={region.get('bbox')}, conf={region.get('confidence')}")
+            else:
+                logger.debug(f"[REGION DATA] {branch_id}: '{region_name}' -> bbox={region_data.get('bbox')}, conf={region_data.get('confidence')}")
 
         if trigger_classes:
             # Check if any parent detection matches our trigger classes (case-insensitive)
@@ -454,18 +459,24 @@ class BranchProcessor:
             for crop_class in crop_classes:
                 if crop_class in detected_regions:
-                    region = detected_regions[crop_class]
-                    confidence = region.get('confidence', 0.0)
-
-                    # Select largest bbox (no confidence filtering - parent already validated it)
-                    bbox = region['bbox']
-                    area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])  # width * height
-
-                    # Choose biggest bbox among available detections
-                    if area > best_area:
-                        best_region = region
-                        best_class = crop_class
-                        best_area = area
+                    regions = detected_regions[crop_class]
+
+                    # Handle both list (new) and single dict (backward compat)
+                    if not isinstance(regions, list):
+                        regions = [regions]
+
+                    # Find largest bbox from all detections of this class
+                    for region in regions:
+                        confidence = region.get('confidence', 0.0)
+                        bbox = region['bbox']
+                        area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])  # width * height
+
+                        # Choose biggest bbox among all available detections
+                        if area > best_area:
+                            best_region = region
+                            best_class = crop_class
+                            best_area = area
+                            logger.debug(f"[CROP] Selected larger bbox for '{crop_class}': area={area:.0f}px², conf={confidence:.3f}")
 
             if best_region:
                 bbox = best_region['bbox']
@@ -483,7 +494,6 @@ class BranchProcessor:
             logger.info(f"[INFERENCE START] {branch_id}: Running inference on {'cropped' if input_frame is not frame else 'full'} frame "
                        f"({input_frame.shape[1]}x{input_frame.shape[0]}) with confidence={min_confidence}")
-
             # Use .predict() method for both detection and classification models
             inference_start = time.time()
             detection_results = model.model.predict(input_frame, conf=min_confidence, verbose=False)
@@ -690,10 +700,26 @@ class BranchProcessor:
             bbox = None
             if region_name and region_name in detected_regions:
                 # Crop the specified region
-                bbox = detected_regions[region_name]['bbox']
+                # Handle both list (new) and single dict (backward compat)
+                regions = detected_regions[region_name]
+                if isinstance(regions, list):
+                    # Multiple detections - select largest bbox
+                    if regions:
+                        best_region = max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1]))
+                        bbox = best_region['bbox']
+                else:
+                    bbox = regions['bbox']
             elif region_name and region_name.lower() == 'frontal' and 'front_rear' in detected_regions:
                 # Special case: "frontal" region maps to "front_rear" detection
-                bbox = detected_regions['front_rear']['bbox']
+                # Handle both list (new) and single dict (backward compat)
+                regions = detected_regions['front_rear']
+                if isinstance(regions, list):
+                    # Multiple detections - select largest bbox
+                    if regions:
+                        best_region = max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1]))
+                        bbox = best_region['bbox']
+                else:
+                    bbox = regions['bbox']
 
             if bbox is not None:
                 x1, y1, x2, y2 = [int(coord) for coord in bbox]
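
Note: the hunks above all apply the same pattern: accept either a list of detections (new format) or a single dict (backward compat) for a class, then keep the detection whose bbox covers the largest area. A minimal standalone sketch of that selection logic, for reference only (the helper name and example values are illustrative, not part of this commit):

def largest_bbox_region(regions):
    """Return the detection dict with the largest bbox area, or None.

    Accepts either a list of {'bbox': [x1, y1, x2, y2], ...} dicts (new format)
    or a single dict (old format), mirroring the backward-compat branches above.
    """
    if not isinstance(regions, list):  # backward compat: single dict
        regions = [regions]
    if not regions:
        return None
    return max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1]))

# Example: two detections of the same class - the larger box wins, not the more confident one
candidates = [
    {'bbox': [0, 0, 100, 80], 'confidence': 0.91},
    {'bbox': [10, 10, 300, 200], 'confidence': 0.85},
]
print(largest_bbox_region(candidates)['bbox'])  # [10, 10, 300, 200]

Area rather than confidence decides the winner because, as the removed comment notes, the parent detection has already been confidence-filtered.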


@@ -495,11 +495,13 @@ class DetectionPipeline:
                         }
                         valid_detections.append(detection_info)
 
-                        # Store region for processing phase
-                        detected_regions[class_name] = {
+                        # Store region for processing phase (support multiple detections per class)
+                        if class_name not in detected_regions:
+                            detected_regions[class_name] = []
+                        detected_regions[class_name].append({
                             'bbox': bbox,
                             'confidence': confidence
-                        }
+                        })
             else:
                 logger.warning("[DETECTION PHASE] No boxes found in detection results")
@@ -951,14 +953,26 @@ class DetectionPipeline:
                 if region_name and region_name in detected_regions:
                     # Crop the specified region
-                    bbox = detected_regions[region_name]['bbox']
-                    x1, y1, x2, y2 = [int(coord) for coord in bbox]
-                    cropped = frame[y1:y2, x1:x2]
-                    if cropped.size > 0:
-                        image_to_save = cropped
-                        logger.debug(f"Cropped region '{region_name}' for redis_save_image")
-                    else:
-                        logger.warning(f"Empty crop for region '{region_name}', using full frame")
+                    # Handle both list (new) and single dict (backward compat)
+                    regions = detected_regions[region_name]
+                    if isinstance(regions, list):
+                        # Multiple detections - select largest bbox
+                        if regions:
+                            best_region = max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1]))
+                            bbox = best_region['bbox']
+                        else:
+                            bbox = None
+                    else:
+                        bbox = regions['bbox']
+
+                    if bbox:
+                        x1, y1, x2, y2 = [int(coord) for coord in bbox]
+                        cropped = frame[y1:y2, x1:x2]
+                        if cropped.size > 0:
+                            image_to_save = cropped
+                            logger.debug(f"Cropped region '{region_name}' for redis_save_image")
+                        else:
+                            logger.warning(f"Empty crop for region '{region_name}', using full frame")
 
                 # Format key with context
                 key = action.params['key'].format(**context)
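
Note: the list handling above exists because of the change in the first hunk of this file: detected_regions now accumulates every detection of a class instead of overwriting earlier ones with the last detection. A short sketch of the resulting structure (raw_detections is a placeholder for model output, not an identifier from this repo):

# Placeholder model output: (class name, bbox, confidence)
raw_detections = [
    ('car', [120, 80, 640, 420], 0.92),
    ('car', [700, 90, 1100, 450], 0.88),
    ('front_rear', [150, 200, 420, 400], 0.81),
]

detected_regions = {}
for class_name, bbox, confidence in raw_detections:
    # Same accumulation style as the commit: create the list the first time a class appears
    if class_name not in detected_regions:
        detected_regions[class_name] = []
    detected_regions[class_name].append({'bbox': bbox, 'confidence': confidence})

print(len(detected_regions['car']))   # 2 - both car detections survive
print(list(detected_regions.keys()))  # ['car', 'front_rear']

Downstream consumers (BranchProcessor and the redis_save_image crop above) then pick the largest bbox per class, which is why the isinstance checks appear throughout.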


@@ -350,10 +350,21 @@ class TrackingPipelineIntegration:
                 'session_id': session_id
             }
 
+            # Fetch high-quality 2K snapshot for detection phase (not RTSP frame)
+            # This ensures bbox coordinates match the frame used in processing phase
+            logger.info(f"[DETECTION PHASE] Fetching 2K snapshot for vehicle {vehicle.track_id}")
+            snapshot_frame = self._fetch_snapshot()
+            if snapshot_frame is None:
+                logger.warning(f"[DETECTION PHASE] Failed to fetch snapshot, falling back to RTSP frame")
+                snapshot_frame = frame  # Fallback to RTSP if snapshot fails
+            else:
+                logger.info(f"[DETECTION PHASE] Using {snapshot_frame.shape[1]}x{snapshot_frame.shape[0]} snapshot for detection")
+
             # Execute only the detection phase (first phase)
             # This will run detection and send imageDetection message to backend
             detection_result = await self.detection_pipeline.execute_detection_phase(
-                frame=frame,
+                frame=snapshot_frame,  # Use 2K snapshot instead of RTSP frame
                 display_id=display_id,
                 subscription_id=subscription_id
             )
@@ -373,13 +384,13 @@ class TrackingPipelineIntegration:
             if detection_result['message_sent']:
                 # Store for later processing when sessionId is received
                 self.pending_processing_data[display_id] = {
-                    'frame': frame.copy(),  # Store copy of frame for processing phase
+                    'frame': snapshot_frame.copy(),  # Store copy of 2K snapshot (not RTSP frame!)
                     'vehicle': vehicle,
                     'subscription_id': subscription_id,
                     'detection_result': detection_result,
                     'timestamp': time.time()
                 }
-                logger.info(f"Stored processing data for {display_id}, waiting for sessionId from backend")
+                logger.info(f"Stored processing data ({snapshot_frame.shape[1]}x{snapshot_frame.shape[0]} frame) for {display_id}, waiting for sessionId from backend")
 
         return detection_result
@@ -413,14 +424,27 @@ class TrackingPipelineIntegration:
             logger.info(f"Executing processing phase for session {session_id}, vehicle {vehicle.track_id}")
 
-            # Capture high-quality snapshot for pipeline processing
-            logger.info(f"[PROCESSING PHASE] Fetching 2K snapshot for session {session_id}")
-            frame = self._fetch_snapshot()
-            if frame is None:
-                logger.warning(f"[PROCESSING PHASE] Failed to capture snapshot, falling back to RTSP frame")
-                # Fall back to RTSP frame if snapshot fails
-                frame = processing_data['frame']
+            # Reuse the snapshot from detection phase OR fetch fresh one if detection used RTSP fallback
+            detection_frame = processing_data['frame']
+            frame_height = detection_frame.shape[0]
+
+            # Check if detection phase used 2K snapshot (height > 1000) or RTSP fallback (height = 720)
+            if frame_height >= 1000:
+                # Detection used 2K snapshot - reuse it for consistent coordinates
+                logger.info(f"[PROCESSING PHASE] Reusing 2K snapshot from detection phase ({detection_frame.shape[1]}x{detection_frame.shape[0]})")
+                frame = detection_frame
+            else:
+                # Detection used RTSP fallback - need to fetch fresh 2K snapshot
+                logger.warning(f"[PROCESSING PHASE] Detection used RTSP fallback ({detection_frame.shape[1]}x{detection_frame.shape[0]}), fetching fresh 2K snapshot")
+                frame = self._fetch_snapshot()
+                if frame is None:
+                    logger.error(f"[PROCESSING PHASE] Failed to fetch snapshot and detection used RTSP - coordinate mismatch will occur!")
+                    logger.error(f"[PROCESSING PHASE] Cannot proceed with mismatched coordinates. Aborting processing phase.")
+                    return  # Cannot process safely - bbox coordinates won't match frame resolution
+                else:
+                    logger.warning(f"[PROCESSING PHASE] Fetched fresh 2K snapshot ({frame.shape[1]}x{frame.shape[0]}), but coordinates may not match exactly")
+                    logger.warning(f"[PROCESSING PHASE] Re-running detection on fresh snapshot is recommended but not implemented yet")
 
             # Extract detected regions from detection phase result if available
             detected_regions = detection_result.get('detected_regions', {})
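
Note: taken together, the three hunks in this file make the detection and processing phases run on the same image source, so bbox coordinates produced during detection remain valid during processing. A compact sketch of the processing-phase frame choice (function and parameter names are illustrative; the 1000-pixel height threshold is the one used in the commit):

def pick_processing_frame(stored_frame, fetch_snapshot):
    """Sketch of the processing-phase frame selection added in this commit.

    stored_frame:   frame saved by the detection phase (2K snapshot, or RTSP fallback)
    fetch_snapshot: callable returning a fresh 2K snapshot, or None on failure
    Returns the frame to process, or None to abort.
    """
    if stored_frame.shape[0] >= 1000:
        # Detection ran on the 2K snapshot - reuse it so the stored bbox coordinates still match
        return stored_frame

    # Detection fell back to the ~720p RTSP frame, whose coordinates do not match a 2K snapshot
    fresh = fetch_snapshot()
    if fresh is None:
        return None  # abort: no frame that safely matches the stored coordinates
    # Usable, but as the warnings above note, the coordinates may not line up exactly
    return fresh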