fix: car detection uses wrong image source

2025-10-20 16:54:27 +07:00 · 2025-10-20 16:54:27 +07:00 · a4cfb264b9
commit a4cfb264b9
parent 5e59e00c55
3 changed files with 98 additions and 34 deletions
--- a/core/detection/branches.py
+++ b/core/detection/branches.py
@@ -393,6 +393,11 @@ class BranchProcessor:
        trigger_classes = getattr(branch_config, 'trigger_classes', [])
        logger.info(f"[DETECTED REGIONS] {branch_id}: Available parent detections: {list(detected_regions.keys())}")
        for region_name, region_data in detected_regions.items():
+            # Handle both list (new) and single dict (backward compat)
+            if isinstance(region_data, list):
+                for i, region in enumerate(region_data):
+                    logger.debug(f"[REGION DATA] {branch_id}: '{region_name}[{i}]' -> bbox={region.get('bbox')}, conf={region.get('confidence')}")
+            else:
                logger.debug(f"[REGION DATA] {branch_id}: '{region_name}' -> bbox={region_data.get('bbox')}, conf={region_data.get('confidence')}")

        if trigger_classes:
@@ -454,18 +459,24 @@ class BranchProcessor:

                for crop_class in crop_classes:
                    if crop_class in detected_regions:
-                        region = detected_regions[crop_class]
-                        confidence = region.get('confidence', 0.0)
+                        regions = detected_regions[crop_class]

-                        # Select largest bbox (no confidence filtering - parent already validated it)
+                        # Handle both list (new) and single dict (backward compat)
+                        if not isinstance(regions, list):
+                            regions = [regions]
+
+                        # Find largest bbox from all detections of this class
+                        for region in regions:
+                            confidence = region.get('confidence', 0.0)
                            bbox = region['bbox']
                            area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])  # width * height

-                        # Choose biggest bbox among available detections
+                            # Choose biggest bbox among all available detections
                            if area > best_area:
                                best_region = region
                                best_class = crop_class
                                best_area = area
+                                logger.debug(f"[CROP] Selected larger bbox for '{crop_class}': area={area:.0f}px², conf={confidence:.3f}")

                if best_region:
                    bbox = best_region['bbox']
@@ -483,7 +494,6 @@ class BranchProcessor:
            logger.info(f"[INFERENCE START] {branch_id}: Running inference on {'cropped' if input_frame is not frame else 'full'} frame "
                       f"({input_frame.shape[1]}x{input_frame.shape[0]}) with confidence={min_confidence}")

-
            # Use .predict() method for both detection and classification models
            inference_start = time.time()
            detection_results = model.model.predict(input_frame, conf=min_confidence, verbose=False)
@@ -690,10 +700,26 @@ class BranchProcessor:
            bbox = None
            if region_name and region_name in detected_regions:
                # Crop the specified region
-                bbox = detected_regions[region_name]['bbox']
+                # Handle both list (new) and single dict (backward compat)
+                regions = detected_regions[region_name]
+                if isinstance(regions, list):
+                    # Multiple detections - select largest bbox
+                    if regions:
+                        best_region = max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1]))
+                        bbox = best_region['bbox']
+                else:
+                    bbox = regions['bbox']
            elif region_name and region_name.lower() == 'frontal' and 'front_rear' in detected_regions:
                # Special case: "frontal" region maps to "front_rear" detection
-                bbox = detected_regions['front_rear']['bbox']
+                # Handle both list (new) and single dict (backward compat)
+                regions = detected_regions['front_rear']
+                if isinstance(regions, list):
+                    # Multiple detections - select largest bbox
+                    if regions:
+                        best_region = max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1]))
+                        bbox = best_region['bbox']
+                else:
+                    bbox = regions['bbox']

            if bbox is not None:
                x1, y1, x2, y2 = [int(coord) for coord in bbox]
--- a/core/detection/pipeline.py
+++ b/core/detection/pipeline.py
@@ -495,11 +495,13 @@ class DetectionPipeline:
                        }
                        valid_detections.append(detection_info)

-                        # Store region for processing phase
-                        detected_regions[class_name] = {
+                        # Store region for processing phase (support multiple detections per class)
+                        if class_name not in detected_regions:
+                            detected_regions[class_name] = []
+                        detected_regions[class_name].append({
                            'bbox': bbox,
                            'confidence': confidence
-                        }
+                        })
                else:
                    logger.warning("[DETECTION PHASE] No boxes found in detection results")

@@ -951,7 +953,19 @@ class DetectionPipeline:

            if region_name and region_name in detected_regions:
                # Crop the specified region
-                bbox = detected_regions[region_name]['bbox']
+                # Handle both list (new) and single dict (backward compat)
+                regions = detected_regions[region_name]
+                if isinstance(regions, list):
+                    # Multiple detections - select largest bbox
+                    if regions:
+                        best_region = max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1]))
+                        bbox = best_region['bbox']
+                    else:
+                        bbox = None
+                else:
+                    bbox = regions['bbox']
+
+                if bbox:
                    x1, y1, x2, y2 = [int(coord) for coord in bbox]
                    cropped = frame[y1:y2, x1:x2]
                    if cropped.size > 0:
--- a/core/tracking/integration.py
+++ b/core/tracking/integration.py
@@ -350,10 +350,21 @@ class TrackingPipelineIntegration:
                    'session_id': session_id
                }

+            # Fetch high-quality 2K snapshot for detection phase (not RTSP frame)
+            # This ensures bbox coordinates match the frame used in processing phase
+            logger.info(f"[DETECTION PHASE] Fetching 2K snapshot for vehicle {vehicle.track_id}")
+            snapshot_frame = self._fetch_snapshot()
+
+            if snapshot_frame is None:
+                logger.warning(f"[DETECTION PHASE] Failed to fetch snapshot, falling back to RTSP frame")
+                snapshot_frame = frame  # Fallback to RTSP if snapshot fails
+            else:
+                logger.info(f"[DETECTION PHASE] Using {snapshot_frame.shape[1]}x{snapshot_frame.shape[0]} snapshot for detection")
+
            # Execute only the detection phase (first phase)
            # This will run detection and send imageDetection message to backend
            detection_result = await self.detection_pipeline.execute_detection_phase(
-                frame=frame,
+                frame=snapshot_frame,  # Use 2K snapshot instead of RTSP frame
                display_id=display_id,
                subscription_id=subscription_id
            )
@@ -373,13 +384,13 @@ class TrackingPipelineIntegration:
            if detection_result['message_sent']:
                # Store for later processing when sessionId is received
                self.pending_processing_data[display_id] = {
-                    'frame': frame.copy(),  # Store copy of frame for processing phase
+                    'frame': snapshot_frame.copy(),  # Store copy of 2K snapshot (not RTSP frame!)
                    'vehicle': vehicle,
                    'subscription_id': subscription_id,
                    'detection_result': detection_result,
                    'timestamp': time.time()
                }
-                logger.info(f"Stored processing data for {display_id}, waiting for sessionId from backend")
+                logger.info(f"Stored processing data ({snapshot_frame.shape[1]}x{snapshot_frame.shape[0]} frame) for {display_id}, waiting for sessionId from backend")

            return detection_result

@@ -413,14 +424,27 @@ class TrackingPipelineIntegration:

            logger.info(f"Executing processing phase for session {session_id}, vehicle {vehicle.track_id}")

-            # Capture high-quality snapshot for pipeline processing
-            logger.info(f"[PROCESSING PHASE] Fetching 2K snapshot for session {session_id}")
+            # Reuse the snapshot from detection phase OR fetch fresh one if detection used RTSP fallback
+            detection_frame = processing_data['frame']
+            frame_height = detection_frame.shape[0]
+
+            # Check if detection phase used 2K snapshot (height >= 1000) or RTSP fallback (lower resolution, e.g. height = 720)
+            if frame_height >= 1000:
+                # Detection used 2K snapshot - reuse it for consistent coordinates
+                logger.info(f"[PROCESSING PHASE] Reusing 2K snapshot from detection phase ({detection_frame.shape[1]}x{detection_frame.shape[0]})")
+                frame = detection_frame
+            else:
+                # Detection used RTSP fallback - need to fetch fresh 2K snapshot
+                logger.warning(f"[PROCESSING PHASE] Detection used RTSP fallback ({detection_frame.shape[1]}x{detection_frame.shape[0]}), fetching fresh 2K snapshot")
                frame = self._fetch_snapshot()

                if frame is None:
-                logger.warning(f"[PROCESSING PHASE] Failed to capture snapshot, falling back to RTSP frame")
-                # Fall back to RTSP frame if snapshot fails
-                frame = processing_data['frame']
+                    logger.error(f"[PROCESSING PHASE] Failed to fetch snapshot and detection used RTSP - coordinate mismatch will occur!")
+                    logger.error(f"[PROCESSING PHASE] Cannot proceed with mismatched coordinates. Aborting processing phase.")
+                    return  # Cannot process safely - bbox coordinates won't match frame resolution
+                else:
+                    logger.warning(f"[PROCESSING PHASE] Fetched fresh 2K snapshot ({frame.shape[1]}x{frame.shape[0]}), but coordinates may not match exactly")
+                    logger.warning(f"[PROCESSING PHASE] Re-running detection on fresh snapshot is recommended but not implemented yet")

            # Extract detected regions from detection phase result if available
            detected_regions = detection_result.get('detected_regions', {})