From a4cfb264b9bddd305c47e4f42afe517a05a434d3 Mon Sep 17 00:00:00 2001
From: ziesorx
Date: Mon, 20 Oct 2025 16:54:27 +0700
Subject: [PATCH] fix: car detection uses wrong image source

---
 core/detection/branches.py   | 54 ++++++++++++++++++++++++++----------
 core/detection/pipeline.py   | 34 ++++++++++++++++-------
 core/tracking/integration.py | 44 ++++++++++++++++++++++-------
 3 files changed, 98 insertions(+), 34 deletions(-)

diff --git a/core/detection/branches.py b/core/detection/branches.py
index 9359ea8..97c44ff 100644
--- a/core/detection/branches.py
+++ b/core/detection/branches.py
@@ -393,7 +393,12 @@ class BranchProcessor:
         trigger_classes = getattr(branch_config, 'trigger_classes', [])
         logger.info(f"[DETECTED REGIONS] {branch_id}: Available parent detections: {list(detected_regions.keys())}")
         for region_name, region_data in detected_regions.items():
-            logger.debug(f"[REGION DATA] {branch_id}: '{region_name}' -> bbox={region_data.get('bbox')}, conf={region_data.get('confidence')}")
+            # Handle both list (new) and single dict (backward compat)
+            if isinstance(region_data, list):
+                for i, region in enumerate(region_data):
+                    logger.debug(f"[REGION DATA] {branch_id}: '{region_name}[{i}]' -> bbox={region.get('bbox')}, conf={region.get('confidence')}")
+            else:
+                logger.debug(f"[REGION DATA] {branch_id}: '{region_name}' -> bbox={region_data.get('bbox')}, conf={region_data.get('confidence')}")
 
         if trigger_classes:
             # Check if any parent detection matches our trigger classes (case-insensitive)
@@ -454,18 +459,24 @@ class BranchProcessor:
 
             for crop_class in crop_classes:
                 if crop_class in detected_regions:
-                    region = detected_regions[crop_class]
-                    confidence = region.get('confidence', 0.0)
+                    regions = detected_regions[crop_class]
 
-                    # Select largest bbox (no confidence filtering - parent already validated it)
-                    bbox = region['bbox']
-                    area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])  # width * height
+                    # Handle both list (new) and single dict (backward compat)
+                    if not isinstance(regions, list):
+                        regions = [regions]
 
-                    # Choose biggest bbox among available detections
-                    if area > best_area:
-                        best_region = region
-                        best_class = crop_class
-                        best_area = area
+                    # Find largest bbox from all detections of this class
+                    for region in regions:
+                        confidence = region.get('confidence', 0.0)
+                        bbox = region['bbox']
+                        area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])  # width * height
+
+                        # Choose biggest bbox among all available detections
+                        if area > best_area:
+                            best_region = region
+                            best_class = crop_class
+                            best_area = area
+                            logger.debug(f"[CROP] Selected larger bbox for '{crop_class}': area={area:.0f}px², conf={confidence:.3f}")
 
             if best_region:
                 bbox = best_region['bbox']
@@ -483,7 +494,6 @@ class BranchProcessor:
         logger.info(f"[INFERENCE START] {branch_id}: Running inference on {'cropped' if input_frame is not frame else 'full'} frame "
                    f"({input_frame.shape[1]}x{input_frame.shape[0]}) with confidence={min_confidence}")
 
-        # Use .predict() method for both detection and classification models
        inference_start = time.time()
        detection_results = model.model.predict(input_frame, conf=min_confidence, verbose=False)
 
@@ -690,10 +700,26 @@ class BranchProcessor:
         bbox = None
         if region_name and region_name in detected_regions:
             # Crop the specified region
-            bbox = detected_regions[region_name]['bbox']
+            # Handle both list (new) and single dict (backward compat)
+            regions = detected_regions[region_name]
+            if isinstance(regions, list):
+                # Multiple detections - select largest bbox
+                if regions:
+                    best_region = max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1]))
+                    bbox = best_region['bbox']
+            else:
+                bbox = regions['bbox']
         elif region_name and region_name.lower() == 'frontal' and 'front_rear' in detected_regions:
             # Special case: "frontal" region maps to "front_rear" detection
-            bbox = detected_regions['front_rear']['bbox']
+            # Handle both list (new) and single dict (backward compat)
+            regions = detected_regions['front_rear']
+            if isinstance(regions, list):
+                # Multiple detections - select largest bbox
+                if regions:
+                    best_region = max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1]))
+                    bbox = best_region['bbox']
+            else:
+                bbox = regions['bbox']
 
         if bbox is not None:
             x1, y1, x2, y2 = [int(coord) for coord in bbox]
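
Illustration (not part of the patch): the branches.py changes repeat one pattern three times, normalize a region entry that may be a single dict (old shape) or a list of dicts (new shape), then keep the detection whose bbox covers the largest area. A minimal standalone sketch of that pattern; the helper name largest_bbox_region and the sample detections are invented for this example:

    from typing import Optional, Union

    def largest_bbox_region(regions: Union[dict, list]) -> Optional[dict]:
        """Return the detection with the largest bbox area, accepting either
        a single dict (old shape) or a list of dicts (new shape)."""
        if not isinstance(regions, list):  # backward compat: wrap single dict
            regions = [regions]
        if not regions:
            return None
        # bbox is [x1, y1, x2, y2]; area = width * height
        return max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1]))

    # Two hypothetical 'front_rear' detections; the larger box wins regardless of confidence.
    detections = [
        {'bbox': [100, 100, 200, 180], 'confidence': 0.91},  # area 8,000 px²
        {'bbox': [300, 120, 520, 300], 'confidence': 0.85},  # area 39,600 px²
    ]
    assert largest_bbox_region(detections)['confidence'] == 0.85
    assert largest_bbox_region({'bbox': [0, 0, 10, 10], 'confidence': 0.5})['bbox'] == [0, 0, 10, 10]
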
diff --git a/core/detection/pipeline.py b/core/detection/pipeline.py
index d395f3a..ba9ac9a 100644
--- a/core/detection/pipeline.py
+++ b/core/detection/pipeline.py
@@ -495,11 +495,13 @@ class DetectionPipeline:
                         }
                         valid_detections.append(detection_info)
 
-                        # Store region for processing phase
-                        detected_regions[class_name] = {
+                        # Store region for processing phase (support multiple detections per class)
+                        if class_name not in detected_regions:
+                            detected_regions[class_name] = []
+                        detected_regions[class_name].append({
                             'bbox': bbox,
                             'confidence': confidence
-                        }
+                        })
             else:
                 logger.warning("[DETECTION PHASE] No boxes found in detection results")
 
@@ -951,14 +953,26 @@ class DetectionPipeline:
 
             if region_name and region_name in detected_regions:
                 # Crop the specified region
-                bbox = detected_regions[region_name]['bbox']
-                x1, y1, x2, y2 = [int(coord) for coord in bbox]
-                cropped = frame[y1:y2, x1:x2]
-                if cropped.size > 0:
-                    image_to_save = cropped
-                    logger.debug(f"Cropped region '{region_name}' for redis_save_image")
+                # Handle both list (new) and single dict (backward compat)
+                regions = detected_regions[region_name]
+                if isinstance(regions, list):
+                    # Multiple detections - select largest bbox
+                    if regions:
+                        best_region = max(regions, key=lambda r: (r['bbox'][2] - r['bbox'][0]) * (r['bbox'][3] - r['bbox'][1]))
+                        bbox = best_region['bbox']
+                    else:
+                        bbox = None
                 else:
-                    logger.warning(f"Empty crop for region '{region_name}', using full frame")
+                    bbox = regions['bbox']
+
+                if bbox:
+                    x1, y1, x2, y2 = [int(coord) for coord in bbox]
+                    cropped = frame[y1:y2, x1:x2]
+                    if cropped.size > 0:
+                        image_to_save = cropped
+                        logger.debug(f"Cropped region '{region_name}' for redis_save_image")
+                    else:
+                        logger.warning(f"Empty crop for region '{region_name}', using full frame")
 
             # Format key with context
             key = action.params['key'].format(**context)
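
Illustration (not part of the patch): after the pipeline.py change, detected_regions maps each class to a list of detections, so a second vehicle of the same class accumulates instead of overwriting the first. A sketch of the resulting shape, using dict.setdefault as an equivalent of the patch's explicit membership check; all values are hypothetical:

    # Hypothetical detections from one frame: two cars plus a license plate.
    raw = [
        ('car', [80, 200, 400, 520], 0.88),
        ('car', [450, 180, 900, 560], 0.93),  # second car is kept, not dropped
        ('license_plate', [210, 470, 290, 505], 0.71),
    ]

    detected_regions = {}
    for class_name, bbox, confidence in raw:
        # setdefault mirrors the patch's "if class_name not in detected_regions" check
        detected_regions.setdefault(class_name, []).append({'bbox': bbox, 'confidence': confidence})

    assert len(detected_regions['car']) == 2
    assert len(detected_regions['license_plate']) == 1
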
diff --git a/core/tracking/integration.py b/core/tracking/integration.py
index 2fba002..1e3fc97 100644
--- a/core/tracking/integration.py
+++ b/core/tracking/integration.py
@@ -350,10 +350,21 @@ class TrackingPipelineIntegration:
                 'session_id': session_id
             }
 
+            # Fetch high-quality 2K snapshot for detection phase (not RTSP frame)
+            # This ensures bbox coordinates match the frame used in the processing phase
+            logger.info(f"[DETECTION PHASE] Fetching 2K snapshot for vehicle {vehicle.track_id}")
+            snapshot_frame = self._fetch_snapshot()
+
+            if snapshot_frame is None:
+                logger.warning(f"[DETECTION PHASE] Failed to fetch snapshot, falling back to RTSP frame")
+                snapshot_frame = frame  # Fallback to RTSP if snapshot fails
+            else:
+                logger.info(f"[DETECTION PHASE] Using {snapshot_frame.shape[1]}x{snapshot_frame.shape[0]} snapshot for detection")
+
             # Execute only the detection phase (first phase)
             # This will run detection and send imageDetection message to backend
             detection_result = await self.detection_pipeline.execute_detection_phase(
-                frame=frame,
+                frame=snapshot_frame,  # Use 2K snapshot instead of RTSP frame
                 display_id=display_id,
                 subscription_id=subscription_id
             )
@@ -373,13 +384,13 @@ class TrackingPipelineIntegration:
             if detection_result['message_sent']:
                 # Store for later processing when sessionId is received
                 self.pending_processing_data[display_id] = {
-                    'frame': frame.copy(),  # Store copy of frame for processing phase
+                    'frame': snapshot_frame.copy(),  # Store copy of 2K snapshot (not RTSP frame!)
                     'vehicle': vehicle,
                     'subscription_id': subscription_id,
                     'detection_result': detection_result,
                     'timestamp': time.time()
                 }
-                logger.info(f"Stored processing data for {display_id}, waiting for sessionId from backend")
+                logger.info(f"Stored processing data ({snapshot_frame.shape[1]}x{snapshot_frame.shape[0]} frame) for {display_id}, waiting for sessionId from backend")
 
             return detection_result
 
@@ -413,14 +424,27 @@ class TrackingPipelineIntegration:
 
             logger.info(f"Executing processing phase for session {session_id}, vehicle {vehicle.track_id}")
 
-            # Capture high-quality snapshot for pipeline processing
-            logger.info(f"[PROCESSING PHASE] Fetching 2K snapshot for session {session_id}")
-            frame = self._fetch_snapshot()
+            # Reuse the snapshot from the detection phase, or fetch a fresh one if detection fell back to RTSP
+            detection_frame = processing_data['frame']
+            frame_height = detection_frame.shape[0]
 
-            if frame is None:
-                logger.warning(f"[PROCESSING PHASE] Failed to capture snapshot, falling back to RTSP frame")
-                # Fall back to RTSP frame if snapshot fails
-                frame = processing_data['frame']
+            # Check if detection phase used 2K snapshot (height >= 1000) or RTSP fallback (height = 720)
+            if frame_height >= 1000:
+                # Detection used 2K snapshot - reuse it for consistent coordinates
+                logger.info(f"[PROCESSING PHASE] Reusing 2K snapshot from detection phase ({detection_frame.shape[1]}x{detection_frame.shape[0]})")
+                frame = detection_frame
+            else:
+                # Detection used RTSP fallback - need to fetch a fresh 2K snapshot
+                logger.warning(f"[PROCESSING PHASE] Detection used RTSP fallback ({detection_frame.shape[1]}x{detection_frame.shape[0]}), fetching fresh 2K snapshot")
+                frame = self._fetch_snapshot()
+
+                if frame is None:
+                    logger.error(f"[PROCESSING PHASE] Failed to fetch snapshot and detection used RTSP - coordinate mismatch will occur!")
+                    logger.error(f"[PROCESSING PHASE] Cannot proceed with mismatched coordinates. Aborting processing phase.")
+                    return  # Cannot process safely - bbox coordinates won't match frame resolution
+                else:
+                    logger.warning(f"[PROCESSING PHASE] Fetched fresh 2K snapshot ({frame.shape[1]}x{frame.shape[0]}), but coordinates may not match exactly")
+                    logger.warning(f"[PROCESSING PHASE] Re-running detection on the fresh snapshot is recommended but not implemented yet")
 
             # Extract detected regions from detection phase result if available
             detected_regions = detection_result.get('detected_regions', {})
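
Illustration (not part of the patch): the processing phase distinguishes the 2K snapshot from the 720p RTSP fallback by frame height alone. A sketch of that heuristic under the assumption that the snapshot is 2560x1440; the patch itself only relies on height >= 1000 versus 720, and the function name is invented here:

    import numpy as np

    def detection_used_snapshot(frame: np.ndarray) -> bool:
        """True if the stored detection frame looks like a 2K snapshot rather
        than the 720p RTSP stream, using the patch's height-based heuristic."""
        return frame.shape[0] >= 1000

    rtsp_frame = np.zeros((720, 1280, 3), dtype=np.uint8)   # RTSP fallback
    snapshot = np.zeros((1440, 2560, 3), dtype=np.uint8)    # assumed 2K snapshot size
    assert not detection_used_snapshot(rtsp_frame)
    assert detection_used_snapshot(snapshot)
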