feat: custom BoT-SORT-based tracker

ziesorx 2025-09-26 14:22:38 +07:00
parent bd201acac1
commit 791f611f7d
8 changed files with 649 additions and 282 deletions


@@ -60,6 +60,8 @@ class YOLOWrapper:
self.model = None
self._class_names = []
self._load_model()
logger.info(f"Initialized YOLO wrapper for {model_id} on {self.device}")
@@ -115,6 +117,7 @@ class YOLOWrapper:
logger.error(f"Failed to extract class names: {str(e)}")
self._class_names = {}
def infer(
self,
image: np.ndarray,
@@ -222,55 +225,30 @@ class YOLOWrapper:
return detections
def track(
self,
image: np.ndarray,
confidence_threshold: float = 0.5,
trigger_classes: Optional[List[str]] = None,
persist: bool = True
persist: bool = True,
camera_id: Optional[str] = None
) -> InferenceResult:
"""
Run tracking on an image
Run detection (tracking will be handled by external tracker)
Args:
image: Input image as numpy array (BGR format)
confidence_threshold: Minimum confidence for detections
trigger_classes: List of class names to filter
persist: Whether to persist tracks across frames
persist: Ignored - tracking handled externally
camera_id: Ignored - tracking handled externally
Returns:
InferenceResult containing detections with track IDs
InferenceResult containing detections (no track IDs from YOLO)
"""
if self.model is None:
raise RuntimeError(f"Model {self.model_id} not loaded")
try:
import time
start_time = time.time()
# Run tracking
results = self.model.track(
image,
conf=confidence_threshold,
persist=persist,
verbose=False
)
inference_time = time.time() - start_time
# Parse results
detections = self._parse_results(results[0], trigger_classes)
return InferenceResult(
detections=detections,
image_shape=(image.shape[0], image.shape[1]),
inference_time=inference_time,
model_id=self.model_id
)
except Exception as e:
logger.error(f"Tracking failed for model {self.model_id}: {str(e)}", exc_info=True)
raise
# Just do detection - no YOLO tracking
return self.infer(image, confidence_threshold, trigger_classes)
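For illustration, a minimal hedged sketch of the new contract — detections from the wrapper, track IDs from the external tracker added later in this commit (`wrapper`, `tracker`, and `frame` are assumed to already exist; the camera ID is a placeholder):
# Hedged sketch: YOLOWrapper now only detects; MultiCameraBoTSORT assigns IDs.
result = wrapper.track(frame, confidence_threshold=0.5, camera_id="cam-01")
tracks = tracker.update("cam-01", result)   # list of dicts with 'track_id', 'bbox', ...
for t in tracks:
    print(t["track_id"], t["class_name"], t["bbox"])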
def predict_classification(
self,
@@ -350,6 +328,7 @@ class YOLOWrapper:
"""Get the number of classes the model can detect"""
return len(self._class_names)
def clear_cache(self) -> None:
"""Clear the model cache"""
with self._cache_lock:


@@ -130,7 +130,7 @@ class StreamManager:
try:
if stream_config.rtsp_url:
# RTSP stream using FFmpeg subprocess with CUDA acceleration
logger.info(f"[STREAM_START] Starting FFmpeg RTSP stream for camera_id='{camera_id}' URL={stream_config.rtsp_url}")
logger.info(f"\033[94m[RTSP] Starting {camera_id}\033[0m")
reader = FFmpegRTSPReader(
camera_id=camera_id,
rtsp_url=stream_config.rtsp_url,
@@ -139,11 +139,11 @@
reader.set_frame_callback(self._frame_callback)
reader.start()
self._streams[camera_id] = reader
logger.info(f"[STREAM_START] ✅ Started FFmpeg RTSP stream for camera_id='{camera_id}'")
logger.info(f"\033[92m[RTSP] {camera_id} connected\033[0m")
elif stream_config.snapshot_url:
# HTTP snapshot stream
logger.info(f"[STREAM_START] Starting HTTP snapshot stream for camera_id='{camera_id}' URL={stream_config.snapshot_url}")
logger.info(f"\033[95m[HTTP] Starting {camera_id}\033[0m")
reader = HTTPSnapshotReader(
camera_id=camera_id,
snapshot_url=stream_config.snapshot_url,
@@ -153,7 +153,7 @@
reader.set_frame_callback(self._frame_callback)
reader.start()
self._streams[camera_id] = reader
logger.info(f"[STREAM_START] ✅ Started HTTP snapshot stream for camera_id='{camera_id}'")
logger.info(f"\033[92m[HTTP] {camera_id} connected\033[0m")
else:
logger.error(f"No valid URL provided for camera {camera_id}")
@@ -182,11 +182,16 @@
try:
# Store frame in shared buffer
shared_cache_buffer.put_frame(camera_id, frame)
logger.info(f"[FRAME_CALLBACK] Stored frame for camera_id='{camera_id}' in shared_cache_buffer, shape={frame.shape}")
# Quieter frame callback logging - only log occasionally
if hasattr(self, '_frame_log_count'):
self._frame_log_count += 1
else:
self._frame_log_count = 1
# Log current buffer state
available_cameras = shared_cache_buffer.frame_buffer.get_camera_list()
logger.info(f"[FRAME_CALLBACK] Buffer now contains {len(available_cameras)} cameras: {available_cameras}")
# Log every 100 frames to avoid spam
if self._frame_log_count % 100 == 0:
available_cameras = shared_cache_buffer.frame_buffer.get_camera_list()
logger.info(f"\033[96m[BUFFER] {len(available_cameras)} active cameras: {', '.join(available_cameras)}\033[0m")
# Process tracking for subscriptions with tracking integration
self._process_tracking_for_camera(camera_id, frame)


@@ -21,6 +21,34 @@ os.environ["OPENCV_FFMPEG_LOGLEVEL"] = "-8" # Suppress FFMPEG warnings
logger = logging.getLogger(__name__)
# Color codes for pretty logging
class Colors:
GREEN = '\033[92m'
YELLOW = '\033[93m'
RED = '\033[91m'
BLUE = '\033[94m'
PURPLE = '\033[95m'
CYAN = '\033[96m'
WHITE = '\033[97m'
BOLD = '\033[1m'
END = '\033[0m'
def log_success(camera_id: str, message: str):
"""Log success messages in green"""
logger.info(f"{Colors.GREEN}[{camera_id}] {message}{Colors.END}")
def log_warning(camera_id: str, message: str):
"""Log warnings in yellow"""
logger.warning(f"{Colors.YELLOW}[{camera_id}] {message}{Colors.END}")
def log_error(camera_id: str, message: str):
"""Log errors in red"""
logger.error(f"{Colors.RED}[{camera_id}] {message}{Colors.END}")
def log_info(camera_id: str, message: str):
"""Log info in cyan"""
logger.info(f"{Colors.CYAN}[{camera_id}] {message}{Colors.END}")
# Removed watchdog logging configuration - no longer using file watching
@@ -56,7 +84,7 @@ class FFmpegRTSPReader:
self.stop_event.clear()
self.thread = threading.Thread(target=self._read_frames, daemon=True)
self.thread.start()
logger.info(f"Started FFmpeg reader for camera {self.camera_id}")
log_success(self.camera_id, "Stream started")
def stop(self):
"""Stop the FFmpeg subprocess reader."""
@@ -69,61 +97,12 @@
self.process.kill()
if self.thread:
self.thread.join(timeout=5.0)
logger.info(f"Stopped FFmpeg reader for camera {self.camera_id}")
log_info(self.camera_id, "Stream stopped")
def _probe_stream_info(self):
"""Probe stream to get resolution and other info."""
try:
cmd = [
'ffprobe',
'-v', 'quiet',
'-print_format', 'json',
'-show_streams',
'-select_streams', 'v:0', # First video stream
'-rtsp_transport', 'tcp',
self.rtsp_url
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
if result.returncode != 0:
logger.error(f"Camera {self.camera_id}: ffprobe failed (code {result.returncode})")
if result.stderr:
logger.error(f"Camera {self.camera_id}: ffprobe stderr: {result.stderr}")
if result.stdout:
logger.debug(f"Camera {self.camera_id}: ffprobe stdout: {result.stdout}")
return None
import json
data = json.loads(result.stdout)
if not data.get('streams'):
logger.error(f"Camera {self.camera_id}: No video streams found")
return None
stream = data['streams'][0]
width = stream.get('width')
height = stream.get('height')
if not width or not height:
logger.error(f"Camera {self.camera_id}: Could not determine resolution")
return None
logger.info(f"Camera {self.camera_id}: Detected resolution {width}x{height}")
return width, height
except Exception as e:
logger.error(f"Camera {self.camera_id}: Error probing stream: {e}")
return None
# Removed _probe_stream_info - BMP headers contain dimensions
def _start_ffmpeg_process(self):
"""Start FFmpeg subprocess outputting raw RGB frames to stdout pipe."""
# First probe the stream to get resolution
probe_result = self._probe_stream_info()
if not probe_result:
logger.error(f"Camera {self.camera_id}: Failed to probe stream info")
return False
self.actual_width, self.actual_height = probe_result
"""Start FFmpeg subprocess outputting BMP frames to stdout pipe."""
cmd = [
'ffmpeg',
# DO NOT REMOVE
@@ -131,17 +110,14 @@
# '-hwaccel_device', '0',
'-rtsp_transport', 'tcp',
'-i', self.rtsp_url,
'-f', 'rawvideo', # Raw video output instead of PPM
'-pix_fmt', 'rgb24', # Raw RGB24 format
'-f', 'image2pipe', # Output images to pipe
'-vcodec', 'bmp', # BMP format with header containing dimensions
# Use native stream resolution and framerate
'-an', # No audio
'-' # Output to stdout
]
try:
# Log the FFmpeg command for debugging
logger.info(f"Starting FFmpeg for camera {self.camera_id} with command: {' '.join(cmd)}")
# Start FFmpeg with stdout pipe to read frames directly
self.process = subprocess.Popen(
cmd,
@@ -149,46 +125,60 @@
stderr=subprocess.DEVNULL,
bufsize=0 # Unbuffered for real-time processing
)
logger.info(f"Started FFmpeg process PID {self.process.pid} for camera {self.camera_id} -> stdout pipe (resolution: {self.actual_width}x{self.actual_height})")
return True
except Exception as e:
logger.error(f"Failed to start FFmpeg for camera {self.camera_id}: {e}")
log_error(self.camera_id, f"FFmpeg startup failed: {e}")
return False
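This is why the ffprobe pre-flight could be dropped: each BMP frame now carries its own dimensions. As a quick way to sanity-check the new pipe format, a hedged standalone sketch that reads a single header from the same command and prints the advertised resolution (the RTSP URL is a placeholder):
import struct
import subprocess

cmd = ['ffmpeg', '-rtsp_transport', 'tcp', '-i', 'rtsp://example/stream',
       '-f', 'image2pipe', '-vcodec', 'bmp', '-an', '-']
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
header = proc.stdout.read(26)                 # 14-byte file header + start of info header
assert header[:2] == b'BM', "not a BMP stream"
width, height = struct.unpack_from('<ll', header, 18)  # biHeight < 0 means top-down
print(f"stream resolution: {width}x{abs(height)}")
proc.terminate()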
def _read_raw_frame(self, pipe):
"""Read raw RGB frame data from pipe with proper buffering."""
def _read_bmp_frame(self, pipe):
"""Read BMP frame from pipe - BMP header contains dimensions."""
try:
# Calculate frame size using actual detected dimensions
frame_size = self.actual_width * self.actual_height * 3
# Read BMP header (14 bytes file header + 40 bytes info header = 54 bytes minimum)
header_data = b''
bytes_to_read = 54
# Read frame data in chunks until we have the complete frame
frame_data = b''
bytes_remaining = frame_size
while len(header_data) < bytes_to_read:
chunk = pipe.read(bytes_to_read - len(header_data))
if not chunk:
return None # Silent end of stream
header_data += chunk
while bytes_remaining > 0:
chunk = pipe.read(bytes_remaining)
if not chunk: # EOF
if len(frame_data) == 0:
logger.debug(f"Camera {self.camera_id}: No more data (stream ended)")
else:
logger.warning(f"Camera {self.camera_id}: Stream ended mid-frame: {len(frame_data)}/{frame_size} bytes")
return None
# Parse BMP header
if header_data[:2] != b'BM':
return None # Invalid format, skip frame silently
frame_data += chunk
bytes_remaining -= len(chunk)
# Extract file size from header (bytes 2-5)
import struct
file_size = struct.unpack('<L', header_data[2:6])[0]
# Convert raw RGB data to numpy array using actual dimensions
frame_array = np.frombuffer(frame_data, dtype=np.uint8)
frame_rgb = frame_array.reshape((self.actual_height, self.actual_width, 3))
# Extract width and height from info header (bytes 18-21 and 22-25)
width = struct.unpack('<L', header_data[18:22])[0]
height = struct.unpack('<L', header_data[22:26])[0]
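# Note (informational): biWidth/biHeight are signed in the BMP spec, and a
# negative height marks a top-down bitmap; cv2.imdecode below re-reads the
# header itself, so these parsed values are not strictly required.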
# Convert RGB to BGR for OpenCV compatibility
frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
# Read remaining file data
remaining_size = file_size - 54
remaining_data = b''
return frame_bgr
while len(remaining_data) < remaining_size:
chunk = pipe.read(remaining_size - len(remaining_data))
if not chunk:
return None # Stream ended silently
remaining_data += chunk
except Exception as e:
logger.error(f"Camera {self.camera_id}: Error reading raw frame: {e}")
return None
# Complete BMP data
bmp_data = header_data + remaining_data
# Use OpenCV to decode BMP directly from memory
frame_array = np.frombuffer(bmp_data, dtype=np.uint8)
frame = cv2.imdecode(frame_array, cv2.IMREAD_COLOR)
if frame is None:
return None # Decode failed silently
return frame
except Exception:
return None # Error reading frame silently
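For intuition about the sizes involved, a hedged back-of-envelope check for a 24-bit 1280x720 frame (BMP rows are padded to 4-byte multiples; 1280*3 = 3840 is already aligned):
width, height = 1280, 720
row_bytes = (width * 3 + 3) & ~3     # 3840, no padding needed at this width
file_size = 54 + row_bytes * height  # header + pixels = 2,764,854 bytes per frame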
def _read_frames(self):
"""Read frames directly from FFmpeg stdout pipe."""
@@ -200,51 +190,45 @@
# Start FFmpeg if not running
if not self.process or self.process.poll() is not None:
if self.process and self.process.poll() is not None:
logger.warning(f"FFmpeg process died for camera {self.camera_id}, restarting...")
log_warning(self.camera_id, "Stream disconnected, reconnecting...")
if not self._start_ffmpeg_process():
time.sleep(5.0)
continue
logger.info(f"FFmpeg started for camera {self.camera_id}, reading frames from pipe...")
# Read frames directly from FFmpeg stdout
try:
if self.process and self.process.stdout:
# Read raw frame data
frame = self._read_raw_frame(self.process.stdout)
# Read BMP frame data
frame = self._read_bmp_frame(self.process.stdout)
if frame is None:
continue
# Call frame callback
if self.frame_callback:
self.frame_callback(self.camera_id, frame)
logger.debug(f"Camera {self.camera_id}: Called frame callback with shape {frame.shape}")
frame_count += 1
# Log progress
# Log progress every 60 seconds (quieter)
current_time = time.time()
if current_time - last_log_time >= 30:
logger.info(f"Camera {self.camera_id}: {frame_count} frames processed via pipe")
if current_time - last_log_time >= 60:
log_success(self.camera_id, f"{frame_count} frames captured ({frame.shape[1]}x{frame.shape[0]})")
last_log_time = current_time
except Exception as e:
logger.error(f"Camera {self.camera_id}: Error reading from pipe: {e}")
except Exception:
# Process might have died, let it restart on next iteration
if self.process:
self.process.terminate()
self.process = None
time.sleep(1.0)
except Exception as e:
logger.error(f"Camera {self.camera_id}: Error in pipe frame reading: {e}")
except Exception:
time.sleep(1.0)
# Cleanup
if self.process:
self.process.terminate()
logger.info(f"FFmpeg pipe reader ended for camera {self.camera_id}")
logger = logging.getLogger(__name__)


@@ -0,0 +1,408 @@
"""
BoT-SORT Multi-Object Tracker with Camera Isolation
Based on BoT-SORT: Robust Associations Multi-Pedestrian Tracking
"""
import logging
import time
import numpy as np
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass
from scipy.optimize import linear_sum_assignment
from filterpy.kalman import KalmanFilter
import cv2
logger = logging.getLogger(__name__)
@dataclass
class TrackState:
"""Track state enumeration"""
TENTATIVE = "tentative" # New track, not confirmed yet
CONFIRMED = "confirmed" # Confirmed track
DELETED = "deleted" # Track to be deleted
class Track:
"""
Individual track representation with Kalman filter for motion prediction
"""
def __init__(self, detection, track_id: int, camera_id: str):
"""
Initialize a new track
Args:
detection: Initial detection (bbox, confidence, class)
track_id: Unique track identifier within camera
camera_id: Camera identifier
"""
self.track_id = track_id
self.camera_id = camera_id
self.state = TrackState.TENTATIVE
# Time tracking
self.start_time = time.time()
self.last_update_time = time.time()
# Appearance and motion
self.bbox = detection.bbox # [x1, y1, x2, y2]
self.confidence = detection.confidence
self.class_name = detection.class_name
# Track management
self.hit_streak = 1
self.time_since_update = 0
self.age = 1
# Kalman filter for motion prediction
self.kf = self._create_kalman_filter()
self._update_kalman_filter(detection.bbox)
# Track history
self.history = [detection.bbox]
self.max_history = 10
def _create_kalman_filter(self) -> KalmanFilter:
"""Create Kalman filter for bbox tracking (x, y, w, h, vx, vy, vw, vh)"""
kf = KalmanFilter(dim_x=8, dim_z=4)
# State transition matrix (constant velocity model)
kf.F = np.array([
[1, 0, 0, 0, 1, 0, 0, 0],
[0, 1, 0, 0, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1]
])
# Measurement matrix (observe x, y, w, h)
kf.H = np.array([
[1, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0]
])
# Process noise
kf.Q *= 0.01
# Measurement noise
kf.R *= 10
# Initial covariance
kf.P *= 100
return kf
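Under this constant-velocity model each state component simply accumulates its paired velocity per step: cx' = cx + vx, cy' = cy + vy, w' = w + vw, h' = h + vh. A quick hedged sanity check of F with illustrative numbers:
import numpy as np
x = np.array([100., 50., 40., 80., 2., -1., 0., 0.])  # [cx, cy, w, h, vx, vy, vw, vh]
F = np.eye(8)
F[0, 4] = F[1, 5] = F[2, 6] = F[3, 7] = 1
print(F @ x)   # -> [102.  49.  40.  80.   2.  -1.   0.   0.]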
def _update_kalman_filter(self, bbox: List[float]):
"""Update Kalman filter with new bbox"""
# Convert [x1, y1, x2, y2] to [cx, cy, w, h]
x1, y1, x2, y2 = bbox
cx = (x1 + x2) / 2
cy = (y1 + y2) / 2
w = x2 - x1
h = y2 - y1
# Properly assign to column vector
self.kf.x[:4, 0] = [cx, cy, w, h]
def predict(self) -> np.ndarray:
"""Predict next position using Kalman filter"""
self.kf.predict()
# Convert back to [x1, y1, x2, y2] format
cx, cy, w, h = self.kf.x[:4, 0] # Extract from column vector
x1 = cx - w/2
y1 = cy - h/2
x2 = cx + w/2
y2 = cy + h/2
return np.array([x1, y1, x2, y2])
def update(self, detection):
"""Update track with new detection"""
self.last_update_time = time.time()
self.time_since_update = 0
self.hit_streak += 1
self.age += 1
# Update track properties
self.bbox = detection.bbox
self.confidence = detection.confidence
# Update Kalman filter
x1, y1, x2, y2 = detection.bbox
cx = (x1 + x2) / 2
cy = (y1 + y2) / 2
w = x2 - x1
h = y2 - y1
self.kf.update([cx, cy, w, h])
# Update history
self.history.append(detection.bbox)
if len(self.history) > self.max_history:
self.history.pop(0)
# Update state
if self.state == TrackState.TENTATIVE and self.hit_streak >= 3:
self.state = TrackState.CONFIRMED
def mark_missed(self):
"""Mark track as missed in this frame"""
self.time_since_update += 1
self.age += 1
if self.time_since_update > 5: # Delete after 5 missed frames
self.state = TrackState.DELETED
def is_confirmed(self) -> bool:
"""Check if track is confirmed"""
return self.state == TrackState.CONFIRMED
def is_deleted(self) -> bool:
"""Check if track should be deleted"""
return self.state == TrackState.DELETED
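A minimal hedged lifecycle sketch under the thresholds above (3 hits to confirm, deletion after more than 5 missed frames); the Detection stand-in is hypothetical:
from types import SimpleNamespace

det = SimpleNamespace(bbox=[0, 0, 10, 10], confidence=0.9, class_name='frontal')
t = Track(det, track_id=1, camera_id='cam-01')   # starts TENTATIVE, hit_streak=1
t.update(det)
t.update(det)                                    # hit_streak=3 -> CONFIRMED
assert t.is_confirmed()
for _ in range(6):                               # time_since_update > 5 -> DELETED
    t.mark_missed()
assert t.is_deleted()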
class CameraTracker:
"""
BoT-SORT tracker for a single camera
"""
def __init__(self, camera_id: str, max_disappeared: int = 10):
"""
Initialize camera tracker
Args:
camera_id: Unique camera identifier
max_disappeared: Maximum frames a track can be missed before deletion
"""
self.camera_id = camera_id
self.max_disappeared = max_disappeared
# Track management
self.tracks: Dict[int, Track] = {}
self.next_id = 1
self.frame_count = 0
logger.info(f"Initialized BoT-SORT tracker for camera {camera_id}")
def update(self, detections: List) -> List[Track]:
"""
Update tracker with new detections
Args:
detections: List of Detection objects
Returns:
List of active confirmed tracks
"""
self.frame_count += 1
# Predict all existing tracks
for track in self.tracks.values():
track.predict()
# Associate detections to tracks
matched_tracks, unmatched_detections, unmatched_tracks = self._associate(detections)
# Update matched tracks
for track_id, detection in matched_tracks:
self.tracks[track_id].update(detection)
# Mark unmatched tracks as missed
for track_id in unmatched_tracks:
self.tracks[track_id].mark_missed()
# Create new tracks for unmatched detections
for detection in unmatched_detections:
track = Track(detection, self.next_id, self.camera_id)
self.tracks[self.next_id] = track
self.next_id += 1
# Remove deleted tracks
tracks_to_remove = [tid for tid, track in self.tracks.items() if track.is_deleted()]
for tid in tracks_to_remove:
del self.tracks[tid]
# Return confirmed tracks
confirmed_tracks = [track for track in self.tracks.values() if track.is_confirmed()]
return confirmed_tracks
def _associate(self, detections: List) -> Tuple[List[Tuple[int, Any]], List[Any], List[int]]:
"""
Associate detections to existing tracks using IoU distance
Returns:
(matched_tracks, unmatched_detections, unmatched_tracks)
"""
if not detections or not self.tracks:
return [], detections, list(self.tracks.keys())
# Calculate IoU distance matrix
track_ids = list(self.tracks.keys())
cost_matrix = np.zeros((len(track_ids), len(detections)))
for i, track_id in enumerate(track_ids):
track = self.tracks[track_id]
predicted_bbox = track.predict()
for j, detection in enumerate(detections):
iou = self._calculate_iou(predicted_bbox, detection.bbox)
cost_matrix[i, j] = 1 - iou # Convert IoU to distance
# Solve assignment problem
row_indices, col_indices = linear_sum_assignment(cost_matrix)
# Filter matches by IoU threshold
iou_threshold = 0.3
matched_tracks = []
matched_detection_indices = set()
matched_track_indices = set()
for row, col in zip(row_indices, col_indices):
if cost_matrix[row, col] <= (1 - iou_threshold):
track_id = track_ids[row]
detection = detections[col]
matched_tracks.append((track_id, detection))
matched_detection_indices.add(col)
matched_track_indices.add(row)
# Find unmatched detections and tracks
unmatched_detections = [detections[i] for i in range(len(detections))
if i not in matched_detection_indices]
unmatched_tracks = [track_ids[i] for i in range(len(track_ids))
if i not in matched_track_indices]
return matched_tracks, unmatched_detections, unmatched_tracks
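For intuition, linear_sum_assignment on a toy cost matrix (cost = 1 - IoU) picks the globally cheapest pairing rather than greedy per-row minima:
import numpy as np
from scipy.optimize import linear_sum_assignment

cost = np.array([[0.2, 0.9],    # track 0: high overlap with detection 0
                 [0.8, 0.4]])   # track 1: high overlap with detection 1
rows, cols = linear_sum_assignment(cost)
print([(int(r), int(c)) for r, c in zip(rows, cols)])   # -> [(0, 0), (1, 1)]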
def _calculate_iou(self, bbox1: np.ndarray, bbox2: List[float]) -> float:
"""Calculate IoU between two bounding boxes"""
x1_1, y1_1, x2_1, y2_1 = bbox1
x1_2, y1_2, x2_2, y2_2 = bbox2
# Calculate intersection area
x1_i = max(x1_1, x1_2)
y1_i = max(y1_1, y1_2)
x2_i = min(x2_1, x2_2)
y2_i = min(y2_1, y2_2)
if x2_i <= x1_i or y2_i <= y1_i:
return 0.0
intersection = (x2_i - x1_i) * (y2_i - y1_i)
# Calculate union area
area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
union = area1 + area2 - intersection
return intersection / union if union > 0 else 0.0
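A quick worked example: boxes [0, 0, 10, 10] and [5, 5, 15, 15] intersect in a 5x5 patch, so IoU = 25 / (100 + 100 - 25) ≈ 0.14 — below the 0.3 gate in _associate, so these two would stay unmatched.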
class MultiCameraBoTSORT:
"""
Multi-camera BoT-SORT tracker with complete camera isolation
"""
def __init__(self, trigger_classes: List[str], min_confidence: float = 0.6):
"""
Initialize multi-camera tracker
Args:
trigger_classes: List of class names to track
min_confidence: Minimum detection confidence threshold
"""
self.trigger_classes = trigger_classes
self.min_confidence = min_confidence
# Camera-specific trackers
self.camera_trackers: Dict[str, CameraTracker] = {}
logger.info(f"Initialized MultiCameraBoTSORT with classes={trigger_classes}, "
f"min_confidence={min_confidence}")
def get_or_create_tracker(self, camera_id: str) -> CameraTracker:
"""Get or create tracker for specific camera"""
if camera_id not in self.camera_trackers:
self.camera_trackers[camera_id] = CameraTracker(camera_id)
logger.info(f"Created new tracker for camera {camera_id}")
return self.camera_trackers[camera_id]
def update(self, camera_id: str, inference_result) -> List[Dict]:
"""
Update tracker for specific camera with detections
Args:
camera_id: Camera identifier
inference_result: InferenceResult with detections
Returns:
List of track information dictionaries
"""
# Filter detections by confidence and trigger classes
filtered_detections = []
if hasattr(inference_result, 'detections') and inference_result.detections:
for detection in inference_result.detections:
if (detection.confidence >= self.min_confidence and
detection.class_name in self.trigger_classes):
filtered_detections.append(detection)
# Get camera tracker and update
tracker = self.get_or_create_tracker(camera_id)
confirmed_tracks = tracker.update(filtered_detections)
# Convert tracks to output format
track_results = []
for track in confirmed_tracks:
track_results.append({
'track_id': track.track_id,
'camera_id': track.camera_id,
'bbox': track.bbox,
'confidence': track.confidence,
'class_name': track.class_name,
'hit_streak': track.hit_streak,
'age': track.age
})
return track_results
def get_statistics(self) -> Dict[str, Any]:
"""Get tracking statistics across all cameras"""
stats = {}
total_tracks = 0
for camera_id, tracker in self.camera_trackers.items():
camera_stats = {
'active_tracks': len([t for t in tracker.tracks.values() if t.is_confirmed()]),
'total_tracks': len(tracker.tracks),
'frame_count': tracker.frame_count
}
stats[camera_id] = camera_stats
total_tracks += camera_stats['active_tracks']
stats['summary'] = {
'total_cameras': len(self.camera_trackers),
'total_active_tracks': total_tracks
}
return stats
def reset_camera(self, camera_id: str):
"""Reset tracking for specific camera"""
if camera_id in self.camera_trackers:
del self.camera_trackers[camera_id]
logger.info(f"Reset tracking for camera {camera_id}")
def reset_all(self):
"""Reset all camera trackers"""
self.camera_trackers.clear()
logger.info("Reset all camera trackers")


@@ -63,7 +63,7 @@ class TrackingPipelineIntegration:
self.pending_processing_data: Dict[str, Dict] = {} # display_id -> processing data (waiting for session ID)
# Additional validators for enhanced flow control
self.permanently_processed: Dict[int, float] = {} # track_id -> process_time (never process again)
self.permanently_processed: Dict[str, float] = {} # "camera_id:track_id" -> process_time (never process again)
self.progression_stages: Dict[str, str] = {} # session_id -> current_stage
self.last_detection_time: Dict[str, float] = {} # display_id -> last_detection_timestamp
self.abandonment_timeout = 3.0 # seconds to wait before declaring car abandoned
@@ -183,7 +183,7 @@
# Run tracking model
if self.tracking_model:
# Run inference with tracking
# Run detection-only (tracking handled by our own tracker)
tracking_results = self.tracking_model.track(
frame,
confidence_threshold=self.tracker.min_confidence,
@@ -486,7 +486,10 @@
self.session_vehicles[session_id] = track_id
# Mark vehicle as permanently processed (won't process again even after session clear)
self.permanently_processed[track_id] = time.time()
# Use composite key to distinguish same track IDs across different cameras
camera_id = display_id # Using display_id as camera_id for isolation
permanent_key = f"{camera_id}:{track_id}"
self.permanently_processed[permanent_key] = time.time()
# Remove from pending
del self.pending_vehicles[display_id]
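With the composite key, track 7 on two displays yields distinct entries such as 'display-001:7' and 'display-002:7' (IDs here are illustrative), so one camera's permanently-processed flag no longer suppresses an unrelated vehicle that happens to share a track ID on another camera.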
@@ -667,6 +670,7 @@
self.executor.shutdown(wait=False)
self.reset_tracking()
# Cleanup detection pipeline
if self.detection_pipeline:
self.detection_pipeline.cleanup()


@@ -1,6 +1,6 @@
"""
Vehicle Tracking Module - Continuous tracking with front_rear_detection model
Implements vehicle identification, persistence, and motion analysis.
Vehicle Tracking Module - BoT-SORT based tracking with camera isolation
Implements vehicle identification, persistence, and motion analysis using external tracker.
"""
import logging
import time
@@ -10,6 +10,8 @@ from dataclasses import dataclass, field
import numpy as np
from threading import Lock
from .bot_sort_tracker import MultiCameraBoTSORT
logger = logging.getLogger(__name__)
@@ -17,6 +19,7 @@ logger = logging.getLogger(__name__)
class TrackedVehicle:
"""Represents a tracked vehicle with all its state information."""
track_id: int
camera_id: str
first_seen: float
last_seen: float
session_id: Optional[str] = None
@@ -30,6 +33,8 @@
processed_pipeline: bool = False
last_position_history: List[Tuple[float, float]] = field(default_factory=list)
avg_confidence: float = 0.0
hit_streak: int = 0
age: int = 0
def update_position(self, bbox: Tuple[int, int, int, int], confidence: float):
"""Update vehicle position and confidence."""
@@ -73,7 +78,7 @@
class VehicleTracker:
"""
Main vehicle tracking implementation using YOLO tracking capabilities.
Main vehicle tracking implementation using BoT-SORT with camera isolation.
Manages continuous tracking, vehicle identification, and state persistence.
"""
@@ -88,18 +93,19 @@
self.trigger_classes = self.config.get('trigger_classes', self.config.get('triggerClasses', ['frontal']))
self.min_confidence = self.config.get('minConfidence', 0.6)
# Tracking state
self.tracked_vehicles: Dict[int, TrackedVehicle] = {}
self.next_track_id = 1
# BoT-SORT multi-camera tracker
self.bot_sort = MultiCameraBoTSORT(self.trigger_classes, self.min_confidence)
# Tracking state - maintain compatibility with existing code
self.tracked_vehicles: Dict[str, Dict[int, TrackedVehicle]] = {} # camera_id -> {track_id: vehicle}
self.lock = Lock()
# Tracking parameters
self.stability_threshold = 0.7
self.min_stable_frames = 5
self.position_tolerance = 50 # pixels
self.timeout_seconds = 2.0
logger.info(f"VehicleTracker initialized with trigger_classes={self.trigger_classes}, "
logger.info(f"VehicleTracker initialized with BoT-SORT: trigger_classes={self.trigger_classes}, "
f"min_confidence={self.min_confidence}")
def process_detections(self,
@@ -107,10 +113,10 @@
display_id: str,
frame: np.ndarray) -> List[TrackedVehicle]:
"""
Process YOLO detection results and update tracking state.
Process detection results using BoT-SORT tracking.
Args:
results: YOLO detection results with tracking
results: Detection results (InferenceResult)
display_id: Display identifier for this stream
frame: Current frame being processed
@@ -118,108 +124,67 @@
List of currently tracked vehicles
"""
current_time = time.time()
active_tracks = []
# Extract camera_id from display_id for tracking isolation
camera_id = display_id # Using display_id as camera_id for isolation
with self.lock:
# Clean up expired tracks
expired_ids = [
track_id for track_id, vehicle in self.tracked_vehicles.items()
if vehicle.is_expired(self.timeout_seconds)
]
for track_id in expired_ids:
logger.debug(f"Removing expired track {track_id}")
del self.tracked_vehicles[track_id]
# Update BoT-SORT tracker
track_results = self.bot_sort.update(camera_id, results)
# Process new detections from InferenceResult
if hasattr(results, 'detections') and results.detections:
# Process detections from InferenceResult
for detection in results.detections:
# Skip if confidence is too low
if detection.confidence < self.min_confidence:
continue
# Ensure camera tracking dict exists
if camera_id not in self.tracked_vehicles:
self.tracked_vehicles[camera_id] = {}
# Check if class is in trigger classes
if detection.class_name not in self.trigger_classes:
continue
# Update tracked vehicles based on BoT-SORT results
current_tracks = {}
active_tracks = []
# Use track_id if available, otherwise generate one
track_id = detection.track_id if detection.track_id is not None else self.next_track_id
if detection.track_id is None:
self.next_track_id += 1
for track_result in track_results:
track_id = track_result['track_id']
# Get bounding box from Detection object
x1, y1, x2, y2 = detection.bbox
bbox = (int(x1), int(y1), int(x2), int(y2))
# Create or update TrackedVehicle
if track_id in self.tracked_vehicles[camera_id]:
# Update existing vehicle
vehicle = self.tracked_vehicles[camera_id][track_id]
vehicle.update_position(track_result['bbox'], track_result['confidence'])
vehicle.hit_streak = track_result['hit_streak']
vehicle.age = track_result['age']
# Update or create tracked vehicle
confidence = detection.confidence
if track_id in self.tracked_vehicles:
# Update existing track
vehicle = self.tracked_vehicles[track_id]
vehicle.update_position(bbox, confidence)
vehicle.display_id = display_id
# Update stability based on hit_streak
if vehicle.hit_streak >= self.min_stable_frames:
vehicle.is_stable = True
vehicle.stable_frames = vehicle.hit_streak
# Check stability
stability = vehicle.calculate_stability()
if stability > self.stability_threshold:
vehicle.stable_frames += 1
if vehicle.stable_frames >= self.min_stable_frames:
vehicle.is_stable = True
else:
vehicle.stable_frames = max(0, vehicle.stable_frames - 1)
if vehicle.stable_frames < self.min_stable_frames:
vehicle.is_stable = False
logger.debug(f"Updated track {track_id}: conf={vehicle.confidence:.2f}, "
f"stable={vehicle.is_stable}, hit_streak={vehicle.hit_streak}")
else:
# Create new vehicle
x1, y1, x2, y2 = track_result['bbox']
vehicle = TrackedVehicle(
track_id=track_id,
camera_id=camera_id,
first_seen=current_time,
last_seen=current_time,
display_id=display_id,
confidence=track_result['confidence'],
bbox=tuple(track_result['bbox']),
center=((x1 + x2) / 2, (y1 + y2) / 2),
total_frames=1,
hit_streak=track_result['hit_streak'],
age=track_result['age']
)
vehicle.last_position_history.append(vehicle.center)
logger.info(f"New vehicle tracked: ID={track_id}, camera={camera_id}, display={display_id}")
logger.debug(f"Updated track {track_id}: conf={confidence:.2f}, "
f"stable={vehicle.is_stable}, stability={stability:.2f}")
else:
# Create new track
vehicle = TrackedVehicle(
track_id=track_id,
first_seen=current_time,
last_seen=current_time,
display_id=display_id,
confidence=confidence,
bbox=bbox,
center=((x1 + x2) / 2, (y1 + y2) / 2),
total_frames=1
)
vehicle.last_position_history.append(vehicle.center)
self.tracked_vehicles[track_id] = vehicle
logger.info(f"New vehicle tracked: ID={track_id}, display={display_id}")
current_tracks[track_id] = vehicle
active_tracks.append(vehicle)
active_tracks.append(self.tracked_vehicles[track_id])
# Update the camera's tracked vehicles
self.tracked_vehicles[camera_id] = current_tracks
return active_tracks
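A short hedged sketch of driving this per-camera path (`results` is an InferenceResult and `frame` a BGR numpy array, both assumed available; config keys follow the defaults above):
vt = VehicleTracker({'trigger_classes': ['frontal'], 'minConfidence': 0.6})
vehicles = vt.process_detections(results, display_id='display-001', frame=frame)
for v in vehicles:
    print(v.camera_id, v.track_id, v.hit_streak, v.is_stable)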
def _find_closest_track(self, center: Tuple[float, float]) -> Optional[TrackedVehicle]:
"""
Find the closest existing track to a given position.
Args:
center: Center position to match
Returns:
Closest tracked vehicle if within tolerance, None otherwise
"""
min_distance = float('inf')
closest_track = None
for vehicle in self.tracked_vehicles.values():
if vehicle.is_expired(0.5): # Shorter timeout for matching
continue
distance = np.sqrt(
(center[0] - vehicle.center[0]) ** 2 +
(center[1] - vehicle.center[1]) ** 2
)
if distance < min_distance and distance < self.position_tolerance:
min_distance = distance
closest_track = vehicle
return closest_track
def get_stable_vehicles(self, display_id: Optional[str] = None) -> List[TrackedVehicle]:
"""
Get all stable vehicles, optionally filtered by display.
@@ -231,11 +196,15 @@
List of stable tracked vehicles
"""
with self.lock:
stable = [
v for v in self.tracked_vehicles.values()
if v.is_stable and not v.is_expired(self.timeout_seconds)
and (display_id is None or v.display_id == display_id)
]
stable = []
camera_id = display_id # Using display_id as camera_id
if camera_id in self.tracked_vehicles:
for vehicle in self.tracked_vehicles[camera_id].values():
if (vehicle.is_stable and not vehicle.is_expired(self.timeout_seconds) and
(display_id is None or vehicle.display_id == display_id)):
stable.append(vehicle)
return stable
def get_vehicle_by_session(self, session_id: str) -> Optional[TrackedVehicle]:
@@ -249,9 +218,11 @@
Tracked vehicle if found, None otherwise
"""
with self.lock:
for vehicle in self.tracked_vehicles.values():
if vehicle.session_id == session_id:
return vehicle
# Search across all cameras
for camera_vehicles in self.tracked_vehicles.values():
for vehicle in camera_vehicles.values():
if vehicle.session_id == session_id:
return vehicle
return None
def mark_processed(self, track_id: int, session_id: str):
@@ -263,11 +234,14 @@
session_id: Session ID assigned to this vehicle
"""
with self.lock:
if track_id in self.tracked_vehicles:
vehicle = self.tracked_vehicles[track_id]
vehicle.processed_pipeline = True
vehicle.session_id = session_id
logger.info(f"Marked vehicle {track_id} as processed with session {session_id}")
# Search across all cameras for the track_id
for camera_vehicles in self.tracked_vehicles.values():
if track_id in camera_vehicles:
vehicle = camera_vehicles[track_id]
vehicle.processed_pipeline = True
vehicle.session_id = session_id
logger.info(f"Marked vehicle {track_id} as processed with session {session_id}")
return
def clear_session(self, session_id: str):
"""
@@ -277,30 +251,43 @@
session_id: Session ID to clear
"""
with self.lock:
for vehicle in self.tracked_vehicles.values():
if vehicle.session_id == session_id:
logger.info(f"Clearing session {session_id} from vehicle {vehicle.track_id}")
vehicle.session_id = None
# Keep processed_pipeline=True to prevent re-processing
# Search across all cameras
for camera_vehicles in self.tracked_vehicles.values():
for vehicle in camera_vehicles.values():
if vehicle.session_id == session_id:
logger.info(f"Clearing session {session_id} from vehicle {vehicle.track_id}")
vehicle.session_id = None
# Keep processed_pipeline=True to prevent re-processing
def reset_tracking(self):
"""Reset all tracking state."""
with self.lock:
self.tracked_vehicles.clear()
self.next_track_id = 1
self.bot_sort.reset_all()
logger.info("Vehicle tracking state reset")
def get_statistics(self) -> Dict:
"""Get tracking statistics."""
with self.lock:
total = len(self.tracked_vehicles)
stable = sum(1 for v in self.tracked_vehicles.values() if v.is_stable)
processed = sum(1 for v in self.tracked_vehicles.values() if v.processed_pipeline)
total = 0
stable = 0
processed = 0
all_confidences = []
# Aggregate stats across all cameras
for camera_vehicles in self.tracked_vehicles.values():
total += len(camera_vehicles)
for vehicle in camera_vehicles.values():
if vehicle.is_stable:
stable += 1
if vehicle.processed_pipeline:
processed += 1
all_confidences.append(vehicle.avg_confidence)
return {
'total_tracked': total,
'stable_vehicles': stable,
'processed_vehicles': processed,
'avg_confidence': np.mean([v.avg_confidence for v in self.tracked_vehicles.values()])
if self.tracked_vehicles else 0.0
'avg_confidence': np.mean(all_confidences) if all_confidences else 0.0,
'bot_sort_stats': self.bot_sort.get_statistics()
}


@@ -354,25 +354,28 @@ class StableCarValidator:
def should_skip_same_car(self,
vehicle: TrackedVehicle,
session_cleared: bool = False,
permanently_processed: Dict[int, float] = None) -> bool:
permanently_processed: Dict[str, float] = None) -> bool:
"""
Determine if we should skip processing for the same car after session clear.
Args:
vehicle: The tracked vehicle
session_cleared: Whether the session was recently cleared
permanently_processed: Dict of permanently processed vehicles
permanently_processed: Dict of permanently processed vehicles (camera_id:track_id -> time)
Returns:
True if we should skip this vehicle
"""
# Check if this vehicle was permanently processed (never process again)
if permanently_processed and vehicle.track_id in permanently_processed:
process_time = permanently_processed[vehicle.track_id]
time_since = time.time() - process_time
logger.debug(f"Skipping permanently processed vehicle {vehicle.track_id} "
f"(processed {time_since:.1f}s ago)")
return True
if permanently_processed:
# Create composite key using camera_id and track_id
permanent_key = f"{vehicle.camera_id}:{vehicle.track_id}"
if permanent_key in permanently_processed:
process_time = permanently_processed[permanent_key]
time_since = time.time() - process_time
logger.debug(f"Skipping permanently processed vehicle {vehicle.track_id} on camera {vehicle.camera_id} "
f"(processed {time_since:.1f}s ago)")
return True
# If vehicle has a session_id but it was cleared, skip for a period
if vehicle.session_id is None and vehicle.processed_pipeline and session_cleared: