feat: custom bot-sort based tracker

ziesorx 2025-09-26 14:22:38 +07:00
parent bd201acac1
commit 791f611f7d
8 changed files with 649 additions and 282 deletions

app.py

@@ -158,21 +158,18 @@ async def get_camera_image(camera_id: str):
         # Get frame from the shared cache buffer
         from core.streaming.buffers import shared_cache_buffer
-        # Debug: Log available cameras in buffer
+        # Only show buffer debug info if camera not found (to reduce log spam)
         available_cameras = shared_cache_buffer.frame_buffer.get_camera_list()
-        logger.debug(f"Available cameras in buffer: {available_cameras}")
-        logger.debug(f"Looking for camera: '{actual_camera_id}'")
 
         frame = shared_cache_buffer.get_frame(actual_camera_id)
         if frame is None:
-            logger.warning(f"No cached frame available for camera '{actual_camera_id}' (from subscription '{camera_id}')")
-            logger.warning(f"Available cameras in buffer: {available_cameras}")
+            logger.warning(f"\033[93m[API] No frame for '{actual_camera_id}' - Available: {available_cameras}\033[0m")
             raise HTTPException(
                 status_code=404,
                 detail=f"No frame available for camera {actual_camera_id}"
             )
 
-        logger.debug(f"Retrieved cached frame for camera '{actual_camera_id}' (from subscription '{camera_id}'), shape: {frame.shape}")
+        # Successful frame retrieval - log only occasionally to avoid spam
 
         # Encode frame as JPEG
         success, buffer_img = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])

@@ -60,6 +60,8 @@ class YOLOWrapper:
         self.model = None
         self._class_names = []
         self._load_model()
 
         logger.info(f"Initialized YOLO wrapper for {model_id} on {self.device}")
@@ -115,6 +117,7 @@ class YOLOWrapper:
             logger.error(f"Failed to extract class names: {str(e)}")
             self._class_names = {}
 
     def infer(
         self,
         image: np.ndarray,
@@ -222,55 +225,30 @@ class YOLOWrapper:
         return detections
 
     def track(
         self,
         image: np.ndarray,
         confidence_threshold: float = 0.5,
         trigger_classes: Optional[List[str]] = None,
-        persist: bool = True
+        persist: bool = True,
+        camera_id: Optional[str] = None
     ) -> InferenceResult:
         """
-        Run tracking on an image
+        Run detection (tracking will be handled by external tracker)
 
         Args:
             image: Input image as numpy array (BGR format)
             confidence_threshold: Minimum confidence for detections
             trigger_classes: List of class names to filter
-            persist: Whether to persist tracks across frames
+            persist: Ignored - tracking handled externally
+            camera_id: Ignored - tracking handled externally
 
         Returns:
-            InferenceResult containing detections with track IDs
+            InferenceResult containing detections (no track IDs from YOLO)
         """
-        if self.model is None:
-            raise RuntimeError(f"Model {self.model_id} not loaded")
-
-        try:
-            import time
-            start_time = time.time()
-
-            # Run tracking
-            results = self.model.track(
-                image,
-                conf=confidence_threshold,
-                persist=persist,
-                verbose=False
-            )
-
-            inference_time = time.time() - start_time
-
-            # Parse results
-            detections = self._parse_results(results[0], trigger_classes)
-
-            return InferenceResult(
-                detections=detections,
-                image_shape=(image.shape[0], image.shape[1]),
-                inference_time=inference_time,
-                model_id=self.model_id
-            )
-
-        except Exception as e:
-            logger.error(f"Tracking failed for model {self.model_id}: {str(e)}", exc_info=True)
-            raise
+        # Just do detection - no YOLO tracking
+        return self.infer(image, confidence_threshold, trigger_classes)
 
     def predict_classification(
         self,
@@ -350,6 +328,7 @@ class YOLOWrapper:
         """Get the number of classes the model can detect"""
         return len(self._class_names)
 
     def clear_cache(self) -> None:
         """Clear the model cache"""
         with self._cache_lock:

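For orientation, a minimal sketch of the new call pattern (variable names are illustrative, not part of this commit's API): the wrapper now returns plain detections, and the BoT-SORT tracker introduced later in this commit assigns the track IDs.

    # Sketch only: detection-only wrapper feeding the external tracker
    result = yolo_wrapper.track(frame, confidence_threshold=0.5)   # InferenceResult, no track IDs
    tracks = bot_sort.update("camera-01", result)                  # MultiCameraBoTSORT assigns IDs
    for t in tracks:
        print(t['track_id'], t['class_name'], t['bbox'])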

@@ -130,7 +130,7 @@ class StreamManager:
             try:
                 if stream_config.rtsp_url:
                     # RTSP stream using FFmpeg subprocess with CUDA acceleration
-                    logger.info(f"[STREAM_START] Starting FFmpeg RTSP stream for camera_id='{camera_id}' URL={stream_config.rtsp_url}")
+                    logger.info(f"\033[94m[RTSP] Starting {camera_id}\033[0m")
                     reader = FFmpegRTSPReader(
                         camera_id=camera_id,
                         rtsp_url=stream_config.rtsp_url,
@@ -139,11 +139,11 @@ class StreamManager:
                     reader.set_frame_callback(self._frame_callback)
                     reader.start()
                     self._streams[camera_id] = reader
-                    logger.info(f"[STREAM_START] ✅ Started FFmpeg RTSP stream for camera_id='{camera_id}'")
+                    logger.info(f"\033[92m[RTSP] {camera_id} connected\033[0m")
 
                 elif stream_config.snapshot_url:
                     # HTTP snapshot stream
-                    logger.info(f"[STREAM_START] Starting HTTP snapshot stream for camera_id='{camera_id}' URL={stream_config.snapshot_url}")
+                    logger.info(f"\033[95m[HTTP] Starting {camera_id}\033[0m")
                     reader = HTTPSnapshotReader(
                         camera_id=camera_id,
                         snapshot_url=stream_config.snapshot_url,
@@ -153,7 +153,7 @@ class StreamManager:
                     reader.set_frame_callback(self._frame_callback)
                     reader.start()
                     self._streams[camera_id] = reader
-                    logger.info(f"[STREAM_START] ✅ Started HTTP snapshot stream for camera_id='{camera_id}'")
+                    logger.info(f"\033[92m[HTTP] {camera_id} connected\033[0m")
                 else:
                     logger.error(f"No valid URL provided for camera {camera_id}")
@@ -182,11 +182,16 @@ class StreamManager:
         try:
             # Store frame in shared buffer
             shared_cache_buffer.put_frame(camera_id, frame)
-            logger.info(f"[FRAME_CALLBACK] Stored frame for camera_id='{camera_id}' in shared_cache_buffer, shape={frame.shape}")
 
-            # Log current buffer state
-            available_cameras = shared_cache_buffer.frame_buffer.get_camera_list()
-            logger.info(f"[FRAME_CALLBACK] Buffer now contains {len(available_cameras)} cameras: {available_cameras}")
+            # Quieter frame callback logging - only log occasionally
+            if hasattr(self, '_frame_log_count'):
+                self._frame_log_count += 1
+            else:
+                self._frame_log_count = 1
+
+            # Log every 100 frames to avoid spam
+            if self._frame_log_count % 100 == 0:
+                available_cameras = shared_cache_buffer.frame_buffer.get_camera_list()
+                logger.info(f"\033[96m[BUFFER] {len(available_cameras)} active cameras: {', '.join(available_cameras)}\033[0m")
 
             # Process tracking for subscriptions with tracking integration
             self._process_tracking_for_camera(camera_id, frame)

@@ -21,6 +21,34 @@ os.environ["OPENCV_FFMPEG_LOGLEVEL"] = "-8"  # Suppress FFMPEG warnings
 
 logger = logging.getLogger(__name__)
 
+# Color codes for pretty logging
+class Colors:
+    GREEN = '\033[92m'
+    YELLOW = '\033[93m'
+    RED = '\033[91m'
+    BLUE = '\033[94m'
+    PURPLE = '\033[95m'
+    CYAN = '\033[96m'
+    WHITE = '\033[97m'
+    BOLD = '\033[1m'
+    END = '\033[0m'
+
+def log_success(camera_id: str, message: str):
+    """Log success messages in green"""
+    logger.info(f"{Colors.GREEN}[{camera_id}] {message}{Colors.END}")
+
+def log_warning(camera_id: str, message: str):
+    """Log warnings in yellow"""
+    logger.warning(f"{Colors.YELLOW}[{camera_id}] {message}{Colors.END}")
+
+def log_error(camera_id: str, message: str):
+    """Log errors in red"""
+    logger.error(f"{Colors.RED}[{camera_id}] {message}{Colors.END}")
+
+def log_info(camera_id: str, message: str):
+    """Log info in cyan"""
+    logger.info(f"{Colors.CYAN}[{camera_id}] {message}{Colors.END}")
+
 # Removed watchdog logging configuration - no longer using file watching
@@ -56,7 +84,7 @@ class FFmpegRTSPReader:
         self.stop_event.clear()
         self.thread = threading.Thread(target=self._read_frames, daemon=True)
         self.thread.start()
-        logger.info(f"Started FFmpeg reader for camera {self.camera_id}")
+        log_success(self.camera_id, "Stream started")
 
     def stop(self):
         """Stop the FFmpeg subprocess reader."""
@@ -69,61 +97,12 @@ class FFmpegRTSPReader:
                 self.process.kill()
         if self.thread:
             self.thread.join(timeout=5.0)
-        logger.info(f"Stopped FFmpeg reader for camera {self.camera_id}")
+        log_info(self.camera_id, "Stream stopped")
 
-    def _probe_stream_info(self):
-        """Probe stream to get resolution and other info."""
-        try:
-            cmd = [
-                'ffprobe',
-                '-v', 'quiet',
-                '-print_format', 'json',
-                '-show_streams',
-                '-select_streams', 'v:0',  # First video stream
-                '-rtsp_transport', 'tcp',
-                self.rtsp_url
-            ]
-
-            result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
-            if result.returncode != 0:
-                logger.error(f"Camera {self.camera_id}: ffprobe failed (code {result.returncode})")
-                if result.stderr:
-                    logger.error(f"Camera {self.camera_id}: ffprobe stderr: {result.stderr}")
-                if result.stdout:
-                    logger.debug(f"Camera {self.camera_id}: ffprobe stdout: {result.stdout}")
-                return None
-
-            import json
-            data = json.loads(result.stdout)
-
-            if not data.get('streams'):
-                logger.error(f"Camera {self.camera_id}: No video streams found")
-                return None
-
-            stream = data['streams'][0]
-            width = stream.get('width')
-            height = stream.get('height')
-
-            if not width or not height:
-                logger.error(f"Camera {self.camera_id}: Could not determine resolution")
-                return None
-
-            logger.info(f"Camera {self.camera_id}: Detected resolution {width}x{height}")
-            return width, height
-
-        except Exception as e:
-            logger.error(f"Camera {self.camera_id}: Error probing stream: {e}")
-            return None
+    # Removed _probe_stream_info - BMP headers contain dimensions
 
     def _start_ffmpeg_process(self):
-        """Start FFmpeg subprocess outputting raw RGB frames to stdout pipe."""
-        # First probe the stream to get resolution
-        probe_result = self._probe_stream_info()
-        if not probe_result:
-            logger.error(f"Camera {self.camera_id}: Failed to probe stream info")
-            return False
-
-        self.actual_width, self.actual_height = probe_result
-
+        """Start FFmpeg subprocess outputting BMP frames to stdout pipe."""
         cmd = [
             'ffmpeg',
             # DO NOT REMOVE
@@ -131,17 +110,14 @@ class FFmpegRTSPReader:
             # '-hwaccel_device', '0',
             '-rtsp_transport', 'tcp',
             '-i', self.rtsp_url,
-            '-f', 'rawvideo',  # Raw video output instead of PPM
-            '-pix_fmt', 'rgb24',  # Raw RGB24 format
+            '-f', 'image2pipe',  # Output images to pipe
+            '-vcodec', 'bmp',  # BMP format with header containing dimensions
             # Use native stream resolution and framerate
             '-an',  # No audio
             '-'  # Output to stdout
         ]
 
         try:
-            # Log the FFmpeg command for debugging
-            logger.info(f"Starting FFmpeg for camera {self.camera_id} with command: {' '.join(cmd)}")
-
             # Start FFmpeg with stdout pipe to read frames directly
             self.process = subprocess.Popen(
                 cmd,
@@ -149,46 +125,60 @@ class FFmpegRTSPReader:
                 stderr=subprocess.DEVNULL,
                 bufsize=0  # Unbuffered for real-time processing
             )
-            logger.info(f"Started FFmpeg process PID {self.process.pid} for camera {self.camera_id} -> stdout pipe (resolution: {self.actual_width}x{self.actual_height})")
             return True
         except Exception as e:
-            logger.error(f"Failed to start FFmpeg for camera {self.camera_id}: {e}")
+            log_error(self.camera_id, f"FFmpeg startup failed: {e}")
             return False
 
-    def _read_raw_frame(self, pipe):
-        """Read raw RGB frame data from pipe with proper buffering."""
-        try:
-            # Calculate frame size using actual detected dimensions
-            frame_size = self.actual_width * self.actual_height * 3
-
-            # Read frame data in chunks until we have the complete frame
-            frame_data = b''
-            bytes_remaining = frame_size
-
-            while bytes_remaining > 0:
-                chunk = pipe.read(bytes_remaining)
-                if not chunk:  # EOF
-                    if len(frame_data) == 0:
-                        logger.debug(f"Camera {self.camera_id}: No more data (stream ended)")
-                    else:
-                        logger.warning(f"Camera {self.camera_id}: Stream ended mid-frame: {len(frame_data)}/{frame_size} bytes")
-                    return None
-                frame_data += chunk
-                bytes_remaining -= len(chunk)
-
-            # Convert raw RGB data to numpy array using actual dimensions
-            frame_array = np.frombuffer(frame_data, dtype=np.uint8)
-            frame_rgb = frame_array.reshape((self.actual_height, self.actual_width, 3))
-
-            # Convert RGB to BGR for OpenCV compatibility
-            frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
-
-            return frame_bgr
-
-        except Exception as e:
-            logger.error(f"Camera {self.camera_id}: Error reading raw frame: {e}")
-            return None
+    def _read_bmp_frame(self, pipe):
+        """Read BMP frame from pipe - BMP header contains dimensions."""
+        try:
+            # Read BMP header (14 bytes file header + 40 bytes info header = 54 bytes minimum)
+            header_data = b''
+            bytes_to_read = 54
+
+            while len(header_data) < bytes_to_read:
+                chunk = pipe.read(bytes_to_read - len(header_data))
+                if not chunk:
+                    return None  # Silent end of stream
+                header_data += chunk
+
+            # Parse BMP header
+            if header_data[:2] != b'BM':
+                return None  # Invalid format, skip frame silently
+
+            # Extract file size from header (bytes 2-5)
+            import struct
+            file_size = struct.unpack('<L', header_data[2:6])[0]
+
+            # Extract width and height from info header (bytes 18-21 and 22-25)
+            width = struct.unpack('<L', header_data[18:22])[0]
+            height = struct.unpack('<L', header_data[22:26])[0]
+
+            # Read remaining file data
+            remaining_size = file_size - 54
+            remaining_data = b''
+
+            while len(remaining_data) < remaining_size:
+                chunk = pipe.read(remaining_size - len(remaining_data))
+                if not chunk:
+                    return None  # Stream ended silently
+                remaining_data += chunk
+
+            # Complete BMP data
+            bmp_data = header_data + remaining_data
+
+            # Use OpenCV to decode BMP directly from memory
+            frame_array = np.frombuffer(bmp_data, dtype=np.uint8)
+            frame = cv2.imdecode(frame_array, cv2.IMREAD_COLOR)
+
+            if frame is None:
+                return None  # Decode failed silently
+
+            return frame
+
+        except Exception:
+            return None  # Error reading frame silently
 
     def _read_frames(self):
        """Read frames directly from FFmpeg stdout pipe."""
@@ -200,51 +190,45 @@ class FFmpegRTSPReader:
                 # Start FFmpeg if not running
                 if not self.process or self.process.poll() is not None:
                     if self.process and self.process.poll() is not None:
-                        logger.warning(f"FFmpeg process died for camera {self.camera_id}, restarting...")
+                        log_warning(self.camera_id, "Stream disconnected, reconnecting...")
 
                     if not self._start_ffmpeg_process():
                         time.sleep(5.0)
                         continue
 
-                    logger.info(f"FFmpeg started for camera {self.camera_id}, reading frames from pipe...")
-
                 # Read frames directly from FFmpeg stdout
                 try:
                     if self.process and self.process.stdout:
-                        # Read raw frame data
-                        frame = self._read_raw_frame(self.process.stdout)
+                        # Read BMP frame data
+                        frame = self._read_bmp_frame(self.process.stdout)
                         if frame is None:
                             continue
 
                         # Call frame callback
                         if self.frame_callback:
                             self.frame_callback(self.camera_id, frame)
-                            logger.debug(f"Camera {self.camera_id}: Called frame callback with shape {frame.shape}")
 
                         frame_count += 1
 
-                        # Log progress
+                        # Log progress every 60 seconds (quieter)
                         current_time = time.time()
-                        if current_time - last_log_time >= 30:
-                            logger.info(f"Camera {self.camera_id}: {frame_count} frames processed via pipe")
+                        if current_time - last_log_time >= 60:
+                            log_success(self.camera_id, f"{frame_count} frames captured ({frame.shape[1]}x{frame.shape[0]})")
                             last_log_time = current_time
 
-                except Exception as e:
-                    logger.error(f"Camera {self.camera_id}: Error reading from pipe: {e}")
+                except Exception:
                     # Process might have died, let it restart on next iteration
                     if self.process:
                         self.process.terminate()
                         self.process = None
                     time.sleep(1.0)
 
-            except Exception as e:
-                logger.error(f"Camera {self.camera_id}: Error in pipe frame reading: {e}")
+            except Exception:
                 time.sleep(1.0)
 
         # Cleanup
         if self.process:
             self.process.terminate()
-        logger.info(f"FFmpeg pipe reader ended for camera {self.camera_id}")
 
 logger = logging.getLogger(__name__)

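A quick way to sanity-check the header offsets used by _read_bmp_frame is to encode a tiny synthetic frame in memory; this sketch assumes only OpenCV and NumPy. Note that the BMP spec makes width and height signed ('<l'), with negative height marking a top-down bitmap; the commit parses them unsigned, which is harmless here since only file_size drives the read loop.

    import struct
    import cv2
    import numpy as np

    ok, buf = cv2.imencode('.bmp', np.zeros((4, 6, 3), dtype=np.uint8))
    data = buf.tobytes()

    assert data[:2] == b'BM'
    file_size = struct.unpack('<L', data[2:6])[0]    # bytes 2-5: total file size
    width = struct.unpack('<l', data[18:22])[0]      # bytes 18-21: width (signed)
    height = struct.unpack('<l', data[22:26])[0]     # bytes 22-25: height (signed)
    print(file_size == len(data), width, height)     # True 6 4
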
bot_sort_tracker.py (new file)
@@ -0,0 +1,408 @@
"""
BoT-SORT Multi-Object Tracker with Camera Isolation
Based on BoT-SORT: Robust Associations Multi-Pedestrian Tracking
"""

import logging
import time
import numpy as np
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass
from scipy.optimize import linear_sum_assignment
from filterpy.kalman import KalmanFilter
import cv2

logger = logging.getLogger(__name__)


@dataclass
class TrackState:
    """Track state enumeration"""
    TENTATIVE = "tentative"  # New track, not confirmed yet
    CONFIRMED = "confirmed"  # Confirmed track
    DELETED = "deleted"      # Track to be deleted


class Track:
    """
    Individual track representation with Kalman filter for motion prediction
    """

    def __init__(self, detection, track_id: int, camera_id: str):
        """
        Initialize a new track

        Args:
            detection: Initial detection (bbox, confidence, class)
            track_id: Unique track identifier within camera
            camera_id: Camera identifier
        """
        self.track_id = track_id
        self.camera_id = camera_id
        self.state = TrackState.TENTATIVE

        # Time tracking
        self.start_time = time.time()
        self.last_update_time = time.time()

        # Appearance and motion
        self.bbox = detection.bbox  # [x1, y1, x2, y2]
        self.confidence = detection.confidence
        self.class_name = detection.class_name

        # Track management
        self.hit_streak = 1
        self.time_since_update = 0
        self.age = 1

        # Kalman filter for motion prediction
        self.kf = self._create_kalman_filter()
        self._update_kalman_filter(detection.bbox)

        # Track history
        self.history = [detection.bbox]
        self.max_history = 10

    def _create_kalman_filter(self) -> KalmanFilter:
        """Create Kalman filter for bbox tracking (x, y, w, h, vx, vy, vw, vh)"""
        kf = KalmanFilter(dim_x=8, dim_z=4)

        # State transition matrix (constant velocity model)
        kf.F = np.array([
            [1, 0, 0, 0, 1, 0, 0, 0],
            [0, 1, 0, 0, 0, 1, 0, 0],
            [0, 0, 1, 0, 0, 0, 1, 0],
            [0, 0, 0, 1, 0, 0, 0, 1],
            [0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0, 0, 1]
        ])

        # Measurement matrix (observe x, y, w, h)
        kf.H = np.array([
            [1, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 1, 0, 0, 0, 0]
        ])

        # Process noise
        kf.Q *= 0.01

        # Measurement noise
        kf.R *= 10

        # Initial covariance
        kf.P *= 100

        return kf

    def _update_kalman_filter(self, bbox: List[float]):
        """Update Kalman filter with new bbox"""
        # Convert [x1, y1, x2, y2] to [cx, cy, w, h]
        x1, y1, x2, y2 = bbox
        cx = (x1 + x2) / 2
        cy = (y1 + y2) / 2
        w = x2 - x1
        h = y2 - y1

        # Properly assign to column vector
        self.kf.x[:4, 0] = [cx, cy, w, h]

    def predict(self) -> np.ndarray:
        """Predict next position using Kalman filter"""
        self.kf.predict()

        # Convert back to [x1, y1, x2, y2] format
        cx, cy, w, h = self.kf.x[:4, 0]  # Extract from column vector
        x1 = cx - w / 2
        y1 = cy - h / 2
        x2 = cx + w / 2
        y2 = cy + h / 2

        return np.array([x1, y1, x2, y2])

    def update(self, detection):
        """Update track with new detection"""
        self.last_update_time = time.time()
        self.time_since_update = 0
        self.hit_streak += 1
        self.age += 1

        # Update track properties
        self.bbox = detection.bbox
        self.confidence = detection.confidence

        # Update Kalman filter
        x1, y1, x2, y2 = detection.bbox
        cx = (x1 + x2) / 2
        cy = (y1 + y2) / 2
        w = x2 - x1
        h = y2 - y1
        self.kf.update([cx, cy, w, h])

        # Update history
        self.history.append(detection.bbox)
        if len(self.history) > self.max_history:
            self.history.pop(0)

        # Update state
        if self.state == TrackState.TENTATIVE and self.hit_streak >= 3:
            self.state = TrackState.CONFIRMED

    def mark_missed(self):
        """Mark track as missed in this frame"""
        self.time_since_update += 1
        self.age += 1

        if self.time_since_update > 5:  # Delete after 5 missed frames
            self.state = TrackState.DELETED

    def is_confirmed(self) -> bool:
        """Check if track is confirmed"""
        return self.state == TrackState.CONFIRMED

    def is_deleted(self) -> bool:
        """Check if track should be deleted"""
        return self.state == TrackState.DELETED
class CameraTracker:
    """
    BoT-SORT tracker for a single camera
    """

    def __init__(self, camera_id: str, max_disappeared: int = 10):
        """
        Initialize camera tracker

        Args:
            camera_id: Unique camera identifier
            max_disappeared: Maximum frames a track can be missed before deletion
        """
        self.camera_id = camera_id
        self.max_disappeared = max_disappeared

        # Track management
        self.tracks: Dict[int, Track] = {}
        self.next_id = 1
        self.frame_count = 0

        logger.info(f"Initialized BoT-SORT tracker for camera {camera_id}")

    def update(self, detections: List) -> List[Track]:
        """
        Update tracker with new detections

        Args:
            detections: List of Detection objects

        Returns:
            List of active confirmed tracks
        """
        self.frame_count += 1

        # Predict all existing tracks
        for track in self.tracks.values():
            track.predict()

        # Associate detections to tracks
        matched_tracks, unmatched_detections, unmatched_tracks = self._associate(detections)

        # Update matched tracks
        for track_id, detection in matched_tracks:
            self.tracks[track_id].update(detection)

        # Mark unmatched tracks as missed
        for track_id in unmatched_tracks:
            self.tracks[track_id].mark_missed()

        # Create new tracks for unmatched detections
        for detection in unmatched_detections:
            track = Track(detection, self.next_id, self.camera_id)
            self.tracks[self.next_id] = track
            self.next_id += 1

        # Remove deleted tracks
        tracks_to_remove = [tid for tid, track in self.tracks.items() if track.is_deleted()]
        for tid in tracks_to_remove:
            del self.tracks[tid]

        # Return confirmed tracks
        confirmed_tracks = [track for track in self.tracks.values() if track.is_confirmed()]
        return confirmed_tracks

    def _associate(self, detections: List) -> Tuple[List[Tuple[int, Any]], List[Any], List[int]]:
        """
        Associate detections to existing tracks using IoU distance

        Returns:
            (matched_tracks, unmatched_detections, unmatched_tracks)
        """
        if not detections or not self.tracks:
            return [], detections, list(self.tracks.keys())

        # Calculate IoU distance matrix
        track_ids = list(self.tracks.keys())
        cost_matrix = np.zeros((len(track_ids), len(detections)))

        for i, track_id in enumerate(track_ids):
            track = self.tracks[track_id]
            predicted_bbox = track.predict()

            for j, detection in enumerate(detections):
                iou = self._calculate_iou(predicted_bbox, detection.bbox)
                cost_matrix[i, j] = 1 - iou  # Convert IoU to distance

        # Solve assignment problem
        row_indices, col_indices = linear_sum_assignment(cost_matrix)

        # Filter matches by IoU threshold
        iou_threshold = 0.3
        matched_tracks = []
        matched_detection_indices = set()
        matched_track_indices = set()

        for row, col in zip(row_indices, col_indices):
            if cost_matrix[row, col] <= (1 - iou_threshold):
                track_id = track_ids[row]
                detection = detections[col]
                matched_tracks.append((track_id, detection))
                matched_detection_indices.add(col)
                matched_track_indices.add(row)

        # Find unmatched detections and tracks
        unmatched_detections = [detections[i] for i in range(len(detections))
                                if i not in matched_detection_indices]
        unmatched_tracks = [track_ids[i] for i in range(len(track_ids))
                            if i not in matched_track_indices]

        return matched_tracks, unmatched_detections, unmatched_tracks

    def _calculate_iou(self, bbox1: np.ndarray, bbox2: List[float]) -> float:
        """Calculate IoU between two bounding boxes"""
        x1_1, y1_1, x2_1, y2_1 = bbox1
        x1_2, y1_2, x2_2, y2_2 = bbox2

        # Calculate intersection area
        x1_i = max(x1_1, x1_2)
        y1_i = max(y1_1, y1_2)
        x2_i = min(x2_1, x2_2)
        y2_i = min(y2_1, y2_2)

        if x2_i <= x1_i or y2_i <= y1_i:
            return 0.0

        intersection = (x2_i - x1_i) * (y2_i - y1_i)

        # Calculate union area
        area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
        area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
        union = area1 + area2 - intersection

        return intersection / union if union > 0 else 0.0
class MultiCameraBoTSORT:
    """
    Multi-camera BoT-SORT tracker with complete camera isolation
    """

    def __init__(self, trigger_classes: List[str], min_confidence: float = 0.6):
        """
        Initialize multi-camera tracker

        Args:
            trigger_classes: List of class names to track
            min_confidence: Minimum detection confidence threshold
        """
        self.trigger_classes = trigger_classes
        self.min_confidence = min_confidence

        # Camera-specific trackers
        self.camera_trackers: Dict[str, CameraTracker] = {}

        logger.info(f"Initialized MultiCameraBoTSORT with classes={trigger_classes}, "
                    f"min_confidence={min_confidence}")

    def get_or_create_tracker(self, camera_id: str) -> CameraTracker:
        """Get or create tracker for specific camera"""
        if camera_id not in self.camera_trackers:
            self.camera_trackers[camera_id] = CameraTracker(camera_id)
            logger.info(f"Created new tracker for camera {camera_id}")

        return self.camera_trackers[camera_id]

    def update(self, camera_id: str, inference_result) -> List[Dict]:
        """
        Update tracker for specific camera with detections

        Args:
            camera_id: Camera identifier
            inference_result: InferenceResult with detections

        Returns:
            List of track information dictionaries
        """
        # Filter detections by confidence and trigger classes
        filtered_detections = []

        if hasattr(inference_result, 'detections') and inference_result.detections:
            for detection in inference_result.detections:
                if (detection.confidence >= self.min_confidence and
                        detection.class_name in self.trigger_classes):
                    filtered_detections.append(detection)

        # Get camera tracker and update
        tracker = self.get_or_create_tracker(camera_id)
        confirmed_tracks = tracker.update(filtered_detections)

        # Convert tracks to output format
        track_results = []
        for track in confirmed_tracks:
            track_results.append({
                'track_id': track.track_id,
                'camera_id': track.camera_id,
                'bbox': track.bbox,
                'confidence': track.confidence,
                'class_name': track.class_name,
                'hit_streak': track.hit_streak,
                'age': track.age
            })

        return track_results

    def get_statistics(self) -> Dict[str, Any]:
        """Get tracking statistics across all cameras"""
        stats = {}
        total_tracks = 0

        for camera_id, tracker in self.camera_trackers.items():
            camera_stats = {
                'active_tracks': len([t for t in tracker.tracks.values() if t.is_confirmed()]),
                'total_tracks': len(tracker.tracks),
                'frame_count': tracker.frame_count
            }
            stats[camera_id] = camera_stats
            total_tracks += camera_stats['active_tracks']

        stats['summary'] = {
            'total_cameras': len(self.camera_trackers),
            'total_active_tracks': total_tracks
        }

        return stats

    def reset_camera(self, camera_id: str):
        """Reset tracking for specific camera"""
        if camera_id in self.camera_trackers:
            del self.camera_trackers[camera_id]
            logger.info(f"Reset tracking for camera {camera_id}")

    def reset_all(self):
        """Reset all camera trackers"""
        self.camera_trackers.clear()
        logger.info("Reset all camera trackers")

@@ -63,7 +63,7 @@ class TrackingPipelineIntegration:
         self.pending_processing_data: Dict[str, Dict] = {}  # display_id -> processing data (waiting for session ID)
 
         # Additional validators for enhanced flow control
-        self.permanently_processed: Dict[int, float] = {}  # track_id -> process_time (never process again)
+        self.permanently_processed: Dict[str, float] = {}  # "camera_id:track_id" -> process_time (never process again)
         self.progression_stages: Dict[str, str] = {}  # session_id -> current_stage
         self.last_detection_time: Dict[str, float] = {}  # display_id -> last_detection_timestamp
         self.abandonment_timeout = 3.0  # seconds to wait before declaring car abandoned
@@ -183,7 +183,7 @@ class TrackingPipelineIntegration:
             # Run tracking model
             if self.tracking_model:
-                # Run inference with tracking
+                # Run detection-only (tracking handled by our own tracker)
                 tracking_results = self.tracking_model.track(
                     frame,
                     confidence_threshold=self.tracker.min_confidence,
@@ -486,7 +486,10 @@ class TrackingPipelineIntegration:
             self.session_vehicles[session_id] = track_id
 
             # Mark vehicle as permanently processed (won't process again even after session clear)
-            self.permanently_processed[track_id] = time.time()
+            # Use composite key to distinguish same track IDs across different cameras
+            camera_id = display_id  # Using display_id as camera_id for isolation
+            permanent_key = f"{camera_id}:{track_id}"
+            self.permanently_processed[permanent_key] = time.time()
 
             # Remove from pending
             del self.pending_vehicles[display_id]
@@ -667,6 +670,7 @@ class TrackingPipelineIntegration:
         self.executor.shutdown(wait=False)
         self.reset_tracking()
 
         # Cleanup detection pipeline
         if self.detection_pipeline:
             self.detection_pipeline.cleanup()

@@ -1,6 +1,6 @@
 """
-Vehicle Tracking Module - Continuous tracking with front_rear_detection model
-Implements vehicle identification, persistence, and motion analysis.
+Vehicle Tracking Module - BoT-SORT based tracking with camera isolation
+Implements vehicle identification, persistence, and motion analysis using external tracker.
 """
 import logging
 import time
@@ -10,6 +10,8 @@ from dataclasses import dataclass, field
 import numpy as np
 from threading import Lock
 
+from .bot_sort_tracker import MultiCameraBoTSORT
+
 logger = logging.getLogger(__name__)
@@ -17,6 +19,7 @@
 class TrackedVehicle:
     """Represents a tracked vehicle with all its state information."""
     track_id: int
+    camera_id: str
     first_seen: float
     last_seen: float
     session_id: Optional[str] = None
@@ -30,6 +33,8 @@ class TrackedVehicle:
     processed_pipeline: bool = False
     last_position_history: List[Tuple[float, float]] = field(default_factory=list)
    avg_confidence: float = 0.0
+    hit_streak: int = 0
+    age: int = 0
 
     def update_position(self, bbox: Tuple[int, int, int, int], confidence: float):
         """Update vehicle position and confidence."""
@@ -73,7 +78,7 @@
 class VehicleTracker:
     """
-    Main vehicle tracking implementation using YOLO tracking capabilities.
+    Main vehicle tracking implementation using BoT-SORT with camera isolation.
     Manages continuous tracking, vehicle identification, and state persistence.
     """
@@ -88,18 +93,19 @@ class VehicleTracker:
         self.trigger_classes = self.config.get('trigger_classes', self.config.get('triggerClasses', ['frontal']))
         self.min_confidence = self.config.get('minConfidence', 0.6)
 
-        # Tracking state
-        self.tracked_vehicles: Dict[int, TrackedVehicle] = {}
-        self.next_track_id = 1
+        # BoT-SORT multi-camera tracker
+        self.bot_sort = MultiCameraBoTSORT(self.trigger_classes, self.min_confidence)
+
+        # Tracking state - maintain compatibility with existing code
+        self.tracked_vehicles: Dict[str, Dict[int, TrackedVehicle]] = {}  # camera_id -> {track_id: vehicle}
         self.lock = Lock()
 
         # Tracking parameters
         self.stability_threshold = 0.7
         self.min_stable_frames = 5
-        self.position_tolerance = 50  # pixels
         self.timeout_seconds = 2.0
 
-        logger.info(f"VehicleTracker initialized with trigger_classes={self.trigger_classes}, "
+        logger.info(f"VehicleTracker initialized with BoT-SORT: trigger_classes={self.trigger_classes}, "
                     f"min_confidence={self.min_confidence}")
 
     def process_detections(self,
@@ -107,10 +113,10 @@ class VehicleTracker:
                            display_id: str,
                            frame: np.ndarray) -> List[TrackedVehicle]:
         """
-        Process YOLO detection results and update tracking state.
+        Process detection results using BoT-SORT tracking.
 
         Args:
-            results: YOLO detection results with tracking
+            results: Detection results (InferenceResult)
             display_id: Display identifier for this stream
             frame: Current frame being processed
@@ -118,108 +124,67 @@ class VehicleTracker:
             List of currently tracked vehicles
         """
         current_time = time.time()
-        active_tracks = []
 
-        with self.lock:
-            # Clean up expired tracks
-            expired_ids = [
-                track_id for track_id, vehicle in self.tracked_vehicles.items()
-                if vehicle.is_expired(self.timeout_seconds)
-            ]
-            for track_id in expired_ids:
-                logger.debug(f"Removing expired track {track_id}")
-                del self.tracked_vehicles[track_id]
-
-            # Process new detections from InferenceResult
-            if hasattr(results, 'detections') and results.detections:
-                # Process detections from InferenceResult
-                for detection in results.detections:
-                    # Skip if confidence is too low
-                    if detection.confidence < self.min_confidence:
-                        continue
-
-                    # Check if class is in trigger classes
-                    if detection.class_name not in self.trigger_classes:
-                        continue
-
-                    # Use track_id if available, otherwise generate one
-                    track_id = detection.track_id if detection.track_id is not None else self.next_track_id
-                    if detection.track_id is None:
-                        self.next_track_id += 1
-
-                    # Get bounding box from Detection object
-                    x1, y1, x2, y2 = detection.bbox
-                    bbox = (int(x1), int(y1), int(x2), int(y2))
-
-                    # Update or create tracked vehicle
-                    confidence = detection.confidence
-                    if track_id in self.tracked_vehicles:
-                        # Update existing track
-                        vehicle = self.tracked_vehicles[track_id]
-                        vehicle.update_position(bbox, confidence)
-                        vehicle.display_id = display_id
-
-                        # Check stability
-                        stability = vehicle.calculate_stability()
-                        if stability > self.stability_threshold:
-                            vehicle.stable_frames += 1
-                            if vehicle.stable_frames >= self.min_stable_frames:
-                                vehicle.is_stable = True
-                        else:
-                            vehicle.stable_frames = max(0, vehicle.stable_frames - 1)
-                            if vehicle.stable_frames < self.min_stable_frames:
-                                vehicle.is_stable = False
-
-                        logger.debug(f"Updated track {track_id}: conf={confidence:.2f}, "
-                                     f"stable={vehicle.is_stable}, stability={stability:.2f}")
-                    else:
-                        # Create new track
-                        vehicle = TrackedVehicle(
-                            track_id=track_id,
-                            first_seen=current_time,
-                            last_seen=current_time,
-                            display_id=display_id,
-                            confidence=confidence,
-                            bbox=bbox,
-                            center=((x1 + x2) / 2, (y1 + y2) / 2),
-                            total_frames=1
-                        )
-                        vehicle.last_position_history.append(vehicle.center)
-                        self.tracked_vehicles[track_id] = vehicle
-                        logger.info(f"New vehicle tracked: ID={track_id}, display={display_id}")
-
-                    active_tracks.append(self.tracked_vehicles[track_id])
+        # Extract camera_id from display_id for tracking isolation
+        camera_id = display_id  # Using display_id as camera_id for isolation
+
+        with self.lock:
+            # Update BoT-SORT tracker
+            track_results = self.bot_sort.update(camera_id, results)
+
+            # Ensure camera tracking dict exists
+            if camera_id not in self.tracked_vehicles:
+                self.tracked_vehicles[camera_id] = {}
+
+            # Update tracked vehicles based on BoT-SORT results
+            current_tracks = {}
+            active_tracks = []
+
+            for track_result in track_results:
+                track_id = track_result['track_id']
+
+                # Create or update TrackedVehicle
+                if track_id in self.tracked_vehicles[camera_id]:
+                    # Update existing vehicle
+                    vehicle = self.tracked_vehicles[camera_id][track_id]
+                    vehicle.update_position(track_result['bbox'], track_result['confidence'])
+                    vehicle.hit_streak = track_result['hit_streak']
+                    vehicle.age = track_result['age']
+
+                    # Update stability based on hit_streak
+                    if vehicle.hit_streak >= self.min_stable_frames:
+                        vehicle.is_stable = True
+                        vehicle.stable_frames = vehicle.hit_streak
+
+                    logger.debug(f"Updated track {track_id}: conf={vehicle.confidence:.2f}, "
+                                 f"stable={vehicle.is_stable}, hit_streak={vehicle.hit_streak}")
+                else:
+                    # Create new vehicle
+                    x1, y1, x2, y2 = track_result['bbox']
+                    vehicle = TrackedVehicle(
+                        track_id=track_id,
+                        camera_id=camera_id,
+                        first_seen=current_time,
+                        last_seen=current_time,
+                        display_id=display_id,
+                        confidence=track_result['confidence'],
+                        bbox=tuple(track_result['bbox']),
+                        center=((x1 + x2) / 2, (y1 + y2) / 2),
+                        total_frames=1,
+                        hit_streak=track_result['hit_streak'],
+                        age=track_result['age']
+                    )
+                    vehicle.last_position_history.append(vehicle.center)
+                    logger.info(f"New vehicle tracked: ID={track_id}, camera={camera_id}, display={display_id}")
+
+                current_tracks[track_id] = vehicle
+                active_tracks.append(vehicle)
+
+            # Update the camera's tracked vehicles
+            self.tracked_vehicles[camera_id] = current_tracks
 
         return active_tracks
 
-    def _find_closest_track(self, center: Tuple[float, float]) -> Optional[TrackedVehicle]:
-        """
-        Find the closest existing track to a given position.
-
-        Args:
-            center: Center position to match
-
-        Returns:
-            Closest tracked vehicle if within tolerance, None otherwise
-        """
-        min_distance = float('inf')
-        closest_track = None
-
-        for vehicle in self.tracked_vehicles.values():
-            if vehicle.is_expired(0.5):  # Shorter timeout for matching
-                continue
-
-            distance = np.sqrt(
-                (center[0] - vehicle.center[0]) ** 2 +
-                (center[1] - vehicle.center[1]) ** 2
-            )
-
-            if distance < min_distance and distance < self.position_tolerance:
-                min_distance = distance
-                closest_track = vehicle
-
-        return closest_track
-
     def get_stable_vehicles(self, display_id: Optional[str] = None) -> List[TrackedVehicle]:
         """
         Get all stable vehicles, optionally filtered by display.
@@ -231,11 +196,15 @@ class VehicleTracker:
             List of stable tracked vehicles
         """
         with self.lock:
-            stable = [
-                v for v in self.tracked_vehicles.values()
-                if v.is_stable and not v.is_expired(self.timeout_seconds)
-                and (display_id is None or v.display_id == display_id)
-            ]
+            stable = []
+            camera_id = display_id  # Using display_id as camera_id
+
+            if camera_id in self.tracked_vehicles:
+                for vehicle in self.tracked_vehicles[camera_id].values():
+                    if (vehicle.is_stable and not vehicle.is_expired(self.timeout_seconds) and
+                            (display_id is None or vehicle.display_id == display_id)):
+                        stable.append(vehicle)
+
             return stable
 
     def get_vehicle_by_session(self, session_id: str) -> Optional[TrackedVehicle]:
@@ -249,9 +218,11 @@ class VehicleTracker:
             Tracked vehicle if found, None otherwise
         """
         with self.lock:
-            for vehicle in self.tracked_vehicles.values():
-                if vehicle.session_id == session_id:
-                    return vehicle
+            # Search across all cameras
+            for camera_vehicles in self.tracked_vehicles.values():
+                for vehicle in camera_vehicles.values():
+                    if vehicle.session_id == session_id:
+                        return vehicle
             return None
 
     def mark_processed(self, track_id: int, session_id: str):
@@ -263,11 +234,14 @@ class VehicleTracker:
             session_id: Session ID assigned to this vehicle
         """
         with self.lock:
-            if track_id in self.tracked_vehicles:
-                vehicle = self.tracked_vehicles[track_id]
-                vehicle.processed_pipeline = True
-                vehicle.session_id = session_id
-                logger.info(f"Marked vehicle {track_id} as processed with session {session_id}")
+            # Search across all cameras for the track_id
+            for camera_vehicles in self.tracked_vehicles.values():
+                if track_id in camera_vehicles:
+                    vehicle = camera_vehicles[track_id]
+                    vehicle.processed_pipeline = True
+                    vehicle.session_id = session_id
+                    logger.info(f"Marked vehicle {track_id} as processed with session {session_id}")
+                    return
 
     def clear_session(self, session_id: str):
         """
@@ -277,30 +251,43 @@ class VehicleTracker:
             session_id: Session ID to clear
         """
         with self.lock:
-            for vehicle in self.tracked_vehicles.values():
-                if vehicle.session_id == session_id:
-                    logger.info(f"Clearing session {session_id} from vehicle {vehicle.track_id}")
-                    vehicle.session_id = None
-                    # Keep processed_pipeline=True to prevent re-processing
+            # Search across all cameras
+            for camera_vehicles in self.tracked_vehicles.values():
+                for vehicle in camera_vehicles.values():
+                    if vehicle.session_id == session_id:
+                        logger.info(f"Clearing session {session_id} from vehicle {vehicle.track_id}")
+                        vehicle.session_id = None
+                        # Keep processed_pipeline=True to prevent re-processing
 
     def reset_tracking(self):
         """Reset all tracking state."""
         with self.lock:
             self.tracked_vehicles.clear()
-            self.next_track_id = 1
+            self.bot_sort.reset_all()
             logger.info("Vehicle tracking state reset")
 
     def get_statistics(self) -> Dict:
         """Get tracking statistics."""
         with self.lock:
-            total = len(self.tracked_vehicles)
-            stable = sum(1 for v in self.tracked_vehicles.values() if v.is_stable)
-            processed = sum(1 for v in self.tracked_vehicles.values() if v.processed_pipeline)
+            total = 0
+            stable = 0
+            processed = 0
+            all_confidences = []
+
+            # Aggregate stats across all cameras
+            for camera_vehicles in self.tracked_vehicles.values():
+                total += len(camera_vehicles)
+                for vehicle in camera_vehicles.values():
+                    if vehicle.is_stable:
+                        stable += 1
+                    if vehicle.processed_pipeline:
+                        processed += 1
+                    all_confidences.append(vehicle.avg_confidence)
 
             return {
                 'total_tracked': total,
                 'stable_vehicles': stable,
                 'processed_vehicles': processed,
-                'avg_confidence': np.mean([v.avg_confidence for v in self.tracked_vehicles.values()])
-                                  if self.tracked_vehicles else 0.0
+                'avg_confidence': np.mean(all_confidences) if all_confidences else 0.0,
+                'bot_sort_stats': self.bot_sort.get_statistics()
             }

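The net effect of the refactor is that tracking state is fully partitioned per camera; sketched here with hypothetical values (real entries are TrackedVehicle instances):

    tracked_vehicles = {
        'display-01': {1: 'vehicle_a', 2: 'vehicle_b'},  # camera/display -> {track_id: vehicle}
        'display-02': {1: 'vehicle_c'},                  # track_id 1 reused safely per camera
    }
    assert 1 in tracked_vehicles['display-01'] and 1 in tracked_vehicles['display-02']
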
@@ -354,25 +354,28 @@ class StableCarValidator:
     def should_skip_same_car(self,
                              vehicle: TrackedVehicle,
                              session_cleared: bool = False,
-                             permanently_processed: Dict[int, float] = None) -> bool:
+                             permanently_processed: Dict[str, float] = None) -> bool:
         """
         Determine if we should skip processing for the same car after session clear.
 
         Args:
             vehicle: The tracked vehicle
             session_cleared: Whether the session was recently cleared
-            permanently_processed: Dict of permanently processed vehicles
+            permanently_processed: Dict of permanently processed vehicles (camera_id:track_id -> time)
 
         Returns:
             True if we should skip this vehicle
         """
         # Check if this vehicle was permanently processed (never process again)
-        if permanently_processed and vehicle.track_id in permanently_processed:
-            process_time = permanently_processed[vehicle.track_id]
-            time_since = time.time() - process_time
-            logger.debug(f"Skipping permanently processed vehicle {vehicle.track_id} "
-                         f"(processed {time_since:.1f}s ago)")
-            return True
+        if permanently_processed:
+            # Create composite key using camera_id and track_id
+            permanent_key = f"{vehicle.camera_id}:{vehicle.track_id}"
+            if permanent_key in permanently_processed:
+                process_time = permanently_processed[permanent_key]
+                time_since = time.time() - process_time
+                logger.debug(f"Skipping permanently processed vehicle {vehicle.track_id} on camera {vehicle.camera_id} "
+                             f"(processed {time_since:.1f}s ago)")
+                return True
 
         # If vehicle has a session_id but it was cleared, skip for a period
         if vehicle.session_id is None and vehicle.processed_pipeline and session_cleared:
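
A small sketch of why the composite key matters: the same numeric track ID can now exist on two cameras without one suppressing the other (values below are hypothetical).

    import time

    permanently_processed = {}
    permanently_processed[f"display-01:{7}"] = time.time()    # processed on camera 1

    def already_processed(camera_id: str, track_id: int) -> bool:
        return f"{camera_id}:{track_id}" in permanently_processed

    print(already_processed('display-01', 7))  # True  - skip re-processing
    print(already_processed('display-02', 7))  # False - different camera, same numeric ID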