# python-rtsp-worker/services/tracking_controller.py

import threading
from collections import defaultdict, deque
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import torch


@dataclass
class TrackedObject:
    """
    Represents a tracked object with persistent ID and metadata.

    Attributes:
        track_id: Unique persistent tracking ID
        class_id: Object class ID from detection model
        class_name: Object class name (if available)
        confidence: Detection confidence score (0-1)
        bbox: Bounding box in format [x1, y1, x2, y2] (normalized or absolute)
        last_seen_frame: Frame number when object was last detected
        first_seen_frame: Frame number when object was first detected
        track_history: Deque of historical bboxes for trajectory tracking
        state: Custom state dict for additional tracking data
    """
    track_id: int
    class_id: int
    class_name: str = "unknown"
    confidence: float = 0.0
    bbox: List[float] = field(default_factory=list)
    last_seen_frame: int = 0
    first_seen_frame: int = 0
    track_history: deque = field(default_factory=lambda: deque(maxlen=30))
    state: Dict[str, Any] = field(default_factory=dict)

    def update(self, bbox: List[float], confidence: float, frame_num: int):
        """Update tracked object with new detection"""
        self.bbox = bbox
        self.confidence = confidence
        self.last_seen_frame = frame_num
        self.track_history.append((frame_num, bbox, confidence))

    def age(self, current_frame: int) -> int:
        """Get age of track in frames since last seen"""
        return current_frame - self.last_seen_frame

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization"""
        return {
            'track_id': self.track_id,
            'class_id': self.class_id,
            'class_name': self.class_name,
            'confidence': self.confidence,
            'bbox': self.bbox,
            'last_seen_frame': self.last_seen_frame,
            'first_seen_frame': self.first_seen_frame,
            'age': self.last_seen_frame - self.first_seen_frame,
            'history_length': len(self.track_history),
            'state': self.state
        }


@dataclass
class Detection:
    """
    Represents a single detection from an object detection model.

    Attributes:
        bbox: Bounding box [x1, y1, x2, y2]
        confidence: Detection confidence (0-1)
        class_id: Object class ID
        class_name: Object class name (optional)
    """
    bbox: List[float]
    confidence: float
    class_id: int
    class_name: str = "unknown"


class ObjectTracker:
    """
    Lightweight GPU-accelerated object tracker (decoupled from inference).

    This class only handles tracking logic - associating detections with
    existing tracks, maintaining track IDs, and managing track lifecycle.
    It does NOT perform inference.

    Architecture (Event-Driven Mode):
        - Receives pre-computed detections (from ModelController)
        - Maintains persistent tracking state (track IDs, histories)
        - GPU-accelerated IoU computation for track association
        - Thread-safe for concurrent operations

    Tracking Flow:
        Detections → Track Association (GPU IoU) → Update Tracks → Return Tracked Objects

    Features:
        - Lightweight: No model_repository dependency (zero VRAM overhead)
        - GPU-accelerated: IoU computation on GPU for performance
        - Persistent IDs: Tracks maintain consistent IDs across frames
        - Track History: Maintains trajectory history for each object
        - Thread-safe: Mutex-based locking for concurrent access

    Example:
        # Event-driven mode (no model dependency)
        tracker = ObjectTracker(
            gpu_id=0,
            tracker_type="iou",
            max_age=30,
            iou_threshold=0.3,
            class_names=COCO_CLASSES
        )

        # Update with pre-computed detections
        detections = [Detection(bbox=[x1, y1, x2, y2], confidence=0.9, class_id=0)]
        tracked_objects = tracker.update(detections)
    """

    def __init__(self,
                 gpu_id: int = 0,
                 tracker_type: str = "iou",
                 max_age: int = 30,
                 iou_threshold: float = 0.3,
                 class_names: Optional[Dict[int, str]] = None):
        """
        Initialize ObjectTracker (no model dependency).

        Args:
            gpu_id: GPU device ID for IoU computation
            tracker_type: Tracking algorithm type ("iou")
            max_age: Maximum number of frames a track is kept without a matching detection
            iou_threshold: IoU threshold for track association
            class_names: Optional mapping of class IDs to names
        """
        self.gpu_id = gpu_id
        self.device = torch.device(f'cuda:{gpu_id}')
        self.tracker_type = tracker_type
        self.max_age = max_age
        self.iou_threshold = iou_threshold
        self.class_names = class_names or {}

        # Tracking state
        self._tracks: Dict[int, TrackedObject] = {}
        self._next_track_id: int = 0
        self._frame_count: int = 0
        self._lock = threading.RLock()

        # Statistics
        self._total_detections = 0
        self._total_tracks_created = 0

    def _compute_iou_gpu(self, boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
        """
        Compute IoU between two sets of boxes on GPU.

        Args:
            boxes1: Tensor of shape (N, 4) in format [x1, y1, x2, y2]
            boxes2: Tensor of shape (M, 4) in format [x1, y1, x2, y2]

        Returns:
            IoU matrix of shape (N, M)
        """
        # Ensure on GPU
        boxes1 = boxes1.to(self.device)
        boxes2 = boxes2.to(self.device)

        # Compute intersection
        x1_max = torch.max(boxes1[:, None, 0], boxes2[:, 0])  # (N, M)
        y1_max = torch.max(boxes1[:, None, 1], boxes2[:, 1])  # (N, M)
        x2_min = torch.min(boxes1[:, None, 2], boxes2[:, 2])  # (N, M)
        y2_min = torch.min(boxes1[:, None, 3], boxes2[:, 3])  # (N, M)

        intersection_width = torch.clamp(x2_min - x1_max, min=0)
        intersection_height = torch.clamp(y2_min - y1_max, min=0)
        intersection_area = intersection_width * intersection_height

        # Compute areas
        boxes1_area = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
        boxes2_area = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

        # Compute union
        union_area = boxes1_area[:, None] + boxes2_area - intersection_area

        # Compute IoU
        iou = intersection_area / (union_area + 1e-6)
        return iou
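
    # Worked example (illustrative values, not from the original source): for
    # boxes1 = [[0, 0, 2, 2]] and boxes2 = [[1, 0, 3, 2]], the intersection
    # spans 1 * 2 = 2, each box has area 4, and the union is 4 + 4 - 2 = 6,
    # so _compute_iou_gpu returns [[2 / 6]] ≈ [[0.333]].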

    def _iou_tracking(self, detections: torch.Tensor) -> List[Tuple[int, int]]:
        """
        Simple IoU-based tracking algorithm (on GPU).

        Args:
            detections: Tensor of shape (N, 6) with [x1, y1, x2, y2, conf, class_id]

        Returns:
            List of (detection_idx, track_id) associations; track_id == -1
            means the detection starts a new track
        """
        if len(self._tracks) == 0:
            # No existing tracks: every detection starts a new track.
            # Keep the real detection index so the caller can look it up.
            return [(det_idx, -1) for det_idx in range(len(detections))]

        # Get existing track bboxes
        track_ids = list(self._tracks.keys())
        track_bboxes = torch.tensor(
            [self._tracks[tid].bbox for tid in track_ids],
            dtype=torch.float32,
            device=self.device
        )

        # Extract detection bboxes
        det_bboxes = detections[:, :4]  # (N, 4)

        # Compute IoU matrix (GPU)
        iou_matrix = self._compute_iou_gpu(det_bboxes, track_bboxes)  # (N, M)

        # Greedy matching: assign each detection to its best unmatched track
        associations = []
        matched_tracks = set()

        # Convert to CPU for matching logic (small matrix)
        iou_cpu = iou_matrix.cpu().numpy()

        for det_idx in range(len(detections)):
            best_iou = self.iou_threshold
            best_track_idx = -1

            for track_idx, track_id in enumerate(track_ids):
                if track_idx in matched_tracks:
                    continue
                if iou_cpu[det_idx, track_idx] > best_iou:
                    best_iou = iou_cpu[det_idx, track_idx]
                    best_track_idx = track_idx

            if best_track_idx >= 0:
                associations.append((det_idx, track_ids[best_track_idx]))
                matched_tracks.add(best_track_idx)
            else:
                associations.append((det_idx, -1))  # New track

        return associations
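
    # Illustrative association example (assumed values): with two detections,
    # two live tracks with IDs [7, 9], iou_threshold = 0.3, and an IoU matrix
    # [[0.80, 0.10], [0.20, 0.60]], greedy matching returns [(0, 7), (1, 9)].
    # If no entry exceeds the threshold, every detection maps to -1 and
    # update() creates fresh tracks for all of them.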

    def _create_track(self, bbox: List[float], confidence: float,
                      class_id: int, frame_num: int) -> TrackedObject:
        """Create a new tracked object"""
        track_id = self._next_track_id
        self._next_track_id += 1
        self._total_tracks_created += 1

        class_name = self.class_names.get(class_id, f"class_{class_id}")

        track = TrackedObject(
            track_id=track_id,
            class_id=class_id,
            class_name=class_name,
            confidence=confidence,
            bbox=bbox,
            last_seen_frame=frame_num,
            first_seen_frame=frame_num
        )
        track.track_history.append((frame_num, bbox, confidence))
        return track

    def _cleanup_stale_tracks(self):
        """Remove tracks that haven't been seen for more than max_age frames"""
        stale_track_ids = [
            tid for tid, track in self._tracks.items()
            if track.age(self._frame_count) > self.max_age
        ]
        for tid in stale_track_ids:
            del self._tracks[tid]

    def update(self,
               detections: List[Detection],
               frame_shape: Optional[Tuple[int, int, int]] = None,
               model_input_size: int = 640) -> List[TrackedObject]:
        """
        Update tracker with new detections (decoupled from inference).

        Args:
            detections: List of Detection objects from model inference
            frame_shape: Original frame shape (C, H, W); if given, bboxes are
                scaled from model space back to frame space (in place)
            model_input_size: Model input size (default: 640 for YOLOv8)

        Returns:
            List of currently tracked objects
        """
        with self._lock:
            self._frame_count += 1
            self._total_detections += len(detections)

            # No detections: just clean up stale tracks
            if len(detections) == 0:
                self._cleanup_stale_tracks()
                return list(self._tracks.values())

            # Scale detections from model space (e.g. 640x640) to frame space (H x W).
            # Note: this mutates the Detection objects passed in. For example, a
            # 1920x1080 frame with a 640 model input gives scale_x = 3.0, scale_y = 1.6875.
            if frame_shape is not None:
                _, frame_h, frame_w = frame_shape
                scale_x = frame_w / model_input_size
                scale_y = frame_h / model_input_size

                for det in detections:
                    x1, y1, x2, y2 = det.bbox
                    det.bbox = [
                        x1 * scale_x,
                        y1 * scale_y,
                        x2 * scale_x,
                        y2 * scale_y
                    ]

            # Convert detections to tensor for GPU processing
            det_tensor = torch.tensor(
                [[*det.bbox, det.confidence, det.class_id] for det in detections],
                dtype=torch.float32,
                device=self.device
            )

            # Run tracking algorithm
            if self.tracker_type == "iou":
                associations = self._iou_tracking(det_tensor)
            else:
                raise NotImplementedError(f"Tracker type '{self.tracker_type}' not implemented")

            # Update tracks based on associations
            for det_idx, track_id in associations:
                det = detections[det_idx]

                if track_id == -1:
                    # Create new track
                    new_track = self._create_track(
                        det.bbox, det.confidence, det.class_id, self._frame_count
                    )
                    self._tracks[new_track.track_id] = new_track
                else:
                    # Update existing track
                    self._tracks[track_id].update(det.bbox, det.confidence, self._frame_count)

            # Cleanup stale tracks
            self._cleanup_stale_tracks()

            return list(self._tracks.values())

    def get_all_tracks(self, active_only: bool = True) -> List[TrackedObject]:
        """
        Get all tracked objects.

        Args:
            active_only: If True, only return tracks seen within the last max_age frames

        Returns:
            List of tracked objects
        """
        with self._lock:
            if active_only:
                return [
                    track for track in self._tracks.values()
                    if track.age(self._frame_count) <= self.max_age
                ]
            else:
                return list(self._tracks.values())

    def get_track_by_id(self, track_id: int) -> Optional[TrackedObject]:
        """
        Get a specific track by ID.

        Args:
            track_id: Track ID to retrieve

        Returns:
            TrackedObject or None if not found
        """
        with self._lock:
            return self._tracks.get(track_id)

    def get_tracks_by_class(self, class_id: int, active_only: bool = True) -> List[TrackedObject]:
        """
        Get all tracks of a specific class.

        Args:
            class_id: Class ID to filter by
            active_only: If True, only return active tracks

        Returns:
            List of tracked objects
        """
        all_tracks = self.get_all_tracks(active_only=active_only)
        return [track for track in all_tracks if track.class_id == class_id]

    def get_track_count(self, active_only: bool = True) -> int:
        """
        Get number of tracked objects.

        Args:
            active_only: If True, only count active tracks

        Returns:
            Number of tracks
        """
        return len(self.get_all_tracks(active_only=active_only))

    def get_class_counts(self, active_only: bool = True) -> Dict[int, int]:
        """
        Get count of tracked objects per class.

        Args:
            active_only: If True, only count active tracks

        Returns:
            Dictionary mapping class_id to count
        """
        tracks = self.get_all_tracks(active_only=active_only)
        counts = defaultdict(int)
        for track in tracks:
            counts[track.class_id] += 1
        return dict(counts)

    def reset_tracks(self):
        """Clear all tracking state"""
        with self._lock:
            self._tracks.clear()
            self._next_track_id = 0
            self._frame_count = 0
            print("Tracking state reset")

    def get_statistics(self) -> Dict[str, Any]:
        """
        Get tracking statistics.

        Returns:
            Dictionary with tracking stats
        """
        with self._lock:
            return {
                'frame_count': self._frame_count,
                'active_tracks': len(self._tracks),
                'total_tracks_created': self._total_tracks_created,
                'total_detections': self._total_detections,
                'avg_detections_per_frame': self._total_detections / max(self._frame_count, 1),
                'tracker_type': self.tracker_type,
                'class_counts': self.get_class_counts(active_only=True)
            }

    def export_tracks(self, format: str = "dict") -> Any:
        """
        Export all tracks in specified format.

        Args:
            format: Export format ("dict", "json", "numpy")

        Returns:
            Tracks in specified format
        """
        with self._lock:
            tracks = self.get_all_tracks(active_only=False)

            if format == "dict":
                return {track.track_id: track.to_dict() for track in tracks}
            elif format == "json":
                import json
                return json.dumps(
                    {track.track_id: track.to_dict() for track in tracks},
                    indent=2
                )
            elif format == "numpy":
                # Export as numpy array: [track_id, class_id, x1, y1, x2, y2, conf]
                data = []
                for track in tracks:
                    data.append([
                        track.track_id,
                        track.class_id,
                        *track.bbox,
                        track.confidence
                    ])
                # Keep a consistent (0, 7) column shape when there are no tracks
                return np.array(data) if data else np.empty((0, 7))
            else:
                raise ValueError(f"Unknown export format: {format}")

    def __repr__(self):
        with self._lock:
            return (f"ObjectTracker(tracker={self.tracker_type}, "
                    f"frame={self._frame_count}, "
                    f"tracks={len(self._tracks)})")
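

# Minimal usage sketch (assumes a CUDA-capable GPU; the bbox and class values
# below are illustrative, not from the original source).
if __name__ == "__main__":
    tracker = ObjectTracker(gpu_id=0, tracker_type="iou",
                            max_age=30, iou_threshold=0.3,
                            class_names={0: "person"})

    # Frame 1: a single detection creates track 0
    frame1 = [Detection(bbox=[100.0, 100.0, 200.0, 200.0],
                        confidence=0.90, class_id=0)]
    print(tracker.update(frame1))  # one TrackedObject with track_id=0

    # Frame 2: a slightly shifted box overlaps track 0 (IoU ≈ 0.75 > 0.3),
    # so the detection is associated with the existing track instead of
    # creating a new one
    frame2 = [Detection(bbox=[110.0, 105.0, 210.0, 205.0],
                        confidence=0.85, class_id=0)]
    print(tracker.update(frame2))

    print(tracker.get_statistics())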