profiling

This commit is contained in:
Siwat Sirichai 2025-11-10 00:10:53 +07:00
parent 7044b1e588
commit c0ffa3967b
9 changed files with 354 additions and 1298 deletions

View file

@ -5,8 +5,7 @@ Services package for RTSP stream processing with GPU acceleration.
from .stream_decoder import StreamDecoderFactory, StreamDecoder, ConnectionStatus
from .jpeg_encoder import JPEGEncoderFactory, encode_frame_to_jpeg
from .model_repository import TensorRTModelRepository, ModelMetadata, ExecutionContext, SharedEngine
from .tracking_controller import TrackingController, TrackedObject
from .tracking_factory import TrackingFactory
from .tracking_controller import ObjectTracker, TrackedObject, Detection
from .yolo import YOLOv8Utils, COCO_CLASSES
from .model_controller import ModelController, BatchFrame, BufferState
from .stream_connection_manager import StreamConnectionManager, StreamConnection, TrackingResult
@ -23,9 +22,9 @@ __all__ = [
'ModelMetadata',
'ExecutionContext',
'SharedEngine',
'TrackingController',
'ObjectTracker',
'TrackedObject',
'TrackingFactory',
'Detection',
'YOLOv8Utils',
'COCO_CLASSES',
'ModelController',

View file

@ -16,7 +16,6 @@ import torch
from .model_controller import ModelController
from .stream_decoder import StreamDecoderFactory
from .tracking_factory import TrackingFactory
from .model_repository import TensorRTModelRepository
logger = logging.getLogger(__name__)
@ -133,28 +132,32 @@ class StreamConnection:
async def _frame_poller(self):
"""Poll frames from threaded decoder and submit to model controller"""
last_frame_ptr = None
last_decoder_frame_count = -1
while self.running:
try:
# Poll frame from decoder (runs in thread)
frame = self.decoder.get_latest_frame(rgb=True)
# Get current decoder frame count (no data transfer, just counter)
decoder_frame_count = self.decoder.get_frame_count()
# Check if we got a new frame (avoid reprocessing same frame)
if frame is not None and frame.data_ptr() != last_frame_ptr:
last_frame_ptr = frame.data_ptr()
self.last_frame_time = time.time()
self.frame_count += 1
# Check if decoder has a new frame (avoid reprocessing same frame)
if decoder_frame_count > last_decoder_frame_count:
# Poll frame from decoder (zero-copy - stays in VRAM)
frame = self.decoder.get_latest_frame(rgb=True)
# Submit to model controller for batched inference
await self.model_controller.submit_frame(
stream_id=self.stream_id,
frame=frame,
metadata={
"frame_number": self.frame_count,
"shape": tuple(frame.shape),
}
)
if frame is not None:
last_decoder_frame_count = decoder_frame_count
self.last_frame_time = time.time()
self.frame_count += 1
# Submit to model controller for batched inference
await self.model_controller.submit_frame(
stream_id=self.stream_id,
frame=frame,
metadata={
"frame_number": self.frame_count,
"shape": tuple(frame.shape),
}
)
# Check decoder status
if not self.decoder.is_connected():
@ -211,53 +214,37 @@ class StreamConnection:
logger.error(f"Error handling inference result for {self.stream_id}: {e}", exc_info=True)
await self.error_queue.put(e)
def _run_tracking_sync(self, detections):
def _run_tracking_sync(self, detections, min_confidence=0.7):
"""
Run tracking synchronously (called from executor).
Args:
detections: Detection tensor (N, 6) [x1, y1, x2, y2, conf, class_id]
min_confidence: Minimum confidence threshold for detections
Returns:
List of TrackedObject instances
"""
# Use the TrackingController's internal tracking with detections
# We need to manually update tracks since we already have detections
import torch
# Convert tensor detections to Detection objects, filtering by confidence
from .tracking_controller import Detection
with self.tracking_controller._lock:
self.tracking_controller._frame_count += 1
detection_list = []
for det in detections:
confidence = float(det[4])
# If no detections, just cleanup and return current tracks
if len(detections) == 0:
self.tracking_controller._cleanup_stale_tracks()
return list(self.tracking_controller._tracks.values())
# Filter by confidence threshold (prevents track accumulation)
if confidence < min_confidence:
continue
# Run IoU tracking to associate detections with existing tracks
associations = self.tracking_controller._iou_tracking(detections)
detection_list.append(Detection(
bbox=det[:4].cpu().tolist(),
confidence=confidence,
class_id=int(det[5]) if det.shape[0] > 5 else 0,
class_name=f"class_{int(det[5])}" if det.shape[0] > 5 else "unknown"
))
# Update or create tracks
for (det_idx, track_id), detection in zip(associations, detections):
bbox = detection[:4].cpu().tolist()
confidence = float(detection[4])
class_id = int(detection[5]) if detection.shape[0] > 5 else 0
if track_id == -1:
# Create new track
new_track = self.tracking_controller._create_track(
bbox, confidence, class_id, self.tracking_controller._frame_count
)
self.tracking_controller._tracks[new_track.track_id] = new_track
else:
# Update existing track
self.tracking_controller._tracks[track_id].update(
bbox, confidence, self.tracking_controller._frame_count
)
# Cleanup stale tracks
self.tracking_controller._cleanup_stale_tracks()
return list(self.tracking_controller._tracks.values())
# Update tracker with detections (lightweight, no model dependency!)
return self.tracking_controller.update(detection_list)
async def tracking_results(self) -> AsyncIterator[TrackingResult]:
"""
@ -341,7 +328,6 @@ class StreamConnectionManager:
# Factories
self.decoder_factory = StreamDecoderFactory(gpu_id=gpu_id)
self.tracking_factory = TrackingFactory(gpu_id=gpu_id)
self.model_repository = TensorRTModelRepository(
gpu_id=gpu_id,
enable_pt_conversion=enable_pt_conversion
@ -349,7 +335,6 @@ class StreamConnectionManager:
# Controllers
self.model_controller: Optional[ModelController] = None
self.tracking_controller = None
# Connections
self.connections: Dict[str, StreamConnection] = {}
@ -454,17 +439,16 @@ class StreamConnectionManager:
# Create decoder
decoder = self.decoder_factory.create_decoder(rtsp_url, buffer_size=buffer_size)
# Create dedicated tracking controller for THIS stream
# This prevents track accumulation across multiple streams
tracking_controller = self.tracking_factory.create_controller(
model_repository=self.model_repository,
model_id=self.model_id_for_tracking,
# Create lightweight tracker (NO model_repository dependency!)
from .tracking_controller import ObjectTracker
tracking_controller = ObjectTracker(
gpu_id=self.gpu_id,
tracker_type="iou",
max_age=30,
min_confidence=0.5,
iou_threshold=0.3,
class_names=None # TODO: pass class names if available
)
logger.info(f"Created dedicated TrackingController for stream {stream_id}")
logger.info(f"Created lightweight ObjectTracker for stream {stream_id}")
# Create connection
connection = StreamConnection(

View file

@ -448,6 +448,10 @@ class StreamDecoder:
with self._buffer_lock:
return len(self.frame_buffer)
def get_frame_count(self) -> int:
    """
    Get total number of frames decoded since start.

    Returns:
        The current value of ``self.frame_count``.
    """
    # NOTE(review): the counter is read without holding _buffer_lock, unlike
    # the buffer-length accessor that precedes this method — presumably fine
    # for a plain int counter, but confirm against the code that increments it.
    return self.frame_count
def is_connected(self) -> bool:
    """
    Check if stream is actively connected.

    Returns:
        True only when ``get_status()`` equals ``ConnectionStatus.CONNECTED``;
        any other status yields False.
    """
    return self.get_status() == ConnectionStatus.CONNECTED

View file

@ -5,7 +5,6 @@ from collections import defaultdict, deque
import time
import torch
import numpy as np
from .model_repository import TensorRTModelRepository
@dataclass
@ -61,78 +60,81 @@ class TrackedObject:
}
class TrackingController:
@dataclass
class Detection:
"""
GPU-accelerated object tracking controller that wraps TensorRTModelRepository.
Represents a single detection from object detection model.
Architecture:
- Wraps model repository for dependency injection
- Maintains CUDA state for bbox tracking operations
- Stores persistent tracking data (track IDs, histories, states)
- Processes GPU tensor frames directly (zero-copy pipeline)
- Thread-safe for concurrent tracking operations
Attributes:
bbox: Bounding box [x1, y1, x2, y2]
confidence: Detection confidence (0-1)
class_id: Object class ID
class_name: Object class name (optional)
"""
bbox: List[float]
confidence: float
class_id: int
class_name: str = "unknown"
class ObjectTracker:
"""
Lightweight GPU-accelerated object tracker (decoupled from inference).
This class only handles tracking logic - associating detections with existing tracks,
maintaining track IDs, and managing track lifecycle. It does NOT perform inference.
Architecture (Event-Driven Mode):
- Receives pre-computed detections (from ModelController)
- Maintains persistent tracking state (track IDs, histories)
- GPU-accelerated IoU computation for track association
- Thread-safe for concurrent operations
Tracking Flow:
GPU Frame → Model Inference (GPU) → Detections (GPU)
→ Tracking Algorithm (GPU/CPU) → Track Assignment
→ Update Persistent Tracks → Return Tracked Objects
Detections → Track Association (GPU IoU) → Update Tracks → Return Tracked Objects
Features:
- GPU-first: All tensor operations stay on GPU until final results
- Lightweight: No model_repository dependency (zero VRAM overhead)
- GPU-accelerated: IoU computation on GPU for performance
- Persistent IDs: Tracks maintain consistent IDs across frames
- Track History: Maintains trajectory history for each object
- Configurable: Supports custom tracking algorithms via callbacks
- Thread-safe: Mutex-based locking for concurrent access
Example:
# Initialize with DI
repo = TensorRTModelRepository(gpu_id=0)
factory = TrackingFactory(gpu_id=0)
controller = factory.create_controller(
model_repository=repo,
model_id="yolov8_detector",
tracker_type="iou"
# Event-driven mode (no model dependency)
tracker = ObjectTracker(
gpu_id=0,
tracker_type="iou",
max_age=30,
iou_threshold=0.3,
class_names=COCO_CLASSES
)
# Track objects in frame
rgb_frame = decoder.get_latest_frame() # GPU tensor
tracked_objects = controller.track(rgb_frame)
# Get all tracked objects
all_tracks = controller.get_all_tracks()
# Update with pre-computed detections
detections = [Detection(bbox=[x1,y1,x2,y2], confidence=0.9, class_id=0)]
tracked_objects = tracker.update(detections)
"""
def __init__(self,
model_repository: TensorRTModelRepository,
model_id: str,
gpu_id: int = 0,
tracker_type: str = "iou",
max_age: int = 30,
min_confidence: float = 0.5,
iou_threshold: float = 0.3,
class_names: Optional[Dict[int, str]] = None):
"""
Initialize TrackingController.
Initialize ObjectTracker (no model dependency).
Args:
model_repository: TensorRT model repository (dependency injection)
model_id: Model ID in repository to use for detection
gpu_id: GPU device ID
tracker_type: Tracking algorithm type ("iou", "sort", "deepsort", "bytetrack")
gpu_id: GPU device ID for IoU computation
tracker_type: Tracking algorithm type ("iou")
max_age: Maximum frames to keep track without detection
min_confidence: Minimum confidence threshold for detections
iou_threshold: IoU threshold for track association
class_names: Optional mapping of class IDs to names
"""
self.model_repository = model_repository
self.model_id = model_id
self.gpu_id = gpu_id
self.device = torch.device(f'cuda:{gpu_id}')
self.tracker_type = tracker_type
self.max_age = max_age
self.min_confidence = min_confidence
self.iou_threshold = iou_threshold
self.class_names = class_names or {}
@ -146,19 +148,6 @@ class TrackingController:
self._total_detections = 0
self._total_tracks_created = 0
# Verify model exists in repository
metadata = self.model_repository.get_metadata(model_id)
if metadata is None:
raise ValueError(f"Model '{model_id}' not found in repository")
print(f"TrackingController initialized:")
print(f" Model ID: {model_id}")
print(f" GPU: {gpu_id}")
print(f" Tracker: {tracker_type}")
print(f" Max age: {max_age} frames")
print(f" Min confidence: {min_confidence}")
print(f" IoU threshold: {iou_threshold}")
def _compute_iou_gpu(self, boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
"""
Compute IoU between two sets of boxes on GPU.
@ -283,97 +272,51 @@ class TrackingController:
for tid in stale_track_ids:
del self._tracks[tid]
def track(self, frame: torch.Tensor,
preprocess_fn: Optional[callable] = None,
postprocess_fn: Optional[callable] = None) -> List[TrackedObject]:
def update(self, detections: List[Detection]) -> List[TrackedObject]:
"""
Track objects in a GPU tensor frame.
Update tracker with new detections (decoupled from inference).
Args:
frame: RGB frame as GPU tensor, shape (3, H, W) or (1, 3, H, W)
preprocess_fn: Optional preprocessing function (frame -> model_input)
postprocess_fn: Optional postprocessing function (model_output -> detections)
Should return tensor of shape (N, 6): [x1, y1, x2, y2, conf, class_id]
detections: List of Detection objects from model inference
Returns:
List of currently tracked objects
"""
with self._lock:
self._frame_count += 1
# Ensure frame is on correct device
if not frame.is_cuda:
frame = frame.to(self.device)
elif frame.device != self.device:
frame = frame.to(self.device)
# Preprocess frame for model
if preprocess_fn is not None:
model_input = preprocess_fn(frame)
else:
# Default: add batch dimension if needed
if frame.dim() == 3:
model_input = frame.unsqueeze(0) # (1, 3, H, W)
else:
model_input = frame
# Run inference (GPU-to-GPU)
# Assuming model expects input named "images" or "input"
metadata = self.model_repository.get_metadata(self.model_id)
input_name = metadata.input_names[0] if metadata else "images"
outputs = self.model_repository.infer(
model_id=self.model_id,
inputs={input_name: model_input},
synchronize=True
)
# Postprocess model output to get detections
if postprocess_fn is not None:
detections = postprocess_fn(outputs)
else:
# Default: assume output is already in correct format
# Get first output tensor
output_name = list(outputs.keys())[0]
detections = outputs[output_name]
# Reshape if needed: (1, N, 6) -> (N, 6)
if detections.dim() == 3:
detections = detections.squeeze(0)
# Filter by confidence
if detections.dim() == 2 and detections.shape[1] >= 5:
conf_mask = detections[:, 4] >= self.min_confidence
detections = detections[conf_mask]
self._total_detections += len(detections)
# Track objects
# No detections, just cleanup stale tracks
if len(detections) == 0:
# No detections, just cleanup stale tracks
self._cleanup_stale_tracks()
return list(self._tracks.values())
# Convert detections to tensor for GPU processing
det_tensor = torch.tensor(
[[*det.bbox, det.confidence, det.class_id] for det in detections],
dtype=torch.float32,
device=self.device
)
# Run tracking algorithm
if self.tracker_type == "iou":
associations = self._iou_tracking(detections)
associations = self._iou_tracking(det_tensor)
else:
raise NotImplementedError(f"Tracker type '{self.tracker_type}' not implemented")
# Update tracks based on associations
for det_idx, track_id in associations:
detection = detections[det_idx]
bbox = detection[:4].cpu().tolist()
confidence = float(detection[4])
class_id = int(detection[5]) if detection.shape[0] > 5 else 0
det = detections[det_idx]
if track_id == -1:
# Create new track
new_track = self._create_track(bbox, confidence, class_id, self._frame_count)
new_track = self._create_track(
det.bbox, det.confidence, det.class_id, self._frame_count
)
self._tracks[new_track.track_id] = new_track
else:
# Update existing track
self._tracks[track_id].update(bbox, confidence, self._frame_count)
self._tracks[track_id].update(det.bbox, det.confidence, self._frame_count)
# Cleanup stale tracks
self._cleanup_stale_tracks()
@ -476,7 +419,6 @@ class TrackingController:
'total_tracks_created': self._total_tracks_created,
'total_detections': self._total_detections,
'avg_detections_per_frame': self._total_detections / max(self._frame_count, 1),
'model_id': self.model_id,
'tracker_type': self.tracker_type,
'class_counts': self.get_class_counts(active_only=True)
}
@ -518,7 +460,6 @@ class TrackingController:
def __repr__(self):
with self._lock:
return (f"TrackingController(model={self.model_id}, "
f"tracker={self.tracker_type}, "
return (f"ObjectTracker(tracker={self.tracker_type}, "
f"frame={self._frame_count}, "
f"tracks={len(self._tracks)})")

View file

@ -1,8 +1,11 @@
import threading
from typing import Optional, Dict
from .tracking_controller import TrackingController
from .tracking_controller import ObjectTracker
from .model_repository import TensorRTModelRepository
# Backward-compatibility alias: code that still refers to TrackingController gets ObjectTracker
# (TrackingFactory itself is deprecated in event-driven mode)
TrackingController = ObjectTracker
class TrackingFactory:
"""