"""
|
|
Detailed profiling with timing instrumentation to find the exact bottleneck.
|
|
|
|
This script adds detailed timing logs at each stage of the pipeline.
|
|
"""

import asyncio
import os
import time
from collections import defaultdict

import torch
from dotenv import load_dotenv
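
# Make this script's parent directory importable so the local `services`
# package can be found without installing it.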
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from services import (
    StreamConnectionManager,
    YOLOv8Utils,
)

load_dotenv()

# Timing statistics
timings = defaultdict(list)   # event name -> list of timestamps
frame_timestamps = {}         # frame id -> {event name: timestamp, ...} plus any extra data


def log_timing(event, frame_id=None, extra_data=None):
    """Record a timing event, optionally attached to a specific frame's timeline."""
    timestamp = time.time()
    timings[event].append(timestamp)
    if frame_id is not None:
        if frame_id not in frame_timestamps:
            frame_timestamps[frame_id] = {}
        frame_timestamps[frame_id][event] = timestamp
        if extra_data:
            frame_timestamps[frame_id].update(extra_data)
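
# NOTE: this script only logs the 'result_received' event, so each frame's
# timeline records when its tracking result arrived; additional events can be
# logged from inside the pipeline to break the latency down stage by stage.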


async def instrumented_main():
    """Instrumented version of the profiling script with per-result timing logs."""
    print("=" * 80)
    print("Detailed Profiling: Event-Driven GPU-Accelerated Object Tracking")
    print("=" * 80)

    # Configuration
    GPU_ID = 0
    MODEL_PATH = "bangchak/models/frontal_detection_v5.pt"
    STREAM_URL = os.getenv('CAMERA_URL_1', 'rtsp://localhost:8554/test')
    BATCH_SIZE = 4
    FORCE_TIMEOUT = 0.05  # seconds; presumably the longest a partial batch waits before being flushed
    MAX_FRAMES = 50  # Fewer frames for detailed analysis

    print(f"\nConfiguration:")
    print(f" GPU: {GPU_ID}")
    print(f" Model: {MODEL_PATH}")
    print(f" Stream: {STREAM_URL}")
    print(f" Batch size: {BATCH_SIZE}")
    print(f" Max frames: {MAX_FRAMES}\n")

    # Create manager
    print("[1/3] Creating StreamConnectionManager...")
    manager = StreamConnectionManager(
        gpu_id=GPU_ID,
        batch_size=BATCH_SIZE,
        force_timeout=FORCE_TIMEOUT,
        enable_pt_conversion=True
    )
    print("✓ Manager created")
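
    # With enable_pt_conversion=True above, the manager is expected to convert the
    # .pt checkpoint itself during initialization, using the input shape and FP16
    # precision declared below.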

    # Initialize
    print("\n[2/3] Initializing...")
    await manager.initialize(
        model_path=MODEL_PATH,
        model_id="detector",
        preprocess_fn=YOLOv8Utils.preprocess,
        postprocess_fn=YOLOv8Utils.postprocess,
        num_contexts=4,
        pt_input_shapes={"images": (1, 3, 640, 640)},
        pt_precision=torch.float16
    )
    print("✓ Initialized")

    # Connect stream
    print("\n[3/3] Connecting to stream...")
    connection = await manager.connect_stream(
        rtsp_url=STREAM_URL,
        stream_id="camera_1",
        buffer_size=30
    )
    print("✓ Connected\n")

    print(f"{'=' * 80}")
    print(f"Running instrumented profiling for {MAX_FRAMES} frames...")
    print(f"{'=' * 80}\n")

    result_count = 0
    start_time = time.time()
    last_result_time = start_time
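
    # Consume tracking results as they arrive; the gap between consecutive results
    # is the end-to-end cadence of the pipeline.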
    try:
        async for result in connection.tracking_results():
            current_time = time.time()
            result_interval = (current_time - last_result_time) * 1000

            result_count += 1
            frame_id = result_count

            log_timing('result_received', frame_id, {
                'interval_ms': result_interval,
                'num_objects': len(result.tracked_objects),
                'num_detections': len(result.detections)
            })

            print(f"Frame {result_count:3d}: interval={result_interval:6.1f}ms, "
                  f"objects={len(result.tracked_objects):2d}, "
                  f"detections={len(result.detections):2d}")

            last_result_time = current_time

            if result_count >= MAX_FRAMES:
                print(f"\n✓ Reached max frames limit ({MAX_FRAMES})")
                break

    except KeyboardInterrupt:
        print(f"\n✓ Interrupted by user")

    # Cleanup
    print(f"\n{'=' * 80}")
    print("Cleanup")
    print(f"{'=' * 80}")
    await connection.stop()
    await manager.shutdown()
    print("✓ Stopped")

    # Analysis
    elapsed = time.time() - start_time
    avg_fps = result_count / elapsed if elapsed > 0 else 0

    print(f"\n{'=' * 80}")
    print("TIMING ANALYSIS")
    print(f"{'=' * 80}")
    print(f"\nOverall:")
    print(f" Results: {result_count}")
    print(f" Time: {elapsed:.1f}s")
    print(f" FPS: {avg_fps:.2f}")

    # Frame intervals
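    # At the stream's nominal 6 FPS, consecutive results should arrive roughly
    # 1000 ms / 6 ≈ 166.7 ms apart; a larger average or big spikes point to the bottleneck.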
    if len(frame_timestamps) > 1:
        intervals = []
        for i in range(2, result_count + 1):
            if i in frame_timestamps and (i - 1) in frame_timestamps:
                interval = (frame_timestamps[i]['result_received'] -
                            frame_timestamps[i - 1]['result_received']) * 1000
                intervals.append(interval)

        if intervals:
            print(f"\nFrame Intervals:")
            print(f" Min: {min(intervals):.1f}ms")
            print(f" Max: {max(intervals):.1f}ms")
            print(f" Avg: {sum(intervals)/len(intervals):.1f}ms")
            print(f" Expected (6 FPS): 166.7ms")
            print(f" Deviation: {(sum(intervals)/len(intervals) - 166.7):.1f}ms")


if __name__ == "__main__":
    asyncio.run(instrumented_main())