test script

2025-11-09 01:07:16 +07:00 · 2025-11-09 01:07:16 +07:00 · cf24a172a2
commit cf24a172a2
parent 3c83a57e44
1 changed files with 189 additions and 0 deletions
--- a/test_inference.py
+++ b/test_inference.py
@ -0,0 +1,189 @@
+
+import time
+import torch
+import os
+from dotenv import load_dotenv
+from services.model_repository import TensorRTModelRepository
+from services.stream_decoder import StreamDecoderFactory
+import numpy as np
+
+# COCO class names for YOLOv8
+COCO_CLASSES = [
+    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+    'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
+    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
+    'scissors', 'teddy bear', 'hair drier', 'toothbrush'
+]
+
+def postprocess(output, confidence_threshold=0.25, iou_threshold=0.45):
+    """
+    Post-processes the output of a YOLOv8 model to extract bounding boxes, scores, and class IDs.
+    """
+    # output shape: (batch_size, 84, 8400)
+    # 84 = 4 (bbox) + 80 (classes) 
+    
+    # Transpose the output to (batch_size, 8400, 84)
+    output = output.transpose(1, 2)
+
+    boxes = []
+    scores = []
+    class_ids = []
+
+    for detection in output[0]:
+        # First 4 values are bbox (cx, cy, w, h)
+        # The rest are class scores
+        
+        class_scores = detection[4:]
+        max_score, max_class_id = torch.max(class_scores, 0)
+
+        if max_score > confidence_threshold:
+            
+            cx, cy, w, h = detection[:4]
+            
+            # Convert from center-width-height to x1-y1-x2-y2
+            x1 = cx - w / 2
+            y1 = cy - h / 2
+            x2 = cx + w / 2
+            y2 = cy + h / 2
+            
+            boxes.append([x1.item(), y1.item(), x2.item(), y2.item()])
+            scores.append(max_score.item())
+            class_ids.append(max_class_id.item())
+
+    if not boxes:
+        return [], [], []
+
+    # Perform Non-Maximum Suppression (NMS)
+    # This is a simplified version. For production, use a library like torchvision.ops.nms
+    indices = []
+    boxes_np = np.array(boxes)
+    scores_np = np.array(scores)
+    
+    order = scores_np.argsort()[::-1]
+    
+    while order.size > 0:
+        i = order[0]
+        indices.append(i)
+        
+        xx1 = np.maximum(boxes_np[i, 0], boxes_np[order[1:], 0])
+        yy1 = np.maximum(boxes_np[i, 1], boxes_np[order[1:], 1])
+        xx2 = np.minimum(boxes_np[i, 2], boxes_np[order[1:], 2])
+        yy2 = np.minimum(boxes_np[i, 3], boxes_np[order[1:], 3])
+        
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        
+        ovr = inter / ((boxes_np[i, 2] - boxes_np[i, 0] + 1) * (boxes_np[i, 3] - boxes_np[i, 1] + 1) + \
+                       (boxes_np[order[1:], 2] - boxes_np[order[1:], 0] + 1) * \
+                       (boxes_np[order[1:], 3] - boxes_np[order[1:], 1] + 1) - inter)
+        
+        inds = np.where(ovr <= iou_threshold)[0]
+        order = order[inds + 1]
+
+    final_boxes = [boxes[i] for i in indices]
+    final_scores = [scores[i] for i in indices]
+    final_class_ids = [class_ids[i] for i in indices]
+
+    return final_boxes, final_scores, final_class_ids
+
+
+def test_rtsp_stream_with_inference():
+    """
+    Decodes an RTSP stream and runs inference, printing bounding boxes and class names.
+    """
+    load_dotenv()
+    rtsp_url = os.getenv("CAMERA_URL_1")
+    if not rtsp_url:
+        print("Error: CAMERA_URL_1 not found in .env file.")
+        return
+
+    print("=" * 80)
+    print("RTSP Stream + TensorRT Inference")
+    print("=" * 80)
+
+    # Initialize components
+    decoder_factory = StreamDecoderFactory(gpu_id=0)
+    model_repo = TensorRTModelRepository(gpu_id=0, default_num_contexts=1)
+
+    # Setup camera stream
+    decoder = decoder_factory.create_decoder(rtsp_url, buffer_size=1)
+    decoder.start()
+
+    # Load inference model
+    model_path = "models/yolov8n.trt"
+    try:
+        model_repo.load_model(
+            model_id="camera_main",
+            file_path=model_path
+        )
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        print(f"Please ensure '{model_path}' exists.")
+        decoder.stop()
+        return
+
+    print("\nWaiting for stream to buffer frames...")
+    time.sleep(3)
+
+    try:
+        while True:
+            frame_gpu = decoder.get_latest_frame(rgb=True)
+
+            if frame_gpu is None:
+                time.sleep(0.1)
+                continue
+
+            # Preprocess frame for YOLOv8
+            # Resize to 640x640, normalize, and add batch dimension
+            frame_float = frame_gpu.unsqueeze(0).float() # Convert to float here
+            frame_resized = torch.nn.functional.interpolate(
+                frame_float, size=(640, 640), mode='bilinear', align_corners=False
+            )
+            frame_normalized = frame_resized.float() / 255.0
+
+            # Run inference
+            try:
+                outputs = model_repo.infer(
+                    model_id="camera_main",
+                    inputs={"images": frame_normalized},
+                    synchronize=True
+                )
+                
+                # Post-process the output
+                output_tensor = outputs['output0']
+                boxes, scores, class_ids = postprocess(output_tensor)
+
+                # Print results
+                print(f"\n--- Frame at {time.time():.2f} ---")
+                if boxes:
+                    for box, score, class_id in zip(boxes, scores, class_ids):
+                        class_name = COCO_CLASSES[class_id]
+                        print(
+                            f"  Detected: {class_name} "
+                            f"(confidence: {score:.2f}) at "
+                            f"bbox: [{box[0]:.0f}, {box[1]:.0f}, {box[2]:.0f}, {box[3]:.0f}]"
+                        )
+                else:
+                    print("  No objects detected.")
+
+            except Exception as e:
+                print(f"Inference failed: {e}")
+
+            time.sleep(0.03) # ~30 FPS
+
+    except KeyboardInterrupt:
+        print("\nStopping...")
+    finally:
+        # Cleanup
+        decoder.stop()
+        model_repo.unload_model("camera_main")
+        print("Stream and model unloaded.")
+
+if __name__ == "__main__":
+    test_rtsp_stream_with_inference()