diff --git a/.gitea/workflows/build.yml b/.gitea/workflows/build.yml
deleted file mode 100644
index 585009f..0000000
--- a/.gitea/workflows/build.yml
+++ /dev/null
@@ -1,112 +0,0 @@
-name: Build Worker Base and Application Images
-
-on:
-  push:
-    branches:
-      - main
-      - dev
-  workflow_dispatch:
-    inputs:
-      force_base_build:
-        description: 'Force base image build regardless of changes'
-        required: false
-        default: 'false'
-        type: boolean
-
-jobs:
-  check-base-changes:
-    runs-on: ubuntu-latest
-    outputs:
-      base-changed: ${{ steps.changes.outputs.base-changed }}
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 2
-      - name: Check for base changes
-        id: changes
-        run: |
-          if git diff HEAD^ HEAD --name-only | grep -E "(Dockerfile\.base|requirements\.base\.txt)" > /dev/null; then
-            echo "base-changed=true" >> $GITHUB_OUTPUT
-          else
-            echo "base-changed=false" >> $GITHUB_OUTPUT
-          fi
-
-  build-base:
-    needs: check-base-changes
-    if: needs.check-base-changes.outputs.base-changed == 'true' || (github.event_name == 'workflow_dispatch' && github.event.inputs.force_base_build == 'true')
-    runs-on: ubuntu-latest
-    permissions:
-      packages: write
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: git.siwatsystem.com
-          username: ${{ github.actor }}
-          password: ${{ secrets.RUNNER_TOKEN }}
-
-      - name: Build and push base Docker image
-        uses: docker/build-push-action@v4
-        with:
-          context: .
-          file: ./Dockerfile.base
-          push: true
-          tags: git.siwatsystem.com/adsist-cms/worker-base:latest
-
-  build-docker:
-    needs: [check-base-changes, build-base]
-    if: always() && (needs.build-base.result == 'success' || needs.build-base.result == 'skipped')
-    runs-on: ubuntu-latest
-    permissions:
-      packages: write
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: git.siwatsystem.com
-          username: ${{ github.actor }}
-          password: ${{ secrets.RUNNER_TOKEN }}
-
-      - name: Build and push Docker image
-        uses: docker/build-push-action@v4
-        with:
-          context: .
-          file: ./Dockerfile
-          push: true
-          tags: git.siwatsystem.com/adsist-cms/worker:${{ github.ref_name == 'main' && 'latest' || 'dev' }}
-
-  deploy-stack:
-    needs: build-docker
-    runs-on: adsist
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-      - name: Set up SSH connection
-        run: |
-          mkdir -p ~/.ssh
-          echo "${{ secrets.DEPLOY_KEY_CMS }}" > ~/.ssh/id_rsa
-          chmod 600 ~/.ssh/id_rsa
-          ssh-keyscan -H ${{ vars.DEPLOY_HOST_CMS }} >> ~/.ssh/known_hosts
-      - name: Deploy stack
-        run: |
-          echo "Pulling and starting containers on server..."
-          if [ "${{ github.ref_name }}" = "main" ]; then
-            echo "Deploying production stack..."
-            ssh -i ~/.ssh/id_rsa ${{ vars.DEPLOY_USER_CMS }}@${{ vars.DEPLOY_HOST_CMS }} "cd ~/cms-system-k8s && docker compose -f docker-compose.production.yml pull && docker compose -f docker-compose.production.yml up -d"
-          else
-            echo "Deploying staging stack..."
-            ssh -i ~/.ssh/id_rsa ${{ vars.DEPLOY_USER_CMS }}@${{ vars.DEPLOY_HOST_CMS }} "cd ~/cms-system-k8s && docker compose -f docker-compose.staging.yml pull && docker compose -f docker-compose.staging.yml up -d"
-          fi
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index c990ddb..2c881e8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,15 +1,3 @@
-/models
-app.log
-*.pt
-# All pycache directories
-__pycache__/
-.mptacache
-
-mptas
-detector_worker.log
-.gitignore
-no_frame_debug.log
-
-feeder/
-.venv/
+/__pycache__
+models
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 100644
index 06f7b97..0000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1,277 +0,0 @@
-# Python Detector Worker - CLAUDE.md
-
-## Project Overview
-This is a FastAPI-based computer vision detection worker that processes video streams from RTSP/HTTP sources and runs advanced YOLO-based machine learning pipelines for multi-class object detection and parallel classification. The system features comprehensive database integration, Redis support, and hierarchical pipeline execution designed to work within a larger CMS (Content Management System) architecture.
-
-### Key Features
-- **Multi-Class Detection**: Simultaneous detection of multiple object classes (e.g., Car + Frontal)
-- **Parallel Processing**: Concurrent execution of classification branches using ThreadPoolExecutor
-- **Database Integration**: Automatic PostgreSQL schema management and record updates
-- **Redis Actions**: Image storage with region cropping and pub/sub messaging
-- **Pipeline Synchronization**: Branch coordination with `waitForBranches` functionality
-- **Dynamic Field Mapping**: Template-based field resolution for database operations
-
-## Architecture & Technology Stack
-- **Framework**: FastAPI with WebSocket support
-- **ML/CV**: PyTorch, Ultralytics YOLO, OpenCV
-- **Containerization**: Docker (Python 3.13-bookworm base)
-- **Data Storage**: Redis integration for action handling + PostgreSQL for persistent storage
-- **Database**: Automatic schema management with gas_station_1 database
-- **Parallel Processing**: ThreadPoolExecutor for concurrent classification
-- **Communication**: WebSocket-based real-time protocol
-
-## Core Components
-
-### Main Application (`app.py`)
-- **FastAPI WebSocket server** for real-time communication
-- **Multi-camera stream management** with shared stream optimization
-- **HTTP REST endpoint** for image retrieval (`/camera/{camera_id}/image`)
-- **Threading-based frame readers** for RTSP streams and HTTP snapshots
-- **Model loading and inference** using MPTA (Machine Learning Pipeline Archive) format
-- **Session management** with display identifier mapping
-- **Resource monitoring** (CPU, memory, GPU usage via psutil)
-
-### Pipeline System (`siwatsystem/pympta.py`)
-- **MPTA file handling** - ZIP archives containing model configurations
-- **Hierarchical pipeline execution** with detection → classification branching
-- **Multi-class detection** - Simultaneous detection of multiple classes (Car + Frontal)
-- **Parallel processing** - Concurrent classification branches with ThreadPoolExecutor
-- **Redis action system** - Image saving with region cropping and message publishing
-- **PostgreSQL integration** - Automatic table creation and combined updates
-- **Dynamic model loading** with GPU optimization
-- **Configurable trigger classes and confidence thresholds**
-- **Branch synchronization** - waitForBranches coordination for database updates
-
-### Database System (`siwatsystem/database.py`)
-- **DatabaseManager class** for PostgreSQL operations
-- **Automatic table creation** with gas_station_1.car_frontal_info schema
-- **Combined update operations** with field mapping from branch results
-- **Session management** with UUID generation
-- **Error handling** and connection management
-
-### Testing & Debugging
-- **Protocol test script** (`test_protocol.py`) for WebSocket communication validation
-- **Pipeline webcam utility** (`pipeline_webcam.py`) for local testing with visual output
-- **RTSP streaming debug tool** (`debug/rtsp_webcam.py`) using GStreamer
-
-## Code Conventions & Patterns
-
-### Logging
-- **Structured logging** using Python's logging module
-- **File + console output** to `detector_worker.log`
-- **Debug level separation** for detailed troubleshooting
-- **Context-aware messages** with camera IDs and model information
-
-### Error Handling
-- **Graceful failure handling** with retry mechanisms (configurable max_retries)
-- **Thread-safe operations** using locks for streams and models
-- **WebSocket disconnect handling** with proper cleanup
-- **Model loading validation** with detailed error reporting
-
-### Configuration
-- **JSON configuration** (`config.json`) for runtime parameters:
-  - `poll_interval_ms`: Frame processing interval
-  - `max_streams`: Concurrent stream limit
-  - `target_fps`: Target frame rate
-  - `reconnect_interval_sec`: Stream reconnection delay
-  - `max_retries`: Maximum retry attempts (-1 for unlimited)
-
-### Threading Model
-- **Frame reader threads** for each camera stream (RTSP/HTTP)
-- **Shared stream optimization** - multiple subscriptions can reuse the same camera stream
-- **Async WebSocket handling** with concurrent task management
-- **Thread-safe data structures** with proper locking mechanisms
-
-## WebSocket Protocol
-
-### Message Types
-- **subscribe**: Start camera stream with model pipeline
-- **unsubscribe**: Stop camera stream processing
-- **requestState**: Request current worker status
-- **setSessionId**: Associate display with session identifier
-- **patchSession**: Update session data
-- **stateReport**: Periodic heartbeat with system metrics
-- **imageDetection**: Detection results with timestamp and model info
-
-### Subscription Format
-```json
-{
-  "type": "subscribe",
-  "payload": {
-    "subscriptionIdentifier": "display-001;cam-001",
-    "rtspUrl": "rtsp://...", // OR snapshotUrl
-    "snapshotUrl": "http://...",
-    "snapshotInterval": 5000,
-    "modelUrl": "http://...model.mpta",
-    "modelId": 101,
-    "modelName": "Vehicle Detection",
-    "cropX1": 100, "cropY1": 200,
-    "cropX2": 300, "cropY2": 400
-  }
-}
-```
-
-## Model Pipeline (MPTA) Format
-
-### Enhanced Structure
-- **ZIP archive** containing models and configuration
-- **pipeline.json** - Main configuration file with Redis + PostgreSQL settings
-- **Model files** - YOLO .pt files for detection/classification
-- **Multi-model support** - Detection + multiple classification models
-
-### Advanced Pipeline Flow
-1. **Multi-class detection stage** - YOLO detection of Car + Frontal simultaneously
-2. **Validation stage** - Check for expected classes (flexible matching)
-3. **Database initialization** - Create initial record with session_id
-4. **Redis actions** - Save cropped frontal images with expiration
-5. **Parallel classification** - Concurrent brand and body type classification
-6. **Branch synchronization** - Wait for all classification branches to complete
-7. **Database update** - Combined update with all classification results
-
-### Enhanced Branch Configuration
-```json
-{
-  "modelId": "car_frontal_detection_v1",
-  "modelFile": "car_frontal_detection_v1.pt",
-  "multiClass": true,
-  "expectedClasses": ["Car", "Frontal"],
-  "triggerClasses": ["Car", "Frontal"],
-  "minConfidence": 0.8,
-  "actions": [
-    {
-      "type": "redis_save_image",
-      "region": "Frontal",
-      "key": "inference:{display_id}:{timestamp}:{session_id}:{filename}",
-      "expire_seconds": 600
-    }
-  ],
-  "branches": [
-    {
-      "modelId": "car_brand_cls_v1",
-      "modelFile": "car_brand_cls_v1.pt",
-      "parallel": true,
-      "crop": true,
-      "cropClass": "Frontal",
-      "triggerClasses": ["Frontal"],
-      "minConfidence": 0.85
-    }
-  ],
-  "parallelActions": [
-    {
-      "type": "postgresql_update_combined",
-      "table": "car_frontal_info",
-      "key_field": "session_id",
-      "waitForBranches": ["car_brand_cls_v1", "car_bodytype_cls_v1"],
-      "fields": {
-        "car_brand": "{car_brand_cls_v1.brand}",
-        "car_body_type": "{car_bodytype_cls_v1.body_type}"
-      }
-    }
-  ]
-}
-```
-
-## Stream Management
-
-### Shared Streams
-- Multiple subscriptions can share the same camera URL
-- Reference counting prevents premature stream termination
-- Automatic cleanup when last subscription ends
-
-### Frame Processing
-- **Queue-based buffering** with single frame capacity (latest frame only)
-- **Configurable polling interval** based on target FPS
-- **Automatic reconnection** with exponential backoff
-
-## Development & Testing
-
-### Local Development
-```bash
-# Install dependencies
-pip install -r requirements.txt
-
-# Run the worker
-python app.py
-
-# Test protocol compliance
-python test_protocol.py
-
-# Test pipeline with webcam
-python pipeline_webcam.py --mpta-file path/to/model.mpta --video 0
-```
-
-### Docker Deployment
-```bash
-# Build container
-docker build -t detector-worker .
-
-# Run with volume mounts for models
-docker run -p 8000:8000 -v ./models:/app/models detector-worker
-```
-
-### Testing Commands
-- **Protocol testing**: `python test_protocol.py`
-- **Pipeline validation**: `python pipeline_webcam.py --mpta-file --video 0`
-- **RTSP debugging**: `python debug/rtsp_webcam.py`
-
-## Dependencies
-- **fastapi[standard]**: Web framework with WebSocket support
-- **uvicorn**: ASGI server
-- **torch, torchvision**: PyTorch for ML inference
-- **ultralytics**: YOLO implementation
-- **opencv-python**: Computer vision operations
-- **websockets**: WebSocket client/server
-- **redis**: Redis client for action execution
-- **psycopg2-binary**: PostgreSQL database adapter
-- **scipy**: Scientific computing for advanced algorithms
-- **filterpy**: Kalman filtering and state estimation
-
-## Security Considerations
-- Model files are loaded from trusted sources only
-- Redis connections use authentication when configured
-- WebSocket connections handle disconnects gracefully
-- Resource usage is monitored to prevent DoS
-
-## Database Integration
-
-### Schema Management
-The system automatically creates and manages PostgreSQL tables:
-
-```sql
-CREATE TABLE IF NOT EXISTS gas_station_1.car_frontal_info (
-    display_id VARCHAR(255),
-    captured_timestamp VARCHAR(255),
-    session_id VARCHAR(255) PRIMARY KEY,
-    license_character VARCHAR(255) DEFAULT NULL,
-    license_type VARCHAR(255) DEFAULT 'No model available',
-    car_brand VARCHAR(255) DEFAULT NULL,
-    car_model VARCHAR(255) DEFAULT NULL,
-    car_body_type VARCHAR(255) DEFAULT NULL,
-    created_at TIMESTAMP DEFAULT NOW(),
-    updated_at TIMESTAMP DEFAULT NOW()
-);
-```
-
-### Workflow
-1. **Detection**: When both "Car" and "Frontal" are detected, create initial database record with UUID session_id
-2. **Redis Storage**: Save cropped frontal image to Redis with session_id in key
-3. **Parallel Processing**: Run brand and body type classification concurrently
-4. **Synchronization**: Wait for all branches to complete using `waitForBranches`
-5. **Database Update**: Update record with combined classification results using field mapping
-
-### Field Mapping
-Templates like `{car_brand_cls_v1.brand}` are resolved to actual classification results:
-- `car_brand_cls_v1.brand` → "Honda"
-- `car_bodytype_cls_v1.body_type` → "Sedan"
-
-## Performance Optimizations
-- GPU acceleration when CUDA is available
-- Shared camera streams reduce resource usage
-- Frame queue optimization (single latest frame)
-- Model caching across subscriptions
-- Trigger class filtering for faster inference
-- Parallel processing with ThreadPoolExecutor for classification branches
-- Multi-class detection reduces inference passes
-- Region-based cropping minimizes processing overhead
-- Database connection pooling and prepared statements
-- Redis image storage with automatic expiration
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index 2b3fcc6..0000000
--- a/Dockerfile
+++ /dev/null
@@ -1,12 +0,0 @@
-# Use our pre-built base image with ML dependencies
-FROM git.siwatsystem.com/adsist-cms/worker-base:latest
-
-# Copy and install application requirements (frequently changing dependencies)
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Copy the application code
-COPY . .
-
-# Run the application
-CMD ["python3", "-m", "fastapi", "run", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
diff --git a/Dockerfile.base b/Dockerfile.base
deleted file mode 100644
index 3700920..0000000
--- a/Dockerfile.base
+++ /dev/null
@@ -1,15 +0,0 @@
-# Base image with all ML dependencies
-FROM python:3.13-bookworm
-
-# Install system dependencies
-RUN apt update && apt install -y libgl1 && rm -rf /var/lib/apt/lists/*
-
-# Copy and install base requirements (ML dependencies that rarely change)
-COPY requirements.base.txt .
-RUN pip install --no-cache-dir -r requirements.base.txt
-
-# Set working directory
-WORKDIR /app
-
-# This base image will be reused for all worker builds
-CMD ["python3", "-m", "fastapi", "run", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
diff --git a/app.log b/app.log
new file mode 100644
index 0000000..0866815
--- /dev/null
+++ b/app.log
@@ -0,0 +1,601 @@
+2025-01-09 00:43:08,967 [INFO] Will watch for changes in these directories: ['/Users/siwatsirichai/Documents/GitHub/python-detector-worker']
+2025-01-09 00:43:08,967 [INFO] Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
+2025-01-09 00:43:08,967 [INFO] Started reloader process [36467] using WatchFiles
+2025-01-09 00:43:09,356 [INFO] 1 change detected
+2025-01-09 00:43:10,532 [INFO] Started server process [36471]
+2025-01-09 00:43:10,534 [INFO] Waiting for application startup.
+2025-01-09 00:43:10,534 [INFO] Application startup complete.
+2025-01-09 00:43:17,203 [INFO] WebSocket connection accepted
+2025-01-09 00:43:17,205 [INFO] ('127.0.0.1', 59148) - "WebSocket /" [accepted]
+2025-01-09 00:43:17,207 [INFO] connection open
+2025-01-09 00:43:17,207 [INFO] Started processing streams
+2025-01-09 00:43:23,325 [INFO] Subscribed to camera camera1 with URL rtsp://192.168.0.66:8554/common_room
+2025-01-09 00:44:48,212 [INFO] 1 change detected
+2025-01-09 00:44:48,217 [WARNING] WatchFiles detected changes in 'app.py'. Reloading...
+2025-01-09 00:44:48,227 [INFO] Shutting down +2025-01-09 00:44:48,239 [ERROR] Error in WebSocket connection: (1012, None) +2025-01-09 00:44:48,255 [INFO] Released camera camera1 +2025-01-09 00:44:48,255 [INFO] WebSocket connection closed +2025-01-09 00:44:48,256 [ERROR] Exception in ASGI application +Traceback (most recent call last): + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 243, in run_asgi + result = await self.app(self.scope, self.asgi_receive, self.asgi_send) # type: ignore[func-returns-value] + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__ + return await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/applications.py", line 1054, in __call__ + await super().__call__(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/applications.py", line 113, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/errors.py", line 152, in __call__ + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/exceptions.py", line 62, in __call__ + await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 715, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 735, in app + await route.handle(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 362, in handle + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 95, in app + await wrap_app_handling_exceptions(app, session)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 93, in app + await func(session) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/routing.py", line 383, in app + await dependant.call(**solved_result.values) + File "/Users/siwatsirichai/Documents/GitHub/python-detector-worker/app.py", line 102, in detect + streams.clear() + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 180, in close + await self.send({"type": "websocket.close", "code": code, "reason": reason or ""}) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 85, in send + await self._send(message) + File 
"/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 39, in sender + await send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 359, in asgi_send + raise RuntimeError(msg % message_type) +RuntimeError: Unexpected ASGI message 'websocket.close', after sending 'websocket.close' or response already completed. +2025-01-09 00:44:48,323 [INFO] connection closed +2025-01-09 00:44:48,333 [INFO] Waiting for application shutdown. +2025-01-09 00:44:48,334 [INFO] Application shutdown complete. +2025-01-09 00:44:48,335 [INFO] Finished server process [36471] +2025-01-09 00:44:48,728 [INFO] 1 change detected +2025-01-09 00:44:51,790 [INFO] Started server process [36622] +2025-01-09 00:44:51,793 [INFO] Waiting for application startup. +2025-01-09 00:44:51,794 [INFO] Application startup complete. +2025-01-09 00:44:52,764 [INFO] WebSocket connection accepted +2025-01-09 00:44:52,764 [INFO] ('127.0.0.1', 59328) - "WebSocket /" [accepted] +2025-01-09 00:44:52,765 [INFO] connection open +2025-01-09 00:44:52,766 [INFO] Started processing streams +2025-01-09 00:44:59,314 [INFO] Subscribed to camera camera1 with URL rtsp://192.168.0.66:8554/common_room +2025-01-09 00:45:23,328 [ERROR] Error in WebSocket connection: (, '') +2025-01-09 00:45:23,354 [INFO] Released camera camera1 +2025-01-09 00:45:23,354 [INFO] WebSocket connection closed +2025-01-09 00:45:23,356 [ERROR] Exception in ASGI application +Traceback (most recent call last): + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 243, in run_asgi + result = await self.app(self.scope, self.asgi_receive, self.asgi_send) # type: ignore[func-returns-value] + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__ + return await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/applications.py", line 1054, in __call__ + await super().__call__(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/applications.py", line 113, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/errors.py", line 152, in __call__ + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/exceptions.py", line 62, in __call__ + await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 715, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 735, in app + await route.handle(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 362, in handle + await self.app(scope, receive, send) + File 
"/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 95, in app + await wrap_app_handling_exceptions(app, session)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 93, in app + await func(session) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/routing.py", line 383, in app + await dependant.call(**solved_result.values) + File "/Users/siwatsirichai/Documents/GitHub/python-detector-worker/app.py", line 104, in detect + await websocket.close() + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 180, in close + await self.send({"type": "websocket.close", "code": code, "reason": reason or ""}) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 85, in send + await self._send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 39, in sender + await send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 359, in asgi_send + raise RuntimeError(msg % message_type) +RuntimeError: Unexpected ASGI message 'websocket.close', after sending 'websocket.close' or response already completed. +2025-01-09 00:45:23,433 [INFO] connection closed +2025-01-09 00:45:25,088 [INFO] WebSocket connection accepted +2025-01-09 00:45:25,088 [INFO] ('127.0.0.1', 59396) - "WebSocket /" [accepted] +2025-01-09 00:45:25,091 [INFO] connection open +2025-01-09 00:45:25,092 [INFO] Started processing streams +2025-01-09 00:45:31,313 [INFO] Subscribed to camera camera1 with URL rtsp://192.168.0.66:8554/common_room +2025-01-09 00:45:37,901 [INFO] Shutting down +2025-01-09 00:45:37,906 [ERROR] Error in WebSocket connection: (1012, None) +2025-01-09 00:45:37,919 [INFO] Released camera camera1 +2025-01-09 00:45:37,919 [INFO] WebSocket connection closed +2025-01-09 00:45:37,919 [ERROR] Exception in ASGI application +Traceback (most recent call last): + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 243, in run_asgi + result = await self.app(self.scope, self.asgi_receive, self.asgi_send) # type: ignore[func-returns-value] + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__ + return await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/applications.py", line 1054, in __call__ + await super().__call__(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/applications.py", line 113, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/errors.py", line 152, in __call__ + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/exceptions.py", line 62, in __call__ + await 
wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 715, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 735, in app + await route.handle(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 362, in handle + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 95, in app + await wrap_app_handling_exceptions(app, session)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 93, in app + await func(session) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/routing.py", line 383, in app + await dependant.call(**solved_result.values) + File "/Users/siwatsirichai/Documents/GitHub/python-detector-worker/app.py", line 104, in detect + await websocket.close() + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 180, in close + await self.send({"type": "websocket.close", "code": code, "reason": reason or ""}) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 85, in send + await self._send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 39, in sender + await send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 359, in asgi_send + raise RuntimeError(msg % message_type) +RuntimeError: Unexpected ASGI message 'websocket.close', after sending 'websocket.close' or response already completed. +2025-01-09 00:45:37,921 [INFO] connection closed +2025-01-09 00:45:38,006 [INFO] Waiting for application shutdown. +2025-01-09 00:45:38,007 [INFO] Application shutdown complete. +2025-01-09 00:45:38,008 [INFO] Finished server process [36622] +2025-01-09 00:45:38,031 [INFO] Stopping reloader process [36467] +2025-01-09 00:46:40,345 [INFO] Will watch for changes in these directories: ['/Users/siwatsirichai/Documents/GitHub/python-detector-worker'] +2025-01-09 00:46:40,346 [INFO] Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit) +2025-01-09 00:46:40,347 [INFO] Started reloader process [36868] using WatchFiles +2025-01-09 00:46:42,402 [INFO] Started server process [36902] +2025-01-09 00:46:42,404 [INFO] Waiting for application startup. +2025-01-09 00:46:42,405 [INFO] Application startup complete. 
+2025-01-09 00:46:42,439 [INFO] WebSocket connection accepted +2025-01-09 00:46:42,439 [INFO] ('127.0.0.1', 59523) - "WebSocket /" [accepted] +2025-01-09 00:46:42,440 [INFO] connection open +2025-01-09 00:46:42,440 [INFO] Started processing streams +2025-01-09 00:46:47,311 [INFO] Subscribed to camera camera1 with URL rtsp://192.168.0.66:8554/common_room +2025-01-09 00:46:51,990 [ERROR] Error in WebSocket connection: (, '') +2025-01-09 00:46:52,001 [INFO] Released camera camera1 +2025-01-09 00:46:52,002 [INFO] WebSocket connection closed +2025-01-09 00:46:52,002 [ERROR] Exception in ASGI application +Traceback (most recent call last): + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 243, in run_asgi + result = await self.app(self.scope, self.asgi_receive, self.asgi_send) # type: ignore[func-returns-value] + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__ + return await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/applications.py", line 1054, in __call__ + await super().__call__(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/applications.py", line 113, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/errors.py", line 152, in __call__ + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/exceptions.py", line 62, in __call__ + await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 715, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 735, in app + await route.handle(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 362, in handle + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 95, in app + await wrap_app_handling_exceptions(app, session)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 93, in app + await func(session) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/routing.py", line 383, in app + await dependant.call(**solved_result.values) + File "/Users/siwatsirichai/Documents/GitHub/python-detector-worker/app.py", line 104, in detect + await websocket.close() + File 
"/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 180, in close + await self.send({"type": "websocket.close", "code": code, "reason": reason or ""}) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 85, in send + await self._send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 39, in sender + await send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 359, in asgi_send + raise RuntimeError(msg % message_type) +RuntimeError: Unexpected ASGI message 'websocket.close', after sending 'websocket.close' or response already completed. +2025-01-09 00:46:52,030 [INFO] connection closed +2025-01-09 00:47:56,615 [INFO] WebSocket connection accepted +2025-01-09 00:47:56,616 [INFO] ('127.0.0.1', 59664) - "WebSocket /" [accepted] +2025-01-09 00:47:56,628 [INFO] connection open +2025-01-09 00:47:56,631 [INFO] Started processing streams +2025-01-09 00:48:03,306 [INFO] Subscribed to camera camera1 with URL rtsp://192.168.0.66:8554/common_room +2025-01-09 00:48:06,345 [ERROR] Error in WebSocket connection: (, '') +2025-01-09 00:48:06,352 [INFO] Released camera camera1 +2025-01-09 00:48:06,352 [INFO] WebSocket connection closed +2025-01-09 00:48:06,353 [ERROR] Exception in ASGI application +Traceback (most recent call last): + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 243, in run_asgi + result = await self.app(self.scope, self.asgi_receive, self.asgi_send) # type: ignore[func-returns-value] + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__ + return await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/applications.py", line 1054, in __call__ + await super().__call__(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/applications.py", line 113, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/errors.py", line 152, in __call__ + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/exceptions.py", line 62, in __call__ + await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 715, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 735, in app + await route.handle(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 362, in handle + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 95, in app + await 
wrap_app_handling_exceptions(app, session)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 93, in app + await func(session) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/routing.py", line 383, in app + await dependant.call(**solved_result.values) + File "/Users/siwatsirichai/Documents/GitHub/python-detector-worker/app.py", line 104, in detect + await websocket.close() + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 180, in close + await self.send({"type": "websocket.close", "code": code, "reason": reason or ""}) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 85, in send + await self._send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 39, in sender + await send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 359, in asgi_send + raise RuntimeError(msg % message_type) +RuntimeError: Unexpected ASGI message 'websocket.close', after sending 'websocket.close' or response already completed. +2025-01-09 00:48:06,361 [INFO] connection closed +2025-01-09 00:48:38,544 [INFO] WebSocket connection accepted +2025-01-09 00:48:38,545 [INFO] ('127.0.0.1', 59735) - "WebSocket /" [accepted] +2025-01-09 00:48:38,546 [INFO] connection open +2025-01-09 00:48:38,550 [INFO] Started processing streams +2025-01-09 00:48:43,303 [INFO] Subscribed to camera camera1 with URL rtsp://192.168.0.66:8554/common_room +2025-01-09 00:49:28,103 [ERROR] Error in WebSocket connection: (, '') +2025-01-09 00:49:28,115 [INFO] Released camera camera1 +2025-01-09 00:49:28,116 [INFO] WebSocket connection closed +2025-01-09 00:49:28,116 [ERROR] Exception in ASGI application +Traceback (most recent call last): + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 243, in run_asgi + result = await self.app(self.scope, self.asgi_receive, self.asgi_send) # type: ignore[func-returns-value] + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__ + return await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/applications.py", line 1054, in __call__ + await super().__call__(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/applications.py", line 113, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/errors.py", line 152, in __call__ + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/exceptions.py", line 62, in __call__ + await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in 
wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 715, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 735, in app + await route.handle(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 362, in handle + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 95, in app + await wrap_app_handling_exceptions(app, session)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 93, in app + await func(session) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/routing.py", line 383, in app + await dependant.call(**solved_result.values) + File "/Users/siwatsirichai/Documents/GitHub/python-detector-worker/app.py", line 104, in detect + await websocket.close() + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 180, in close + await self.send({"type": "websocket.close", "code": code, "reason": reason or ""}) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 85, in send + await self._send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 39, in sender + await send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 359, in asgi_send + raise RuntimeError(msg % message_type) +RuntimeError: Unexpected ASGI message 'websocket.close', after sending 'websocket.close' or response already completed. 
+2025-01-09 00:49:28,125 [INFO] connection closed +2025-01-09 00:50:30,615 [INFO] WebSocket connection accepted +2025-01-09 00:50:30,616 [INFO] ('127.0.0.1', 59919) - "WebSocket /" [accepted] +2025-01-09 00:50:30,618 [INFO] connection open +2025-01-09 00:50:30,619 [INFO] Started processing streams +2025-01-09 00:50:35,299 [INFO] Subscribed to camera camera1 with URL rtsp://192.168.0.66:8554/common_room +2025-01-09 00:51:20,717 [ERROR] Error in WebSocket connection: (, '') +2025-01-09 00:51:20,727 [INFO] Released camera camera1 +2025-01-09 00:51:20,727 [INFO] WebSocket connection closed +2025-01-09 00:51:20,727 [ERROR] Exception in ASGI application +Traceback (most recent call last): + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 243, in run_asgi + result = await self.app(self.scope, self.asgi_receive, self.asgi_send) # type: ignore[func-returns-value] + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__ + return await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/applications.py", line 1054, in __call__ + await super().__call__(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/applications.py", line 113, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/errors.py", line 152, in __call__ + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/exceptions.py", line 62, in __call__ + await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 715, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 735, in app + await route.handle(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 362, in handle + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 95, in app + await wrap_app_handling_exceptions(app, session)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 93, in app + await func(session) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/routing.py", line 383, in app + await dependant.call(**solved_result.values) + File "/Users/siwatsirichai/Documents/GitHub/python-detector-worker/app.py", line 104, in detect + await websocket.close() + File 
"/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 180, in close + await self.send({"type": "websocket.close", "code": code, "reason": reason or ""}) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 85, in send + await self._send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 39, in sender + await send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 359, in asgi_send + raise RuntimeError(msg % message_type) +RuntimeError: Unexpected ASGI message 'websocket.close', after sending 'websocket.close' or response already completed. +2025-01-09 00:51:20,732 [INFO] connection closed +2025-01-09 00:52:20,552 [INFO] 1 change detected +2025-01-09 00:52:20,571 [WARNING] WatchFiles detected changes in 'app.py'. Reloading... +2025-01-09 00:52:20,681 [INFO] Shutting down +2025-01-09 00:52:20,787 [INFO] Waiting for application shutdown. +2025-01-09 00:52:20,790 [INFO] Application shutdown complete. +2025-01-09 00:52:20,791 [INFO] Finished server process [36902] +2025-01-09 00:52:21,170 [INFO] 1 change detected +2025-01-09 00:52:23,436 [INFO] Started server process [37369] +2025-01-09 00:52:23,438 [INFO] Waiting for application startup. +2025-01-09 00:52:23,438 [INFO] Application startup complete. +2025-01-09 00:52:54,852 [INFO] 1 change detected +2025-01-09 00:52:54,860 [WARNING] WatchFiles detected changes in 'app.py'. Reloading... +2025-01-09 00:52:54,949 [INFO] Shutting down +2025-01-09 00:52:55,052 [INFO] Waiting for application shutdown. +2025-01-09 00:52:55,053 [INFO] Application shutdown complete. +2025-01-09 00:52:55,053 [INFO] Finished server process [37369] +2025-01-09 00:52:55,426 [INFO] 1 change detected +2025-01-09 00:52:57,074 [INFO] Started server process [37436] +2025-01-09 00:52:57,076 [INFO] Waiting for application startup. +2025-01-09 00:52:57,078 [INFO] Application startup complete. +2025-01-09 00:53:06,378 [INFO] 1 change detected +2025-01-09 00:53:08,915 [INFO] Shutting down +2025-01-09 00:53:09,018 [INFO] Waiting for application shutdown. +2025-01-09 00:53:09,020 [INFO] Application shutdown complete. +2025-01-09 00:53:09,021 [INFO] Finished server process [37436] +2025-01-09 00:53:09,044 [INFO] Stopping reloader process [36868] +2025-01-09 00:53:11,752 [INFO] Will watch for changes in these directories: ['/Users/siwatsirichai/Documents/GitHub/python-detector-worker'] +2025-01-09 00:53:11,753 [INFO] Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit) +2025-01-09 00:53:11,753 [INFO] Started reloader process [37483] using WatchFiles +2025-01-09 00:53:13,520 [INFO] Started server process [37487] +2025-01-09 00:53:13,522 [INFO] Waiting for application startup. +2025-01-09 00:53:13,523 [INFO] Application startup complete. 
+2025-01-09 00:53:14,050 [INFO] WebSocket connection accepted +2025-01-09 00:53:14,050 [INFO] ('127.0.0.1', 60224) - "WebSocket /" [accepted] +2025-01-09 00:53:14,052 [INFO] connection open +2025-01-09 00:53:14,052 [INFO] Started processing streams +2025-01-09 00:53:19,283 [INFO] Subscribed to camera camera1 with URL rtsp://192.168.0.66:8554/common_room +2025-01-09 00:53:36,514 [INFO] 1 change detected +2025-01-09 00:53:38,902 [ERROR] Error in WebSocket connection: (, '') +2025-01-09 00:53:38,910 [INFO] Released camera camera1 +2025-01-09 00:53:38,911 [INFO] WebSocket connection closed +2025-01-09 00:53:38,911 [ERROR] Exception in ASGI application +Traceback (most recent call last): + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 243, in run_asgi + result = await self.app(self.scope, self.asgi_receive, self.asgi_send) # type: ignore[func-returns-value] + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__ + return await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/applications.py", line 1054, in __call__ + await super().__call__(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/applications.py", line 113, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/errors.py", line 152, in __call__ + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/exceptions.py", line 62, in __call__ + await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 715, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 735, in app + await route.handle(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 362, in handle + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 95, in app + await wrap_app_handling_exceptions(app, session)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 93, in app + await func(session) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/routing.py", line 383, in app + await dependant.call(**solved_result.values) + File "/Users/siwatsirichai/Documents/GitHub/python-detector-worker/app.py", line 111, in detect + await websocket.close() + File 
"/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 180, in close + await self.send({"type": "websocket.close", "code": code, "reason": reason or ""}) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 85, in send + await self._send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 39, in sender + await send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 359, in asgi_send + raise RuntimeError(msg % message_type) +RuntimeError: Unexpected ASGI message 'websocket.close', after sending 'websocket.close' or response already completed. +2025-01-09 00:53:38,944 [INFO] connection closed +2025-01-09 00:53:40,757 [INFO] Shutting down +2025-01-09 00:53:40,880 [INFO] Finished server process [37487] +2025-01-09 00:53:40,980 [ERROR] Traceback (most recent call last): + File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/asyncio/runners.py", line 44, in run + return loop.run_until_complete(main) + File "uvloop/loop.pyx", line 1512, in uvloop.loop.Loop.run_until_complete + File "uvloop/loop.pyx", line 1505, in uvloop.loop.Loop.run_until_complete + File "uvloop/loop.pyx", line 1379, in uvloop.loop.Loop.run_forever + File "uvloop/loop.pyx", line 557, in uvloop.loop.Loop._run + File "uvloop/loop.pyx", line 476, in uvloop.loop.Loop._on_idle + File "uvloop/cbhandles.pyx", line 83, in uvloop.loop.Handle._run + File "uvloop/cbhandles.pyx", line 63, in uvloop.loop.Handle._run + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/server.py", line 70, in serve + await self._serve(sockets) + File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/contextlib.py", line 124, in __exit__ + next(self.gen) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/server.py", line 330, in capture_signals + signal.raise_signal(captured_signal) +KeyboardInterrupt + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 700, in lifespan + await receive() + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/lifespan/on.py", line 137, in receive + return await self.receive_queue.get() + File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/asyncio/queues.py", line 166, in get + await getter +asyncio.exceptions.CancelledError + +2025-01-09 00:53:41,696 [INFO] Stopping reloader process [37483] +2025-01-09 00:53:46,103 [INFO] Will watch for changes in these directories: ['/Users/siwatsirichai/Documents/GitHub/python-detector-worker'] +2025-01-09 00:53:46,103 [INFO] Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit) +2025-01-09 00:53:46,104 [INFO] Started reloader process [37591] using WatchFiles +2025-01-09 00:53:47,860 [INFO] Started server process [37599] +2025-01-09 00:53:47,862 [INFO] Waiting for application startup. +2025-01-09 00:53:47,862 [INFO] Application startup complete. +2025-01-09 00:54:51,976 [INFO] Shutting down +2025-01-09 00:54:52,080 [INFO] Waiting for application shutdown. +2025-01-09 00:54:52,083 [INFO] Application shutdown complete. 
+2025-01-09 00:54:52,083 [INFO] Finished server process [37599] +2025-01-09 00:54:52,102 [INFO] Stopping reloader process [37591] +2025-01-09 00:54:54,952 [INFO] Will watch for changes in these directories: ['/Users/siwatsirichai/Documents/GitHub/python-detector-worker'] +2025-01-09 00:54:54,953 [INFO] Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit) +2025-01-09 00:54:54,953 [INFO] Started reloader process [37680] using WatchFiles +2025-01-09 00:54:56,634 [INFO] Started server process [37693] +2025-01-09 00:54:56,636 [INFO] Waiting for application startup. +2025-01-09 00:54:56,636 [INFO] Application startup complete. +2025-01-09 00:54:56,882 [INFO] WebSocket connection accepted +2025-01-09 00:54:56,882 [INFO] ('127.0.0.1', 60381) - "WebSocket /" [accepted] +2025-01-09 00:54:56,884 [INFO] connection open +2025-01-09 00:54:56,885 [INFO] Started processing streams +2025-01-09 00:55:03,279 [INFO] Subscribed to camera camera1 with URL rtsp://192.168.0.66:8554/common_room +2025-01-09 00:55:13,896 [ERROR] Error in WebSocket connection: (, '') +2025-01-09 00:55:13,907 [INFO] Released camera camera1 +2025-01-09 00:55:13,908 [INFO] WebSocket connection closed +2025-01-09 00:55:13,908 [ERROR] Exception in ASGI application +Traceback (most recent call last): + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 243, in run_asgi + result = await self.app(self.scope, self.asgi_receive, self.asgi_send) # type: ignore[func-returns-value] + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__ + return await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/applications.py", line 1054, in __call__ + await super().__call__(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/applications.py", line 113, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/errors.py", line 152, in __call__ + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/middleware/exceptions.py", line 62, in __call__ + await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 715, in __call__ + await self.middleware_stack(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 735, in app + await route.handle(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 362, in handle + await self.app(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 95, in app + await wrap_app_handling_exceptions(app, session)(scope, receive, send) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app + raise exc + 
File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app + await app(scope, receive, sender) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/routing.py", line 93, in app + await func(session) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/fastapi/routing.py", line 383, in app + await dependant.call(**solved_result.values) + File "/Users/siwatsirichai/Documents/GitHub/python-detector-worker/app.py", line 111, in detect + await websocket.close() + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 180, in close + await self.send({"type": "websocket.close", "code": code, "reason": reason or ""}) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/websockets.py", line 85, in send + await self._send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/starlette/_exception_handler.py", line 39, in sender + await send(message) + File "/Users/siwatsirichai/Library/Python/3.9/lib/python/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 359, in asgi_send + raise RuntimeError(msg % message_type) +RuntimeError: Unexpected ASGI message 'websocket.close', after sending 'websocket.close' or response already completed. +2025-01-09 00:55:13,943 [INFO] connection closed +2025-01-09 00:55:14,603 [INFO] Shutting down +2025-01-09 00:55:14,704 [INFO] Waiting for application shutdown. +2025-01-09 00:55:14,705 [INFO] Application shutdown complete. +2025-01-09 00:55:14,705 [INFO] Finished server process [37693] +2025-01-09 00:55:14,721 [INFO] Stopping reloader process [37680] diff --git a/app.py b/app.py index 7cd0407..666730f 100644 --- a/app.py +++ b/app.py @@ -1,759 +1,152 @@ -from typing import Any, Dict -import os -import json -import time -import queue -import torch -import cv2 -import numpy as np -import base64 -import logging -import threading -import requests -import asyncio -import psutil -import zipfile -import ssl -import urllib3 -import subprocess -import tempfile -from urllib.parse import urlparse -from requests.adapters import HTTPAdapter -from urllib3.util.ssl_ import create_urllib3_context -from fastapi import FastAPI, WebSocket, HTTPException +from fastapi import FastAPI, WebSocket from fastapi.websockets import WebSocketDisconnect -from fastapi.responses import Response from websockets.exceptions import ConnectionClosedError from ultralytics import YOLO - -# Import shared pipeline functions -from siwatsystem.pympta import load_pipeline_from_zip, run_pipeline +import torch +import cv2 +import base64 +import numpy as np +import json +import logging +import threading +import queue +import os +import requests +from urllib.parse import urlparse # Added import +import asyncio # Ensure asyncio is imported +import psutil # Added import app = FastAPI() -# Global dictionaries to keep track of models and streams -# "models" now holds a nested dict: { camera_id: { modelId: model_tree } } -models: Dict[str, Dict[str, Any]] = {} -streams: Dict[str, Dict[str, Any]] = {} -# Store session IDs per display -session_ids: Dict[str, int] = {} -# Track shared camera streams by camera URL -camera_streams: Dict[str, Dict[str, Any]] = {} -# Map subscriptions to their camera URL -subscription_to_camera: Dict[str, str] = {} -# Store latest frames for REST API access (separate from processing buffer) -latest_frames: Dict[str, Any] = {} +model = YOLO("yolov8n.pt") 
+if torch.cuda.is_available(): + model.to('cuda') + +# Retrieve class names from the model +class_names = model.names with open("config.json", "r") as f: config = json.load(f) poll_interval = config.get("poll_interval_ms", 100) -reconnect_interval = config.get("reconnect_interval_sec", 5) -TARGET_FPS = config.get("target_fps", 10) -poll_interval = 1000 / TARGET_FPS +reconnect_interval = config.get("reconnect_interval_sec", 5) # New setting +TARGET_FPS = config.get("target_fps", 10) # Add TARGET_FPS +poll_interval = 1000 / TARGET_FPS # Adjust poll_interval based on TARGET_FPS logging.info(f"Poll interval: {poll_interval}ms") max_streams = config.get("max_streams", 5) max_retries = config.get("max_retries", 3) # Configure logging logging.basicConfig( - level=logging.INFO, # Set to INFO level for less verbose output - format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", handlers=[ - logging.FileHandler("detector_worker.log"), # Write logs to a file - logging.StreamHandler() # Also output to console + logging.FileHandler("app.log"), + logging.StreamHandler() ] ) -# Create a logger specifically for this application -logger = logging.getLogger("detector_worker") -logger.setLevel(logging.DEBUG) # Set app-specific logger to DEBUG level - -# Ensure all other libraries (including root) use at least INFO level -logging.getLogger().setLevel(logging.INFO) - -logger.info("Starting detector worker application") -logger.info(f"Configuration: Target FPS: {TARGET_FPS}, Max streams: {max_streams}, Max retries: {max_retries}") - # Ensure the models directory exists os.makedirs("models", exist_ok=True) -logger.info("Ensured models directory exists") -# Constants for heartbeat and timeouts +# Add constants for heartbeat HEARTBEAT_INTERVAL = 2 # seconds WORKER_TIMEOUT_MS = 10000 -logger.debug(f"Heartbeat interval set to {HEARTBEAT_INTERVAL} seconds") -# Locks for thread-safe operations -streams_lock = threading.Lock() -models_lock = threading.Lock() -logger.debug("Initialized thread locks") - -# Add helper to download mpta ZIP file from a remote URL -def download_mpta(url: str, dest_path: str) -> str: - try: - logger.info(f"Starting download of model from {url} to {dest_path}") - os.makedirs(os.path.dirname(dest_path), exist_ok=True) - response = requests.get(url, stream=True) - if response.status_code == 200: - file_size = int(response.headers.get('content-length', 0)) - logger.info(f"Model file size: {file_size/1024/1024:.2f} MB") - downloaded = 0 - with open(dest_path, "wb") as f: - for chunk in response.iter_content(chunk_size=8192): - f.write(chunk) - downloaded += len(chunk) - if file_size > 0 and downloaded % (file_size // 10) < 8192: # Log approximately every 10% - logger.debug(f"Download progress: {downloaded/file_size*100:.1f}%") - logger.info(f"Successfully downloaded mpta file from {url} to {dest_path}") - return dest_path - else: - logger.error(f"Failed to download mpta file (status code {response.status_code}): {response.text}") - return None - except Exception as e: - logger.error(f"Exception downloading mpta file from {url}: {str(e)}", exc_info=True) - return None - -# Add helper to fetch snapshot image from HTTP/HTTPS URL -def fetch_snapshot(url: str): - try: - from requests.auth import HTTPBasicAuth, HTTPDigestAuth - - # Parse URL to extract credentials - parsed = urlparse(url) - - # Prepare headers - some cameras require User-Agent - headers = { - 'User-Agent': 'Mozilla/5.0 (compatible; DetectorWorker/1.0)' - } - 
- # Reconstruct URL without credentials - clean_url = f"{parsed.scheme}://{parsed.hostname}" - if parsed.port: - clean_url += f":{parsed.port}" - clean_url += parsed.path - if parsed.query: - clean_url += f"?{parsed.query}" - - auth = None - if parsed.username and parsed.password: - # Try HTTP Digest authentication first (common for IP cameras) - try: - auth = HTTPDigestAuth(parsed.username, parsed.password) - response = requests.get(clean_url, auth=auth, headers=headers, timeout=10) - if response.status_code == 200: - logger.debug(f"Successfully authenticated using HTTP Digest for {clean_url}") - elif response.status_code == 401: - # If Digest fails, try Basic auth - logger.debug(f"HTTP Digest failed, trying Basic auth for {clean_url}") - auth = HTTPBasicAuth(parsed.username, parsed.password) - response = requests.get(clean_url, auth=auth, headers=headers, timeout=10) - if response.status_code == 200: - logger.debug(f"Successfully authenticated using HTTP Basic for {clean_url}") - except Exception as auth_error: - logger.debug(f"Authentication setup error: {auth_error}") - # Fallback to original URL with embedded credentials - response = requests.get(url, headers=headers, timeout=10) - else: - # No credentials in URL, make request as-is - response = requests.get(url, headers=headers, timeout=10) - - if response.status_code == 200: - # Convert response content to numpy array - nparr = np.frombuffer(response.content, np.uint8) - # Decode image - frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR) - if frame is not None: - logger.debug(f"Successfully fetched snapshot from {clean_url}, shape: {frame.shape}") - return frame - else: - logger.error(f"Failed to decode image from snapshot URL: {clean_url}") - return None - else: - logger.error(f"Failed to fetch snapshot (status code {response.status_code}): {clean_url}") - return None - except Exception as e: - logger.error(f"Exception fetching snapshot from {url}: {str(e)}") - return None - -# Helper to get crop coordinates from stream -def get_crop_coords(stream): - return { - "cropX1": stream.get("cropX1"), - "cropY1": stream.get("cropY1"), - "cropX2": stream.get("cropX2"), - "cropY2": stream.get("cropY2") - } - -#################################################### -# REST API endpoint for image retrieval -#################################################### -@app.get("/camera/{camera_id}/image") -async def get_camera_image(camera_id: str): - """ - Get the current frame from a camera as JPEG image - """ - try: - # URL decode the camera_id to handle encoded characters like %3B for semicolon - from urllib.parse import unquote - original_camera_id = camera_id - camera_id = unquote(camera_id) - logger.debug(f"REST API request: original='{original_camera_id}', decoded='{camera_id}'") - - with streams_lock: - if camera_id not in streams: - logger.warning(f"Camera ID '{camera_id}' not found in streams. 
Current streams: {list(streams.keys())}") - raise HTTPException(status_code=404, detail=f"Camera {camera_id} not found or not active") - - # Check if we have a cached frame for this camera - if camera_id not in latest_frames: - logger.warning(f"No cached frame available for camera '{camera_id}'.") - raise HTTPException(status_code=404, detail=f"No frame available for camera {camera_id}") - - frame = latest_frames[camera_id] - logger.debug(f"Retrieved cached frame for camera '{camera_id}', frame shape: {frame.shape}") - # Encode frame as JPEG - success, buffer_img = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85]) - if not success: - raise HTTPException(status_code=500, detail="Failed to encode image as JPEG") - - # Return image as binary response - return Response(content=buffer_img.tobytes(), media_type="image/jpeg") - - except HTTPException: - raise - except Exception as e: - logger.error(f"Error retrieving image for camera {camera_id}: {str(e)}", exc_info=True) - raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") - -#################################################### -# Detection and frame processing functions -#################################################### @app.websocket("/") async def detect(websocket: WebSocket): - logger.info("WebSocket connection accepted") - persistent_data_dict = {} + import asyncio + import time - async def handle_detection(camera_id, stream, frame, websocket, model_tree, persistent_data): - try: - # Apply crop if specified - cropped_frame = frame - if all(coord is not None for coord in [stream.get("cropX1"), stream.get("cropY1"), stream.get("cropX2"), stream.get("cropY2")]): - cropX1, cropY1, cropX2, cropY2 = stream["cropX1"], stream["cropY1"], stream["cropX2"], stream["cropY2"] - cropped_frame = frame[cropY1:cropY2, cropX1:cropX2] - logger.debug(f"Applied crop coordinates ({cropX1}, {cropY1}, {cropX2}, {cropY2}) to frame for camera {camera_id}") - - logger.debug(f"Processing frame for camera {camera_id} with model {stream['modelId']}") - start_time = time.time() - - # Extract display identifier for pipeline context - subscription_parts = stream["subscriptionIdentifier"].split(';') - display_identifier = subscription_parts[0] if subscription_parts else None - - # Create context for pipeline execution (session_id will be generated by pipeline) - pipeline_context = { - "camera_id": camera_id, - "display_id": display_identifier - } - - detection_result = run_pipeline(cropped_frame, model_tree, context=pipeline_context) - process_time = (time.time() - start_time) * 1000 - logger.debug(f"Detection for camera {camera_id} completed in {process_time:.2f}ms") - - # Log the raw detection result for debugging - logger.debug(f"Raw detection result for camera {camera_id}:\n{json.dumps(detection_result, indent=2, default=str)}") - - # Extract session_id from pipeline result (generated during database record creation) - session_id = None - if detection_result and isinstance(detection_result, dict): - # Check if pipeline generated a session_id (happens when Car+Frontal detected together) - if "session_id" in detection_result: - session_id = detection_result["session_id"] - logger.debug(f"Extracted session_id from pipeline result: {session_id}") - - # Process detection result - run_pipeline returns the primary detection directly - if detection_result and isinstance(detection_result, dict) and "class" in detection_result: - highest_confidence_detection = detection_result - else: - # No detection found - highest_confidence_detection 
= { - "class": "none", - "confidence": 1.0, - "bbox": [0, 0, 0, 0], - "branch_results": {} - } - - # Convert detection format to match backend expectations exactly as in worker.md section 4.2 - detection_dict = { - "carModel": None, - "carBrand": None, - "carYear": None, - "bodyType": None, - "licensePlateText": None, - "licensePlateConfidence": None - } - - # Extract and process branch results from parallel classification - branch_results = highest_confidence_detection.get("branch_results", {}) - if branch_results: - logger.debug(f"Processing branch results: {branch_results}") - - # Transform branch results into backend-expected detection attributes - for branch_id, branch_data in branch_results.items(): - if isinstance(branch_data, dict): - logger.debug(f"Processing branch {branch_id}: {branch_data}") - - # Map common classification fields to backend-expected names - if "brand" in branch_data: - detection_dict["carBrand"] = branch_data["brand"] - if "body_type" in branch_data: - detection_dict["bodyType"] = branch_data["body_type"] - if "class" in branch_data: - class_name = branch_data["class"] - - # Map based on branch/model type - if "brand" in branch_id.lower(): - detection_dict["carBrand"] = class_name - elif "bodytype" in branch_id.lower() or "body" in branch_id.lower(): - detection_dict["bodyType"] = class_name - - logger.info(f"Detection payload after branch processing: {detection_dict}") - else: - logger.debug("No branch results found in detection result") - - detection_data = { - "type": "imageDetection", - "subscriptionIdentifier": stream["subscriptionIdentifier"], - "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S.%fZ", time.gmtime()), - "data": { - "detection": detection_dict, - "modelId": stream["modelId"], - "modelName": stream["modelName"] - } - } - - # Add session ID if available (generated by pipeline when Car+Frontal detected) - if session_id is not None: - detection_data["sessionId"] = session_id - logger.debug(f"Added session_id to WebSocket response: {session_id}") - - if highest_confidence_detection.get("class") != "none": - confidence = highest_confidence_detection.get("confidence", 0.0) - logger.info(f"Camera {camera_id}: Detected {highest_confidence_detection['class']} with confidence {confidence:.2f} using model {stream['modelName']}") - - # Log session ID if available - if session_id: - logger.debug(f"Detection associated with session ID: {session_id}") - - await websocket.send_json(detection_data) - logger.debug(f"Sent detection data to client for camera {camera_id}") - logger.debug(f"Sent this detection data: {detection_data}") - return persistent_data - except Exception as e: - logger.error(f"Error in handle_detection for camera {camera_id}: {str(e)}", exc_info=True) - return persistent_data + logging.info("WebSocket connection accepted") + + streams = {} def frame_reader(camera_id, cap, buffer, stop_event): + import time retries = 0 - logger.info(f"Starting frame reader thread for camera {camera_id}") - frame_count = 0 - last_log_time = time.time() - - try: - # Log initial camera status and properties - if cap.isOpened(): - width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = cap.get(cv2.CAP_PROP_FPS) - logger.info(f"Camera {camera_id} opened successfully with resolution {width}x{height}, FPS: {fps}") - else: - logger.error(f"Camera {camera_id} failed to open initially") - - while not stop_event.is_set(): - try: - if not cap.isOpened(): - logger.error(f"Camera {camera_id} is not open before trying to read") 
- # Attempt to reopen - cap = cv2.VideoCapture(streams[camera_id]["rtsp_url"]) - time.sleep(reconnect_interval) - continue - - logger.debug(f"Attempting to read frame from camera {camera_id}") - ret, frame = cap.read() - - if not ret: - logger.warning(f"Connection lost for camera: {camera_id}, retry {retries+1}/{max_retries}") - cap.release() - time.sleep(reconnect_interval) - retries += 1 - if retries > max_retries and max_retries != -1: - logger.error(f"Max retries reached for camera: {camera_id}, stopping frame reader") - break - # Re-open - logger.info(f"Attempting to reopen RTSP stream for camera: {camera_id}") - cap = cv2.VideoCapture(streams[camera_id]["rtsp_url"]) - if not cap.isOpened(): - logger.error(f"Failed to reopen RTSP stream for camera: {camera_id}") - continue - logger.info(f"Successfully reopened RTSP stream for camera: {camera_id}") - continue - - # Successfully read a frame - frame_count += 1 - current_time = time.time() - # Log frame stats every 5 seconds - if current_time - last_log_time > 5: - logger.info(f"Camera {camera_id}: Read {frame_count} frames in the last {current_time - last_log_time:.1f} seconds") - frame_count = 0 - last_log_time = current_time - - logger.debug(f"Successfully read frame from camera {camera_id}, shape: {frame.shape}") - retries = 0 - - # Overwrite old frame if buffer is full - if not buffer.empty(): - try: - buffer.get_nowait() - logger.debug(f"[frame_reader] Removed old frame from buffer for camera {camera_id}") - except queue.Empty: - pass - buffer.put(frame) - logger.debug(f"[frame_reader] Added new frame to buffer for camera {camera_id}. Buffer size: {buffer.qsize()}") - - # Short sleep to avoid CPU overuse - time.sleep(0.01) - - except cv2.error as e: - logger.error(f"OpenCV error for camera {camera_id}: {e}", exc_info=True) + while not stop_event.is_set(): + try: + ret, frame = cap.read() + if not ret: + logging.warning(f"Connection lost for camera: {camera_id}, retry {retries+1}/{max_retries}") cap.release() time.sleep(reconnect_interval) retries += 1 - if retries > max_retries and max_retries != -1: - logger.error(f"Max retries reached after OpenCV error for camera {camera_id}") + if retries > max_retries: + logging.error(f"Max retries reached for camera: {camera_id}") break - logger.info(f"Attempting to reopen RTSP stream after OpenCV error for camera: {camera_id}") - cap = cv2.VideoCapture(streams[camera_id]["rtsp_url"]) + # Re-open the VideoCapture + cap = cv2.VideoCapture(streams[camera_id]['rtsp_url']) if not cap.isOpened(): - logger.error(f"Failed to reopen RTSP stream for camera {camera_id} after OpenCV error") + logging.error(f"Failed to reopen RTSP stream for camera: {camera_id}") continue - logger.info(f"Successfully reopened RTSP stream after OpenCV error for camera: {camera_id}") - except Exception as e: - logger.error(f"Unexpected error for camera {camera_id}: {str(e)}", exc_info=True) - cap.release() - break - except Exception as e: - logger.error(f"Error in frame_reader thread for camera {camera_id}: {str(e)}", exc_info=True) - finally: - logger.info(f"Frame reader thread for camera {camera_id} is exiting") - if cap and cap.isOpened(): + continue + retries = 0 # Reset on success + if not buffer.empty(): + try: + buffer.get_nowait() # Discard the old frame + except queue.Empty: + pass + buffer.put(frame) + except cv2.error as e: + logging.error(f"OpenCV error for camera {camera_id}: {e}") cap.release() - - def snapshot_reader(camera_id, snapshot_url, snapshot_interval, buffer, stop_event): - """Frame reader that 
fetches snapshots from HTTP/HTTPS URL at specified intervals""" - retries = 0 - logger.info(f"Starting snapshot reader thread for camera {camera_id} from {snapshot_url}") - frame_count = 0 - last_log_time = time.time() - - try: - interval_seconds = snapshot_interval / 1000.0 # Convert milliseconds to seconds - logger.info(f"Snapshot interval for camera {camera_id}: {interval_seconds}s") - - while not stop_event.is_set(): - try: - start_time = time.time() - frame = fetch_snapshot(snapshot_url) - - if frame is None: - logger.warning(f"Failed to fetch snapshot for camera: {camera_id}, retry {retries+1}/{max_retries}") - retries += 1 - if retries > max_retries and max_retries != -1: - logger.error(f"Max retries reached for snapshot camera: {camera_id}, stopping reader") - break - time.sleep(min(interval_seconds, reconnect_interval)) - continue - - # Successfully fetched a frame - frame_count += 1 - current_time = time.time() - # Log frame stats every 5 seconds - if current_time - last_log_time > 5: - logger.info(f"Camera {camera_id}: Fetched {frame_count} snapshots in the last {current_time - last_log_time:.1f} seconds") - frame_count = 0 - last_log_time = current_time - - logger.debug(f"Successfully fetched snapshot from camera {camera_id}, shape: {frame.shape}") - retries = 0 - - # Overwrite old frame if buffer is full - if not buffer.empty(): - try: - buffer.get_nowait() - logger.debug(f"[snapshot_reader] Removed old snapshot from buffer for camera {camera_id}") - except queue.Empty: - pass - buffer.put(frame) - logger.debug(f"[snapshot_reader] Added new snapshot to buffer for camera {camera_id}. Buffer size: {buffer.qsize()}") - - # Wait for the specified interval - elapsed = time.time() - start_time - sleep_time = max(interval_seconds - elapsed, 0) - if sleep_time > 0: - time.sleep(sleep_time) - - except Exception as e: - logger.error(f"Unexpected error fetching snapshot for camera {camera_id}: {str(e)}", exc_info=True) - retries += 1 - if retries > max_retries and max_retries != -1: - logger.error(f"Max retries reached after error for snapshot camera {camera_id}") - break - time.sleep(min(interval_seconds, reconnect_interval)) - except Exception as e: - logger.error(f"Error in snapshot_reader thread for camera {camera_id}: {str(e)}", exc_info=True) - finally: - logger.info(f"Snapshot reader thread for camera {camera_id} is exiting") - - async def reconcile_subscriptions(desired_subscriptions, websocket): - """ - Declarative reconciliation: Compare desired vs current subscriptions and make changes - """ - logger.info(f"Reconciling subscriptions: {len(desired_subscriptions)} desired") - - with streams_lock: - # Get current subscriptions - current_subscription_ids = set(streams.keys()) - desired_subscription_ids = set(sub["subscriptionIdentifier"] for sub in desired_subscriptions) - - # Find what to add and remove - to_add = desired_subscription_ids - current_subscription_ids - to_remove = current_subscription_ids - desired_subscription_ids - to_check_for_changes = current_subscription_ids & desired_subscription_ids - - logger.info(f"Reconciliation: {len(to_add)} to add, {len(to_remove)} to remove, {len(to_check_for_changes)} to check for changes") - - # Remove subscriptions that are no longer wanted - for subscription_id in to_remove: - await unsubscribe_internal(subscription_id) - - # Check existing subscriptions for parameter changes - for subscription_id in to_check_for_changes: - desired_sub = next(sub for sub in desired_subscriptions if sub["subscriptionIdentifier"] == 
subscription_id) - current_stream = streams[subscription_id] - - # Check if parameters changed - if has_subscription_changed(desired_sub, current_stream): - logger.info(f"Parameters changed for {subscription_id}, resubscribing") - await unsubscribe_internal(subscription_id) - await subscribe_internal(desired_sub, websocket) - - # Add new subscriptions - for subscription_id in to_add: - desired_sub = next(sub for sub in desired_subscriptions if sub["subscriptionIdentifier"] == subscription_id) - await subscribe_internal(desired_sub, websocket) - - def has_subscription_changed(desired_sub, current_stream): - """Check if subscription parameters have changed""" - return ( - desired_sub.get("rtspUrl") != current_stream.get("rtsp_url") or - desired_sub.get("snapshotUrl") != current_stream.get("snapshot_url") or - desired_sub.get("snapshotInterval") != current_stream.get("snapshot_interval") or - desired_sub.get("cropX1") != current_stream.get("cropX1") or - desired_sub.get("cropY1") != current_stream.get("cropY1") or - desired_sub.get("cropX2") != current_stream.get("cropX2") or - desired_sub.get("cropY2") != current_stream.get("cropY2") or - desired_sub.get("modelId") != current_stream.get("modelId") or - desired_sub.get("modelName") != current_stream.get("modelName") - ) - - async def subscribe_internal(subscription, websocket): - """Internal subscription logic extracted from original subscribe handler""" - subscriptionIdentifier = subscription.get("subscriptionIdentifier") - rtsp_url = subscription.get("rtspUrl") - snapshot_url = subscription.get("snapshotUrl") - snapshot_interval = subscription.get("snapshotInterval") - model_url = subscription.get("modelUrl") - modelId = subscription.get("modelId") - modelName = subscription.get("modelName") - cropX1 = subscription.get("cropX1") - cropY1 = subscription.get("cropY1") - cropX2 = subscription.get("cropX2") - cropY2 = subscription.get("cropY2") - - # Extract camera_id from subscriptionIdentifier - parts = subscriptionIdentifier.split(';') - if len(parts) != 2: - logger.error(f"Invalid subscriptionIdentifier format: {subscriptionIdentifier}") - return - - display_identifier, camera_identifier = parts - camera_id = subscriptionIdentifier - - # Load model if needed - if model_url: - with models_lock: - if (camera_id not in models) or (modelId not in models[camera_id]): - logger.info(f"Loading model from {model_url} for camera {camera_id}, modelId {modelId}") - extraction_dir = os.path.join("models", camera_identifier, str(modelId)) - os.makedirs(extraction_dir, exist_ok=True) - - # Handle model loading (same as original) - parsed = urlparse(model_url) - if parsed.scheme in ("http", "https"): - filename = os.path.basename(parsed.path) or f"model_{modelId}.mpta" - local_mpta = os.path.join(extraction_dir, filename) - local_path = download_mpta(model_url, local_mpta) - if not local_path: - logger.error(f"Failed to download model from {model_url}") - return - model_tree = load_pipeline_from_zip(local_path, extraction_dir) - else: - if not os.path.exists(model_url): - logger.error(f"Model file not found: {model_url}") - return - model_tree = load_pipeline_from_zip(model_url, extraction_dir) - - if model_tree is None: - logger.error(f"Failed to load model {modelId}") - return - - if camera_id not in models: - models[camera_id] = {} - models[camera_id][modelId] = model_tree - - # Create stream (same logic as original) - if camera_id and (rtsp_url or snapshot_url) and len(streams) < max_streams: - camera_url = snapshot_url if snapshot_url else rtsp_url - 
- # Check if we already have a stream for this camera URL - shared_stream = camera_streams.get(camera_url) - - if shared_stream: - # Reuse existing stream - buffer = shared_stream["buffer"] - stop_event = shared_stream["stop_event"] - thread = shared_stream["thread"] - mode = shared_stream["mode"] - shared_stream["ref_count"] = shared_stream.get("ref_count", 0) + 1 - else: - # Create new stream - buffer = queue.Queue(maxsize=1) - stop_event = threading.Event() - - if snapshot_url and snapshot_interval: - thread = threading.Thread(target=snapshot_reader, args=(camera_id, snapshot_url, snapshot_interval, buffer, stop_event)) - thread.daemon = True - thread.start() - mode = "snapshot" - shared_stream = { - "buffer": buffer, "thread": thread, "stop_event": stop_event, - "mode": mode, "url": snapshot_url, "snapshot_interval": snapshot_interval, "ref_count": 1 - } - camera_streams[camera_url] = shared_stream - elif rtsp_url: - cap = cv2.VideoCapture(rtsp_url) - if not cap.isOpened(): - logger.error(f"Failed to open RTSP stream for camera {camera_id}") - return - thread = threading.Thread(target=frame_reader, args=(camera_id, cap, buffer, stop_event)) - thread.daemon = True - thread.start() - mode = "rtsp" - shared_stream = { - "buffer": buffer, "thread": thread, "stop_event": stop_event, - "mode": mode, "url": rtsp_url, "cap": cap, "ref_count": 1 - } - camera_streams[camera_url] = shared_stream - else: - logger.error(f"No valid URL provided for camera {camera_id}") - return - - # Create stream info - stream_info = { - "buffer": buffer, "thread": thread, "stop_event": stop_event, - "modelId": modelId, "modelName": modelName, "subscriptionIdentifier": subscriptionIdentifier, - "cropX1": cropX1, "cropY1": cropY1, "cropX2": cropX2, "cropY2": cropY2, - "mode": mode, "camera_url": camera_url, "modelUrl": model_url - } - - if mode == "snapshot": - stream_info["snapshot_url"] = snapshot_url - stream_info["snapshot_interval"] = snapshot_interval - elif mode == "rtsp": - stream_info["rtsp_url"] = rtsp_url - stream_info["cap"] = shared_stream["cap"] - - streams[camera_id] = stream_info - subscription_to_camera[camera_id] = camera_url - logger.info(f"Subscribed to camera {camera_id}") - - async def unsubscribe_internal(subscription_id): - """Internal unsubscription logic""" - if subscription_id in streams: - stream = streams.pop(subscription_id) - camera_url = subscription_to_camera.pop(subscription_id, None) - - if camera_url and camera_url in camera_streams: - shared_stream = camera_streams[camera_url] - shared_stream["ref_count"] -= 1 - - if shared_stream["ref_count"] <= 0: - shared_stream["stop_event"].set() - shared_stream["thread"].join() - if "cap" in shared_stream: - shared_stream["cap"].release() - del camera_streams[camera_url] - - latest_frames.pop(subscription_id, None) - logger.info(f"Unsubscribed from camera {subscription_id}") + time.sleep(reconnect_interval) + retries += 1 + if retries > max_retries: + logging.error(f"Max retries reached after OpenCV error for camera: {camera_id}") + break + # Re-open the VideoCapture + cap = cv2.VideoCapture(streams[camera_id]['rtsp_url']) + if not cap.isOpened(): + logging.error(f"Failed to reopen RTSP stream for camera {camera_id} after OpenCV error") + continue + except Exception as e: + logging.error(f"Unexpected error for camera {camera_id}: {e}") + cap.release() + break async def process_streams(): - logger.info("Started processing streams") + global model, class_names # Added line + logging.info("Started processing streams") try: while True: 
start_time = time.time() - with streams_lock: - current_streams = list(streams.items()) - if current_streams: - logger.debug(f"Processing {len(current_streams)} active streams") - else: - logger.debug("No active streams to process") - - for camera_id, stream in current_streams: - buffer = stream["buffer"] - if buffer.empty(): - logger.debug(f"Frame buffer is empty for camera {camera_id}") - continue - - logger.debug(f"Got frame from buffer for camera {camera_id}") - frame = buffer.get() - - # Cache the frame for REST API access - latest_frames[camera_id] = frame.copy() - logger.debug(f"Cached frame for REST API access for camera {camera_id}") - - with models_lock: - model_tree = models.get(camera_id, {}).get(stream["modelId"]) - if not model_tree: - logger.warning(f"Model not found for camera {camera_id}, modelId {stream['modelId']}") - continue - logger.debug(f"Found model tree for camera {camera_id}, modelId {stream['modelId']}") - - key = (camera_id, stream["modelId"]) - persistent_data = persistent_data_dict.get(key, {}) - logger.debug(f"Starting detection for camera {camera_id} with modelId {stream['modelId']}") - updated_persistent_data = await handle_detection( - camera_id, stream, frame, websocket, model_tree, persistent_data - ) - persistent_data_dict[key] = updated_persistent_data - - elapsed_time = (time.time() - start_time) * 1000 # ms + # Round-robin processing + for camera_id, stream in list(streams.items()): + buffer = stream['buffer'] + if not buffer.empty(): + frame = buffer.get() + results = model(frame, stream=False) + boxes = [] + for r in results: + for box in r.boxes: + boxes.append({ + "class": class_names[int(box.cls[0])], + "confidence": float(box.conf[0]), + }) + # Broadcast to all subscribers of this URL + detection_data = { + "type": "imageDetection", + "cameraIdentifier": camera_id, + "timestamp": time.time(), + "data": { + "detections": boxes, + "modelId": stream['modelId'], + "modelName": stream['modelName'] + } + } + logging.debug(f"Sending detection data for camera {camera_id}: {detection_data}") + await websocket.send_json(detection_data) + elapsed_time = (time.time() - start_time) * 1000 # in ms sleep_time = max(poll_interval - elapsed_time, 0) - logger.debug(f"Frame processing cycle: {elapsed_time:.2f}ms, sleeping for: {sleep_time:.2f}ms") + logging.debug(f"Elapsed time: {elapsed_time}ms, sleeping for: {sleep_time}ms") await asyncio.sleep(sleep_time / 1000.0) except asyncio.CancelledError: - logger.info("Stream processing task cancelled") + logging.info("Stream processing task cancelled") except Exception as e: - logger.error(f"Error in process_streams: {str(e)}", exc_info=True) + logging.error(f"Error in process_streams: {e}") async def send_heartbeat(): while True: @@ -761,27 +154,22 @@ async def detect(websocket: WebSocket): cpu_usage = psutil.cpu_percent() memory_usage = psutil.virtual_memory().percent if torch.cuda.is_available(): - gpu_usage = torch.cuda.utilization() if hasattr(torch.cuda, 'utilization') else None - gpu_memory_usage = torch.cuda.memory_reserved() / (1024 ** 2) + gpu_usage = torch.cuda.memory_allocated() / (1024 ** 2) # Convert to MB + gpu_memory_usage = torch.cuda.memory_reserved() / (1024 ** 2) # Convert to MB else: gpu_usage = None gpu_memory_usage = None - + camera_connections = [ { - "subscriptionIdentifier": stream["subscriptionIdentifier"], - "modelId": stream["modelId"], - "modelName": stream["modelName"], - "online": True, - # Include all subscription parameters for proper change detection - "rtspUrl": 
stream.get("rtsp_url"), - "snapshotUrl": stream.get("snapshot_url"), - "snapshotInterval": stream.get("snapshot_interval"), - **{k: v for k, v in get_crop_coords(stream).items() if v is not None} + "cameraIdentifier": camera_id, + "modelId": stream['modelId'], + "modelName": stream['modelName'], + "online": True } for camera_id, stream in streams.items() ] - + state_report = { "type": "stateReport", "cpuUsage": cpu_usage, @@ -791,342 +179,202 @@ async def detect(websocket: WebSocket): "cameraConnections": camera_connections } await websocket.send_text(json.dumps(state_report)) - logger.debug(f"Sent stateReport as heartbeat: CPU {cpu_usage:.1f}%, Memory {memory_usage:.1f}%, {len(camera_connections)} active cameras") + logging.debug("Sent stateReport as heartbeat") await asyncio.sleep(HEARTBEAT_INTERVAL) except Exception as e: - logger.error(f"Error sending stateReport heartbeat: {e}") + logging.error(f"Error sending stateReport heartbeat: {e}") break async def on_message(): + global model, class_names # Changed from nonlocal to global + while True: + msg = await websocket.receive_text() + logging.debug(f"Received message: {msg}") + data = json.loads(msg) + msg_type = data.get("type") + + if msg_type == "subscribe": + payload = data.get("payload", {}) + camera_id = payload.get("cameraIdentifier") + rtsp_url = payload.get("rtspUrl") + model_url = payload.get("modelUrl") + modelId = payload.get("modelId") + modelName = payload.get("modelName") + + if model_url: + print(f"Downloading model from {model_url}") + parsed_url = urlparse(model_url) + filename = os.path.basename(parsed_url.path) + model_filename = os.path.join("models", filename) + # Download the model + response = requests.get(model_url, stream=True) + if response.status_code == 200: + with open(model_filename, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + logging.info(f"Downloaded model from {model_url} to {model_filename}") + model = YOLO(model_filename) + if torch.cuda.is_available(): + model.to('cuda') + class_names = model.names + else: + logging.error(f"Failed to download model from {model_url}") + continue + if camera_id and rtsp_url: + if camera_id not in streams and len(streams) < max_streams: + cap = cv2.VideoCapture(rtsp_url) + if not cap.isOpened(): + logging.error(f"Failed to open RTSP stream for camera {camera_id}") + continue + buffer = queue.Queue(maxsize=1) + stop_event = threading.Event() + thread = threading.Thread(target=frame_reader, args=(camera_id, cap, buffer, stop_event)) + thread.daemon = True + thread.start() + streams[camera_id] = { + 'cap': cap, + 'buffer': buffer, + 'thread': thread, + 'rtsp_url': rtsp_url, + 'stop_event': stop_event, + 'modelId': modelId, + 'modelName': modelName + } + logging.info(f"Subscribed to camera {camera_id} with modelId {modelId}, modelName {modelName} and URL {rtsp_url}") + elif camera_id and camera_id in streams: + stream = streams.pop(camera_id) + stream['cap'].release() + logging.info(f"Unsubscribed from camera {camera_id}") + elif msg_type == "unsubscribe": + payload = data.get("payload", {}) + camera_id = payload.get("cameraIdentifier") + if camera_id and camera_id in streams: + stream = streams.pop(camera_id) + stream['cap'].release() + logging.info(f"Unsubscribed from camera {camera_id}") + elif msg_type == "requestState": + # Handle state request + cpu_usage = psutil.cpu_percent() + memory_usage = psutil.virtual_memory().percent + if torch.cuda.is_available(): + gpu_usage = torch.cuda.memory_allocated() / (1024 ** 2) # Convert to 
MB + gpu_memory_usage = torch.cuda.memory_reserved() / (1024 ** 2) # Convert to MB + else: + gpu_usage = None + gpu_memory_usage = None + + camera_connections = [ + { + "cameraIdentifier": camera_id, + "modelId": stream['modelId'], + "modelName": stream['modelName'], + "online": True + } + for camera_id, stream in streams.items() + ] + + state_report = { + "type": "stateReport", + "cpuUsage": cpu_usage, + "memoryUsage": memory_usage, + "gpuUsage": gpu_usage, + "gpuMemoryUsage": gpu_memory_usage, + "cameraConnections": camera_connections + } + await websocket.send_text(json.dumps(state_report)) + else: + logging.error(f"Unknown message type: {msg_type}") + + await websocket.accept() + task = asyncio.create_task(process_streams()) + heartbeat_task = asyncio.create_task(send_heartbeat()) + message_task = asyncio.create_task(on_message()) + + await asyncio.gather(heartbeat_task, message_task) + + model = None + model_path = None + + try: while True: try: msg = await websocket.receive_text() - logger.debug(f"Received message: {msg}") + logging.debug(f"Received message: {msg}") data = json.loads(msg) - msg_type = data.get("type") - - if msg_type == "setSubscriptionList": - # Declarative approach: Backend sends list of subscriptions this worker should have - desired_subscriptions = data.get("subscriptions", []) - logger.info(f"Received subscription list with {len(desired_subscriptions)} subscriptions") - - await reconcile_subscriptions(desired_subscriptions, websocket) - - elif msg_type == "subscribe": - # Legacy support - convert single subscription to list - payload = data.get("payload", {}) - await reconcile_subscriptions([payload], websocket) - - elif msg_type == "unsubscribe": - # Legacy support - remove subscription - payload = data.get("payload", {}) - subscriptionIdentifier = payload.get("subscriptionIdentifier") - # Remove from current subscriptions and reconcile - current_subs = [] - with streams_lock: - for camera_id, stream in streams.items(): - if stream["subscriptionIdentifier"] != subscriptionIdentifier: - # Convert stream back to subscription format - current_subs.append({ - "subscriptionIdentifier": stream["subscriptionIdentifier"], - "rtspUrl": stream.get("rtsp_url"), - "snapshotUrl": stream.get("snapshot_url"), - "snapshotInterval": stream.get("snapshot_interval"), - "modelId": stream["modelId"], - "modelName": stream["modelName"], - "modelUrl": stream.get("modelUrl", ""), - "cropX1": stream.get("cropX1"), - "cropY1": stream.get("cropY1"), - "cropX2": stream.get("cropX2"), - "cropY2": stream.get("cropY2") - }) - await reconcile_subscriptions(current_subs, websocket) - - elif msg_type == "old_subscribe_logic_removed": - if model_url: - with models_lock: - if (camera_id not in models) or (modelId not in models[camera_id]): - logger.info(f"Loading model from {model_url} for camera {camera_id}, modelId {modelId}") - extraction_dir = os.path.join("models", camera_identifier, str(modelId)) - os.makedirs(extraction_dir, exist_ok=True) - # If model_url is remote, download it first. 
- parsed = urlparse(model_url) - if parsed.scheme in ("http", "https"): - logger.info(f"Downloading remote .mpta file from {model_url}") - filename = os.path.basename(parsed.path) or f"model_{modelId}.mpta" - local_mpta = os.path.join(extraction_dir, filename) - logger.debug(f"Download destination: {local_mpta}") - local_path = download_mpta(model_url, local_mpta) - if not local_path: - logger.error(f"Failed to download the remote .mpta file from {model_url}") - error_response = { - "type": "error", - "subscriptionIdentifier": subscriptionIdentifier, - "error": f"Failed to download model from {model_url}" - } - await websocket.send_json(error_response) - continue - model_tree = load_pipeline_from_zip(local_path, extraction_dir) - else: - logger.info(f"Loading local .mpta file from {model_url}") - # Check if file exists before attempting to load - if not os.path.exists(model_url): - logger.error(f"Local .mpta file not found: {model_url}") - logger.debug(f"Current working directory: {os.getcwd()}") - error_response = { - "type": "error", - "subscriptionIdentifier": subscriptionIdentifier, - "error": f"Model file not found: {model_url}" - } - await websocket.send_json(error_response) - continue - model_tree = load_pipeline_from_zip(model_url, extraction_dir) - if model_tree is None: - logger.error(f"Failed to load model {modelId} from .mpta file for camera {camera_id}") - error_response = { - "type": "error", - "subscriptionIdentifier": subscriptionIdentifier, - "error": f"Failed to load model {modelId}" - } - await websocket.send_json(error_response) - continue - if camera_id not in models: - models[camera_id] = {} - models[camera_id][modelId] = model_tree - logger.info(f"Successfully loaded model {modelId} for camera {camera_id}") - logger.debug(f"Model extraction directory: {extraction_dir}") - if camera_id and (rtsp_url or snapshot_url): - with streams_lock: - # Determine camera URL for shared stream management - camera_url = snapshot_url if snapshot_url else rtsp_url - - if camera_id not in streams and len(streams) < max_streams: - # Check if we already have a stream for this camera URL - shared_stream = camera_streams.get(camera_url) - - if shared_stream: - # Reuse existing stream - logger.info(f"Reusing existing stream for camera URL: {camera_url}") - buffer = shared_stream["buffer"] - stop_event = shared_stream["stop_event"] - thread = shared_stream["thread"] - mode = shared_stream["mode"] - - # Increment reference count - shared_stream["ref_count"] = shared_stream.get("ref_count", 0) + 1 - else: - # Create new stream - buffer = queue.Queue(maxsize=1) - stop_event = threading.Event() - - if snapshot_url and snapshot_interval: - logger.info(f"Creating new snapshot stream for camera {camera_id}: {snapshot_url}") - thread = threading.Thread(target=snapshot_reader, args=(camera_id, snapshot_url, snapshot_interval, buffer, stop_event)) - thread.daemon = True - thread.start() - mode = "snapshot" - - # Store shared stream info - shared_stream = { - "buffer": buffer, - "thread": thread, - "stop_event": stop_event, - "mode": mode, - "url": snapshot_url, - "snapshot_interval": snapshot_interval, - "ref_count": 1 - } - camera_streams[camera_url] = shared_stream - - elif rtsp_url: - logger.info(f"Creating new RTSP stream for camera {camera_id}: {rtsp_url}") - cap = cv2.VideoCapture(rtsp_url) - if not cap.isOpened(): - logger.error(f"Failed to open RTSP stream for camera {camera_id}") - continue - thread = threading.Thread(target=frame_reader, args=(camera_id, cap, buffer, stop_event)) - 
thread.daemon = True - thread.start() - mode = "rtsp" - - # Store shared stream info - shared_stream = { - "buffer": buffer, - "thread": thread, - "stop_event": stop_event, - "mode": mode, - "url": rtsp_url, - "cap": cap, - "ref_count": 1 - } - camera_streams[camera_url] = shared_stream - else: - logger.error(f"No valid URL provided for camera {camera_id}") - continue - - # Create stream info for this subscription - stream_info = { - "buffer": buffer, - "thread": thread, - "stop_event": stop_event, - "modelId": modelId, - "modelName": modelName, - "subscriptionIdentifier": subscriptionIdentifier, - "cropX1": cropX1, - "cropY1": cropY1, - "cropX2": cropX2, - "cropY2": cropY2, - "mode": mode, - "camera_url": camera_url - } - - if mode == "snapshot": - stream_info["snapshot_url"] = snapshot_url - stream_info["snapshot_interval"] = snapshot_interval - elif mode == "rtsp": - stream_info["rtsp_url"] = rtsp_url - stream_info["cap"] = shared_stream["cap"] - - streams[camera_id] = stream_info - subscription_to_camera[camera_id] = camera_url - - elif camera_id and camera_id in streams: - # If already subscribed, unsubscribe first - logger.info(f"Resubscribing to camera {camera_id}") - # Note: Keep models in memory for reuse across subscriptions - elif msg_type == "unsubscribe": - payload = data.get("payload", {}) - subscriptionIdentifier = payload.get("subscriptionIdentifier") - camera_id = subscriptionIdentifier - with streams_lock: - if camera_id and camera_id in streams: - stream = streams.pop(camera_id) - camera_url = subscription_to_camera.pop(camera_id, None) - - if camera_url and camera_url in camera_streams: - shared_stream = camera_streams[camera_url] - shared_stream["ref_count"] -= 1 - - # If no more references, stop the shared stream - if shared_stream["ref_count"] <= 0: - logger.info(f"Stopping shared stream for camera URL: {camera_url}") - shared_stream["stop_event"].set() - shared_stream["thread"].join() - if "cap" in shared_stream: - shared_stream["cap"].release() - del camera_streams[camera_url] - else: - logger.info(f"Shared stream for {camera_url} still has {shared_stream['ref_count']} references") - - # Clean up cached frame - latest_frames.pop(camera_id, None) - logger.info(f"Unsubscribed from camera {camera_id}") - # Note: Keep models in memory for potential reuse - elif msg_type == "requestState": - cpu_usage = psutil.cpu_percent() - memory_usage = psutil.virtual_memory().percent - if torch.cuda.is_available(): - gpu_usage = torch.cuda.utilization() if hasattr(torch.cuda, 'utilization') else None - gpu_memory_usage = torch.cuda.memory_reserved() / (1024 ** 2) + camera_id = data.get("cameraIdentifier") + rtsp_url = data.get("rtspUrl") + model_url = data.get("modelUrl") + modelId = data.get("modelId") + modelName = data.get("modelName") + + if model_url: + print(f"Downloading model from {model_url}") + parsed_url = urlparse(model_url) + filename = os.path.basename(parsed_url.path) + model_filename = os.path.join("models", filename) + # Download the model + response = requests.get(model_url, stream=True) + if response.status_code == 200: + with open(model_filename, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + logging.info(f"Downloaded model from {model_url} to {model_filename}") + model = YOLO(model_filename) + if torch.cuda.is_available(): + model.to('cuda') + class_names = model.names else: - gpu_usage = None - gpu_memory_usage = None - - camera_connections = [ - { - "subscriptionIdentifier": stream["subscriptionIdentifier"], - "modelId": 
stream["modelId"], - "modelName": stream["modelName"], - "online": True, - # Include all subscription parameters for proper change detection - "rtspUrl": stream.get("rtsp_url"), - "snapshotUrl": stream.get("snapshot_url"), - "snapshotInterval": stream.get("snapshot_interval"), - **{k: v for k, v in get_crop_coords(stream).items() if v is not None} + logging.error(f"Failed to download model from {model_url}") + continue + if camera_id and rtsp_url: + if camera_id not in streams and len(streams) < max_streams: + cap = cv2.VideoCapture(rtsp_url) + if not cap.isOpened(): + logging.error(f"Failed to open RTSP stream for camera {camera_id}") + continue + buffer = queue.Queue(maxsize=1) + stop_event = threading.Event() + thread = threading.Thread(target=frame_reader, args=(camera_id, cap, buffer, stop_event)) + thread.daemon = True + thread.start() + streams[camera_id] = { + 'cap': cap, + 'buffer': buffer, + 'thread': thread, + 'rtsp_url': rtsp_url, + 'stop_event': stop_event, + 'modelId': modelId, + 'modelName': modelName } - for camera_id, stream in streams.items() - ] - - state_report = { - "type": "stateReport", - "cpuUsage": cpu_usage, - "memoryUsage": memory_usage, - "gpuUsage": gpu_usage, - "gpuMemoryUsage": gpu_memory_usage, - "cameraConnections": camera_connections - } - await websocket.send_text(json.dumps(state_report)) - - elif msg_type == "setSessionId": - payload = data.get("payload", {}) - display_identifier = payload.get("displayIdentifier") - session_id = payload.get("sessionId") - - if display_identifier: - # Store session ID for this display - if session_id is None: - session_ids.pop(display_identifier, None) - logger.info(f"Cleared session ID for display {display_identifier}") - else: - session_ids[display_identifier] = session_id - logger.info(f"Set session ID {session_id} for display {display_identifier}") - - elif msg_type == "patchSession": - session_id = data.get("sessionId") - patch_data = data.get("data", {}) - - # For now, just acknowledge the patch - actual implementation depends on backend requirements - response = { - "type": "patchSessionResult", - "payload": { - "sessionId": session_id, - "success": True, - "message": "Session patch acknowledged" - } - } - await websocket.send_json(response) - logger.info(f"Acknowledged patch for session {session_id}") - - else: - logger.error(f"Unknown message type: {msg_type}") + logging.info(f"Subscribed to camera {camera_id} with modelId {modelId}, modelName {modelName} and URL {rtsp_url}") + elif camera_id and camera_id in streams: + stream = streams.pop(camera_id) + stream['cap'].release() + logging.info(f"Unsubscribed from camera {camera_id}") + elif data.get("command") == "stop": + logging.info("Received stop command") + break except json.JSONDecodeError: - logger.error("Received invalid JSON message") + logging.error("Received invalid JSON message") except (WebSocketDisconnect, ConnectionClosedError) as e: - logger.warning(f"WebSocket disconnected: {e}") - break + logging.warning(f"WebSocket disconnected: {e}") + break except Exception as e: - logger.error(f"Error handling message: {e}") + logging.error(f"Error handling message: {e}") break - try: - await websocket.accept() - stream_task = asyncio.create_task(process_streams()) - heartbeat_task = asyncio.create_task(send_heartbeat()) - message_task = asyncio.create_task(on_message()) - await asyncio.gather(heartbeat_task, message_task) except Exception as e: - logger.error(f"Error in detect websocket: {e}") + logging.error(f"Unexpected error in WebSocket connection: {e}") 
finally: - stream_task.cancel() - await stream_task - with streams_lock: - # Clean up shared camera streams - for camera_url, shared_stream in camera_streams.items(): - shared_stream["stop_event"].set() - shared_stream["thread"].join() - if "cap" in shared_stream: - shared_stream["cap"].release() - while not shared_stream["buffer"].empty(): - try: - shared_stream["buffer"].get_nowait() - except queue.Empty: - pass - logger.info(f"Released shared camera stream for {camera_url}") - - streams.clear() - camera_streams.clear() - subscription_to_camera.clear() - with models_lock: - models.clear() - latest_frames.clear() - session_ids.clear() - logger.info("WebSocket connection closed") + task.cancel() + await task + for camera_id, stream in streams.items(): + stream['stop_event'].set() + stream['thread'].join() + stream['cap'].release() + stream['buffer'].queue.clear() + logging.info(f"Released camera {camera_id} and cleaned up resources") + streams.clear() + if model_path and os.path.exists(model_path): + os.remove(model_path) + logging.info(f"Deleted model file {model_path}") + logging.info("WebSocket connection closed") diff --git a/config.json b/config.json index 311bbf4..b9ffa8f 100644 --- a/config.json +++ b/config.json @@ -3,5 +3,5 @@ "max_streams": 5, "target_fps": 2, "reconnect_interval_sec": 5, - "max_retries": -1 + "max_retries": 3 } diff --git a/debug/rtsp_webcam.py b/debug/rtsp_webcam.py deleted file mode 100644 index 4d9f3ae..0000000 --- a/debug/rtsp_webcam.py +++ /dev/null @@ -1,51 +0,0 @@ -import cv2 -import gi -import time - -gi.require_version('Gst', '1.0') -from gi.repository import Gst - -# Initialize GStreamer -Gst.init(None) - -# Open the default webcam -cap = cv2.VideoCapture(0) - -# Define the RTSP pipeline using GStreamer -rtsp_pipeline = ( - "appsrc ! videoconvert ! video/x-raw,format=I420 ! x264enc tune=zerolatency bitrate=2048 speed-preset=ultrafast " - "! rtph264pay config-interval=1 pt=96 ! udpsink host=127.0.0.1 port=8554" -) - -# Create GStreamer pipeline -pipeline = Gst.parse_launch(rtsp_pipeline) -appsrc = pipeline.get_by_name("appsrc") - -# Start streaming -pipeline.set_state(Gst.State.PLAYING) -time.sleep(1) - -while cap.isOpened(): - ret, frame = cap.read() - if not ret: - break - - # Convert frame to I420 format (YUV420) - frame = cv2.cvtColor(frame, cv2.COLOR_BGR2YUV_I420) - data = frame.tobytes() - - # Push frame to GStreamer pipeline - buf = Gst.Buffer.new_allocate(None, len(data), None) - buf.fill(0, data) - appsrc.emit("push-buffer", buf) - - # Display frame locally (optional) - cv2.imshow("RTSP Streaming", frame) - - if cv2.waitKey(1) & 0xFF == ord('q'): - break - -# Cleanup -cap.release() -cv2.destroyAllWindows() -pipeline.set_state(Gst.State.NULL) diff --git a/docs/MasterElection.md b/docs/MasterElection.md deleted file mode 100644 index c5980b8..0000000 --- a/docs/MasterElection.md +++ /dev/null @@ -1,1449 +0,0 @@ -# Master Election Service Specification - Distributed Process Coordination - -## Overview - -The MasterElection service implements a Redis-based distributed leadership election and process coordination system for the CMS backend cluster. This service provides robust master-slave coordination with automatic failover, process registration, and TTL-based cleanup for multi-process backend deployments. - -**Key Architectural Principle**: Redis-based coordination with atomic Lua scripts ensures consistency and prevents split-brain scenarios while providing automatic cleanup through per-entry TTL expiration. 
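The deleted spec spells this election mechanism out later as Lua scripts executed by the TypeScript service. As a minimal sketch of the same atomic acquire-plus-heartbeat step driven from Python with redis-py (purely illustrative: the function, client wiring, and 30-second default follow the spec's described key names and `MASTER_LOCK_TTL`, but are not the actual `MasterElection.ts` implementation):

```python
import time
import uuid
import redis

# Acquire the master lock and write the heartbeat in one atomic round trip,
# mirroring the acquisition script described in the deleted spec.
ACQUIRE_LUA = """
if redis.call("SET", KEYS[1], ARGV[1], "NX", "PX", ARGV[2]) then
    redis.call("SET", KEYS[2], ARGV[3], "PX", ARGV[2])
    return 1
else
    return 0
end
"""

def try_acquire_master(client: redis.Redis, process_id: str, ttl_ms: int = 30000) -> bool:
    """Return True if this process just became master, False otherwise."""
    acquired = client.eval(
        ACQUIRE_LUA,
        2,                              # number of KEYS that follow
        "master-election:master",       # KEYS[1]: master lock
        "master-election:heartbeat",    # KEYS[2]: heartbeat timestamp
        process_id,                     # ARGV[1]: lock owner
        ttl_ms,                         # ARGV[2]: lock TTL in milliseconds
        str(time.time()),               # ARGV[3]: heartbeat value
    )
    return acquired == 1

if __name__ == "__main__":
    r = redis.Redis()
    me = str(uuid.uuid4())
    print("became master" if try_acquire_master(r, me) else "running as slave")
```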
- -## Architecture Components - -### Two-Tier Process Coordination - -The system manages two distinct coordination layers: - -1. **Master Election Layer**: Single leader election across all backend processes -2. **Process Registry Layer**: Individual process registration and heartbeat management - -### Leadership Election Pattern - -- **Single Master**: Only one backend process holds master lock at any time -- **Automatic Failover**: Master election triggers immediately when current master fails -- **Heartbeat-Based**: Master must renew lock every 10 seconds or lose leadership -- **Lua Script Atomicity**: All Redis operations use atomic Lua scripts to prevent race conditions -- **Event-Driven Transitions**: Role changes emit events for dependent services integration - -## Core Components - -### MasterElection Class -`cms-backend/services/MasterElection.ts` - -Primary coordination service that handles distributed leadership election and process lifecycle management. - -**Key Responsibilities:** -- Manages master lock acquisition and renewal using atomic Redis operations -- Provides process registration with automatic TTL-based expiration (45 seconds) -- Emits role transition events for dependent service coordination -- Handles slave registration and heartbeat management -- Maintains process-to-channel mapping for message routing - -### Process Management System - -**Process Registration:** -- Each backend process registers with unique UUID-based identifier -- Process metadata includes role, channel name, and capabilities -- TTL-based expiration (45 seconds) with heartbeat renewal -- Automatic cleanup of stale process entries without manual intervention - -**Channel Assignment:** -- Each process gets assigned a unique Redis pub/sub channel -- Channel mapping stored persistently for message routing -- Master process maintains channel-to-process mapping - -## Data Structures - -### MasterElectionEvents -```typescript -interface MasterElectionEvents { - 'master-acquired': () => void; // This process became master - 'master-lost': () => void; // This process lost master status - 'election-started': () => void; // Election process initiated - 'election-completed': (isMaster: boolean) => void; // Election finished - 'slave-registered': (slave: SlaveNode) => void; // New slave joined - 'slave-removed': (nodeId: string) => void; // Slave left/expired - 'error': (error: Error) => void; // Election/coordination errors -} -``` - -### ProcessInfo -```typescript -interface ProcessInfo { - processId: string; // Unique process identifier (UUID) - nodeId: string; // Node identifier (same as processId) - role: 'master' | 'slave'; // Current process role - lastSeen: string; // Last heartbeat timestamp (ISO string) - capabilities: ProcessCapabilities; // Process feature capabilities -} - -// Channel name derived as: `worker:slave:${processInfo.processId}` -``` - -### ProcessCapabilities -```typescript -interface ProcessCapabilities { - canProcessDetections: boolean; // Can handle AI detection processing - maxSubscriptions: number; // Maximum camera subscriptions supported - preferredWorkload: number; // Preferred subscription load (0-100) -} -``` - -### SlaveNode -```typescript -interface SlaveNode { - nodeId: string; // Unique slave node identifier - identifier: string; // Human-readable process identifier - registeredAt: string; // Initial registration timestamp - lastSeen: string; // Last heartbeat timestamp - metadata?: Record; // Optional process metadata -} -``` - -## Redis Data Architecture - -### 
Master Election Keys -- `master-election:master` - Current master process identifier with TTL lock -- `master-election:heartbeat` - Master heartbeat timestamp for liveness detection -- `master-election:master_process` - Detailed master process information (JSON) - -### Process Registry Keys (TTL-Enabled) -- `master-election:processes` - Hash map of all active processes with per-entry TTL (45s) -- Channel names derived directly from process ID: `worker:slave:{processId}` - no separate mapping needed - -### TTL Configuration -```typescript -// Per-entry TTL using hSetEx for automatic cleanup -PROCESS_TTL = 45; // Process registration expires after 45 seconds -HEARTBEAT_RENEWAL_INTERVAL = 10; // Process heartbeats renew TTL every 10 seconds -MASTER_LOCK_TTL = 30; // Master lock expires after 30 seconds -``` - -### Data Persistence Strategy -Uses **per-entry TTL with hSetEx** for automatic cleanup: -- Process entries automatically expire if heartbeats stop -- No manual cleanup processes required -- Prevents memory leaks from crashed processes -- Self-healing system that maintains only active processes -- Slave information derived from processes with role='slave' - no separate storage needed -- Channel names derived directly from process ID - no mapping table required - -## Master Election Algorithm - -### Election Flow Diagram - -```mermaid -graph TB - subgraph "Election Process" - START[Process Starts] --> ATTEMPT[attemptElection] - ATTEMPT --> ACQUIRE{acquireMasterLock} - - ACQUIRE -->|Success| MASTER[becomeMaster] - ACQUIRE -->|Failed| SLAVE[becomeSlave] - - MASTER --> HEARTBEAT[startHeartbeat] - SLAVE --> REGISTER[registerAsSlave] - - HEARTBEAT --> RENEW{renewMasterLock} - RENEW -->|Success| CONTINUE[Continue as Master] - RENEW -->|Failed| STEPDOWN[Step Down → SLAVE] - - REGISTER --> MONITOR[Monitor Master] - MONITOR --> CHECK{Master Exists?} - CHECK -->|Yes| WAIT[Wait and Monitor] - CHECK -->|No| ATTEMPT - - STEPDOWN --> SLAVE - WAIT --> MONITOR - CONTINUE --> RENEW - end - - subgraph "Atomic Operations" - ACQUIRE --> LUA1[Lua Script: SET master NX + SET heartbeat] - RENEW --> LUA2[Lua Script: Check owner + PEXPIRE + SET heartbeat] - STEPDOWN --> LUA3[Lua Script: Check owner + DEL master + DEL heartbeat] - end -``` - -### Atomic Lock Operations - -#### Master Lock Acquisition -```lua --- Atomic master lock acquisition with heartbeat -if redis.call("SET", KEYS[1], ARGV[1], "NX", "PX", ARGV[2]) then - redis.call("SET", KEYS[2], ARGV[3], "PX", ARGV[2]) - return 1 -else - return 0 -end -``` - -#### Master Lock Renewal -```lua --- Atomic master lock renewal with heartbeat update -if redis.call("GET", KEYS[1]) == ARGV[1] then - redis.call("PEXPIRE", KEYS[1], ARGV[2]) - redis.call("SET", KEYS[2], ARGV[3], "PX", ARGV[2]) - return 1 -else - return 0 -end -``` - -#### Master Lock Release -```lua --- Atomic master lock release -if redis.call("GET", KEYS[1]) == ARGV[1] then - redis.call("DEL", KEYS[1], KEYS[2]) - return 1 -else - return 0 -end -``` - -## Process Lifecycle Management - -### Process Registration Flow - -```mermaid -sequenceDiagram - participant P as Process - participant R as Redis - participant M as Master Process - - Note over P,M: Process Registration with TTL - - P->>+P: Generate UUID processId - P->>+P: Determine role (master/slave) - P->>+P: Assign channel name - - P->>+R: hSetEx(processes, processId, processInfo, {EX: 45}) - R-->>-P: Registration confirmed - - P->>+R: hSet(channels, processId, channelName) - R-->>-P: Channel mapping stored - - alt Process becomes master - 
P->>+R: set(master_process, processInfo) - R-->>-P: Master process registered - P->>+M: emit('master-acquired') - else Process becomes slave - P->>+R: hSet(slaves, nodeId, slaveInfo) - R-->>-P: Slave registered - P->>+M: emit('slave-registered', slaveInfo) - end - - Note over P,M: Heartbeat Loop (Every 10s) - - loop Every 10 seconds - P->>+P: updateProcessHeartbeat(processId) - P->>+R: hSetEx(processes, processId, updatedInfo, {EX: 45}) - Note over R: TTL renewed for 45 seconds - R-->>-P: Heartbeat recorded - end - - Note over P,M: Automatic Expiration (No heartbeat) - - R->>R: 45 seconds pass without heartbeat - R->>R: Process entry automatically expires - Note over R: No manual cleanup needed -``` - -### Master Election Scenarios - -#### Scenario 1: Initial Startup -```mermaid -sequenceDiagram - participant P1 as Process 1 - participant P2 as Process 2 - participant R as Redis - - Note over P1,R: First Process Startup - - P1->>+P1: attemptElection() - P1->>+R: Lua Script: SET master NX - R-->>-P1: Success (no existing master) - - P1->>+P1: becomeMaster() - P1->>+P1: emit('master-acquired') - P1->>+P1: startHeartbeat() every 10s - - Note over P1,R: Second Process Startup - - P2->>+P2: attemptElection() - P2->>+R: Lua Script: SET master NX - R-->>-P2: Failed (master exists) - - P2->>+P2: becomeSlave() - P2->>+R: hSet(slaves, nodeId, slaveInfo) - P2->>+P2: emit('election-completed', false) -``` - -#### Scenario 2: Master Failure and Failover -```mermaid -sequenceDiagram - participant P1 as Master Process - participant P2 as Slave Process 1 - participant P3 as Slave Process 2 - participant R as Redis - - Note over P1,R: Normal Operation - - P1->>+R: Heartbeat renewal every 10s - P2->>+P2: Monitor master existence every 5s - P3->>+P3: Monitor master existence every 5s - - Note over P1,R: Master Failure - - P1--XP1: Process crashes/network failure - - Note over R: Master lock expires after 30s - - R->>R: Master lock TTL expires - - Note over P2,R: Slave Detects Missing Master - - P2->>+R: checkMasterExists() Lua Script - R-->>-P2: Master not found or stale - - P2->>+P2: Random delay (0-2s) to reduce collisions - P2->>+R: attemptElection() - Lua Script: SET master NX - R-->>-P2: Success - became new master - - P2->>+P2: becomeMaster() - P2->>+P2: emit('master-acquired') - - Note over P3,R: Other Slave Detects New Master - - P3->>+R: checkMasterExists() - R-->>-P3: New master found - P3->>+P3: Continue as slave - no election needed -``` - -## TTL-Based Cleanup System - -### Per-Entry TTL Implementation - -```typescript -// Process registration with automatic TTL expiration -public async registerProcess(processInfo: ProcessInfo): Promise { - // Set process registration with 45 second TTL per entry - await redisClient.hSetEx( - this.processesKey, - { - [processInfo.processId]: JSON.stringify(processInfo) - }, - { - expiration: { - type: 'EX', - value: 45 // 45 second TTL per process entry - } - } - ); - - // Map process to channel (no TTL - cleaned up manually) - await redisClient.hSet( - this.processChannelsKey, - processInfo.processId, - processInfo.channelName - ); -} - -// Heartbeat renewal extends TTL automatically -public async updateProcessHeartbeat(processId: string): Promise { - const processData = await redisClient.hGet(this.processesKey, processId); - if (processData) { - const processInfo: ProcessInfo = JSON.parse(processData); - processInfo.lastSeen = new Date().toISOString(); - - // Update process and renew TTL on heartbeat (per-entry TTL) - await redisClient.hSetEx( - 
this.processesKey, - { - [processId]: JSON.stringify(processInfo) - }, - { - expiration: { - type: 'EX', - value: 45 // Renew 45 second TTL for this specific process entry - } - } - ); - } -} -``` - -### Cleanup Behavior - -```mermaid -graph TB - subgraph "TTL Cleanup Process" - REG[Process Registration] --> TTL[45s TTL Set] - TTL --> HB{Heartbeat Within 45s?} - - HB -->|Yes| RENEW[TTL Renewed to 45s] - HB -->|No| EXPIRE[Entry Automatically Expires] - - RENEW --> HB - EXPIRE --> GONE[Process Removed from Redis] - - GONE --> DETECT[Other Processes Detect Absence] - DETECT --> REBALANCE[Automatic Rebalancing] - end - - subgraph "Manual vs TTL Cleanup" - MANUAL[Manual Cleanup Process] - AUTOMATIC[TTL-Based Cleanup] - - MANUAL -.->|"❌ Complex"| ISSUES[Race Conditions
Memory Leaks
Stale Data] - AUTOMATIC -.->|"✅ Simple"| BENEFITS[Self-Healing
No Race Conditions
Guaranteed Cleanup] - end -``` - -## Event System Architecture - -### Event Emission Flow - -```mermaid -graph TD - subgraph "Election Events" - START[Election Started] --> ATTEMPT[Attempt Lock Acquisition] - ATTEMPT --> SUCCESS{Lock Acquired?} - - SUCCESS -->|Yes| MASTER[Become Master] - SUCCESS -->|No| SLAVE[Become Slave] - - MASTER --> MASTER_EVENT[emit('master-acquired')] - SLAVE --> SLAVE_EVENT[emit('election-completed', false)] - - MASTER_EVENT --> HEARTBEAT[Start Heartbeat Loop] - SLAVE_EVENT --> MONITOR[Start Master Monitoring] - end - - subgraph "Heartbeat Events" - HEARTBEAT --> RENEW{Renew Lock?} - RENEW -->|Success| CONTINUE[Continue as Master] - RENEW -->|Failed| LOST[emit('master-lost')] - - LOST --> STEPDOWN[Step Down to Slave] - STEPDOWN --> TRIGGER[Trigger New Election] - CONTINUE --> HEARTBEAT - end - - subgraph "Slave Management Events" - SLAVE_JOIN[New Slave Joins] --> SLAVE_REG[emit('slave-registered')] - SLAVE_TIMEOUT[Slave Heartbeat Timeout] --> SLAVE_REM[emit('slave-removed')] - - SLAVE_REG --> NOTIFY[Notify Dependent Services] - SLAVE_REM --> CLEANUP[Cleanup Assignments] - end -``` - -### Event Handler Integration - -```typescript -// Example: Camera module integration with MasterElection events -const masterElection = getMasterElection(); - -masterElection.on('master-acquired', () => { - // This process became master - start managing workers - masterSlaveWorkerCluster.becomeMaster(); - logger.info('Camera cluster: Became master, connecting to workers'); -}); - -masterElection.on('master-lost', () => { - // This process lost master status - become slave - masterSlaveWorkerCluster.becomeSlave(); - logger.info('Camera cluster: Became slave, disconnecting workers'); -}); - -masterElection.on('slave-registered', (slave: SlaveNode) => { - // New backend process joined - rebalance workload - masterSlaveWorkerCluster.handleSlaveJoined(slave); - logger.info(`Camera cluster: Slave ${slave.nodeId} joined`); -}); - -masterElection.on('slave-removed', (nodeId: string) => { - // Backend process left - reassign its workload - masterSlaveWorkerCluster.handleSlaveLeft(nodeId); - logger.info(`Camera cluster: Slave ${nodeId} removed`); -}); -``` - -## Process Coordination Patterns - -### Master Role Responsibilities - -```mermaid -graph TB - subgraph "Master Process Duties" - LOCK[Maintain Master Lock] --> HEARTBEAT[Send Heartbeats Every 10s] - HEARTBEAT --> MONITOR[Monitor All Slave Processes] - - MONITOR --> CLEANUP[Cleanup Stale Slave Entries] - CLEANUP --> BALANCE[Coordinate Resource Balancing] - - BALANCE --> WORKERS[Manage Worker Connections] - WORKERS --> ROUTE[Route Messages to Slaves] - - ROUTE --> STATUS[Provide Cluster Status] - STATUS --> LOCK - end - - subgraph "Master Failure Scenarios" - NETWORK[Network Partition] --> TIMEOUT[Lock Renewal Timeout] - CRASH[Process Crash] --> TIMEOUT - OVERLOAD[Resource Overload] --> TIMEOUT - - TIMEOUT --> EXPIRE[Master Lock Expires] - EXPIRE --> ELECTION[New Election Triggered] - ELECTION --> RECOVER[New Master Elected] - end -``` - -### Slave Role Responsibilities - -```mermaid -graph TB - subgraph "Slave Process Duties" - REGISTER[Register with Master Election] --> HEARTBEAT[Send Heartbeats Every 5s] - HEARTBEAT --> MONITOR[Monitor Master Existence] - - MONITOR --> PROCESS[Process Assigned Messages] - PROCESS --> REPORT[Report Status to Master] - - REPORT --> DETECT{Master Missing?} - DETECT -->|No| MONITOR - DETECT -->|Yes| ELECTION[Trigger Election] - - ELECTION --> ATTEMPT{Win Election?} - ATTEMPT -->|Yes| PROMOTE[Become Master] 
- ATTEMPT -->|No| CONTINUE[Continue as Slave] - - PROMOTE --> MASTER[Master Role Duties] - CONTINUE --> REGISTER - end -``` - -## Class Responsibilities Overview - -### Core Class Functions - -| Class | Primary Responsibility | Key Methods | Process Type | -|-------|----------------------|-------------|--------------| -| **MasterElection** | Distributed coordination and leadership election | • `start()` - Initialize election process
• `attemptElection()` - Try to acquire master lock<br/>• `becomeMaster()` - Transition to master role<br/>• `becomeSlave()` - Transition to slave role<br/>• `waitForElectionComplete()` - Synchronous election waiting | Both Master & Slave |
-| **Process Registry** | Process lifecycle management | • `registerProcess()` - Register with TTL<br/>• `updateProcessHeartbeat()` - Renew TTL<br/>• `getAllProcesses()` - Get active processes<br/>• `getProcessesByRole()` - Filter by master/slave<br/>• `unregisterProcess()` - Manual cleanup | Both Master & Slave |
-| **Master Lock Manager** | Atomic lock operations | • `acquireMasterLock()` - Lua script lock acquisition<br/>• `renewMasterLock()` - Lua script lock renewal<br/>• `releaseMasterLock()` - Lua script lock release<br/>• `checkMasterExists()` - Lua script master validation | Both Master & Slave |
-| **Slave Management** | Slave registration and monitoring | • `registerAsSlave()` - Register as slave node<br/>• `updateSlaveHeartbeat()` - Update slave status<br/>• `cleanupStaleSlaves()` - Remove expired slaves
• `getSlaves()` - Get all registered slaves | Both Master & Slave | - -## Object Relationship Diagrams - -### Core Class Structure and Dependencies - -```mermaid -classDiagram - class MasterElection { - -nodeId: string - -identifier: string - -isMaster: boolean - -lockTtl: number - -heartbeatInterval: number - +start() - +stop() - +getIsMaster(): boolean - +getNodeId(): string - +waitForElectionComplete(): Promise~boolean~ - -attemptElection() - -acquireMasterLock(): Promise~boolean~ - -renewMasterLock(): Promise~boolean~ - -releaseMasterLock() - -becomeMaster() - -becomeSlave() - -checkMasterExists(): Promise~boolean~ - } - - class ProcessRegistry { - +registerProcess(processInfo) - +updateProcessHeartbeat(processId) - +getAllProcesses(): Promise~ProcessInfo[]~ - +getMasterProcess(): Promise~ProcessInfo~ - +getProcessesByRole(role): Promise~ProcessInfo[]~ - +unregisterProcess(processId) - +getProcessChannel(processId): Promise~string~ - } - - class SlaveManagement { - +registerAsSlave() - +unregisterFromSlaves() - +updateSlaveHeartbeat() - +getSlaves(): Promise~SlaveNode[]~ - +getSlave(nodeId): Promise~SlaveNode~ - +getSlaveCount(): Promise~number~ - -cleanupStaleSlaves() - -startSlaveManagement() - -stopSlaveManagement() - } - - class EventEmitter { - +on(event, listener) - +emit(event, ...args) - +once(event, listener) - +off(event, listener) - } - - MasterElection --|> EventEmitter : extends - MasterElection --* ProcessRegistry : contains - MasterElection --* SlaveManagement : contains - - MasterElection --> Redis : uses for coordination - ProcessRegistry --> Redis : uses hSetEx for TTL - SlaveManagement --> Redis : uses for slave state -``` - -### Redis Operations and Key Management - -```mermaid -graph TB - subgraph "Redis Key Structure" - MASTER[master-election:master
String - Current master ID with TTL] - HEARTBEAT[master-election:heartbeat
String - Master heartbeat timestamp] - MASTER_PROC[master-election:master_process
String - Master ProcessInfo JSON] - - PROCESSES[master-election:processes
Hash - ProcessInfo with per-entry TTL] - CHANNELS[master-election:channels
Hash - ProcessID → Channel mapping] - SLAVES[master-election:slaves
Hash - SlaveNode data] - end - - subgraph "Atomic Operations" - LUA1[Master Acquisition
SET master NX + SET heartbeat] - LUA2[Master Renewal
Check owner + PEXPIRE + SET heartbeat] - LUA3[Master Release
Check owner + DEL master + heartbeat] - LUA4[Master Check
GET master + GET heartbeat + validate TTL] - end - - subgraph "TTL Operations" - HSETEX1[Process Registration
hSetEx with 45s TTL per entry] - HSETEX2[Heartbeat Renewal
hSetEx renews TTL to 45s] - AUTO[Automatic Expiration
Redis removes expired entries] - end - - MASTER --> LUA1 - MASTER --> LUA2 - MASTER --> LUA3 - HEARTBEAT --> LUA1 - HEARTBEAT --> LUA2 - HEARTBEAT --> LUA4 - - PROCESSES --> HSETEX1 - PROCESSES --> HSETEX2 - PROCESSES --> AUTO -``` - -## Method Call Flow Analysis - -### Election and Role Transition Flow - -```mermaid -sequenceDiagram - participant App as Application - participant ME as MasterElection - participant R as Redis - participant Dep as Dependent Services - - Note over App,Dep: Election Initialization - - App->>+ME: start() - ME->>+ME: attemptElection() - ME->>+ME: emit('election-started') - - ME->>+R: Lua Script: acquireMasterLock() - - alt Lock acquired successfully - R-->>-ME: Success (1) - ME->>+ME: becomeMaster() - ME->>+ME: startHeartbeat() - every 10s - ME->>+ME: startSlaveManagement() - ME->>+Dep: emit('master-acquired') - ME->>+ME: emit('election-completed', true) - else Lock acquisition failed - R-->>-ME: Failed (0) - ME->>+ME: becomeSlave() - ME->>+R: hSet(slaves, nodeId, slaveInfo) - ME->>+ME: startPeriodicCheck() - every 5s - ME->>+Dep: emit('election-completed', false) - end - - Note over App,Dep: Heartbeat and Monitoring Loop - - loop Every 10 seconds (Master) / 5 seconds (Slave) - alt Process is Master - ME->>+R: Lua Script: renewMasterLock() - alt Renewal successful - R-->>-ME: Success (1) - ME->>+ME: Continue as master - else Renewal failed - R-->>-ME: Failed (0) - ME->>+ME: becomeSlave() - ME->>+Dep: emit('master-lost') - ME->>+ME: attemptElection() after delay - end - else Process is Slave - ME->>+R: Lua Script: checkMasterExists() - alt Master exists and healthy - R-->>-ME: Master found (1) - ME->>+ME: Continue monitoring - else No master or stale - R-->>-ME: No master (0) - ME->>+ME: attemptElection() with random delay - end - end - end -``` - -### Process Registration and TTL Management Flow - -```mermaid -sequenceDiagram - participant P as Process - participant ME as MasterElection - participant R as Redis - participant Auto as Redis TTL - - Note over P,Auto: Process Registration with TTL - - P->>+ME: registerProcess(processInfo) - - ME->>+R: hSetEx(processes, processId, processInfo, {EX: 45}) - Note over R: Entry set with 45 second TTL - R-->>-ME: Registration confirmed - - ME->>+R: hSet(channels, processId, channelName) - R-->>-ME: Channel mapping stored - - alt Process is master - ME->>+R: set(master_process, processInfo) - R-->>-ME: Master process info stored - end - - ME-->>-P: Registration complete - - Note over P,Auto: Heartbeat Loop (Every 10s) - - loop Every 10 seconds - P->>+ME: updateProcessHeartbeat(processId) - - ME->>+R: hGet(processes, processId) - R-->>-ME: Current process data - - ME->>+ME: Update lastSeen timestamp - - ME->>+R: hSetEx(processes, processId, updatedInfo, {EX: 45}) - Note over R: TTL renewed to 45 seconds - R-->>-ME: Heartbeat recorded - - ME-->>-P: Heartbeat updated - end - - Note over P,Auto: Automatic TTL Expiration (No heartbeat) - - Note over Auto: 45 seconds pass without heartbeat - Auto->>Auto: Process entry automatically expires - Auto->>R: Remove expired entry from hash - - Note over P,Auto: Other processes detect absence - - P->>+ME: getAllProcesses() - ME->>+R: hGetAll(processes) - R-->>-ME: Only active processes returned - Note over ME: Expired process not included - ME-->>-P: Updated process list -``` - -## System Architecture Diagrams - -### Master Election Cluster Architecture - -```mermaid -graph TB - subgraph "Backend Process Cluster" - M[Master Process
Elected Leader
🏆] - S1[Slave Process 1
Follower] - S2[Slave Process 2
Follower] - S3[Slave Process N
Follower] - end - - subgraph "Redis Coordination Layer" - R[(Redis Server)] - subgraph "Election Keys" - MK[master-election:master
Lock with TTL] - HK[master-election:heartbeat
Timestamp] - end - subgraph "Process Registry (TTL)" - PK[master-election:processes
Hash with per-entry TTL] - CK[master-election:channels
Process→Channel mapping] - end - subgraph "Slave Management" - SK[master-election:slaves
Slave registration data] - end - end - - subgraph "Dependent Services" - CAM[Camera Module
MasterSlaveWorkerCluster] - DS[Display Service
WebSocket Cluster] - OTHER[Other Services
...] - end - - M ===|Master Lock
Heartbeat Every 10s| MK - M ===|Timestamp Update| HK - M ===|TTL Registration
Heartbeat Renewal| PK - - S1 <-->|Monitor Master
Every 5s| R - S2 <-->|Monitor Master
Every 5s| R - S3 <-->|Monitor Master
Every 5s| R - - S1 ===|Slave Registration
Heartbeat Every 5s| SK - S2 ===|Slave Registration
Heartbeat Every 5s| SK - S3 ===|Slave Registration
Heartbeat Every 5s| SK - - M -.->|master-acquired
slave-registered
slave-removed| CAM - M -.->|Role transition events| DS - M -.->|Coordination events| OTHER - - S1 -.->|election-completed
master-lost| CAM - S2 -.->|Election events| DS - S3 -.->|Status events| OTHER -``` - -### TTL-Based Cleanup Architecture - -```mermaid -graph TB - subgraph "Process Lifecycle with TTL" - START[Process Starts] --> REG[Register with 45s TTL] - REG --> ACTIVE[Process Active] - - ACTIVE --> HB{Heartbeat?} - HB -->|Every 10s| RENEW[Renew TTL to 45s] - HB -->|Missed| COUNT[Count down TTL] - - RENEW --> ACTIVE - COUNT --> EXPIRE{TTL = 0?} - EXPIRE -->|No| COUNT - EXPIRE -->|Yes| CLEANUP[Redis Auto-Remove] - - CLEANUP --> DETECT[Other Processes Detect] - DETECT --> REBALANCE[Trigger Rebalancing] - end - - subgraph "Traditional Manual Cleanup vs TTL" - subgraph "❌ Manual Cleanup Problems" - RACE[Race Conditions] - LEAK[Memory Leaks] - STALE[Stale Data] - COMPLEX[Complex Logic] - end - - subgraph "✅ TTL-Based Benefits" - AUTO[Automatic Cleanup] - RELIABLE[Reliable Expiration] - SIMPLE[Simple Implementation] - SELF[Self-Healing] - end - end - - subgraph "TTL Management Operations" - HSETEX[hSetEx(key, field, value, {EX: 45})] - RENEWAL[Heartbeat renews TTL automatically] - EXPIRY[Redis removes expired entries] - - HSETEX --> RENEWAL - RENEWAL --> EXPIRY - EXPIRY --> HSETEX - end -``` - -### Election Timing and Coordination - -```mermaid -gantt - title Master Election Timeline - dateFormat X - axisFormat %s - - section Master Lock - Master Lock TTL (30s) :milestone, m1, 0, 0s - Lock Renewal (10s) :10, 20s - Lock Renewal (10s) :20, 30s - Lock Expires :milestone, m2, 30, 30s - - section Process TTL - Process Registration (45s) :milestone, p1, 0, 0s - Heartbeat Renewal (10s) :10, 20s - Heartbeat Renewal (10s) :20, 30s - Heartbeat Renewal (10s) :30, 40s - Process Expires :milestone, p2, 45, 45s - - section Election Events - Initial Election :milestone, e1, 0, 0s - Slave Monitoring (5s) :5, 10s - Slave Monitoring (5s) :10, 15s - Master Failure Detected :milestone, e2, 30, 30s - New Election Started :32, 35s - New Master Elected :milestone, e3, 35, 35s -``` - -## Event System Architecture - -### Event Flow and Dependencies - -```mermaid -graph TD - subgraph "MasterElection Events" - ES[election-started] --> EA{Election Attempt} - EA -->|Success| MA[master-acquired] - EA -->|Failed| EC[election-completed(false)] - - MA --> HB[Start Heartbeat Loop] - EC --> MON[Start Master Monitoring] - - HB --> RENEW{Heartbeat Success?} - RENEW -->|Success| CONT[Continue as Master] - RENEW -->|Failed| ML[master-lost] - - ML --> STEP[Step Down to Slave] - STEP --> MON - - CONT --> HB - MON --> CHECK{Master Missing?} - CHECK -->|Yes| ES - CHECK -->|No| MON - end - - subgraph "Slave Management Events" - SR[slave-registered] --> UP[Update Assignments] - SREM[slave-removed] --> CLEAN[Cleanup Assignments] - - UP --> NOTIFY[Notify Services] - CLEAN --> REBAL[Rebalance Load] - end - - subgraph "Error Handling Events" - ERR[error] --> LOG[Log Error Details] - LOG --> RECOVER[Attempt Recovery] - RECOVER --> ES - end - - subgraph "External Service Integration" - MA -.->|becomeMaster()| CAMERA[Camera Module] - ML -.->|becomeSlave()| CAMERA - SR -.->|slaveJoined()| CAMERA - SREM -.->|slaveLeft()| CAMERA - - MA -.->|Master role| DISPLAY[Display Service] - ML -.->|Slave role| DISPLAY - - MA -.->|Coordinate| OTHER[Other Services] - ML -.->|Follow| OTHER - end -``` - -### Event Sequence Patterns - -#### Master Failure and Recovery Pattern - -```mermaid -sequenceDiagram - participant M as Master Process - participant S1 as Slave 1 - participant S2 as Slave 2 - participant R as Redis - participant Svc as Dependent Services - - Note over 
M,Svc: Normal Operation - M->>R: Heartbeat renewal every 10s - S1->>R: Monitor master every 5s - S2->>R: Monitor master every 5s - - Note over M,Svc: Master Failure - M--XM: Process crashes - - Note over R: Master lock expires (30s) - R->>R: Lock TTL expires - - Note over S1,S2: Slaves detect master failure - S1->>R: checkMasterExists() → false - S2->>R: checkMasterExists() → false - - Note over S1,S2: Election race with random delay - S1->>S1: Random delay 1.2s - S2->>S2: Random delay 0.8s - - S2->>R: attemptElection() first - R->>S2: Success - became master - S2->>S2: emit('master-acquired') - S2->>Svc: becomeMaster() event - - S1->>R: attemptElection() second - R->>S1: Failed - master exists - S1->>S1: Continue as slave - - Note over S2,Svc: New master operational - S2->>R: Start heartbeat renewal - Svc->>S2: Acknowledge new master -``` - -## Configuration and Tuning - -### Timing Configuration - -```typescript -// MasterElection constructor parameters -interface MasterElectionConfig { - lockName: string = 'master-election'; // Redis key prefix - lockTtl: number = 30000; // Master lock TTL (30 seconds) - heartbeatInterval: number = 10000; // Master heartbeat interval (10 seconds) - checkInterval: number = 5000; // Slave monitoring interval (5 seconds) - identifier: string = 'cms-backend'; // Human-readable process identifier -} - -// TTL Configuration -const PROCESS_TTL_SECONDS = 45; // Process registration TTL -const SLAVE_TIMEOUT_MS = 15000; // Slave cleanup threshold (3x heartbeat) -const ELECTION_RANDOM_DELAY_MAX = 2000; // Max random delay to prevent collisions -``` - -### Redis Key Structure - -```typescript -// Election and coordination keys -const REDIS_KEYS = { - // Master election coordination - master: `${lockName}:master`, // Current master ID with TTL - heartbeat: `${lockName}:heartbeat`, // Master heartbeat timestamp - masterProcess: `${lockName}:master_process`, // Master ProcessInfo JSON - - // Process registry with TTL - processes: `${lockName}:processes`, // Hash: processId → ProcessInfo (TTL per entry) - channels: `${lockName}:channels`, // Hash: processId → channelName - - // Slave management - slaves: `${lockName}:slaves`, // Hash: nodeId → SlaveNode -}; - -// TTL settings -const TTL_CONFIG = { - masterLock: 30, // seconds - Master lock expiration - processEntry: 45, // seconds - Process registration TTL - heartbeatRenewal: 10, // seconds - How often to renew heartbeats - slaveMonitoring: 5, // seconds - How often slaves check master -}; -``` - -### Performance Characteristics - -#### Scalability Metrics -- **Election Speed**: < 100ms for uncontested election -- **Failover Time**: < 5 seconds from master failure to new election -- **Process Registration**: < 10ms per process registration -- **TTL Cleanup**: Automatic, no performance impact on application - -#### Resource Usage -- **Memory**: O(n) where n = number of backend processes -- **Redis Operations**: Atomic Lua scripts prevent race conditions -- **Network**: Minimal - only heartbeats and election attempts -- **CPU**: Negligible overhead for coordination operations - -#### Reliability Guarantees -- **Split-Brain Prevention**: Atomic Lua scripts ensure single master -- **Automatic Recovery**: TTL-based cleanup handles all failure scenarios -- **Event Consistency**: All role transitions emit events for service coordination -- **State Persistence**: Process registry survives Redis restarts - -## Public Interface Specification - -The MasterElection service provides a clean, event-driven interface for distributed 
coordination across backend processes. - -### Primary Interface: MasterElection Class - -#### Core Lifecycle Methods - -```typescript -/** - * Initialize and start the master election process - * @returns Promise - Resolves when election completes - */ -public async start(): Promise - -/** - * Stop master election and cleanup resources - * @returns Promise - Resolves when cleanup completes - */ -public async stop(): Promise - -/** - * Wait for election to complete with timeout - * @param timeoutMs - Maximum time to wait (default: 30000) - * @returns Promise - true if became master, false if slave - */ -public async waitForElectionComplete(timeoutMs: number = 30000): Promise -``` - -#### Status and Information Methods - -```typescript -/** - * Check if this process is currently the master - * @returns boolean - true if master, false if slave - */ -public getIsMaster(): boolean - -/** - * Get this process's unique node identifier - * @returns string - UUID-based node identifier - */ -public getNodeId(): string - -/** - * Get this process's human-readable identifier - * @returns string - Process identifier (e.g., 'cms-backend') - */ -public getIdentifier(): string - -/** - * Get or set process metadata for coordination - * @param metadata - Optional metadata to set - * @returns Record - Current metadata - */ -public setMetadata(metadata: Record): void -public getMetadata(): Record -``` - -#### Process Registry Methods - -```typescript -/** - * Register a process in the distributed registry with TTL - * @param processInfo - Process information including role and capabilities - * @returns Promise - */ -public async registerProcess(processInfo: ProcessInfo): Promise - -/** - * Update process heartbeat to renew TTL (45 seconds) - * @param processId - Process identifier to update - * @returns Promise - */ -public async updateProcessHeartbeat(processId: string): Promise - -/** - * Get all currently registered processes (auto-filtered by TTL) - * @returns Promise - Array of active processes - */ -public async getAllProcesses(): Promise - -/** - * Get current master process information - * @returns Promise - Master process or null if none - */ -public async getMasterProcess(): Promise - -/** - * Get processes filtered by role - * @param role - 'master' or 'slave' - * @returns Promise - Processes with specified role - */ -public async getProcessesByRole(role: 'master' | 'slave'): Promise -``` - -#### Slave Management Methods - -```typescript -/** - * Get all registered slave nodes - * @returns Promise - Array of active slaves - */ -public async getSlaves(): Promise - -/** - * Get specific slave node information - * @param nodeId - Slave node identifier - * @returns Promise - Slave info or null if not found - */ -public async getSlave(nodeId: string): Promise - -/** - * Get count of registered slave nodes - * @returns Promise - Number of active slaves - */ -public async getSlaveCount(): Promise -``` - -### Event System Interface - -#### Event Registration - -```typescript -// Type-safe event registration -masterElection.on('master-acquired', () => { - // This process became the master - console.log('Became master - start coordinating resources'); -}); - -masterElection.on('master-lost', () => { - // This process lost master status - console.log('Lost master status - step down to slave role'); -}); - -masterElection.on('election-completed', (isMaster: boolean) => { - // Election finished - role determined - console.log(`Election completed - role: ${isMaster ? 
'MASTER' : 'SLAVE'}`); -}); - -masterElection.on('slave-registered', (slave: SlaveNode) => { - // New backend process joined cluster - console.log(`New slave joined: ${slave.nodeId}`); -}); - -masterElection.on('slave-removed', (nodeId: string) => { - // Backend process left cluster (TTL expired) - console.log(`Slave removed: ${nodeId}`); -}); - -masterElection.on('error', (error: Error) => { - // Election or coordination error occurred - console.error('Master election error:', error); -}); -``` - -#### Event Timing Guarantees - -- **master-acquired**: Emitted immediately after successful lock acquisition -- **master-lost**: Emitted immediately after failed lock renewal -- **election-completed**: Emitted after initial election resolves (master or slave) -- **slave-registered**: Emitted when new slave joins (master only) -- **slave-removed**: Emitted when slave TTL expires (master only) -- **error**: Emitted on Redis connection issues or election failures - -### Usage Patterns - -#### Basic Initialization and Coordination - -```typescript -import { initialize, getMasterElection } from '~/services/MasterElection'; - -// Initialize master election with custom settings -await initialize( - 'cms-cluster', // lockName - Redis key prefix - 30000, // lockTtl - Master lock TTL (30s) - 10000, // heartbeatInterval - Master heartbeat (10s) - 5000, // checkInterval - Slave monitoring (5s) - 'cms-backend-prod' // identifier - Human-readable name -); - -// Get election instance for event handling -const masterElection = getMasterElection(); - -// Wait for initial election to complete -const isMaster = await masterElection.waitForElectionComplete(); -console.log(`Process started as: ${isMaster ? 'MASTER' : 'SLAVE'}`); -``` - -#### Service Integration Pattern - -```typescript -// Camera module integration example -class CameraClusterService { - private masterElection: MasterElection; - - constructor() { - this.masterElection = getMasterElection(); - this.setupElectionHandlers(); - } - - private setupElectionHandlers() { - // Handle master role transitions - this.masterElection.on('master-acquired', () => { - this.becomeMaster(); - }); - - this.masterElection.on('master-lost', () => { - this.becomeSlave(); - }); - - // Handle cluster membership changes - this.masterElection.on('slave-registered', (slave) => { - this.handleSlaveJoined(slave); - }); - - this.masterElection.on('slave-removed', (nodeId) => { - this.handleSlaveLeft(nodeId); - }); - } - - private async becomeMaster() { - console.log('Camera service: Becoming master'); - - // Connect to all Python ML workers - await this.connectToAllWorkers(); - - // Start managing cluster assignments - this.startClusterManagement(); - - // Begin rebalancing subscriptions - this.startRebalancing(); - } - - private async becomeSlave() { - console.log('Camera service: Becoming slave'); - - // Disconnect from Python workers (master-only) - await this.disconnectFromWorkers(); - - // Stop cluster management - this.stopClusterManagement(); - - // Start listening for routed messages - this.startSlaveMessageHandling(); - } -} -``` - -#### Process Registration with Custom Capabilities - -```typescript -// Register this process with specific capabilities -await masterElection.registerProcess({ - processId: masterElection.getNodeId(), - nodeId: masterElection.getNodeId(), - role: masterElection.getIsMaster() ? 
'master' : 'slave', - channelName: `worker:slave:${masterElection.getNodeId()}`, - lastSeen: new Date().toISOString(), - capabilities: { - canProcessDetections: true, // Can handle AI detection callbacks - maxSubscriptions: 100, // Maximum camera subscriptions - preferredWorkload: 80 // Preferred load percentage (0-100) - } -}); - -// Start heartbeat loop to maintain registration -setInterval(async () => { - await masterElection.updateProcessHeartbeat(masterElection.getNodeId()); -}, 10000); // Every 10 seconds -``` - -#### Cluster Monitoring and Status - -```typescript -// Monitor cluster status and health -async function monitorClusterHealth() { - // Get all active processes (TTL-filtered automatically) - const allProcesses = await masterElection.getAllProcesses(); - console.log(`Active processes: ${allProcesses.length}`); - - // Get current master - const masterProcess = await masterElection.getMasterProcess(); - if (masterProcess) { - console.log(`Master: ${masterProcess.processId} (${masterProcess.capabilities.maxSubscriptions} max subscriptions)`); - } - - // Get all slaves - const slaves = await masterElection.getSlaves(); - console.log(`Slaves: ${slaves.length}`); - slaves.forEach(slave => { - console.log(` Slave ${slave.nodeId}: last seen ${slave.lastSeen}`); - }); - - // Check if this process is master - if (masterElection.getIsMaster()) { - console.log('This process is the master - coordinating cluster'); - } else { - console.log('This process is a slave - following master'); - } -} - -// Run monitoring every 30 seconds -setInterval(monitorClusterHealth, 30000); -``` - -#### Graceful Shutdown Pattern - -```typescript -// Graceful shutdown with proper cleanup -process.on('SIGTERM', async () => { - console.log('Shutting down master election...'); - - try { - // Stop election and cleanup resources - await masterElection.stop(); - - // Master automatically releases lock - // Process TTL will expire naturally - // Slaves will detect and trigger new election - - console.log('Master election shutdown complete'); - } catch (error) { - console.error('Error during election shutdown:', error); - } - - process.exit(0); -}); -``` - -### Error Handling and Recovery - -#### Election Failure Scenarios - -```typescript -// Handle various failure modes -masterElection.on('error', (error) => { - console.error('Master election error:', error.message); - - // Common error types: - if (error.message.includes('Redis')) { - // Redis connection issues - console.log('Redis connectivity problem - will retry automatically'); - - } else if (error.message.includes('timeout')) { - // Election timeout - console.log('Election timeout - may indicate network issues'); - - } else if (error.message.includes('lock')) { - // Lock acquisition issues - console.log('Lock contention - normal during elections'); - } - - // Service continues running - election will retry automatically -}); - -// Handle network partitions -masterElection.on('master-lost', () => { - console.log('Lost master status - likely network partition or overload'); - - // Dependent services should gracefully step down - // New election will start automatically after random delay -}); -``` - -#### Recovery Guarantees - -- **Split-Brain Prevention**: Atomic Lua scripts ensure only one master exists -- **Automatic Failover**: New elections triggered immediately when master fails -- **TTL-Based Cleanup**: Processes automatically removed when heartbeats stop -- **State Recovery**: Process registry rebuilds automatically from active heartbeats -- **Event 
Consistency**: All role changes emit events for service coordination - -### Integration with Dependent Services - -The MasterElection service is designed to coordinate multiple backend services that need distributed leadership: - -#### Camera Module Integration -- Master: Connects to Python ML workers, manages subscriptions -- Slaves: Process routed detection messages, forward commands - -#### Display WebSocket Cluster -- Master: Manages WebSocket connection assignments across processes -- Slaves: Handle assigned display connections, route messages - -#### Database Migration Coordination -- Master: Executes database migrations and schema changes -- Slaves: Wait for master to complete before proceeding - -This specification provides a comprehensive understanding of the MasterElection service's distributed coordination capabilities and integration patterns for multi-process backend systems. \ No newline at end of file diff --git a/docs/WorkerConnection.md b/docs/WorkerConnection.md deleted file mode 100644 index b11ba61..0000000 --- a/docs/WorkerConnection.md +++ /dev/null @@ -1,1498 +0,0 @@ -# Worker Connection Architecture Specification - Pure Declarative State Management - -## Overview - -The Camera Module implements a pure VMware DRS-like declarative architecture for managing connections to Python ML workers. This system uses the database as the single source of truth for desired subscription state, with automatic regeneration and reconciliation providing intelligent camera management, real-time object detection, and AI-powered content selection with automatic load balancing capabilities. - -**Key Architectural Principle**: Database mutations trigger complete state regeneration rather than incremental updates, ensuring consistency and eliminating complex state synchronization issues. - -## Architecture Components - -### Two-Cluster System - -The system consists of two distinct but coordinated clusters: - -1. **Backend Process Cluster**: Multiple CMS backend processes with leader election -2. **Worker Cluster**: Python ML workers for object detection processing - -### Master-Slave WebSocket Architecture - -- **Master Process**: Single elected backend process that maintains WebSocket connections to Python workers -- **Slave Processes**: All other backend processes that handle message routing and processing -- **Message Routing**: Master forwards worker messages to assigned slaves via Redis pub/sub channels -- **MasterElection Integration**: Automated master/slave role management with event-driven transitions -- **Seamless Scaling**: Backend processes can be added/removed without affecting WebSocket connections - -## Core Components - -### DetectorCluster -`cms-backend/modules/camera/services/DetectorCluster.ts` - -Primary interface for camera operations that abstracts the underlying distributed architecture. - -**Key Responsibilities:** -- Routes camera subscription requests through the cluster -- Manages detection callback registration and event emission -- Bridges CameraService with underlying MasterSlaveWorkerCluster -- Provides unified API regardless of master/slave status - -### MasterSlaveWorkerCluster -`cms-backend/modules/camera/services/MasterSlaveWorkerCluster.ts` - -Core distributed cluster implementation that handles declarative state management and worker assignment reconciliation. 
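Conceptually, the reconciliation this class performs reduces to diffing the desired subscription set (regenerated from the database) against the actual subscription set (what workers are currently running) and issuing subscribe/unsubscribe commands for the difference. The sketch below illustrates that diff step only; the type shapes are simplified and the helper name is hypothetical, not the actual implementation. The concrete responsibilities in each mode follow.

```typescript
// Illustrative sketch of the desired-vs-actual diff at the heart of reconciliation.
// Field names follow the DesiredCameraSubscription / ActualCameraSubscription
// structures described below; everything else is simplified.
interface DesiredSub { subscriptionIdentifier: string; }
interface ActualSub { subscriptionIdentifier: string; assignedWorkerUrl: string; }

function diffSubscriptions(desired: DesiredSub[], actual: ActualSub[]) {
  const desiredIds = new Set(desired.map(d => d.subscriptionIdentifier));
  const actualIds = new Set(actual.map(a => a.subscriptionIdentifier));

  // In the database but not running on any worker -> needs a subscribe command
  const toSubscribe = desired.filter(d => !actualIds.has(d.subscriptionIdentifier));
  // Running on a worker but no longer desired -> needs an unsubscribe command
  const toUnsubscribe = actual.filter(a => !desiredIds.has(a.subscriptionIdentifier));

  return { toSubscribe, toUnsubscribe };
}
```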
- -**Master Mode Responsibilities:** -- Maintains WebSocket connections to all Python workers -- Manages desired vs actual subscription state separation -- Implements VMware DRS-like global rebalancing algorithm -- Processes automatic reconciliation every 30 seconds -- Responds to slave join/leave events from MasterElection -- Generates fresh pre-signed model URLs for worker assignments - -**Slave Mode Responsibilities:** -- Submits desired subscription state changes to master -- Processes detection results routed from master -- Event-driven role transitions managed by MasterElection -- No direct worker management (delegated to master) - -### DetectorConnection -`cms-backend/modules/camera/services/DetectorConnection.ts` - -Individual WebSocket connection handler for Python workers. - -**Key Features:** -- Connection lifecycle management (connect, disconnect, reconnect) -- Exponential backoff reconnection with 10-second intervals -- Subscription state management and restoration after reconnection -- Real-time heartbeat monitoring with 10-second timeout -- Resource usage tracking (CPU, memory, GPU) - -## Data Structures - -### WorkerConnectionState -```typescript -interface WorkerConnectionState { - url: string; // Worker WebSocket URL - processId: string; // Backend process managing this worker - online: boolean; // Connection status - cpuUsage: number | null; // Worker CPU utilization - memoryUsage: number | null; // Worker memory usage - gpuUsage: number | null; // Worker GPU utilization - gpuMemoryUsage: number | null; // Worker GPU memory usage - subscriptionCount: number; // Active camera subscriptions - subscriptions: string[]; // List of subscription identifiers - lastHeartbeat: string; // Last heartbeat timestamp - connectedAt: string; // Connection established timestamp -} -``` - -### DesiredCameraSubscription -```typescript -interface DesiredCameraSubscription { - subscriptionIdentifier: string; // Format: ${displayId};${cameraId} - rtspUrl: string; // Camera RTSP stream URL - modelId: number; // AI model database ID - modelName: string; // AI model identifier - createdAt: string; // Subscription creation timestamp - - // Snapshot configuration - snapshotUrl?: string; // Optional snapshot endpoint URL - snapshotInterval?: number; // Snapshot interval in milliseconds - - // Image cropping parameters - cropX1?: number; // Crop region top-left X - cropY1?: number; // Crop region top-left Y - cropX2?: number; // Crop region bottom-right X - cropY2?: number; // Crop region bottom-right Y -} -``` - -### ActualCameraSubscription -```typescript -interface ActualCameraSubscription { - subscriptionIdentifier: string; // Format: ${displayId};${cameraId} - assignedWorkerUrl: string; // Worker handling this subscription - modelUrl: string; // AI model presigned URL (1hr TTL) - status: 'active' | 'pending' | 'failed' | 'recovering'; - assignedAt: string; // Worker assignment timestamp - lastSeen: string; // Last activity timestamp -} -``` - -### SlaveState -```typescript -interface SlaveState { - slaveId: string; // Unique slave identifier (process ID) - processId: string; // Backend process ID (same as slaveId) - online: boolean; // Always true (maintained by MasterElection) - workload: number; // Number of assigned workers (calculated) - lastSeen: string; // Last heartbeat from MasterElection - capabilities?: Record; // Metadata from MasterElection -} -``` - -### DetectorWorkerCommand -```typescript -interface DetectorWorkerCommand { - type: DetectorWorkerCommandType; - payload?: { - 
subscriptionIdentifier: string; - rtspUrl: string; - snapshotUrl?: string; - snapshotInterval?: number; - modelUrl: string; - modelName: string; - modelId: number; - cropX1?: number; - cropY1?: number; - cropX2?: number; - cropY2?: number; - }; -} - -enum DetectorWorkerCommandType { - SUBSCRIBE = "subscribe", - UNSUBSCRIBE = "unsubscribe", - REQUEST_STATE = "requestState", - PATCH_SESSION_RESULT = "patchSessionResult", - SET_SESSION_ID = "setSessionId" -} -``` - -### ImageDetectionResponse -```typescript -interface ImageDetectionResponse { - subscriptionIdentifier: string; - timestamp: Date; - data: { - detection: { - carModel?: string; - carBrand?: string; - carYear?: number; - bodyType?: string; - licensePlateText?: string; - licensePlateType?: string; - }; - modelId: number; - modelName: string; - }; -} -``` - -## Redis Data Architecture - -### Persistent Storage Keys -- `worker:connections` - Worker connection states and health metrics -- `worker:assignments` - Worker-to-slave assignment mappings -- `worker:desired_subscriptions` - Desired camera subscription state (user intent) -- `worker:actual_subscriptions` - Actual worker subscription assignments (system state) -- `master-election:slaves` - Slave registration and heartbeat (managed by MasterElection) - -### Communication Channels -- `worker:slave:{slaveId}` - Individual slave message routing channels -- `worker:messages:upstream` - Worker-to-master communication channel (currently unused) -- `worker:assignments:changed` - Assignment change broadcast notifications -- `worker:master:commands` - Database change notification channel (slaves → master) - -### Data Persistence Strategy -All Redis data uses **manual cleanup only** (no TTL) to ensure: -- Reliable state recovery after process restarts -- Consistent subscription persistence across failovers -- Predictable cleanup during planned maintenance -- Debug visibility into system state history - -## Pure Declarative Architecture (VMware DRS-like) - -### Concept Overview -The system implements a pure declarative approach similar to VMware Distributed Resource Scheduler (DRS), where: -- **Database**: Single source of truth for desired state (Display+Camera+Playlist combinations) -- **Actual State**: What subscriptions are currently running on workers (stored in `worker:actual_subscriptions`) -- **Regeneration**: Master regenerates complete desired state from database on every change notification -- **Reconciliation**: Master continuously reconciles desired vs actual state via global rebalancing - -### Pure Declarative Benefits -- **Database as Truth**: Desired state always derived fresh from database, eliminating state synchronization issues -- **Zero Incremental Updates**: No complex state management, just "regenerate everything on change" -- **Automatic Recovery**: System heals itself by comparing database state vs actual worker state -- **Load Balancing**: Global optimization across all workers and subscriptions -- **Fault Tolerance**: Desired state survives all failures since it's always derived from database -- **Simplicity**: Database mutations just trigger regeneration - no complex command protocols - -### Pure Declarative Flow -```typescript -// Triggered by any database change -async handleDatabaseChange(changeType: string, entityId: string) { - // 1. Any process detects database change - await triggerSubscriptionUpdate(changeType, entityId); - - // 2. 
Master receives regeneration request - async handleMasterCommand(message) { - if (data.type === 'regenerate_subscriptions') { - await regenerateDesiredStateFromDatabase(); - } - } - - // 3. Master regenerates complete desired state from database - async regenerateDesiredStateFromDatabase() { - const activeDisplays = await db.display.findMany({ - where: { - AND: [ - { cameraIdentifier: { not: null } }, - { playlistId: { not: null } } - ] - }, - include: { camera: true, playlist: { include: { model: true } } } - }); - - // Generate fresh desired subscriptions from database - await storeDesiredSubscriptions(generateFromDisplays(activeDisplays)); - - // Trigger reconciliation - await rebalanceCameraSubscriptions(); - } - - // 4. Reconciliation (same VMware DRS algorithm) - async rebalanceCameraSubscriptions() { - const desired = await getDesiredSubscriptions(); // Fresh from database - const actual = await getActualSubscriptions(); // Current worker state - - // Find and fix differences using load balancing - await reconcileDifferences(desired, actual); - } -} - -// VMware DRS-like worker selection (unchanged) -function findBestWorkerVMwareDRS(workers, currentLoads) { - return workers - .map(worker => ({ - worker, - score: (currentLoads.get(worker.url) * 0.4) + // 40% load balance - (worker.cpuUsage * 0.35) + // 35% CPU usage - (worker.memoryUsage * 0.25) // 25% memory usage - })) - .sort((a, b) => a.score - b.score)[0].worker; // Lower score = better -} -``` - -### Simplified Reconciliation Flow -1. **Database Change**: Any process modifies database (Display, Camera, Playlist, Model) -2. **Trigger Notification**: Process sends `regenerate_subscriptions` to `worker:master:commands` -3. **Complete Regeneration**: Master queries database for all active Display+Camera+Playlist combinations -4. **Desired State Creation**: Master generates fresh desired subscriptions from database query results -5. **Diff Analysis**: Master compares fresh desired state vs current actual state on workers -6. **Global Reconciliation**: Master applies VMware DRS algorithm to reconcile differences -7. **Worker Commands**: Master sends subscription/unsubscription commands to workers -8. **State Update**: Master updates actual subscription state in Redis - -### Key Simplifications vs Previous Architecture -- **No Incremental State Management**: No complex tracking of individual subscription changes -- **No State Synchronization Issues**: Desired state always freshly derived from database -- **No Complex Command Protocols**: Only one command type: `regenerate_subscriptions` -- **No Partial Update Bugs**: Complete regeneration eliminates edge cases and race conditions -- **Zero Database-Redis Divergence**: Database is always the authoritative source -- **Simpler Service Layer**: Services just update database + trigger, no subscription logic - -## Class Responsibilities Overview - -### Core Class Functions - -| Class | Primary Responsibility | Key Functions | Process Type | -|-------|----------------------|---------------|--------------| -| **DetectorCluster** | Public API facade and event management | • `subscribeToCamera()` - Legacy interface (triggers regeneration)
• `addDetectionListener()` - Callback registration<br/>• `getState()` - Cluster monitoring<br/>• Event emission to external services | Both Master & Slave |
-| **MasterSlaveWorkerCluster** | Pure declarative cluster coordination | **Master**: `regenerateDesiredStateFromDatabase()`, `rebalanceCameraSubscriptions()`, `connectToAllWorkers()`<br/>**Slave**: Minimal role - just routes detection messages<br/>**Both**: `handleDetectionMessage()` for callbacks | Both (different roles) |
-| **DetectorConnection** | Individual worker WebSocket management | • `initialize()` - WebSocket connection setup<br/>• `subscribeToCamera()` - Send subscription to worker<br/>• `handleImageDetectionResponse()` - Process AI results<br/>• `resubscribeAll()` - Restore subscriptions after reconnect | Master Only |
-| **CameraService** | Database operations + trigger notifications | • `addCamera()` - Database create + trigger regeneration<br/>• `updateCamera()` - Database update + trigger regeneration<br/>• `removeCamera()` - Database delete + trigger regeneration | Both Master & Slave |
-| **DisplayService** | Database operations + trigger notifications | • `registerDisplay()` - Database create + trigger regeneration<br/>• `updateDisplay()` - Database update + trigger regeneration
• `deleteDisplay()` - Database delete + trigger regeneration | Both Master & Slave | -| **SubscriptionTrigger** | Simple notification system | • `triggerSubscriptionUpdate()` - Send regeneration request to master | Both Master & Slave | - -## Object Relationship Diagrams - -### Core Class Structure and Methods - -```mermaid -classDiagram - class CameraService { - +addCamera(identifier, rtspUrl) - +removeCamera(identifier) - +resubscribeCamera(identifier) - +getCameras() - +updateCamera(...) - -processDetection(data) - } - - class DetectorCluster { - +initialize() - +subscribeToCamera(...) - +unsubscribeFromCamera(subscriptionId) - +unsubscribeFromAllWithCameraID(cameraId) - +getState() - +addDetectionListener(subscriptionId, callback) - +addGlobalDetectionListener(callback) - -handleWorkerDetection(data) - } - - class MasterSlaveWorkerCluster { - +initialize() - +subscribeToCamera(...) - +storeCameraSubscription(subscription) - +getClusterState() - +shutdown() - -connectToAllWorkers() [MASTER] - -rebalanceCameraSubscriptions() [MASTER] - -triggerRebalancing() [MASTER] - -becomeMaster() - -becomeSlave() - -setupMasterElectionListeners() - } - - class DetectorConnection { - +initialize() - +subscribeToCamera(...) - +unsubscribeFromCamera(subscriptionId) - +getCameraImage(cameraId) - +setSessionId(displayId, sessionId) - +getState() - -connect() - -resubscribeAll() - -handleImageDetectionResponse(data) - -scheduleReconnect() - } - - CameraService --> DetectorCluster : "subscribeToCamera()\ngetState()" - DetectorCluster --> MasterSlaveWorkerCluster : "initialize()\nstoreCameraSubscription()" - MasterSlaveWorkerCluster --> DetectorConnection : "[MASTER] creates connections" -``` - -### Direct Function Call Relationships - -```mermaid -graph TD - API[API Routes] --> CS[CameraService] - CS --> |subscribeToCamera
getState
unsubscribeFromAllWithCameraID| DC[DetectorCluster] - DC --> |initialize
storeCameraSubscription
getClusterState
subscribeToCamera| MSC[MasterSlaveWorkerCluster] - - subgraph "Master Process Only" - MSC --> |connectToAllWorkers
creates connections| CONN[DetectorConnection] - CONN --> |WebSocket calls| PW[Python ML Worker] - end - - ME[MasterElection] --> |getIsMaster
getNodeId
getSlaves| MSC - WL[WorkerLogger] --> |attachToDetectorCluster| DC - - classDef masterOnly fill:#ffcccc - classDef external fill:#ffffcc - - class CONN masterOnly - class PW external - class API external -``` - -### Event-Driven Communication - -```mermaid -graph LR - subgraph "Internal Events" - MSC[MasterSlaveWorkerCluster] -.-> |emit detection| DC[DetectorCluster] - MSC -.-> |emit worker:online
emit worker:offline| DC - DC -.-> |emit worker:detection_result
emit worker:online
emit worker:offline| CS[CameraService] - DC -.-> |emit events| WL[WorkerLogger] - ME[MasterElection] -.-> |master-acquired
master-lost
slave-registered
slave-removed| MSC - end - - subgraph "Callback System" - CS -.-> |callback registration| DC - DC -.-> |detection callbacks| CS - end - - subgraph "WebSocket Events (Master Only)" - CONN[DetectorConnection] -.-> |handleWorkerMessage
handleWorkerOnline
handleWorkerOffline| MSC - PW[Python ML Worker] -.-> |IMAGE_DETECTION
STATE_REPORT| CONN - end - - classDef events fill:#e6f3ff - classDef callbacks fill:#fff2e6 - classDef websocket fill:#ffe6e6 - - class MSC,DC,CS,WL events - class CONN,PW websocket -``` - -### Redis Communication Patterns - -```mermaid -graph TB - subgraph "Master Process" - M[Master MasterSlaveWorkerCluster] - end - - subgraph "Slave Processes" - S1[Slave Process 1] - S2[Slave Process 2] - end - - subgraph "Redis Channels" - SC1[worker:slave:slave1] - SC2[worker:slave:slave2] - MC[worker:master:commands] - AC[worker:assignments:changed] - end - - subgraph "Redis Storage" - WC[worker:connections] - WA[worker:assignments] - WS[worker:slaves] - CS[worker:camera_subscriptions] - end - - M --> |publish detection routing| SC1 - M --> |publish detection routing| SC2 - M --> |publish assignments| AC - M --> |hSet/hGet state| WC - M --> |hSet/hGet assignments| WA - M --> |hSet/hGet subscriptions| CS - - S1 --> |publish commands| MC - S2 --> |publish commands| MC - S1 --> |hSet registration| WS - S2 --> |hSet registration| WS - - SC1 --> |subscribe| S1 - SC2 --> |subscribe| S2 - MC --> |subscribe| M - AC --> |subscribe all| S1 - AC --> |subscribe all| S2 -``` - -## Method Call Flow Analysis - -### Camera Subscription Flow (External Request → Worker) - -```mermaid -sequenceDiagram - participant API as API Routes - participant CS as CameraService - participant DB as Database - participant ST as SubscriptionTrigger - participant R as Redis - participant MSC as MasterSlaveCluster - participant CONN as DetectorConnection - participant W as Python Worker - - Note over API,W: Pure Declarative Flow - API->>+CS: POST /api/camera - CS->>+DB: db.cameraEntity.create({...}) - DB-->>-CS: Camera created - CS->>+ST: triggerSubscriptionUpdate('camera.created', id) - ST->>+R: publish(worker:master:commands, {type: 'regenerate_subscriptions', ...}) - - Note over R,MSC: Only Master Processes Commands - R->>+MSC: Master receives regeneration request - MSC->>+MSC: regenerateDesiredStateFromDatabase() - MSC->>+DB: Query all Display+Camera+Playlist combinations - DB-->>-MSC: Active display configurations - MSC->>+MSC: Generate fresh desired subscriptions - MSC->>+R: Store desired state in Redis - MSC->>+MSC: rebalanceCameraSubscriptions() - MSC->>+MSC: findBestWorkerForSubscription() - MSC->>+CONN: subscribeToCamera(subscriptionId, rtspUrl, ...) 
- CONN->>+W: WebSocket: {type: "subscribe", payload: {...}} - W-->>-CONN: WebSocket: {type: "stateReport", ...} - CONN->>-MSC: handleWorkerOnline(workerUrl) - MSC->>-R: Update actual subscription state - - Note over W,CS: Detection Processing (unchanged) - W->>CONN: Detection results - CONN->>MSC: Route to assigned slave - MSC->>CS: Detection callback - CS-->>-API: Camera added successfully -``` - -### Detection Processing Flow (Worker → External Callback) - -```mermaid -sequenceDiagram - participant W as Python Worker - participant CONN as DetectorConnection - participant MSC as MasterSlaveCluster - participant R as Redis - participant DC as DetectorCluster - participant CS as CameraService - - Note over W,CS: AI Detection Result Processing - W->>+CONN: WebSocket: {type: "imageDetection", subscriptionIdentifier, data} - CONN->>+MSC: handleWorkerMessage(ImageDetectionResponse) - - Note over MSC: Master finds assigned slave - MSC->>+MSC: findWorkerForSubscription(subscriptionId) - MSC->>+R: hGet(worker:assignments, workerUrl) - MSC->>+R: publish(worker:slave:{slaveId}, {type: 'detection', ...}) - - Note over R: Redis routes to assigned slave - R-->>+MSC: Slave receives detection message - MSC->>+MSC: handleDetectionMessage(message) - MSC->>+DC: emit('detection', detectionData) - - Note over DC: Process detection and trigger callbacks - DC->>+DC: handleWorkerDetection(data) - DC->>+DC: detectionListeners.get(subscriptionId).forEach(callback) - DC->>+CS: callback(detectionData) - DC->>+DC: emit('worker:detection_result', {url, cameraId, detections}) - - Note over CS: External service processes detection - CS->>+CS: processDetection(data) - CS-->>CS: updateAnalytics(), triggerDecisionTrees() -``` - -### Master Election and Failover Flow - -```mermaid -sequenceDiagram - participant ME as MasterElection - participant MSC1 as MasterSlaveCluster (Process 1) - participant MSC2 as MasterSlaveCluster (Process 2) - participant R as Redis - participant W1 as Python Worker 1 - participant W2 as Python Worker 2 - - Note over ME,W2: Master Failover Scenario - - %% Initial state - ME->>+MSC1: emit('master-acquired') - MSC1->>+MSC1: becomeMaster() - ME->>+MSC2: emit('master-lost') - MSC2->>+MSC2: becomeSlave() - - ME->>+R: Automatic slave registration - MSC1->>+W1: WebSocket connection (Master) - MSC1->>+W2: WebSocket connection (Master) - - Note over MSC1: Original master fails - MSC1--xMSC1: Process crash/network failure - - %% MasterElection detects failure and triggers new election - ME->>+ME: Detect failed master, trigger election - ME->>+MSC2: emit('master-acquired') - MSC2->>+MSC2: becomeMaster() - - %% Master recovery process - MSC2->>+MSC2: connectToAllWorkers() - MSC2->>+W1: WebSocket reconnection - MSC2->>+W2: WebSocket reconnection - - MSC2->>+MSC2: healClusterAssignments() - MSC2->>+R: hGetAll(worker:camera_subscriptions) - MSC2->>+MSC2: rebalanceCameraSubscriptions() - - %% Restore subscriptions - MSC2->>+W1: WebSocket: SUBSCRIBE commands - MSC2->>+W2: WebSocket: SUBSCRIBE commands - - Note over MSC2,W2: New master operational - slave registration handled by MasterElection -``` - -## System Architecture Diagrams - -### Master-Slave Cluster Architecture - -```mermaid -graph TB - subgraph "Backend Process Cluster" - M[Master Process
NodeJS Backend] - S1[Slave Process 1
NodeJS Backend] - S2[Slave Process 2
NodeJS Backend] - S3[Slave Process N
NodeJS Backend] - end - - subgraph "Python Worker Cluster" - W1[Python ML Worker 1
WebSocket Server] - W2[Python ML Worker 2
WebSocket Server] - W3[Python ML Worker N
WebSocket Server] - end - - subgraph "Redis Coordination Layer" - R[(Redis)] - R --- C1[worker:slave:* channels] - R --- C2[worker:connections state] - R --- C3[worker:assignments mapping] - R --- C4[worker:camera_subscriptions] - end - - M ===|WebSocket Connections
Only Master| W1 - M ===|WebSocket Connections
Only Master| W2 - M ===|WebSocket Connections
Only Master| W3 - - M <-->|Pub/Sub Messages| R - S1 <-->|Pub/Sub Messages| R - S2 <-->|Pub/Sub Messages| R - S3 <-->|Pub/Sub Messages| R - - M -.->|Route Messages| S1 - M -.->|Route Messages| S2 - M -.->|Route Messages| S3 -``` - -### Data Flow Architecture - -```mermaid -sequenceDiagram - participant CS as CameraService - participant DC as DetectorCluster - participant MS as MasterSlaveCluster - participant R as Redis - participant W as Python Worker - participant S as Slave Process - - Note over CS,S: Camera Subscription Flow - - CS->>DC: subscribeToCamera(cameraId, rtspUrl, modelUrl, ...) - DC->>MS: storeCameraSubscription({...}) - - alt Master Process - MS->>MS: findBestWorkerForSubscription() - MS->>R: hSet(camera_subscriptions, subscriptionId, {...}) - MS->>W: WebSocket: SUBSCRIBE command - W->>MS: STATE_REPORT (subscription confirmed) - MS->>R: publish(worker:slave:{slaveId}, detection_message) - else Slave Process - MS->>R: publish(worker:master:commands, subscribe_command) - Note over MS: Routes to master for execution - end - - Note over CS,S: Detection Processing Flow - - W->>MS: WebSocket: IMAGE_DETECTION response - MS->>MS: findSlaveForWorker(workerUrl) - MS->>R: publish(worker:slave:{slaveId}, detection_data) - R->>S: Redis pub/sub delivery - S->>DC: emit('detection', detectionData) - DC->>CS: callback(detectionData) -``` - -### Subscription Lifecycle Management - -```mermaid -stateDiagram-v2 - [*] --> Pending: Camera Subscription Request - - Pending --> Active: Worker accepts subscription - Pending --> Failed: Worker rejects/unavailable - Pending --> Recovering: Assignment change needed - - Active --> Recovering: Worker goes offline - Active --> [*]: Unsubscribe request - - Recovering --> Active: Reassigned to online worker - Recovering --> Failed: No workers available - Recovering --> [*]: Subscription expired - - Failed --> Recovering: Worker becomes available - Failed --> [*]: Max retries exceeded - - note right of Recovering - Automatic rebalancing every 30s - Master detects offline workers - Reassigns to healthy workers - end note -``` - -### Worker Connection State Machine - -```mermaid -stateDiagram-v2 - [*] --> Connecting: initialize() - - Connecting --> Online: WebSocket connected + STATE_REPORT received - Connecting --> Reconnecting: Connection failed - - Online --> Offline: Heartbeat timeout (10s) - Online --> Reconnecting: WebSocket error/close - Online --> [*]: close() called - - Offline --> Reconnecting: Scheduled reconnect (10s) - Offline --> [*]: close() called - - Reconnecting --> Online: Reconnection successful - Reconnecting --> Reconnecting: Reconnection failed (retry) - Reconnecting --> [*]: close() called - - note right of Online - - Sends heartbeat every 2s - - Processes subscriptions - - Reports resource usage - - Handles detection results - end note -``` - -### Redis Channel Communication Flow - -```mermaid -graph LR - subgraph "Master Process" - M[Master] - WS1[WebSocket to Worker 1] - WS2[WebSocket to Worker 2] - end - - subgraph "Slave Processes" - S1[Slave 1] - S2[Slave 2] - end - - subgraph "Redis Channels" - CH1[worker:slave:slave1] - CH2[worker:slave:slave2] - CH3[worker:messages:upstream] - CH4[worker:assignments:changed] - end - - WS1 -->|Detection Data| M - WS2 -->|Detection Data| M - - M -->|Route by Assignment| CH1 - M -->|Route by Assignment| CH2 - - CH1 -->|Subscribed| S1 - CH2 -->|Subscribed| S2 - - S1 -->|Commands/Responses| CH3 - S2 -->|Commands/Responses| CH3 - CH3 -->|Subscribed| M - - M -->|Assignment Updates| CH4 - CH4 
-->|Subscribed| S1 - CH4 -->|Subscribed| S2 -``` - -### Detailed Message Flow by Channel - -```mermaid -graph TB - subgraph "Python ML Workers" - W1[Worker 1
ws://worker1:8000] - W2[Worker 2
ws://worker2:8000] - W3[Worker N
ws://workerN:8000] - end - - subgraph "Master Process (Only One)" - M[Master Backend Process] - subgraph "Master Managed Data" - WC1[WebSocket Connection Pool] - AS[Assignment State] - SUB[Subscription Manager] - end - end - - subgraph "Redis Channels & Storage" - subgraph "Individual Slave Channels" - SC1["worker:slave:slave-uuid-1"] - SC2["worker:slave:slave-uuid-2"] - SC3["worker:slave:slave-uuid-N"] - end - - subgraph "Master Coordination Channels" - MC["worker:master:commands"] - ACH["worker:assignments:changed"] - UPC["worker:messages:upstream"] - SEC["worker:subscription:events"] - end - - subgraph "Persistent Storage" - WCS["worker:connections
(Worker Health States)"] - WAS["worker:assignments
(Worker→Slave Mapping)"] - WSS["worker:slaves
(Slave Registration)"] - CSS["worker:camera_subscriptions
(Subscription Persistence)"] - end - end - - subgraph "Slave Processes" - S1[Slave Process 1
slave-uuid-1] - S2[Slave Process 2
slave-uuid-2] - S3[Slave Process N
slave-uuid-N] - end - - %% WebSocket Communications (Master Only) - W1 -.->|"WebSocket Messages:
• IMAGE_DETECTION
• STATE_REPORT
• PATCH_SESSION"| WC1 - W2 -.->|"WebSocket Messages:
• IMAGE_DETECTION
• STATE_REPORT
• PATCH_SESSION"| WC1 - W3 -.->|"WebSocket Messages:
• IMAGE_DETECTION
• STATE_REPORT
• PATCH_SESSION"| WC1 - - WC1 -.->|"WebSocket Commands:
• SUBSCRIBE
• UNSUBSCRIBE
• REQUEST_STATE
• SET_SESSION_ID"| W1 - WC1 -.->|"WebSocket Commands:
• SUBSCRIBE
• UNSUBSCRIBE
• REQUEST_STATE
• SET_SESSION_ID"| W2 - WC1 -.->|"WebSocket Commands:
• SUBSCRIBE
• UNSUBSCRIBE
• REQUEST_STATE
• SET_SESSION_ID"| W3 - - %% Master Redis Operations - M -->|"hSet() operations:
• Worker states
• Assignments
• Subscriptions"| WCS - M -->|"hSet() operations:
• Worker→Slave mapping
• Load balancing data"| WAS - M -->|"hSet() operations:
• Subscription details
• Assignment tracking"| CSS - - %% Master to Slave Routing - M -->|"Detection Routing:
{type: 'detection',
workerUrl: string,
data: ImageDetectionResponse,
timestamp: string}"| SC1 - M -->|"Detection Routing:
{type: 'detection',
workerUrl: string,
data: ImageDetectionResponse,
timestamp: string}"| SC2 - M -->|"Detection Routing:
{type: 'detection',
workerUrl: string,
data: ImageDetectionResponse,
timestamp: string}"| SC3 - - M -->|"Assignment Updates:
{type: 'assignments_updated',
assignments: Record,
timestamp: string}"| ACH - - %% Slave to Master Communication - S1 -->|"Slave Commands:
{type: 'subscribe_camera',
subscriptionIdentifier: string,
rtspUrl: string,
modelUrl: string,
modelId: number,
snapshotUrl?: string,
cropX1?: number, ...}"| MC - S2 -->|"Slave Commands:
{type: 'subscribe_camera',
subscriptionIdentifier: string,
rtspUrl: string,
modelUrl: string,
modelId: number,
snapshotUrl?: string,
cropX1?: number, ...}"| MC - S3 -->|"Slave Commands:
{type: 'subscribe_camera',
subscriptionIdentifier: string,
rtspUrl: string,
modelUrl: string,
modelId: number,
snapshotUrl?: string,
cropX1?: number, ...}"| MC - - %% Slave Registration and Heartbeats - S1 -->|"hSet() Slave Registration:
{slaveId: string,
processId: string,
online: boolean,
workload: number,
lastSeen: string,
capabilities: {...}}"| WSS - S2 -->|"hSet() Slave Registration:
{slaveId: string,
processId: string,
online: boolean,
workload: number,
lastSeen: string,
capabilities: {...}}"| WSS - S3 -->|"hSet() Slave Registration:
{slaveId: string,
processId: string,
online: boolean,
workload: number,
lastSeen: string,
capabilities: {...}}"| WSS - - %% Channel Subscriptions - SC1 -->|"Subscribed"| S1 - SC2 -->|"Subscribed"| S2 - SC3 -->|"Subscribed"| S3 - - MC -->|"Subscribed"| M - ACH -->|"Subscribed (All Slaves)"| S1 - ACH -->|"Subscribed (All Slaves)"| S2 - ACH -->|"Subscribed (All Slaves)"| S3 - - style M fill:#ff9999 - style WC1 fill:#ffcc99 - style AS fill:#ffcc99 - style SUB fill:#ffcc99 - style S1 fill:#99ccff - style S2 fill:#99ccff - style S3 fill:#99ccff -``` - -### Channel Message Specification - -| Channel Name | Direction | Message Type | Sender | Receiver | Payload Structure | Purpose | -|--------------|-----------|--------------|---------|-----------|-------------------|---------| -| `worker:slave:{slaveId}` | Master→Slave | `detection` | Master Process | Assigned Slave | `{type: 'detection', workerUrl: string, data: ImageDetectionResponse, timestamp: string}` | Route AI detection results from workers to processing slaves | -| `worker:master:commands` | Slave→Master | `regenerate_subscriptions` | Any Process | Master Process | `{type: 'regenerate_subscriptions', reason: string, triggeredBy: string, timestamp: string}` | Notify master that database changed and subscriptions need regeneration | -| `worker:assignments:changed` | Master→All Slaves | `assignments_updated` | Master Process | All Slave Processes | `{type: 'assignments_updated', assignments: Record, timestamp: string}` | Broadcast worker-to-slave assignment changes for rebalancing | -| `worker:messages:upstream` | Slave→Master | Various | Any Slave Process | Master Process | `{type: string, slaveId: string, data: any, timestamp: string}` | General slave-to-master communication (currently unused) | - -### Redis Hash Storage Specification - -| Redis Key | Data Type | Content | Update Pattern | Cleanup Strategy | -|-----------|-----------|---------|----------------|-------------------| -| `worker:connections` | Hash Map | `{[workerUrl]: JSON.stringify(WorkerConnectionState)}` | Master updates every 2s | Manual cleanup only | -| `worker:assignments` | Hash Map | `{[workerUrl]: slaveId}` | Master updates on rebalancing | Manual cleanup only | -| `worker:camera_subscriptions` | Hash Map | `{[subscriptionId]: JSON.stringify(CameraSubscription)}` | Master on subscription changes | Manual cleanup only | -| `master-election:slaves` | Hash Map | `{[nodeId]: JSON.stringify(SlaveNode)}` | MasterElection service manages | TTL-based cleanup | - -### WebSocket Message Protocol - -| Direction | Message Type | JSON Structure | Trigger | Response Expected | -|-----------|--------------|----------------|---------|-------------------| -| Backend→Worker | `SUBSCRIBE` | `{type: "subscribe", payload: {subscriptionIdentifier, rtspUrl, snapshotUrl?, snapshotInterval?, modelUrl, modelName, modelId, cropX1?, cropY1?, cropX2?, cropY2?}}` | Camera subscription request | STATE_REPORT confirmation | -| Backend→Worker | `UNSUBSCRIBE` | `{type: "unsubscribe", payload: {subscriptionIdentifier}}` | Camera unsubscription | STATE_REPORT confirmation | -| Backend→Worker | `REQUEST_STATE` | `{type: "requestState"}` | Health check or monitoring | STATE_REPORT response | -| Backend→Worker | `SET_SESSION_ID` | `{type: "setSessionId", payload: {displayIdentifier, sessionId}}` | Associate session with display | None | -| Backend→Worker | `PATCH_SESSION_RESULT` | `{type: "patchSessionResult", payload: {sessionId, success, message?}}` | Session update response | None | -| Worker→Backend | `IMAGE_DETECTION` | `{type: "imageDetection", subscriptionIdentifier, timestamp, data: 
{detection: {carModel?, carBrand?, carYear?, bodyType?, licensePlateText?, licensePlateType?}, modelId, modelName}}` | AI detection result | None | -| Worker→Backend | `STATE_REPORT` | `{type: "stateReport", cpuUsage, memoryUsage, gpuUsage?, gpuMemoryUsage?, cameraConnections: [{subscriptionIdentifier, modelId, modelName, online, cropX?, cropY?}]}` | Periodic health report (every 2s) | None | -| Worker→Backend | `PATCH_SESSION` | `{type: "patchSession", sessionId, data: any}` | Session data update from ML processing | PATCH_SESSION_RESULT | - -## Event System Architecture - -### Event Flow Hierarchy - -```mermaid -graph TD - subgraph "Service Layer" - CS[CameraService] - end - - subgraph "Cluster Layer" - DC[DetectorCluster] - DC --> DCE[Detection Events] - DC --> WOE[Worker Online Events] - DC --> WOFE[Worker Offline Events] - end - - subgraph "Worker Management Layer" - MS[MasterSlaveWorkerCluster] - MS --> DE[detection] - MS --> WC[worker:connected] - MS --> WD[worker:disconnected] - MS --> WSE[worker:websocket_error] - MS --> WON[worker:online] - MS --> WOFF[worker:offline] - MS --> WSR[worker:state_report] - end - - subgraph "Connection Layer" - DConn[DetectorConnection] - DConn --> IMG[IMAGE_DETECTION] - DConn --> STATE[STATE_REPORT] - DConn --> PATCH[PATCH_SESSION] - end - - DConn --> MS - MS --> DC - DC --> CS - - IMG -.-> DE - STATE -.-> WSR - WC -.-> WOE - WD -.-> WOFE -``` - -### Message Types and Routing - -#### WebSocket Message Types (Python Worker → Backend) -- `IMAGE_DETECTION`: AI detection results from camera streams -- `STATE_REPORT`: Worker health, resource usage, and subscription status -- `PATCH_SESSION`: Session data updates from worker processing - -#### Redis Channel Message Types -- `detection`: Detection results routed from master to assigned slave -- `command_response`: Command acknowledgment and status updates -- `heartbeat`: Worker and slave health monitoring messages -- `assignments_updated`: Worker-to-slave assignment change notifications - -#### Internal Event Types -- `worker:online`: Worker connection established and ready -- `worker:offline`: Worker connection lost or health check failed -- `worker:connected`: WebSocket connection opened (not necessarily ready) -- `worker:disconnected`: WebSocket connection closed -- `worker:websocket_error`: WebSocket communication errors -- `worker:detection_result`: Processed detection with metadata -- `worker:state_report`: Worker resource and subscription status - -## Subscription Management - -### Camera Subscription Flow - -1. **Registration Phase** - - `CameraService.subscribeToCamera()` → `DetectorCluster.subscribeToCamera()` - - Master process finds optimal worker using load balancing algorithm - - Subscription stored in Redis with full configuration including crop parameters - - Master sends WebSocket SUBSCRIBE command to assigned worker - -2. **Processing Phase** - - Python worker establishes RTSP connection to camera - - Worker performs AI inference on video stream frames - - Detection results sent back via WebSocket with subscription identifier - - Master routes results to appropriate slave based on worker assignments - -3. 
**Rebalancing Phase** - - Master monitors worker health every 30 seconds - - Orphaned subscriptions (offline workers) automatically detected - - Load balancing algorithm reassigns cameras to healthy workers - - Fresh model URLs generated to handle S3 presigned URL expiration - -### Load Balancing Algorithm - -```typescript -// Simplified load balancing logic -function findBestWorkerForSubscription(onlineWorkers, allSubscriptions) { - return onlineWorkers - .sort((a, b) => { - const loadA = getSubscriptionCount(a.url); - const loadB = getSubscriptionCount(b.url); - if (loadA !== loadB) { - return loadA - loadB; // Prefer lower load - } - return (a.cpuUsage || 0) - (b.cpuUsage || 0); // Then prefer lower CPU - })[0]; -} -``` - -### Automatic Failover Process - -1. **Detection**: Master detects worker offline via missed heartbeats (10s timeout) -2. **Identification**: System identifies all camera subscriptions assigned to offline worker -3. **Reassignment**: Load balancer selects optimal replacement worker -4. **Migration**: Subscription updated in Redis with new worker assignment -5. **Resubscription**: Master sends SUBSCRIBE command to new worker with fresh model URL -6. **Verification**: New worker confirms subscription and begins processing - -## Resource Management - -### Connection Pooling -- Master maintains persistent WebSocket connections to all configured workers -- Connection sharing across all backend processes reduces resource overhead -- Automatic reconnection with exponential backoff prevents connection storms - -### Memory Management -- Redis data uses manual cleanup to prevent accidental state loss -- Subscription callbacks stored in local memory with automatic cleanup on unsubscribe -- Worker resource usage tracked in real-time to prevent overload - -### CPU and GPU Monitoring -- Workers report resource usage every 2 seconds via STATE_REPORT messages -- Load balancing algorithm considers CPU usage when assigning new subscriptions -- GPU utilization tracked for ML model optimization and capacity planning - -## Error Handling - -### Connection Error Recovery -- **Exponential Backoff**: 10-second fixed interval reconnection attempts -- **Circuit Breaker**: Automatic failover prevents overwhelming failed workers -- **Graceful Degradation**: System continues operating with available workers - -### Master Election Failover -- **Leadership Transfer**: New master elected via Redis-based coordination -- **State Recovery**: Worker connections and subscriptions restored from Redis persistence -- **Seamless Transition**: No subscription loss during master failover process - -### Monitoring and Observability - -#### Structured Logging Topics -- `detector-cluster`: High-level cluster operations and state changes -- `master-slave-worker-cluster`: Worker assignment and rebalancing operations -- `DetectorConnection`: WebSocket connection events and message processing - -#### Monitoring Information -- Subscription identifier format: `${displayId};${cameraId}` for traceability -- Worker assignment tracking with process ID and timestamp correlation -- Redis pub/sub message routing with structured logging -- Heartbeat and health check timing with millisecond precision - -## Configuration Parameters - -### Timing Configuration -```typescript -const WORKER_TIMEOUT_MS = 10000; // Worker heartbeat timeout -const SLAVE_HEARTBEAT_INTERVAL = 5000; // Slave heartbeat frequency -const SLAVE_TIMEOUT = 15000; // Slave registration timeout -const REBALANCE_INTERVAL = 30000; // Automatic rebalancing 
frequency -const STATE_UPDATE_INTERVAL = 2000; // Worker state update frequency -const RECONNECT_DELAY = 10000; // WebSocket reconnection delay -``` - -### Environment Variables -```bash -DETECTOR_WORKERS=ws://worker1:8000,ws://worker2:8000 # Python worker URLs -REDIS_HOST=localhost # Redis coordination server -REDIS_PORT=6379 # Redis server port -REDIS_PASSWORD=secure_password # Redis authentication -DETECT_DEBUG=true # Enable detailed structured logging -``` - -## Performance Characteristics - -### Scalability Metrics -- **Horizontal Scaling**: Add backend processes without WebSocket connection changes -- **Worker Scaling**: Python ML workers scale independently of backend processes -- **Redis Optimization**: Efficient pub/sub routing with minimal memory overhead - -### Throughput Capabilities -- **Camera Subscriptions**: Support for 100+ simultaneous camera streams per worker -- **Detection Processing**: Sub-second AI inference with real-time result delivery -- **Message Routing**: Sub-millisecond Redis pub/sub message delivery - -### Resource Efficiency -- **Connection Multiplexing**: Single WebSocket per worker shared across all processes -- **Memory Usage**: Lightweight subscription state with callback cleanup -- **Network Optimization**: Binary WebSocket frames with JSON payload compression - -## Public Interface Specification - -The distributed worker cluster exposes a clean, simplified interface to external services like CameraService, hiding the complexity of the underlying master-slave architecture. All interactions go through the `DetectorCluster` class, which serves as the primary facade. - -### Primary Interface: DetectorCluster - -The `DetectorCluster` class in `/services/DetectorCluster.ts` provides the main public interface that external services interact with. It abstracts away the distributed architecture complexity and provides consistent behavior regardless of whether the current process is a master or slave. 
- -#### Core Interface Methods - -##### Camera Subscription Management - -```typescript -/** - * Subscribe to a camera stream for AI detection processing - * @param subscriptionIdentifier - Unique identifier format: "${displayId};${cameraId}" - * @param rtspUrl - RTSP stream URL for the camera - * @param modelUrl - Pre-signed S3 URL for AI model (1hr TTL) - * @param modelId - Database ID of the AI model - * @param modelName - Human-readable model identifier - * @param callback - Function called when detection results are received - * @param snapshotUrl - Optional HTTP endpoint for camera snapshots - * @param snapshotInterval - Optional snapshot capture interval in milliseconds - * @param cropX1, cropY1, cropX2, cropY2 - Optional image crop coordinates - * @returns Promise - Always returns true (errors thrown as exceptions) - */ -public async subscribeToCamera( - subscriptionIdentifier: string, - rtspUrl: string, - modelUrl: string, - modelId: number, - modelName: string, - callback: Function, - snapshotUrl?: string, - snapshotInterval?: number, - cropX1?: number, - cropY1?: number, - cropX2?: number, - cropY2?: number -): Promise -``` - -**Behavior:** -- **Master Process**: Stores subscription in Redis, assigns to optimal worker, sends WebSocket command -- **Slave Process**: Routes subscription request to master via Redis pub/sub -- **Callback Registration**: Stores callback locally for detection result processing -- **Persistence**: All subscription details stored in Redis for failover recovery -- **Load Balancing**: Automatically selects best available worker based on CPU and subscription load - -```typescript -/** - * Unsubscribe from a specific camera stream - * @param subscriptionIdentifier - The subscription to remove - * @returns Promise - Success status - */ -public async unsubscribeFromCamera(subscriptionIdentifier: string): Promise -``` - -**Behavior:** -- Removes local callback listeners immediately -- Subscription cleanup handled automatically by cluster rebalancing -- Safe to call multiple times (idempotent operation) - -```typescript -/** - * Remove all subscriptions for a specific camera across all displays - * @param cameraIdentifier - The camera ID to unsubscribe from all displays - * @returns Promise - */ -public async unsubscribeFromAllWithCameraID(cameraIdentifier: string): Promise -``` - -**Behavior:** -- Finds all subscription identifiers matching pattern `*;${cameraIdentifier}` -- Removes all local callbacks for matched subscriptions -- Cluster automatically handles worker-side cleanup - -##### Event Registration and Callbacks - -```typescript -/** - * Register a callback for detection results from a specific subscription - * @param subscriptionIdentifier - Target subscription - * @param callback - Function to call with detection data - */ -public addDetectionListener(subscriptionIdentifier: string, callback: Function): void - -/** - * Register a global callback for all detection results - * @param callback - Function to call with any detection data - */ -public addGlobalDetectionListener(callback: Function): void -``` - -**Detection Callback Signature:** -```typescript -type DetectionCallback = (data: { - subscriptionIdentifier: string; - timestamp: Date; - data: { - detection: { - carModel?: string; - carBrand?: string; - carYear?: number; - bodyType?: string; - licensePlateText?: string; - licensePlateType?: string; - }; - modelId: number; - modelName: string; - }; -}) => void; -``` - -##### Cluster State Management - -```typescript -/** - * Get comprehensive 
cluster state for monitoring and status reporting - * @returns Promise - */ -public async getState(): Promise - -/** - * Legacy method - rebalancing now happens automatically - * @returns Promise - Always returns true - */ -public async rebalanceWorkers(): Promise -``` - -**DetectorClusterState Interface:** -```typescript -interface DetectorClusterState { - processId: string; // Current process identifier - isMaster: boolean; // Whether this process is the master - slaveId: string; // This process's slave identifier - totalWorkers: number; // Number of Python ML workers - totalSlaves: number; // Number of backend slave processes - workers: WorkerState[]; // Detailed worker health and status - slaves: SlaveInfo[]; // Slave process information - assignments: Record; // workerUrl -> slaveId mapping -} -``` - -##### Session Management (Future Implementation) - -```typescript -/** - * Associate a session ID with a camera subscription for tracking - * @param subscriptionIdentifier - Target subscription - * @param sessionId - Session ID to associate (null to clear) - * @returns Promise - Success status - */ -public async setSessionId(subscriptionIdentifier: string, sessionId: number | null): Promise - -/** - * Get current camera image via worker REST API - * @param cameraIdentifier - Camera to capture from - * @returns Promise - JPEG image data - */ -public async getCameraImage(cameraIdentifier: string): Promise -``` - -**Note:** These methods are currently not fully implemented in master-slave mode. - -### Event System Interface - -The cluster emits events that external services can listen to for system monitoring and integration: - -#### Emitted Events - -```typescript -// Detection result processed -detectorCluster.on('worker:detection_result', (event: { - url: string; // Worker URL (always 'cluster-managed') - cameraId: string; // Subscription identifier - detections: number; // Number of objects detected (0 or 1) -}) => void); - -// Worker status changes -detectorCluster.on('worker:online', (event: { url: string }) => void); -detectorCluster.on('worker:offline', (event: { url: string }) => void); - -// Connection events -detectorCluster.on('worker:connecting', (event: { url: string }) => void); -detectorCluster.on('worker:disconnected', (event: { url: string, reason: string }) => void); -detectorCluster.on('worker:websocket_error', (event: { url: string, error: string }) => void); -``` - -### Usage Examples - -#### Basic Camera Subscription (CameraService Integration) - -```typescript -import { detectorCluster } from '~/modules/camera/services/CameraService'; - -// Subscribe to camera with AI detection -const success = await detectorCluster.subscribeToCamera( - `display-123;camera-456`, // subscriptionIdentifier - 'rtsp://192.168.1.100:554/stream1', // rtspUrl - 'https://s3.bucket.com/model.onnx', // modelUrl (pre-signed) - 42, // modelId - 'vehicle-detection-v2', // modelName - (detectionData) => { // callback - console.log('Detection:', detectionData.data.detection); - // Process car model, license plate, etc. - }, - 'http://192.168.1.100/snapshot.jpg', // snapshotUrl (optional) - 5000, // snapshotInterval (optional) - 100, 50, 800, 600 // crop coordinates (optional) -); -``` - -#### Event Monitoring Integration - -```typescript -// Monitor worker health -detectorCluster.on('worker:online', (event) => { - console.log(`Worker ${event.url} came online`); - // Update dashboard, send notifications, etc. 
-}); - -detectorCluster.on('worker:offline', (event) => { - console.log(`Worker ${event.url} went offline`); - // Alert administrators, trigger failover procedures -}); - -// Monitor detection activity -detectorCluster.on('worker:detection_result', (event) => { - if (event.detections > 0) { - console.log(`Camera ${event.cameraId} detected objects`); - // Trigger content changes, log analytics, etc. - } -}); -``` - -#### Cluster State Monitoring - -```typescript -// Get comprehensive cluster status -const state = await detectorCluster.getState(); - -console.log(`Process ${state.processId} is ${state.isMaster ? 'MASTER' : 'SLAVE'}`); -console.log(`Cluster: ${state.totalWorkers} workers, ${state.totalSlaves} slaves`); - -// Monitor worker health -state.workers.forEach(worker => { - console.log(`Worker ${worker.url}: ${worker.online ? 'ONLINE' : 'OFFLINE'}`); - console.log(` CPU: ${worker.cpuUsage}%, Memory: ${worker.memoryUsage}%`); - console.log(` Subscriptions: ${worker.subscriptionCount}`); -}); - -// Check assignments -Object.entries(state.assignments).forEach(([workerUrl, slaveId]) => { - console.log(`Worker ${workerUrl} assigned to slave ${slaveId}`); -}); -``` - -#### Bulk Camera Management - -```typescript -// Remove all subscriptions for a camera being deleted -await detectorCluster.unsubscribeFromAllWithCameraID('camera-456'); - -// Re-subscribe camera to all displays after configuration change -const displays = await getDisplaysForCamera('camera-456'); -for (const display of displays) { - await detectorCluster.subscribeToCamera( - `${display.id};camera-456`, - camera.rtspUrl, - freshModelUrl, - modelId, - modelName, - createDetectionHandler(display.id, camera.id), - camera.snapshotUrl, - camera.snapshotInterval, - display.cropX1, display.cropY1, - display.cropX2, display.cropY2 - ); -} -``` - -### Error Handling Interface - -The cluster interface follows consistent error handling patterns: - -#### Exception Types - -```typescript -// Subscription errors -try { - await detectorCluster.subscribeToCamera(...); -} catch (error) { - // Possible errors: - // - "No workers available for assignment" - // - "Invalid subscription identifier format" - // - "Model URL expired or inaccessible" - // - Redis connection errors -} - -// State retrieval errors -try { - const state = await detectorCluster.getState(); -} catch (error) { - // Returns safe default state on errors - // Logs detailed error information -} -``` - -#### Graceful Degradation - -- **No Workers Available**: Subscriptions stored in Redis, will activate when workers come online -- **Master Process Failure**: New master elected, all subscriptions restored from Redis -- **Redis Connection Issues**: Local callbacks continue working, subscriptions restored when connection recovers -- **Invalid Parameters**: Clear error messages with parameter validation - -### Integration Patterns - -#### Service Layer Integration - -```typescript -// CameraService.ts example -export class CameraService { - constructor() { - // Initialize cluster connection - detectorCluster.initialize(); - - // Set up global detection processing - detectorCluster.addGlobalDetectionListener(this.processDetection.bind(this)); - } - - async subscribeCamera(displayId: string, camera: CameraEntity) { - const subscriptionId = `${displayId};${camera.cameraIdentifier}`; - - return await detectorCluster.subscribeToCamera( - subscriptionId, - camera.rtspUrl, - await this.getModelUrl(camera.modelId), - camera.modelId, - camera.modelName, - (data) => 
this.handleDetection(displayId, camera.id, data), - camera.snapshotUrl, - camera.snapshotInterval, - camera.cropX1, camera.cropY1, - camera.cropX2, camera.cropY2 - ); - } - - private processDetection(data: ImageDetectionResponse) { - // Global detection processing logic - this.updateAnalytics(data); - this.triggerDecisionTrees(data); - } -} -``` - -### Interface Guarantees and Contracts - -#### Reliability Guarantees - -- **At-Least-Once Detection Delivery**: Detection callbacks will be called at least once per detection -- **Subscription Persistence**: Subscriptions survive process restarts and master failovers -- **Automatic Reconnection**: Workers automatically reconnect with exponential backoff -- **Load Balancing**: New subscriptions automatically assigned to least loaded workers - -#### Performance Characteristics - -- **Subscription Latency**: < 100ms for new camera subscriptions -- **Detection Latency**: < 50ms from worker to callback (excluding AI processing time) -- **State Query Performance**: < 10ms for cluster state retrieval -- **Memory Usage**: O(n) where n = number of active subscriptions - -#### Thread Safety - -- **Callback Execution**: All callbacks executed on main event loop (Node.js single-threaded) -- **Concurrent Subscriptions**: Multiple simultaneous subscriptions handled safely -- **State Consistency**: Redis operations use atomic transactions where needed - -This interface specification provides external services with a clear understanding of how to integrate with the distributed worker cluster while maintaining abstraction from the underlying complexity. - -## Architecture Evolution: From Complex to Pure Declarative - -### Previous Architecture Limitations (Addressed) -- **Complex State Synchronization**: Incremental updates between database, Redis desired state, and worker actual state created synchronization complexity -- **Command Protocol Complexity**: Multiple command types (`subscribe_camera`, `unsubscribe_camera`) with complex payloads and error handling -- **State Divergence**: Database and Redis desired state could diverge, causing inconsistent behavior -- **Partial Update Complexity**: Complex logic for handling individual subscription changes led to edge cases and race conditions -- **Service Layer Complexity**: Camera/Display services contained complex subscription management logic - -### Current Pure Declarative Architecture Benefits -- **Single Source of Truth**: Database is the only source for desired state - no secondary state stores to synchronize -- **Zero State Divergence**: Desired state is always freshly derived from database queries, eliminating synchronization complexity -- **Simplified Protocol**: Only one command type (`regenerate_subscriptions`) with minimal payload -- **Consistent State Management**: Complete regeneration eliminates all edge cases and partial update complexity -- **Service Layer Simplicity**: Services just update database + trigger regeneration - no subscription logic -- **Operational Resilience**: System is self-healing and predictable - any database change triggers complete reconciliation - -### VMware DRS-like Benefits -- **Global Optimization**: Every regeneration considers all subscriptions globally for optimal load balancing -- **Automatic Recovery**: System automatically heals from any inconsistent state by regenerating from database -- **Resource Efficiency**: Workers assigned based on real-time CPU/memory metrics with load balancing -- **Fault Tolerance**: Complete state recovery from database after any 
failure (process crashes, network interruptions, etc.) - -### Performance Characteristics -- **Regeneration Speed**: Database queries are fast (~10ms) even with hundreds of displays -- **Reconciliation Efficiency**: Only changed subscriptions are actually modified on workers -- **Memory Efficiency**: No persistent state storage outside of database and current worker assignments -- **Network Efficiency**: Minimal command protocol reduces Redis pub/sub overhead - -This pure declarative architecture provides the reliability and simplicity of Kubernetes-style declarative resource management while maintaining the performance and scalability needed for real-time camera processing systems. \ No newline at end of file diff --git a/pipeline_webcam.py b/pipeline_webcam.py deleted file mode 100755 index 9da3a1b..0000000 --- a/pipeline_webcam.py +++ /dev/null @@ -1,137 +0,0 @@ -import argparse -import os -import cv2 -import time -import logging -import shutil -import threading # added threading -import yaml # for silencing YOLO - -from siwatsystem.pympta import load_pipeline_from_zip, run_pipeline - -# Configure logging -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s") - -# Silence YOLO logging -os.environ["YOLO_VERBOSE"] = "False" -for logger_name in ["ultralytics", "ultralytics.hub", "ultralytics.yolo.utils"]: - logging.getLogger(logger_name).setLevel(logging.WARNING) - -# Global variables for frame sharing -global_frame = None -global_ret = False -capture_running = False - -def video_capture_loop(cap): - global global_frame, global_ret, capture_running - while capture_running: - global_ret, global_frame = cap.read() - time.sleep(0.01) # slight delay to reduce CPU usage - -def clear_cache(cache_dir: str): - if os.path.exists(cache_dir): - shutil.rmtree(cache_dir) - -def log_pipeline_flow(frame, model_tree, level=0): - """ - Wrapper around run_pipeline that logs the model flow and detection results. - Returns the same output as the original run_pipeline function. 
- """ - indent = " " * level - model_id = model_tree.get("modelId", "unknown") - logging.info(f"{indent}→ Running model: {model_id}") - - detection, bbox = run_pipeline(frame, model_tree, return_bbox=True) - - if detection: - confidence = detection.get("confidence", 0) * 100 - class_name = detection.get("class", "unknown") - object_id = detection.get("id", "N/A") - - logging.info(f"{indent}✓ Detected: {class_name} (ID: {object_id}, confidence: {confidence:.1f}%)") - - # Check if any branches were triggered - triggered = False - for branch in model_tree.get("branches", []): - trigger_classes = branch.get("triggerClasses", []) - min_conf = branch.get("minConfidence", 0) - - if class_name in trigger_classes and detection.get("confidence", 0) >= min_conf: - triggered = True - if branch.get("crop", False) and bbox: - x1, y1, x2, y2 = bbox - cropped_frame = frame[y1:y2, x1:x2] - logging.info(f"{indent} ⌊ Triggering branch with cropped region {x1},{y1} to {x2},{y2}") - branch_result = log_pipeline_flow(cropped_frame, branch, level + 1) - else: - logging.info(f"{indent} ⌊ Triggering branch with full frame") - branch_result = log_pipeline_flow(frame, branch, level + 1) - - if branch_result[0]: # If branch detection successful, return it - return branch_result - - if not triggered and model_tree.get("branches"): - logging.info(f"{indent} ⌊ No branches triggered") - else: - logging.info(f"{indent}✗ No detection for {model_id}") - - return detection, bbox - -def main(mpta_file: str, video_source: str): - global capture_running - CACHE_DIR = os.path.join(".", ".mptacache") - clear_cache(CACHE_DIR) - logging.info(f"Loading pipeline from local file: {mpta_file}") - model_tree = load_pipeline_from_zip(mpta_file, CACHE_DIR) - if model_tree is None: - logging.error("Failed to load pipeline.") - return - - cap = cv2.VideoCapture(video_source) - if not cap.isOpened(): - logging.error(f"Cannot open video source {video_source}") - return - - # Start video capture in a separate thread - capture_running = True - capture_thread = threading.Thread(target=video_capture_loop, args=(cap,)) - capture_thread.start() - - logging.info("Press 'q' to exit.") - try: - while True: - # Use the global frame and ret updated by the thread - if not global_ret or global_frame is None: - continue # wait until a frame is available - - frame = global_frame.copy() # local copy to work with - - # Replace run_pipeline with our logging version - detection, bbox = log_pipeline_flow(frame, model_tree) - - if bbox: - x1, y1, x2, y2 = bbox - cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) - label = detection["class"] if detection else "Detection" - cv2.putText(frame, label, (x1, y1 - 10), - cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2) - - cv2.imshow("Pipeline Webcam", frame) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - finally: - # Stop capture thread and cleanup - capture_running = False - capture_thread.join() - cap.release() - cv2.destroyAllWindows() - clear_cache(CACHE_DIR) - logging.info("Cleaned up .mptacache directory on shutdown.") - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run pipeline webcam utility.") - parser.add_argument("--mpta-file", type=str, required=True, help="Path to the local pipeline mpta (ZIP) file.") - parser.add_argument("--video", type=str, default="0", help="Video source (default webcam index 0).") - args = parser.parse_args() - video_source = int(args.video) if args.video.isdigit() else args.video - main(args.mpta_file, video_source) diff --git a/pympta.md 
b/pympta.md deleted file mode 100644 index e35fec2..0000000 --- a/pympta.md +++ /dev/null @@ -1,327 +0,0 @@ -# pympta: Modular Pipeline Task Executor - -`pympta` is a Python module designed to load and execute modular, multi-stage AI pipelines defined in a special package format (`.mpta`). It is primarily used within the detector worker to run complex computer vision tasks where the output of one model can trigger a subsequent model on a specific region of interest. - -## Core Concepts - -### 1. MPTA Package (`.mpta`) - -An `.mpta` file is a standard `.zip` archive with a different extension. It bundles all the necessary components for a pipeline to run. - -A typical `.mpta` file has the following structure: - -``` -my_pipeline.mpta/ -├── pipeline.json -├── model1.pt -├── model2.pt -└── ... -``` - -- **`pipeline.json`**: (Required) The manifest file that defines the structure of the pipeline, the models to use, and the logic connecting them. -- **Model Files (`.pt`, etc.)**: The actual pre-trained model files (e.g., PyTorch, ONNX). The pipeline currently uses `ultralytics.YOLO` models. - -### 2. Pipeline Structure - -A pipeline is a tree-like structure of "nodes," defined in `pipeline.json`. - -- **Root Node**: The entry point of the pipeline. It processes the initial, full-frame image. -- **Branch Nodes**: Child nodes that are triggered by specific detection results from their parent. For example, a root node might detect a "vehicle," which then triggers a branch node to detect a "license plate" within the vehicle's bounding box. - -This modular structure allows for creating complex and efficient inference logic, avoiding the need to run every model on every frame. - -## `pipeline.json` Specification - -This file defines the entire pipeline logic. The root object contains a `pipeline` key for the pipeline definition, optional `redis` key for Redis configuration, and optional `postgresql` key for database integration. - -### Top-Level Object Structure - -| Key | Type | Required | Description | -| ------------ | ------ | -------- | ------------------------------------------------------- | -| `pipeline` | Object | Yes | The root node object of the pipeline. | -| `redis` | Object | No | Configuration for connecting to a Redis server. | -| `postgresql` | Object | No | Configuration for connecting to a PostgreSQL database. | - -### Redis Configuration (`redis`) - -| Key | Type | Required | Description | -| ---------- | ------ | -------- | ------------------------------------------------------- | -| `host` | String | Yes | The hostname or IP address of the Redis server. | -| `port` | Number | Yes | The port number of the Redis server. | -| `password` | String | No | The password for Redis authentication. | -| `db` | Number | No | The Redis database number to use. Defaults to `0`. | - -### PostgreSQL Configuration (`postgresql`) - -| Key | Type | Required | Description | -| ---------- | ------ | -------- | ------------------------------------------------------- | -| `host` | String | Yes | The hostname or IP address of the PostgreSQL server. | -| `port` | Number | Yes | The port number of the PostgreSQL server. | -| `database` | String | Yes | The database name to connect to. | -| `username` | String | Yes | The username for database authentication. | -| `password` | String | Yes | The password for database authentication. 
| - -### Node Object Structure - -| Key | Type | Required | Description | -| ------------------- | ------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------- | -| `modelId` | String | Yes | A unique identifier for this model node (e.g., "vehicle-detector"). | -| `modelFile` | String | Yes | The path to the model file within the `.mpta` archive (e.g., "yolov8n.pt"). | -| `minConfidence` | Float | Yes | The minimum confidence score (0.0 to 1.0) required for a detection to be considered valid and potentially trigger a branch. | -| `triggerClasses` | Array | Yes | A list of class names that, when detected by the parent, can trigger this node. For the root node, this lists all classes of interest. | -| `crop` | Boolean | No | If `true`, the image is cropped to the parent's detection bounding box before being passed to this node's model. Defaults to `false`. | -| `cropClass` | String | No | The specific class to use for cropping (e.g., "Frontal" for frontal view cropping). | -| `multiClass` | Boolean | No | If `true`, enables multi-class detection mode where multiple classes can be detected simultaneously. | -| `expectedClasses` | Array | No | When `multiClass` is true, defines which classes are expected. At least one must be detected for processing to continue. | -| `parallel` | Boolean | No | If `true`, this branch will be processed in parallel with other parallel branches. | -| `branches` | Array | No | A list of child node objects that can be triggered by this node's detections. | -| `actions` | Array | No | A list of actions to execute upon a successful detection in this node. | -| `parallelActions` | Array | No | A list of actions to execute after all specified branches have completed. | - -### Action Object Structure - -Actions allow the pipeline to interact with Redis and PostgreSQL databases. They are executed sequentially for a given detection. - -#### Action Context & Dynamic Keys - -All actions have access to a dynamic context for formatting keys and messages. The context is created for each detection event and includes: - -- All key-value pairs from the detection result (e.g., `class`, `confidence`, `id`). -- `{timestamp_ms}`: The current Unix timestamp in milliseconds. -- `{timestamp}`: Formatted timestamp string (YYYY-MM-DDTHH-MM-SS). -- `{uuid}`: A unique identifier (UUID4) for the detection event. -- `{filename}`: Generated filename with UUID. -- `{camera_id}`: Full camera subscription identifier. -- `{display_id}`: Display identifier extracted from subscription. -- `{session_id}`: Session ID for database operations. -- `{image_key}`: If a `redis_save_image` action has already been executed for this event, this placeholder will be replaced with the key where the image was stored. - -#### `redis_save_image` - -Saves the current image frame (or cropped sub-image) to a Redis key. - -| Key | Type | Required | Description | -| ---------------- | ------ | -------- | ------------------------------------------------------------------------------------------------------- | -| `type` | String | Yes | Must be `"redis_save_image"`. | -| `key` | String | Yes | The Redis key to save the image to. Can contain any of the dynamic placeholders. | -| `region` | String | No | Specific detected region to crop and save (e.g., "Frontal"). | -| `format` | String | No | Image format: "jpeg" or "png". Defaults to "jpeg". | -| `quality` | Number | No | JPEG quality (1-100). Defaults to 90. 
| -| `expire_seconds` | Number | No | If provided, sets an expiration time (in seconds) for the Redis key. | - -#### `redis_publish` - -Publishes a message to a Redis channel. - -| Key | Type | Required | Description | -| --------- | ------ | -------- | ------------------------------------------------------------------------------------------------------- | -| `type` | String | Yes | Must be `"redis_publish"`. | -| `channel` | String | Yes | The Redis channel to publish the message to. | -| `message` | String | Yes | The message to publish. Can contain any of the dynamic placeholders, including `{image_key}`. | - -#### `postgresql_update_combined` - -Updates PostgreSQL database with results from multiple branches after they complete. - -| Key | Type | Required | Description | -| ------------------ | ------------- | -------- | ------------------------------------------------------------------------------------------------------- | -| `type` | String | Yes | Must be `"postgresql_update_combined"`. | -| `table` | String | Yes | The database table name (will be prefixed with `gas_station_1.` schema). | -| `key_field` | String | Yes | The field to use as the update key (typically "session_id"). | -| `key_value` | String | Yes | Template for the key value (e.g., "{session_id}"). | -| `waitForBranches` | Array | Yes | List of branch model IDs to wait for completion before executing update. | -| `fields` | Object | Yes | Field mapping object where keys are database columns and values are templates (e.g., "{branch.field}").| - -### Complete Example `pipeline.json` - -This example demonstrates a comprehensive pipeline for vehicle detection with parallel classification and database integration: - -```json -{ - "redis": { - "host": "10.100.1.3", - "port": 6379, - "password": "your-redis-password", - "db": 0 - }, - "postgresql": { - "host": "10.100.1.3", - "port": 5432, - "database": "inference", - "username": "root", - "password": "your-db-password" - }, - "pipeline": { - "modelId": "car_frontal_detection_v1", - "modelFile": "car_frontal_detection_v1.pt", - "crop": false, - "triggerClasses": ["Car", "Frontal"], - "minConfidence": 0.8, - "multiClass": true, - "expectedClasses": ["Car", "Frontal"], - "actions": [ - { - "type": "redis_save_image", - "region": "Frontal", - "key": "inference:{display_id}:{timestamp}:{session_id}:{filename}", - "expire_seconds": 600, - "format": "jpeg", - "quality": 90 - }, - { - "type": "redis_publish", - "channel": "car_detections", - "message": "{\"event\":\"frontal_detected\"}" - } - ], - "branches": [ - { - "modelId": "car_brand_cls_v1", - "modelFile": "car_brand_cls_v1.pt", - "crop": true, - "cropClass": "Frontal", - "resizeTarget": [224, 224], - "triggerClasses": ["Frontal"], - "minConfidence": 0.85, - "parallel": true, - "branches": [] - }, - { - "modelId": "car_bodytype_cls_v1", - "modelFile": "car_bodytype_cls_v1.pt", - "crop": true, - "cropClass": "Car", - "resizeTarget": [224, 224], - "triggerClasses": ["Car"], - "minConfidence": 0.85, - "parallel": true, - "branches": [] - } - ], - "parallelActions": [ - { - "type": "postgresql_update_combined", - "table": "car_frontal_info", - "key_field": "session_id", - "key_value": "{session_id}", - "waitForBranches": ["car_brand_cls_v1", "car_bodytype_cls_v1"], - "fields": { - "car_brand": "{car_brand_cls_v1.brand}", - "car_body_type": "{car_bodytype_cls_v1.body_type}" - } - } - ] - } -} -``` - -## API Reference - -The `pympta` module exposes two main functions. 
- -### `load_pipeline_from_zip(zip_source: str, target_dir: str) -> dict` - -Loads, extracts, and parses an `.mpta` file to build a pipeline tree in memory. It also establishes Redis and PostgreSQL connections if configured in `pipeline.json`. - -- **Parameters:** - - `zip_source` (str): The file path to the local `.mpta` zip archive. - - `target_dir` (str): A directory path where the archive's contents will be extracted. -- **Returns:** - - A dictionary representing the root node of the pipeline, ready to be used with `run_pipeline`. Returns `None` if loading fails. - -### `run_pipeline(frame, node: dict, return_bbox: bool = False, context: dict = None)` - -Executes the inference pipeline on a single image frame. - -- **Parameters:** - - `frame`: The input image frame (e.g., a NumPy array from OpenCV). - - `node` (dict): The pipeline node to execute (typically the root node returned by `load_pipeline_from_zip`). - - `return_bbox` (bool): If `True`, the function returns a tuple `(detection, bounding_box)`. Otherwise, it returns only the `detection`. - - `context` (dict): Optional context dictionary containing camera_id, display_id, session_id for action formatting. -- **Returns:** - - The final detection result from the last executed node in the chain. A detection is a dictionary like `{'class': 'car', 'confidence': 0.95, 'id': 1}`. If no detection meets the criteria, it returns `None` (or `(None, None)` if `return_bbox` is `True`). - -## Database Integration - -The pipeline system includes automatic PostgreSQL database management: - -### Table Schema (`gas_station_1.car_frontal_info`) - -The system automatically creates and manages the following table structure: - -```sql -CREATE TABLE IF NOT EXISTS gas_station_1.car_frontal_info ( - display_id VARCHAR(255), - captured_timestamp VARCHAR(255), - session_id VARCHAR(255) PRIMARY KEY, - license_character VARCHAR(255) DEFAULT NULL, - license_type VARCHAR(255) DEFAULT 'No model available', - car_brand VARCHAR(255) DEFAULT NULL, - car_model VARCHAR(255) DEFAULT NULL, - car_body_type VARCHAR(255) DEFAULT NULL, - created_at TIMESTAMP DEFAULT NOW(), - updated_at TIMESTAMP DEFAULT NOW() -); -``` - -### Workflow - -1. **Initial Record Creation**: When both "Car" and "Frontal" are detected, an initial database record is created with a UUID session_id. -2. **Redis Storage**: Vehicle images are stored in Redis with keys containing the session_id. -3. **Parallel Classification**: Brand and body type classification run concurrently. -4. **Database Update**: After all branches complete, the database record is updated with classification results. - -## Usage Example - -This snippet shows how to use `pympta` with the enhanced features: - -```python -import cv2 -from siwatsystem.pympta import load_pipeline_from_zip, run_pipeline - -# 1. Define paths -MPTA_FILE = "path/to/your/pipeline.mpta" -CACHE_DIR = ".mptacache" - -# 2. Load the pipeline from the .mpta file -# This reads pipeline.json and loads the YOLO models into memory. -model_tree = load_pipeline_from_zip(MPTA_FILE, CACHE_DIR) - -if not model_tree: - print("Failed to load pipeline.") - exit() - -# 3. Open a video source -cap = cv2.VideoCapture(0) - -while True: - ret, frame = cap.read() - if not ret: - break - - # 4. 
Run the pipeline on the current frame with context - context = { - "camera_id": "display-001;cam-001", - "display_id": "display-001", - "session_id": None # Will be generated automatically - } - - detection_result, bounding_box = run_pipeline(frame, model_tree, return_bbox=True, context=context) - - # 5. Display the results - if detection_result: - print(f"Detected: {detection_result['class']} with confidence {detection_result['confidence']:.2f}") - if bounding_box: - x1, y1, x2, y2 = bounding_box - cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) - cv2.putText(frame, detection_result['class'], (x1, y1 - 10), - cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2) - - cv2.imshow("Pipeline Output", frame) - - if cv2.waitKey(1) & 0xFF == ord('q'): - break - -cap.release() -cv2.destroyAllWindows() -``` \ No newline at end of file diff --git a/requirements.base.txt b/requirements.base.txt deleted file mode 100644 index af22160..0000000 --- a/requirements.base.txt +++ /dev/null @@ -1,7 +0,0 @@ -torch -torchvision -ultralytics -opencv-python -scipy -filterpy -psycopg2-binary \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 6eaf131..46a2624 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ fastapi uvicorn -websockets -fastapi[standard] -redis -urllib3<2.0.0 \ No newline at end of file +torch +torchvision +ultralytics +opencv-python \ No newline at end of file diff --git a/siwatsystem/database.py b/siwatsystem/database.py deleted file mode 100644 index 6340986..0000000 --- a/siwatsystem/database.py +++ /dev/null @@ -1,211 +0,0 @@ -import psycopg2 -import psycopg2.extras -from typing import Optional, Dict, Any -import logging -import uuid - -logger = logging.getLogger(__name__) - -class DatabaseManager: - def __init__(self, config: Dict[str, Any]): - self.config = config - self.connection: Optional[psycopg2.extensions.connection] = None - - def connect(self) -> bool: - try: - self.connection = psycopg2.connect( - host=self.config['host'], - port=self.config['port'], - database=self.config['database'], - user=self.config['username'], - password=self.config['password'] - ) - logger.info("PostgreSQL connection established successfully") - return True - except Exception as e: - logger.error(f"Failed to connect to PostgreSQL: {e}") - return False - - def disconnect(self): - if self.connection: - self.connection.close() - self.connection = None - logger.info("PostgreSQL connection closed") - - def is_connected(self) -> bool: - try: - if self.connection and not self.connection.closed: - cur = self.connection.cursor() - cur.execute("SELECT 1") - cur.fetchone() - cur.close() - return True - except: - pass - return False - - def update_car_info(self, session_id: str, brand: str, model: str, body_type: str) -> bool: - if not self.is_connected(): - if not self.connect(): - return False - - try: - cur = self.connection.cursor() - query = """ - INSERT INTO car_frontal_info (session_id, car_brand, car_model, car_body_type, updated_at) - VALUES (%s, %s, %s, %s, NOW()) - ON CONFLICT (session_id) - DO UPDATE SET - car_brand = EXCLUDED.car_brand, - car_model = EXCLUDED.car_model, - car_body_type = EXCLUDED.car_body_type, - updated_at = NOW() - """ - cur.execute(query, (session_id, brand, model, body_type)) - self.connection.commit() - cur.close() - logger.info(f"Updated car info for session {session_id}: {brand} {model} ({body_type})") - return True - except Exception as e: - logger.error(f"Failed to update car info: {e}") - if self.connection: - 
self.connection.rollback() - return False - - def execute_update(self, table: str, key_field: str, key_value: str, fields: Dict[str, str]) -> bool: - if not self.is_connected(): - if not self.connect(): - return False - - try: - cur = self.connection.cursor() - - # Build the UPDATE query dynamically - set_clauses = [] - values = [] - - for field, value in fields.items(): - if value == "NOW()": - set_clauses.append(f"{field} = NOW()") - else: - set_clauses.append(f"{field} = %s") - values.append(value) - - # Add schema prefix if table doesn't already have it - full_table_name = table if '.' in table else f"gas_station_1.{table}" - - query = f""" - INSERT INTO {full_table_name} ({key_field}, {', '.join(fields.keys())}) - VALUES (%s, {', '.join(['%s'] * len(fields))}) - ON CONFLICT ({key_field}) - DO UPDATE SET {', '.join(set_clauses)} - """ - - # Add key_value to the beginning of values list - all_values = [key_value] + list(fields.values()) + values - - cur.execute(query, all_values) - self.connection.commit() - cur.close() - logger.info(f"Updated {table} for {key_field}={key_value}") - return True - except Exception as e: - logger.error(f"Failed to execute update on {table}: {e}") - if self.connection: - self.connection.rollback() - return False - - def create_car_frontal_info_table(self) -> bool: - """Create the car_frontal_info table in gas_station_1 schema if it doesn't exist.""" - if not self.is_connected(): - if not self.connect(): - return False - - try: - cur = self.connection.cursor() - - # Create schema if it doesn't exist - cur.execute("CREATE SCHEMA IF NOT EXISTS gas_station_1") - - # Create table if it doesn't exist - create_table_query = """ - CREATE TABLE IF NOT EXISTS gas_station_1.car_frontal_info ( - display_id VARCHAR(255), - captured_timestamp VARCHAR(255), - session_id VARCHAR(255) PRIMARY KEY, - license_character VARCHAR(255) DEFAULT NULL, - license_type VARCHAR(255) DEFAULT 'No model available', - car_brand VARCHAR(255) DEFAULT NULL, - car_model VARCHAR(255) DEFAULT NULL, - car_body_type VARCHAR(255) DEFAULT NULL, - updated_at TIMESTAMP DEFAULT NOW() - ) - """ - - cur.execute(create_table_query) - - # Add columns if they don't exist (for existing tables) - alter_queries = [ - "ALTER TABLE gas_station_1.car_frontal_info ADD COLUMN IF NOT EXISTS car_brand VARCHAR(255) DEFAULT NULL", - "ALTER TABLE gas_station_1.car_frontal_info ADD COLUMN IF NOT EXISTS car_model VARCHAR(255) DEFAULT NULL", - "ALTER TABLE gas_station_1.car_frontal_info ADD COLUMN IF NOT EXISTS car_body_type VARCHAR(255) DEFAULT NULL", - "ALTER TABLE gas_station_1.car_frontal_info ADD COLUMN IF NOT EXISTS updated_at TIMESTAMP DEFAULT NOW()" - ] - - for alter_query in alter_queries: - try: - cur.execute(alter_query) - logger.debug(f"Executed: {alter_query}") - except Exception as e: - # Ignore errors if column already exists (for older PostgreSQL versions) - if "already exists" in str(e).lower(): - logger.debug(f"Column already exists, skipping: {alter_query}") - else: - logger.warning(f"Error in ALTER TABLE: {e}") - - self.connection.commit() - cur.close() - logger.info("Successfully created/verified car_frontal_info table with all required columns") - return True - - except Exception as e: - logger.error(f"Failed to create car_frontal_info table: {e}") - if self.connection: - self.connection.rollback() - return False - - def insert_initial_detection(self, display_id: str, captured_timestamp: str, session_id: str = None) -> str: - """Insert initial detection record and return the session_id.""" - if not 
self.is_connected(): - if not self.connect(): - return None - - # Generate session_id if not provided - if not session_id: - session_id = str(uuid.uuid4()) - - try: - # Ensure table exists - if not self.create_car_frontal_info_table(): - logger.error("Failed to create/verify table before insertion") - return None - - cur = self.connection.cursor() - insert_query = """ - INSERT INTO gas_station_1.car_frontal_info - (display_id, captured_timestamp, session_id, license_character, license_type, car_brand, car_model, car_body_type) - VALUES (%s, %s, %s, NULL, 'No model available', NULL, NULL, NULL) - ON CONFLICT (session_id) DO NOTHING - """ - - cur.execute(insert_query, (display_id, captured_timestamp, session_id)) - self.connection.commit() - cur.close() - logger.info(f"Inserted initial detection record with session_id: {session_id}") - return session_id - - except Exception as e: - logger.error(f"Failed to insert initial detection record: {e}") - if self.connection: - self.connection.rollback() - return None \ No newline at end of file diff --git a/siwatsystem/pympta.py b/siwatsystem/pympta.py deleted file mode 100644 index fd1485d..0000000 --- a/siwatsystem/pympta.py +++ /dev/null @@ -1,867 +0,0 @@ -import os -import json -import logging -import torch -import cv2 -import zipfile -import shutil -import traceback -import redis -import time -import uuid -import concurrent.futures -from ultralytics import YOLO -from urllib.parse import urlparse -from .database import DatabaseManager - -# Create a logger specifically for this module -logger = logging.getLogger("detector_worker.pympta") - -def validate_redis_config(redis_config: dict) -> bool: - """Validate Redis configuration parameters.""" - required_fields = ["host", "port"] - for field in required_fields: - if field not in redis_config: - logger.error(f"Missing required Redis config field: {field}") - return False - - if not isinstance(redis_config["port"], int) or redis_config["port"] <= 0: - logger.error(f"Invalid Redis port: {redis_config['port']}") - return False - - return True - -def validate_postgresql_config(pg_config: dict) -> bool: - """Validate PostgreSQL configuration parameters.""" - required_fields = ["host", "port", "database", "username", "password"] - for field in required_fields: - if field not in pg_config: - logger.error(f"Missing required PostgreSQL config field: {field}") - return False - - if not isinstance(pg_config["port"], int) or pg_config["port"] <= 0: - logger.error(f"Invalid PostgreSQL port: {pg_config['port']}") - return False - - return True - -def crop_region_by_class(frame, regions_dict, class_name): - """Crop a specific region from frame based on detected class.""" - if class_name not in regions_dict: - logger.warning(f"Class '{class_name}' not found in detected regions") - return None - - bbox = regions_dict[class_name]['bbox'] - x1, y1, x2, y2 = bbox - cropped = frame[y1:y2, x1:x2] - - if cropped.size == 0: - logger.warning(f"Empty crop for class '{class_name}' with bbox {bbox}") - return None - - return cropped - -def format_action_context(base_context, additional_context=None): - """Format action context with dynamic values.""" - context = {**base_context} - if additional_context: - context.update(additional_context) - return context - -def load_pipeline_node(node_config: dict, mpta_dir: str, redis_client, db_manager=None) -> dict: - # Recursively load a model node from configuration. 
- model_path = os.path.join(mpta_dir, node_config["modelFile"]) - if not os.path.exists(model_path): - logger.error(f"Model file {model_path} not found. Current directory: {os.getcwd()}") - logger.error(f"Directory content: {os.listdir(os.path.dirname(model_path))}") - raise FileNotFoundError(f"Model file {model_path} not found.") - logger.info(f"Loading model for node {node_config['modelId']} from {model_path}") - model = YOLO(model_path) - if torch.cuda.is_available(): - logger.info(f"CUDA available. Moving model {node_config['modelId']} to GPU") - model.to("cuda") - else: - logger.info(f"CUDA not available. Using CPU for model {node_config['modelId']}") - - # Prepare trigger class indices for optimization - trigger_classes = node_config.get("triggerClasses", []) - trigger_class_indices = None - if trigger_classes and hasattr(model, "names"): - # Convert class names to indices for the model - trigger_class_indices = [i for i, name in model.names.items() - if name in trigger_classes] - logger.debug(f"Converted trigger classes to indices: {trigger_class_indices}") - - node = { - "modelId": node_config["modelId"], - "modelFile": node_config["modelFile"], - "triggerClasses": trigger_classes, - "triggerClassIndices": trigger_class_indices, - "crop": node_config.get("crop", False), - "cropClass": node_config.get("cropClass"), - "minConfidence": node_config.get("minConfidence", None), - "multiClass": node_config.get("multiClass", False), - "expectedClasses": node_config.get("expectedClasses", []), - "parallel": node_config.get("parallel", False), - "actions": node_config.get("actions", []), - "parallelActions": node_config.get("parallelActions", []), - "model": model, - "branches": [], - "redis_client": redis_client, - "db_manager": db_manager - } - logger.debug(f"Configured node {node_config['modelId']} with trigger classes: {node['triggerClasses']}") - for child in node_config.get("branches", []): - logger.debug(f"Loading branch for parent node {node_config['modelId']}") - node["branches"].append(load_pipeline_node(child, mpta_dir, redis_client, db_manager)) - return node - -def load_pipeline_from_zip(zip_source: str, target_dir: str) -> dict: - logger.info(f"Attempting to load pipeline from {zip_source} to {target_dir}") - os.makedirs(target_dir, exist_ok=True) - zip_path = os.path.join(target_dir, "pipeline.mpta") - - # Parse the source; only local files are supported here. - parsed = urlparse(zip_source) - if parsed.scheme in ("", "file"): - local_path = parsed.path if parsed.scheme == "file" else zip_source - logger.debug(f"Checking if local file exists: {local_path}") - if os.path.exists(local_path): - try: - shutil.copy(local_path, zip_path) - logger.info(f"Copied local .mpta file from {local_path} to {zip_path}") - except Exception as e: - logger.error(f"Failed to copy local .mpta file from {local_path}: {str(e)}", exc_info=True) - return None - else: - logger.error(f"Local file {local_path} does not exist. Current directory: {os.getcwd()}") - # List all subdirectories of models directory to help debugging - if os.path.exists("models"): - logger.error(f"Content of models directory: {os.listdir('models')}") - for root, dirs, files in os.walk("models"): - logger.error(f"Directory {root} contains subdirs: {dirs} and files: {files}") - else: - logger.error("The models directory doesn't exist") - return None - else: - logger.error(f"HTTP download functionality has been moved. Use a local file path here. 
Received: {zip_source}") - return None - - try: - if not os.path.exists(zip_path): - logger.error(f"Zip file not found at expected location: {zip_path}") - return None - - logger.debug(f"Extracting .mpta file from {zip_path} to {target_dir}") - # Extract contents and track the directories created - extracted_dirs = [] - with zipfile.ZipFile(zip_path, "r") as zip_ref: - file_list = zip_ref.namelist() - logger.debug(f"Files in .mpta archive: {file_list}") - - # Extract and track the top-level directories - for file_path in file_list: - parts = file_path.split('/') - if len(parts) > 1: - top_dir = parts[0] - if top_dir and top_dir not in extracted_dirs: - extracted_dirs.append(top_dir) - - # Now extract the files - zip_ref.extractall(target_dir) - - logger.info(f"Successfully extracted .mpta file to {target_dir}") - logger.debug(f"Extracted directories: {extracted_dirs}") - - # Check what was actually created after extraction - actual_dirs = [d for d in os.listdir(target_dir) if os.path.isdir(os.path.join(target_dir, d))] - logger.debug(f"Actual directories created: {actual_dirs}") - except zipfile.BadZipFile as e: - logger.error(f"Bad zip file {zip_path}: {str(e)}", exc_info=True) - return None - except Exception as e: - logger.error(f"Failed to extract .mpta file {zip_path}: {str(e)}", exc_info=True) - return None - finally: - if os.path.exists(zip_path): - os.remove(zip_path) - logger.debug(f"Removed temporary zip file: {zip_path}") - - # Use the first extracted directory if it exists, otherwise use the expected name - pipeline_name = os.path.basename(zip_source) - pipeline_name = os.path.splitext(pipeline_name)[0] - - # Find the directory with pipeline.json - mpta_dir = None - # First try the expected directory name - expected_dir = os.path.join(target_dir, pipeline_name) - if os.path.exists(expected_dir) and os.path.exists(os.path.join(expected_dir, "pipeline.json")): - mpta_dir = expected_dir - logger.debug(f"Found pipeline.json in the expected directory: {mpta_dir}") - else: - # Look through all subdirectories for pipeline.json - for subdir in actual_dirs: - potential_dir = os.path.join(target_dir, subdir) - if os.path.exists(os.path.join(potential_dir, "pipeline.json")): - mpta_dir = potential_dir - logger.info(f"Found pipeline.json in directory: {mpta_dir} (different from expected: {expected_dir})") - break - - if not mpta_dir: - logger.error(f"Could not find pipeline.json in any extracted directory. Directory content: {os.listdir(target_dir)}") - return None - - pipeline_json_path = os.path.join(mpta_dir, "pipeline.json") - if not os.path.exists(pipeline_json_path): - logger.error(f"pipeline.json not found in the .mpta file. 
Files in directory: {os.listdir(mpta_dir)}") - return None - - try: - with open(pipeline_json_path, "r") as f: - pipeline_config = json.load(f) - logger.info(f"Successfully loaded pipeline configuration from {pipeline_json_path}") - logger.debug(f"Pipeline config: {json.dumps(pipeline_config, indent=2)}") - - # Establish Redis connection if configured - redis_client = None - if "redis" in pipeline_config: - redis_config = pipeline_config["redis"] - if not validate_redis_config(redis_config): - logger.error("Invalid Redis configuration, skipping Redis connection") - else: - try: - redis_client = redis.Redis( - host=redis_config["host"], - port=redis_config["port"], - password=redis_config.get("password"), - db=redis_config.get("db", 0), - decode_responses=True - ) - redis_client.ping() - logger.info(f"Successfully connected to Redis at {redis_config['host']}:{redis_config['port']}") - except redis.exceptions.ConnectionError as e: - logger.error(f"Failed to connect to Redis: {e}") - redis_client = None - - # Establish PostgreSQL connection if configured - db_manager = None - if "postgresql" in pipeline_config: - pg_config = pipeline_config["postgresql"] - if not validate_postgresql_config(pg_config): - logger.error("Invalid PostgreSQL configuration, skipping database connection") - else: - try: - db_manager = DatabaseManager(pg_config) - if db_manager.connect(): - logger.info(f"Successfully connected to PostgreSQL at {pg_config['host']}:{pg_config['port']}") - else: - logger.error("Failed to connect to PostgreSQL") - db_manager = None - except Exception as e: - logger.error(f"Error initializing PostgreSQL connection: {e}") - db_manager = None - - return load_pipeline_node(pipeline_config["pipeline"], mpta_dir, redis_client, db_manager) - except json.JSONDecodeError as e: - logger.error(f"Error parsing pipeline.json: {str(e)}", exc_info=True) - return None - except KeyError as e: - logger.error(f"Missing key in pipeline.json: {str(e)}", exc_info=True) - return None - except Exception as e: - logger.error(f"Error loading pipeline.json: {str(e)}", exc_info=True) - return None - -def execute_actions(node, frame, detection_result, regions_dict=None): - if not node["redis_client"] or not node["actions"]: - return - - # Create a dynamic context for this detection event - from datetime import datetime - action_context = { - **detection_result, - "timestamp_ms": int(time.time() * 1000), - "uuid": str(uuid.uuid4()), - "timestamp": datetime.now().strftime("%Y-%m-%dT%H-%M-%S"), - "filename": f"{uuid.uuid4()}.jpg" - } - - for action in node["actions"]: - try: - if action["type"] == "redis_save_image": - key = action["key"].format(**action_context) - - # Check if we need to crop a specific region - region_name = action.get("region") - image_to_save = frame - - if region_name and regions_dict: - cropped_image = crop_region_by_class(frame, regions_dict, region_name) - if cropped_image is not None: - image_to_save = cropped_image - logger.debug(f"Cropped region '{region_name}' for redis_save_image") - else: - logger.warning(f"Could not crop region '{region_name}', saving full frame instead") - - # Encode image with specified format and quality (default to JPEG) - img_format = action.get("format", "jpeg").lower() - quality = action.get("quality", 90) - - if img_format == "jpeg": - encode_params = [cv2.IMWRITE_JPEG_QUALITY, quality] - success, buffer = cv2.imencode('.jpg', image_to_save, encode_params) - elif img_format == "png": - success, buffer = cv2.imencode('.png', image_to_save) - else: - success, buffer 
= cv2.imencode('.jpg', image_to_save, [cv2.IMWRITE_JPEG_QUALITY, quality]) - - if not success: - logger.error(f"Failed to encode image for redis_save_image") - continue - - expire_seconds = action.get("expire_seconds") - if expire_seconds: - node["redis_client"].setex(key, expire_seconds, buffer.tobytes()) - logger.info(f"Saved image to Redis with key: {key} (expires in {expire_seconds}s)") - else: - node["redis_client"].set(key, buffer.tobytes()) - logger.info(f"Saved image to Redis with key: {key}") - action_context["image_key"] = key - elif action["type"] == "redis_publish": - channel = action["channel"] - try: - # Handle JSON message format by creating it programmatically - message_template = action["message"] - - # Check if the message is JSON-like (starts and ends with braces) - if message_template.strip().startswith('{') and message_template.strip().endswith('}'): - # Create JSON data programmatically to avoid formatting issues - json_data = {} - - # Add common fields - json_data["event"] = "frontal_detected" - json_data["display_id"] = action_context.get("display_id", "unknown") - json_data["session_id"] = action_context.get("session_id") - json_data["timestamp"] = action_context.get("timestamp", "") - json_data["image_key"] = action_context.get("image_key", "") - - # Convert to JSON string - message = json.dumps(json_data) - else: - # Use regular string formatting for non-JSON messages - message = message_template.format(**action_context) - - # Publish to Redis - if not node["redis_client"]: - logger.error("Redis client is None, cannot publish message") - continue - - # Test Redis connection - try: - node["redis_client"].ping() - logger.debug("Redis connection is active") - except Exception as ping_error: - logger.error(f"Redis connection test failed: {ping_error}") - continue - - result = node["redis_client"].publish(channel, message) - logger.info(f"Published message to Redis channel '{channel}': {message}") - logger.info(f"Redis publish result (subscribers count): {result}") - - # Additional debug info - if result == 0: - logger.warning(f"No subscribers listening to channel '{channel}'") - else: - logger.info(f"Message delivered to {result} subscriber(s)") - - except KeyError as e: - logger.error(f"Missing key in redis_publish message template: {e}") - logger.debug(f"Available context keys: {list(action_context.keys())}") - except Exception as e: - logger.error(f"Error in redis_publish action: {e}") - logger.debug(f"Message template: {action['message']}") - logger.debug(f"Available context keys: {list(action_context.keys())}") - import traceback - logger.debug(f"Full traceback: {traceback.format_exc()}") - except Exception as e: - logger.error(f"Error executing action {action['type']}: {e}") - -def execute_parallel_actions(node, frame, detection_result, regions_dict): - """Execute parallel actions after all required branches have completed.""" - if not node.get("parallelActions"): - return - - logger.debug("Executing parallel actions...") - branch_results = detection_result.get("branch_results", {}) - - for action in node["parallelActions"]: - try: - action_type = action.get("type") - logger.debug(f"Processing parallel action: {action_type}") - - if action_type == "postgresql_update_combined": - # Check if all required branches have completed - wait_for_branches = action.get("waitForBranches", []) - missing_branches = [branch for branch in wait_for_branches if branch not in branch_results] - - if missing_branches: - logger.warning(f"Cannot execute postgresql_update_combined: 
missing branch results for {missing_branches}") - continue - - logger.info(f"All required branches completed: {wait_for_branches}") - - # Execute the database update - execute_postgresql_update_combined(node, action, detection_result, branch_results) - else: - logger.warning(f"Unknown parallel action type: {action_type}") - - except Exception as e: - logger.error(f"Error executing parallel action {action.get('type', 'unknown')}: {e}") - import traceback - logger.debug(f"Full traceback: {traceback.format_exc()}") - -def execute_postgresql_update_combined(node, action, detection_result, branch_results): - """Execute a PostgreSQL update with combined branch results.""" - if not node.get("db_manager"): - logger.error("No database manager available for postgresql_update_combined action") - return - - try: - table = action["table"] - key_field = action["key_field"] - key_value_template = action["key_value"] - fields = action["fields"] - - # Create context for key value formatting - action_context = {**detection_result} - key_value = key_value_template.format(**action_context) - - logger.info(f"Executing database update: table={table}, {key_field}={key_value}") - - # Process field mappings - mapped_fields = {} - for db_field, value_template in fields.items(): - try: - mapped_value = resolve_field_mapping(value_template, branch_results, action_context) - if mapped_value is not None: - mapped_fields[db_field] = mapped_value - logger.debug(f"Mapped field: {db_field} = {mapped_value}") - else: - logger.warning(f"Could not resolve field mapping for {db_field}: {value_template}") - except Exception as e: - logger.error(f"Error mapping field {db_field} with template '{value_template}': {e}") - - if not mapped_fields: - logger.warning("No fields mapped successfully, skipping database update") - return - - # Execute the database update - success = node["db_manager"].execute_update(table, key_field, key_value, mapped_fields) - - if success: - logger.info(f"Successfully updated database: {table} with {len(mapped_fields)} fields") - else: - logger.error(f"Failed to update database: {table}") - - except KeyError as e: - logger.error(f"Missing required field in postgresql_update_combined action: {e}") - except Exception as e: - logger.error(f"Error in postgresql_update_combined action: {e}") - import traceback - logger.debug(f"Full traceback: {traceback.format_exc()}") - -def resolve_field_mapping(value_template, branch_results, action_context): - """Resolve field mapping templates like {car_brand_cls_v1.brand}.""" - try: - # Handle simple context variables first (non-branch references) - if not '.' in value_template: - return value_template.format(**action_context) - - # Handle branch result references like {model_id.field} - import re - branch_refs = re.findall(r'\{([^}]+\.[^}]+)\}', value_template) - - resolved_template = value_template - for ref in branch_refs: - try: - model_id, field_name = ref.split('.', 1) - - if model_id in branch_results: - branch_data = branch_results[model_id] - if field_name in branch_data: - field_value = branch_data[field_name] - resolved_template = resolved_template.replace(f'{{{ref}}}', str(field_value)) - logger.debug(f"Resolved {ref} to {field_value}") - else: - logger.warning(f"Field '{field_name}' not found in branch '{model_id}' results. Available fields: {list(branch_data.keys())}") - return None - else: - logger.warning(f"Branch '{model_id}' not found in results. 
Available branches: {list(branch_results.keys())}") - return None - except ValueError as e: - logger.error(f"Invalid branch reference format: {ref}") - return None - - # Format any remaining simple variables - try: - final_value = resolved_template.format(**action_context) - return final_value - except KeyError as e: - logger.warning(f"Could not resolve context variable in template: {e}") - return resolved_template - - except Exception as e: - logger.error(f"Error resolving field mapping '{value_template}': {e}") - return None - -def validate_pipeline_execution(node, regions_dict): - """ - Pre-validate that all required branches will execute successfully before - committing to Redis actions and database records. - - Returns: - - (True, []) if pipeline can execute completely - - (False, missing_branches) if some required branches won't execute - """ - # Get all branches that parallel actions are waiting for - required_branches = set() - - for action in node.get("parallelActions", []): - if action.get("type") == "postgresql_update_combined": - wait_for_branches = action.get("waitForBranches", []) - required_branches.update(wait_for_branches) - - if not required_branches: - # No parallel actions requiring specific branches - logger.debug("No parallel actions with waitForBranches - validation passes") - return True, [] - - logger.debug(f"Pre-validation: checking if required branches {list(required_branches)} will execute") - - # Check each required branch - missing_branches = [] - - for branch in node.get("branches", []): - branch_id = branch["modelId"] - - if branch_id not in required_branches: - continue # This branch is not required by parallel actions - - # Check if this branch would be triggered - trigger_classes = branch.get("triggerClasses", []) - min_conf = branch.get("minConfidence", 0) - - branch_triggered = False - for det_class in regions_dict: - det_confidence = regions_dict[det_class]["confidence"] - - if (det_class in trigger_classes and det_confidence >= min_conf): - branch_triggered = True - logger.debug(f"Pre-validation: branch {branch_id} WILL be triggered by {det_class} (conf={det_confidence:.3f} >= {min_conf})") - break - - if not branch_triggered: - missing_branches.append(branch_id) - logger.warning(f"Pre-validation: branch {branch_id} will NOT be triggered - no matching classes or insufficient confidence") - logger.debug(f" Required: {trigger_classes} with min_conf={min_conf}") - logger.debug(f" Available: {[(cls, regions_dict[cls]['confidence']) for cls in regions_dict]}") - - if missing_branches: - logger.error(f"Pipeline pre-validation FAILED: required branches {missing_branches} will not execute") - return False, missing_branches - else: - logger.info(f"Pipeline pre-validation PASSED: all required branches {list(required_branches)} will execute") - return True, [] - -def run_pipeline(frame, node: dict, return_bbox: bool=False, context=None): - """ - Enhanced pipeline that supports: - - Multi-class detection (detecting multiple classes simultaneously) - - Parallel branch processing - - Region-based actions and cropping - - Context passing for session/camera information - """ - try: - task = getattr(node["model"], "task", None) - - # ─── Classification stage ─────────────────────────────────── - if task == "classify": - results = node["model"].predict(frame, stream=False) - if not results: - return (None, None) if return_bbox else None - - r = results[0] - probs = r.probs - if probs is None: - return (None, None) if return_bbox else None - - top1_idx = int(probs.top1) 
- top1_conf = float(probs.top1conf) - class_name = node["model"].names[top1_idx] - - det = { - "class": class_name, - "confidence": top1_conf, - "id": None, - class_name: class_name # Add class name as key for backward compatibility - } - - # Add specific field mappings for database operations based on model type - model_id = node.get("modelId", "").lower() - if "brand" in model_id or "brand_cls" in model_id: - det["brand"] = class_name - elif "bodytype" in model_id or "body" in model_id: - det["body_type"] = class_name - elif "color" in model_id: - det["color"] = class_name - - execute_actions(node, frame, det) - return (det, None) if return_bbox else det - - # ─── Detection stage - Multi-class support ────────────────── - tk = node["triggerClassIndices"] - logger.debug(f"Running detection for node {node['modelId']} with trigger classes: {node.get('triggerClasses', [])} (indices: {tk})") - logger.debug(f"Node configuration: minConfidence={node['minConfidence']}, multiClass={node.get('multiClass', False)}") - - res = node["model"].track( - frame, - stream=False, - persist=True, - **({"classes": tk} if tk else {}) - )[0] - - # Collect all detections above confidence threshold - all_detections = [] - all_boxes = [] - regions_dict = {} - - logger.debug(f"Raw detection results from model: {len(res.boxes) if res.boxes is not None else 0} detections") - - for i, box in enumerate(res.boxes): - conf = float(box.cpu().conf[0]) - cid = int(box.cpu().cls[0]) - name = node["model"].names[cid] - - logger.debug(f"Detection {i}: class='{name}' (id={cid}), confidence={conf:.3f}, threshold={node['minConfidence']}") - - if conf < node["minConfidence"]: - logger.debug(f" -> REJECTED: confidence {conf:.3f} < threshold {node['minConfidence']}") - continue - - xy = box.cpu().xyxy[0] - x1, y1, x2, y2 = map(int, xy) - bbox = (x1, y1, x2, y2) - - detection = { - "class": name, - "confidence": conf, - "id": box.id.item() if hasattr(box, "id") else None, - "bbox": bbox - } - - all_detections.append(detection) - all_boxes.append(bbox) - - logger.debug(f" -> ACCEPTED: {name} with confidence {conf:.3f}, bbox={bbox}") - - # Store highest confidence detection for each class - if name not in regions_dict or conf > regions_dict[name]["confidence"]: - regions_dict[name] = { - "bbox": bbox, - "confidence": conf, - "detection": detection - } - logger.debug(f" -> Updated regions_dict['{name}'] with confidence {conf:.3f}") - - logger.info(f"Detection summary: {len(all_detections)} accepted detections from {len(res.boxes) if res.boxes is not None else 0} total") - logger.info(f"Detected classes: {list(regions_dict.keys())}") - - if not all_detections: - logger.warning("No detections above confidence threshold - returning null") - return (None, None) if return_bbox else None - - # ─── Multi-class validation ───────────────────────────────── - if node.get("multiClass", False) and node.get("expectedClasses"): - expected_classes = node["expectedClasses"] - detected_classes = list(regions_dict.keys()) - - logger.info(f"Multi-class validation: expected={expected_classes}, detected={detected_classes}") - - # Check if at least one expected class is detected (flexible mode) - matching_classes = [cls for cls in expected_classes if cls in detected_classes] - missing_classes = [cls for cls in expected_classes if cls not in detected_classes] - - logger.debug(f"Matching classes: {matching_classes}, Missing classes: {missing_classes}") - - if not matching_classes: - # No expected classes found at all - logger.warning(f"PIPELINE REJECTED: No 
expected classes detected. Expected: {expected_classes}, Detected: {detected_classes}") - return (None, None) if return_bbox else None - - if missing_classes: - logger.info(f"Partial multi-class detection: {matching_classes} found, {missing_classes} missing") - else: - logger.info(f"Complete multi-class detection success: {detected_classes}") - else: - logger.debug("No multi-class validation - proceeding with all detections") - - # ─── Pre-validate pipeline execution ──────────────────────── - pipeline_valid, missing_branches = validate_pipeline_execution(node, regions_dict) - - if not pipeline_valid: - logger.error(f"Pipeline execution validation FAILED - required branches {missing_branches} cannot execute") - logger.error("Aborting pipeline: no Redis actions or database records will be created") - return (None, None) if return_bbox else None - - # ─── Execute actions with region information ──────────────── - detection_result = { - "detections": all_detections, - "regions": regions_dict, - **(context or {}) - } - - # ─── Create initial database record when Car+Frontal detected ──── - if node.get("db_manager") and node.get("multiClass", False): - # Only create database record if we have both Car and Frontal - has_car = "Car" in regions_dict - has_frontal = "Frontal" in regions_dict - - if has_car and has_frontal: - # Generate UUID session_id since client session is None for now - import uuid as uuid_lib - from datetime import datetime - generated_session_id = str(uuid_lib.uuid4()) - - # Insert initial detection record - display_id = detection_result.get("display_id", "unknown") - timestamp = datetime.now().strftime("%Y-%m-%dT%H-%M-%S") - - inserted_session_id = node["db_manager"].insert_initial_detection( - display_id=display_id, - captured_timestamp=timestamp, - session_id=generated_session_id - ) - - if inserted_session_id: - # Update detection_result with the generated session_id for actions and branches - detection_result["session_id"] = inserted_session_id - detection_result["timestamp"] = timestamp # Update with proper timestamp - logger.info(f"Created initial database record with session_id: {inserted_session_id}") - else: - logger.debug(f"Database record not created - missing required classes. 
Has Car: {has_car}, Has Frontal: {has_frontal}") - - execute_actions(node, frame, detection_result, regions_dict) - - # ─── Parallel branch processing ───────────────────────────── - if node["branches"]: - branch_results = {} - - # Filter branches that should be triggered - active_branches = [] - for br in node["branches"]: - trigger_classes = br.get("triggerClasses", []) - min_conf = br.get("minConfidence", 0) - - logger.debug(f"Evaluating branch {br['modelId']}: trigger_classes={trigger_classes}, min_conf={min_conf}") - - # Check if any detected class matches branch trigger - branch_triggered = False - for det_class in regions_dict: - det_confidence = regions_dict[det_class]["confidence"] - logger.debug(f" Checking detected class '{det_class}' (confidence={det_confidence:.3f}) against triggers {trigger_classes}") - - if (det_class in trigger_classes and det_confidence >= min_conf): - active_branches.append(br) - branch_triggered = True - logger.info(f"Branch {br['modelId']} activated by class '{det_class}' (conf={det_confidence:.3f} >= {min_conf})") - break - - if not branch_triggered: - logger.debug(f"Branch {br['modelId']} not triggered - no matching classes or insufficient confidence") - - if active_branches: - if node.get("parallel", False) or any(br.get("parallel", False) for br in active_branches): - # Run branches in parallel - with concurrent.futures.ThreadPoolExecutor(max_workers=len(active_branches)) as executor: - futures = {} - - for br in active_branches: - crop_class = br.get("cropClass", br.get("triggerClasses", [])[0] if br.get("triggerClasses") else None) - sub_frame = frame - - logger.info(f"Starting parallel branch: {br['modelId']}, crop_class: {crop_class}") - - if br.get("crop", False) and crop_class: - cropped = crop_region_by_class(frame, regions_dict, crop_class) - if cropped is not None: - sub_frame = cv2.resize(cropped, (224, 224)) - logger.debug(f"Successfully cropped {crop_class} region for {br['modelId']}") - else: - logger.warning(f"Failed to crop {crop_class} region for {br['modelId']}, skipping branch") - continue - - future = executor.submit(run_pipeline, sub_frame, br, True, context) - futures[future] = br - - # Collect results - for future in concurrent.futures.as_completed(futures): - br = futures[future] - try: - result, _ = future.result() - if result: - branch_results[br["modelId"]] = result - logger.info(f"Branch {br['modelId']} completed: {result}") - except Exception as e: - logger.error(f"Branch {br['modelId']} failed: {e}") - else: - # Run branches sequentially - for br in active_branches: - crop_class = br.get("cropClass", br.get("triggerClasses", [])[0] if br.get("triggerClasses") else None) - sub_frame = frame - - logger.info(f"Starting sequential branch: {br['modelId']}, crop_class: {crop_class}") - - if br.get("crop", False) and crop_class: - cropped = crop_region_by_class(frame, regions_dict, crop_class) - if cropped is not None: - sub_frame = cv2.resize(cropped, (224, 224)) - logger.debug(f"Successfully cropped {crop_class} region for {br['modelId']}") - else: - logger.warning(f"Failed to crop {crop_class} region for {br['modelId']}, skipping branch") - continue - - try: - result, _ = run_pipeline(sub_frame, br, True, context) - if result: - branch_results[br["modelId"]] = result - logger.info(f"Branch {br['modelId']} completed: {result}") - else: - logger.warning(f"Branch {br['modelId']} returned no result") - except Exception as e: - logger.error(f"Error in sequential branch {br['modelId']}: {e}") - import traceback - 
logger.debug(f"Branch error traceback: {traceback.format_exc()}") - - # Store branch results in detection_result for parallel actions - detection_result["branch_results"] = branch_results - - # ─── Execute Parallel Actions ─────────────────────────────── - if node.get("parallelActions") and "branch_results" in detection_result: - execute_parallel_actions(node, frame, detection_result, regions_dict) - - # ─── Return detection result ──────────────────────────────── - primary_detection = max(all_detections, key=lambda x: x["confidence"]) - primary_bbox = primary_detection["bbox"] - - # Add branch results and session_id to primary detection for compatibility - if "branch_results" in detection_result: - primary_detection["branch_results"] = detection_result["branch_results"] - if "session_id" in detection_result: - primary_detection["session_id"] = detection_result["session_id"] - - return (primary_detection, primary_bbox) if return_bbox else primary_detection - - except Exception as e: - logger.error(f"Error in node {node.get('modelId')}: {e}") - traceback.print_exc() - return (None, None) if return_bbox else None diff --git a/test_protocol.py b/test_protocol.py deleted file mode 100644 index 74af7d8..0000000 --- a/test_protocol.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script to verify the worker implementation follows the protocol -""" -import json -import asyncio -import websockets -import time - -async def test_protocol(): - """Test the worker protocol implementation""" - uri = "ws://localhost:8000" - - try: - async with websockets.connect(uri) as websocket: - print("✓ Connected to worker") - - # Test 1: Check if we receive heartbeat (stateReport) - print("\n1. Testing heartbeat...") - try: - message = await asyncio.wait_for(websocket.recv(), timeout=5) - data = json.loads(message) - if data.get("type") == "stateReport": - print("✓ Received stateReport heartbeat") - print(f" - CPU Usage: {data.get('cpuUsage', 'N/A')}%") - print(f" - Memory Usage: {data.get('memoryUsage', 'N/A')}%") - print(f" - Camera Connections: {len(data.get('cameraConnections', []))}") - else: - print(f"✗ Expected stateReport, got {data.get('type')}") - except asyncio.TimeoutError: - print("✗ No heartbeat received within 5 seconds") - - # Test 2: Request state - print("\n2. Testing requestState...") - await websocket.send(json.dumps({"type": "requestState"})) - try: - message = await asyncio.wait_for(websocket.recv(), timeout=5) - data = json.loads(message) - if data.get("type") == "stateReport": - print("✓ Received stateReport response") - else: - print(f"✗ Expected stateReport, got {data.get('type')}") - except asyncio.TimeoutError: - print("✗ No response to requestState within 5 seconds") - - # Test 3: Set session ID - print("\n3. Testing setSessionId...") - session_message = { - "type": "setSessionId", - "payload": { - "displayIdentifier": "display-001", - "sessionId": 12345 - } - } - await websocket.send(json.dumps(session_message)) - print("✓ Sent setSessionId message") - - # Test 4: Test patchSession - print("\n4. 
Testing patchSession...") - patch_message = { - "type": "patchSession", - "sessionId": 12345, - "data": { - "currentCar": { - "carModel": "Civic", - "carBrand": "Honda" - } - } - } - await websocket.send(json.dumps(patch_message)) - - # Wait for patchSessionResult - try: - message = await asyncio.wait_for(websocket.recv(), timeout=5) - data = json.loads(message) - if data.get("type") == "patchSessionResult": - print("✓ Received patchSessionResult") - print(f" - Success: {data.get('payload', {}).get('success')}") - print(f" - Message: {data.get('payload', {}).get('message')}") - else: - print(f"✗ Expected patchSessionResult, got {data.get('type')}") - except asyncio.TimeoutError: - print("✗ No patchSessionResult received within 5 seconds") - - # Test 5: Test subscribe message format (without actual camera) - print("\n5. Testing subscribe message format...") - subscribe_message = { - "type": "subscribe", - "payload": { - "subscriptionIdentifier": "display-001;cam-001", - "snapshotUrl": "http://example.com/snapshot.jpg", - "snapshotInterval": 5000, - "modelUrl": "http://example.com/model.mpta", - "modelName": "Test Model", - "modelId": 101, - "cropX1": 100, - "cropY1": 200, - "cropX2": 300, - "cropY2": 400 - } - } - await websocket.send(json.dumps(subscribe_message)) - print("✓ Sent subscribe message (will fail without actual camera/model)") - - # Listen for a few more messages to catch any errors - print("\n6. Listening for additional messages...") - for i in range(3): - try: - message = await asyncio.wait_for(websocket.recv(), timeout=2) - data = json.loads(message) - msg_type = data.get("type") - print(f" - Received {msg_type}") - if msg_type == "error": - print(f" Error: {data.get('error')}") - except asyncio.TimeoutError: - break - - print("\n✓ Protocol test completed successfully!") - - except Exception as e: - print(f"✗ Connection failed: {e}") - print("Make sure the worker is running on localhost:8000") - -if __name__ == "__main__": - asyncio.run(test_protocol()) \ No newline at end of file diff --git a/worker.md b/worker.md deleted file mode 100644 index c485db5..0000000 --- a/worker.md +++ /dev/null @@ -1,495 +0,0 @@ -# Worker Communication Protocol - -This document outlines the WebSocket-based communication protocol between the CMS backend and a detector worker. As a worker developer, your primary responsibility is to implement a WebSocket server that adheres to this protocol. - -## 1. Connection - -The worker must run a WebSocket server, preferably on port `8000`. The backend system, which is managed by a container orchestration service, will automatically discover and establish a WebSocket connection to your worker. - -Upon a successful connection from the backend, you should begin sending `stateReport` messages as heartbeats. - -## 2. Communication Overview - -Communication is bidirectional and asynchronous. All messages are JSON objects with a `type` field that indicates the message's purpose, and an optional `payload` field containing the data. - -- **Worker -> Backend:** You will send messages to the backend to report status, forward detection events, or request changes to session data. -- **Backend -> Worker:** The backend will send commands to you to manage camera subscriptions. - -## 3. Dynamic Configuration via MPTA File - -To enable modularity and dynamic configuration, the backend will send you a URL to a `.mpta` file when it issues a `subscribe` command. This file is a renamed `.zip` archive that contains everything your worker needs to perform its task. 
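Because the `.mpta` file is simply a renamed ZIP archive served over HTTP, a minimal sketch of fetching and unpacking it could look like the snippet below. The URL matches the `modelUrl` example used later in this document; the local paths are placeholders, and a real worker may cache or validate the archive differently:

```python
import urllib.request
import zipfile

mpta_url = "http://storage/models/us-lpr.mpta"   # provided in the subscribe payload
local_path = "/tmp/us-lpr.mpta"                  # placeholder download location
extract_dir = "/tmp/us-lpr"                      # placeholder extraction directory

# Download the archive, then treat it as an ordinary ZIP file.
urllib.request.urlretrieve(mpta_url, local_path)
with zipfile.ZipFile(local_path, "r") as zf:
    zf.extractall(extract_dir)
# The extracted contents (models, pipeline.json, scripts, ...) configure the worker's pipeline.
```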
- -**Your worker is responsible for:** - -1. Fetching this file from the provided URL. -2. Extracting its contents. -3. Interpreting the contents to configure its internal pipeline. - -**The contents of the `.mpta` file are entirely up to the user who configures the model in the CMS.** This allows for maximum flexibility. For example, the archive could contain: - -- AI/ML Models: Pre-trained models for libraries like TensorFlow, PyTorch, or ONNX. -- Configuration Files: A `config.json` or `pipeline.yaml` that defines a sequence of operations, specifies model paths, or sets detection thresholds. -- Scripts: Custom Python scripts for pre-processing or post-processing. -- API Integration Details: A JSON file with endpoint information and credentials for interacting with third-party detection services. - -Essentially, the `.mpta` file is a self-contained package that tells your worker _how_ to process the video stream for a given subscription. - -## 4. Messages from Worker to Backend - -These are the messages your worker is expected to send to the backend. - -### 4.1. State Report (Heartbeat) - -This message is crucial for the backend to monitor your worker's health and status, including GPU usage. - -- **Type:** `stateReport` -- **When to Send:** Periodically (e.g., every 2 seconds) after a connection is established. - -**Payload:** - -```json -{ - "type": "stateReport", - "cpuUsage": 75.5, - "memoryUsage": 40.2, - "gpuUsage": 60.0, - "gpuMemoryUsage": 25.1, - "cameraConnections": [ - { - "subscriptionIdentifier": "display-001;cam-001", - "modelId": 101, - "modelName": "General Object Detection", - "online": true, - "cropX1": 100, - "cropY1": 200, - "cropX2": 300, - "cropY2": 400 - } - ] -} -``` - -> **Note:** -> -> - `cropX1`, `cropY1`, `cropX2`, `cropY2` (optional, integer) should be included in each camera connection to indicate the crop coordinates for that subscription. - -### 4.2. Image Detection - -Sent when the worker detects a relevant object. The `detection` object should be flat and contain key-value pairs corresponding to the detected attributes. - -- **Type:** `imageDetection` - -**Payload Example:** - -```json -{ - "type": "imageDetection", - "subscriptionIdentifier": "display-001;cam-001", - "timestamp": "2025-07-14T12:34:56.789Z", - "data": { - "detection": { - "carModel": "Civic", - "carBrand": "Honda", - "carYear": 2023, - "bodyType": "Sedan", - "licensePlateText": "ABCD1234", - "licensePlateConfidence": 0.95 - }, - "modelId": 101, - "modelName": "US-LPR-and-Vehicle-ID" - } -} -``` - -### 4.3. Patch Session - -> **Note:** Patch messages are only used when the worker can't keep up and needs to retroactively send detections. Normally, detections should be sent in real-time using `imageDetection` messages. Use `patchSession` only to update session data after the fact. - -Allows the worker to request a modification to an active session's data. The `data` payload must be a partial object of the `DisplayPersistentData` structure. - -- **Type:** `patchSession` - -**Payload Example:** - -```json -{ - "type": "patchSession", - "sessionId": 12345, - "data": { - "currentCar": { - "carModel": "Civic", - "carBrand": "Honda", - "licensePlateText": "ABCD1234" - } - } -} -``` - -The backend will respond with a `patchSessionResult` command. - -#### `DisplayPersistentData` Structure - -The `data` object in the `patchSession` message is merged with the existing `DisplayPersistentData` on the backend. 
Here is its structure: - -```typescript -interface DisplayPersistentData { - progressionStage: - | 'welcome' - | 'car_fueling' - | 'car_waitpayment' - | 'car_postpayment' - | null; - qrCode: string | null; - adsPlayback: { - playlistSlotOrder: number; // The 'order' of the current slot - adsId: number | null; - adsUrl: string | null; - } | null; - currentCar: { - carModel?: string; - carBrand?: string; - carYear?: number; - bodyType?: string; - licensePlateText?: string; - licensePlateType?: string; - } | null; - fuelPump: { - /* FuelPumpData structure */ - } | null; - weatherData: { - /* WeatherResponse structure */ - } | null; - sessionId: number | null; -} -``` - -#### Patching Behavior - -- The patch is a **deep merge**. -- **`undefined`** values are ignored. -- **`null`** values will set the corresponding field to `null`. -- Nested objects are merged recursively. - -## 5. Commands from Backend to Worker - -These are the commands your worker will receive from the backend. - -### 5.1. Subscribe to Camera - -Instructs the worker to process a camera's RTSP stream using the configuration from the specified `.mpta` file. - -- **Type:** `subscribe` - -**Payload:** - -```json -{ - "type": "subscribe", - "payload": { - "subscriptionIdentifier": "display-001;cam-002", - "rtspUrl": "rtsp://user:pass@host:port/stream", - "snapshotUrl": "http://go2rtc/snapshot/1", - "snapshotInterval": 5000, - "modelUrl": "http://storage/models/us-lpr.mpta", - "modelName": "US-LPR-and-Vehicle-ID", - "modelId": 102, - "cropX1": 100, - "cropY1": 200, - "cropX2": 300, - "cropY2": 400 - } -} -``` - -> **Note:** -> -> - `cropX1`, `cropY1`, `cropX2`, `cropY2` (optional, integer) specify the crop coordinates for the camera stream. These values are configured per display and passed in the subscription payload. If not provided, the worker should process the full frame. -> -> **Important:** -> If multiple displays are bound to the same camera, your worker must ensure that only **one stream** is opened per camera. When you receive multiple subscriptions for the same camera (with different `subscriptionIdentifier` values), you should: -> -> - Open the RTSP stream **once** for that camera if using RTSP. -> - Capture each snapshot only once per cycle, and reuse it for all display subscriptions sharing that camera. -> - Capture each frame/image only once per cycle. -> - Reuse the same captured image and snapshot for all display subscriptions that share the camera, processing and routing detection results separately for each display as needed. -> This avoids unnecessary load and bandwidth usage, and ensures consistent detection results and snapshots across all displays sharing the same camera. - -### 5.2. Unsubscribe from Camera - -Instructs the worker to stop processing a camera's stream. - -- **Type:** `unsubscribe` - -**Payload:** - -```json -{ - "type": "unsubscribe", - "payload": { - "subscriptionIdentifier": "display-001;cam-002" - } -} -``` - -### 5.3. Request State - -Direct request for the worker's current state. Respond with a `stateReport` message. - -- **Type:** `requestState` - -**Payload:** - -```json -{ - "type": "requestState" -} -``` - -### 5.4. Patch Session Result - -Backend's response to a `patchSession` message. - -- **Type:** `patchSessionResult` - -**Payload:** - -```json -{ - "type": "patchSessionResult", - "payload": { - "sessionId": 12345, - "success": true, - "message": "Session updated successfully." - } -} -``` - -### 5.5. 
Set Session ID - -Allows the backend to instruct the worker to associate a session ID with a subscription. This is useful for linking detection events to a specific session. The session ID can be `null` to indicate no active session. - -- **Type:** `setSessionId` - -**Payload:** - -```json -{ - "type": "setSessionId", - "payload": { - "displayIdentifier": "display-001", - "sessionId": 12345 - } -} -``` - -Or to clear the session: - -```json -{ - "type": "setSessionId", - "payload": { - "displayIdentifier": "display-001", - "sessionId": null - } -} -``` - -> **Note:** -> -> - The worker should store the session ID for the given subscription and use it in subsequent detection or patch messages as appropriate. If `sessionId` is `null`, the worker should treat the subscription as having no active session. - -## Subscription Identifier Format - -The `subscriptionIdentifier` used in all messages is constructed as: - -``` -displayIdentifier;cameraIdentifier -``` - -This uniquely identifies a camera subscription for a specific display. - -### Session ID Association - -When the backend sends a `setSessionId` command, it will only provide the `displayIdentifier` (not the full `subscriptionIdentifier`). - -**Worker Responsibility:** - -- The worker must match the `displayIdentifier` to all active subscriptions for that display (i.e., all `subscriptionIdentifier` values that start with `displayIdentifier;`). -- The worker should set or clear the session ID for all matching subscriptions. - -## 6. Example Communication Log - -This section shows a typical sequence of messages between the backend and the worker. Patch messages are not included, as they are only used when the worker cannot keep up. - -> **Note:** Unsubscribe is triggered when a user removes a camera or when the node is too heavily loaded and needs rebalancing. - -1. **Connection Established** & **Heartbeat** - - **Worker -> Backend** - ```json - { - "type": "stateReport", - "cpuUsage": 70.2, - "memoryUsage": 38.1, - "gpuUsage": 55.0, - "gpuMemoryUsage": 20.0, - "cameraConnections": [] - } - ``` -2. **Backend Subscribes Camera** - - **Backend -> Worker** - ```json - { - "type": "subscribe", - "payload": { - "subscriptionIdentifier": "display-001;entry-cam-01", - "rtspUrl": "rtsp://192.168.1.100/stream1", - "modelUrl": "http://storage/models/vehicle-id.mpta", - "modelName": "Vehicle Identification", - "modelId": 201 - } - } - ``` -3. **Worker Acknowledges in Heartbeat** - - **Worker -> Backend** - ```json - { - "type": "stateReport", - "cpuUsage": 72.5, - "memoryUsage": 39.0, - "gpuUsage": 57.0, - "gpuMemoryUsage": 21.0, - "cameraConnections": [ - { - "subscriptionIdentifier": "display-001;entry-cam-01", - "modelId": 201, - "modelName": "Vehicle Identification", - "online": true - } - ] - } - ``` -4. 
**Worker Detects a Car** - - **Worker -> Backend** - ```json - { - "type": "imageDetection", - "subscriptionIdentifier": "display-001;entry-cam-01", - "timestamp": "2025-07-15T10:00:00.000Z", - "data": { - "detection": { - "carBrand": "Honda", - "carModel": "CR-V", - "bodyType": "SUV", - "licensePlateText": "GEMINI-AI", - "licensePlateConfidence": 0.98 - }, - "modelId": 201, - "modelName": "Vehicle Identification" - } - } - ``` - - **Worker -> Backend** - ```json - { - "type": "imageDetection", - "subscriptionIdentifier": "display-001;entry-cam-01", - "timestamp": "2025-07-15T10:00:01.000Z", - "data": { - "detection": { - "carBrand": "Toyota", - "carModel": "Corolla", - "bodyType": "Sedan", - "licensePlateText": "CMS-1234", - "licensePlateConfidence": 0.97 - }, - "modelId": 201, - "modelName": "Vehicle Identification" - } - } - ``` - - **Worker -> Backend** - ```json - { - "type": "imageDetection", - "subscriptionIdentifier": "display-001;entry-cam-01", - "timestamp": "2025-07-15T10:00:02.000Z", - "data": { - "detection": { - "carBrand": "Ford", - "carModel": "Focus", - "bodyType": "Hatchback", - "licensePlateText": "CMS-5678", - "licensePlateConfidence": 0.96 - }, - "modelId": 201, - "modelName": "Vehicle Identification" - } - } - ``` -5. **Backend Unsubscribes Camera** - - **Backend -> Worker** - ```json - { - "type": "unsubscribe", - "payload": { - "subscriptionIdentifier": "display-001;entry-cam-01" - } - } - ``` -6. **Worker Acknowledges Unsubscription** - - **Worker -> Backend** - ```json - { - "type": "stateReport", - "cpuUsage": 68.0, - "memoryUsage": 37.0, - "gpuUsage": 50.0, - "gpuMemoryUsage": 18.0, - "cameraConnections": [] - } - ``` - -## 7. HTTP API: Image Retrieval - -In addition to the WebSocket protocol, the worker exposes an HTTP endpoint for retrieving the latest image frame from a camera. - -### Endpoint - -``` -GET /camera/{camera_id}/image -``` - -- **`camera_id`**: The full `subscriptionIdentifier` (e.g., `display-001;cam-001`). - -### Response - -- **Success (200):** Returns the latest JPEG image from the camera stream. - - - `Content-Type: image/jpeg` - - Binary JPEG data. - -- **Error (404):** If the camera is not found or no frame is available. - - - JSON error response. - -- **Error (500):** Internal server error. - -### Example Request - -``` -GET /camera/display-001;cam-001/image -``` - -### Example Response - -- **Headers:** - ``` - Content-Type: image/jpeg - ``` -- **Body:** Binary JPEG image. - -### Notes - -- The endpoint returns the most recent frame available for the specified camera subscription. -- If multiple displays share the same camera, each subscription has its own buffer; the endpoint uses the buffer for the given `camera_id`. -- This API is useful for debugging, monitoring, or integrating with external systems that require direct image access. diff --git a/yolov8n.pt b/yolov8n.pt new file mode 100644 index 0000000..0db4ca4 Binary files /dev/null and b/yolov8n.pt differ
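To make the heartbeat requirement in section 4.1 concrete, below is a minimal sketch of a periodic `stateReport` sender. It is not part of the specification: the backend URL is a placeholder, the `websockets` and `psutil` packages are assumptions about the implementation, and GPU metrics are stubbed out because the spec does not mandate a particular GPU library.

```python
import asyncio
import json

import psutil
import websockets  # assumption: worker uses the `websockets` package

BACKEND_WS_URL = "ws://backend.example.com/ws"  # placeholder: real URL comes from deployment config


async def heartbeat(camera_connections: list[dict]) -> None:
    """Connect to the backend and emit a stateReport roughly every 2 seconds."""
    async with websockets.connect(BACKEND_WS_URL) as ws:
        while True:
            report = {
                "type": "stateReport",
                "cpuUsage": psutil.cpu_percent(),
                "memoryUsage": psutil.virtual_memory().percent,
                "gpuUsage": 0.0,        # stub: query whichever GPU library the worker actually uses
                "gpuMemoryUsage": 0.0,  # stub
                "cameraConnections": camera_connections,
            }
            await ws.send(json.dumps(report))
            await asyncio.sleep(2)


if __name__ == "__main__":
    asyncio.run(heartbeat([]))
```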
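The `setSessionId` handling in section 5.5 and the Session ID Association rules reduce to a prefix match on the subscription identifier. A sketch, assuming subscriptions are tracked in a plain dict keyed by `subscriptionIdentifier` (that dict shape is hypothetical, not prescribed by the spec):

```python
def apply_set_session_id(
    subscriptions: dict[str, dict],
    display_identifier: str,
    session_id: int | None,
) -> None:
    """Set or clear the session ID for every subscription belonging to one display.

    `subscriptions` maps a subscriptionIdentifier such as "display-001;cam-001"
    to a per-subscription state dict. A session_id of None means the display
    has no active session.
    """
    prefix = f"{display_identifier};"
    for subscription_identifier, state in subscriptions.items():
        if subscription_identifier.startswith(prefix):
            state["session_id"] = session_id
```

With subscriptions for `display-001;cam-001` and `display-001;cam-002`, a `setSessionId` carrying `"displayIdentifier": "display-001"` updates both entries, while subscriptions for other displays are left untouched.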
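Finally, a minimal client for the HTTP image endpoint in section 7, using `requests`; the worker's host and port are assumptions, and the subscription identifier is the example value used throughout the document.

```python
import requests

WORKER_BASE_URL = "http://localhost:8000"  # assumption: worker host and port
subscription_id = "display-001;cam-001"    # full subscriptionIdentifier

# Request the most recent frame for this camera subscription.
resp = requests.get(f"{WORKER_BASE_URL}/camera/{subscription_id}/image", timeout=5)

if resp.status_code == 200:
    # Success: binary JPEG body.
    with open("latest_frame.jpg", "wb") as f:
        f.write(resp.content)
elif resp.status_code == 404:
    # Camera not found or no frame available yet.
    print("No frame:", resp.json())
else:
    resp.raise_for_status()
```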