enhance logging for model loading and pipeline processing; update log levels and add detailed error messages
All checks were successful
Build Backend Application and Docker Image / build-docker (push) Successful in 9m22s

This commit is contained in:
Siwat Sirichai 2025-05-28 19:18:58 +07:00
parent 3511d6ad7a
commit d4754fcd27
3 changed files with 325 additions and 82 deletions

237
app.py
View file

@ -41,41 +41,61 @@ max_retries = config.get("max_retries", 3)
# Configure logging
# NOTE(review): this listing appears to be a rendered diff hunk, so the
# basicConfig call below shows both the earlier and the newer arguments
# (level/format/handlers) back to back — confirm against the real file which
# set is live.
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s [%(levelname)s] %(message)s",
level=logging.INFO, # Set to INFO level for less verbose output
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
handlers=[
logging.FileHandler("app.log"),
logging.StreamHandler()
logging.FileHandler("detector_worker.log"), # Write logs to a file
logging.StreamHandler() # Also output to console
]
)
# Create a logger specifically for this application
logger = logging.getLogger("detector_worker")
# The handlers above are created without a level of their own, so DEBUG
# records from this logger still reach the file and the console even though
# the root logger is kept at INFO.
logger.setLevel(logging.DEBUG) # Set app-specific logger to DEBUG level
# Ensure all other libraries (including root) use at least INFO level
logging.getLogger().setLevel(logging.INFO)
logger.info("Starting detector worker application")
logger.info(f"Configuration: Target FPS: {TARGET_FPS}, Max streams: {max_streams}, Max retries: {max_retries}")
# Ensure the models directory exists
os.makedirs("models", exist_ok=True)
logger.info("Ensured models directory exists")
# Constants for heartbeat and timeouts
# HEARTBEAT_INTERVAL is the pause between stateReport heartbeats sent over the websocket.
HEARTBEAT_INTERVAL = 2 # seconds
# presumably milliseconds, going by the name; its use is not visible here — TODO confirm
WORKER_TIMEOUT_MS = 10000
logger.debug(f"Heartbeat interval set to {HEARTBEAT_INTERVAL} seconds")
# Locks for thread-safe operations
# streams_lock is taken around accesses to `streams`, models_lock around accesses to `models`.
streams_lock = threading.Lock()
models_lock = threading.Lock()
logger.debug("Initialized thread locks")
# Add helper to download mpta ZIP file from a remote URL
def download_mpta(url: str, dest_path: str, timeout: float = 30.0) -> str:
    """Download an mpta ZIP file from ``url`` and store it at ``dest_path``.

    The body is streamed to disk in 8 KiB chunks. When the server reports a
    Content-Length, progress is logged at DEBUG level roughly every 10%.

    Args:
        url: Remote location of the mpta file.
        dest_path: Local file path to write. Its parent directory is created
            when it does not exist yet.
        timeout: Seconds to wait for the connection and for each read from
            the socket before giving up. This is not a limit on the total
            download time.

    Returns:
        ``dest_path`` on success. ``None`` when the server answers with a
        status other than 200 or when any exception occurs; the failure is
        logged in both cases.
    """
    try:
        logger.info(f"Starting download of model from {url} to {dest_path}")
        # A bare filename has an empty dirname, which os.makedirs rejects.
        dest_dir = os.path.dirname(dest_path)
        if dest_dir:
            os.makedirs(dest_dir, exist_ok=True)
        # The context manager returns the streamed connection to the pool
        # even when we bail out early or an exception is raised.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                logger.error(f"Failed to download mpta file (status code {response.status_code}): {response.text}")
                return None
            file_size = int(response.headers.get('content-length', 0))
            logger.info(f"Model file size: {file_size/1024/1024:.2f} MB")
            # Never let the step be 0: a Content-Length below 10 bytes would
            # otherwise turn the progress arithmetic into a division by zero.
            progress_step = max(file_size // 10, 1)
            next_mark = progress_step
            downloaded = 0
            with open(dest_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
                    downloaded += len(chunk)
                    if file_size > 0 and downloaded >= next_mark:  # Log approximately every 10%
                        logger.debug(f"Download progress: {downloaded/file_size*100:.1f}%")
                        # Skip every mark this chunk already passed.
                        next_mark = (downloaded // progress_step + 1) * progress_step
            logger.info(f"Successfully downloaded mpta file from {url} to {dest_path}")
            return dest_path
    except Exception as e:
        logger.error(f"Exception downloading mpta file from {url}: {str(e)}", exc_info=True)
        return None
####################################################
@ -83,12 +103,17 @@ def download_mpta(url: str, dest_path: str) -> str:
####################################################
@app.websocket("/")
async def detect(websocket: WebSocket):
logging.info("WebSocket connection accepted")
logger.info("WebSocket connection accepted")
persistent_data_dict = {}
async def handle_detection(camera_id, stream, frame, websocket, model_tree, persistent_data):
try:
logger.debug(f"Processing frame for camera {camera_id} with model {stream['modelId']}")
start_time = time.time()
detection_result = run_pipeline(frame, model_tree)
process_time = (time.time() - start_time) * 1000
logger.debug(f"Detection for camera {camera_id} completed in {process_time:.2f}ms")
detection_data = {
"type": "imageDetection",
"cameraIdentifier": camera_id,
@ -99,87 +124,157 @@ async def detect(websocket: WebSocket):
"modelName": stream["modelName"]
}
}
logging.debug(f"Sending detection data for camera {camera_id}: {detection_data}")
if detection_result:
detection_count = len(detection_result.get("detections", []))
logger.info(f"Camera {camera_id}: Detected {detection_count} objects with model {stream['modelName']}")
await websocket.send_json(detection_data)
return persistent_data
except Exception as e:
logging.error(f"Error in handle_detection for camera {camera_id}: {e}")
logger.error(f"Error in handle_detection for camera {camera_id}: {str(e)}", exc_info=True)
return persistent_data
def frame_reader(camera_id, cap, buffer, stop_event):
    """Read frames from a camera and keep only the newest one in ``buffer``.

    Runs until ``stop_event`` is set, the retry budget is exhausted, or an
    unexpected error occurs. Failed reads and OpenCV errors trigger a
    release / wait / reopen cycle using the URL stored in
    ``streams[camera_id]["rtsp_url"]``.

    Args:
        camera_id: Identifier used for log messages and to look up the
            stream's RTSP URL in ``streams``.
        cap: Capture object exposing ``isOpened``/``get``/``read``/``release``.
            It is replaced locally whenever the stream is reopened.
        buffer: Queue receiving frames; an older pending frame is discarded
            before a new one is put.
        stop_event: Event that requests the reader to stop. It is also used
            for the reconnect back-off so a stop request interrupts the wait.
    """
    retries = 0
    logger.info(f"Starting frame reader thread for camera {camera_id}")
    frame_count = 0
    last_log_time = time.time()

    try:
        # Log initial camera status and properties
        if cap.isOpened():
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            logger.info(f"Camera {camera_id} opened successfully with resolution {width}x{height}, FPS: {fps}")
        else:
            logger.error(f"Camera {camera_id} failed to open initially")

        while not stop_event.is_set():
            try:
                if not cap.isOpened():
                    logger.error(f"Camera {camera_id} is not open before trying to read")
                    # Count this attempt too; otherwise a camera that never
                    # comes back keeps this branch looping forever and
                    # max_retries is never enforced.
                    retries += 1
                    if retries > max_retries and max_retries != -1:
                        logger.error(f"Max retries reached for camera: {camera_id}, stopping frame reader")
                        break
                    # Attempt to reopen
                    cap = cv2.VideoCapture(streams[camera_id]["rtsp_url"])
                    # wait() returns True as soon as a stop is requested.
                    if stop_event.wait(reconnect_interval):
                        break
                    continue

                logger.debug(f"Attempting to read frame from camera {camera_id}")
                ret, frame = cap.read()

                if not ret:
                    logger.warning(f"Connection lost for camera: {camera_id}, retry {retries+1}/{max_retries}")
                    cap.release()
                    if stop_event.wait(reconnect_interval):
                        break
                    retries += 1
                    # max_retries == -1 means "retry forever".
                    if retries > max_retries and max_retries != -1:
                        logger.error(f"Max retries reached for camera: {camera_id}, stopping frame reader")
                        break
                    # Re-open
                    logger.info(f"Attempting to reopen RTSP stream for camera: {camera_id}")
                    cap = cv2.VideoCapture(streams[camera_id]["rtsp_url"])
                    if not cap.isOpened():
                        logger.error(f"Failed to reopen RTSP stream for camera: {camera_id}")
                        continue
                    logger.info(f"Successfully reopened RTSP stream for camera: {camera_id}")
                    continue

                # Successfully read a frame
                frame_count += 1
                current_time = time.time()
                # Log frame stats every 5 seconds
                if current_time - last_log_time > 5:
                    logger.info(f"Camera {camera_id}: Read {frame_count} frames in the last {current_time - last_log_time:.1f} seconds")
                    frame_count = 0
                    last_log_time = current_time

                logger.debug(f"Successfully read frame from camera {camera_id}, shape: {frame.shape}")
                retries = 0

                # Overwrite old frame if buffer is full
                if not buffer.empty():
                    try:
                        buffer.get_nowait()
                        logger.debug(f"Removed old frame from buffer for camera {camera_id}")
                    except queue.Empty:
                        # The consumer took the frame between the check and the get.
                        pass

                buffer.put(frame)
                logger.debug(f"Added new frame to buffer for camera {camera_id}")

                # Short sleep to avoid CPU overuse
                time.sleep(0.01)

            except cv2.error as e:
                logger.error(f"OpenCV error for camera {camera_id}: {e}", exc_info=True)
                cap.release()
                if stop_event.wait(reconnect_interval):
                    break
                retries += 1
                if retries > max_retries and max_retries != -1:
                    logger.error(f"Max retries reached after OpenCV error for camera {camera_id}")
                    break
                logger.info(f"Attempting to reopen RTSP stream after OpenCV error for camera: {camera_id}")
                cap = cv2.VideoCapture(streams[camera_id]["rtsp_url"])
                if not cap.isOpened():
                    logger.error(f"Failed to reopen RTSP stream for camera {camera_id} after OpenCV error")
                    continue
                logger.info(f"Successfully reopened RTSP stream after OpenCV error for camera: {camera_id}")
            except Exception as e:
                logger.error(f"Unexpected error for camera {camera_id}: {str(e)}", exc_info=True)
                cap.release()
                break
    except Exception as e:
        logger.error(f"Error in frame_reader thread for camera {camera_id}: {str(e)}", exc_info=True)
    finally:
        logger.info(f"Frame reader thread for camera {camera_id} is exiting")
        # Release whichever capture object is current (it may have been reopened).
        if cap and cap.isOpened():
            cap.release()
async def process_streams():
    """Poll every active stream for a frame and run detection on it.

    Each cycle snapshots ``streams`` under ``streams_lock``, then for every
    stream takes at most one pending frame, looks up the model tree for the
    stream's ``modelId`` under ``models_lock`` and awaits
    ``handle_detection``. The value returned by ``handle_detection`` is
    stored in ``persistent_data_dict`` under ``(camera_id, modelId)``.
    The loop then sleeps for whatever is left of ``poll_interval``
    (milliseconds) after subtracting the time the cycle took.

    Cancellation is logged and swallowed, so awaiting the cancelled task
    completes normally. Any other exception is logged and ends the loop.
    """
    logger.info("Started processing streams")
    try:
        while True:
            start_time = time.time()
            # Snapshot under the lock so iteration is not affected by
            # concurrent subscribe/unsubscribe changes to `streams`.
            with streams_lock:
                current_streams = list(streams.items())

            if current_streams:
                logger.debug(f"Processing {len(current_streams)} active streams")
            else:
                logger.debug("No active streams to process")

            for camera_id, stream in current_streams:
                buffer = stream["buffer"]
                # Non-blocking get: a blocking Queue.get() inside a coroutine
                # would stall the whole event loop if the queue were emptied
                # between an empty() check and the get.
                try:
                    frame = buffer.get_nowait()
                except queue.Empty:
                    logger.debug(f"Frame buffer is empty for camera {camera_id}")
                    continue

                logger.debug(f"Got frame from buffer for camera {camera_id}")

                with models_lock:
                    model_tree = models.get(camera_id, {}).get(stream["modelId"])
                if not model_tree:
                    # The frame taken above is dropped in this case.
                    logger.warning(f"Model not found for camera {camera_id}, modelId {stream['modelId']}")
                    continue
                logger.debug(f"Found model tree for camera {camera_id}, modelId {stream['modelId']}")

                key = (camera_id, stream["modelId"])
                persistent_data = persistent_data_dict.get(key, {})
                logger.debug(f"Starting detection for camera {camera_id} with modelId {stream['modelId']}")
                updated_persistent_data = await handle_detection(
                    camera_id, stream, frame, websocket, model_tree, persistent_data
                )
                persistent_data_dict[key] = updated_persistent_data

            elapsed_time = (time.time() - start_time) * 1000  # ms
            sleep_time = max(poll_interval - elapsed_time, 0)
            logger.debug(f"Frame processing cycle: {elapsed_time:.2f}ms, sleeping for: {sleep_time:.2f}ms")
            await asyncio.sleep(sleep_time / 1000.0)
    except asyncio.CancelledError:
        logger.info("Stream processing task cancelled")
    except Exception as e:
        logger.error(f"Error in process_streams: {str(e)}", exc_info=True)
async def send_heartbeat():
while True:
@ -212,17 +307,17 @@ async def detect(websocket: WebSocket):
"cameraConnections": camera_connections
}
await websocket.send_text(json.dumps(state_report))
logging.debug("Sent stateReport as heartbeat")
logger.debug("Sent stateReport as heartbeat")
await asyncio.sleep(HEARTBEAT_INTERVAL)
except Exception as e:
logging.error(f"Error sending stateReport heartbeat: {e}")
logger.error(f"Error sending stateReport heartbeat: {e}")
break
async def on_message():
while True:
try:
msg = await websocket.receive_text()
logging.debug(f"Received message: {msg}")
logger.debug(f"Received message: {msg}")
data = json.loads(msg)
msg_type = data.get("type")
@ -236,35 +331,67 @@ async def detect(websocket: WebSocket):
if model_url:
with models_lock:
if camera_id not in models:
models[camera_id] = {}
if modelId not in models[camera_id]:
logging.info(f"Loading model from {model_url}")
if (camera_id not in models) or (modelId not in models[camera_id]):
logger.info(f"Loading model from {model_url} for camera {camera_id}, modelId {modelId}")
extraction_dir = os.path.join("models", camera_id, str(modelId))
os.makedirs(extraction_dir, exist_ok=True)
# If model_url is remote, download it first.
parsed = urlparse(model_url)
if parsed.scheme in ("http", "https"):
logger.info(f"Downloading remote model from {model_url}")
local_mpta = os.path.join(extraction_dir, os.path.basename(parsed.path))
logger.debug(f"Download destination: {local_mpta}")
local_path = download_mpta(model_url, local_mpta)
if not local_path:
logging.error("Failed to download the remote mpta file.")
logger.error(f"Failed to download the remote mpta file from {model_url}")
error_response = {
"type": "error",
"cameraIdentifier": camera_id,
"error": f"Failed to download model from {model_url}"
}
await websocket.send_json(error_response)
continue
model_tree = load_pipeline_from_zip(local_path, extraction_dir)
else:
logger.info(f"Loading local model from {model_url}")
# Check if file exists before attempting to load
if not os.path.exists(model_url):
logger.error(f"Local model file not found: {model_url}")
logger.debug(f"Current working directory: {os.getcwd()}")
error_response = {
"type": "error",
"cameraIdentifier": camera_id,
"error": f"Model file not found: {model_url}"
}
await websocket.send_json(error_response)
continue
model_tree = load_pipeline_from_zip(model_url, extraction_dir)
if model_tree is None:
logging.error("Failed to load model from mpta file.")
logger.error(f"Failed to load model {modelId} from mpta file for camera {camera_id}")
error_response = {
"type": "error",
"cameraIdentifier": camera_id,
"error": f"Failed to load model {modelId}"
}
await websocket.send_json(error_response)
continue
if camera_id not in models:
models[camera_id] = {}
models[camera_id][modelId] = model_tree
logging.info(f"Loaded model {modelId} for camera {camera_id}")
logger.info(f"Successfully loaded model {modelId} for camera {camera_id}")
success_response = {
"type": "modelLoaded",
"cameraIdentifier": camera_id,
"modelId": modelId
}
await websocket.send_json(success_response)
if camera_id and rtsp_url:
with streams_lock:
if camera_id not in streams and len(streams) < max_streams:
cap = cv2.VideoCapture(rtsp_url)
if not cap.isOpened():
logging.error(f"Failed to open RTSP stream for camera {camera_id}")
logger.error(f"Failed to open RTSP stream for camera {camera_id}")
continue
buffer = queue.Queue(maxsize=1)
stop_event = threading.Event()
@ -280,12 +407,12 @@ async def detect(websocket: WebSocket):
"modelId": modelId,
"modelName": modelName
}
logging.info(f"Subscribed to camera {camera_id} with modelId {modelId}, modelName {modelName}, URL {rtsp_url}")
logger.info(f"Subscribed to camera {camera_id} with modelId {modelId}, modelName {modelName}, URL {rtsp_url}")
elif camera_id and camera_id in streams:
# If already subscribed, unsubscribe
stream = streams.pop(camera_id)
stream["cap"].release()
logging.info(f"Unsubscribed from camera {camera_id}")
logger.info(f"Unsubscribed from camera {camera_id}")
with models_lock:
if camera_id in models and modelId in models[camera_id]:
del models[camera_id][modelId]
@ -294,14 +421,14 @@ async def detect(websocket: WebSocket):
elif msg_type == "unsubscribe":
payload = data.get("payload", {})
camera_id = payload.get("cameraIdentifier")
logging.debug(f"Unsubscribing from camera {camera_id}")
logger.debug(f"Unsubscribing from camera {camera_id}")
with streams_lock:
if camera_id and camera_id in streams:
stream = streams.pop(camera_id)
stream["stop_event"].set()
stream["thread"].join()
stream["cap"].release()
logging.info(f"Unsubscribed from camera {camera_id}")
logger.info(f"Unsubscribed from camera {camera_id}")
with models_lock:
if camera_id in models:
del models[camera_id]
@ -335,14 +462,14 @@ async def detect(websocket: WebSocket):
}
await websocket.send_text(json.dumps(state_report))
else:
logging.error(f"Unknown message type: {msg_type}")
logger.error(f"Unknown message type: {msg_type}")
except json.JSONDecodeError:
logging.error("Received invalid JSON message")
logger.error("Received invalid JSON message")
except (WebSocketDisconnect, ConnectionClosedError) as e:
logging.warning(f"WebSocket disconnected: {e}")
logger.warning(f"WebSocket disconnected: {e}")
break
except Exception as e:
logging.error(f"Error handling message: {e}")
logger.error(f"Error handling message: {e}")
break
try:
@ -352,7 +479,7 @@ async def detect(websocket: WebSocket):
message_task = asyncio.create_task(on_message())
await asyncio.gather(heartbeat_task, message_task)
except Exception as e:
logging.error(f"Error in detect websocket: {e}")
logger.error(f"Error in detect websocket: {e}")
finally:
stream_task.cancel()
await stream_task
@ -366,8 +493,8 @@ async def detect(websocket: WebSocket):
stream["buffer"].get_nowait()
except queue.Empty:
pass
logging.info(f"Released camera {camera_id} and cleaned up resources")
logger.info(f"Released camera {camera_id} and cleaned up resources")
streams.clear()
with models_lock:
models.clear()
logging.info("WebSocket connection closed")
logger.info("WebSocket connection closed")