From 42b08c62519a2f559139bc825f6454657224a0ab Mon Sep 17 00:00:00 2001 From: Siwat Sirichai Date: Tue, 11 Nov 2025 02:30:17 +0700 Subject: [PATCH] expo backoff decode handling --- services/stream_decoder.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/services/stream_decoder.py b/services/stream_decoder.py index 5b30585..95b2b49 100644 --- a/services/stream_decoder.py +++ b/services/stream_decoder.py @@ -380,23 +380,30 @@ class StreamDecoder: # Set the CUDA device for this thread torch.cuda.set_device(self.gpu_id) - retry_count = 0 - max_retries = 5 + consecutive_failures = 0 + backoff_delay = 2.0 # Start with 2 seconds + max_backoff = 30.0 # Cap at 30 seconds while not self._stop_flag.is_set(): # Initialize connection if not self._init_rtsp_connection(): - retry_count += 1 - if retry_count >= max_retries: - print(f"Max retries reached for {self.rtsp_url}") - self._set_status(ConnectionStatus.ERROR) - break + consecutive_failures += 1 + # Exponential backoff: 2s, 4s, 8s, 16s, 30s, 30s, ... + backoff_delay = min( + 2.0 * (2 ** (consecutive_failures - 1)), max_backoff + ) + + print( + f"Connection failed for {self.rtsp_url}, retry in {backoff_delay:.0f}s (attempt {consecutive_failures})" + ) self._set_status(ConnectionStatus.RECONNECTING) - self._stop_flag.wait(timeout=2.0) + self._stop_flag.wait(timeout=backoff_delay) continue - retry_count = 0 # Reset on successful connection + # Successfully connected - reset failure tracking + consecutive_failures = 0 + backoff_delay = 2.0 try: # Decode loop - iterate through packets from PyAV @@ -473,7 +480,7 @@ class StreamDecoder: print(f"Error in decode loop for {self.rtsp_url}: {e}") self._set_status(ConnectionStatus.RECONNECTING) self._cleanup() - self._stop_flag.wait(timeout=2.0) + # No delay here: the next iteration reconnects immediately; exponential backoff applies only if that reconnect fails def _cleanup(self): """Cleanup resources"""