fix(collector/bridge/gateway): rate limit 구조적 수정 — 점진적 백오프 + adaptive 폴링 + burst-friendly 윈도우
This commit is contained in:
27
bridge.py
27
bridge.py
@@ -176,8 +176,9 @@ class RemoteTransport(BridgeTransport):
|
||||
# Rate limit backoff
|
||||
self._rate_limited_until = 0.0 # timestamp until which we should not send requests
|
||||
self._backoff_seconds = 0.0 # current backoff duration (exponential)
|
||||
self._BACKOFF_BASE = 1.0
|
||||
self._BACKOFF_BASE = 2.0
|
||||
self._BACKOFF_MAX = 60.0
|
||||
self._success_streak = 0 # consecutive successes for gradual backoff reduction
|
||||
|
||||
# Retry queue: list of (method, path, data) tuples
|
||||
self._retry_queue: list[tuple[str, str, dict | None]] = []
|
||||
@@ -207,6 +208,7 @@ class RemoteTransport(BridgeTransport):
|
||||
|
||||
def _apply_backoff(self, retry_after: float = 0):
|
||||
"""Apply exponential backoff for rate limiting."""
|
||||
self._success_streak = 0 # Reset success streak on any failure
|
||||
if retry_after > 0:
|
||||
self._backoff_seconds = min(retry_after, self._BACKOFF_MAX)
|
||||
else:
|
||||
@@ -217,11 +219,22 @@ class RemoteTransport(BridgeTransport):
|
||||
self._rate_limited_until = time.time() + self._backoff_seconds
|
||||
logger.warning(f"RemoteTransport: backing off {self._backoff_seconds:.0f}s (until +{self._backoff_seconds:.0f}s)")
|
||||
|
||||
def _reset_backoff(self):
    """Reset backoff after a successful request.

    Does nothing when no backoff is in effect (``_backoff_seconds`` is not
    positive). Otherwise both the backoff duration and the
    ``_rate_limited_until`` deadline are set back to zero.
    """
    if self._backoff_seconds > 0:
        self._backoff_seconds = 0
        self._rate_limited_until = 0
|
||||
def _on_request_success(self):
    """Gradually reduce backoff after consecutive successes.

    Instead of instantly resetting to 0 (which causes the 1s oscillation loop
    when 7 loops share one transport), require sustained success before
    reducing.

    Each call made while a backoff is active increments ``_success_streak``.
    When the streak reaches 5 the backoff duration is halved and the streak
    restarts. Once the halved duration falls below 0.5 seconds, the backoff
    and the ``_rate_limited_until`` deadline are cleared entirely.
    """
    if self._backoff_seconds <= 0:
        return  # Already at zero, nothing to do

    self._success_streak += 1
    if self._success_streak >= 5:
        # Halve the backoff (gradual cooldown)
        self._backoff_seconds = self._backoff_seconds / 2
        if self._backoff_seconds < 0.5:
            # Too small to matter: treat the rate limit as fully lifted.
            self._backoff_seconds = 0
            self._rate_limited_until = 0
        # Demand a fresh run of successes before the next reduction.
        self._success_streak = 0
|
||||
|
||||
async def _arequest(self, method: str, path: str, data: dict | None = None) -> dict | None:
|
||||
"""Async non-blocking HTTP request to Gateway API."""
|
||||
@@ -250,7 +263,7 @@ class RemoteTransport(BridgeTransport):
|
||||
logger.info("RemoteTransport: ✅ Gateway connected")
|
||||
self.connected = True
|
||||
self._consecutive_failures = 0
|
||||
self._reset_backoff()
|
||||
self._on_request_success()
|
||||
return result
|
||||
except Exception as e:
|
||||
self._consecutive_failures += 1
|
||||
|
||||
Reference in New Issue
Block a user