fix(collector/bridge/gateway): rate limit 구조적 수정 — 점진적 백오프 + adaptive 폴링 + burst-friendly 윈도우

This commit is contained in:
Variet Worker
2026-03-12 22:33:49 +09:00
parent 5cdf7777a5
commit 56de71470d
3 changed files with 110 additions and 36 deletions

View File

@@ -176,8 +176,9 @@ class RemoteTransport(BridgeTransport):
# Rate limit backoff
self._rate_limited_until = 0.0 # timestamp until which we should not send requests
self._backoff_seconds = 0.0 # current backoff duration (exponential)
self._BACKOFF_BASE = 1.0
self._BACKOFF_BASE = 2.0
self._BACKOFF_MAX = 60.0
self._success_streak = 0 # consecutive successes for gradual backoff reduction
# Retry queue: list of (method, path, data) tuples
self._retry_queue: list[tuple[str, str, dict | None]] = []
@@ -207,6 +208,7 @@ class RemoteTransport(BridgeTransport):
def _apply_backoff(self, retry_after: float = 0):
"""Apply exponential backoff for rate limiting."""
self._success_streak = 0 # Reset success streak on any failure
if retry_after > 0:
self._backoff_seconds = min(retry_after, self._BACKOFF_MAX)
else:
@@ -217,11 +219,22 @@ class RemoteTransport(BridgeTransport):
self._rate_limited_until = time.time() + self._backoff_seconds
logger.warning(f"RemoteTransport: backing off {self._backoff_seconds:.0f}s (until +{self._backoff_seconds:.0f}s)")
def _reset_backoff(self):
"""Reset backoff after a successful request."""
if self._backoff_seconds > 0:
self._backoff_seconds = 0
self._rate_limited_until = 0
def _on_request_success(self):
"""Gradually reduce backoff after consecutive successes.
Instead of instantly resetting to 0 (which causes the 1s oscillation loop
when 7 loops share one transport), require sustained success before reducing.
"""
if self._backoff_seconds <= 0:
return # Already at zero, nothing to do
self._success_streak += 1
if self._success_streak >= 5:
# Halve the backoff (gradual cooldown)
self._backoff_seconds = self._backoff_seconds / 2
if self._backoff_seconds < 0.5:
self._backoff_seconds = 0
self._rate_limited_until = 0
self._success_streak = 0
async def _arequest(self, method: str, path: str, data: dict | None = None) -> dict | None:
"""Async non-blocking HTTP request to Gateway API."""
@@ -250,7 +263,7 @@ class RemoteTransport(BridgeTransport):
logger.info("RemoteTransport: ✅ Gateway connected")
self.connected = True
self._consecutive_failures = 0
self._reset_backoff()
self._on_request_success()
return result
except Exception as e:
self._consecutive_failures += 1