fix(bridge): 429 Rate Limit 무한 루프 방지 — 지수 백오프 + Collector 폴링 보호 + rate limit 완화

This commit is contained in:
Variet Worker
2026-03-12 00:49:37 +09:00
parent feb8c05a73
commit 52c9526fdb
6 changed files with 75 additions and 9 deletions

View File

@@ -173,6 +173,12 @@ class RemoteTransport(BridgeTransport):
self._consecutive_failures = 0
self._max_failures_before_warning = 3
# Rate limit backoff
self._rate_limited_until = 0.0 # timestamp until which we should not send requests
self._backoff_seconds = 0.0 # current backoff duration (exponential)
self._BACKOFF_BASE = 1.0
self._BACKOFF_MAX = 60.0
# Retry queue: list of (method, path, data) tuples
self._retry_queue: list[tuple[str, str, dict | None]] = []
self._retry_queue_max = 100
@@ -194,8 +200,35 @@ class RemoteTransport(BridgeTransport):
if self._session and not self._session.closed:
await self._session.close()
@property
def is_rate_limited(self) -> bool:
    """Tell whether the rate-limit backoff window is still open.

    Returns:
        True while the current ``time.time()`` reading is earlier than
        ``self._rate_limited_until``; False once that timestamp has been
        reached or passed.
    """
    deadline = self._rate_limited_until
    return deadline > time.time()
def _apply_backoff(self, retry_after: float = 0):
"""Apply exponential backoff for rate limiting."""
if retry_after > 0:
self._backoff_seconds = min(retry_after, self._BACKOFF_MAX)
else:
if self._backoff_seconds == 0:
self._backoff_seconds = self._BACKOFF_BASE
else:
self._backoff_seconds = min(self._backoff_seconds * 2, self._BACKOFF_MAX)
self._rate_limited_until = time.time() + self._backoff_seconds
logger.warning(f"RemoteTransport: backing off {self._backoff_seconds:.0f}s (until +{self._backoff_seconds:.0f}s)")
def _reset_backoff(self):
"""Reset backoff after a successful request."""
if self._backoff_seconds > 0:
self._backoff_seconds = 0
self._rate_limited_until = 0
async def _arequest(self, method: str, path: str, data: dict | None = None) -> dict | None:
"""Async non-blocking HTTP request to Gateway API."""
# Skip if in backoff period (except health checks)
if self.is_rate_limited and path != "/health":
return None
session = await self._get_session()
url = f"{self.base_url}{path}"
try:
@@ -207,7 +240,8 @@ class RemoteTransport(BridgeTransport):
if resp.status == 401:
logger.error("RemoteTransport: 401 Unauthorized — check GATEWAY_API_KEY")
elif resp.status == 429:
logger.warning("RemoteTransport: 429 Rate limited")
retry_after = float(resp.headers.get("Retry-After", 0))
self._apply_backoff(retry_after)
else:
logger.warning(f"RemoteTransport: {method} {path}{resp.status}")
return None
@@ -216,6 +250,7 @@ class RemoteTransport(BridgeTransport):
logger.info("RemoteTransport: ✅ Gateway connected")
self.connected = True
self._consecutive_failures = 0
self._reset_backoff()
return result
except Exception as e:
self._consecutive_failures += 1
@@ -224,6 +259,9 @@ class RemoteTransport(BridgeTransport):
elif self._consecutive_failures < self._max_failures_before_warning:
logger.warning(f"RemoteTransport: {method} {path}{e}")
self.connected = False
# Apply backoff on connection failures too
if self._consecutive_failures >= self._max_failures_before_warning:
self._apply_backoff()
return None
async def _arequest_retry(self, method: str, path: str, data: dict | None = None) -> dict | None: