fix(collector/bridge/gateway): rate limit 구조적 수정 — 점진적 백오프 + adaptive 폴링 + burst-friendly 윈도우
This commit is contained in:
113
collector.py
113
collector.py
@@ -45,7 +45,9 @@ class CollectorBridge:
|
||||
# Pre-populate with existing pending files → skip on startup (prevents 만료됨 spam)
|
||||
self._startup_pending: set[str] = set()
|
||||
self._forwarded_pending: set[str] = set()
|
||||
self._forwarded_timestamps: dict[str, float] = {} # rid → when forwarded
|
||||
self._pending_hashes: dict[str, str] = {} # rid → content hash (for MERGE/status detection)
|
||||
self._RESPONSE_POLL_TTL = 300 # 5 min — stop polling responses for old pending
|
||||
for fname in self.local.list_json_files("pending"):
|
||||
rid = fname.replace(".json", "")
|
||||
self._startup_pending.add(rid)
|
||||
@@ -60,20 +62,29 @@ class CollectorBridge:
|
||||
logger.info(f"[COLLECTOR] skipping {len(self._startup_pending)} existing pending files")
|
||||
|
||||
async def start(self):
|
||||
"""Start the Collector polling loops."""
|
||||
"""Start the Collector polling loops with staggered offsets.
|
||||
|
||||
Each loop starts with a different delay to prevent all loops from waking
|
||||
up at the same time and causing burst requests to Gateway.
|
||||
"""
|
||||
self._running = True
|
||||
logger.info(f"[COLLECTOR] started for project={self.project_name}")
|
||||
|
||||
async def _staggered(coro, offset: float):
|
||||
await asyncio.sleep(offset)
|
||||
await coro()
|
||||
|
||||
tasks = [
|
||||
self._forward_pending_loop(),
|
||||
self._poll_responses_loop(),
|
||||
self._poll_commands_loop(),
|
||||
self._forward_chat_snapshots_loop(),
|
||||
self._forward_registrations_loop(),
|
||||
self._health_check_loop(),
|
||||
self._retry_flush_loop(),
|
||||
_staggered(self._forward_pending_loop, 0.0),
|
||||
_staggered(self._poll_responses_loop, 0.5),
|
||||
_staggered(self._poll_commands_loop, 1.0),
|
||||
_staggered(self._forward_chat_snapshots_loop, 1.5),
|
||||
_staggered(self._forward_registrations_loop, 2.0),
|
||||
_staggered(self._health_check_loop, 2.5),
|
||||
_staggered(self._retry_flush_loop, 3.0),
|
||||
]
|
||||
if self.event_queue:
|
||||
tasks.append(self._forward_events_loop())
|
||||
tasks.append(_staggered(self._forward_events_loop, 3.5))
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
async def stop(self):
|
||||
@@ -128,6 +139,7 @@ class CollectorBridge:
|
||||
# Forward to Gateway (new or updated)
|
||||
await self.remote.awrite_json("pending", fname, data)
|
||||
self._forwarded_pending.add(rid)
|
||||
self._forwarded_timestamps[rid] = time.time()
|
||||
self._pending_hashes[rid] = content_hash
|
||||
|
||||
if is_new:
|
||||
@@ -150,7 +162,11 @@ class CollectorBridge:
|
||||
# ─── Poll Gateway responses → local ───
|
||||
|
||||
async def _poll_responses_loop(self):
|
||||
"""Poll Gateway for responses and write them locally for Extension."""
|
||||
"""Poll Gateway for responses and write them locally for Extension.
|
||||
|
||||
Only polls responses for recently-forwarded pending (within _RESPONSE_POLL_TTL).
|
||||
Expired entries are removed from tracking to prevent request accumulation.
|
||||
"""
|
||||
while self._running:
|
||||
try:
|
||||
# Skip cycle if rate-limited
|
||||
@@ -158,16 +174,31 @@ class CollectorBridge:
|
||||
await asyncio.sleep(self._poll_interval)
|
||||
continue
|
||||
|
||||
# Check each forwarded pending for a response
|
||||
for rid in list(self._forwarded_pending):
|
||||
if rid in self._startup_pending:
|
||||
continue # Don't poll responses for pre-startup files
|
||||
now = time.time()
|
||||
# Clean up expired forwarded pending (stop polling responses for old ones)
|
||||
expired = [
|
||||
rid for rid, ts in self._forwarded_timestamps.items()
|
||||
if now - ts > self._RESPONSE_POLL_TTL
|
||||
]
|
||||
for rid in expired:
|
||||
self._forwarded_pending.discard(rid)
|
||||
self._forwarded_timestamps.pop(rid, None)
|
||||
self._pending_hashes.pop(rid, None)
|
||||
if expired:
|
||||
logger.info(f"[COLLECTOR] expired {len(expired)} stale forwarded pending (>{self._RESPONSE_POLL_TTL}s)")
|
||||
|
||||
# Check each active forwarded pending for a response
|
||||
active_rids = [
|
||||
rid for rid in self._forwarded_pending
|
||||
if rid not in self._startup_pending
|
||||
]
|
||||
for rid in active_rids:
|
||||
# Rate-limit guard: stop polling if we got rate-limited mid-cycle
|
||||
if self.remote.is_rate_limited:
|
||||
break
|
||||
data = await self.remote.aread_json("response", f"{rid}.json")
|
||||
if data is None or data.get("waiting"):
|
||||
await asyncio.sleep(0.2) # Throttle between individual response polls
|
||||
await asyncio.sleep(0.3) # Throttle between individual response polls
|
||||
continue
|
||||
|
||||
# Write response locally for Extension to pick up
|
||||
@@ -175,6 +206,7 @@ class CollectorBridge:
|
||||
# Also delete local pending file (Extension expects this)
|
||||
self.local.delete_file("pending", f"{rid}.json")
|
||||
self._forwarded_pending.discard(rid)
|
||||
self._forwarded_timestamps.pop(rid, None)
|
||||
approved = data.get("approved", "?")
|
||||
logger.info(f"[COLLECTOR] ← Gateway: response {rid[:12]} approved={approved}")
|
||||
|
||||
@@ -207,27 +239,56 @@ class CollectorBridge:
|
||||
return projects
|
||||
|
||||
async def _poll_commands_loop(self):
|
||||
"""Poll Gateway for commands for ALL local projects.
|
||||
"""Poll Gateway for commands with adaptive per-project intervals.
|
||||
|
||||
Discovers projects from bridge/register/ (written by each AG Extension)
|
||||
and polls commands for each. Extension-side filtering (project_name check)
|
||||
ensures each AG window only processes its own commands.
|
||||
When a project returns empty commands repeatedly, its poll interval
|
||||
increases (3s → 10s → 30s → 60s). On receiving a command, interval
|
||||
resets to base. This prevents idle projects from wasting requests.
|
||||
"""
|
||||
# Per-project adaptive state
|
||||
project_intervals: dict[str, float] = {} # project → current interval
|
||||
project_last_poll: dict[str, float] = {} # project → last poll timestamp
|
||||
_BASE_INTERVAL = 3.0
|
||||
_IDLE_STEPS = [10.0, 30.0, 60.0] # progressive idle intervals
|
||||
project_empty_streak: dict[str, int] = {} # project → consecutive empty polls
|
||||
|
||||
while self._running:
|
||||
try:
|
||||
# Skip cycle if rate-limited
|
||||
if not self.remote.is_rate_limited:
|
||||
projects = self._discover_local_projects()
|
||||
now = time.time()
|
||||
for project in projects:
|
||||
if self.remote.is_rate_limited:
|
||||
break # Stop mid-cycle if rate-limited
|
||||
break
|
||||
|
||||
# Check if this project's interval has elapsed
|
||||
interval = project_intervals.get(project, _BASE_INTERVAL)
|
||||
last = project_last_poll.get(project, 0)
|
||||
if now - last < interval:
|
||||
continue # Not time yet for this project
|
||||
|
||||
project_last_poll[project] = now
|
||||
commands = await self.remote.apoll_commands(project)
|
||||
for cmd in commands:
|
||||
cmd_id = cmd.get("id", str(int(time.time() * 1000)))
|
||||
fname = f"{cmd_id}.json"
|
||||
self.local.write_json("commands", fname, cmd)
|
||||
logger.info(f"[COLLECTOR] ← Gateway: command [{project}] {cmd.get('text', '?')[:30]}")
|
||||
await asyncio.sleep(0.3) # Throttle between projects to avoid rate limit bursts
|
||||
|
||||
if commands:
|
||||
# Got commands → reset to base interval
|
||||
project_intervals[project] = _BASE_INTERVAL
|
||||
project_empty_streak[project] = 0
|
||||
for cmd in commands:
|
||||
cmd_id = cmd.get("id", str(int(time.time() * 1000)))
|
||||
fname = f"{cmd_id}.json"
|
||||
self.local.write_json("commands", fname, cmd)
|
||||
logger.info(f"[COLLECTOR] ← Gateway: command [{project}] {cmd.get('text', '?')[:30]}")
|
||||
else:
|
||||
# Empty → increase interval progressively
|
||||
streak = project_empty_streak.get(project, 0) + 1
|
||||
project_empty_streak[project] = streak
|
||||
if streak <= len(_IDLE_STEPS):
|
||||
project_intervals[project] = _IDLE_STEPS[streak - 1]
|
||||
# else stays at max (60s)
|
||||
|
||||
await asyncio.sleep(0.3) # Throttle between projects
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[COLLECTOR] poll_commands error: {e}")
|
||||
|
||||
Reference in New Issue
Block a user