refactor: 아키텍처 수정 — 동기HTTP→aiohttp + 연결 모니터링 + 재시도큐
#1 동기 HTTP → async aiohttp (Critical)
- RemoteTransport: urllib.request → aiohttp.ClientSession
- 모든 HTTP 요청이 non-blocking으로 전환
- 이벤트 루프 블로킹 문제 해결

#2 연결 상태 모니터링
- RemoteTransport: connected 플래그 + 연속 실패 카운터
- Collector: 30초마다 health check → 실패 시 경고 로그
- 연결 복구 시 '✅ Gateway connected' 메시지

#3 실패 재시도 큐
- RemoteTransport: _retry_queue (최대 100건)
- POST 실패 시 큐에 저장, 연결 복구 후 자동 재전송
- Collector: 10초마다 retry flush
This commit is contained in:
41
collector.py
41
collector.py
@@ -69,14 +69,17 @@ class CollectorBridge:
|
||||
self._poll_commands_loop(),
|
||||
self._forward_chat_snapshots_loop(),
|
||||
self._forward_registrations_loop(),
|
||||
self._health_check_loop(),
|
||||
self._retry_flush_loop(),
|
||||
]
|
||||
if self.event_queue:
|
||||
tasks.append(self._forward_events_loop())
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
async def stop(self):
    """Stop the Collector and close HTTP session.

    Clears ``self._running`` so that loops guarded by
    ``while self._running`` exit at their next check, then awaits
    ``self.remote.close()`` and logs that the collector has stopped.
    """
    self._running = False
    await self.remote.close()
    logger.info("[COLLECTOR] stopped")
|
||||
|
||||
# ─── Forward local pending → Gateway ───
|
||||
@@ -114,7 +117,7 @@ class CollectorBridge:
|
||||
continue # Skip pre-existing files from before startup
|
||||
|
||||
# Forward to Gateway (new or updated)
|
||||
self.remote.write_json("pending", fname, data)
|
||||
await self.remote.awrite_json("pending", fname, data)
|
||||
self._forwarded_pending.add(rid)
|
||||
self._pending_hashes[rid] = content_hash
|
||||
|
||||
@@ -143,7 +146,7 @@ class CollectorBridge:
|
||||
try:
|
||||
# Check each forwarded pending for a response
|
||||
for rid in list(self._forwarded_pending):
|
||||
data = self.remote.read_json("response", f"{rid}.json")
|
||||
data = await self.remote.aread_json("response", f"{rid}.json")
|
||||
if data is None or data.get("waiting"):
|
||||
continue
|
||||
|
||||
@@ -166,7 +169,7 @@ class CollectorBridge:
|
||||
"""Poll Gateway for commands and write them locally for Extension."""
|
||||
while self._running:
|
||||
try:
|
||||
commands = self.remote.poll_commands(self.project_name)
|
||||
commands = await self.remote.apoll_commands(self.project_name)
|
||||
for cmd in commands:
|
||||
cmd_id = cmd.get("id", str(int(time.time() * 1000)))
|
||||
fname = f"{cmd_id}.json"
|
||||
@@ -192,7 +195,7 @@ class CollectorBridge:
|
||||
project = data.get("project_name", self.project_name)
|
||||
content = data.get("content", "")
|
||||
if content:
|
||||
self.remote.send_chat(project, content)
|
||||
await self.remote.asend_chat(project, content)
|
||||
logger.info(f"[COLLECTOR] → Gateway: chat snapshot len={len(content)}")
|
||||
f.unlink() # Cleanup after forwarding
|
||||
except (json.JSONDecodeError, OSError) as e:
|
||||
@@ -219,7 +222,7 @@ class CollectorBridge:
|
||||
conv_id = data.get("conversation_id", "")
|
||||
project = data.get("project_name", "")
|
||||
if conv_id and project:
|
||||
self.remote.register_session(conv_id, project)
|
||||
await self.remote.aregister_session(conv_id, project)
|
||||
forwarded_regs.add(f.name)
|
||||
logger.info(f"[COLLECTOR] → Gateway: register {conv_id[:8]} → {project}")
|
||||
except (json.JSONDecodeError, OSError) as e:
|
||||
@@ -246,9 +249,33 @@ class CollectorBridge:
|
||||
"content": event.content,
|
||||
"timestamp": event.timestamp,
|
||||
}
|
||||
self.remote._request("POST", "/api/event", event_data)
|
||||
await self.remote.asend_event(event_data)
|
||||
logger.info(f"[COLLECTOR] → Gateway: event {event.event_type.value} {event.file_name}")
|
||||
except asyncio.TimeoutError:
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.error(f"[COLLECTOR] forward_event error: {e}")
|
||||
|
||||
# ─── Health check ───
|
||||
|
||||
async def _health_check_loop(self):
    """Periodically check Gateway connectivity.

    While ``self._running`` is true, awaits ``self.remote.health_check()``
    and then sleeps for 30 seconds. A warning is logged when the check
    returns a falsy result while ``self.remote.connected`` is still truthy.
    An exception raised by the check is logged and the loop carries on.
    """
    while self._running:
        try:
            ok = await self.remote.health_check()
            if not ok and self.remote.connected:
                logger.warning("[COLLECTOR] ❌ Gateway health check failed")
        except Exception as e:
            # Best-effort: one failed probe must not end the monitoring
            # loop, but the failure should still leave a trace in the log.
            logger.error(f"[COLLECTOR] health check error: {e}")
        await asyncio.sleep(30)
|
||||
|
||||
# ─── Retry flush ───
|
||||
|
||||
async def _retry_flush_loop(self):
    """Periodically flush the retry queue of failed requests.

    As long as ``self._running`` is true, awaits
    ``self.remote.flush_retry_queue()`` and then sleeps for 10 seconds.
    An exception raised by the flush is logged and the loop continues.
    """
    flush_interval = 10  # seconds between flush attempts
    while True:
        if not self._running:
            return
        try:
            await self.remote.flush_retry_queue()
        except Exception as exc:
            logger.error(f"[COLLECTOR] retry flush error: {exc}")
        await asyncio.sleep(flush_interval)
|
||||
|
||||
Reference in New Issue
Block a user