fix: 전체 시스템 감사 — 6건 수정 (보안 + 안정성)

Bug 1 (만료됨 스팸): Collector 시작 시 이미 존재하는 pending 파일을 전달 완료로 미리 등록하여 건너뜀
Bug 2 (pending 미삭제): Gateway에서 response를 소비할 때 해당 pending 파일도 함께 삭제
Bug 3 (재시작 중복): Bug 1로 해결

Security 1: API 요청 1MB 크기 제한 (client_max_size)
Security 2: IP별 rate limiting (10 req/s)
Security 3: _commands 메모리 누수 방지 (TTL 30분)
This commit is contained in:
Variet Worker
2026-03-11 22:42:05 +09:00
parent 7eca0763c9
commit 58a421f5a6
2 changed files with 57 additions and 11 deletions

View File

@@ -38,10 +38,17 @@ class CollectorBridge:
self.remote = remote self.remote = remote
self.project_name = project_name self.project_name = project_name
self.event_queue = event_queue self.event_queue = event_queue
self._forwarded_pending: set[str] = set() # already forwarded request IDs
self._poll_interval = 3 # seconds self._poll_interval = 3 # seconds
self._running = False self._running = False
# Pre-populate with existing pending files → skip on startup (prevents 만료됨 spam)
self._forwarded_pending: set[str] = set()
for fname in self.local.list_json_files("pending"):
rid = fname.replace(".json", "")
self._forwarded_pending.add(rid)
if self._forwarded_pending:
logger.info(f"[COLLECTOR] skipping {len(self._forwarded_pending)} existing pending files")
async def start(self): async def start(self):
"""Start the Collector polling loops.""" """Start the Collector polling loops."""
self._running = True self._running = True

View File

@@ -20,11 +20,17 @@ import asyncio
import json import json
import time import time
import logging import logging
from collections import defaultdict
from pathlib import Path from pathlib import Path
from aiohttp import web from aiohttp import web
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Rate limiting
RATE_LIMIT_WINDOW = 1.0 # seconds
RATE_LIMIT_MAX = 10 # max requests per window per IP
COMMAND_TTL = 1800 # 30 min — stale commands auto-deleted
class GatewayAPI: class GatewayAPI:
"""HTTP API server for Collector ↔ Gateway communication.""" """HTTP API server for Collector ↔ Gateway communication."""
@@ -34,11 +40,15 @@ class GatewayAPI:
self.host = host self.host = host
self.port = port self.port = port
self.api_key = api_key self.api_key = api_key
self.app = web.Application(middlewares=[self._auth_middleware]) self.app = web.Application(
middlewares=[self._auth_middleware],
client_max_size=1024 * 1024, # Security: 1MB max request body
)
self._setup_routes() self._setup_routes()
# In-memory stores (Gateway is stateless across restarts) # In-memory stores
self._commands: dict[str, list[dict]] = {} # project → [command dicts] self._commands: dict[str, list[dict]] = {} # project → [command dicts]
self._rate_limits: dict[str, list[float]] = defaultdict(list) # IP → [timestamps]
def _setup_routes(self): def _setup_routes(self):
self.app.router.add_get("/health", self._health) self.app.router.add_get("/health", self._health)
@@ -59,15 +69,29 @@ class GatewayAPI:
if request.path == "/health": if request.path == "/health":
return await handler(request) return await handler(request)
# All /api/* routes require auth # All /api/* routes require auth + rate limit
if request.path.startswith("/api/") and self.api_key: if request.path.startswith("/api/"):
auth = request.headers.get("Authorization", "") # Auth check
if auth != f"Bearer {self.api_key}": if self.api_key:
logger.warning(f"[GATEWAY] 401 Unauthorized: {request.method} {request.path} from {request.remote}") auth = request.headers.get("Authorization", "")
if auth != f"Bearer {self.api_key}":
logger.warning(f"[GATEWAY] 401 Unauthorized: {request.method} {request.path} from {request.remote}")
return web.json_response(
{"error": "Unauthorized", "detail": "Invalid or missing API key"},
status=401,
)
# Rate limit check
ip = request.remote or "unknown"
now = time.time()
window = [t for t in self._rate_limits[ip] if now - t < RATE_LIMIT_WINDOW]
if len(window) >= RATE_LIMIT_MAX:
logger.warning(f"[GATEWAY] 429 Rate limited: {ip}")
return web.json_response( return web.json_response(
{"error": "Unauthorized", "detail": "Invalid or missing API key"}, {"error": "Too Many Requests"},
status=401, status=429,
) )
window.append(now)
self._rate_limits[ip] = window
return await handler(request) return await handler(request)
@@ -119,8 +143,9 @@ class GatewayAPI:
if data is None: if data is None:
return web.json_response({"waiting": True, "request_id": rid}) return web.json_response({"waiting": True, "request_id": rid})
# Serve response and delete file (one-time consumption) # Serve response and delete both response + pending files (one-time consumption)
self.bot.bridge.transport.delete_file("response", f"{rid}.json") self.bot.bridge.transport.delete_file("response", f"{rid}.json")
self.bot.bridge.transport.delete_file("pending", f"{rid}.json") # Bug 2 fix
return web.json_response(data) return web.json_response(data)
# ─── Chat Snapshots (Collector → Gateway → Discord) ─── # ─── Chat Snapshots (Collector → Gateway → Discord) ───
@@ -184,7 +209,21 @@ class GatewayAPI:
"""Bot pushes a command for a Collector to pick up.""" """Bot pushes a command for a Collector to pick up."""
if project not in self._commands: if project not in self._commands:
self._commands[project] = [] self._commands[project] = []
command.setdefault("_ts", time.time()) # TTL tracking
self._commands[project].append(command) self._commands[project].append(command)
# Auto-cleanup stale commands (Security 3: memory leak prevention)
self._cleanup_stale_commands()
def _cleanup_stale_commands(self):
    """Drop queued commands whose age has reached COMMAND_TTL.

    Each project's queue in ``self._commands`` is rebuilt with only the
    commands that are still fresh. A project whose queue ends up empty is
    removed from ``self._commands`` entirely. A command without a ``_ts``
    key is treated as having an age of zero.
    """
    now = time.time()

    def is_fresh(cmd):
        # A missing "_ts" falls back to `now`, i.e. an age of zero.
        return now - cmd.get("_ts", now) < COMMAND_TTL

    # Snapshot the items so entries can be deleted while looping.
    for project, queue in list(self._commands.items()):
        fresh = [cmd for cmd in queue if is_fresh(cmd)]
        if fresh:
            self._commands[project] = fresh
        else:
            del self._commands[project]
# ─── Brain Events (Collector → Gateway → Discord) ─── # ─── Brain Events (Collector → Gateway → Discord) ───