refactor: 전면 재설계 - 시작 시 채널 스팸 제거, content hash 중복 방지, 단일 이벤트 경로
This commit is contained in:
50
watcher.py
50
watcher.py
@@ -2,9 +2,12 @@
|
||||
|
||||
Uses watchdog to detect file creation/modification events in the brain directory.
|
||||
Emits events to an asyncio queue for the Discord bot to consume.
|
||||
|
||||
Key design: ONLY emits events for meaningful content changes using hash dedup.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import time
|
||||
import logging
|
||||
from pathlib import Path
|
||||
@@ -21,8 +24,7 @@ logger = logging.getLogger(__name__)
|
||||
class EventType(Enum):
|
||||
"""Types of brain events."""
|
||||
SESSION_START = "session_start" # New conversation directory created
|
||||
SESSION_END = "session_end" # Conversation directory removed (or program exit)
|
||||
FILE_CHANGED = "file_changed" # Watched file created/modified
|
||||
FILE_CHANGED = "file_changed" # Watched file modified
|
||||
FILE_CREATED = "file_created" # Watched file first created
|
||||
|
||||
|
||||
@@ -38,18 +40,19 @@ class BrainEvent:
|
||||
|
||||
|
||||
class BrainEventHandler(FileSystemEventHandler):
|
||||
"""Watchdog handler that filters and debounces brain events."""
|
||||
"""Watchdog handler that filters, debounces, and deduplicates brain events."""
|
||||
|
||||
def __init__(self, event_queue: asyncio.Queue, loop: asyncio.AbstractEventLoop):
    """Store the delivery targets and set up per-path tracking state.

    Args:
        event_queue: Queue kept on the handler as ``self.event_queue``.
        loop: Event loop kept on the handler as ``self.loop``.
    """
    super().__init__()
    self.event_queue = event_queue
    self.loop = loop
    # Each tracking dict is assigned exactly once; the diff view showed the
    # debounce map twice (old and new row of a whitespace-only change).
    self._last_events: dict[str, float] = {}  # path -> timestamp (debounce)
    self._content_hashes: dict[str, str] = {}  # path -> md5 hash (dedup)
    self._known_sessions: set[str] = set()
    self._initialize_known_sessions()
|
||||
|
||||
def _initialize_known_sessions(self):
|
||||
"""Scan existing brain directories to establish baseline."""
|
||||
"""Scan existing brain directories to establish baseline (no events emitted)."""
|
||||
brain_path = Config.BRAIN_PATH
|
||||
if brain_path.exists():
|
||||
for entry in brain_path.iterdir():
|
||||
@@ -58,12 +61,10 @@ class BrainEventHandler(FileSystemEventHandler):
|
||||
logger.info(f"Found {len(self._known_sessions)} existing sessions at startup")
|
||||
|
||||
def _is_conversation_id(self, name: str) -> bool:
|
||||
"""Check if directory name looks like a UUID conversation ID."""
|
||||
parts = name.split("-")
|
||||
return len(parts) == 5 and all(len(p) >= 4 for p in parts)
|
||||
|
||||
def _get_conversation_id(self, path: Path) -> str | None:
|
||||
"""Extract conversation ID from file path."""
|
||||
brain_path = Config.BRAIN_PATH
|
||||
try:
|
||||
relative = path.relative_to(brain_path)
|
||||
@@ -75,7 +76,6 @@ class BrainEventHandler(FileSystemEventHandler):
|
||||
return None
|
||||
|
||||
def _should_debounce(self, path_str: str) -> bool:
|
||||
"""Check if this event should be debounced."""
|
||||
now = time.time()
|
||||
last = self._last_events.get(path_str, 0)
|
||||
if now - last < Config.DEBOUNCE_SECONDS:
|
||||
@@ -83,8 +83,20 @@ class BrainEventHandler(FileSystemEventHandler):
|
||||
self._last_events[path_str] = now
|
||||
return False
|
||||
|
||||
def _content_changed(self, path_str: str, content: str) -> bool:
|
||||
"""Check if content actually changed using MD5 hash."""
|
||||
new_hash = hashlib.md5(content.encode()).hexdigest()
|
||||
old_hash = self._content_hashes.get(path_str)
|
||||
if old_hash == new_hash:
|
||||
return False
|
||||
self._content_hashes[path_str] = new_hash
|
||||
return True
|
||||
|
||||
def _is_watched_file(self, file_name: str) -> bool:
    """Return True only if *file_name* is listed in ``Config.WATCHED_FILES``.

    The match is an exact membership test on the bare file name; no suffix
    or pattern matching is applied.
    """
    watched_names = Config.WATCHED_FILES
    return file_name in watched_names
|
||||
|
||||
def _emit(self, event: BrainEvent):
    """Hand *event* to the asyncio queue from outside the loop's thread.

    The non-blocking put is scheduled on ``self.loop`` through
    ``call_soon_threadsafe`` instead of being called directly.
    """
    enqueue = self.event_queue.put_nowait
    self.loop.call_soon_threadsafe(enqueue, event)
|
||||
|
||||
def on_created(self, event: FileSystemEvent):
|
||||
@@ -98,10 +110,8 @@ class BrainEventHandler(FileSystemEventHandler):
|
||||
self._handle_file_event(Path(event.src_path), EventType.FILE_CHANGED)
|
||||
|
||||
def _handle_directory_created(self, path: Path):
|
||||
"""Detect new session directories."""
|
||||
conv_id = self._get_conversation_id(path)
|
||||
if conv_id and conv_id not in self._known_sessions:
|
||||
# Check if this is a direct child of brain/
|
||||
if path.parent == Config.BRAIN_PATH:
|
||||
self._known_sessions.add(conv_id)
|
||||
logger.info(f"New session detected: {conv_id}")
|
||||
@@ -111,17 +121,17 @@ class BrainEventHandler(FileSystemEventHandler):
|
||||
))
|
||||
|
||||
def _handle_file_event(self, path: Path, event_type: EventType):
|
||||
"""Process file creation/modification events."""
|
||||
conv_id = self._get_conversation_id(path)
|
||||
if not conv_id:
|
||||
return
|
||||
|
||||
file_name = path.name
|
||||
if file_name not in Config.WATCHED_FILES:
|
||||
# Check suffix patterns
|
||||
if not any(file_name.endswith(s) for s in Config.WATCHED_SUFFIXES):
|
||||
return
|
||||
|
||||
# STRICT filter: only primary artifacts
|
||||
if not self._is_watched_file(file_name):
|
||||
return
|
||||
|
||||
# Debounce: skip rapid-fire events for same file
|
||||
if self._should_debounce(str(path)):
|
||||
return
|
||||
|
||||
@@ -132,7 +142,11 @@ class BrainEventHandler(FileSystemEventHandler):
|
||||
logger.warning(f"Failed to read {path}: {e}")
|
||||
return
|
||||
|
||||
logger.info(f"File event: {event_type.value} {conv_id}/{file_name}")
|
||||
# Content hash dedup: skip if content hasn't actually changed
|
||||
if not self._content_changed(str(path), content):
|
||||
return
|
||||
|
||||
logger.info(f"File event: {event_type.value} {conv_id[:8]}/{file_name}")
|
||||
self._emit(BrainEvent(
|
||||
event_type=event_type,
|
||||
conversation_id=conv_id,
|
||||
@@ -152,7 +166,6 @@ class BrainWatcher:
|
||||
self.handler = BrainEventHandler(event_queue, loop)
|
||||
|
||||
def start(self):
|
||||
"""Start watching the brain directory."""
|
||||
brain_path = Config.BRAIN_PATH
|
||||
if not brain_path.exists():
|
||||
logger.error(f"Brain path does not exist: {brain_path}")
|
||||
@@ -163,7 +176,6 @@ class BrainWatcher:
|
||||
logger.info(f"Watching brain directory: {brain_path}")
|
||||
|
||||
def stop(self):
    """Stop the observer, wait for it to finish, then log the shutdown."""
    observer = self.observer
    observer.stop()
    # join() after stop() so the log line is written only once the observer has finished.
    observer.join()
    logger.info("Brain watcher stopped")
|
||||
|
||||
Reference in New Issue
Block a user