# File: gravity_control/parser.py (219 lines, 6.4 KiB, Python)

"""Markdown → Discord text parser.
Handles:
- task.md checkbox progress extraction
- MD → Discord-friendly text conversion
- Long text splitting for Discord's 2000 char limit
"""
import re
from dataclasses import dataclass
@dataclass
class TaskProgress:
    """Checkbox progress counters for a task.md document.

    All fields default to an "empty" progress (zero counters, no current
    task, no items), so ``TaskProgress()`` is a valid starting point that a
    parser can fill in incrementally.
    """

    # Number of checkbox items counted.
    total: int = 0
    # Number of items counted as done.
    done: int = 0
    # Number of items counted as in progress.
    in_progress: int = 0
    # Number of items counted as pending.
    pending: int = 0
    # Text of the task currently being worked on ("" when there is none).
    current_task: str = ""
    # Per-checkbox records. ``None`` (the default) is replaced by a fresh
    # list in __post_init__ so instances never share one list object.
    sections: list = None

    def __post_init__(self):
        """Give every instance its own ``sections`` list."""
        if self.sections is None:
            self.sections = []

    @property
    def summary_line(self) -> str:
        """Return a 10-cell text progress bar followed by ``done/total``.

        The number of filled cells is ``done / total`` scaled to the bar
        width and rounded. ``total == 0`` is treated as ``1`` to avoid a
        division by zero, and the fill is clamped to the bar width so
        inconsistent counters (``done > total``) cannot stretch the bar.
        """
        bar_len = 10
        ratio = self.done / max(self.total, 1)
        filled = min(bar_len, max(0, round(ratio * bar_len)))
        bar = "█" * filled + "░" * (bar_len - filled)
        return f"[{bar}] {self.done}/{self.total} 완료"
def parse_task_progress(content: str) -> TaskProgress:
    """Parse task.md text and extract checkbox progress.

    Recognized lines:
    - ``#``/``##``/``###`` headers set the section name attached to the
      checkboxes that follow.
    - ``- [x]`` / ``- [X]`` items count as done, ``- [/]`` as in progress,
      ``- [ ]`` as pending. Leading indentation is allowed.

    Every checkbox is also recorded in ``progress.sections`` as a dict with
    the keys ``"section"``, ``"state"`` (one of ``"x"``, ``"/"``, ``" "``)
    and ``"text"``. ``current_task`` ends up holding the text of the last
    in-progress item in the document.

    Args:
        content: Full text of the task.md document.

    Returns:
        A TaskProgress with the counters and per-item records filled in.
    """
    header_re = re.compile(r'^#{1,3}\s+(.+)')
    # Accept both "x" and "X": task-list markers are case-insensitive in
    # GitHub-flavored markdown, and an unmatched item would be silently
    # dropped from every counter.
    checkbox_re = re.compile(r'^\s*-\s*\[([ xX/])\]\s*(.+)')

    progress = TaskProgress()
    current_section = ""

    for line in content.splitlines():
        header_match = header_re.match(line)
        if header_match:
            current_section = header_match.group(1).strip()
            continue

        checkbox_match = checkbox_re.match(line)
        if not checkbox_match:
            continue

        state, text = checkbox_match.groups()
        # Normalize so consumers only ever see "x", "/" or " ".
        state = state.lower()
        text = text.strip()

        progress.total += 1
        if state == 'x':
            progress.done += 1
        elif state == '/':
            progress.in_progress += 1
            progress.current_task = text
        else:
            progress.pending += 1

        progress.sections.append({
            "section": current_section,
            "state": state,
            "text": text,
        })

    return progress
def md_to_discord_text(content: str, max_length: int = 1900) -> list[str]:
    """Convert markdown to Discord-friendly text and split it into chunks.

    Converts:
    - Headers (levels 1-3) → bold lines
    - Checkboxes → emoji (✅ done, 🔄 in progress, ⬜ pending)
    - Alert markers (> [!NOTE] etc.) → emoji + bold label
    - Blockquote lines → plain text with the leading ">" removed

    Passes through unchanged:
    - Fenced code blocks (Discord supports ```)
    - Every other line, including tables

    Strips:
    - Mermaid diagrams (replaced by a one-line placeholder)
    - HTML comments, whether on one line or spanning several lines

    Args:
        content: Markdown source text.
        max_length: Maximum chunk length handed to split_text.

    Returns:
        The list of chunks produced by ``split_text(text, max_length)``.
    """
    alert_emoji = {
        "NOTE": "📝", "TIP": "💡", "IMPORTANT": "❗",
        "WARNING": "⚠️", "CAUTION": "🔴",
    }
    checkbox_emoji = {"x": "✅", "/": "🔄", " ": "⬜"}

    # Built once per call rather than once per line.
    one_line_comment_re = re.compile(r'^\s*<!--.*-->\s*$')
    alert_re = re.compile(r'>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]')
    header_re = re.compile(r'^(#{1,3})\s+(.+)')
    checkbox_re = re.compile(r'^(\s*)-\s*\[([ xX/])\]\s*(.+)')

    output_lines = []
    in_mermaid = False
    in_code_block = False
    in_html_comment = False

    for line in content.splitlines():
        # Inside a multi-line HTML comment: drop lines up to and including
        # the one that closes it. This flag is only ever set outside code
        # and mermaid blocks, so checking it first is safe.
        if in_html_comment:
            if "-->" in line:
                in_html_comment = False
            continue

        # Skip mermaid blocks, leaving a placeholder in their place.
        if line.startswith("```mermaid"):
            in_mermaid = True
            output_lines.append("*(mermaid 다이어그램 생략)*")
            continue
        if in_mermaid:
            if line.strip() == '```':
                in_mermaid = False
            continue

        # Fenced code blocks are copied verbatim, fences included.
        if line.startswith("```"):
            in_code_block = not in_code_block
            output_lines.append(line)
            continue
        if in_code_block:
            output_lines.append(line)
            continue

        # HTML comments: a complete one-line comment is dropped; a line that
        # opens a comment without closing it starts a multi-line skip.
        if one_line_comment_re.match(line):
            continue
        stripped = line.lstrip()
        if stripped.startswith("<!--") and "-->" not in stripped:
            in_html_comment = True
            continue

        # Alert marker line → emoji + bold label (the alert body follows on
        # ordinary blockquote lines handled further down).
        alert_match = alert_re.match(line)
        if alert_match:
            alert_type = alert_match.group(1)
            output_lines.append(
                f"{alert_emoji.get(alert_type, '📌')} **{alert_type}**"
            )
            continue

        # Headers → bold, with heavier decoration for higher levels.
        header_match = header_re.match(line)
        if header_match:
            level = len(header_match.group(1))
            text = header_match.group(2)
            if level == 1:
                output_lines.append(f"\n**━━ {text} ━━**\n")
            elif level == 2:
                output_lines.append(f"\n**▸ {text}**")
            else:
                output_lines.append(f"**{text}**")
            continue

        # Checkboxes → emoji, keeping the original indentation.
        checkbox_match = checkbox_re.match(line)
        if checkbox_match:
            indent, state, text = checkbox_match.groups()
            emoji = checkbox_emoji.get(state.lower(), "⬜")
            output_lines.append(f"{indent}{emoji} {text}")
            continue

        # Blockquote markers are removed; a bare ">" (blank line inside a
        # quote) becomes an empty line instead of leaking a stray ">".
        if line.startswith("> "):
            output_lines.append(line[2:])
            continue
        if line.rstrip() == ">":
            output_lines.append("")
            continue

        # Pass through everything else.
        output_lines.append(line)

    full_text = "\n".join(output_lines).strip()
    return split_text(full_text, max_length)
def split_text(text: str, max_length: int = 1900) -> list[str]:
    """Split text into chunks no longer than ``max_length`` characters.

    Splitting prefers line boundaries. A single line longer than
    ``max_length`` is split on spaces, and a single word longer than
    ``max_length`` (a long URL, text without spaces) is cut into
    fixed-size pieces so that no chunk ever exceeds the limit.

    Args:
        text: The text to split.
        max_length: Maximum length of each returned chunk; must be >= 1.

    Returns:
        The chunks in order. Text that already fits is returned as a
        single-element list (``[""]`` for empty input).

    Raises:
        ValueError: If ``max_length`` is less than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")
    if len(text) <= max_length:
        return [text]

    chunks: list[str] = []
    current = ""

    for line in text.split("\n"):
        # The +1 accounts for the newline joining `current` and `line`.
        if len(current) + len(line) + 1 <= max_length:
            current = f"{current}\n{line}" if current else line
            continue

        # The line does not fit: flush what has been accumulated so far.
        if current:
            chunks.append(current)
            current = ""

        if len(line) <= max_length:
            current = line
            continue

        # The line alone is too long: rebuild it word by word.
        for word in line.split(" "):
            # The +1 accounts for the space joining `current` and `word`.
            if len(current) + len(word) + 1 <= max_length:
                current = f"{current} {word}" if current else word
                continue

            if current:
                chunks.append(current)
            # A word longer than the limit cannot be kept whole; emit
            # full-size pieces and carry the remainder forward.
            while len(word) > max_length:
                chunks.append(word[:max_length])
                word = word[max_length:]
            current = word

    if current:
        chunks.append(current)
    return chunks
def format_task_embed_text(progress: TaskProgress) -> str:
    """Format task progress as a compact Discord text message.

    The message starts with the summary line, optionally followed by the
    current task, and then lists every item in ``progress.sections``. A
    bold section heading is emitted whenever an item's ``"section"``
    differs from that of the previous item.

    Args:
        progress: Object providing ``summary_line``, ``current_task`` and
            ``sections`` (dicts with ``"section"``, ``"state"``, ``"text"``).

    Returns:
        The message as a single newline-joined string.
    """
    # Distinct glyphs per state so done and pending items can be told apart.
    state_emoji = {"x": "✅", "/": "🔄", " ": "⬜"}

    lines = [
        f"📋 **진행 상황** {progress.summary_line}",
    ]
    if progress.current_task:
        lines.append(f"🔄 현재: {progress.current_task}")

    # Items arrive in document order, so a section change is detected by
    # comparing with the section of the previous item.
    current_section = ""
    for item in progress.sections:
        if item["section"] != current_section:
            current_section = item["section"]
            lines.append(f"\n**{current_section}**")
        emoji = state_emoji.get(item["state"], "⬜")
        lines.append(f" {emoji} {item['text']}")

    return "\n".join(lines)