# variet-agent/core/file_applier.py

"""File Applier — Coder 출력을 실제 파일에 적용.

Gemini가 출력할 수 있는 다양한 형식을 모두 지원:
1. === FILE: path === ... === END FILE ===
2. ```lang:path/to/file.py ... ```
3. ```lang\n// file: path/to/file.py ... ```
4. **path/to/file.py** + 코드블록
5. `path/to/file.py`: + 코드블록
6. 순수 마크다운 코드블록 (파일명 헤더 포함)
"""

import re
import logging
from pathlib import Path
from dataclasses import dataclass

# Logger used by this module for skip warnings and write notifications.
logger = logging.getLogger("variet.applier")

# Source-file extensions, used to decide whether a string looks like a file name.
# Entries are lowercase and include the leading dot, matching how they are
# compared against ``Path(...).suffix.lower()``.
SOURCE_EXTENSIONS = {
    ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cs", ".cpp", ".c", ".h",
    ".go", ".rs", ".rb", ".php", ".html", ".css", ".scss", ".yaml", ".yml",
    ".json", ".toml", ".md", ".sql", ".sh", ".ps1", ".bat", ".xml", ".vue",
    ".svelte", ".astro",
}


@dataclass
class FileChange:
    """One unit of file change: a target path plus the full content to write.

    The parsers in this file always construct instances with ``is_new=False``;
    ``apply_changes`` decides created-vs-modified by checking the filesystem
    itself rather than reading this flag.
    """
    path: str           # relative path (as cleaned by the parser that found it)
    content: str        # complete file content
    is_new: bool        # whether the file is new
    source: str = ""    # name of the pattern that detected this block


def _looks_like_filepath(text: str) -> bool:
    """텍스트가 파일 경로처럼 보이는지."""
    text = text.strip().strip("`").strip("*").strip('"').strip("'")
    if not text or len(text) > 200:
        return False
    # 확장자가 있는지
    if Path(text).suffix.lower() in SOURCE_EXTENSIONS:
        return True
    # 경로 구분자가 있는지
    if "/" in text or "\\" in text:
        return Path(text).suffix != ""
    return False


def _clean_path(raw: str) -> str:
    """경로에서 불필요한 장식 제거."""
    path = raw.strip()
    # 마크다운 장식 제거
    path = path.strip("`").strip("*").strip('"').strip("'")
    # 앞뒤 공백/콜론 제거
    path = path.strip().rstrip(":").strip()
    # 윈도우 → 유닉스
    path = path.replace("\\", "/")
    # 선행 ./ 제거
    if path.startswith("./"):
        path = path[2:]
    return path


def parse_code_output(raw: str) -> list[FileChange]:
    """Extract every file block found in the Coder output.

    All four pattern parsers are run over ``raw`` and their results are
    concatenated in pattern order (1 through 4). When several blocks name
    the same path, the block found last replaces the earlier ones, while
    the path keeps the position of its first appearance in the returned
    list.
    """
    parsers = (
        _parse_file_markers,           # pattern 1: === FILE: path === ... === END FILE ===
        _parse_lang_colon_path,        # pattern 2: ```lang:path/to/file.py ... ```
        _parse_comment_filepath,       # pattern 3: "// file: path" / "# file: path" inside a fence
        _parse_header_then_codeblock,  # pattern 4: **path** or `path` followed by a fence
    )

    collected = []
    for parser in parsers:
        collected.extend(parser(raw))

    # De-duplicate by path: a dict keeps first-insertion order but last value.
    by_path = {change.path: change for change in collected}
    return list(by_path.values())


def _parse_file_markers(raw: str) -> list[FileChange]:
    """Pattern 1: ``=== FILE: path === ... === END FILE ===`` blocks.

    The text between the header line and the END marker becomes the file
    content. Unlike the other patterns, the path is not required to look
    like a file path; it only has to be non-empty after cleaning.
    """
    marker_re = re.compile(
        r'===\s*FILE:\s*(.+?)\s*===\s*\n(.*?)\n\s*===\s*END\s*FILE\s*===',
        re.DOTALL,
    )
    found = []
    for raw_path, body in marker_re.findall(raw):
        cleaned = _clean_path(raw_path)
        if not cleaned:
            continue
        found.append(
            FileChange(path=cleaned, content=body, is_new=False, source="file_marker")
        )
    return found


def _parse_lang_colon_path(raw: str) -> list[FileChange]:
    """Pattern 2: fenced blocks opened as ```` ```lang:path/to/file.py ````.

    Whatever follows the colon on the opening fence line is treated as the
    path; the block is kept only if that text looks like a file path.
    """
    fence_re = re.compile(
        r'```\w*:(.+?)\n(.*?)\n```',
        re.DOTALL,
    )
    # Clean each captured path once, then filter on the cleaned value.
    candidates = (
        (_clean_path(info), body) for info, body in fence_re.findall(raw)
    )
    return [
        FileChange(path=path, content=body, is_new=False, source="lang_colon")
        for path, body in candidates
        if _looks_like_filepath(path)
    ]


def _parse_comment_filepath(raw: str) -> list[FileChange]:
    """Pattern 3: fenced code block whose first line is a ``file:`` comment.

    Recognised first-line forms (``file`` or ``File``):
        // file: path
        # file: path
        /* file: path */
        <!-- file: path -->
        -- file: path

    The marker line itself is dropped; everything after it inside the
    fence becomes the file content. Blocks whose path does not look like
    a file path are ignored.

    Args:
        raw: Full model output.

    Returns:
        One ``FileChange`` per matching block, in order of appearance.
    """
    fence_re = re.compile(
        r'```(\w*)\n(.*?)\n```',
        re.DOTALL,
    )
    # Comment opener, "file:", the path, then an optional block-comment closer.
    # "<!--" and "--" cover HTML/XML and SQL, which have no "//" or "#" form.
    marker_re = re.compile(
        r'(?://|#|/\*|<!--|--)\s*[Ff]ile:\s*(.+?)(?:\s*(?:\*/|-->))?$'
    )

    changes = []
    for match in fence_re.finditer(raw):
        # partition always yields a first line; the remainder is "" when the
        # block consists of a single line.
        first_line, _, remainder = match.group(2).partition("\n")
        file_match = marker_re.match(first_line.strip())
        if file_match is None:
            continue

        path = _clean_path(file_match.group(1))
        if _looks_like_filepath(path):
            changes.append(FileChange(
                path=path, content=remainder, is_new=False, source="comment_filepath"
            ))
    return changes


def _parse_header_then_codeblock(raw: str) -> list[FileChange]:
    """Pattern 4: a ``**path**`` or `` `path` `` line directly before a fence.

    Example:
        **api/server.py**
        ```python
        content...
        ```

    or:
        `core/utils.py`:
        ```python
        content...
        ```

    An optional colon and blank lines may sit between the header and the
    opening fence. Headers that do not look like a file path are ignored.
    """
    # Header (bold or backticked name) followed by the code fence.
    header_re = re.compile(
        r'(?:\*\*([^*\n]+?)\*\*|`([^`\n]+?)`)\s*:?\s*\n+'
        r'```\w*\n(.*?)\n```',
        re.DOTALL,
    )
    found = []
    for bold_name, ticked_name, body in header_re.findall(raw):
        # Exactly one of the two header groups is non-empty per match.
        cleaned = _clean_path(bold_name if bold_name else ticked_name)
        if not _looks_like_filepath(cleaned):
            continue
        found.append(FileChange(
            path=cleaned, content=body, is_new=False, source="header_codeblock"
        ))
    return found


def apply_changes(
    changes: list[FileChange],
    project_path: str | Path,
    dry_run: bool = False,
) -> list[dict]:
    """Write parsed file changes into the project directory.

    Each change is resolved against the project root and then either
    skipped (target outside the root, or whitespace-only content),
    reported only (``dry_run``), or written to disk as UTF-8 after
    creating any missing parent directories.

    Args:
        changes: Result of ``parse_code_output()``.
        project_path: Project root directory.
        dry_run: When True, nothing is written; the result only reports
            what would happen.

    Returns:
        One dict per change, in input order. Written and dry-run entries
        carry ``path``, ``action`` (``created``/``modified`` or
        ``would_create``/``would_modify``), ``lines`` and ``source``.
        Skipped entries carry ``path``, ``action`` (``skipped``),
        ``reason`` and ``source``.

    Raises:
        OSError: Propagated unchanged from directory creation or the file
            write; changes processed before the failure stay on disk.
    """
    root = Path(project_path).resolve()
    results = []

    for change in changes:
        # Normalise the path, then block anything that escapes the project.
        # The comparison is component-wise: a plain string-prefix test would
        # accept a sibling such as "/work/app-evil" for root "/work/app".
        target = (root / change.path).resolve()
        if not target.is_relative_to(root):
            logger.warning(f"경로 보안 위반 - 스킵: {change.path}")
            results.append({
                "path": change.path,
                "action": "skipped",
                "reason": "프로젝트 외부 경로",
                "source": change.source,
            })
            continue

        # Skip blocks with no real content rather than truncating a file.
        if not change.content.strip():
            logger.warning(f"빈 내용 - 스킵: {change.path}")
            results.append({
                "path": change.path,
                "action": "skipped",
                "reason": "내용 없음",
                "source": change.source,
            })
            continue

        # Created-vs-modified is decided from the filesystem, not from
        # ``change.is_new``.
        is_new = not target.exists()
        line_count = len(change.content.splitlines())

        if dry_run:
            results.append({
                "path": change.path,
                "action": "would_create" if is_new else "would_modify",
                "lines": line_count,
                "source": change.source,
            })
            continue

        # Make sure the parent directory exists before writing.
        target.parent.mkdir(parents=True, exist_ok=True)

        # Write the whole file, replacing any previous content.
        target.write_text(change.content, encoding="utf-8")
        action = "created" if is_new else "modified"
        logger.info(f"파일 {action}: {change.path} ({line_count}L, via {change.source})")

        results.append({
            "path": change.path,
            "action": action,
            "lines": line_count,
            "source": change.source,
        })

    return results