feat: Task Pipeline + Planner E2E 성공 — stdin기반 GeminiCaller 확정 #task-189 #task-190

2026-03-06 17:37:06 +09:00
parent 9192770300
commit 57c9cb6143
7 changed files with 357 additions and 26 deletions
--- a/core/gemini_caller.py
+++ b/core/gemini_caller.py
@@ -1,10 +1,10 @@
-"""GeminiCaller — gemini -p 역할별 headless 호출.
+"""GeminiCaller — gemini headless 호출.

+stdin으로 시스템 프롬프트 + 컨텍스트를 직접 전달합니다.
 cmd /c 래핑으로 PowerShell 실행 정책 우회.
 """

 import asyncio
-import json
 import time
 from pathlib import Path

@@ -21,12 +21,9 @@ class GeminiCaller:
        self.last_call_time = 0.0

    async def call(self, role: str, context: str, timeout: int = 120) -> str:
-        """역할별 프롬프트로 gemini -p 호출.
+        """역할별 프롬프트로 gemini 호출.

-        Args:
-            role: 프롬프트 파일명 (planner, coder, reviewer, tester)
-            context: 전달할 컨텍스트
-            timeout: 최대 대기 시간 (초)
+        시스템 프롬프트와 컨텍스트를 하나로 합쳐 stdin으로 전달.
        """
        # 시스템 프롬프트 로드
        prompt_file = ROLE_PROMPTS_DIR / f"{role}.md"
@@ -35,31 +32,24 @@ class GeminiCaller:
        else:
            system_prompt = f"You are a {role}. Respond in Korean."

-        # cmd 구성
-        cmd_parts = ["gemini", "-p", context]
-
-        if system_prompt:
-            cmd_parts.extend(["--system", system_prompt])
-
-        cmd_parts.extend(["--approval-mode", "yolo"])
-
-        if self.project_path:
-            cmd_parts.extend(["--include-directories", self.project_path])
-
-        # cmd /c 래핑 (PowerShell 실행 정책 우회)
-        escaped_context = context.replace('"', '\\"')
-        cmd_str = f'gemini -p "{escaped_context}" --approval-mode yolo'
-        if self.project_path:
-            cmd_str += f' --include-directories "{self.project_path}"'
+        # 시스템 프롬프트 + 컨텍스트를 하나의 입력으로 합침
+        full_input = (
+            f"=== SYSTEM INSTRUCTIONS ===\n"
+            f"{system_prompt}\n\n"
+            f"=== USER INPUT ===\n"
+            f"{context}"
+        )

        try:
-            proc = await asyncio.create_subprocess_shell(
-                f'cmd /c {cmd_str}',
+            proc = await asyncio.create_subprocess_exec(
+                "cmd", "/c", "gemini --approval-mode yolo",
+                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            stdout, stderr = await asyncio.wait_for(
-                proc.communicate(), timeout=timeout
+                proc.communicate(input=full_input.encode("utf-8")),
+                timeout=timeout
            )

            self.call_count += 1
@@ -85,3 +75,5 @@ class GeminiCaller:
    async def call_simple(self, prompt: str, timeout: int = 60) -> str:
        """Convenience wrapper: run call() with the "default" role (call() still prepends a system prompt)."""
        return await self.call("default", prompt, timeout)
+
+
--- a/core/task_pipeline.py
+++ b/core/task_pipeline.py
@@ -0,0 +1,141 @@
+"""Task Pipeline — Plan → Code → Review → Ship.
+
+E2E 파이프라인을 구성하고 실행합니다.
+"""
+
+import asyncio
+import json
+import re
+from pathlib import Path
+from core.project_indexer import ProjectIndex
+from core.context_manager import ContextManager
+from core.gemini_caller import GeminiCaller
+
+
+class TaskPipeline:
+    """Plan -> Code -> Review pipeline driven by role-specific Gemini calls.
+
+    The planner decomposes a user request into tasks; each task is then
+    handed to the coder and checked by the reviewer, one task at a time.
+    """
+
+    def __init__(self, project_path: str, token_budget: int = 50_000):
+        """Create the index, context manager and Gemini caller for a project.
+
+        Args:
+            project_path: Path handed to ProjectIndex and GeminiCaller.
+            token_budget: Budget handed to ContextManager.
+        """
+        self.project_path = project_path
+        self.index = ProjectIndex(project_path)
+        self.ctx = ContextManager(self.index, token_budget)
+        self.gemini = GeminiCaller(project_path)
+        # One entry per Gemini call, appended by _log().
+        self.log: list[dict] = []
+
+    def setup(self) -> "TaskPipeline":
+        """Scan the project index and return self so calls can be chained."""
+        self.index.scan()
+        return self
+
+    async def plan(self, user_request: str) -> dict:
+        """Ask the "planner" role to decompose a request into tasks.
+
+        Returns:
+            The JSON object parsed from the response. When no JSON object can
+            be extracted, a fallback dict carrying the raw response under
+            "summary" and "raw" with an empty "tasks" list.
+        """
+        structure = self.index.get_structure_summary()
+        prompt = (
+            f"## User Request\n{user_request}\n\n"
+            f"## Project Structure\n{structure}\n\n"
+            f"Decompose this request into concrete tasks."
+        )
+
+        response = await self.gemini.call("planner", prompt, timeout=90)
+        self._log("plan", user_request, response)
+
+        plan = self._extract_json(response)
+        return plan or {"summary": response, "tasks": [], "raw": response}
+
+    async def code(self, task: dict) -> str:
+        """Ask the "coder" role to implement one task.
+
+        The task description (or, failing that, its title) is used as the
+        query for gathering context from the context manager.
+
+        Returns:
+            The raw response text of the coder call.
+        """
+        context = self.ctx.gather(task.get("description", task.get("title", "")))
+
+        prompt = (
+            f"## Task\n{json.dumps(task, ensure_ascii=False, indent=2)}\n\n"
+            f"## Context\n{context}\n\n"
+            f"Implement the changes described in the task."
+        )
+
+        response = await self.gemini.call("coder", prompt, timeout=120)
+        self._log("code", task.get("title", ""), response)
+        return response
+
+    async def review(self, task: dict, code_output: str) -> dict:
+        """Ask the "reviewer" role to review the coder output for a task.
+
+        Returns:
+            The JSON object parsed from the response. When no JSON object can
+            be extracted, a fallback dict with "passed" set to True and the
+            raw response under "summary" and "raw".
+        """
+        prompt = (
+            f"## Task\n{json.dumps(task, ensure_ascii=False, indent=2)}\n\n"
+            f"## Code Output\n{code_output}\n\n"
+            f"Review the code changes."
+        )
+
+        response = await self.gemini.call("reviewer", prompt, timeout=90)
+        self._log("review", task.get("title", ""), response)
+
+        review = self._extract_json(response)
+        return review or {"passed": True, "summary": response, "raw": response}
+
+    async def execute(self, user_request: str) -> dict:
+        """Run the whole pipeline: plan, then code + review for every task.
+
+        A task whose review does not pass is coded and reviewed once more,
+        with the reviewer's issues appended to its description.
+
+        Returns:
+            A dict with "request", "plan", "tasks_completed" (task, first
+            500 characters of the final output, final review) and "reviews"
+            (every review produced, in call order, including first-pass
+            reviews that triggered a retry). "error" is set when the plan
+            contains no usable tasks.
+        """
+        result = {
+            "request": user_request,
+            "plan": None,
+            "tasks_completed": [],
+            "reviews": [],
+        }
+
+        # 1. Plan
+        plan = await self.plan(user_request)
+        result["plan"] = plan
+
+        # The plan is parsed from free-form model output, so "tasks" may be
+        # missing, null, or contain entries that are not objects.
+        raw_tasks = plan.get("tasks")
+        if isinstance(raw_tasks, list):
+            tasks = [entry for entry in raw_tasks if isinstance(entry, dict)]
+        else:
+            tasks = []
+        if not tasks:
+            result["error"] = "Planner returned no tasks"
+            return result
+
+        # 2. Code + Review for each task
+        for task in tasks:
+            code_output = await self.code(task)
+            review = await self.review(task, code_output)
+            result["reviews"].append(review)
+
+            if not review.get("passed", True):
+                # Retry exactly once, feeding the reviewer's issues back in.
+                feedback = json.dumps(review.get("issues", []), ensure_ascii=False)
+                # "description" may be absent, null or a non-string value.
+                description = str(task.get("description") or "")
+                retry_task = {
+                    **task,
+                    "description": description + f"\n\n## Review Feedback\n{feedback}",
+                }
+                code_output = await self.code(retry_task)
+                review = await self.review(task, code_output)
+                result["reviews"].append(review)
+
+            result["tasks_completed"].append({
+                "task": task,
+                "output": code_output[:500],  # truncated summary
+                "review": review,
+            })
+
+        return result
+
+    def _extract_json(self, text: str) -> dict | None:
+        """Extract the first JSON object found in free-form text.
+
+        A fenced ```json block is tried first. Otherwise every "{" in the
+        text is tried as the start of a JSON document, so prose or stray
+        braces after the object do not defeat the parse.
+
+        Returns:
+            The decoded object, or None when no JSON *object* is found.
+            Other JSON values (lists, strings, numbers) are rejected because
+            callers use the result as a dict.
+        """
+        fenced = re.search(r"```json\s*\n(.*?)\n\s*```", text, re.DOTALL)
+        if fenced:
+            try:
+                candidate = json.loads(fenced.group(1))
+            except json.JSONDecodeError:
+                candidate = None
+            if isinstance(candidate, dict):
+                return candidate
+
+        decoder = json.JSONDecoder()
+        start = text.find("{")
+        while start != -1:
+            try:
+                # raw_decode stops at the end of the first complete document
+                # and ignores whatever follows it.
+                candidate, _ = decoder.raw_decode(text[start:])
+            except json.JSONDecodeError:
+                candidate = None
+            if isinstance(candidate, dict):
+                return candidate
+            start = text.find("{", start + 1)
+
+        return None
+
+    def _log(self, phase: str, input_summary: str, output: str):
+        """Append a truncated record of one pipeline phase to self.log."""
+        self.log.append({
+            "phase": phase,
+            "input": input_summary[:200],
+            "output": output[:500],
+        })
--- a/prompts/coder.md
+++ b/prompts/coder.md
@@ -0,0 +1,29 @@
+You are a **Coder** — an AI that implements specific code changes.
+
+## Your Role
+
+Given a task description and relevant source files, you MUST:
+
+1. **Read** the provided source files carefully
+2. **Implement** the requested changes
+3. **Output** the complete modified file(s)
+
+## Output Format
+
+For each modified file, output:
+
+```
+=== FILE: path/to/file.py ===
+(complete file content here)
+=== END FILE ===
+```
+
+## Rules
+
+- Output the COMPLETE file, not just the diff
+- Preserve existing code structure and style
+- Add comments for non-obvious changes
+- Handle edge cases
+- Do NOT add unnecessary changes beyond what was requested
+- If creating a new file, include proper docstring/header
+- Respond in the same language as the existing code comments
--- a/prompts/planner.md
+++ b/prompts/planner.md
@@ -0,0 +1,38 @@
+You are a **Planner** — an AI that decomposes user requests into concrete, actionable tasks.
+
+## Your Role
+
+Given a user request and project context, you MUST:
+
+1. **Analyze** the request and understand what needs to change
+2. **Identify** which files are affected
+3. **Decompose** into specific tasks, each modifying 1-3 files
+
+## Output Format
+
+Respond in this exact JSON format:
+
+```json
+{
+  "summary": "Brief summary of what needs to be done",
+  "tasks": [
+    {
+      "id": 1,
+      "title": "Task title",
+      "files": ["path/to/file.py"],
+      "description": "Specific changes to make",
+      "type": "create|modify|delete"
+    }
+  ],
+  "risk": "low|medium|high",
+  "estimated_calls": 3
+}
+```
+
+## Rules
+
+- Keep task count minimal (1-5 tasks)
+- Each task should be independently executable
+- Order tasks by dependency (dependencies first)
+- Be specific about what to change, not vague
+- Respond in Korean for summaries, English for code references
--- a/prompts/reviewer.md
+++ b/prompts/reviewer.md
@@ -0,0 +1,37 @@
+You are a **Reviewer** — an AI that reviews code changes for correctness and quality.
+
+## Your Role
+
+Given original and modified code, you MUST:
+
+1. **Compare** the original and modified versions
+2. **Check** for correctness, bugs, edge cases
+3. **Verify** the changes match the task requirements
+4. **Assess** code quality and style
+
+## Output Format
+
+```json
+{
+  "passed": true|false,
+  "issues": [
+    {
+      "severity": "critical|warning|info",
+      "file": "path/to/file.py",
+      "line": 42,
+      "description": "Issue description"
+    }
+  ],
+  "suggestions": ["Optional improvement suggestions"],
+  "summary": "Review summary in Korean"
+}
+```
+
+## Rules
+
+- Be thorough but not pedantic
+- Focus on correctness over style
+- Critical issues = must fix before merge
+- Warnings = should fix, won't block
+- Info = nice to have improvements
+- If no issues found, set passed=true with empty issues array
--- a/tests/test_pipeline_e2e.py
+++ b/tests/test_pipeline_e2e.py
@@ -0,0 +1,56 @@
+"""E2E Test: Task Pipeline with real Gemini CLI.
+
+Tests Planner phase against the variet-agent project.
+"""
+
+import sys
+import io
+import asyncio
+import json
+
+# Re-wrap stdout as UTF-8 so printing the emoji/Korean text below does not
+# depend on the console's default encoding.
+if sys.stdout.encoding != "utf-8":
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
+
+# Project under test: the checkout that contains this tests/ directory,
+# resolved from __file__ instead of a machine-specific absolute path.
+from pathlib import Path
+
+PROJECT = str(Path(__file__).resolve().parent.parent)
+sys.path.insert(0, PROJECT)
+
+from core.task_pipeline import TaskPipeline
+
+
+async def test_planner():
+    """Run only the Planner phase against PROJECT and print the outcome.
+
+    Prints the parsed plan as JSON, then either a one-line summary per task
+    or, when the plan has no tasks, the first 500 characters of the raw
+    response. Returns the plan dict.
+    """
+    banner = "=" * 60
+    print(banner)
+    print("E2E TEST: Planner")
+    print(banner)
+
+    pipeline = TaskPipeline(PROJECT, token_budget=30_000)
+    pipeline.setup()
+
+    request = (
+        "project_indexer.py의 find_relevant 함수가 공백이 포함된 쿼리를 처리하지 못합니다. "
+        "'gemini caller'로 검색하면 gemini_caller.py를 찾지 못합니다. "
+        "밑줄과 공백을 동일하게 처리하도록 개선해주세요."
+    )
+    plan = await pipeline.plan(request)
+
+    print("\n📋 Plan result:")
+    print(json.dumps(plan, ensure_ascii=False, indent=2))
+
+    tasks = plan.get("tasks")
+    if not tasks:
+        # No structured tasks: show the unparsed response instead.
+        print("\n⚠️ No structured tasks, raw response:")
+        print(plan.get("raw", plan.get("summary", ""))[:500])
+        return plan
+
+    print(f"\n✅ Planner returned {len(tasks)} tasks")
+    for entry in tasks:
+        label = entry.get('title', entry.get('id', '?'))
+        detail = entry.get('description', '')[:80]
+        print(f"  - {label}: {detail}")
+
+    return plan
+
+
+async def main():
+    """Run the planner E2E check and print a closing summary."""
+    await test_planner()
+    separator = "=" * 60
+    print("\n" + separator)
+    # NOTE(review): the call count is a fixed literal, not read from the caller.
+    print("Gemini calls: 1")
+    print("✅ E2E Planner test complete!")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/tests/test_quick.py
+++ b/tests/test_quick.py
@@ -0,0 +1,38 @@
+"""Debug: test --system flag with stdin."""
+import asyncio, tempfile, os
+
+async def test():
+    """Pipe a prompt into `gemini --system <file>` and print what comes back.
+
+    Writes a system prompt to a temporary .md file, runs the CLI through
+    `cmd /c` with the user prompt on stdin, then prints the first 500
+    characters of stdout and stderr plus the stdout lines that contain
+    neither "YOLO" nor "Loaded cached". The temporary file is removed even
+    when the subprocess fails or times out.
+
+    Raises:
+        asyncio.TimeoutError: the CLI did not finish within 90 seconds.
+    """
+    # delete=False so the file survives close() and can be opened by path
+    # from the child process; it is removed in the finally block below.
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".md", delete=False, encoding="utf-8"
+    ) as sys_file:
+        sys_file.write("You are a planner. Respond with a JSON object containing: summary, tasks array.")
+
+    try:
+        ctx = "List 2 tasks to improve a Python project. Respond in JSON format."
+
+        print(f"System file: {sys_file.name}")
+        print(f"Context length: {len(ctx)}")
+
+        # Test 1: with --system
+        proc = await asyncio.create_subprocess_exec(
+            "cmd", "/c",
+            f"gemini --system {sys_file.name} --approval-mode yolo",
+            stdin=asyncio.subprocess.PIPE,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        try:
+            stdout, stderr = await asyncio.wait_for(
+                proc.communicate(input=ctx.encode("utf-8")),
+                timeout=90
+            )
+        except asyncio.TimeoutError:
+            # wait_for only cancels communicate(); stop the child explicitly
+            # so it is not left running after the timeout.
+            try:
+                proc.kill()
+            except ProcessLookupError:
+                pass  # already exited
+            await proc.wait()
+            raise
+
+        out = stdout.decode('utf-8', errors='replace')
+        err = stderr.decode('utf-8', errors='replace')
+
+        kept = [
+            line for line in out.splitlines()
+            if "YOLO" not in line and "Loaded cached" not in line
+        ]
+        print(f"\nSTDOUT ({len(out)} bytes): {out[:500]}")
+        print(f"STDERR ({len(err)} bytes): {err[:500]}")
+        print(f"CLEANED: {'|'.join(kept[:10])}")
+    finally:
+        os.unlink(sys_file.name)
+
+
+# Guarded so that importing this module (e.g. test collection) does not
+# launch the CLI; running the file as a script behaves as before.
+if __name__ == "__main__":
+    asyncio.run(test())