feat(pipeline): YouTube Tab → PDF 자동 추출 파이프라인 초기 구현

- 5단계 파이프라인: 다운로드 → 프레임 추출 → 패턴 감지 → 중복 제거 → PDF 생성
- 3가지 패턴 지원: overlay, split, scroll
- MSE 기반 픽셀 비교 프레임 중복 제거
- split 모드: 42% 크롭 + 밝기 필터 + Tab 라인 검증
- overlay 모드: 320x120 정규화 + 슬라이딩 윈도우 비교
- 프로젝트 문서 초기 작성 (architecture, tech-stack, STATUS, known-issues)
This commit is contained in:
2026-03-24 23:25:17 +09:00
commit 3d3f74b082
18 changed files with 1989 additions and 0 deletions

View File

@@ -0,0 +1,328 @@
"""Vikunja safe task updater — preserves existing fields when updating tasks.
Usage:
python vikunja_helper.py done 75 # Mark task #75 as done
python vikunja_helper.py done 71 77 78 # Mark multiple tasks done
python vikunja_helper.py undone 75 # Mark task #75 as not done
python vikunja_helper.py comment 75 "text" # Add comment to task #75
python vikunja_helper.py desc 75 "text" # Set description (appends if exists)
python vikunja_helper.py create "title" "desc" --labels Backend,Priority:High
python vikunja_helper.py create "title" "desc" --done --labels Frontend,Priority:Mid
python vikunja_helper.py label 75 Backend Priority:High # Add labels to task
python vikunja_helper.py list # List all tasks
python vikunja_helper.py list todo # List TODO only
python vikunja_helper.py list done # List DONE only
python vikunja_helper.py projects # List all Vikunja projects
python vikunja_helper.py report # Project status report (current)
python vikunja_helper.py report <project_id> # Project status report (specific)
"""
import sys
import json
import urllib.request
import urllib.error
import io
# Fix Windows console encoding (cp949 → utf-8)
if sys.stdout.encoding != "utf-8":
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
# ============================================================
# ⚙️ CONFIGURATION — PROJECT_ID만 프로젝트별로 변경하세요
# ============================================================
API_BASE = "https://plan.variet.net/api/v1"
TOKEN = "tk_070f8e0b715e818bb7178c3815ed5389040eddca"
PROJECT_ID = 7 # Variet Agent 프로젝트
# ============================================================
HEADERS = {
"Authorization": f"Bearer {TOKEN}",
"Content-Type": "application/json",
}
# Label name → Vikunja label ID mapping
# Customize for your project's labels
LABEL_MAP = {
"Backend": 1, "Frontend": 2, "Engine": 3, "Infra": 4, "Test": 5,
"Priority:High": 6, "Priority:Mid": 7, "Priority:Low": 8,
"Agent": 17, "Tool": 18, "AI/LLM": 19,
}
def api_get(path: str):
req = urllib.request.Request(f"{API_BASE}{path}", headers=HEADERS)
with urllib.request.urlopen(req) as resp:
return json.loads(resp.read().decode("utf-8"))
def api_post(path: str, data: dict):
body = json.dumps(data).encode("utf-8")
req = urllib.request.Request(f"{API_BASE}{path}", data=body, headers=HEADERS, method="POST")
with urllib.request.urlopen(req) as resp:
return json.loads(resp.read().decode("utf-8"))
def api_put(path: str, data: dict):
body = json.dumps(data).encode("utf-8")
req = urllib.request.Request(f"{API_BASE}{path}", data=body, headers=HEADERS, method="PUT")
with urllib.request.urlopen(req) as resp:
return json.loads(resp.read().decode("utf-8"))
def get_task(task_id: int) -> dict:
return api_get(f"/tasks/{task_id}")
def safe_update_task(task_id: int, updates: dict) -> dict:
task = get_task(task_id)
safe_body = {
"title": task.get("title", ""),
"description": task.get("description", ""),
"priority": task.get("priority", 0),
"done": task.get("done", False),
}
safe_body.update(updates)
return api_post(f"/tasks/{task_id}", safe_body)
def mark_done(task_ids: list):
for tid in task_ids:
result = safe_update_task(tid, {"done": True})
title = result.get("title", "?")
print(f" ✅ #{tid} → done=True [{title}]")
def mark_undone(task_ids: list):
for tid in task_ids:
result = safe_update_task(tid, {"done": False})
title = result.get("title", "?")
print(f" ⬜ #{tid} → done=False [{title}]")
def add_comment(task_id: int, comment: str):
result = api_put(f"/tasks/{task_id}/comments", {"comment": comment})
print(f" 💬 #{task_id} comment added (id={result.get('id', '?')})")
def set_description(task_id: int, desc: str, append: bool = True):
task = get_task(task_id)
existing = task.get("description", "") or ""
if append and existing:
new_desc = existing.rstrip() + "\n\n" + desc
else:
new_desc = desc
result = safe_update_task(task_id, {"description": new_desc})
print(f" 📝 #{task_id} description updated [{result.get('title', '?')}]")
def list_tasks(filter_: str = "all"):
all_tasks = []
page = 1
while True:
batch = api_get(f"/projects/{PROJECT_ID}/tasks?per_page=50&page={page}")
if not batch:
break
all_tasks.extend(batch)
if len(batch) < 50:
break
page += 1
if filter_ == "todo":
all_tasks = [t for t in all_tasks if not t["done"]]
elif filter_ == "done":
all_tasks = [t for t in all_tasks if t["done"]]
all_tasks.sort(key=lambda t: t["id"])
for t in all_tasks:
status = "" if t["done"] else ""
desc = (t.get("description") or "")[:50].replace("\n", " ")
labels = ", ".join(l["title"] for l in (t.get("labels") or []))
print(f" {status} #{t['id']:3d} {t['title'][:40]:<40} [{labels}] {desc}")
print(f"\n Total: {len(all_tasks)} tasks")
def add_labels(task_id: int, label_names: list):
for name in label_names:
label_id = LABEL_MAP.get(name)
if not label_id:
print(f" ⚠️ Unknown label '{name}'. Valid: {', '.join(LABEL_MAP.keys())}")
continue
try:
api_put(f"/tasks/{task_id}/labels", {"label_id": label_id})
print(f" 🏷️ #{task_id} + {name} (id={label_id})")
except Exception as e:
if "already" in str(e).lower() or "409" in str(e):
print(f" 🏷️ #{task_id} already has {name}")
else:
print(f" ⚠️ #{task_id} label {name} failed: {e}")
def create_task(title: str, description: str = "", done: bool = False, labels: list = None):
payload = {"title": title, "description": description}
result = api_put(f"/projects/{PROJECT_ID}/tasks", payload)
task_id = result["id"]
print(f" ✨ #{task_id} created: {result.get('title', '?')}")
if labels:
add_labels(task_id, labels)
if done:
result = safe_update_task(task_id, {"done": True})
print(f" ✅ #{task_id} → done=True")
return result
def list_projects():
"""Vikunja 전체 프로젝트 목록 + 태스크 통계."""
projects = api_get("/projects")
print("📂 프로젝트 목록:")
for p in projects:
pid = p["id"]
title = p["title"]
# 각 프로젝트의 태스크 수 조회
try:
tasks = api_get(f"/projects/{pid}/tasks?per_page=200")
todo = sum(1 for t in tasks if not t["done"])
done = sum(1 for t in tasks if t["done"])
print(f" #{pid:<3d} {title:<30s} TODO: {todo} DONE: {done}")
except Exception:
print(f" #{pid:<3d} {title:<30s} (조회 실패)")
print(f"\n Total: {len(projects)} projects")
def report(project_id: int = None):
"""프로젝트 종합 현황 보고 (태스크 + git log + devlog)."""
import subprocess
from pathlib import Path
from datetime import datetime, timedelta
pid = project_id or PROJECT_ID
# 1) 프로젝트 이름 조회
try:
projects = api_get("/projects")
proj_name = next((p["title"] for p in projects if p["id"] == pid), f"Project #{pid}")
except Exception:
proj_name = f"Project #{pid}"
print(f"=== 프로젝트 현황: {proj_name} (#{pid}) ===")
# 2) 태스크 현황
try:
tasks = api_get(f"/projects/{pid}/tasks?per_page=200")
todo_tasks = [t for t in tasks if not t["done"]]
done_tasks = [t for t in tasks if t["done"]]
total = len(tasks)
rate = f"{len(done_tasks)/total*100:.0f}%" if total else "N/A"
print(f"\n[태스크]")
print(f" TODO: {len(todo_tasks)}건 | DONE: {len(done_tasks)}건 | 완료율: {rate}")
if todo_tasks:
print(f" 미완료:")
for t in todo_tasks:
labels = ", ".join(l["title"] for l in (t.get("labels") or []))
label_str = f" [{labels}]" if labels else ""
desc = (t.get("description") or "")[:40].replace("\n", " ")
desc_str = f" {desc}" if desc else ""
print(f" ⬜ #{t['id']} {t['title'][:50]}{label_str}{desc_str}")
if done_tasks:
# 최근 완료 5건만 표시
recent_done = sorted(done_tasks, key=lambda t: t.get("done_at", ""), reverse=True)[:5]
print(f" 최근 완료 (최대 5건):")
for t in recent_done:
print(f" ✅ #{t['id']} {t['title'][:50]}")
except Exception as e:
print(f" 태스크 조회 실패: {e}")
# 3) Git log (현재 디렉토리 기준)
print(f"\n[최근 커밋 5건]")
try:
result = subprocess.run(
["git", "log", "--oneline", "-5"],
capture_output=True, timeout=10,
encoding="utf-8", errors="replace",
)
if result.returncode == 0 and result.stdout.strip():
for line in result.stdout.strip().splitlines():
print(f" {line}")
else:
print(" (git log 없음)")
except Exception:
print(" (git 실행 불가)")
# 4) Devlog (오늘/어제)
print(f"\n[Devlog]")
today = datetime.now().strftime("%Y-%m-%d")
yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
devlog_found = False
for date_str in [today, yesterday]:
devlog_path = Path("docs/devlog") / f"{date_str}.md"
if devlog_path.exists():
content = devlog_path.read_text(encoding="utf-8").strip()
# 최대 500자
if len(content) > 500:
content = content[:500] + "\n ...(생략)"
print(f" [{date_str}]")
for line in content.splitlines():
print(f" {line}")
devlog_found = True
break
if not devlog_found:
print(" (최근 devlog 없음)")
def main():
if len(sys.argv) < 2:
print(__doc__)
return
cmd = sys.argv[1].lower()
if cmd == "done":
ids = [int(x) for x in sys.argv[2:]]
mark_done(ids)
elif cmd == "undone":
ids = [int(x) for x in sys.argv[2:]]
mark_undone(ids)
elif cmd == "comment":
add_comment(int(sys.argv[2]), sys.argv[3])
elif cmd == "desc":
set_description(int(sys.argv[2]), sys.argv[3])
elif cmd == "list":
f = sys.argv[2] if len(sys.argv) > 2 else "all"
list_tasks(f)
elif cmd == "label":
if len(sys.argv) < 4:
print("Usage: vikunja_helper.py label TASK_ID Label1 Label2 ...")
return
add_labels(int(sys.argv[2]), sys.argv[3:])
elif cmd == "create":
title = sys.argv[2] if len(sys.argv) > 2 else ""
desc = sys.argv[3] if len(sys.argv) > 3 and not sys.argv[3].startswith("--") else ""
is_done = "--done" in sys.argv
labels = None
for i, arg in enumerate(sys.argv):
if arg == "--labels" and i + 1 < len(sys.argv):
labels = sys.argv[i + 1].split(",")
break
if not title:
print("Error: title is required")
return
create_task(title, desc, done=is_done, labels=labels)
elif cmd == "projects":
list_projects()
elif cmd == "report":
pid = int(sys.argv[2]) if len(sys.argv) > 2 else None
report(pid)
else:
print(f"Unknown command: {cmd}")
print(__doc__)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,100 @@
"""Gitea Wiki helper: list, read, create, update wiki pages.
Usage:
wiki_helper.py list — list all pages
wiki_helper.py read <title> — read a page
wiki_helper.py create <title> <file> — create a page from file
wiki_helper.py update <title> <file> — update a page from file
"""
import sys, io, json, base64, urllib.request, urllib.error
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
# ============================================================
# ⚙️ CONFIGURATION — GITEA_REPO만 프로젝트별로 변경하세요
# ============================================================
GITEA_BASE_URL = "https://git.variet.net"
GITEA_OWNER = "Variet"
GITEA_REPO = "variet-agent" # Variet Agent 프로젝트
GITEA_TOKEN = "3a01b4b15a39921572e64c413353e870d4d2161b"
# ============================================================
BASE = f"{GITEA_BASE_URL}/api/v1/repos/{GITEA_OWNER}/{GITEA_REPO}/wiki"
HEADERS = {"Authorization": f"token {GITEA_TOKEN}", "Content-Type": "application/json"}
def _req(method, path, data=None):
url = f"{BASE}{path}"
body = json.dumps(data).encode() if data else None
req = urllib.request.Request(url, data=body, headers=HEADERS, method=method)
try:
with urllib.request.urlopen(req) as resp:
return json.loads(resp.read().decode())
except urllib.error.HTTPError as e:
err = e.read().decode()
print(f" ⚠️ HTTP {e.code}: {err}")
return None
def _find_sub_url(title):
pages = _req("GET", "/pages")
if pages:
for p in pages:
if p.get("title", "").lower() == title.lower():
return p.get("sub_url", title)
return title
def list_pages():
pages = _req("GET", "/pages")
if pages:
print(f"=== {len(pages)} Wiki Pages ===")
for p in pages:
print(f" {p.get('title', '?')}")
return pages
def read_page(title):
sub = _find_sub_url(title)
page = _req("GET", f"/page/{sub}")
if page and page.get("content_base64"):
content = base64.b64decode(page["content_base64"]).decode("utf-8")
return content
return None
def create_page(title, content):
data = {
"title": title,
"content_base64": base64.b64encode(content.encode()).decode(),
}
result = _req("POST", "/new", data)
if result:
print(f" ✅ Created wiki page: {title}")
return result
def update_page(title, content):
sub = _find_sub_url(title)
data = {
"title": title,
"content_base64": base64.b64encode(content.encode()).decode(),
}
result = _req("PATCH", f"/page/{sub}", data)
if result:
print(f" ✅ Updated wiki page: {title}")
return result
if __name__ == "__main__":
cmd = sys.argv[1] if len(sys.argv) > 1 else "list"
if cmd == "list":
list_pages()
elif cmd == "read" and len(sys.argv) > 2:
content = read_page(sys.argv[2])
if content:
print(content[:5000])
else:
print(f" Page '{sys.argv[2]}' not found")
elif cmd == "create" and len(sys.argv) > 3:
with open(sys.argv[3], "r", encoding="utf-8") as f:
create_page(sys.argv[2], f.read())
elif cmd == "update" and len(sys.argv) > 3:
with open(sys.argv[3], "r", encoding="utf-8") as f:
update_page(sys.argv[2], f.read())
else:
print("Usage: wiki_helper.py list|read <title>|create <title> <file>|update <title> <file>")