feat(pipeline): YouTube Tab → PDF 자동 추출 파이프라인 초기 구현

- 5단계 파이프라인: 다운로드 → 프레임 추출 → 패턴 감지 → 중복 제거 → PDF 생성
- 3가지 패턴 지원: overlay, split, scroll
- MSE 기반 픽셀 비교 프레임 중복 제거
- split 모드: 42% 크롭 + 밝기 필터 + Tab 라인 검증
- overlay 모드: 320x120 정규화 + 슬라이딩 윈도우 비교
- 프로젝트 문서 초기 작성 (architecture, tech-stack, STATUS, known-issues)
2026-03-24 23:25:17 +09:00
commit 3d3f74b082
18 changed files with 1989 additions and 0 deletions
--- a/youtube_tab_to_pdf.py
+++ b/youtube_tab_to_pdf.py
@@ -0,0 +1,627 @@
+#!/usr/bin/env python3
+"""
+YouTube Tab → PDF capture pipeline.
+
+Extracts tablature frames from a YouTube guitar TAB video and assembles
+them into a clean PDF.
+
+Usage:
+    python youtube_tab_to_pdf.py "https://youtu.be/VIDEO_ID"
+    python youtube_tab_to_pdf.py "https://youtu.be/VIDEO_ID" -o output.pdf --debug
+"""
+
+import argparse
+import os
+import sys
+import subprocess
+import shutil
+import re
+import tempfile
+from pathlib import Path
+from typing import List, Tuple, Optional
+
+import cv2
+import numpy as np
+from PIL import Image
+
+# On Windows, switch stdout/stderr to UTF-8 (unencodable characters are
+# replaced) because the status messages printed below are non-ASCII.
+if sys.platform == "win32":
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+    sys.stderr.reconfigure(encoding="utf-8", errors="replace")
+
+
+# ─── Configuration ───────────────────────────────────────────────────────
+
+DEFAULT_FPS = 2           # frame sampling rate (frames kept per second of video)
+DEFAULT_CROP_RATIO = 0.55 # fraction of frame height kept from the top (scroll pattern)
+# A frame whose similarity to the reference is at or above this value is
+# treated as a duplicate. The score comes from compare_frames(), which is
+# MSE-based (not SSIM and not a histogram comparison).
+SIMILARITY_THRESHOLD = 0.95
+OVERLAY_MIN_AREA_RATIO = 0.05  # overlay box must cover more than this fraction of the frame area
+OVERLAY_MAX_AREA_RATIO = 0.6   # overlay box must cover less than this fraction of the frame area
+MIN_TAB_LINES = 4              # default minimum number of horizontal tab lines (at least 4 of the 6 strings)
+SPLIT_TOP_RATIO = 0.42         # fraction of frame height kept from the top in split-screen mode (excludes the hand-cam)
+PDF_DPI = 150             # passed as `resolution` when the PDF is saved
+# A4 page width in millimetres.
+# NOTE(review): not referenced anywhere in the visible code — confirm whether it is still needed.
+PDF_PAGE_WIDTH_MM = 210
+
+
+# ─── Step 1: Download ────────────────────────────────────────────────────
+
+def _find_yt_dlp() -> str:
+    """Locate the yt-dlp executable.
+
+    Search order:
+      1. ``PATH`` (via ``shutil.which``).
+      2. pip ``--user`` script directories ``<APPDATA>/Python/Python*/Scripts``
+         (Windows), newest interpreter version first.
+      3. The ``Scripts`` sub-directory of the running interpreter's directory
+         (conda layout) and the interpreter's directory itself (venv layout).
+
+    Returns:
+        Path to the executable as a string.
+
+    Raises:
+        RuntimeError: if yt-dlp is not found in any of the locations above.
+    """
+    on_path = shutil.which("yt-dlp")
+    if on_path:
+        return on_path
+
+    candidates: List[Path] = []
+
+    # pip user-installed scripts (Windows). Skip entirely when APPDATA is
+    # unset so an empty value is not treated as a path relative to the
+    # current working directory.
+    appdata = os.environ.get("APPDATA")
+    if appdata:
+        # Directory names look like "Python312"; order by the numeric part so
+        # that e.g. 312 is preferred over 39 (a plain string sort would not).
+        user_dirs = sorted(
+            Path(appdata, "Python").glob("Python*"),
+            key=lambda p: int(re.sub(r"\D", "", p.name) or 0),
+            reverse=True,
+        )
+        candidates.extend(d / "Scripts" / "yt-dlp.exe" for d in user_dirs)
+
+    # conda: <env>/python.exe + <env>/Scripts/yt-dlp.exe
+    # venv:  <env>/Scripts/python.exe + <env>/Scripts/yt-dlp.exe
+    exe_dir = Path(sys.executable).parent
+    candidates.append(exe_dir / "Scripts" / "yt-dlp.exe")
+    candidates.append(exe_dir / "yt-dlp.exe")
+
+    for candidate in candidates:
+        if candidate.exists():
+            return str(candidate)
+
+    raise RuntimeError("yt-dlp를 찾을 수 없습니다. pip install yt-dlp를 실행하세요.")
+
+
+def download_video(url: str, output_dir: Path) -> Tuple[Path, str]:
+    """Download a YouTube video with yt-dlp.
+
+    The video title is queried first and sanitised into a file name. If
+    ``output_dir/<safe_title>.mp4`` already exists the download is skipped.
+
+    Args:
+        url: Video URL handed to yt-dlp.
+        output_dir: Directory in which the ``.mp4`` file is stored.
+
+    Returns:
+        ``(video_path, safe_title)``: the local file path and the sanitised
+        title it was derived from.
+
+    Raises:
+        RuntimeError: if the yt-dlp executable cannot be located.
+        subprocess.CalledProcessError: if the download command exits non-zero.
+    """
+    print("[1/5] 영상 다운로드 중...")
+
+    yt_dlp = _find_yt_dlp()
+
+    # Title lookup is best-effort: on failure the title falls back to
+    # "untitled" instead of aborting the pipeline.
+    # --no-playlist: a URL that names both a video and a playlist must yield
+    #   exactly one title / one file, since the output path below is fixed.
+    # "--": end of options, so a URL or ID starting with "-" is not parsed
+    #   as a yt-dlp flag.
+    result = subprocess.run(
+        [yt_dlp, "--get-title", "--no-playlist", "--encoding", "utf-8", "--", url],
+        capture_output=True, encoding="utf-8", errors="replace"
+    )
+    title = (result.stdout or "").strip() or "untitled"
+    # Replace characters that are invalid in file names (incl. control
+    # characters) and cap the length at 80 characters.
+    safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title)[:80]
+
+    video_path = output_dir / f"{safe_title}.mp4"
+
+    # Cache hit: reuse a previously downloaded file with the same title.
+    if video_path.exists():
+        print(f"  → 이미 다운로드됨: {video_path.name}")
+        return video_path, safe_title
+
+    # Prefer an mp4 of at most 720p, then any mp4, then whatever is best.
+    subprocess.run(
+        [yt_dlp,
+         "--no-playlist",
+         "-f", "best[height<=720][ext=mp4]/best[ext=mp4]/best",
+         "-o", str(video_path),
+         "--", url],
+        encoding="utf-8", errors="replace",
+        check=True
+    )
+    print(f"  → 다운로드 완료: {video_path.name}")
+    return video_path, safe_title
+
+
+# ─── Step 2: Frame Extraction ────────────────────────────────────────────
+
+def extract_frames(video_path: Path, fps: float = DEFAULT_FPS) -> List[np.ndarray]:
+    """Sample frames from a video file with OpenCV.
+
+    Every ``int(video_fps / fps)``-th frame (at least every frame) is kept,
+    starting with the first one. All sampled frames are held in memory.
+
+    Args:
+        video_path: Path of the video file to read.
+        fps: Desired number of sampled frames per second; must be positive.
+
+    Returns:
+        The sampled frames in playback order, as returned by OpenCV.
+
+    Raises:
+        ValueError: if ``fps`` is not positive.
+        RuntimeError: if the video cannot be opened.
+    """
+    if fps <= 0:
+        raise ValueError(f"fps는 0보다 커야 합니다: {fps}")
+
+    print(f"[2/5] 프레임 추출 중 (fps={fps})...")
+    cap = cv2.VideoCapture(str(video_path))
+    try:
+        if not cap.isOpened():
+            raise RuntimeError(f"영상을 열 수 없습니다: {video_path}")
+
+        video_fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        # Containers may report 0 or NaN as frame rate; keep every frame then
+        # (the comparison is False for NaN, so int() is never fed a NaN).
+        frame_interval = max(1, int(video_fps / fps)) if video_fps > 0 else 1
+
+        frames = []
+        frame_idx = 0
+        # grab() advances the stream; retrieve() materialises the image only
+        # for frames that are actually kept.
+        while cap.grab():
+            if frame_idx % frame_interval == 0:
+                ok, frame = cap.retrieve()
+                if not ok:
+                    break
+                frames.append(frame)
+            frame_idx += 1
+    finally:
+        # Release the capture even if opening or decoding raised.
+        cap.release()
+
+    print(f"  → {len(frames)}개 프레임 추출 (전체 {total_frames}프레임, 원본 {video_fps:.1f}fps)")
+    return frames
+
+
+# ─── Step 3: Pattern Detection ───────────────────────────────────────────
+
+def _has_tab_lines(region: np.ndarray, min_lines: int = MIN_TAB_LINES) -> bool:
+    """Report whether ``region`` contains at least ``min_lines`` distinct long horizontal lines.
+
+    Used to decide whether an image area looks like a tab staff.
+
+    Args:
+        region: Image area to inspect; a 3-dimensional array is converted
+            from BGR to grayscale, anything else is used as-is.
+        min_lines: Minimum number of vertically separated lines required.
+
+    Returns:
+        ``True`` when enough separated horizontal lines are found, else ``False``
+        (also for ``None``, empty, or very small regions).
+    """
+    if region is None or region.size == 0:
+        return False
+
+    mono = region if region.ndim != 3 else cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
+    height, width = mono.shape
+    if height < 20 or width < 50:
+        return False
+
+    # Inverse threshold: pixels darker than 180 (lines on a light background)
+    # become foreground.
+    _, inverted = cv2.threshold(mono, 180, 255, cv2.THRESH_BINARY_INV)
+
+    # Opening with a wide, 1-pixel-tall kernel keeps only long horizontal runs.
+    wide_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(width // 4, 30), 1))
+    long_runs = cv2.morphologyEx(inverted, cv2.MORPH_OPEN, wide_kernel)
+
+    segments = cv2.HoughLinesP(long_runs, 1, np.pi / 180, threshold=50,
+                               minLineLength=width // 3, maxLineGap=20)
+    if segments is None:
+        return False
+
+    # Mid-heights of near-horizontal segments (slope under ~5 degrees, with a
+    # 5-pixel minimum tolerance), in ascending order.
+    mid_heights = sorted(
+        (y1 + y2) / 2
+        for x1, y1, x2, y2 in (segment[0] for segment in segments)
+        if abs(y2 - y1) < max(5, abs(x2 - x1) * 0.087)
+    )
+    if len(mid_heights) < min_lines:
+        return False
+
+    # Merge detections of the same staff line: a new line starts only when it
+    # lies more than 2% of the region height below the last line's first hit.
+    min_gap = height * 0.02
+    line_anchors = []
+    for y in mid_heights:
+        if not line_anchors or y - line_anchors[-1] > min_gap:
+            line_anchors.append(y)
+
+    return len(line_anchors) >= min_lines
+
+
+def _detect_white_region(frame: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
+    """Find the largest bright rectangular area of plausible overlay size.
+
+    No check is made here for whether the area actually contains tablature.
+
+    Args:
+        frame: BGR image.
+
+    Returns:
+        ``(x, y, w, h)`` of the largest qualifying bounding box, or ``None``
+        when no contour qualifies.
+    """
+    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+    frame_h, frame_w = gray.shape
+    frame_area = frame_h * frame_w
+
+    # Near-white pixels only; closing bridges small dark gaps (notes, lines)
+    # so a sheet becomes one connected blob.
+    _, bright = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)
+    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
+    blobs = cv2.morphologyEx(bright, cv2.MORPH_CLOSE, closing_kernel)
+    contours, _ = cv2.findContours(blobs, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+    def _box_area(box):
+        return box[2] * box[3]
+
+    def _qualifies(box):
+        # Area share strictly inside the configured bounds and not much
+        # taller than wide (width must exceed half the height).
+        _, _, box_w, box_h = box
+        share = _box_area(box) / frame_area
+        return (OVERLAY_MIN_AREA_RATIO < share < OVERLAY_MAX_AREA_RATIO
+                and box_w > box_h * 0.5)
+
+    qualifying = [box for box in map(cv2.boundingRect, contours) if _qualifies(box)]
+    # max() returns the first of equally large boxes, i.e. the earliest contour.
+    return max(qualifying, key=_box_area, default=None)
+
+
+def _detect_tab_overlay(frame: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
+    """Find a white overlay box that contains tablature.
+
+    Args:
+        frame: BGR image.
+
+    Returns:
+        ``(x, y, w, h)`` of the white region when it holds at least three
+        horizontal tab lines, otherwise ``None``.
+    """
+    box = _detect_white_region(frame)
+    if box is not None:
+        left, top, box_w, box_h = box
+        # Accept the box only if its content looks like a tab staff.
+        if _has_tab_lines(frame[top:top + box_h, left:left + box_w], min_lines=3):
+            return box
+    return None
+
+
+def _detect_split_screen(frames: List[np.ndarray], sample_count: int = 10) -> bool:
+    """Detect a split-screen layout: bright tab sheet on top, dark hand-cam below.
+
+    Evenly spaced sample frames are cut at half height. A sample counts as
+    split-screen when all of the following hold:
+      - mean brightness of the top half > 180 (sheet is almost white),
+      - mean brightness of the bottom half < 100,
+      - the difference between the two exceeds 80,
+      - the top half contains at least 4 horizontal tab lines.
+
+    Args:
+        frames: BGR frames to sample from.
+        sample_count: Maximum number of frames to inspect.
+
+    Returns:
+        ``True`` when more than 30% of the samples match; ``False`` otherwise,
+        including when there is nothing to sample.
+    """
+    DETECT_SPLIT = 0.5  # height fraction at which the frame is cut for detection
+
+    sample_count = min(sample_count, len(frames))
+    if sample_count <= 0:
+        # No frames (or no samples requested): nothing supports "split", and
+        # the ratio below would otherwise divide by zero.
+        return False
+
+    indices = np.linspace(0, len(frames) - 1, sample_count, dtype=int)
+    split_count = 0
+
+    for idx in indices:
+        frame = frames[idx]
+        cut_row = int(frame.shape[0] * DETECT_SPLIT)
+        top_half = frame[:cut_row, :]
+        bottom_half = frame[cut_row:, :]
+
+        top_brightness = np.mean(cv2.cvtColor(top_half, cv2.COLOR_BGR2GRAY))
+        bottom_brightness = np.mean(cv2.cvtColor(bottom_half, cv2.COLOR_BGR2GRAY))
+
+        # Cheap brightness tests first; the line detection runs only when
+        # they pass.
+        if (top_brightness > 180 and bottom_brightness < 100
+                and top_brightness - bottom_brightness > 80
+                and _has_tab_lines(top_half, min_lines=4)):
+            split_count += 1
+
+    return split_count / sample_count > 0.3
+
+
+def detect_pattern(frames: List[np.ndarray], sample_count: int = 20) -> str:
+    """Classify the video layout as ``'overlay'``, ``'split'`` or ``'scroll'``.
+
+    Detection order:
+      1. overlay: a white box containing tab lines is the most specific
+         signal, so it is checked first (more than 30% of samples).
+      2. split: bright tab sheet on top and dark hand-cam below.
+      3. scroll: the default when neither of the above matches.
+
+    Args:
+        frames: BGR frames to sample from.
+        sample_count: Maximum number of evenly spaced frames to inspect.
+
+    Returns:
+        The pattern name. With no frames to inspect the default ``'scroll'``
+        is returned.
+    """
+    print("[3/5] 영상 패턴 분석 중...")
+
+    sample_count = min(sample_count, len(frames))
+
+    # 1) Overlay first: white box with tab lines (most specific).
+    if sample_count > 0:
+        indices = np.linspace(0, len(frames) - 1, sample_count, dtype=int)
+        overlay_count = sum(
+            1 for i in indices if _detect_tab_overlay(frames[i]) is not None
+        )
+        overlay_ratio = overlay_count / sample_count
+    else:
+        # Nothing to sample; avoid dividing by zero and fall through to the
+        # default pattern.
+        overlay_ratio = 0.0
+
+    if overlay_ratio > 0.3:
+        print(f"  → 패턴: overlay (Tab 오버레이 감지율: {overlay_ratio:.0%})")
+        return "overlay"
+
+    # 2) Split screen: tab sheet on top + hand-cam below.
+    if sample_count > 0 and _detect_split_screen(frames, sample_count):
+        print("  → 패턴: split (상단 Tab + 하단 핸드캠)")
+        return "split"
+
+    # 3) Default: scrolling tab.
+    print(f"  → 패턴: scroll (오버레이 감지율: {overlay_ratio:.0%})")
+    return "scroll"
+
+
+# ─── Step 4: Extract Unique Tab Frames ────────────────────────────────────
+
+def compare_frames(frame1: np.ndarray, frame2: np.ndarray) -> float:
+    """Return a similarity score in ``[0, 1]`` for two frames (1 = identical).
+
+    The score is derived from the pixel-wise mean squared error (MSE) of the
+    grayscale images, not from a histogram or a normalised cross-correlation:
+    ``similarity = 1 - min(8 * MSE, 1)`` with pixel values scaled to ``[0, 1]``.
+
+    Args:
+        frame1: First frame (BGR, or already single-channel).
+        frame2: Second frame; resized to ``frame1``'s size if the shapes differ.
+
+    Returns:
+        Similarity as a Python ``float``.
+    """
+    # Grayscale conversion (3-dimensional inputs are assumed to be BGR).
+    g1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY) if len(frame1.shape) == 3 else frame1
+    g2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY) if len(frame2.shape) == 3 else frame2
+
+    # Bring the second image to the size of the first.
+    if g1.shape != g2.shape:
+        g2 = cv2.resize(g2, (g1.shape[1], g1.shape[0]))
+
+    # Downscale to a width of 320 for speed and noise reduction.
+    target_w = 320
+    if g1.shape[1] > target_w:
+        scale = target_w / g1.shape[1]
+        # Clamp to 1 so a very wide, short image does not produce a zero
+        # height, which cv2.resize rejects.
+        new_size = (target_w, max(1, int(g1.shape[0] * scale)))
+        g1 = cv2.resize(g1, new_size)
+        g2 = cv2.resize(g2, new_size)
+
+    # Mean squared error on [0, 1]-scaled pixels: 0 = identical.
+    g1_f = g1.astype(np.float32) / 255.0
+    g2_f = g2.astype(np.float32) / 255.0
+    mse = np.mean((g1_f - g2_f) ** 2)
+
+    # MSE → similarity. Factor 8: MSE 0.005 → 0.96, MSE 0.06 → 0.52,
+    # MSE >= 0.125 → 0.0.
+    similarity = 1.0 - min(mse * 8.0, 1.0)
+    # Convert the NumPy scalar so callers get the annotated built-in float.
+    return float(max(0.0, similarity))
+
+
+def extract_unique_scroll(frames: List[np.ndarray],
+                          crop_ratio: float = DEFAULT_CROP_RATIO,
+                          threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    """Scroll pattern: crop the top of each frame and drop consecutive duplicates.
+
+    Args:
+        frames: BGR frames in playback order.
+        crop_ratio: Fraction of the frame height kept, measured from the top.
+        threshold: A crop is kept when its similarity to the previously kept
+            crop is below this value.
+
+    Returns:
+        The kept crops in order; the first frame's crop is always kept.
+    """
+    print("[4/5] 스크롤형 Tab 프레임 추출 중...")
+
+    kept = []
+    last_kept = None
+
+    for frame in frames:
+        top_part = frame[:int(frame.shape[0] * crop_ratio), :]
+
+        # Compare against the last kept crop, not the immediately preceding frame.
+        if last_kept is None or compare_frames(top_part, last_kept) < threshold:
+            kept.append(top_part)
+            last_kept = top_part
+
+    print(f"  → {len(kept)}개 고유 프레임 선별 (임계값: {threshold})")
+    return kept
+
+
+def _normalize_overlay(crop: np.ndarray, target_w: int = 320,
+                        target_h: int = 120) -> np.ndarray:
+    """Fit an overlay crop onto a fixed-size white canvas for comparison.
+
+    The crop is scaled uniformly to fit inside ``target_w`` x ``target_h``
+    and centred on a white 3-channel canvas.
+
+    Args:
+        crop: Non-empty 3-channel image (the canvas it is pasted into has
+            three channels).
+        target_w: Canvas width in pixels.
+        target_h: Canvas height in pixels.
+
+    Returns:
+        A ``uint8`` array of shape ``(target_h, target_w, 3)``.
+    """
+    h, w = crop.shape[:2]
+    scale = min(target_w / w, target_h / h)
+    # Clamp to 1 pixel: for extreme aspect ratios the truncated size would be
+    # 0, which cv2.resize rejects.
+    new_w = max(1, int(w * scale))
+    new_h = max(1, int(h * scale))
+    resized = cv2.resize(crop, (new_w, new_h))
+
+    # Centre on a white canvas.
+    canvas = np.full((target_h, target_w, 3), 255, dtype=np.uint8)
+    offset_x = (target_w - new_w) // 2
+    offset_y = (target_h - new_h) // 2
+    canvas[offset_y:offset_y + new_h, offset_x:offset_x + new_w] = resized
+    return canvas
+
+
+def extract_unique_overlay(frames: List[np.ndarray],
+                           threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    """Overlay pattern: crop the white tab box from each frame and drop duplicates.
+
+    Each candidate is compared with the most recently kept crops (a sliding
+    window) rather than only the last one, so that gradual drift does not
+    let duplicates through.
+
+    Args:
+        frames: BGR frames in playback order.
+        threshold: A candidate whose similarity to any crop in the window is
+            at or above this value is treated as a duplicate.
+
+    Returns:
+        The kept overlay crops (original resolution, padded), in order.
+    """
+    print("[4/5] 오버레이형 Tab 프레임 추출 중...")
+
+    WINDOW_SIZE = 5   # number of recently kept crops to compare against
+    MIN_CROP_H = 40   # detections lower than this are ignored
+    MIN_CROP_W = 100  # detections narrower than this are ignored
+    PAD = 10          # margin added around the detected box
+
+    kept = []
+    window = []  # normalised versions of the most recently kept crops
+
+    for frame in frames:
+        box = _detect_tab_overlay(frame)
+        if box is None:
+            continue
+
+        left, top, box_w, box_h = box
+        if box_h < MIN_CROP_H or box_w < MIN_CROP_W:
+            continue
+
+        # Grow the box by the margin, clipped to the frame.
+        frame_h, frame_w = frame.shape[:2]
+        left = max(0, left - PAD)
+        top = max(0, top - PAD)
+        box_w = min(frame_w - left, box_w + 2 * PAD)
+        box_h = min(frame_h - top, box_h + 2 * PAD)
+
+        candidate = frame[top:top + box_h, left:left + box_w]
+        signature = _normalize_overlay(candidate)
+
+        # Duplicate if it matches any crop still in the window.
+        if any(compare_frames(signature, seen) >= threshold for seen in window):
+            continue
+
+        kept.append(candidate)
+        window.append(signature)
+        if len(window) > WINDOW_SIZE:
+            del window[0]
+
+    print(f"  → {len(kept)}개 고유 오버레이 프레임 선별")
+    return kept
+
+
+def extract_unique_split(frames: List[np.ndarray],
+                         crop_ratio: float = SPLIT_TOP_RATIO,
+                         threshold: float = 0.95) -> List[np.ndarray]:
+    """Split-screen pattern: crop the top tab area and drop consecutive duplicates.
+
+    Frames whose crop is dark (mean brightness below 120) or shows fewer than
+    three tab lines are skipped before the duplicate check.
+
+    Threshold rationale, per the original author's notes: with the MSE-based
+    score identical frames give sim > 0.999, a moving cursor about 0.995, and
+    a real tab change 0.60–0.91, so 0.95 was chosen as the balance point.
+
+    Args:
+        frames: BGR frames in playback order.
+        crop_ratio: Fraction of the frame height kept, measured from the top.
+        threshold: A crop is kept when its similarity to the previously kept
+            crop is below this value.
+
+    Returns:
+        The kept crops in order.
+    """
+    print(f"[4/5] 분할 화면형 Tab 프레임 추출 중 (crop={crop_ratio:.0%}, sim={threshold})...")
+
+    kept = []
+    last_kept = None
+
+    for frame in frames:
+        top_part = frame[:int(frame.shape[0] * crop_ratio), :]
+
+        # Skip dark frames such as intro/outro screens.
+        if np.mean(cv2.cvtColor(top_part, cv2.COLOR_BGR2GRAY)) < 120:
+            continue
+
+        # Skip frames that do not show a tab staff.
+        if not _has_tab_lines(top_part, min_lines=3):
+            continue
+
+        if last_kept is None or compare_frames(top_part, last_kept) < threshold:
+            kept.append(top_part)
+            last_kept = top_part
+
+    print(f"  → {len(kept)}개 고유 분할화면 프레임 선별")
+    return kept
+
+
+# ─── Step 5: Generate PDF ─────────────────────────────────────────────────
+
+def generate_pdf(frames: List[np.ndarray], output_path: Path,
+                 debug_dir: Optional[Path] = None) -> None:
+    """Write the frames to a multi-page PDF, one frame per page.
+
+    Args:
+        frames: BGR frames; each becomes one page at its original pixel size.
+        output_path: Destination PDF path. Missing parent directories are
+            created.
+        debug_dir: When given, every page is additionally saved there as
+            ``frame_NNNN.png``; the directory is created if missing.
+
+    Returns:
+        None. With no frames a warning is printed and no file is written.
+    """
+    print("[5/5] PDF 생성 중...")
+
+    if not frames:
+        print("  ⚠ 추출된 프레임이 없습니다!")
+        return
+
+    if debug_dir:
+        debug_dir.mkdir(parents=True, exist_ok=True)
+
+    pdf_pages = []
+    for i, frame in enumerate(frames):
+        # OpenCV stores BGR; Pillow expects RGB. The result is an "RGB" mode
+        # image, which the PDF writer accepts without further conversion.
+        page = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+
+        # Debug mode: also keep each page as an individual image.
+        if debug_dir:
+            page.save(debug_dir / f"frame_{i:04d}.png")
+
+        pdf_pages.append(page)
+
+    # A user-supplied output path may point into a directory that does not
+    # exist yet.
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Pillow writes a multi-page PDF by saving the first image and appending
+    # the remaining ones.
+    pdf_pages[0].save(
+        str(output_path),
+        save_all=True,
+        append_images=pdf_pages[1:],
+        resolution=PDF_DPI,
+    )
+    print(f"  → PDF 생성 완료: {output_path}")
+    print(f"     {len(pdf_pages)} 페이지, 파일 크기: {output_path.stat().st_size / 1024:.0f} KB")
+
+
+# ─── Also generate single long PNG ────────────────────────────────────────
+
+def generate_long_image(frames: List[np.ndarray], output_path: Path) -> None:
+    """Stack all frames vertically into one tall image and save it.
+
+    Narrower frames are scaled up (keeping aspect ratio) to the width of the
+    widest frame before stacking.
+
+    Args:
+        frames: BGR frames, stacked top to bottom in the given order.
+        output_path: Destination image path.
+
+    Returns:
+        None. Nothing is written when ``frames`` is empty.
+    """
+    if not frames:
+        return
+
+    target_width = max(frame.shape[1] for frame in frames)
+
+    def _fit_width(frame):
+        # Frames already at the target width are used untouched.
+        if frame.shape[1] == target_width:
+            return frame
+        factor = target_width / frame.shape[1]
+        return cv2.resize(frame, (target_width, int(frame.shape[0] * factor)))
+
+    stacked = np.vstack([_fit_width(frame) for frame in frames])
+    long_image = Image.fromarray(cv2.cvtColor(stacked, cv2.COLOR_BGR2RGB))
+    long_image.save(str(output_path))
+    print(f"  → 롱 이미지 생성: {output_path} ({long_image.width}x{long_image.height})")
+
+
+# ─── Main Pipeline ────────────────────────────────────────────────────────
+
+def main():
+    """Command-line entry point: download, sample, classify, de-duplicate, export.
+
+    Parses the CLI arguments, runs the five pipeline steps and writes a PDF
+    plus a single tall PNG. Exits with status 1 when no frames can be
+    extracted or no unique frames remain.
+    """
+    parser = argparse.ArgumentParser(
+        description="YouTube 기타 TAB 영상 → PDF 캡처",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+예시:
+  python youtube_tab_to_pdf.py "https://youtu.be/90BWvJY6KbE"
+  python youtube_tab_to_pdf.py "https://youtu.be/Ri9g4lwnrJQ" -o my_tab.pdf --debug
+  python youtube_tab_to_pdf.py "https://youtu.be/VIDEO" --pattern overlay --crop-ratio 0.6
+        """,
+    )
+    parser.add_argument("url", help="YouTube 영상 URL")
+    parser.add_argument("-o", "--output", help="출력 PDF 파일 경로")
+    # --crop-ratio and --similarity default to None so that "not given" can be
+    # told apart from "explicitly set to the default value"; the effective
+    # defaults are resolved per pattern below.
+    parser.add_argument("--crop-ratio", type=float, default=None,
+                        help=f"Tab 영역 크롭 비율 (기본: {DEFAULT_CROP_RATIO})")
+    parser.add_argument("--fps", type=float, default=DEFAULT_FPS,
+                        help=f"프레임 추출 빈도 (기본: {DEFAULT_FPS})")
+    parser.add_argument("--similarity", type=float, default=None,
+                        help=f"프레임 유사도 임계값 (기본: {SIMILARITY_THRESHOLD})")
+    parser.add_argument("--pattern", choices=["auto", "scroll", "overlay", "split"],
+                        default="auto", help="영상 패턴 (기본: auto)")
+    parser.add_argument("--debug", action="store_true", help="중간 이미지 저장")
+
+    args = parser.parse_args()
+
+    # A non-positive sampling rate cannot be turned into a frame interval.
+    if args.fps <= 0:
+        parser.error("--fps 값은 0보다 커야 합니다.")
+
+    # Output directory (also used as the download location).
+    output_dir = Path("output")
+    output_dir.mkdir(exist_ok=True)
+
+    # Debug directory for per-page images.
+    debug_dir = None
+    if args.debug:
+        debug_dir = output_dir / "debug_frames"
+        debug_dir.mkdir(exist_ok=True)
+
+    # ── Step 1: Download ──
+    video_path, safe_title = download_video(args.url, output_dir)
+
+    # ── Step 2: Extract Frames ──
+    frames = extract_frames(video_path, fps=args.fps)
+    if not frames:
+        print("❌ 프레임을 추출할 수 없습니다.")
+        sys.exit(1)
+
+    # ── Step 3: Detect Pattern ──
+    if args.pattern == "auto":
+        pattern = detect_pattern(frames)
+    else:
+        pattern = args.pattern
+        print(f"[3/5] 패턴 수동 지정: {pattern}")
+
+    # ── Step 4: Extract Unique Frames ──
+    similarity = SIMILARITY_THRESHOLD if args.similarity is None else args.similarity
+    if pattern == "scroll":
+        crop_ratio = DEFAULT_CROP_RATIO if args.crop_ratio is None else args.crop_ratio
+        unique_frames = extract_unique_scroll(
+            frames, crop_ratio=crop_ratio, threshold=similarity
+        )
+    elif pattern == "split":
+        # Split mode keeps its own tuned defaults (crop=SPLIT_TOP_RATIO,
+        # sim=0.95) unless the user passed a value on the command line.
+        split_kwargs = {}
+        if args.crop_ratio is not None:
+            split_kwargs['crop_ratio'] = args.crop_ratio
+        if args.similarity is not None:
+            split_kwargs['threshold'] = args.similarity
+        unique_frames = extract_unique_split(frames, **split_kwargs)
+    else:
+        unique_frames = extract_unique_overlay(
+            frames, threshold=similarity
+        )
+
+    if not unique_frames:
+        print("❌ 고유 프레임을 찾을 수 없습니다. --similarity 값을 낮추거나 --pattern을 수동 지정해보세요.")
+        sys.exit(1)
+
+    # ── Step 5: Generate Output ──
+    if args.output:
+        pdf_path = Path(args.output)
+    else:
+        pdf_path = output_dir / f"{safe_title}.pdf"
+
+    generate_pdf(unique_frames, pdf_path, debug_dir=debug_dir)
+
+    # Bonus: the same frames as one tall PNG next to the PDF.
+    long_img_path = pdf_path.with_suffix(".png")
+    generate_long_image(unique_frames, long_img_path)
+
+    print("\n✅ 완료!")
+    print(f"   PDF: {pdf_path}")
+    print(f"   PNG: {long_img_path}")
+    if debug_dir:
+        debug_count = len(list(debug_dir.glob("*.png")))
+        print(f"   Debug: {debug_dir} ({debug_count}개 이미지)")
+
+# Run the pipeline only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()