#!/usr/bin/env python3
"""
YouTube Tab → PDF 캡처 파이프라인
YouTube 기타 TAB 영상에서 Tab 프레임을 추출하여 깔끔한 A4 PDF 악보로 만듭니다.

사용법:
    python youtube_tab_to_pdf.py "https://youtu.be/VIDEO_ID"
    python youtube_tab_to_pdf.py "https://youtu.be/VIDEO_ID" -o output.pdf --debug
"""

import argparse
import os
import sys
import subprocess
import shutil
import re
from pathlib import Path
from typing import List, Tuple, Optional

import cv2
from video_cv_tracker import TemporalTracker
import numpy as np
import img2pdf
from PIL import Image

# Module-level cache for the EasyOCR reader; stays None until the first
# successful load.
_ocr_reader = None


def _get_ocr_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    The reader is built for English (``['en']``) and cached in the
    module-level ``_ocr_reader``. When importing/constructing EasyOCR raises
    ``ImportError`` a warning is printed and ``None`` is returned; nothing is
    cached in that case, so a later call tries again.
    """
    global _ocr_reader
    if _ocr_reader is not None:
        return _ocr_reader

    print("  → EasyOCR 모델 로딩 중 (초회 1번)...")
    try:
        # Imported lazily so the rest of the pipeline works without EasyOCR.
        import easyocr
        _ocr_reader = easyocr.Reader(['en'])
    except ImportError:
        print("  [경고] easyocr 라이브러리가 없습니다. OCR 중복 검증을 건너뜁니다.")
        return None
    return _ocr_reader

def _dedup_by_measure_number(frames: List[np.ndarray]) -> List[np.ndarray]:
    """Drop frames whose OCR-read measure number repeats the previous one.

    For every frame a small region in the left 8% of the image, just above
    the first detected staff line, is OCR-ed for a 1-3 digit number. A frame
    is removed when its number equals the last number that was read; frames
    where no number is found are always kept.

    Args:
        frames: Tab images (BGR or grayscale arrays).

    Returns:
        The filtered list, or ``frames`` unchanged when no OCR reader is
        available.
    """
    ocr = _get_ocr_reader()
    if not ocr:
        return frames

    print(f"  → 마디번호 기반 3차 중복 검증 시작 ({len(frames)} 프레임)")
    kept = []
    previous_number = None

    for index, frame in enumerate(frames):
        height, width = frame.shape[:2]
        if len(frame.shape) == 3:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        else:
            gray = frame

        # Horizontal projection of dark pixels: a row that is dark over more
        # than half of the width is treated as a staff line.
        _, dark_mask = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
        dark_per_row = np.sum(dark_mask, axis=1) / 255
        staff_rows = np.where(dark_per_row > width * 0.5)[0]

        # Only the left 8% of the width is scanned (keeps chord diagrams out).
        number_cols = int(width * 0.08)
        if len(staff_rows) > 0:
            first_staff_y = staff_rows[0]
            # From 45 px above the first staff line down to 2 px above it.
            y_from = max(0, first_staff_y - 45)
            y_to = max(10, first_staff_y - 2)
            number_area = gray[y_from:y_to, :number_cols]
        else:
            # Fallback when no staff line is found: fixed top-left ratio.
            number_area = gray[:int(height * 0.25), :number_cols]

        # Upscale 3x and binarise to help OCR on the small digits.
        enlarged = cv2.resize(number_area, (0, 0), fx=3.0, fy=3.0,
                              interpolation=cv2.INTER_CUBIC)
        _, ocr_input = cv2.threshold(enlarged, 150, 255, cv2.THRESH_BINARY_INV)

        detections = ocr.readtext(ocr_input, allowlist='0123456789')

        # First detection with confidence > 0.4 that is a 1-3 digit number.
        number = next(
            (hit[1] for hit in (detections or [])
             if hit[2] > 0.4 and hit[1].isdigit() and len(hit[1]) <= 3),
            None,
        )

        if number is None:
            print(f"    - 프레임 {index+1}: 마디번호 미검출 (유지)")
        elif number == previous_number:
            print(f"    - 프레임 {index+1}: 마디번호 [{number}] 중복 감지 (삭제)")
            continue
        else:
            previous_number = number
            print(f"    - 프레임 {index+1}: 마디번호 [{number}] (유지)")

        kept.append(frame)

    print(f"  → OCR 3차: {len(kept)}개 고유 Tab 프레임")
    return kept

# On Windows, switch both console streams to UTF-8 (replacing unencodable
# characters) so the non-ASCII progress messages can be printed.
if sys.platform == "win32":
    for _stream in (sys.stdout, sys.stderr):
        _stream.reconfigure(encoding="utf-8", errors="replace")
    del _stream


# ─── 설정 ─────────────────────────────────────────────────────────────────

# Default sampling rate (frames per second) used when extracting frames.
DEFAULT_FPS = 2
# Default similarity threshold passed to the scroll extractor.
SIMILARITY_THRESHOLD = 0.95
# Overlay crops whose similarity to an earlier crop is >= this value are
# treated as duplicates.
OVERLAY_SIMILARITY_THRESHOLD = 0.55

# An overlay candidate's bounding-box area must lie strictly between these
# fractions of the whole frame area.
OVERLAY_MIN_AREA_RATIO = 0.05
OVERLAY_MAX_AREA_RATIO = 0.6
# NOTE(review): not referenced in the visible code; presumably the minimum
# number of tab lines required by a detector — confirm before relying on it.
MIN_TAB_LINES = 4

# Maximum frame width kept during extraction; wider frames are downscaled to
# this width to save memory.
MAX_FRAME_WIDTH = 1280
# NOTE(review): not referenced in the visible code; the original comment
# describes it as an upscale width used for detection — confirm.
DETECT_WIDTH = 960

# PDF layout: render resolution, A4 page size, page margin, and the vertical
# gap between consecutive tab images (all lengths in millimetres).
PDF_DPI = 150
PDF_PAGE_WIDTH_MM = 210
PDF_PAGE_HEIGHT_MM = 297
PDF_MARGIN_MM = 10
TAB_GAP_MM = 3


# ─── Step 1: 다운로드 ─────────────────────────────────────────────────────

def _find_yt_dlp() -> str:
    """Locate the yt-dlp executable and return its path as a string.

    Search order: ``PATH`` (via ``shutil.which``), then
    ``%APPDATA%/Python/Python3{12,11,10}/Scripts/yt-dlp.exe``, then
    ``Scripts/yt-dlp.exe`` next to the running interpreter.

    Raises:
        RuntimeError: if none of the locations contains yt-dlp.
    """
    on_path = shutil.which("yt-dlp")
    if on_path:
        return on_path

    # Per-user pip install locations, newest Python version first.
    per_user = (
        Path(os.environ.get("APPDATA", "")) / "Python" / version_dir / "Scripts" / "yt-dlp.exe"
        for version_dir in ("Python312", "Python311", "Python310")
    )
    found = next((candidate for candidate in per_user if candidate.exists()), None)

    if found is None:
        beside_interpreter = Path(sys.executable).parent / "Scripts" / "yt-dlp.exe"
        if beside_interpreter.exists():
            found = beside_interpreter

    if found is None:
        raise RuntimeError("yt-dlp를 찾을 수 없습니다. pip install yt-dlp")
    return str(found)


def download_video(url: str, output_dir: Path) -> Tuple[Path, str]:
    """Download the video at *url* into *output_dir* using yt-dlp.

    The video title is queried first and sanitised (characters that are
    invalid in file names become ``_``, truncated to 80 characters) to build
    the target ``<title>.mp4``. If that file already exists the download is
    skipped.

    The download requests ``bestvideo[ext=mp4]`` sorted with ``res:720``,
    i.e. a video-only stream, since only the picture is processed and no
    audio merge is needed.

    Returns:
        ``(video_path, safe_title)``.

    Raises:
        RuntimeError: if yt-dlp cannot be located.
        subprocess.CalledProcessError: if the download command fails.
    """
    print("[1/5] 영상 다운로드 중...")
    yt_dlp = _find_yt_dlp()

    title_query = subprocess.run(
        [yt_dlp, "--get-title", "--encoding", "utf-8", url],
        capture_output=True, encoding="utf-8", errors="replace"
    )
    raw_title = (title_query.stdout or "").strip()
    title = raw_title if raw_title else "untitled"
    safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title)[:80]
    video_path = output_dir / f"{safe_title}.mp4"

    if not video_path.exists():
        download_cmd = [
            yt_dlp,
            "-f", "bestvideo[ext=mp4]",
            "-S", "res:720",
            "-o", str(video_path), url,
        ]
        subprocess.run(download_cmd, encoding="utf-8", errors="replace", check=True)
        print(f"  → 다운로드 완료: {video_path.name}")
    else:
        print(f"  → 이미 다운로드됨: {video_path.name}")

    return video_path, safe_title


# ─── Step 2: 프레임 추출 ──────────────────────────────────────────────────

def extract_frames(video_path: Path, fps: float = DEFAULT_FPS) -> List[np.ndarray]:
    """Sample frames from a video at roughly *fps* frames per second.

    Every ``interval``-th decoded frame is kept, where ``interval`` is the
    video's native frame rate divided by *fps* (at least 1, so every frame is
    kept when the container reports no frame rate). Frames wider than
    ``MAX_FRAME_WIDTH`` are downscaled to that width, preserving aspect ratio,
    to limit memory use.

    Args:
        video_path: Path of the video file to read.
        fps: Target sampling rate; must be positive.

    Returns:
        The sampled frames in playback order.

    Raises:
        ValueError: if *fps* is not positive.
        RuntimeError: if the video cannot be opened.
    """
    if fps <= 0:
        # Previously surfaced as ZeroDivisionError / a meaningless interval.
        raise ValueError(f"fps는 0보다 커야 합니다: {fps}")

    print(f"[2/5] 프레임 추출 중 (fps={fps})...")
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            raise RuntimeError(f"영상을 열 수 없습니다: {video_path}")

        video_fps = cap.get(cv2.CAP_PROP_FPS)
        interval = max(1, int(video_fps / fps))
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Downscale anything wider than MAX_FRAME_WIDTH (memory saving).
        target_size = None
        if w > MAX_FRAME_WIDTH:
            scale = MAX_FRAME_WIDTH / w
            target_size = (MAX_FRAME_WIDTH, int(h * scale))
            print(f"  → {w}x{h} → {target_size[0]}x{target_size[1]} 다운스케일")

        frames = []
        idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if idx % interval == 0:
                if target_size is not None:
                    frame = cv2.resize(frame, target_size, interpolation=cv2.INTER_AREA)
                frames.append(frame)
                if len(frames) % 50 == 0:
                    print(f"    ... {len(frames)}번째 프레임 추출 진행 중...", flush=True)
            idx += 1
    finally:
        # Release the capture even when opening or decoding raises.
        cap.release()

    print(f"  → {len(frames)}개 프레임 추출 ({w}x{h}, 원본 {video_fps:.0f}fps)")
    return frames


# ─── 핵심: 흰색 배경 Tab 영역 검출 ───────────────────────────────────────

def _find_white_tab_strip(frame: np.ndarray, min_strip_ratio: float = 0.10) -> Optional[Tuple[int, int]]:
    """Return the Y range ``(top, bottom)`` of the white tab strip in *frame*.

    Strategy: in HSV space a pixel counts as "tab-like" when it is either
    near-white (V > 180, S < 40) or a bright pastel (V > 200, S < 100, e.g.
    a highlight box). Rows where more than half of the central 80% of the
    width is tab-like are "bright"; runs of bright rows, tolerating short
    dark gaps caused by tab lines and notes, form candidate strips. The
    tallest strip that is at least ``min_strip_ratio`` of the frame height
    is returned, padded by 3% of the height on both sides.

    Args:
        frame: BGR image.
        min_strip_ratio: Minimum strip height as a fraction of frame height.

    Returns:
        ``(top, bottom)`` with ``bottom`` exclusive, clamped to the frame, or
        ``None`` when no strip qualifies.
    """
    h, w = frame.shape[:2]
    margin_x = int(w * 0.1)

    # Use saturation and value together so coloured backgrounds are rejected.
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    _, s_ch, v_ch = cv2.split(hsv)

    roi_v = v_ch[:, margin_x:w - margin_x]
    roi_s = s_ch[:, margin_x:w - margin_x]

    pure_white = (roi_v > 180) & (roi_s < 40)
    bright_pastel = (roi_v > 200) & (roi_s < 100)
    tab_mask = pure_white | bright_pastel

    # A row is "bright" when more than 50% of its pixels are tab-like.
    row_tab_ratio = np.mean(tab_mask, axis=1)
    bright_rows = row_tab_ratio > 0.5

    # Dark gaps of up to ~2% of the height stay inside the same strip.
    max_gap = int(h * 0.02)
    min_len = h * min_strip_ratio

    regions = []
    start = None
    last_bright = -1

    def _close_region() -> None:
        # The end is exclusive (last bright row + 1) in every case. The
        # previous code used the last bright row's index for strips closed
        # mid-frame, dropping one row compared with strips at the frame end.
        end = last_bright + 1
        if end - start >= min_len:
            regions.append((start, end))

    for i in range(h):
        if bright_rows[i]:
            if start is None:
                start = i
            last_bright = i
        elif start is not None and i - last_bright > max_gap:
            _close_region()
            start = None
    if start is not None:
        _close_region()

    if not regions:
        return None

    # Keep the tallest strip.
    best = max(regions, key=lambda r: r[1] - r[0])

    # Small padding so the bottom of the tab is not clipped.
    pad = int(h * 0.03)
    top = max(0, best[0] - pad)
    bottom = min(h, best[1] + pad)

    return (top, bottom)


def _trim_to_content(crop: np.ndarray, margin_px: int = 6) -> np.ndarray:
    """Trim a generously cropped tab image down to its tab content rows.

    A row is an anchor when more than 20% of it is white/bright-pastel and it
    either looks like a tab row (30-97% white) or has some dark pixels
    (more than 0.2% below gray level 180). The result spans from 120 px above
    the first anchor row (room for measure numbers printed above the staff)
    to ``margin_px`` below the last one.

    Args:
        crop: BGR image.
        margin_px: Rows kept below the last anchor row.

    Returns:
        The trimmed view, or *crop* unchanged when it is smaller than
        15x50 px or the trimmed height would be below 15 px.
    """
    h, w = crop.shape[:2]
    if h < 15 or w < 50:
        return crop

    hsv = cv2.cvtColor(crop, cv2.COLOR_BGR2HSV)
    saturation = hsv[:, :, 1]
    value = hsv[:, :, 2]

    # Fraction of white / bright-pastel pixels per row (tab background).
    is_background = ((value > 180) & (saturation < 40)) | ((value > 200) & (saturation < 100))
    white_fraction = np.mean(is_background, axis=1)
    looks_like_tab = (white_fraction > 0.30) & (white_fraction < 0.97)

    # Very low threshold so tiny glyphs such as measure numbers still count.
    if len(crop.shape) == 3:
        gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
    else:
        gray = crop
    has_ink = np.mean(gray < 180, axis=1) > 0.002

    anchor_rows = np.flatnonzero((looks_like_tab | has_ink) & (white_fraction > 0.20))
    if anchor_rows.size:
        top = max(0, int(anchor_rows[0]) - 120)
        bottom = min(h, int(anchor_rows[-1]) + margin_px)
    else:
        top, bottom = 0, h

    if bottom - top < 15:
        return crop

    return crop[top:bottom, :]


def _has_tab_content(region: np.ndarray) -> bool:
    """Check whether a (mostly white) region actually contains tab content.

    The share of dark pixels (gray level below 180: lines, digits, chord
    names) must be strictly between 2% and 30%. A nearly empty white area
    falls below that range and ordinary video footage above it.

    Returns:
        ``False`` for ``None``, empty, or smaller-than-15x50 regions;
        otherwise whether the dark-pixel ratio is in range.
    """
    if region is None or region.size == 0:
        return False

    if len(region.shape) == 3:
        gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
    else:
        gray = region

    rows, cols = gray.shape
    if rows < 15 or cols < 50:
        return False

    dark_ratio = np.count_nonzero(gray < 180) / gray.size
    return 0.02 < dark_ratio < 0.30


# ─── Step 3: 패턴 감지 ────────────────────────────────────────────────────

def _detect_tab_overlay(frame: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
    """Find the largest white overlay box in *frame* that contains tab content.

    Bright pixels (gray > 220) are closed with a 15x15 kernel and the
    external contours' bounding boxes are examined. A box qualifies when its
    area ratio is within the overlay limits, it is narrower than 85% of the
    frame (a full-width strip is the scroll layout instead), it is wider than
    half its height, and its pixels pass ``_has_tab_content``.

    Returns:
        ``(x, y, width, height)`` of the largest qualifying box, or ``None``.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame_h, frame_w = gray.shape

    _, white_mask = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
    closed = cv2.morphologyEx(white_mask, cv2.MORPH_CLOSE, closing_kernel)
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    frame_area = frame_h * frame_w
    winner = None
    winner_area = 0

    for contour in contours:
        x, y, box_w, box_h = cv2.boundingRect(contour)
        box_area = box_w * box_h
        area_ratio = box_area / frame_area

        if not (OVERLAY_MIN_AREA_RATIO < area_ratio < OVERLAY_MAX_AREA_RATIO):
            continue
        if box_w / frame_w >= 0.85:
            continue
        if box_w <= box_h * 0.5:
            continue
        if box_area <= winner_area:
            continue

        # Only accept the box if it really holds tab-like content.
        if _has_tab_content(frame[y:y + box_h, x:x + box_w]):
            winner = (x, y, box_w, box_h)
            winner_area = box_area

    return winner


def detect_pattern(frames: List[np.ndarray], sample_count: int = 20) -> str:
    """Classify the video layout as ``"scroll"`` or ``"overlay"``.

    Up to *sample_count* evenly spaced frames are inspected:

    1. If a white tab strip is found in at least 60% of them -> ``"scroll"``.
    2. Otherwise, if an overlay box is found in more than 20% -> ``"overlay"``.
    3. Otherwise ``"scroll"`` is returned as the fallback.

    Args:
        frames: Extracted video frames.
        sample_count: Number of frames to sample; clamped to
            ``1..len(frames)``.

    Returns:
        ``"scroll"`` or ``"overlay"``. An empty *frames* list yields the
        ``"scroll"`` fallback instead of a division-by-zero error.
    """
    print("[3/5] 영상 패턴 분석 중...")

    if not frames:
        print("  → 패턴: scroll (fallback, 프레임 없음)")
        return "scroll"

    # Clamp so the ratios below never divide by zero.
    sample_count = max(1, min(sample_count, len(frames)))

    indices = np.linspace(0, len(frames) - 1, sample_count, dtype=int)
    sample_frames = [frames[i] for i in indices]

    # 1) White tab strip (scroll layout) is checked first.
    tab_top_count = 0
    tab_bottom_count = 0
    for f in sample_frames:
        strip = _find_white_tab_strip(f)
        if strip is None:
            continue
        top, bottom = strip
        # Strip centre above the middle of the frame counts as "top".
        if (top + bottom) / 2 < f.shape[0] * 0.5:
            tab_top_count += 1
        else:
            tab_bottom_count += 1

    tab_ratio = (tab_top_count + tab_bottom_count) / sample_count
    position = "상단" if tab_top_count > tab_bottom_count else "하단"

    if tab_ratio >= 0.6:
        print(f"  → 패턴: scroll (Tab {position}, 감지율: {tab_ratio:.0%})")
        return "scroll"

    # 2) Strip rarely found: look for overlay boxes instead.
    overlay_count = sum(1 for f in sample_frames if _detect_tab_overlay(f) is not None)
    overlay_ratio = overlay_count / sample_count
    if overlay_ratio > 0.2:
        print(f"  → 패턴: overlay (감지율: {overlay_ratio:.0%})")
        return "overlay"

    # 3) Neither matched: default to scroll.
    print(f"  → 패턴: scroll (fallback, Tab {position}, 감지율: {tab_ratio:.0%})")
    return "scroll"


# ─── Step 4: 고유 Tab 프레임 추출 ─────────────────────────────────────────

def compare_frames(frame1: np.ndarray, frame2: np.ndarray) -> float:
    """Return an MSE-based similarity in ``[0, 1]`` (1 means identical).

    Both inputs are converted to grayscale, *frame2* is resized to
    *frame1*'s size if they differ, and both are shrunk to 480 px width when
    wider. The mean squared error of the 0-1 normalised images is multiplied
    by 8 and clipped to 1 before being subtracted from 1.
    """
    def _as_gray(img: np.ndarray) -> np.ndarray:
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img

    first = _as_gray(frame1)
    second = _as_gray(frame2)

    if first.shape != second.shape:
        second = cv2.resize(second, (first.shape[1], first.shape[0]))

    compare_w = 480
    if first.shape[1] > compare_w:
        factor = compare_w / first.shape[1]
        small = (compare_w, int(first.shape[0] * factor))
        first = cv2.resize(first, small)
        second = cv2.resize(second, small)

    diff = (first.astype(np.float32) - second.astype(np.float32)) / 255.0
    mse = np.mean(diff ** 2)
    similarity = 1.0 - min(mse * 8.0, 1.0)
    return max(0.0, similarity)


def _dhash(image: np.ndarray, hash_size: int = 32) -> np.ndarray:
    """Compute a difference hash of *image*.

    The image is converted to grayscale and resized to
    ``(hash_size + 1) x hash_size`` pixels; each bit records whether a pixel
    is brighter than its left neighbour.

    Returns:
        Flat boolean array of ``hash_size * hash_size`` bits (1024 by
        default).
    """
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image
    shrunk = cv2.resize(gray, (hash_size + 1, hash_size), interpolation=cv2.INTER_AREA)
    left_neighbours = shrunk[:, :-1]
    right_neighbours = shrunk[:, 1:]
    return (right_neighbours > left_neighbours).flatten()


def _dedup_by_hash(frames: List[np.ndarray],
                   max_hamming: int = 20) -> List[np.ndarray]:
    """Cluster frames by difference-hash distance and keep one per cluster.

    Clustering is greedy: each not-yet-assigned frame seeds a cluster and
    absorbs every later unassigned frame whose hash differs from the seed's
    in at most *max_hamming* bits. From each cluster the sharpest frame
    (largest variance of the Laplacian) is kept.

    Returns:
        One frame per cluster, in order of the cluster seeds; ``[]`` for
        empty input.
    """
    if not frames:
        return []

    fingerprints = [_dhash(f) for f in frames]
    count = len(frames)
    assigned = [False] * count
    groups = []

    for seed in range(count):
        if assigned[seed]:
            continue
        assigned[seed] = True
        members = [seed]
        for other in range(seed + 1, count):
            if assigned[other]:
                continue
            distance = int(np.count_nonzero(fingerprints[seed] != fingerprints[other]))
            if distance <= max_hamming:
                members.append(other)
                assigned[other] = True
        groups.append(members)

    def _sharpness(idx: int) -> float:
        # Variance of the Laplacian as a focus measure.
        candidate = frames[idx]
        if len(candidate.shape) == 3:
            candidate = cv2.cvtColor(candidate, cv2.COLOR_BGR2GRAY)
        return cv2.Laplacian(candidate, cv2.CV_64F).var()

    return [frames[max(members, key=_sharpness)] for members in groups]


def _extract_print_channel(frame: np.ndarray) -> np.ndarray:
    """Return channel index 2 of a 3-D frame for print output.

    For BGR input this is the red channel, in which yellow highlights appear
    bright. Non-3-D input is returned unchanged.
    """
    is_colour = len(frame.shape) == 3
    return frame[..., 2] if is_colour else frame

def _extract_tracking_channel(frame: np.ndarray) -> np.ndarray:
    """Return channel index 0 of a 3-D frame for scroll tracking.

    For BGR input this is the blue channel, in which yellow highlights appear
    dark and therefore act as strong markers for template matching. Non-3-D
    input is returned unchanged.
    """
    is_colour = len(frame.shape) == 3
    return frame[..., 0] if is_colour else frame

def _detect_scroll_offset(frame_a: np.ndarray, frame_b: np.ndarray, min_confidence: float = 0.1) -> Tuple[int, float]:
    """Estimate the horizontal scroll between a previous and a current frame.

    The right half of *frame_a* (tracking channel) is located inside
    *frame_b* by normalised cross-correlation; the scroll is how far to the
    left that half has moved.

    Returns:
        ``(scroll_px, score)``. ``scroll_px`` is 0 when the best score is
        below *min_confidence* or the shift is not positive.
    """
    _, width = frame_a.shape[:2]

    current = _extract_tracking_channel(frame_b)
    previous = _extract_tracking_channel(frame_a)

    template_w = int(width * 0.5)
    template_x = width - template_w
    right_half = previous[:, template_x:]

    scores = cv2.matchTemplate(current, right_half, cv2.TM_CCOEFF_NORMED)
    _, best_score, _, best_loc = cv2.minMaxLoc(scores)

    shift = template_x - best_loc[0]
    if best_score < min_confidence or shift <= 0:
        shift = 0
    return (shift, best_score)

def _detect_measure_bars(gray_pano: np.ndarray) -> List[int]:
    """Return the X coordinates of bar lines in a single-channel panorama.

    Rows that are dark (< 200) over more than half the width are staff
    lines; the staff spans from the first such row to the last one within
    100 px of it. Columns that are dark over at least 80% of that span are
    bar-line candidates. Candidates less than 30 px apart are merged into
    their mean, and any merged position closer than 100 px to the position
    immediately before it is dropped.

    Returns:
        Bar-line X positions (ints, ascending); ``[]`` when fewer than two
        staff rows are found or the staff is shorter than 10 px.
    """
    _, dark = cv2.threshold(gray_pano, 200, 255, cv2.THRESH_BINARY_INV)
    height, width = dark.shape

    staff_rows = np.where(np.sum(dark, axis=1) / 255 > width * 0.5)[0]
    if len(staff_rows) < 2:
        return []

    staff_top = staff_rows[0]
    # Last staff row within 100 px of the top line (rows are ascending).
    staff_bottom = staff_rows[staff_rows - staff_top <= 100][-1]

    staff_height = staff_bottom - staff_top + 1
    if staff_height < 10:
        return []

    column_fill = np.sum(dark[staff_top:staff_bottom + 1, :], axis=0) / 255
    bar_cols = np.where(column_fill >= staff_height * 0.8)[0]

    # Split wherever neighbouring candidate columns are 30 px or more apart.
    split_points = np.flatnonzero(np.diff(bar_cols) >= 30) + 1
    positions = [int(np.mean(group))
                 for group in np.split(bar_cols, split_points)
                 if group.size]
    if not positions:
        return positions

    # Spacing filter: the comparison is against the previous merged position
    # whether or not that one was itself kept.
    spaced = [positions[0]]
    spaced.extend(right for left, right in zip(positions, positions[1:])
                  if right - left >= 100)
    return spaced

def _stamp_measure_number(measure_bgr: np.ndarray, num: int) -> np.ndarray:
    """Draw ``[num]`` near the top-left corner of a measure image.

    The text is drawn in place on *measure_bgr* with colour ``(200, 0, 0)``
    (blue when the image is BGR), and the same array is returned.
    """
    cv2.putText(
        measure_bgr,
        f"[{num}]",
        (15, 30),                  # text origin
        cv2.FONT_HERSHEY_SIMPLEX,
        0.7,                       # font scale
        (200, 0, 0),
        2,                         # stroke thickness
        cv2.LINE_AA,
    )
    return measure_bgr

def _stitch_scroll_segment(segment: List[np.ndarray]) -> np.ndarray:
    """Stitch consecutive scrolling frames into one wide panorama.

    All frames are cut to the smallest height in the segment. For each frame
    after the first, only the newly revealed right-hand strip is appended
    when a scroll is detected with score above 0.15; otherwise the whole
    frame is appended. A single-frame segment is returned as is.
    """
    if len(segment) == 1:
        return segment[0]

    common_h = min(f.shape[0] for f in segment)
    trimmed = [f[:common_h, :] for f in segment]

    pieces = [trimmed[0]]
    for before, after in zip(trimmed, trimmed[1:]):
        scroll_px, score = _detect_scroll_offset(before, after, min_confidence=0.1)
        if scroll_px > 0 and score > 0.15:
            # Only the columns that scrolled into view are new.
            pieces.append(after[:, after.shape[1] - scroll_px:])
        else:
            pieces.append(after)
    return np.hstack(pieces)

def _merge_scroll_candidates(candidates: List[np.ndarray], min_scroll: int = 5, min_segment_len: int = 2) -> List[np.ndarray]:
    """Group consecutive frames into scroll segments and stitch each segment.

    A scene cut is declared between two neighbouring frames when the scroll
    match score is at most 0.15 or it dropped by more than 0.4 compared with
    the previous pair. Only the score is used; the scroll distance itself is
    not part of the cut test. Segments with at least *min_segment_len* frames
    are stitched into a panorama; shorter ones are passed through frame by
    frame.

    Args:
        candidates: Tab crops in playback order.
        min_scroll: Accepted for interface compatibility; not used.
        min_segment_len: Minimum number of frames for a segment to be
            stitched.

    Returns:
        Panoramas and pass-through frames in order. Inputs with zero or one
        element are returned unchanged.
    """
    if len(candidates) <= 1:
        return candidates

    result = []

    def _flush(segment: List[np.ndarray]) -> None:
        # Emit a finished segment, stitched when it is long enough.
        if len(segment) >= min_segment_len:
            result.append(_stitch_scroll_segment(segment))
        else:
            result.extend(segment)

    current_segment = [candidates[0]]
    prev_conf = 1.0

    for prev_frame, curr_frame in zip(candidates, candidates[1:]):
        _, conf = _detect_scroll_offset(prev_frame, curr_frame, min_confidence=0.1)

        is_cut = (conf <= 0.15) or (prev_conf - conf > 0.4)
        if is_cut:
            _flush(current_segment)
            current_segment = [curr_frame]
        else:
            current_segment.append(curr_frame)

        prev_conf = conf

    _flush(current_segment)
    return result

def merge_panoramas_list(panoramas):
    """Merge consecutive panoramas that overlap into longer "master" panoramas.

    For each following panorama its leading part (up to 800 px) is searched
    for in the trailing part (up to 1500 px) of the current master. The wide
    head and the short search window are meant to stop repeated,
    identical-looking passages from matching an older occurrence. When the
    match score exceeds 0.50 the non-overlapping remainder is appended to the
    master; otherwise the master is finished and the panorama starts a new
    one.

    Returns:
        List of merged panoramas (copies of the inputs); ``[]`` for empty
        input.
    """
    if not panoramas:
        return []

    finished = []
    master = panoramas[0].copy()

    for pano in panoramas[1:]:
        incoming = pano.copy()

        head_width = min(800, incoming.shape[1])
        tail_width = min(1500, master.shape[1])
        head_track = _extract_tracking_channel(incoming[:, :head_width])
        tail_track = _extract_tracking_channel(master[:, -tail_width:])

        merged = False
        fits = (head_track.shape[1] <= tail_track.shape[1]
                and head_track.shape[0] == tail_track.shape[0])
        if fits:
            scores = cv2.matchTemplate(tail_track, head_track, cv2.TM_CCOEFF_NORMED)
            _, best_score, _, best_loc = cv2.minMaxLoc(scores)

            if best_score > 0.50:
                # Master column where the incoming panorama's column 0 lands.
                match_x = master.shape[1] - tail_width + best_loc[0]
                # Number of incoming columns already present in the master.
                overlap = master.shape[1] - match_x
                if overlap < incoming.shape[1]:
                    remainder = incoming[:, overlap:]
                    if remainder.shape[1] > 0:
                        master = np.hstack([master, remainder])
                merged = True

        if not merged:
            finished.append(master)
            master = incoming

    finished.append(master)
    return finished

def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
    """Build a panorama from a scrolling tab strip and cut it into rows.

    The strip's vertical range is the median of the ranges detected in the
    first 50 frames. Every frame's strip that contains tab content is fed to
    a ``TemporalTracker``, whose final panorama is split into rows of
    1280 px: at detected bar lines when any are found (measures are packed
    greedily into rows), otherwise at fixed 1280 px steps. Rows narrower
    than 1280 px are padded with white on the right.

    Args:
        frames: Extracted video frames.
        threshold: Accepted for interface compatibility; not used here.

    Returns:
        Row images in reading order; ``[]`` when no strip or no panorama is
        available.
    """
    print(f"[4/5] 스크롤형 Tab 시계열 추적 추출 중...")

    detected = [s for s in (_find_white_tab_strip(f) for f in frames[:50]) if s]
    if not detected:
        return []

    median_top = int(np.median([s[0] for s in detected]))
    median_bottom = int(np.median([s[1] for s in detected]))

    tracker = TemporalTracker()
    for frame in frames:
        strip_img = frame[max(0, median_top):min(frame.shape[0], median_bottom), :]
        if _has_tab_content(strip_img):
            tracker.process_frame(strip_img)

    panorama = tracker.get_final_panorama()
    if panorama is None:
        return []

    print(f"  -> 생성된 파노라마 길이: {panorama.shape[1]}px")

    chunk_width = 1280

    def _pad_row(row: np.ndarray) -> np.ndarray:
        # Fill the row up to chunk_width with white; wider rows are untouched.
        missing = chunk_width - row.shape[1]
        if missing <= 0:
            return row
        filler = np.full((row.shape[0], missing, 3), 255, dtype=np.uint8)
        return np.hstack([row, filler])

    final_chunks = []
    bar_coords = _detect_measure_bars(_extract_print_channel(panorama))
    pano_w = panorama.shape[1]

    if not bar_coords:
        # No bar lines found: cut at fixed intervals.
        for left in range(0, pano_w, chunk_width):
            final_chunks.append(_pad_row(panorama[:, left:min(pano_w, left + chunk_width)]))
    else:
        cuts = sorted(set([0] + bar_coords + [pano_w]))

        row = None
        for left, right in zip(cuts, cuts[1:]):
            # Slivers narrower than 50 px are skipped.
            if right - left < 50:
                continue

            measure = panorama[:, left:right]
            if row is None:
                row = measure
            elif row.shape[1] + measure.shape[1] > chunk_width:
                # The measure does not fit: finish this row, start the next.
                final_chunks.append(_pad_row(row))
                row = measure
            else:
                row = np.hstack([row, measure])

        if row is not None:
            final_chunks.append(_pad_row(row))

    print(f"  -> A4 분할 컷: {len(final_chunks)}개")
    return final_chunks

def extract_unique_overlay(frames: List[np.ndarray],
                           threshold: float = OVERLAY_SIMILARITY_THRESHOLD) -> List[np.ndarray]:
    """Extract unique tab overlay boxes from *frames*.

    Each detected overlay (at least 100x40 px) is cropped with 10 px of
    padding, skipped when its mean gray level is below 120, and compared in a
    480x180 normalised form against every overlay kept so far; a similarity
    of *threshold* or more marks it as a duplicate. The survivors then go
    through the OCR-based measure-number de-duplication.

    Returns:
        The unique overlay crops at their original resolution.
    """
    print("[4/5] 오버레이형 Tab 추출 중...")

    pad = 10
    kept_crops = []
    kept_thumbs = []

    for frame in frames:
        box = _detect_tab_overlay(frame)
        if box is None:
            continue

        x, y, box_w, box_h = box
        if box_h < 40 or box_w < 100:
            continue

        # Grow the box by the padding, clamped to the frame.
        x = max(0, x - pad)
        y = max(0, y - pad)
        box_w = min(frame.shape[1] - x, box_w + 2 * pad)
        box_h = min(frame.shape[0] - y, box_h + 2 * pad)
        crop = frame[y:y + box_h, x:x + box_w]

        # Brightness filter: too dark to be a white tab box.
        if np.mean(cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)) < 120:
            continue

        # Normalise to a fixed size so crops can be compared.
        thumb = cv2.resize(crop, (480, 180), interpolation=cv2.INTER_AREA)
        canvas = np.full((180, 480, 3), 255, dtype=np.uint8)
        canvas[:thumb.shape[0], :thumb.shape[1]] = thumb

        # Compare against the whole history, not just the previous crop.
        if any(compare_frames(canvas, seen) >= threshold for seen in kept_thumbs):
            continue

        kept_crops.append(crop)
        kept_thumbs.append(canvas)

    # Final pass: OCR-based measure-number de-duplication.
    if kept_crops:
        kept_crops = _dedup_by_measure_number(kept_crops)

    print(f"  → 최종: {len(kept_crops)}개 고유 Tab 오버레이")
    return kept_crops


# ─── Step 5: A4 PDF 생성 ─────────────────────────────────────────────────

def generate_pdf(frames: List[np.ndarray], output_path: Path,
                 debug_dir: Optional[Path] = None) -> None:
    """Lay out tab images top-to-bottom on A4 pages and save them as a PDF.

    Every image is scaled to the printable width and stacked with a small
    gap; a new page is started when the next image would cross the bottom
    margin. An image taller than the printable area is placed alone on its
    own page (and is clipped by the page) without emitting a blank page in
    front of it.

    Args:
        frames: BGR tab images in reading order.
        output_path: Destination PDF path.
        debug_dir: When given, each frame is also saved there as
            ``frame_NNNN.png`` at its original size.

    Returns:
        None. Nothing is written when *frames* is empty.
    """
    print("[5/5] A4 PDF 생성 중...")
    if not frames:
        print("  ⚠ 프레임 없음!")
        return

    # Page geometry in pixels at PDF_DPI.
    page_w = int(PDF_PAGE_WIDTH_MM / 25.4 * PDF_DPI)
    page_h = int(PDF_PAGE_HEIGHT_MM / 25.4 * PDF_DPI)
    margin = int(PDF_MARGIN_MM / 25.4 * PDF_DPI)
    gap = int(TAB_GAP_MM / 25.4 * PDF_DPI)
    content_w = page_w - 2 * margin

    resized = []
    for i, frame in enumerate(frames):
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(rgb)
        if debug_dir:
            img.save(debug_dir / f"frame_{i:04d}.png")
        scale = content_w / img.width
        # Keep at least one row so very flat images cannot resize to height 0.
        target_h = max(1, int(img.height * scale))
        resized.append(img.resize((content_w, target_h), Image.LANCZOS))

    pages = []
    page = Image.new('RGB', (page_w, page_h), (255, 255, 255))
    cur_y = margin
    page_has_content = False

    for img in resized:
        # Break the page only if it already holds something; otherwise an
        # oversized image would push an empty page into the output.
        if page_has_content and cur_y + img.height > page_h - margin:
            pages.append(page)
            page = Image.new('RGB', (page_w, page_h), (255, 255, 255))
            cur_y = margin
            page_has_content = False
        page.paste(img, (margin, cur_y))
        cur_y += img.height + gap
        page_has_content = True

    if page_has_content:
        pages.append(page)

    pages[0].save(str(output_path), save_all=True,
                  append_images=pages[1:], resolution=PDF_DPI)
    print(f"  → PDF: {len(resized)} Tab → {len(pages)} 페이지, {output_path.stat().st_size // 1024} KB")


def generate_long_image(frames: List[np.ndarray], output_path: Path) -> None:
    """Stack all tab images vertically into one long image and save it.

    Narrower images are scaled up to the widest frame's width (keeping their
    aspect ratio) before stacking. Nothing is written when *frames* is empty.
    """
    if not frames:
        return

    target_w = max(f.shape[1] for f in frames)

    def _fit_width(img: np.ndarray) -> np.ndarray:
        if img.shape[1] == target_w:
            return img
        factor = target_w / img.shape[1]
        return cv2.resize(img, (target_w, int(img.shape[0] * factor)))

    stacked = np.vstack([_fit_width(f) for f in frames])
    Image.fromarray(cv2.cvtColor(stacked, cv2.COLOR_BGR2RGB)).save(str(output_path))
    print(f"  → 롱 이미지: {target_w}x{stacked.shape[0]}")


# ─── Main ─────────────────────────────────────────────────────────────────

def main():
    """Command-line entry point: download, extract, de-duplicate, export.

    Writes into ``./output`` (created if missing): the downloaded video, the
    PDF (unless ``-o`` gives another path), a PNG with the same stem as the
    PDF, and optional debug frames. Exits with status 1 when no frames or no
    unique tab images are produced.
    """
    parser = argparse.ArgumentParser(description="YouTube TAB → A4 PDF")
    parser.add_argument("url", help="YouTube URL")
    parser.add_argument("-o", "--output", help="출력 PDF 경로")
    parser.add_argument("--fps", type=float, default=DEFAULT_FPS)
    parser.add_argument("--similarity", type=float, default=None)
    parser.add_argument("--pattern", choices=["auto", "scroll", "overlay"],
                        default="auto")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    output_dir = Path("output")
    output_dir.mkdir(exist_ok=True)
    debug_dir = None
    if args.debug:
        debug_dir = output_dir / "debug_frames"
        debug_dir.mkdir(exist_ok=True)

    video_path, safe_title = download_video(args.url, output_dir)
    frames = extract_frames(video_path, fps=args.fps)
    if not frames:
        print("❌ 프레임 추출 실패")
        sys.exit(1)

    pattern = detect_pattern(frames) if args.pattern == "auto" else args.pattern

    if pattern == "scroll":
        default_sim, extractor = SIMILARITY_THRESHOLD, extract_unique_scroll
    else:
        default_sim, extractor = OVERLAY_SIMILARITY_THRESHOLD, extract_unique_overlay

    # Compare with None so an explicit "--similarity 0" is honoured instead
    # of silently falling back to the default.
    sim = args.similarity if args.similarity is not None else default_sim
    unique = extractor(frames, threshold=sim)

    if not unique:
        print("❌ 고유 Tab 프레임 없음. --similarity를 낮추거나 --pattern을 수동 지정하세요.")
        sys.exit(1)

    pdf_path = Path(args.output) if args.output else output_dir / f"{safe_title}.pdf"
    generate_pdf(unique, pdf_path, debug_dir=debug_dir)
    generate_long_image(unique, pdf_path.with_suffix(".png"))

    print(f"\n✅ 완료! PDF: {pdf_path}")


# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()