fix(cv): resolve measure duplication by isolating playhead and enforcing 1D sliding correlations

2026-03-29 00:06:38 +09:00
parent 64ecc12d35
commit cd159c2a99
5 changed files with 207 additions and 58 deletions
--- a/youtube_tab_to_pdf.py
+++ b/youtube_tab_to_pdf.py
@@ -18,6 +18,7 @@ from pathlib import Path
 from typing import List, Tuple, Optional

 import cv2
+from video_cv_tracker import TemporalTracker
 import numpy as np
 import img2pdf
 from PIL import Image
@@ -156,12 +157,11 @@ def download_video(url: str, output_dir: Path) -> Tuple[Path, str]:
        print(f"  → 이미 다운로드됨: {video_path.name}")
        return video_path, safe_title

-    # 720p 우선 (다운스케일링 부하 원천 차단)
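+    # Only the video stream is needed (for CV processing), so request a video-only MP4 directly (no ffmpeg merge step) and prefer resolutions up to 720p via format sorting, preventing the downgrade to 360p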
    subprocess.run(
        [yt_dlp,
-         "-f", "bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/"
-               "best[height<=720]/best",
-         "--merge-output-format", "mp4",
+         "-f", "bestvideo[ext=mp4]",
+         "-S", "res:720",
         "-o", str(video_path), url],
        encoding="utf-8", errors="replace", check=True
    )
@@ -659,90 +659,86 @@ def merge_panoramas_list(panoramas):
    return merged_list

 def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
-    print(f"[4/5] 스크롤형 Tab 추출 중 (threshold={threshold})...")
+    print(f"[4/5] 스크롤형 Tab 시계열 추적 추출 중...")
+    
    strip_tops, strip_bottoms = [], []
-    for frame in frames:
+    for frame in frames[:50]:
        strip = _find_white_tab_strip(frame)
        if strip:
            strip_tops.append(strip[0])
            strip_bottoms.append(strip[1])
-    if not strip_tops: return []
+            
+    if not strip_tops:
+        return []
+        
    median_top = int(np.median(strip_tops))
    median_bottom = int(np.median(strip_bottoms))
    
-    candidates, all_compared = [], []
+    tracker = TemporalTracker()
+    
    for frame in frames:
        h = frame.shape[0]
        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
-        if not _has_tab_content(tab_crop): continue
-        compare_img = cv2.resize(tab_crop, (480, 120), interpolation=cv2.INTER_AREA)
-        is_dup = False
-        for ref in all_compared:
-            if compare_frames(compare_img, ref) >= threshold:
-                is_dup = True
-                break
-        if not is_dup:
-            candidates.append(tab_crop)
-            all_compared.append(compare_img)
-
-    stitched = _merge_scroll_candidates(candidates)
-    merged_panoramas = merge_panoramas_list(stitched)
-    
-    chunk_width = candidates[0].shape[1] if candidates else 1280
-    final_chunks = []
-    global_measure_counter = 1
-    current_row = None
-    
-    for pano in merged_panoramas:
-        gray_pano = _extract_print_channel(pano)
-        bar_coords = _detect_measure_bars(gray_pano)
-        
-        if not bar_coords:
-            w = pano.shape[1]
-            start_x = 0
-            while start_x < w:
-                chunk = pano[:, start_x:min(w, start_x + chunk_width)]
-                if chunk.shape[1] < chunk_width:
-                    pad = np.full((chunk.shape[0], chunk_width - chunk.shape[1], 3), 255, dtype=np.uint8)
-                    chunk = np.hstack([chunk, pad])
-                gray_c = _extract_print_channel(chunk)
-                final_chunks.append(cv2.cvtColor(gray_c, cv2.COLOR_GRAY2BGR))
-                start_x += chunk_width
+        if not _has_tab_content(tab_crop): 
            continue
-            
-        coords = [0] + bar_coords + [pano.shape[1]]
+        tracker.process_frame(tab_crop)
+
+    panorama = tracker.get_final_panorama()
+    if panorama is None:
+        return []
+        
+    print(f"  -> 생성된 파노라마 길이: {panorama.shape[1]}px")
+    
+    chunk_width = 1280
+    final_chunks = []
+    
+    gray_pano = _extract_print_channel(panorama)
+    bar_coords = _detect_measure_bars(gray_pano)
+    
+    if not bar_coords:
+        w = panorama.shape[1]
+        start_x = 0
+        while start_x < w:
+            chunk = panorama[:, start_x:min(w, start_x + chunk_width)]
+            if chunk.shape[1] < chunk_width:
+                pad = np.full((chunk.shape[0], chunk_width - chunk.shape[1], 3), 255, dtype=np.uint8)
+                chunk = np.hstack([chunk, pad])
+            final_chunks.append(chunk)
+            start_x += chunk_width
+    else:
+        coords = [0] + bar_coords + [panorama.shape[1]]
        coords = sorted(list(set(coords)))
        
+        current_row = None
        for i in range(len(coords) - 1):
            x_start = coords[i]
            x_end = coords[i+1]
            if x_end - x_start < 50:
                continue
                
-            measure_img = pano[:, x_start:x_end]
-            gray_m = _extract_print_channel(measure_img)
-            bgr_m = cv2.cvtColor(gray_m, cv2.COLOR_GRAY2BGR)
+            measure_img = panorama[:, x_start:x_end]
            
            if current_row is None:
-                current_row = bgr_m
+                current_row = measure_img
            else:
-                if current_row.shape[1] + bgr_m.shape[1] > chunk_width:
+                if current_row.shape[1] + measure_img.shape[1] > chunk_width:
                    pad_w = chunk_width - current_row.shape[1]
                    if pad_w > 0:
                        pad_img = np.full((current_row.shape[0], pad_w, 3), 255, dtype=np.uint8)
                        current_row = np.hstack([current_row, pad_img])
                    final_chunks.append(current_row)
-                    current_row = bgr_m
+                    current_row = measure_img
                else:
-                    current_row = np.hstack([current_row, bgr_m])
+                    current_row = np.hstack([current_row, measure_img])
                    
-    if current_row is not None:
-        pad_w = chunk_width - current_row.shape[1]
-        if pad_w > 0:
-            pad_img = np.full((current_row.shape[0], pad_w, 3), 255, dtype=np.uint8)
-            current_row = np.hstack([current_row, pad_img])
-        final_chunks.append(current_row)
-
+        if current_row is not None:
+            pad_w = chunk_width - current_row.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((current_row.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                current_row = np.hstack([current_row, pad_img])
+            final_chunks.append(current_row)
+            
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개")
    return final_chunks

 def extract_unique_overlay(frames: List[np.ndarray],