chore(docs): document ScoreExtractor tiling and refactor debug scripts (#563)

2026-03-29 17:57:40 +09:00
parent 39b55f2e9f
commit ac0c098259
698 changed files with 141180 additions and 195 deletions
--- a/scripts/debug/check_top.py
+++ b/scripts/debug/check_top.py
@@ -0,0 +1,31 @@
+import cv2
+import numpy as np
+
+def img_to_ascii(image, max_w=120):
+    """Print an image to stdout as ASCII art, max_w characters wide.
+
+    image may be a file path (loaded with cv2.imread) or an already-loaded
+    array; nothing is printed when the image cannot be read.
+    """
+    if isinstance(image, str):
+        image = cv2.imread(image)
+    if image is None: return
+    
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
+    h, w = gray.shape
+    # Keep the aspect ratio, but never ask cv2.resize for a zero-pixel height.
+    resized = cv2.resize(gray, (max_w, max(1, int(h * (max_w / w)))))
+    
+    chars = " .:-=+*#%@"  # pixel value 0 maps to ' ', 255 maps to '@'
+    for row in resized:
+        print("".join(chars[int((val / 255.0) * (len(chars) - 1))] for val in row))
+
+if __name__ == "__main__":
+    img = cv2.imread("C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/verify_pano_chunk_00.png")
+    if img is None:  # cv2.imread returns None instead of raising when the file cannot be read
+        raise SystemExit("Could not read verify_pano_chunk_00.png")
+    cv2.imwrite("C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/verify_first_measure.png", img[:50, :200])
+    
+    print("Exported verify_first_measure.png from verify_pano_chunk_00.png")
+
+
--- a/scripts/debug/debug_121.png
+++ b/scripts/debug/debug_121.png
--- a/scripts/debug/debug_38.png
+++ b/scripts/debug/debug_38.png
--- a/scripts/debug/debug_blocks.py
+++ b/scripts/debug/debug_blocks.py
@@ -0,0 +1,19 @@
+# Research Script for Debugging process_pages
+import cv2
+import pickle
+import os
+
+out_dir = r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\debug_blocks"
+os.makedirs(out_dir, exist_ok=True)
+
+with open('unique_pages.pkl', 'rb') as f:
+    unique_pages = pickle.load(f)
+
+from score_extractor import ScoreExtractor
+extractor = ScoreExtractor()
+extractor.process_pages(unique_pages)
+
+for i, block in enumerate(extractor.final_sheet_chunks):
+    cv2.imwrite(os.path.join(out_dir, f"block_{i:02d}.png"), block)
+
+print("Dumped blocks!")
--- a/scripts/debug/debug_c1.png
+++ b/scripts/debug/debug_c1.png
--- a/scripts/debug/debug_c3.png
+++ b/scripts/debug/debug_c3.png
--- a/scripts/debug/debug_crash.py
+++ b/scripts/debug/debug_crash.py
@@ -0,0 +1,48 @@
+import cv2
+import pickle
+import traceback
+
+try:
+    with open('unique_pages.pkl', 'rb') as f:
+        unique_pages = pickle.load(f)
+except Exception:
+    import video_cv_tracker as tracker_lib
+    from youtube_tab_to_pdf import _find_white_tab_strip
+    tracker = tracker_lib.TemporalTracker(diff_threshold=0.05)
+    video = cv2.VideoCapture("output/shintakarajima.mp4")
+    
+    # Sample roughly 4 frames per second, keeping at most 150 of them
+    frames = []
+    fps_orig = video.get(cv2.CAP_PROP_FPS)
+    stride = max(1, int(fps_orig / 4.0))
+    count = 0
+    while len(frames) < 150:
+        ret, frame = video.read()
+        if not ret: break
+        if count % stride == 0: frames.append(frame)
+        count += 1
+    video.release()
+    if not frames:
+        raise SystemExit("No frames could be read from output/shintakarajima.mp4")
+    
+    top, bottom = 0, frames[0].shape[0]
+    for frame in frames[::10]:
+        b = _find_white_tab_strip(frame)
+        if b:
+            top, bottom = b
+            break
+    for frame in frames:
+        tracker.process_frame(frame[top:bottom, :])
+    unique_pages = tracker.get_unique_pages()
+    with open('unique_pages.pkl', 'wb') as f:
+        pickle.dump(unique_pages, f)
+
+from score_extractor import ScoreExtractor
+ex = ScoreExtractor()
+try:
+    print(f"Running ScoreExtractor on {len(unique_pages)} pages...")
+    ex.process_pages(unique_pages)
+    print("Success!")
+except Exception as e:
+    print("CRASHED:")
+    traceback.print_exc()
--- a/scripts/debug/debug_final_state_machine.png
+++ b/scripts/debug/debug_final_state_machine.png
--- a/scripts/debug/debug_gap_bridged.png
+++ b/scripts/debug/debug_gap_bridged.png
--- a/scripts/debug/debug_last_frame.png
+++ b/scripts/debug/debug_last_frame.png
--- a/scripts/debug/debug_morph_grid.png
+++ b/scripts/debug/debug_morph_grid.png
--- a/scripts/debug/debug_morph_horiz.png
+++ b/scripts/debug/debug_morph_horiz.png
--- a/scripts/debug/debug_morph_vert.png
+++ b/scripts/debug/debug_morph_vert.png
--- a/scripts/debug/debug_numbers.py
+++ b/scripts/debug/debug_numbers.py
@@ -0,0 +1,61 @@
+import cv2
+import numpy as np
+import easyocr
+import os
+from pathlib import Path
+from youtube_tab_to_pdf import _find_white_tab_strip, _has_tab_content, _extract_print_channel, _detect_measure_bars
+
+def main():
+    """Dump and OCR the measure-number sprite of every measure in the video's first frame."""
+    # Source video: https://youtu.be/tJq1n8TofM0
+    video_path = Path("output/サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4")
+    artifact_dir = Path(r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6")
+    
+    # Only the first frame is inspected, so release the capture right away.
+    cap = cv2.VideoCapture(str(video_path))
+    ret, frame = cap.read()
+    cap.release()
+    if not ret:
+        raise SystemExit(f"Could not read a frame from {video_path}")
+    
+    strip = _find_white_tab_strip(frame)
+    if not strip:
+        raise SystemExit("No white tab strip found in the first frame")
+    top, bottom = strip[0], strip[1]
+    
+    tab_crop = frame[max(0, top):min(frame.shape[0], bottom), :]
+    
+    gray_page = _extract_print_channel(tab_crop)
+    bar_coords = _detect_measure_bars(gray_page)
+    
+    # Measure boundaries: the detected bars plus both edges of the crop.
+    coords = sorted(set([0] + bar_coords + [tab_crop.shape[1]]))
+    
+    reader = easyocr.Reader(['en'], verbose=False)
+    
+    for i, (x_start, x_end) in enumerate(zip(coords, coords[1:])):
+        if x_end - x_start < 30: continue
+        m_img = tab_crop[:, x_start:x_end]
+        # Extract Number Sprite precisely
+        gray = cv2.cvtColor(m_img, cv2.COLOR_BGR2GRAY)
+        _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
+        row_sums = np.sum(thresh, axis=1) / 255
+        staff_lines = np.where(row_sums > m_img.shape[1] * 0.5)[0]
+        
+        # A staff line on row 0 leaves no room above it for a sprite (empty crop).
+        if len(staff_lines) > 0 and staff_lines[0] > 0:
+            y_staff = staff_lines[0]
+            # Crop up to 45px above the staff line, leftmost 70px
+            crop_y1 = max(0, y_staff - 45)
+            crop_y2 = y_staff
+            sprite = thresh[crop_y1:crop_y2, 0:min(70, m_img.shape[1])]
+            
+            out_file = artifact_dir / f"debug_sprite_{i}.png"
+            cv2.imwrite(str(out_file), sprite)
+            # OCR
+            upscaled = cv2.resize(sprite, (0, 0), fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC)
+            res = reader.readtext(upscaled, allowlist='0123456789', detail=0)
+            print(f"Measure {i}: Found text = {res}")
+
+if __name__ == "__main__":
+    main()
--- a/scripts/debug/debug_ocr_chunks.py
+++ b/scripts/debug/debug_ocr_chunks.py
--- a/scripts/debug/debug_ocr_image.py
+++ b/scripts/debug/debug_ocr_image.py
--- a/scripts/debug/debug_ocr_measure_2.png
+++ b/scripts/debug/debug_ocr_measure_2.png
--- a/scripts/debug/debug_ocr_measure_3.png
+++ b/scripts/debug/debug_ocr_measure_3.png
--- a/scripts/debug/debug_ocr_measure_4.png
+++ b/scripts/debug/debug_ocr_measure_4.png
--- a/scripts/debug/debug_ocr_measure_5.png
+++ b/scripts/debug/debug_ocr_measure_5.png
--- a/scripts/debug/debug_ocr_measure_6.png
+++ b/scripts/debug/debug_ocr_measure_6.png
--- a/scripts/debug/debug_ocr_measure_7.png
+++ b/scripts/debug/debug_ocr_measure_7.png
--- a/scripts/debug/debug_ocr_measure_8.png
+++ b/scripts/debug/debug_ocr_measure_8.png
--- a/scripts/debug/debug_ocr_measure_9.png
+++ b/scripts/debug/debug_ocr_measure_9.png
--- a/scripts/debug/debug_orb_failures.py
+++ b/scripts/debug/debug_orb_failures.py
@@ -0,0 +1,102 @@
+import cv2
+import numpy as np
+import sys
+from pathlib import Path
+
+# Load tracker directly to inspect ORB
+sys.path.append(str(Path(".").resolve()))
+from video_cv_tracker import TemporalTracker
+
+def main():
+    """Scan the video from frame 500 and print ORB match stats for the first recovered transition."""
+    print("Testing ORB matcher...")
+    # Load test frames from video 1
+    cap = cv2.VideoCapture("output/サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4")
+    
+    # Fast forward to transition frame
+    cap.set(cv2.CAP_PROP_POS_FRAMES, 500)
+    
+    # Consume one frame up front; bail out early if the video cannot be read at all.
+    succ, _ = cap.read()
+    if not succ:
+        raise SystemExit("Could not read frame 500 from the test video")
+    
+    # We will just write a snippet from the actual video loop and manually feed it
+    # We can use debug_video1.py ? No, I'll extract real frames directly where the cut happens
+    # A faster way: Just scan the video for transitions and print the ORB histogram
+    tracker = TemporalTracker()
+    frame_idx = 500
+    while True:
+        succ, frame = cap.read()
+        if not succ: break
+        if frame_idx % 100 == 0:
+            print(f"Reading frame {frame_idx}...", flush=True)
+        
+        # We need the strip, like youtube_tab_to_pdf.py does
+        strip = frame[111:390] # Approximate Region
+        
+        dx, conf = tracker._calculate_pixel_shift(tracker.last_clean_frame if tracker.last_clean_frame is not None else strip, strip)
+        
+        if tracker.panorama is None:
+            tracker.panorama = strip.copy()
+            tracker.last_clean_frame = strip.copy()
+            frame_idx += 1
+            continue
+            
+        if (conf < 0.45) or (tracker.last_conf - conf > 0.3):
+            tracker.in_transition = True
+            
+        elif tracker.in_transition and conf > 0.85 and dx == 0:
+            tracker.in_transition = False
+            print(f"[{frame_idx}] Transition Recovered! Testing ORB...")
+            
+            # RUN ORB
+            search_w = min(1500, tracker.panorama.shape[1])
+            search_region = tracker._extract_print_channel(tracker.panorama[:, -search_w:])
+            head = tracker._extract_print_channel(strip)
+            
+            orb = cv2.ORB_create(1000)
+            kp1, des1 = orb.detectAndCompute(search_region, None)
+            kp2, des2 = orb.detectAndCompute(head, None)
+            
+            print(f"  kp1: {len(kp1) if kp1 else 0}, kp2: {len(kp2) if kp2 else 0}")
+            
+            if des1 is not None and des2 is not None and len(des1) > 10 and len(des2) > 10:
+                bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
+                matches = bf.match(des1, des2)
+                
+                dx_votes = []
+                for m in matches:
+                    x1, y1 = kp1[m.queryIdx].pt
+                    x2, y2 = kp2[m.trainIdx].pt
+                    if abs(y1 - y2) < 10:
+                        dx_votes.append(x1 - x2)
+                
+                if dx_votes:
+                    hist, bins = np.histogram(dx_votes, bins=np.arange(min(dx_votes)-5, max(dx_votes)+5, 5))
+                    best_bin_idx = np.argmax(hist)
+                    print(f"  Max Vote Count: {hist[best_bin_idx]} at dx={bins[best_bin_idx]}")
+                    if hist[best_bin_idx] < 12:
+                        print("  => FAILED! Overlap not found (too few ORB matches). Will append complete new page.")
+                    else:
+                        print("  => SUCCESS! Overlap found.")
+                else:
+                    print("  => FAILED! No dx votes.")
+            else:
+                print("  => FAILED! des1 or des2 is None or less than 10!")
+            
+            tracker.panorama = np.hstack([tracker.panorama, strip])
+            # Stop after the first recovered transition.
+            break
+                
+        elif dx > 0 and not tracker.in_transition:
+            tracker.panorama = np.hstack([tracker.panorama, strip[:, strip.shape[1] - int(dx):, :]])
+            
+        tracker.last_conf = conf
+        tracker.last_clean_frame = strip.copy()
+        frame_idx += 1
+        
+    cap.release()
+
+if __name__ == '__main__':
+    main()
--- a/scripts/debug/debug_output.txt
+++ b/scripts/debug/debug_output.txt
--- a/scripts/debug/debug_overlap.py
+++ b/scripts/debug/debug_overlap.py
@@ -0,0 +1,98 @@
+import cv2
+import numpy as np
+import sys
+import glob
+
+# Test matching between two chunks to see what the score was!
+# Wait, the chunks are the output of the slicing!
+# The tracker works on the original FRAMES!
+# Let's test the tracker on the original frames!
+# I will supply the exact logic used in the tracker.
+
+def test_tracker():
+    video_file = r"C:\Users\Certes\Desktop\guitar_score\output\サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4"
+    cap = cv2.VideoCapture(video_file)
+    
+    panorama = None
+    last_clean_frame = None
+    in_transition = False
+    last_conf = 1.0
+    
+    count = 0
+    saved_matches = []
+    
+    while True:
+        ret, frame = cap.read()
+        if not ret: break
+        
+        count += 1
+        if count % 15 != 0: # fps=2
+            continue
+            
+        frame = cv2.resize(frame, (1280, 720))
+        
+        if panorama is None:
+            panorama = frame.copy()
+            last_clean_frame = frame.copy()
+            continue
+            
+        # calculate shift
+        prev_chan = last_clean_frame[:, :, 0]
+        curr_chan = frame[:, :, 0]
+        w = 1280
+        template_w = int(w * 0.3)
+        start_x = int(w * 0.6)
+        template = prev_chan[:, start_x:start_x + template_w]
+        
+        res = cv2.matchTemplate(curr_chan, template, cv2.TM_CCOEFF_NORMED)
+        _, conf, _, max_loc = cv2.minMaxLoc(res)
+        dx = start_x - max_loc[0]
+        if conf < 0.15 or dx <= 0:
+            dx = 0
+        if dx > w * 0.15:
+            dx = 0
+            
+        if (conf < 0.45) or (last_conf - conf > 0.3):
+            in_transition = True
+        elif in_transition and conf > 0.85 and dx == 0:
+            in_transition = False
+            
+            # overlap logic
+            h = panorama.shape[0]
+            new_page = frame.copy()
+            search_w = min(1500, panorama.shape[1])
+            search_region = panorama[:, -search_w:, 0]
+            
+            head_w = min(400, new_page.shape[1])
+            head = new_page[:, 50:50+head_w, 0]
+            
+            res2 = cv2.matchTemplate(search_region, head, cv2.TM_CCOEFF_NORMED)
+            _, max_val, _, matched_loc = cv2.minMaxLoc(res2)
+            
+            saved_matches.append(max_val)
+            print(f"Page turn detected! Overlap match score: {max_val:.4f} at {matched_loc}")
+            
+            if max_val > 0.65:
+                overlap_px = search_w - matched_loc[0] + 50
+                if overlap_px < new_page.shape[1] - 50:
+                    panorama = np.hstack([panorama, new_page[:, overlap_px:]])
+                else:
+                    pass
+            else:
+                panorama = np.hstack([panorama, new_page])
+                
+        elif dx > 0 and dx < w and not in_transition:
+            new_strip = frame[:, w - dx:, :]
+            panorama = np.hstack([panorama, new_strip])
+
+        last_conf = conf
+        last_clean_frame = frame.copy()
+        
+        if len(saved_matches) >= 3:
+            break
+            
+    cap.release()
+    print("Test complete.")
+
+if __name__ == "__main__":
+    test_tracker()
--- a/scripts/debug/debug_sequence.py
+++ b/scripts/debug/debug_sequence.py
@@ -0,0 +1,73 @@
+import cv2
+import numpy as np
+from score_extractor import ScoreExtractor
+from youtube_tab_to_pdf import extract_unique_scroll, _detect_tab_overlay
+
+# Simplified run script to dump all macro blocks and ignored pages
+frames = []
+video = cv2.VideoCapture("sakanaction shintakarajima.mp4")
+fps_orig = video.get(cv2.CAP_PROP_FPS)
+stride = max(1, int(fps_orig / 4.0))
+count = 0
+while True:
+    ret, frame = video.read()
+    if not ret: break
+    if count % stride == 0:
+        frames.append(frame)
+    count += 1
+video.release()
+
+from video_cv_tracker import TemporalTracker
+from youtube_tab_to_pdf import _find_white_tab_strip
+tracker = TemporalTracker(diff_threshold=0.05)
+tab_bounds = None
+for f in frames[::30]:
+    b = _find_white_tab_strip(f)
+    if b:
+        tab_bounds = b
+        break
+top, bottom = tab_bounds if tab_bounds else (0, frames[0].shape[0])
+
+for f in frames:
+    tracker.process_frame(f[top:bottom, :])
+
+unique = tracker.get_unique_pages()
+
+ex = ScoreExtractor()
+# Manually process them and print verbose output
+ex.macro_blocks = [unique[0].copy()]
+ex.history_pages = [unique[0]]
+
+for i, page in enumerate(unique[1:], 1):
+    current = ex.macro_blocks[-1]
+    head_w = min(800, page.shape[1])
+    search_w = min(1500, current.shape[1])
+    
+    h_gray = cv2.cvtColor(page[:, :head_w], cv2.COLOR_BGR2GRAY)
+    s_gray = cv2.cvtColor(current[:, -search_w:], cv2.COLOR_BGR2GRAY)
+    
+    res = cv2.matchTemplate(s_gray, h_gray, cv2.TM_CCOEFF_NORMED)
+    _, max_val, _, max_loc = cv2.minMaxLoc(res)
+    
+    if max_val > 0.50:
+        print(f"[Page {i}] Stitched! max_val={max_val:.2f}")
+        absolute_match_x = current.shape[1] - search_w + max_loc[0]
+        next_start_idx = current.shape[1] - absolute_match_x
+        if next_start_idx < page.shape[1]:
+            append_part = page[:, next_start_idx:]
+            ex.macro_blocks[-1] = np.hstack([ex.macro_blocks[-1], append_part])
+            ex.history_pages.append(append_part)
+    else:
+        # Check repeat
+        is_repeat = ex._is_historical_repeat(page)
+        print(f"[Page {i}] Jump! max_val={max_val:.2f}, repeat={is_repeat}")
+        if is_repeat:
+            # We will save the rejected page to see if it was 22-29
+            cv2.imwrite(f"rejected_page_{i}.png", page)
+        else:
+            ex.macro_blocks.append(page.copy())
+            ex.history_pages.append(page)
+
+# Dump the starts of the blocks
+for j, b in enumerate(ex.macro_blocks):
+    cv2.imwrite(f"macro_block_{j}_start.png", b[:, :1800])
--- a/scripts/debug/debug_stitch.py
+++ b/scripts/debug/debug_stitch.py
@@ -0,0 +1,21 @@
+import cv2
+import numpy as np
+
+cap = cv2.VideoCapture("output/サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4")
+
+# Skip to 30 seconds
+fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
+cap.set(cv2.CAP_PROP_POS_FRAMES, 30 * fps)
+
+ret, frame_30s = cap.read()
+if ret:
+    cv2.imwrite("C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/raw_frame_30s.png", frame_30s)
+    
+# Skip to 35 seconds
+cap.set(cv2.CAP_PROP_POS_FRAMES, 35 * fps)
+ret, frame_35s = cap.read()
+if ret:
+    cv2.imwrite("C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/raw_frame_35s.png", frame_35s)
+    
+cap.release()
+print("Saved raw frames for structural analysis.")
--- a/scripts/debug/debug_super_scale.png
+++ b/scripts/debug/debug_super_scale.png
--- a/scripts/debug/debug_temporal_binary.png
+++ b/scripts/debug/debug_temporal_binary.png
--- a/scripts/debug/debug_temporal_median.png
+++ b/scripts/debug/debug_temporal_median.png
--- a/scripts/debug/debug_test_m1_sprite.py
+++ b/scripts/debug/debug_test_m1_sprite.py
--- a/scripts/debug/debug_video1.py
+++ b/scripts/debug/debug_video1.py
@@ -0,0 +1,35 @@
+import cv2
+import os
+import shutil
+
+video_file = r"C:\Users\Certes\Desktop\guitar_score\output\サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4"
+debug_dir = r"C:\Users\Certes\Desktop\guitar_score\output\debug_video1"
+
+if os.path.exists(debug_dir):
+    shutil.rmtree(debug_dir)
+os.makedirs(debug_dir)
+
+cap = cv2.VideoCapture(video_file)
+fps_orig = cap.get(cv2.CAP_PROP_FPS)
+# Save one frame per ~10 s. Use a whole-frame interval: a float modulus with a fractional
+# FPS (e.g. 29.97) is almost never exactly 0 after frame 0, and FPS 0 would divide by zero.
+frame_interval = max(1, round(fps_orig * 10))
+
+count = 0
+saved = 0
+
+while True:
+    ret, frame = cap.read()
+    if not ret: break
+    
+    if count % frame_interval == 0:
+        frame = cv2.resize(frame, (1280, 720))
+        cv2.imwrite(os.path.join(debug_dir, f"frame_{count:05d}.jpg"), frame)
+        saved += 1
+        if saved > 30:
+            break
+                
+    count += 1
+
+cap.release()
+print(f"Extraction complete. {saved} frames saved.")
--- a/scripts/debug/dump_frames.py
+++ b/scripts/debug/dump_frames.py
@@ -0,0 +1,33 @@
+"""Raw frame dump — extract 5 evenly spaced frames from each video."""
+import sys
+if sys.platform == "win32":
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+import cv2
+import numpy as np
+from pathlib import Path
+
+output = Path("output")
+dump_dir = output / "raw_dump"
+dump_dir.mkdir(exist_ok=True)
+
+mp4s = sorted(output.glob("*.mp4"))
+for vi, mp4 in enumerate(mp4s):
+    cap = cv2.VideoCapture(str(mp4))
+    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    print(f"Video {vi+1}: {mp4.name[:30]}... ({w}x{h}, {fps:.0f}fps, {total} frames)")
+
+    # Five evenly spaced frame indices between 10% and 90% of the video
+    indices = np.linspace(total * 0.1, total * 0.9, 5, dtype=int)
+    for i, idx in enumerate(indices):
+        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
+        ret, frame = cap.read()
+        if ret:
+            path = dump_dir / f"v{vi+1}_raw_{i}.png"
+            cv2.imwrite(str(path), frame)
+            print(f"  frame {idx} → {path.name} ({frame.shape})")
+    cap.release()
+
+print(f"\n덤프 완료: {dump_dir}")
--- a/scripts/debug/dump_inspection_frames.py
+++ b/scripts/debug/dump_inspection_frames.py
@@ -0,0 +1,25 @@
+import cv2
+import pickle
+import os
+
+with open('unique_pages.pkl', 'rb') as f:
+    unique_pages = pickle.load(f)
+
+# Save jump cut boundary frames to see what happened exactly around measure 21 and 45.
+# We will use the browser subagent to securely review them.
+out_dir = r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6"
+
+# Let's save the Pages that we know caused issues:
+# In verify_log.txt, we saw:
+# Page 18-24 (Around Measure 21 problem)
+# Page 40-50 (Around Measure 45 problem)
+
+# Dump the two page windows around the known trouble spots in a single pass;
+# indices beyond the number of available pages are skipped.
+for i in (*range(16, 26), *range(43, 53)):
+    if i >= len(unique_pages):
+        continue
+    page_path = os.path.join(out_dir, f"jump_cut_inspection_page_{i}.png")
+    cv2.imwrite(page_path, unique_pages[i])
+
+print(f"Dumped inspection frames to Artifact Directory.")
--- a/scripts/debug/dump_logs.py
+++ b/scripts/debug/dump_logs.py
@@ -0,0 +1,68 @@
+import cv2
+import pickle
+from pathlib import Path
+
+# TemporalTracker already saved the video chunks? No.
+# I will use fast_verify.py's frames but run process_pages directly and print all its output.
+import fast_verify
+from youtube_tab_to_pdf import extract_unique_scroll
+
+# Actually, I will just write a wrapper around ScoreExtractor to print to file
+import sys
+
+def main():
+    """Run the tracker and ScoreExtractor on the video, copying every print() into score_log.txt."""
+    cap = cv2.VideoCapture("output/サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4")
+    # Quick dynamic crop
+    ret, initial = cap.read()
+    if not ret: raise SystemExit("Could not read the first frame of the video")
+    scale = 1280 / initial.shape[1]
+    
+    from youtube_tab_to_pdf import _find_white_tab_strip
+    crop_top, crop_bottom = 0, int(initial.shape[0] * scale)
+    
+    cap.set(cv2.CAP_PROP_POS_FRAMES, 500)
+    ret, check_frame = cap.read()
+    if ret:
+        resized_check = cv2.resize(check_frame, (1280, int(check_frame.shape[0] * scale)))
+        bounds = _find_white_tab_strip(resized_check)
+        if bounds:
+            crop_top, crop_bottom = bounds
+            
+    # We don't want to load ALL 15000 frames into memory. Use TemporalTracker directly!
+    from video_cv_tracker import TemporalTracker
+    tracker = TemporalTracker(diff_threshold=0.05)
+    
+    count = 0
+    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
+    while True:
+        ret, frame = cap.read()
+        if not ret: break
+        if count % 4 == 0:
+            resized = cv2.resize(frame, (1280, int(frame.shape[0] * scale)))
+            tracker.process_frame(resized[crop_top:crop_bottom, :])
+        count += 1
+    cap.release()
+        
+    unique_pages = tracker.get_unique_pages()
+    print(f"Got {len(unique_pages)} unique pages from tracker.")
+    from score_extractor import ScoreExtractor
+    extractor = ScoreExtractor()
+    
+    # Hook the built-in print so each message goes to the log file and to the console
+    import builtins
+    original_print = print
+    with open("score_log.txt", "w", encoding="utf-8") as f:
+        def my_print(*args, **kwargs):
+            text = " ".join(map(str, args))
+            f.write(text + "\n")
+            original_print(*args, **kwargs)
+        builtins.print = my_print
+        try:
+            extractor.process_pages(unique_pages)
+        finally:
+            # Always restore the real print, even if process_pages raises.
+            builtins.print = original_print
+
+if __name__ == "__main__":
+    main()
--- a/scripts/debug/dump_pages.py
+++ b/scripts/debug/dump_pages.py
@@ -0,0 +1,14 @@
+import cv2
+import pickle
+
+with open('unique_pages.pkl', 'rb') as f:
+    unique_pages = pickle.load(f)
+
+import os
+out_dir = r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\pages"
+os.makedirs(out_dir, exist_ok=True)
+
+for i, p in enumerate(unique_pages):
+    cv2.imwrite(os.path.join(out_dir, f"page_{i:03d}.png"), p)
+
+print(f"Saved {len(unique_pages)} pages to {out_dir}")
--- a/scripts/debug/dump_slices.py
+++ b/scripts/debug/dump_slices.py
@@ -0,0 +1,21 @@
+import cv2
+import os
+
+img = cv2.imread(r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\final_check_100_sec.png")
+if img is None:
+    print("Image not found!")
+    # exit() is a site-module helper meant for interactive use; raise SystemExit directly.
+    raise SystemExit(1)
+
+out_dir = r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\slices"
+os.makedirs(out_dir, exist_ok=True)
+
+w = img.shape[1]
+# Final check image is a single ROW (very long panorama).
+# We will cut it into 2000px chunks.
+slice_starts = range(0, w, 2000)
+for idx, x in enumerate(slice_starts):
+    # Slicing clamps at the image width, so the last chunk may be narrower than 2000px.
+    cv2.imwrite(os.path.join(out_dir, f"pano_slice_{idx:02d}.png"), img[:, x:x + 2000])
+
+print(f"Generated {len(slice_starts)} slices.")
--- a/scripts/debug/dump_sprites.py
+++ b/scripts/debug/dump_sprites.py
@@ -0,0 +1,64 @@
+import cv2
+import numpy as np
+import os
+from glob import glob
+
+video_path = glob('output/*.mp4')[0]
+cap = cv2.VideoCapture(video_path)
+
+def _find_white_tab_strip(frame):
+    """Return (first, last) row index of the bright band in frame, or None if under 5 rows qualify."""
+    width = frame.shape[1]
+    brightest = np.max(frame, axis=2)  # per-pixel maximum over the channel axis
+    _, mask = cv2.threshold(brightest, 200, 255, cv2.THRESH_BINARY)
+    # A row qualifies when more than 10% of its pixels are above the 200 threshold.
+    bright_rows = np.where(np.sum(mask > 0, axis=1) > width * 0.1)[0]
+    if len(bright_rows) < 5:
+        return None
+    return bright_rows[0], bright_rows[-1]
+
+def get_number_sprite(m_img):
+    """Crop the thresholded patch just above the first mostly-bright row of a measure image.
+
+    Returns None when the crop would be empty. Falls back to row 50 when no
+    row has more than half of its pixels above the threshold.
+    """
+    _, thresh = cv2.threshold(np.max(m_img, axis=2), 200, 255, cv2.THRESH_BINARY)
+    width = m_img.shape[1]
+    staff_lines = np.where(np.sum(thresh, axis=1) / 255 > width * 0.5)[0]
+    y_staff = staff_lines[0] if len(staff_lines) > 0 else 50
+    # Rows [y_staff - 35, y_staff - 2), columns [0, 60), clamped to the image.
+    top, bottom, right = max(0, y_staff - 35), max(0, y_staff - 2), min(60, width)
+    if bottom <= top or right <= 0: return None
+    return thresh[top:bottom, 0:right]
+
+frame_count = 0
+found = 0
+while True:
+    ret, frame = cap.read()
+    if not ret: break
+    if frame_count % 30 == 0:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            tab_crop = frame[max(0, strip[0]):min(frame.shape[0], strip[1]), :]
+            
+            # _detect_measure_bars inline
+            b_gray = np.max(tab_crop, axis=2)
+            _, b_bin = cv2.threshold(b_gray, 180, 255, cv2.THRESH_BINARY)
+            col_sums = np.sum(b_bin, axis=0) / 255
+            bars = np.where(col_sums > tab_crop.shape[0] * 0.8)[0]
+            
+            if len(bars) > 1:
+                x_start = bars[0]
+                x_end = bars[1]
+                if x_end - x_start > 40:
+                    first_m = tab_crop[:, x_start:x_end]
+                    sprite = get_number_sprite(first_m)
+                    if sprite is not None:
+                        pixels = np.count_nonzero(sprite > 127)
+                        cv2.imwrite(f"C:/Users/Certes/Desktop/guitar_score/debug_s_{frame_count}_{pixels}.png", sprite)
+                        print(f"Dumped sprite frame {frame_count} with {pixels} pixels")
+                        found += 1
+                        if found > 15: break
+    frame_count += 1
+cap.release()
--- a/scripts/debug/fast_verify.py
+++ b/scripts/debug/fast_verify.py
@@ -0,0 +1,78 @@
+import cv2
+from video_cv_tracker import TemporalTracker
+from youtube_tab_to_pdf import extract_unique_scroll, generate_long_image, generate_pdf, download_video, extract_frames
+import sys
+import os
+from pathlib import Path
+
+# Run verification specifically on Shintakarajima
+url = "https://youtu.be/tJq1n8TofM0"
+video_path = Path("output/サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4")
+
+print("Extracting full video for final 142-measure verification...")
+cap = cv2.VideoCapture(str(video_path))
+
+# PRE-CALCULATE Dynamic Crop
+# Just like extract_unique_scroll does automatically, we detect the white band.
+ret, initial = cap.read()
+scale = 1280 / initial.shape[1]
+resized_init = cv2.resize(initial, (1280, int(initial.shape[0] * scale)))
+
+from youtube_tab_to_pdf import _find_white_tab_strip
+crop_top = 0
+crop_bottom = resized_init.shape[0]
+
+cap.set(cv2.CAP_PROP_POS_FRAMES, 500)
+ret, check_frame = cap.read()
+if ret:
+    resized_check = cv2.resize(check_frame, (1280, int(check_frame.shape[0] * scale)))
+    bounds = _find_white_tab_strip(resized_check)
+    if bounds:
+        crop_top, crop_bottom = bounds
+        # Preserve D.S. al Coda, ┌─ 1., ┌─ 2., and measure numbers drawn in the black abyss!
+        crop_top = max(0, crop_top - 60)
+        
+print(f"Dynamically Cropping to: Y={crop_top} to {crop_bottom}")
+
+cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
+frames = []
+idx = 0
+tracker = TemporalTracker(diff_threshold=0.05)
+
+while True:
+    ret, frame = cap.read()
+    if not ret: break
+    
+    frame_resized = cv2.resize(frame, (1280, int(frame.shape[0] * scale)))
+    clean_ribbon = frame_resized[crop_top:crop_bottom, :]
+    frames.append(clean_ribbon)
+    idx += 1
+
+cap.release()
+
+if len(frames) > 30:  # a short or unreadable video has no frame 30 to dump
+    cv2.imwrite("C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/raw_frame_check.png", frames[30])
+print(f"Extracted {len(frames)} frames. Running sequential page extraction...")
+try:
+    final_chunks = extract_unique_scroll(frames)
+    print("DEBUG: final_chunks len =", len(final_chunks))
+    if final_chunks:
+        print("DEBUG: final_chunks[0].shape =", final_chunks[0].shape)
+        cv2.imwrite("C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/debug_chunk_0.png", final_chunks[0])
+    
+    # Save the chunks to artifact directory to literally look at it
+    artifact_path = Path(r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6")
+    output_png = artifact_path / "final_check_100_sec.png"
+    
+    generate_long_image(final_chunks, output_png)
+    print(f"Saved successful verification image to: {output_png}")
+
+    if final_chunks:
+        generate_pdf(final_chunks, Path("output/shintakarajima_perfect.pdf"))
+        print("✨ Successfully generated output/shintakarajima_perfect.pdf ✨")
+    else:
+        print("Failed to produce rows.")
+except Exception:
+    # Top-level boundary of a debug script: report the failure instead of propagating it.
+    import traceback
+    traceback.print_exc()
--- a/scripts/debug/find_staff_lines.py
+++ b/scripts/debug/find_staff_lines.py
@@ -0,0 +1,50 @@
+"""Debug script: find vertical barlines inside the staff band of one image.
+
+Thresholds a fixed PNG, counts dark pixels per column within the staff rows,
+merges neighbouring hits into single x positions, prints them, and writes an
+annotated copy of the image with each detected barline drawn on top.
+"""
+import cv2
+import numpy as np
+
+# Rows spanned by the staff lines in this particular image (measured: y=76 to y=152).
+STAFF_TOP, STAFF_BOTTOM = 76, 152
+# A column counts as a solid vertical line when more than this many of the band's 76 rows are dark.
+MIN_DARK_ROWS = 70
+# Hit columns at most this many pixels apart are merged into the same barline.
+MAX_GAP = 5
+
+img = cv2.imread(r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\raw_super_block.png")
+if img is None:
+    # Report on stderr and exit with a non-zero status; unlike the site-provided
+    # exit() helper, SystemExit is always available.
+    raise SystemExit("Image not found")
+
+gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+# Inverted threshold: pixels at or below 200 become 255, brighter pixels become 0.
+_, bin_inv = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
+
+# Number of dark pixels in each column of the staff band.
+col_sums = np.sum(bin_inv[STAFF_TOP:STAFF_BOTTOM, :], axis=0) / 255.0
+bar_xs = np.where(col_sums > MIN_DARK_ROWS)[0]
+
+# Group adjacent hit columns into single lines, each represented by its mean x.
+grouped_bars = []
+if len(bar_xs) > 0:
+    current_group = [bar_xs[0]]
+    for x in bar_xs[1:]:
+        if x - current_group[-1] <= MAX_GAP:
+            current_group.append(x)
+        else:
+            grouped_bars.append(int(np.mean(current_group)))
+            current_group = [x]
+    grouped_bars.append(int(np.mean(current_group)))
+
+print(f"Found {len(grouped_bars)} vertical barlines:")
+print(grouped_bars)
+
+# Draw every detected barline over a copy of the input, full height, in red (BGR order).
+out = img.copy()
+for x in grouped_bars:
+    cv2.line(out, (x, 0), (x, out.shape[0]), (0, 0, 255), 2)
+cv2.imwrite(r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\test_barlines.png", out)
--- a/scripts/debug/measure_1_crop.png
+++ b/scripts/debug/measure_1_crop.png
--- a/scripts/debug/patch_extractor.py
+++ b/scripts/debug/patch_extractor.py
@@ -0,0 +1,139 @@
+import re
+
+with open('youtube_tab_to_pdf.py', 'r', encoding='utf-8') as src_file:  # relative path: run from the directory holding youtube_tab_to_pdf.py
+    code = src_file.read()  # whole module source; this script later writes the patched text back to the same file
+
+new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    print(f"[4/5] 순차 1FPS 타임라인 기반 마디 추출 중...")
+    
+    strip_tops, strip_bottoms = [], []
+    for frame in frames[:50]:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            strip_tops.append(strip[0])
+            strip_bottoms.append(strip[1])
+            
+    if not strip_tops: return []
+        
+    median_top = int(np.median(strip_tops))
+    median_bottom = int(np.median(strip_bottoms))
+    
+    unique_measures = []
+    chunk_width = 1280
+    
+    def get_clean_binary(img):
+        gray = np.max(img, axis=2)
+        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        return binary
+
+    for frame_idx, frame in enumerate(frames):
+        h = frame.shape[0]
+        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
+        if not _has_tab_content(tab_crop): 
+            continue
+            
+        gray_page = _extract_print_channel(tab_crop)
+        bar_coords = _detect_measure_bars(gray_page)
+        
+        if not bar_coords:
+            continue
+            
+        coords = [0] + bar_coords + [tab_crop.shape[1]]
+        coords = sorted(list(set(coords)))
+        
+        page_measures = []
+        for i in range(len(coords) - 1):
+            x_start = coords[i]
+            x_end = coords[i+1]
+            if x_end - x_start < 40: continue
+            page_measures.append(tab_crop[:, x_start:x_end])
+            
+        if not page_measures:
+            continue
+            
+        if not unique_measures:
+            unique_measures.extend(page_measures)
+            continue
+            
+        first_m = page_measures[0]
+        bin_first = get_clean_binary(first_m)
+        
+        best_error = 1.0
+        best_offset = 0
+        anchored = False
+        
+        for scan_dist in range(1, min(10, len(unique_measures) + 1)):
+            past_idx = len(unique_measures) - scan_dist
+            past_m = unique_measures[past_idx]
+            
+            bin_past = get_clean_binary(past_m)
+            
+            if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
+                hs = min(bin_first.shape[0], bin_past.shape[0])
+                ws = min(bin_first.shape[1], bin_past.shape[1])
+                s1 = bin_first[:hs, :ws]
+                s2 = bin_past[:hs, :ws]
+                
+                diff = cv2.absdiff(s1, s2)
+                error_ratio = np.sum(diff > 0) / s1.size
+                
+                if error_ratio < best_error:
+                    best_error = error_ratio
+                    best_offset = len(unique_measures) - past_idx
+                    
+        # Error ratio < 20% confirms identity for sparse structures
+        if best_error < 0.20:
+            new_start_offset = best_offset
+            anchored = True
+            print(f"    [Anchor] Frame {frame_idx} -> PDF offset {best_offset} (Best Error: {best_error:.4f})")
+        else:
+            print(f"    [New] Frame {frame_idx} -> No Match (Best Error was {best_error:.4f})")
+            
+        if anchored and new_start_offset < len(page_measures):
+            unique_measures.extend(page_measures[new_start_offset:])
+        elif not anchored:
+            unique_measures.extend(page_measures)
+            
+    print(f"  -> 동기화 중복 제거 완료: 무손실 타임라인 기반 {len(unique_measures)}개 연속 마디 보존")
+
+    final_chunks = []
+    current_row_measures = []
+    current_row_width = 0
+    
+    for measure_img in unique_measures:
+        measure_w = measure_img.shape[1]
+        
+        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
+            row_img = np.hstack(current_row_measures)
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+            final_chunks.append(row_img)
+            current_row_measures = [measure_img]
+            current_row_width = measure_w
+        else:
+            current_row_measures.append(measure_img)
+            current_row_width += measure_w
+            
+    if current_row_measures:
+        row_img = np.hstack(current_row_measures)
+        if row_img.shape[1] > chunk_width:
+             row_img = row_img[:, :chunk_width]
+        else:
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+        final_chunks.append(row_img)
+        
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
+    return final_chunks
+"""
+
+pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'
+new_code, n_patched = re.subn(pattern, lambda _m: new_func.rstrip('\n'), code, flags=re.DOTALL)  # callable repl: new_func is inserted literally (no backslash-escape processing); rstrip avoids adding a blank line per run
+if n_patched == 0: raise SystemExit("extract_unique_scroll not found in youtube_tab_to_pdf.py; nothing patched.")  # never report a silent no-op as success
+with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
+    f.write(new_code)
+print("Patched.")
--- a/scripts/debug/patch_extractor_with_sprite.py
+++ b/scripts/debug/patch_extractor_with_sprite.py
@@ -0,0 +1,182 @@
+import re
+
+with open('youtube_tab_to_pdf.py', 'r', encoding='utf-8') as src_file:  # relative path: run from the directory holding youtube_tab_to_pdf.py
+    code = src_file.read()  # whole module source; this script later writes the patched text back to the same file
+
+new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    print(f"[4/5] 순차 Number Sprite 앵커 기반 마디 추출 중...")
+    
+    strip_tops, strip_bottoms = [], []
+    for frame in frames[:50]:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            strip_tops.append(strip[0])
+            strip_bottoms.append(strip[1])
+            
+    if not strip_tops: return []
+        
+    median_top = int(np.median(strip_tops))
+    median_bottom = int(np.median(strip_bottoms))
+    
+    unique_measures = []
+    chunk_width = 1280
+    
+    def get_clean_binary(img):
+        gray = np.max(img, axis=2)
+        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        return binary
+
+    def get_number_sprite(m_img):
+        # We find the top-left region where the number is displayed
+        gray = np.max(m_img, axis=2)
+        _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        row_sums = np.sum(thresh, axis=1) / 255
+        staff_lines = np.where(row_sums > m_img.shape[1] * 0.5)[0]
+        y_staff = staff_lines[0] if len(staff_lines) > 0 else 50
+        
+        crop_y1 = max(0, y_staff - 35)
+        crop_y2 = max(0, y_staff - 2)
+        crop_x1 = 0
+        crop_x2 = min(60, m_img.shape[1])
+        
+        if crop_y2 <= crop_y1 or crop_x2 <= crop_x1: return None
+        sprite = thresh[crop_y1:crop_y2, crop_x1:crop_x2]
+        
+        # if there are no white pixels, it's a blank space, not a number
+        if np.count_nonzero(sprite > 127) < 5: return None
+        return sprite
+
+    for frame_idx, frame in enumerate(frames):
+        h = frame.shape[0]
+        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
+        if not _has_tab_content(tab_crop): 
+            continue
+            
+        gray_page = _extract_print_channel(tab_crop)
+        bar_coords = _detect_measure_bars(gray_page)
+        
+        if not bar_coords: continue
+            
+        coords = [0] + bar_coords + [tab_crop.shape[1]]
+        coords = sorted(list(set(coords)))
+        
+        page_measures = []
+        for i in range(len(coords) - 1):
+            x_start = coords[i]
+            x_end = coords[i+1]
+            if x_end - x_start < 40: continue
+            page_measures.append(tab_crop[:, x_start:x_end])
+            
+        if not page_measures: continue
+            
+        if not unique_measures:
+            unique_measures.extend(page_measures)
+            continue
+            
+        first_m = page_measures[0]
+        first_sprite = get_number_sprite(first_m)
+        
+        best_error = 1.0
+        best_offset = 0
+        anchored = False
+        
+        # Only anchor if we explicitly see a printed number in the top left
+        if first_sprite is not None:
+            # We can scan further back safely because different numbers won't mathematically match
+            for scan_dist in range(1, min(15, len(unique_measures) + 1)):
+                past_idx = len(unique_measures) - scan_dist
+                past_m = unique_measures[past_idx]
+                past_sprite = get_number_sprite(past_m)
+                
+                if past_sprite is not None:
+                    hs = min(first_sprite.shape[0], past_sprite.shape[0])
+                    ws = min(first_sprite.shape[1], past_sprite.shape[1])
+                    s1 = first_sprite[:hs, :ws]
+                    s2 = past_sprite[:hs, :ws]
+                    
+                    diff = cv2.absdiff(s1, s2)
+                    error_ratio = np.sum(diff > 0) / s1.size
+                    
+                    if error_ratio < best_error:
+                        best_error = error_ratio
+                        best_offset = len(unique_measures) - past_idx
+                        
+            # If the literal printed number matches perfectly, we securely anchor Here!
+            if best_error < 0.15:
+                new_start_offset = best_offset
+                anchored = True
+                
+        # Fallback for pages entirely devoid of explicit numbering
+        if not anchored:
+            bin_first = get_clean_binary(first_m)
+            for scan_dist in range(1, min(5, len(unique_measures) + 1)):
+                past_idx = len(unique_measures) - scan_dist
+                past_m = unique_measures[past_idx]
+                bin_past = get_clean_binary(past_m)
+                
+                if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
+                    hs = min(bin_first.shape[0], bin_past.shape[0])
+                    ws = min(bin_first.shape[1], bin_past.shape[1])
+                    s1 = bin_first[:hs, :ws]
+                    s2 = bin_past[:hs, :ws]
+                    
+                    diff = cv2.absdiff(s1, s2)
+                    error_ratio = np.sum(diff > 0) / s1.size
+                    
+                    if error_ratio < best_error:
+                        best_error = error_ratio
+                        best_offset = len(unique_measures) - past_idx
+                        
+            if best_error < 0.15:
+                new_start_offset = best_offset
+                anchored = True
+
+        if anchored and new_start_offset < len(page_measures):
+            # Middle append
+            unique_measures.extend(page_measures[new_start_offset:])
+        elif not anchored:
+            unique_measures.extend(page_measures)
+            
+    print(f"  -> 동기화 중복 제거 완료: Number Sprite 타임라인 기반 {len(unique_measures)}개 마디 보존")
+
+    final_chunks = []
+    current_row_measures = []
+    current_row_width = 0
+    
+    for measure_img in unique_measures:
+        measure_w = measure_img.shape[1]
+        
+        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
+            row_img = np.hstack(current_row_measures)
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+            final_chunks.append(row_img)
+            current_row_measures = [measure_img]
+            current_row_width = measure_w
+        else:
+            current_row_measures.append(measure_img)
+            current_row_width += measure_w
+            
+    if current_row_measures:
+        row_img = np.hstack(current_row_measures)
+        if row_img.shape[1] > chunk_width:
+             row_img = row_img[:, :chunk_width]
+        else:
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+        final_chunks.append(row_img)
+        
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
+    return final_chunks
+"""
+
+pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'
+new_code, n_patched = re.subn(pattern, lambda _m: new_func.rstrip('\n'), code, flags=re.DOTALL)  # callable repl: new_func is inserted literally (no backslash-escape processing); rstrip avoids adding a blank line per run
+if n_patched == 0: raise SystemExit("extract_unique_scroll not found in youtube_tab_to_pdf.py; nothing patched.")  # never report a silent no-op as success
+with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
+    f.write(new_code)
+print("Patched.")
--- a/scripts/debug/patch_extractor_with_tracker.py
+++ b/scripts/debug/patch_extractor_with_tracker.py
@@ -0,0 +1,145 @@
+import re
+
+with open('youtube_tab_to_pdf.py', 'r', encoding='utf-8') as src_file:  # relative path: run from the directory holding youtube_tab_to_pdf.py
+    code = src_file.read()  # whole module source; this script later writes the patched text back to the same file
+
+new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    print(f"[4/5] 순차 페이지 분할 기반 추출 중...")
+    
+    strip_tops, strip_bottoms = [], []
+    for frame in frames[:50]:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            strip_tops.append(strip[0])
+            strip_bottoms.append(strip[1])
+            
+    if not strip_tops: return []
+        
+    median_top = int(np.median(strip_tops))
+    median_bottom = int(np.median(strip_bottoms))
+    
+    # 5% 픽셀 변화를 통해 페이지가 넘어가는 장면(Scene)만 정지 화면으로 추출 (모션 블러 프레임 제거)
+    tracker = TemporalTracker(diff_threshold=0.05)
+    
+    for frame in frames:
+        h = frame.shape[0]
+        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
+        if not _has_tab_content(tab_crop): 
+            continue
+        tracker.process_frame(tab_crop)
+
+    unique_pages = tracker.get_unique_pages()
+    if not unique_pages: return []
+    
+    unique_measures = []
+    chunk_width = 1280
+    
+    def get_clean_binary(img):
+        gray = np.max(img, axis=2)
+        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        return binary
+
+    for page_idx, page in enumerate(unique_pages):
+        gray_page = _extract_print_channel(page)
+        bar_coords = _detect_measure_bars(gray_page)
+        
+        if not bar_coords: continue
+            
+        coords = [0] + bar_coords + [page.shape[1]]
+        coords = sorted(list(set(coords)))
+        
+        page_measures = []
+        for i in range(len(coords) - 1):
+            x_start = coords[i]
+            x_end = coords[i+1]
+            if x_end - x_start < 40: continue
+            page_measures.append(page[:, x_start:x_end])
+            
+        if not page_measures: continue
+            
+        if not unique_measures:
+            unique_measures.extend(page_measures)
+            continue
+            
+        first_m = page_measures[0]
+        bin_first = get_clean_binary(first_m)
+        
+        best_error = 1.0
+        best_offset = 0
+        anchored = False
+        
+        for scan_dist in range(1, min(10, len(unique_measures) + 1)):
+            past_idx = len(unique_measures) - scan_dist
+            past_m = unique_measures[past_idx]
+            bin_past = get_clean_binary(past_m)
+            
+            if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
+                hs = min(bin_first.shape[0], bin_past.shape[0])
+                ws = min(bin_first.shape[1], bin_past.shape[1])
+                s1 = bin_first[:hs, :ws]
+                s2 = bin_past[:hs, :ws]
+                
+                diff = cv2.absdiff(s1, s2)
+                error_ratio = np.sum(diff > 0) / s1.size
+                
+                if error_ratio < best_error:
+                    best_error = error_ratio
+                    best_offset = len(unique_measures) - past_idx
+                    
+        if best_error < 0.20:
+            new_start_offset = best_offset
+            anchored = True
+            print(f"    [Anchor] Page {page_idx} -> PDF offset {best_offset} (Best Error: {best_error:.4f})")
+        else:
+            print(f"    [New Page] Page {page_idx} -> No Overlap (Best Error: {best_error:.4f})")
+            
+        if anchored and new_start_offset < len(page_measures):
+            # 중복된 오프셋만큼 건너뛰고 나머지 새 마디만 추가
+            unique_measures.extend(page_measures[new_start_offset:])
+        elif not anchored:
+            # 겹침이 전혀 없으므로 전체 마디 추가
+            unique_measures.extend(page_measures)
+            
+    print(f"  -> 동기화 중복 제거 완료: 무손실 정적 페이지 기반 {len(unique_measures)}개 연속 마디 보존")
+
+    final_chunks = []
+    current_row_measures = []
+    current_row_width = 0
+    
+    for measure_img in unique_measures:
+        measure_w = measure_img.shape[1]
+        
+        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
+            row_img = np.hstack(current_row_measures)
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+            final_chunks.append(row_img)
+            current_row_measures = [measure_img]
+            current_row_width = measure_w
+        else:
+            current_row_measures.append(measure_img)
+            current_row_width += measure_w
+            
+    if current_row_measures:
+        row_img = np.hstack(current_row_measures)
+        if row_img.shape[1] > chunk_width:
+             row_img = row_img[:, :chunk_width]
+        else:
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+        final_chunks.append(row_img)
+        
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
+    return final_chunks
+"""
+
+pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'
+new_code, n_patched = re.subn(pattern, lambda _m: new_func.rstrip('\n'), code, flags=re.DOTALL)  # callable repl: new_func is inserted literally (no backslash-escape processing); rstrip avoids adding a blank line per run
+if n_patched == 0: raise SystemExit("extract_unique_scroll not found in youtube_tab_to_pdf.py; nothing patched.")  # never report a silent no-op as success
+with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
+    f.write(new_code)
+print("Patched completely back to optimal tracking.")
--- a/scripts/debug/patch_final_holy_grail.py
+++ b/scripts/debug/patch_final_holy_grail.py
@@ -0,0 +1,168 @@
+import re
+
+with open('youtube_tab_to_pdf.py', 'r', encoding='utf-8') as src_file:  # relative path: run from the directory holding youtube_tab_to_pdf.py
+    code = src_file.read()  # whole module source; this script later writes the patched text back to the same file
+
+new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    print(f"[4/5] 순차 Binarized-Tracker 정밀 추출 중...")
+    
+    strip_tops, strip_bottoms = [], []
+    for frame in frames[:50]:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            strip_tops.append(strip[0])
+            strip_bottoms.append(strip[1])
+            
+    if not strip_tops: return []
+        
+    median_top = int(np.median(strip_tops))
+    median_bottom = int(np.median(strip_bottoms))
+    
+    def get_clean_binary(img):
+        gray = np.max(img, axis=2)
+        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        return binary
+
+    # The Holy Grail Tracker: Feed it ONLY the pure 200-threshold binary mask.
+    # The hand is gone. Only the white staff lines and notes exist.
+    # When the page flips, the notes change position, creating a very small but undeniable structural pixel diff.
+    # We use a highly sensitive 0.015 (1.5%) threshold to perfectly catch thin notes transitioning!
+    tracker = TemporalTracker(diff_threshold=0.015)
+    
+    # Store associations so we can retrieve the original BGR page later
+    clean_to_bgr = [] 
+
+    for frame in frames:
+        h = frame.shape[0]
+        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
+        if not _has_tab_content(tab_crop): 
+            continue
+            
+        clean_bin = get_clean_binary(tab_crop)
+        # tracker will process the pure binary structural image
+        diff = 0.0
+        if tracker.last_frame is not None:
+            raw_diff = cv2.absdiff(clean_bin, tracker.last_frame)
+            non_zero_ratio = np.count_nonzero(raw_diff) / clean_bin.size
+            if non_zero_ratio > tracker.diff_threshold:
+                tracker.unique_pages.append(clean_bin)
+                clean_to_bgr.append(tab_crop)
+                tracker.last_frame = clean_bin.copy()
+        else:
+            tracker.unique_pages.append(clean_bin)
+            clean_to_bgr.append(tab_crop)
+            tracker.last_frame = clean_bin.copy()
+
+    unique_pages = clean_to_bgr
+    if not unique_pages: return []
+    
+    print(f"  -> {len(unique_pages)}개의 고유 정적 페이지 캡처 완료. 3-마디 역탐색 동기화 시작...")
+    
+    unique_measures = []
+    chunk_width = 1280
+
+    for page_idx, page in enumerate(unique_pages):
+        gray_page = _extract_print_channel(page)
+        bar_coords = _detect_measure_bars(gray_page)
+        
+        if not bar_coords: continue
+            
+        coords = [0] + bar_coords + [page.shape[1]]
+        coords = sorted(list(set(coords)))
+        
+        page_measures = []
+        for i in range(len(coords) - 1):
+            x_start = coords[i]
+            x_end = coords[i+1]
+            if x_end - x_start < 40: continue
+            page_measures.append(page[:, x_start:x_end])
+            
+        if not page_measures: continue
+            
+        if not unique_measures:
+            unique_measures.extend(page_measures)
+            continue
+            
+        first_m = page_measures[0]
+        bin_first = get_clean_binary(first_m)
+        
+        best_error = 1.0
+        best_offset = 0
+        anchored = False
+        
+        # We limit the search distance to EXACTLY 3 measures.
+        # This completely cures Time-Traveling overlaps caused by M10 matching identical M2.
+        # A page flip overlap can NEVER be further back than the immediately previous page's length.
+        for scan_dist in range(1, min(4, len(unique_measures) + 1)):
+            past_idx = len(unique_measures) - scan_dist
+            past_m = unique_measures[past_idx]
+            bin_past = get_clean_binary(past_m)
+            
+            if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
+                hs = min(bin_first.shape[0], bin_past.shape[0])
+                ws = min(bin_first.shape[1], bin_past.shape[1])
+                s1 = bin_first[:hs, :ws]
+                s2 = bin_past[:hs, :ws]
+                
+                diff = cv2.absdiff(s1, s2)
+                error_ratio = np.sum(diff > 0) / s1.size
+                
+                if error_ratio < best_error:
+                    best_error = error_ratio
+                    best_offset = len(unique_measures) - past_idx
+                    
+        if best_error < 0.20:
+            new_start_offset = best_offset
+            anchored = True
+            print(f"    [Anchor] Page {page_idx} -> PDF offset {best_offset} (Best Error: {best_error:.4f})")
+        else:
+            print(f"    [New Page] Page {page_idx} -> No Overlap (Best Error: {best_error:.4f})")
+            
+        if anchored and new_start_offset < len(page_measures):
+            unique_measures.extend(page_measures[new_start_offset:])
+        elif not anchored:
+            unique_measures.extend(page_measures)
+            
+    print(f"  -> 동기화 중복 제거 완료: 무손실 정적 페이지 기반 {len(unique_measures)}개 연속 마디 보존")
+
+    final_chunks = []
+    current_row_measures = []
+    current_row_width = 0
+    
+    for measure_img in unique_measures:
+        measure_w = measure_img.shape[1]
+        
+        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
+            row_img = np.hstack(current_row_measures)
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+            final_chunks.append(row_img)
+            current_row_measures = [measure_img]
+            current_row_width = measure_w
+        else:
+            current_row_measures.append(measure_img)
+            current_row_width += measure_w
+            
+    if current_row_measures:
+        row_img = np.hstack(current_row_measures)
+        if row_img.shape[1] > chunk_width:
+             row_img = row_img[:, :chunk_width]
+        else:
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+        final_chunks.append(row_img)
+        
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
+    return final_chunks
+"""
+
+pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'
+new_code, n_patched = re.subn(pattern, lambda _m: new_func.rstrip('\n'), code, flags=re.DOTALL)  # callable repl: new_func is inserted literally (no backslash-escape processing); rstrip avoids adding a blank line per run
+if n_patched == 0: raise SystemExit("extract_unique_scroll not found in youtube_tab_to_pdf.py; nothing patched.")  # never report a silent no-op as success
+with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
+    f.write(new_code)
+print("Holy Grail Pipeline Embedded.")
--- a/scripts/debug/patch_final_holy_matrix.py
+++ b/scripts/debug/patch_final_holy_matrix.py
@@ -0,0 +1,160 @@
+import re
+
+with open('youtube_tab_to_pdf.py', 'r', encoding='utf-8') as src_file:  # relative path: run from the directory holding youtube_tab_to_pdf.py
+    code = src_file.read()  # whole module source; this script later writes the patched text back to the same file
+
+new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    print(f"[4/5] 순차 Stable-Blurred-Matrix 추출 중...")
+    
+    strip_tops, strip_bottoms = [], []
+    for frame in frames[:50]:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            strip_tops.append(strip[0])
+            strip_bottoms.append(strip[1])
+            
+    if not strip_tops: return []
+        
+    median_top = int(np.median(strip_tops))
+    median_bottom = int(np.median(strip_bottoms))
+    
+    def get_clean_binary(img):
+        gray = np.max(img, axis=2)
+        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        return binary
+
+    unique_measures = []
+    chunk_width = 1280
+    
+    last_1fps_bin = None
+    last_solid_page = None
+
+    for frame_idx, frame in enumerate(frames):
+        h = frame.shape[0]
+        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
+        if not _has_tab_content(tab_crop): 
+            continue
+            
+        clean_bin = get_clean_binary(tab_crop)
+        
+        if last_1fps_bin is not None:
+            diff = cv2.absdiff(clean_bin, last_1fps_bin)
+            error = np.count_nonzero(diff) / clean_bin.size
+            if error < 0.05:
+                has_changed_since_last_solid = True
+                
+                if last_solid_page is not None:
+                    s_diff = cv2.absdiff(clean_bin, last_solid_page)
+                    s_err = np.count_nonzero(s_diff) / clean_bin.size
+                    if s_err < 0.05:
+                        has_changed_since_last_solid = False
+                
+                if has_changed_since_last_solid:
+                    last_solid_page = clean_bin.copy()
+                    
+                    gray_page = _extract_print_channel(tab_crop)
+                    bar_coords = _detect_measure_bars(gray_page)
+                    
+                    if bar_coords:
+                        coords = [0] + bar_coords + [tab_crop.shape[1]]
+                        coords = sorted(list(set(coords)))
+                        
+                        page_measures = []
+                        for i in range(len(coords) - 1):
+                            x_start = coords[i]
+                            x_end = coords[i+1]
+                            if x_end - x_start < 40: continue
+                            page_measures.append(tab_crop[:, x_start:x_end])
+                            
+                        if page_measures:
+                            if not unique_measures:
+                                unique_measures.extend(page_measures)
+                            else:
+                                first_m = page_measures[0]
+                                bin_first = get_clean_binary(first_m)
+                                blurred_first = cv2.GaussianBlur(bin_first, (7, 7), 0)
+                                
+                                best_val = 0.0
+                                best_offset = 0
+                                anchored = False
+                                
+                                # Deep Scan Deduplication explicitly disabled to prevent repeating choruses wiping out the PDF timeline!
+                                # scan_dist=4 ensures we only match the immediately preceding page-flip overlap.
+                                for scan_dist in range(1, min(4, len(unique_measures) + 1)):
+                                    past_idx = len(unique_measures) - scan_dist
+                                    past_m = unique_measures[past_idx]
+                                    bin_past = get_clean_binary(past_m)
+                                    blurred_past = cv2.GaussianBlur(bin_past, (7, 7), 0)
+                                    
+                                    if abs(blurred_first.shape[1] - blurred_past.shape[1]) <= 30:
+                                        hs = min(blurred_first.shape[0], blurred_past.shape[0])
+                                        ws = min(blurred_first.shape[1], blurred_past.shape[1])
+                                        s1 = blurred_first[:hs, :ws]
+                                        s2 = blurred_past[:hs, :ws]
+                                        
+                                        template = s1[10:-10, 10:-10]
+                                        if template.shape[0] >= 10 and template.shape[1] >= 10:
+                                            res = cv2.matchTemplate(s2, template, cv2.TM_CCOEFF_NORMED)
+                                            # Using cv2.minMaxLoc inside the result matrix to find any peak (subpixel shifting tolerance)
+                                            _, max_val, _, _ = cv2.minMaxLoc(res)
+                                            
+                                            if max_val > best_val:
+                                                best_val = max_val
+                                                best_offset = len(unique_measures) - past_idx
+                                                
+                                if best_val > 0.85:
+                                    print(f"    [Anchor] Page Matched -> PDF offset {best_offset} (Confidence: {best_val:.2f})")
+                                    new_start_offset = best_offset
+                                    anchored = True
+                                    
+                                if anchored and new_start_offset < len(page_measures):
+                                    unique_measures.extend(page_measures[new_start_offset:])
+                                elif not anchored:
+                                    print(f"    [New Page] No recent overlap (Confidence: {best_val:.2f})")
+                                    unique_measures.extend(page_measures)
+                                    
+        last_1fps_bin = clean_bin.copy()
+            
+    print(f"  -> 동기화 중복 제거 완료: Stability-Blur 기반 {len(unique_measures)}개 마디 보존")
+
+    final_chunks = []
+    current_row_measures = []
+    current_row_width = 0
+    
+    for measure_img in unique_measures:
+        measure_w = measure_img.shape[1]
+        
+        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
+            row_img = np.hstack(current_row_measures)
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+            final_chunks.append(row_img)
+            current_row_measures = [measure_img]
+            current_row_width = measure_w
+        else:
+            current_row_measures.append(measure_img)
+            current_row_width += measure_w
+            
+    if current_row_measures:
+        row_img = np.hstack(current_row_measures)
+        if row_img.shape[1] > chunk_width:
+             row_img = row_img[:, :chunk_width]
+        else:
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+        final_chunks.append(row_img)
+        
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
+    return final_chunks
+"""
+
+pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'
+new_code, n_patched = re.subn(pattern, lambda _m: new_func.rstrip('\n'), code, flags=re.DOTALL)  # callable repl: new_func is inserted literally (no backslash-escape processing); rstrip avoids adding a blank line per run
+if n_patched == 0: raise SystemExit("extract_unique_scroll not found in youtube_tab_to_pdf.py; nothing patched.")  # never report a silent no-op as success
+with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
+    f.write(new_code)
+print("Stable-Blurred-Matrix Patched.")
--- a/scripts/debug/patch_final_holy_sprite.py
+++ b/scripts/debug/patch_final_holy_sprite.py
@@ -0,0 +1,198 @@
+import re
+
+with open('youtube_tab_to_pdf.py', 'r', encoding='utf-8') as f:  # relative path: resolved against the current working directory
+    code = f.read()  # whole module source; the regex substitution at the end of this script operates on it
+
+new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    print(f"[4/5] 순차 Stable Content Trigger + Number Sprite 추출 중...")
+    
+    strip_tops, strip_bottoms = [], []
+    for frame in frames[:50]:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            strip_tops.append(strip[0])
+            strip_bottoms.append(strip[1])
+            
+    if not strip_tops: return []
+        
+    median_top = int(np.median(strip_tops))
+    median_bottom = int(np.median(strip_bottoms))
+    
+    def get_clean_binary(img):
+        gray = np.max(img, axis=2)
+        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        return binary
+        
+    def get_number_sprite(m_img):
+        gray = np.max(m_img, axis=2)
+        _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        row_sums = np.sum(thresh, axis=1) / 255
+        staff_lines = np.where(row_sums > m_img.shape[1] * 0.5)[0]
+        y_staff = staff_lines[0] if len(staff_lines) > 0 else 50
+        
+        crop_y1 = max(0, y_staff - 35)
+        crop_y2 = max(0, y_staff - 2)
+        crop_x1 = 0
+        crop_x2 = min(60, m_img.shape[1])
+        
+        if crop_y2 <= crop_y1 or crop_x2 <= crop_x1: return None
+        sprite = thresh[crop_y1:crop_y2, crop_x1:crop_x2]
+        if np.count_nonzero(sprite > 127) < 8: return None
+        return sprite
+
+    unique_measures = []
+    chunk_width = 1280
+    
+    last_1fps_bin = None
+    last_solid_page = None
+
+    for frame_idx, frame in enumerate(frames):
+        h = frame.shape[0]
+        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
+        if not _has_tab_content(tab_crop): 
+            continue
+            
+        clean_bin = get_clean_binary(tab_crop)
+        
+        if last_1fps_bin is not None:
+            diff = cv2.absdiff(clean_bin, last_1fps_bin)
+            error = np.count_nonzero(diff) / clean_bin.size
+            if error < 0.05:
+                has_changed_since_last_solid = True
+                
+                if last_solid_page is not None:
+                    s_diff = cv2.absdiff(clean_bin, last_solid_page)
+                    s_err = np.count_nonzero(s_diff) / clean_bin.size
+                    if s_err < 0.05:
+                        has_changed_since_last_solid = False
+                
+                if has_changed_since_last_solid:
+                    last_solid_page = clean_bin.copy()
+                    
+                    gray_page = _extract_print_channel(tab_crop)
+                    bar_coords = _detect_measure_bars(gray_page)
+                    
+                    if bar_coords:
+                        coords = [0] + bar_coords + [tab_crop.shape[1]]
+                        coords = sorted(list(set(coords)))
+                        
+                        page_measures = []
+                        for i in range(len(coords) - 1):
+                            x_start = coords[i]
+                            x_end = coords[i+1]
+                            if x_end - x_start < 40: continue
+                            page_measures.append(tab_crop[:, x_start:x_end])
+                            
+                        if page_measures:
+                            if not unique_measures:
+                                unique_measures.extend(page_measures)
+                            else:
+                                first_m = page_measures[0]
+                                first_sprite = get_number_sprite(first_m)
+                                
+                                best_val = 0.0
+                                best_offset = 0
+                                anchored = False
+                                
+                                # Deep Scan Deduplication (find exact Number Sprite match)
+                                if first_sprite is not None:
+                                    for scan_dist in range(1, len(unique_measures) + 1):
+                                        past_idx = len(unique_measures) - scan_dist
+                                        past_m = unique_measures[past_idx]
+                                        past_sprite = get_number_sprite(past_m)
+                                        
+                                        if past_sprite is not None:
+                                            hs = min(first_sprite.shape[0], past_sprite.shape[0])
+                                            ws = min(first_sprite.shape[1], past_sprite.shape[1])
+                                            if hs > 5 and ws > 5:
+                                                s1 = first_sprite[:hs, :ws]
+                                                s2 = past_sprite[:hs, :ws]
+                                                
+                                                template = s1[2:-2, 2:-2]
+                                                if template.shape[0] >= 5 and template.shape[1] >= 5:
+                                                    res = cv2.matchTemplate(s2, template, cv2.TM_CCOEFF_NORMED)
+                                                    max_val = res[0][0]
+                                                    if max_val > best_val:
+                                                        best_val = max_val
+                                                        best_offset = len(unique_measures) - past_idx
+                                                        
+                                    if best_val > 0.85:
+                                        print(f"    [Sprite Anchor] Detected Measure {best_offset}! Ignoring duplicates.")
+                                        new_start_offset = best_offset
+                                        anchored = True
+                                        
+                                # Fallback geometric anchor for unlabeled pages (restricted back-scan)
+                                if not anchored:
+                                    bin_first = get_clean_binary(first_m)
+                                    best_err = 1.0
+                                    for scan_dist in range(1, min(4, len(unique_measures) + 1)):
+                                        past_idx = len(unique_measures) - scan_dist
+                                        past_m = unique_measures[past_idx]
+                                        bin_past = get_clean_binary(past_m)
+                                        
+                                        if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
+                                            hs = min(bin_first.shape[0], bin_past.shape[0])
+                                            ws = min(bin_first.shape[1], bin_past.shape[1])
+                                            s1 = bin_first[:hs, :ws]
+                                            s2 = bin_past[:hs, :ws]
+                                            
+                                            m_diff = cv2.absdiff(s1, s2)
+                                            error_ratio = np.sum(m_diff > 0) / s1.size
+                                            if error_ratio < best_err:
+                                                best_err = error_ratio
+                                                best_offset = len(unique_measures) - past_idx
+                                                
+                                    if best_err < 0.15:
+                                        new_start_offset = best_offset
+                                        anchored = True
+
+                                if anchored and new_start_offset < len(page_measures):
+                                    unique_measures.extend(page_measures[new_start_offset:])
+                                elif not anchored:
+                                    unique_measures.extend(page_measures)
+                                    
+        last_1fps_bin = clean_bin.copy()
+            
+    print(f"  -> 동기화 중복 제거 완료: Stability 기반 {len(unique_measures)}개 마디 보존")
+
+    final_chunks = []
+    current_row_measures = []
+    current_row_width = 0
+    
+    for measure_img in unique_measures:
+        measure_w = measure_img.shape[1]
+        
+        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
+            row_img = np.hstack(current_row_measures)
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+            final_chunks.append(row_img)
+            current_row_measures = [measure_img]
+            current_row_width = measure_w
+        else:
+            current_row_measures.append(measure_img)
+            current_row_width += measure_w
+            
+    if current_row_measures:
+        row_img = np.hstack(current_row_measures)
+        if row_img.shape[1] > chunk_width:
+             row_img = row_img[:, :chunk_width]
+        else:
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+        final_chunks.append(row_img)
+        
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
+    return final_chunks
+"""
+
+pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'
+new_code = re.sub(pattern, new_func, code, flags=re.DOTALL)
+
+with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
+    f.write(new_code)
+print("Stable Sprite Anchor Patched.")
--- a/scripts/debug/patch_final_monotonic.py
+++ b/scripts/debug/patch_final_monotonic.py
--- a/scripts/debug/patch_final_truth.py
+++ b/scripts/debug/patch_final_truth.py
@@ -0,0 +1,145 @@
+import re
+
+with open('youtube_tab_to_pdf.py', 'r', encoding='utf-8') as f:  # relative path: resolved against the current working directory
+    code = f.read()  # whole module source; the regex substitution at the end of this script operates on it
+
+new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    print(f"[4/5] 순차 Keyframe 페이지 추출 중...")
+    
+    strip_tops, strip_bottoms = [], []
+    for frame in frames[:50]:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            strip_tops.append(strip[0])
+            strip_bottoms.append(strip[1])
+            
+    if not strip_tops: return []
+        
+    median_top = int(np.median(strip_tops))
+    median_bottom = int(np.median(strip_bottoms))
+    
+    # 1. 0.05 threshold Tracker to completely ignore all fade/blur frames and extract EXACTLY 13 keyframes
+    tracker = TemporalTracker(diff_threshold=0.05)
+    
+    for frame in frames:
+        h = frame.shape[0]
+        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
+        if not _has_tab_content(tab_crop): 
+            continue
+        tracker.process_frame(tab_crop)
+
+    unique_pages = tracker.get_unique_pages()
+    if not unique_pages: return []
+    
+    unique_measures = []
+    chunk_width = 1280
+    
+    def get_clean_binary(img):
+        gray = np.max(img, axis=2)
+        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        return binary
+
+    for page_idx, page in enumerate(unique_pages):
+        gray_page = _extract_print_channel(page)
+        bar_coords = _detect_measure_bars(gray_page)
+        
+        if not bar_coords: continue
+            
+        coords = [0] + bar_coords + [page.shape[1]]
+        coords = sorted(list(set(coords)))
+        
+        page_measures = []
+        for i in range(len(coords) - 1):
+            x_start = coords[i]
+            x_end = coords[i+1]
+            if x_end - x_start < 40: continue
+            page_measures.append(page[:, x_start:x_end])
+            
+        if not page_measures: continue
+            
+        if not unique_measures:
+            unique_measures.extend(page_measures)
+            continue
+            
+        first_m = page_measures[0]
+        bin_first = get_clean_binary(first_m)
+        
+        best_error = 1.0
+        best_offset = 0
+        anchored = False
+        
+        # 3. CRUCIAL FIX: scan_dist limited to exactly 3.
+        # Preventing M40 from visually matching M9 because Chorus repeats.
+        for scan_dist in range(1, min(4, len(unique_measures) + 1)):
+            past_idx = len(unique_measures) - scan_dist
+            past_m = unique_measures[past_idx]
+            bin_past = get_clean_binary(past_m)
+            
+            # 2. Binary Absdiff error < 0.20 for subpixel-immune, noise-immune math overlap matching
+            if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
+                hs = min(bin_first.shape[0], bin_past.shape[0])
+                ws = min(bin_first.shape[1], bin_past.shape[1])
+                s1 = bin_first[:hs, :ws]
+                s2 = bin_past[:hs, :ws]
+                
+                diff = cv2.absdiff(s1, s2)
+                error_ratio = np.sum(diff > 0) / s1.size
+                
+                if error_ratio < best_error:
+                    best_error = error_ratio
+                    best_offset = len(unique_measures) - past_idx
+                    
+        if best_error < 0.20:
+            new_start_offset = best_offset
+            anchored = True
+            
+        if anchored and new_start_offset < len(page_measures):
+            # Overlapped exactly at this point, only append the truly NEW measures
+            unique_measures.extend(page_measures[new_start_offset:])
+        elif not anchored:
+            # Completely discrete page flip with no overlap, append all measures
+            unique_measures.extend(page_measures)
+            
+    print(f"  -> 동기화 중복 제거 완료: 무손실 정적 페이지 기반 {len(unique_measures)}개 연속 마디 보존")
+
+    final_chunks = []
+    current_row_measures = []
+    current_row_width = 0
+    
+    for measure_img in unique_measures:
+        measure_w = measure_img.shape[1]
+        
+        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
+            row_img = np.hstack(current_row_measures)
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+            final_chunks.append(row_img)
+            current_row_measures = [measure_img]
+            current_row_width = measure_w
+        else:
+            current_row_measures.append(measure_img)
+            current_row_width += measure_w
+            
+    if current_row_measures:
+        row_img = np.hstack(current_row_measures)
+        if row_img.shape[1] > chunk_width:
+             row_img = row_img[:, :chunk_width]
+        else:
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+        final_chunks.append(row_img)
+        
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
+    return final_chunks
+"""  # Replacement source for extract_unique_scroll (kept as text, not executed here); it ends in `return final_chunks`, the same text the substitution pattern ends on.
+
+pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'
+new_code = re.sub(pattern, new_func, code, flags=re.DOTALL)
+
+with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
+    f.write(new_code)
+print("Final Truth Pipeline Patched.")
--- a/scripts/debug/patch_holy_grail_fix.py
+++ b/scripts/debug/patch_holy_grail_fix.py
@@ -0,0 +1,156 @@
+import re
+
+with open('youtube_tab_to_pdf.py', 'r', encoding='utf-8') as f:  # relative path: resolved against the current working directory
+    code = f.read()  # whole module source; the regex substitution at the end of this script operates on it
+
+new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    print(f"[4/5] 순차 Binarized-Tracker 정밀 추출 중...")
+    
+    strip_tops, strip_bottoms = [], []
+    for frame in frames[:50]:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            strip_tops.append(strip[0])
+            strip_bottoms.append(strip[1])
+            
+    if not strip_tops: return []
+        
+    median_top = int(np.median(strip_tops))
+    median_bottom = int(np.median(strip_bottoms))
+    
+    def get_clean_binary(img):
+        gray = np.max(img, axis=2)
+        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        return binary
+
+    diff_threshold = 0.015
+    clean_to_bgr = [] 
+    last_clean_bin = None
+
+    for frame in frames:
+        h = frame.shape[0]
+        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
+        if not _has_tab_content(tab_crop): 
+            continue
+            
+        clean_bin = get_clean_binary(tab_crop)
+        if last_clean_bin is not None:
+            raw_diff = cv2.absdiff(clean_bin, last_clean_bin)
+            non_zero_ratio = np.count_nonzero(raw_diff) / clean_bin.size
+            if non_zero_ratio > diff_threshold:
+                clean_to_bgr.append(tab_crop)
+                last_clean_bin = clean_bin.copy()
+        else:
+            clean_to_bgr.append(tab_crop)
+            last_clean_bin = clean_bin.copy()
+
+    unique_pages = clean_to_bgr
+    if not unique_pages: return []
+    
+    print(f"  -> {len(unique_pages)}개의 고유 정적 페이지 캡처 완료. 3-마디 역탐색 동기화 시작...")
+    
+    unique_measures = []
+    chunk_width = 1280
+
+    for page_idx, page in enumerate(unique_pages):
+        gray_page = _extract_print_channel(page)
+        bar_coords = _detect_measure_bars(gray_page)
+        
+        if not bar_coords: continue
+            
+        coords = [0] + bar_coords + [page.shape[1]]
+        coords = sorted(list(set(coords)))
+        
+        page_measures = []
+        for i in range(len(coords) - 1):
+            x_start = coords[i]
+            x_end = coords[i+1]
+            if x_end - x_start < 40: continue
+            page_measures.append(page[:, x_start:x_end])
+            
+        if not page_measures: continue
+            
+        if not unique_measures:
+            unique_measures.extend(page_measures)
+            continue
+            
+        first_m = page_measures[0]
+        bin_first = get_clean_binary(first_m)
+        
+        best_error = 1.0
+        best_offset = 0
+        anchored = False
+        
+        for scan_dist in range(1, min(4, len(unique_measures) + 1)):
+            past_idx = len(unique_measures) - scan_dist
+            past_m = unique_measures[past_idx]
+            bin_past = get_clean_binary(past_m)
+            
+            if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
+                hs = min(bin_first.shape[0], bin_past.shape[0])
+                ws = min(bin_first.shape[1], bin_past.shape[1])
+                s1 = bin_first[:hs, :ws]
+                s2 = bin_past[:hs, :ws]
+                
+                diff = cv2.absdiff(s1, s2)
+                error_ratio = np.sum(diff > 0) / s1.size
+                
+                if error_ratio < best_error:
+                    best_error = error_ratio
+                    best_offset = len(unique_measures) - past_idx
+                    
+        if best_error < 0.20:
+            new_start_offset = best_offset
+            anchored = True
+            print(f"    [Anchor] Page {page_idx} -> PDF offset {best_offset} (Best Error: {best_error:.4f})")
+        else:
+            print(f"    [New Page] Page {page_idx} -> No Overlap (Best Error: {best_error:.4f})")
+            
+        if anchored and new_start_offset < len(page_measures):
+            unique_measures.extend(page_measures[new_start_offset:])
+        elif not anchored:
+            unique_measures.extend(page_measures)
+            
+    print(f"  -> 동기화 중복 제거 완료: 무손실 정적 페이지 기반 {len(unique_measures)}개 연속 마디 보존")
+
+    final_chunks = []
+    current_row_measures = []
+    current_row_width = 0
+    
+    for measure_img in unique_measures:
+        measure_w = measure_img.shape[1]
+        
+        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
+            row_img = np.hstack(current_row_measures)
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+            final_chunks.append(row_img)
+            current_row_measures = [measure_img]
+            current_row_width = measure_w
+        else:
+            current_row_measures.append(measure_img)
+            current_row_width += measure_w
+            
+    if current_row_measures:
+        row_img = np.hstack(current_row_measures)
+        if row_img.shape[1] > chunk_width:
+             row_img = row_img[:, :chunk_width]
+        else:
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+        final_chunks.append(row_img)
+        
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
+    return final_chunks
+"""  # Replacement source for extract_unique_scroll (kept as text, not executed here); it ends in `return final_chunks`, the same text the substitution pattern ends on.
+
+pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'
+new_code = re.sub(pattern, new_func, code, flags=re.DOTALL)
+
+with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
+    f.write(new_code)
+print("Holy Grail Pipeline Embedded Inline successfully!")
--- a/scripts/debug/patch_ocr_sprite.py
+++ b/scripts/debug/patch_ocr_sprite.py
@@ -0,0 +1,180 @@
+import re
+
+with open('youtube_tab_to_pdf.py', 'r', encoding='utf-8') as f:  # relative path: resolved against the current working directory
+    code = f.read()  # whole module source; the regex substitution at the end of this script operates on it
+
+new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    print(f"[4/5] 순차 Number Sprite Template 앵커 기반 마디 추출 중...")
+    
+    strip_tops, strip_bottoms = [], []
+    for frame in frames[:50]:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            strip_tops.append(strip[0])
+            strip_bottoms.append(strip[1])
+            
+    if not strip_tops: return []
+        
+    median_top = int(np.median(strip_tops))
+    median_bottom = int(np.median(strip_bottoms))
+    
+    unique_measures = []
+    chunk_width = 1280
+    
+    def get_number_sprite(m_img):
+        # We explicitly use inverse thresholding to capture the tiny white number on black background
+        gray = np.max(m_img, axis=2)
+        _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        row_sums = np.sum(thresh, axis=1) / 255
+        staff_lines = np.where(row_sums > m_img.shape[1] * 0.5)[0]
+        y_staff = staff_lines[0] if len(staff_lines) > 0 else 50
+        
+        crop_y1 = max(0, y_staff - 35)
+        crop_y2 = max(0, y_staff - 2)
+        crop_x1 = 0
+        crop_x2 = min(60, m_img.shape[1])
+        
+        if crop_y2 <= crop_y1 or crop_x2 <= crop_x1: return None
+        sprite = thresh[crop_y1:crop_y2, crop_x1:crop_x2]
+        
+        # MUST BE STRICT: If there are fewer than 8 white pixels, it's a BLANK SPRITE.
+        # Blank sprites caused the catastrophic 1->36 time-travel deletion!
+        if np.count_nonzero(sprite > 127) < 8: return None
+        return sprite
+
+    for frame_idx, frame in enumerate(frames):
+        h = frame.shape[0]
+        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
+        if not _has_tab_content(tab_crop): 
+            continue
+            
+        gray_page = _extract_print_channel(tab_crop)
+        bar_coords = _detect_measure_bars(gray_page)
+        
+        if not bar_coords: continue
+            
+        coords = [0] + bar_coords + [tab_crop.shape[1]]
+        coords = sorted(list(set(coords)))
+        
+        page_measures = []
+        for i in range(len(coords) - 1):
+            x_start = coords[i]
+            x_end = coords[i+1]
+            if x_end - x_start < 40: continue
+            page_measures.append(tab_crop[:, x_start:x_end])
+            
+        if not page_measures: continue
+            
+        if not unique_measures:
+            unique_measures.extend(page_measures)
+            first_sprite = get_number_sprite(page_measures[0])
+            has_pixels = np.count_nonzero(first_sprite > 127) if first_sprite is not None else 0
+            print(f"  -> [초기화] 첫 프레임 배열 등록: {len(page_measures)}개 마디 (Sprite Pixels: {has_pixels})")
+            continue
+            
+        first_m = page_measures[0]
+        first_sprite = get_number_sprite(first_m)
+        
+        anchored = False
+        new_start_offset = 0
+        best_val = 0.0
+        
+        # Only attempt anchor if the first measure explicitly displays a sequence number.
+        # If it's blank, we DO NOT blindly match it to other blank measures!
+        if first_sprite is not None:
+            # We can scan backwards up to 15 measures because clear Number Sprites are completely unique IDs.
+            for scan_dist in range(1, min(15, len(unique_measures) + 1)):
+                past_idx = len(unique_measures) - scan_dist
+                past_m = unique_measures[past_idx]
+                past_sprite = get_number_sprite(past_m)
+                
+                if past_sprite is not None:
+                    hs = min(first_sprite.shape[0], past_sprite.shape[0])
+                    ws = min(first_sprite.shape[1], past_sprite.shape[1])
+                    s1 = first_sprite[:hs, :ws]
+                    s2 = past_sprite[:hs, :ws]
+                    
+                    template = s1[2:-2, 2:-2]
+                    if template.shape[0] >= 5 and template.shape[1] >= 5:
+                        res = cv2.matchTemplate(s2, template, cv2.TM_CCOEFF_NORMED)
+                        max_val = res[0][0]
+                        
+                        if max_val > best_val:
+                            best_val = max_val
+                            new_start_offset = len(unique_measures) - past_idx
+                            
+            if best_val > 0.85:
+                anchored = True
+                
+        # If we failed to anchor via Sprite (maybe this page has no numbers at all),
+        # we fallback to strict whole-measure Template Matching (TM_CCOEFF_NORMED on greyscale prints to survive subpixel scroll drift)
+        if not anchored:
+            bin_first = _extract_print_channel(first_m) # greyscale thresholded
+            for scan_dist in range(1, min(4, len(unique_measures) + 1)): # strictly limit to 4 to prevent musical loops
+                past_idx = len(unique_measures) - scan_dist
+                past_m = unique_measures[past_idx]
+                bin_past = _extract_print_channel(past_m)
+                
+                if abs(bin_first.shape[1] - bin_past.shape[1]) <= 30:
+                    hs = min(bin_first.shape[0], bin_past.shape[0])
+                    ws = min(bin_first.shape[1], bin_past.shape[1])
+                    s1 = bin_first[:hs, :ws]
+                    s2 = bin_past[:hs, :ws]
+                    
+                    template = s1[10:-10, 10:-10]
+                    if template.shape[0] >= 10 and template.shape[1] >= 10:
+                        res = cv2.matchTemplate(s2, template, cv2.TM_CCOEFF_NORMED)
+                        max_val = res[0][0]
+                        if max_val > 0.85:
+                            new_start_offset = len(unique_measures) - past_idx
+                            anchored = True
+                            break
+
+        if anchored and new_start_offset < len(page_measures):
+            unique_measures.extend(page_measures[new_start_offset:])
+        elif not anchored:
+            unique_measures.extend(page_measures)
+            
+    print(f"  -> 동기화 중복 제거 완료: Number Sprite 시계열 기반 {len(unique_measures)}개 마디 보존")
+
+    final_chunks = []
+    current_row_measures = []
+    current_row_width = 0
+    
+    for measure_img in unique_measures:
+        measure_w = measure_img.shape[1]
+        
+        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
+            row_img = np.hstack(current_row_measures)
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+            final_chunks.append(row_img)
+            current_row_measures = [measure_img]
+            current_row_width = measure_w
+        else:
+            current_row_measures.append(measure_img)
+            current_row_width += measure_w
+            
+    if current_row_measures:
+        row_img = np.hstack(current_row_measures)
+        if row_img.shape[1] > chunk_width:
+             row_img = row_img[:, :chunk_width]
+        else:
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+        final_chunks.append(row_img)
+        
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
+    return final_chunks
+"""
+
+pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'
+new_code = re.sub(pattern, new_func, code, flags=re.DOTALL)
+
+with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
+    f.write(new_code)
+print("Supreme Logic Embedded.")
--- a/scripts/debug/patch_stable_trigger.py
+++ b/scripts/debug/patch_stable_trigger.py
@@ -0,0 +1,153 @@
+import re
+
+with open('youtube_tab_to_pdf.py', 'r', encoding='utf-8') as f:
+    code = f.read()
+
+new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    print(f"[4/5] 순차 Stable Content Trigger 방식 추출 중...")
+    
+    strip_tops, strip_bottoms = [], []
+    for frame in frames[:50]:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            strip_tops.append(strip[0])
+            strip_bottoms.append(strip[1])
+            
+    if not strip_tops: return []
+        
+    median_top = int(np.median(strip_tops))
+    median_bottom = int(np.median(strip_bottoms))
+    
+    def get_clean_binary(img):
+        gray = np.max(img, axis=2)
+        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        return binary
+
+    unique_measures = []
+    chunk_width = 1280
+    
+    last_1fps_bin = None
+    last_solid_page = None
+
+    for frame_idx, frame in enumerate(frames):
+        h = frame.shape[0]
+        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
+        if not _has_tab_content(tab_crop): 
+            continue
+            
+        clean_bin = get_clean_binary(tab_crop)
+        
+        if last_1fps_bin is not None:
+            # Check stability compared to 1 second ago
+            diff = cv2.absdiff(clean_bin, last_1fps_bin)
+            error = np.count_nonzero(diff) / clean_bin.size
+            if error < 0.05: # Page is fully stabilized (not a fading transition)
+                has_changed_since_last_solid = True
+                
+                if last_solid_page is not None:
+                    s_diff = cv2.absdiff(clean_bin, last_solid_page)
+                    s_err = np.count_nonzero(s_diff) / clean_bin.size
+                    if s_err < 0.05:
+                        has_changed_since_last_solid = False
+                
+                # We only process this page if it's securely stable AND we haven't already processed it
+                if has_changed_since_last_solid:
+                    last_solid_page = clean_bin.copy()
+                    
+                    # Extract measures
+                    gray_page = _extract_print_channel(tab_crop)
+                    bar_coords = _detect_measure_bars(gray_page)
+                    
+                    if bar_coords:
+                        coords = [0] + bar_coords + [tab_crop.shape[1]]
+                        coords = sorted(list(set(coords)))
+                        
+                        page_measures = []
+                        for i in range(len(coords) - 1):
+                            x_start = coords[i]
+                            x_end = coords[i+1]
+                            if x_end - x_start < 40: continue
+                            page_measures.append(tab_crop[:, x_start:x_end])
+                            
+                        if page_measures:
+                            if not unique_measures:
+                                unique_measures.extend(page_measures)
+                            else:
+                                first_m = page_measures[0]
+                                bin_first = get_clean_binary(first_m)
+                                
+                                best_error = 1.0
+                                best_offset = 0
+                                anchored = False
+                                
+                                # scan_dist=4 ensures we never loop back to identical repeating choruses from 10 seconds ago!
+                                for scan_dist in range(1, min(4, len(unique_measures) + 1)):
+                                    past_idx = len(unique_measures) - scan_dist
+                                    past_m = unique_measures[past_idx]
+                                    bin_past = get_clean_binary(past_m)
+                                    
+                                    if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
+                                        hs = min(bin_first.shape[0], bin_past.shape[0])
+                                        ws = min(bin_first.shape[1], bin_past.shape[1])
+                                        s1 = bin_first[:hs, :ws]
+                                        s2 = bin_past[:hs, :ws]
+                                        
+                                        m_diff = cv2.absdiff(s1, s2)
+                                        error_ratio = np.sum(m_diff > 0) / s1.size
+                                        
+                                        if error_ratio < best_error:
+                                            best_error = error_ratio
+                                            best_offset = len(unique_measures) - past_idx
+                                            
+                                if best_error < 0.15:
+                                    new_start_offset = best_offset
+                                    if new_start_offset < len(page_measures):
+                                        unique_measures.extend(page_measures[new_start_offset:])
+                                else:
+                                    unique_measures.extend(page_measures)
+                                    
+        last_1fps_bin = clean_bin.copy()
+            
+    print(f"  -> 동기화 중복 제거 완료: Stability 기반 {len(unique_measures)}개 마디 보존")
+
+    final_chunks = []
+    current_row_measures = []
+    current_row_width = 0
+    
+    for measure_img in unique_measures:
+        measure_w = measure_img.shape[1]
+        
+        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
+            row_img = np.hstack(current_row_measures)
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+            final_chunks.append(row_img)
+            current_row_measures = [measure_img]
+            current_row_width = measure_w
+        else:
+            current_row_measures.append(measure_img)
+            current_row_width += measure_w
+            
+    if current_row_measures:
+        row_img = np.hstack(current_row_measures)
+        if row_img.shape[1] > chunk_width:
+             row_img = row_img[:, :chunk_width]
+        else:
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+        final_chunks.append(row_img)
+        
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
+    return final_chunks
+"""
+
+pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'  # non-greedy: ends at the first "return final_chunks" after the def line
+new_code, n_subs = re.subn(pattern, lambda _m: new_func, code, flags=re.DOTALL)  # callable replacement => new_func is inserted verbatim, with no backslash-escape expansion by re
+if n_subs == 0: raise SystemExit("extract_unique_scroll not found in youtube_tab_to_pdf.py; file left unchanged")  # never rewrite the file or report success on a no-op
+with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
+    f.write(new_code)
+print("Stable Content Trigger Patched.")
--- a/scripts/debug/patch_tracker.py
+++ b/scripts/debug/patch_tracker.py
@@ -0,0 +1,80 @@
+import sys
+
+with open('youtube_tab_to_pdf.py', 'r', encoding='utf-8') as f:
+    lines = f.readlines()
+
+new_lines = []
+skip = False
+
+import_added = False
+
+for line in lines:
+    if line.startswith('import cv2') and not import_added:
+        new_lines.append(line)
+        new_lines.append('from video_cv_tracker import TemporalTracker\n')
+        import_added = True
+        continue
+        
+    if line.startswith('def extract_unique_scroll(frames:'):
+        skip = True
+        new_lines.append('''def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    print(f"[4/5] 스크롤형 Tab 시계열 추적 추출 중...")
+    
+    strip_tops, strip_bottoms = [], []
+    for frame in frames[:50]:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            strip_tops.append(strip[0])
+            strip_bottoms.append(strip[1])
+            
+    if not strip_tops:
+        return []
+        
+    median_top = int(np.median(strip_tops))
+    median_bottom = int(np.median(strip_bottoms))
+    
+    tracker = TemporalTracker()
+    
+    for frame in frames:
+        h = frame.shape[0]
+        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
+        if not _has_tab_content(tab_crop): 
+            continue
+        tracker.process_frame(tab_crop)
+
+    panorama = tracker.get_final_panorama()
+    if panorama is None:
+        return []
+        
+    print(f"  -> 생성된 파노라마 길이: {panorama.shape[1]}px")
+    
+    chunk_width = 1280
+    final_chunks = []
+    
+    w = panorama.shape[1]
+    start_x = 0
+    
+    while start_x < w:
+        chunk = panorama[:, start_x:min(w, start_x + chunk_width)]
+        if chunk.shape[1] < chunk_width:
+            pad = np.full((chunk.shape[0], chunk_width - chunk.shape[1], 3), 255, dtype=np.uint8)
+            chunk = np.hstack([chunk, pad])
+        final_chunks.append(chunk)
+        start_x += chunk_width
+        
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개")
+    return final_chunks
+
+''')
+        continue
+        
+    if skip and line.startswith('def extract_unique_overlay('):
+        skip = False
+
+    if not skip:
+        new_lines.append(line)
+
+if skip: raise SystemExit("'def extract_unique_overlay(' not found after extract_unique_scroll; refusing to write a truncated file")  # skip still set => every line after the replaced function was dropped from new_lines
+with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
+    f.writelines(new_lines)
+print("Patched youtube_tab_to_pdf.py successfully.")
--- a/scripts/debug/score_log.txt
+++ b/scripts/debug/score_log.txt
--- a/scripts/debug/test_blur_match.py
+++ b/scripts/debug/test_blur_match.py
@@ -0,0 +1,41 @@
+import cv2
+import numpy as np
+import time
+
+img0 = cv2.imread(r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\raw_chunk_00.png")
+img1 = cv2.imread(r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\raw_chunk_01.png")
+
+gray0 = cv2.cvtColor(img0, cv2.COLOR_BGR2GRAY)
+gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
+
+w = gray0.shape[1]
+
+best_ov = 0
+min_mad = float('inf')
+
+start_time = time.time()
+# Downsample by 2 horizontally & vertically for extreme speed
+small0 = cv2.resize(gray0, (w//2, gray0.shape[0]//2))
+small1 = cv2.resize(gray1, (w//2, gray1.shape[0]//2))
+sw = small0.shape[1]
+
+# We are testing overlap pixel widths
+for ov in range(sw-2, 10, -1):
+    diff = cv2.absdiff(small0[:, -ov:], small1[:, :ov])
+    mad = np.mean(diff)
+    
+    if mad < min_mad:
+        min_mad = mad
+        best_ov = ov * 2 # map back to original scale
+        
+    if min_mad < 3.0: # Break early if effectively a perfect match!
+        best_ov = ov * 2
+        break
+
+end_time = time.time()
+print(f"MSE MAD found overlap {best_ov}px with MAD {min_mad:.2f} in {(end_time-start_time)*1000:.1f}ms")
+
+# Verify
+stitched = np.hstack([img0, img1[:, best_ov:]])
+cv2.imwrite(r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\test_mse_stitch.png", stitched)
+print("Exported test_mse_stitch.png")
--- a/scripts/debug/test_col_sums.py
+++ b/scripts/debug/test_col_sums.py
@@ -0,0 +1,47 @@
+import cv2
+import numpy as np
+import glob
+
+videos = glob.glob('output/*.mp4')
+cap = cv2.VideoCapture(videos[0])
+cap.set(cv2.CAP_PROP_POS_FRAMES, 500)
+ret, frame = cap.read()
+cap.release()
+
+def _find_white_tab_strip(bgr: np.ndarray):
+    """Return (first, last) row index whose count of >200-gray pixels exceeds 10% of the width, or None if fewer than 2 such rows."""
+    mask = cv2.threshold(cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY), 200, 255, cv2.THRESH_BINARY)[1]
+    width = mask.shape[1]
+    # mask holds only 0/255, so sum/255 is the number of bright pixels in each row
+    bright_per_row = np.sum(mask, axis=1) / 255
+    rows = np.where(bright_per_row > width * 0.1)[0]
+    if len(rows) < 2: return None
+    return rows[0], rows[-1]
+
+strip = _find_white_tab_strip(frame)
+if strip:
+    y1, y2 = strip
+    roi = frame[y1:y2, :]
+    
+    gray_roi = np.max(roi, axis=2)
+    _, binary = cv2.threshold(gray_roi, 200, 255, cv2.THRESH_BINARY)
+    
+    col_sums = np.sum(binary, axis=0) / 255
+    h_roi = y2 - y1
+    
+    # Relaxed to 40% to survive hand occlusions. Note stems max out at ~20-30%.
+    bars = np.where(col_sums > h_roi * 0.4)[0]
+    
+    clean_bars = []
+    for x in bars:
+        if not clean_bars or x - clean_bars[-1] > 20: # 20px min distance
+            clean_bars.append(int(x))
+            
+    # Include edges
+    if not clean_bars or clean_bars[0] > 50: clean_bars.insert(0, 0)
+    if clean_bars[-1] < binary.shape[1] - 50: clean_bars.append(binary.shape[1])
+    
+    print(f"Top: {y1}, Bottom: {y2}, Height: {h_roi}")
+    print(f"Detected Clean Measure Bars: {clean_bars}")
+else:
+    print("Could not find tab strip.")
--- a/scripts/debug/test_crop_pipeline.py
+++ b/scripts/debug/test_crop_pipeline.py
@@ -0,0 +1,39 @@
+import cv2
+import numpy as np
+from video_cv_tracker import TemporalTracker
+import time
+
+def extract_cropped_pages(video_path, limit_frames=3000):
+    """Feed rows 103:280 of up to `limit_frames` frames (each resized to 1280px wide) to a TemporalTracker.
+
+    Returns `tracker.get_unique_pages()` once the frame limit is reached or a read fails.
+    """
+    cap = cv2.VideoCapture(video_path)
+    tracker = TemporalTracker(diff_threshold=0.20)
+
+    try:  # release the capture even if resize or the tracker raises mid-stream
+        for _ in range(limit_frames):
+            ret, frame = cap.read()
+            if not ret: break
+            # Normalise the width to 1280 so the fixed row band below refers to the same scale for any source size
+            scale = 1280 / frame.shape[1]
+            frame = cv2.resize(frame, (1280, int(frame.shape[0] * scale)))
+
+            # Rows 103:280 come from the ASCII analysis of this video: the sheet-music band ending above the guitarist's head
+            ribbon = frame[103:280, :]
+            tracker.process_frame(ribbon)
+        return tracker.get_unique_pages()
+    finally:
+        cap.release()
+
+if __name__ == "__main__":
+    video_path = "output/サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4"
+    pages = extract_cropped_pages(video_path)
+    
+    print(f"Extracted {len(pages)} perfectly cropped median pages.")
+    
+    if pages:
+        # Stack vertically
+        final_img = np.vstack(pages)
+        cv2.imwrite("C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/restored_perfect_crop.png", final_img)
+        print("Saved cleanly cropped vertical stack.")
--- a/scripts/debug/test_crop_raw.py
+++ b/scripts/debug/test_crop_raw.py
@@ -0,0 +1,17 @@
+import cv2
+import numpy as np
+
+frame = cv2.imread("C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/raw_frame_30s.png")
+
+# Crop based on ASCII mathematical deduction
+# Top black letterbox is 0:100
+# White sheet music is 100:280
+# Guitarist is 280:720
+
+crop1 = frame[103:280, :]
+cv2.imwrite("C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/crop_103_280.png", crop1)
+
+crop2 = frame[0:180, :]
+cv2.imwrite("C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/crop_0_180.png", crop2)
+
+print("Saved crop_103_280.png and crop_0_180.png")
--- a/scripts/debug/test_easyocr.py
+++ b/scripts/debug/test_easyocr.py
@@ -0,0 +1,31 @@
+import cv2
+import numpy as np
+import easyocr
+import time
+
+reader = easyocr.Reader(['en'], gpu=False)
+
+def test_ocr(image_text, img_data):
+    """Run digit-only EasyOCR on `img_data` (3x cubic upscale, 50px white border) and print the result with the elapsed time."""
+    margin = 50
+    enlarged = cv2.resize(img_data, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
+    page_like = cv2.copyMakeBorder(enlarged, margin, margin, margin, margin, cv2.BORDER_CONSTANT, value=[255, 255, 255])
+    # Time only the recognition call, not the preprocessing above
+    t0 = time.time()
+    results = reader.readtext(page_like, allowlist="0123456789")
+    tf = time.time()
+    # Output format: label, raw readtext result, seconds to two decimals
+    print(f"[{image_text}] Result: {results} (took {tf-t0:.2f}s)")
+
+# Generate a tiny "37" (white on black)
+img_37 = np.zeros((30, 40), dtype=np.uint8)
+img_37[5:10, 10:20] = 255 # Top of "3"
+img_37[12:15, 10:20] = 255 # Mid of "3"
+img_37[20:25, 10:20] = 255 # Bot of "3"
+img_37[5:10, 25:35] = 255 # Top of "7"
+img_37[5:25, 30:35] = 255 # Right of "7"
+
+# Invert it so it's black text on white background (what OCR expects)
+img_37_inv = cv2.bitwise_not(img_37)
+
+test_ocr("Tiny 37 Synth", img_37_inv)
--- a/scripts/debug/test_flip.py
+++ b/scripts/debug/test_flip.py
@@ -0,0 +1,44 @@
+import cv2
+import numpy as np
+
+def test_page_flip_diff():
+    """Print frame-to-frame change ratios over the first 2000 frames of a video to expose page-flip spikes.
+
+    Uses the first match of output/*.mp4 (falling back to output/shintakarajima.mp4). Each frame is resized
+    to 1280px wide, cropped to rows 103:280 and converted to grayscale; the ratio is the fraction of pixels
+    whose absolute difference from the previous frame exceeds 50.
+    """
+    import glob
+    videos = glob.glob("output/*.mp4")
+    cap = cv2.VideoCapture(videos[0] if videos else "output/shintakarajima.mp4")
+    try:  # previously the capture was never released, including on the early return below
+        ret, prev_frame = cap.read()
+        if not ret: return
+        scale = 1280 / prev_frame.shape[1]
+        prev = cv2.resize(prev_frame, (1280, int(prev_frame.shape[0] * scale)))[103:280, :]
+        prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
+        idx = 1
+        max_diff = 0
+        max_diff_idx = -1
+        print("Scanning first 2000 frames for diff_ratio spikes...")
+        while idx < 2000:
+            ret, frame = cap.read()
+            if not ret: break
+            # Every consecutive frame pair is compared (no frame skipping)
+            curr = cv2.resize(frame, (1280, int(frame.shape[0] * scale)))[103:280, :]
+            curr_gray = cv2.cvtColor(curr, cv2.COLOR_BGR2GRAY)
+            diff = cv2.absdiff(prev_gray, curr_gray)
+            _, thresh = cv2.threshold(diff, 50, 255, cv2.THRESH_BINARY)
+            ratio = np.sum(thresh > 0) / thresh.size
+            if ratio > 0.01:
+                print(f"Frame {idx}: diff_ratio = {ratio:.4f}")
+            if ratio > max_diff:
+                max_diff, max_diff_idx = ratio, idx
+            prev_gray = curr_gray
+            idx += 1
+        print(f"\nMax diff spike: {max_diff:.4f} at frame {max_diff_idx}")
+    finally:
+        cap.release()
+
+if __name__ == "__main__":
+    test_page_flip_diff()
--- a/scripts/debug/test_gap_morphology.py
+++ b/scripts/debug/test_gap_morphology.py
@@ -0,0 +1,61 @@
+import cv2
+import numpy as np
+import glob
+
+videos = glob.glob('output/*.mp4')
+cap = cv2.VideoCapture(videos[0])
+
+# Collect 30 consecutive frames starting at frame 500
+frames = []
+cap.set(cv2.CAP_PROP_POS_FRAMES, 500)
+for _ in range(30):
+    ret, frame = cap.read()
+    if not ret: break
+    frames.append(frame)
+cap.release()
+
+if len(frames) == 30:
+    median_frame = np.median(frames, axis=0).astype(np.uint8)
+    gray = np.max(median_frame, axis=2)
+    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+    
+    row_sums = np.sum(binary, axis=1) / 255
+    y_staff = np.where(row_sums > binary.shape[1] * 0.4)[0]
+    
+    if len(y_staff) > 0:
+        y_top = y_staff[0]
+        y_bottom = y_staff[-1]
+        staff_h = y_bottom - y_top
+        
+        roi = binary[y_top:y_bottom, :]
+        
+        # 1. Bridge vertical gaps (like the gap between standard notation and tab)
+        # kernel of 20px will bridge gaps up to 19px without increasing horizontal width
+        bridge_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 20))
+        bridged = cv2.dilate(roi, bridge_kernel)
+        
+        # 2. Erase everything that isn't a continuous vertical line of at least 80% staff height
+        # Note stems are short, so they get erased even after bridging!
+        open_height = int(staff_h * 0.8)
+        open_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, open_height))
+        isolated_bars = cv2.morphologyEx(bridged, cv2.MORPH_OPEN, open_kernel)
+        
+        # 3. The isolated_bars image now contains ONLY thick, pure measure bars. Get their X coords.
+        col_sums = np.sum(isolated_bars, axis=0) / 255
+        
+        # Any column that survives the opening holds a run of >= open_height pixels, so half of open_height is a lenient cut-off
+        bars = np.where(col_sums > open_height * 0.5)[0]
+        
+        clean_bars = []
+        for x in bars:
+            if not clean_bars or x - clean_bars[-1] > 20:
+                clean_bars.append(int(x))
+                
+        # Inject edges
+        if not clean_bars or clean_bars[0] > 50: clean_bars.insert(0, 0)
+        if clean_bars[-1] < binary.shape[1] - 50: clean_bars.append(binary.shape[1])
+                
+        print(f"Gap-Bridged Morphology Measure Boundaries: {clean_bars}")
+        cv2.imwrite("C:/Users/Certes/Desktop/guitar_score/debug_gap_bridged.png", isolated_bars)
+else:
+    print("Not enough frames.")
--- a/scripts/debug/test_iou_math.py
+++ b/scripts/debug/test_iou_math.py
@@ -0,0 +1,60 @@
+import cv2
+import numpy as np
+
+# Simulate a thin "1" and "2"
+img_12 = np.zeros((60, 100), dtype=np.uint8)
+img_12[10:50, 40:45] = 255 # The "1"
+img_12[10:15, 60:80] = 255 # Top of "2"
+img_12[15:45, 75:80] = 255 # Right of "2"
+img_12[45:50, 60:80] = 255 # Bottom of "2"
+
+# Simulate a thin "3" and "7"
+img_37 = np.zeros((60, 100), dtype=np.uint8)
+img_37[10:15, 30:50] = 255 # Top of "3"
+img_37[25:30, 30:50] = 255 # Mid of "3"
+img_37[45:50, 30:50] = 255 # Bot of "3"
+img_37[10:15, 60:80] = 255 # Top of "7"
+img_37[15:50, 75:80] = 255 # Right of "7"
+
+# Simulate the SAME "12" but shifted by 2 pixels (due to video wobble)
+img_12_shifted = np.zeros((60, 100), dtype=np.uint8)
+img_12_shifted[12:52, 42:47] = 255 
+img_12_shifted[12:17, 62:82] = 255 
+img_12_shifted[17:47, 77:82] = 255 
+img_12_shifted[47:52, 62:82] = 255 
+
+def compute_iou(s1, s2):
+    """Intersection-over-union of the non-zero pixels of s1 and s2 (denominator clamped to 1, so two empty masks give 0.0)."""
+    fg1, fg2 = s1 > 0, s2 > 0
+    return np.count_nonzero(fg1 & fg2) / max(1, np.count_nonzero(fg1 | fg2))
+
+def robust_match(s1, s2):
+    """Best IoU between s1 and s2 after 5x5 dilation of both, trying translations of -2/0/+2 px in x and y applied to s2."""
+    kernel = np.ones((5, 5), np.uint8)
+    d1 = cv2.dilate(s1, kernel, iterations=1)
+    d2 = cv2.dilate(s2, kernel, iterations=1)
+    # The 5x5 kernel thickens every stroke by 2px per side, so strokes offset by about 2px still overlap
+    # Try whole-pixel translations of the dilated s2 (these are not sub-pixel shifts) and keep the best IoU
+    best_iou = 0
+    for dy in [-2, 0, 2]:
+        for dx in [-2, 0, 2]:
+            M = np.float32([[1, 0, dx], [0, 1, dy]])
+            shifted_d2 = cv2.warpAffine(d2, M, (s2.shape[1], s2.shape[0]))
+            iou = compute_iou(d1, shifted_d2)
+            if iou > best_iou:
+                best_iou = iou
+                
+    return best_iou
+
+print("IoU (12 vs 37):", robust_match(img_12, img_37))
+print("IoU (12 vs 12_shifted):", robust_match(img_12, img_12_shifted))
+
+# Let's see what TM_CCOEFF_NORMED would have done:
+res = cv2.matchTemplate(img_37, img_12[5:-5, 5:-5], cv2.TM_CCOEFF_NORMED)
+_, max_val_diff, _, _ = cv2.minMaxLoc(res)
+
+res2 = cv2.matchTemplate(img_12_shifted, img_12[5:-5, 5:-5], cv2.TM_CCOEFF_NORMED)
+_, max_val_same, _, _ = cv2.minMaxLoc(res2)
+
+print("\nTM_CCOEFF_NORMED (12 vs 37):", max_val_diff)
+print("TM_CCOEFF_NORMED (12 vs 12_shifted):", max_val_same)
--- a/scripts/debug/test_live_ocr.py
+++ b/scripts/debug/test_live_ocr.py
@@ -0,0 +1,75 @@
+import cv2
+import numpy as np
+import easyocr
+import re
+from youtube_tab_to_pdf import TemporalTracker
+
+cap = cv2.VideoCapture(r"C:\Users\Certes\Desktop\guitar_score\output\shintakarajima.mp4")
+total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+fps = cap.get(cv2.CAP_PROP_FPS)
+
+tracker = TemporalTracker()
+cap.set(cv2.CAP_PROP_POS_FRAMES, 500)
+ret, check_frame = cap.read()
+
+from youtube_tab_to_pdf import _find_white_tab_strip
+bounds = _find_white_tab_strip(cv2.resize(check_frame, (1280, int(check_frame.shape[0] * (1280/check_frame.shape[1])))))
+if bounds:
+    crop_top = max(0, bounds[0] - 60)
+    crop_bottom = bounds[1]
+    tracker.set_crop(crop_top, crop_bottom)
+
+# Process only first 95 seconds to get unique pages
+print("Extracting unique pages from first 95 seconds...")
+tracker.process_video(cap, start_sec=0, end_sec=95)
+unique_pages = tracker.get_unique_pages()
+
+print(f"Extracted {len(unique_pages)} unique pages.")
+
+# Try easyOCR
+reader = easyocr.Reader(['en'], gpu=False)
+
+def extract_measure_number(page_bgr):  # OCR the digits in the small box above the staff at the first detected bar line; returns -1 when staff, bar line or digits are not found
+    cw = min(page_bgr.shape[1], 1000)
+    page_gray = cv2.cvtColor(page_bgr[:, :cw], cv2.COLOR_BGR2GRAY)
+    _, bin_inv = cv2.threshold(page_gray, 200, 255, cv2.THRESH_BINARY_INV)
+    row_sums = np.sum(bin_inv, axis=1) / 255.0
+    staff_rows = np.where(row_sums > cw * 0.4)[0]
+    # Rows whose dark-pixel (<=200) count exceeds 40% of the analysed width are treated as staff lines; at least 6 are required
+    if len(staff_rows) >= 6:
+        staff_y_top, staff_y_bottom = staff_rows[0], staff_rows[-1]
+        for r in staff_rows:
+            if r - staff_y_top > 100: break
+            staff_y_bottom = r
+    else:
+        return -1
+    # staff_y_bottom is now the last qualifying row within 100px of the first one
+    expected_h = max(10, staff_y_bottom - staff_y_top + 1)
+    staff_region = bin_inv[staff_y_top:staff_y_bottom+1, :]
+    col_sums = np.sum(staff_region, axis=0) / 255.0
+    bar_xs = np.where(col_sums >= expected_h * 0.8)[0]
+    # Columns dark over >= 80% of that height are taken as bar lines; the leftmost one anchors the number box
+    if len(bar_xs) == 0: return -1
+    x_bar = bar_xs[0]
+    # Number box: up to 25 rows directly above the staff top, up to 35 columns starting at the bar column
+    box_y1 = max(0, staff_y_top - 25)
+    box_y2 = staff_y_top
+    box_x1 = x_bar
+    box_x2 = min(page_gray.shape[1], x_bar + 35)
+    # NOTE(review): the box is empty when staff_y_top is 0 - confirm inputs always leave headroom above the staff
+    num_box = page_gray[box_y1:box_y2, box_x1:box_x2]
+    _, num_inv = cv2.threshold(num_box, 200, 255, cv2.THRESH_BINARY_INV)
+    num_for_ocr = cv2.bitwise_not(num_inv) 
+    # Inverse threshold followed by bitwise_not yields pixels <=200 as 0 and the rest as 255
+    upscaled = cv2.resize(num_for_ocr, None, fx=4, fy=4, interpolation=cv2.INTER_CUBIC)
+    padded = cv2.copyMakeBorder(upscaled, 20, 20, 20, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255])
+    # Recognition is restricted to the characters 0-9
+    results = reader.readtext(padded, allowlist="0123456789")
+    if not results: return -1
+    # Only the first detection is used; results[0][1] is presumably its text (EasyOCR (bbox, text, confidence)) - TODO confirm
+    digits = re.findall(r'\d+', results[0][1])
+    return int(digits[0]) if digits else -1
+
+for i, page in enumerate(unique_pages):
+    num = extract_measure_number(page)
+    print(f"Page {i:02d}: {num}")
--- a/scripts/debug/test_m1.png
+++ b/scripts/debug/test_m1.png
--- a/scripts/debug/test_m2.png
+++ b/scripts/debug/test_m2.png
--- a/scripts/debug/test_math.py
+++ b/scripts/debug/test_math.py
@@ -0,0 +1,43 @@
+import cv2
+import numpy as np
+from youtube_tab_to_pdf import _find_white_tab_strip, _detect_measure_bars, _extract_print_channel
+
+def get_clean_binary(img):
+    """Threshold the per-pixel channel maximum of img at 200: values above become 255, the rest 0."""
+    channel_max = np.max(img, axis=2)
+    return cv2.threshold(channel_max, 200, 255, cv2.THRESH_BINARY)[1]
+
+cap = cv2.VideoCapture(r"output/サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4")
+cap.set(cv2.CAP_PROP_POS_FRAMES, 50)
+ret, f1 = cap.read()
+cap.set(cv2.CAP_PROP_POS_FRAMES, 65) # Next second
+ret, f2 = cap.read()
+cap.release()
+
+def process(frame):
+    """Return the second bar-delimited segment of the tab strip found in `frame` (the first one may hold only the clef)."""
+    s = _find_white_tab_strip(frame)
+    if s is None: raise ValueError("no white tab strip found in frame")  # was an opaque TypeError on s[0]
+    crop = frame[s[0]:s[1], :]
+    coords = [0] + _detect_measure_bars(_extract_print_channel(crop)) + [crop.shape[1]]
+    if len(coords) < 3: raise ValueError("no measure bar detected; cannot isolate the second segment")  # was an IndexError on coords[2]
+    return crop[:, coords[1]:coords[2]]
+
+m1 = process(f1)
+m2 = process(f2)
+
+cv2.imwrite("test_m1.png", m1)
+cv2.imwrite("test_m2.png", m2)
+
+bin1 = get_clean_binary(m1)
+bin2 = get_clean_binary(m2)
+
+h = min(bin1.shape[0], bin2.shape[0])
+w = min(bin1.shape[1], bin2.shape[1])
+s1 = bin1[:h, :w]
+s2 = bin2[:h, :w]
+
+diff = cv2.absdiff(s1, s2)
+error_ratio = np.sum(diff > 0) / s1.size
+
+print(f"Error Ratio: {error_ratio:.4f}")
--- a/scripts/debug/test_measure_slice.py
+++ b/scripts/debug/test_measure_slice.py
@@ -0,0 +1,25 @@
+import cv2
+import numpy as np
+
+def find_measure_boundaries(img_bgr, max_width=1280):
+    """Return an x cut position: 10px past the last detected vertical line at or left of max_width-15, else max_width."""
+    dark = cv2.threshold(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY), 180, 255, cv2.THRESH_BINARY_INV)[1]
+    # Only rows 50:160 are inspected; the 1x40 opening keeps vertical runs of at least 40px
+    tall_only = cv2.morphologyEx(dark[50:160, :], cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40)))
+    column_hits = np.sum(tall_only, axis=0) / 255
+    # Columns with more than 30 surviving pixels count as candidate bar lines
+    candidates = np.where(column_hits > 30)[0]
+    limit = max_width - 15
+    usable = candidates[candidates <= limit]
+    if usable.size == 0: return max_width
+    return usable[-1] + 10
+
+if __name__ == "__main__":
+    img = cv2.imread(r'C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\raw_super_block.png')
+    for w_cap in [1280, 2000, 2560]:
+        cw = min(w_cap, img.shape[1])
+        cut_x = find_measure_boundaries(img[:, :cw], cw)
+        print(f"Max {cw} => Cut at {cut_x}")
+        out = img[:, :cw].copy()
+        cv2.line(out, (cut_x, 0), (cut_x, out.shape[0]), (0, 0, 255), 2)
+        cv2.imwrite(r'C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\slice_'+str(w_cap)+'.png', out)
--- a/scripts/debug/test_morph_grid.py
+++ b/scripts/debug/test_morph_grid.py
@@ -0,0 +1,82 @@
+import cv2
+import numpy as np
+import glob
+from dataclasses import dataclass
+from typing import List, Tuple
+
+@dataclass
+class MeasureBound:  # pixel box of one measure as produced by GridParser.find_measure_bounds
+    x_start: int  # left boundary column (a detected bar column, or 0 when the frame edge is injected)
+    x_end: int  # right boundary column (the next bar column, or the frame width when the edge is injected)
+    y_top: int  # top row of the staff band, shared by every measure of the frame
+    y_bottom: int  # bottom row of the staff band, shared by every measure of the frame
+
+class GridParser:  # locates the staff band and the bar-delimited measures of a single BGR frame
+    def __init__(self, frame: np.ndarray):  # precomputes self.binary (gray > 200 -> 255, else 0) and its height/width
+        self.frame = frame
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        _, self.binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        self.h, self.w = self.binary.shape
+    # Returns (y_top, y_bottom) of the first group of staff-like rows, padded by 5px and clamped to the frame; (0, 0) if none
+    def find_staff_y_bounds(self) -> Tuple[int, int]:
+        h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (100, 1))
+        h_lines = cv2.morphologyEx(self.binary, cv2.MORPH_OPEN, h_kernel)
+        row_sums = np.sum(h_lines, axis=1) / 255
+        # After the 100x1 opening, a row qualifies when its remaining bright pixels exceed 40% of the frame width
+        staff_rows = np.where(row_sums > self.w * 0.4)[0]
+        if len(staff_rows) == 0: return 0, 0
+            
+        y_top = int(staff_rows[0])
+        y_bottom = y_top
+        # Extend downwards until the gap to the next qualifying row exceeds 150px
+        for y in staff_rows:
+            if y - y_bottom > 150: break
+            y_bottom = int(y)
+            
+        return max(0, y_top - 5), min(self.h, y_bottom + 5)
+    # Returns one MeasureBound per gap of at least 40px between consecutive bar columns; [] if the staff band is under 20px tall
+    def find_measure_bounds(self) -> List[MeasureBound]:
+        y_top, y_bottom = self.find_staff_y_bounds()
+        if y_bottom - y_top < 20: return []
+        staff_height = y_bottom - y_top
+        
+        # Isolate all vertical linear structures at least 30px tall (ignores almost all hand features and note heads)
+        v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 30))
+        roi = self.binary[y_top:y_bottom, :]
+        v_lines = cv2.morphologyEx(roi, cv2.MORPH_OPEN, v_kernel, iterations=1)
+        
+        # Aggregate the vertical structures. Measure bars will have a high column density.
+        col_sums = np.sum(v_lines, axis=0) / 255
+        
+        # We expect a measure bar to cross both staves, totaling maybe 50% of the ROI height
+        bar_cols = np.where(col_sums > staff_height * 0.4)[0]
+        # Keep a column only when it lies more than 20px right of the last kept one
+        clean_bars = []
+        for x in bar_cols:
+            if not clean_bars or x - clean_bars[-1] > 20:
+                clean_bars.append(int(x))
+        # Inject the frame edges as boundaries when no bar lies within 50px of them
+        if not clean_bars or clean_bars[0] > 50:
+            clean_bars.insert(0, 0)
+        if clean_bars[-1] < self.w - 50:
+            clean_bars.append(self.w)
+        # Pair consecutive boundaries; spans narrower than 40px are skipped
+        measures = []
+        for i in range(len(clean_bars) - 1):
+            x1 = clean_bars[i]
+            x2 = clean_bars[i+1]
+            if x2 - x1 < 40: continue
+            measures.append(MeasureBound(x1, x2, y_top, y_bottom))
+            
+        return measures
+
+if __name__ == "__main__":
+    videos = glob.glob('output/*.mp4')
+    cap = cv2.VideoCapture(videos[0])
+    cap.set(cv2.CAP_PROP_POS_FRAMES, 500)
+    ret, frame = cap.read()
+    cap.release()
+    
+    parser = GridParser(frame)
+    measures = parser.find_measure_bounds()
+    print(f"Measures: {[(m.x_start, m.x_end) for m in measures]}")
--- a/scripts/debug/test_morphology.py
+++ b/scripts/debug/test_morphology.py
@@ -0,0 +1,48 @@
+import cv2
+import numpy as np
+import glob
+
+video_path = glob.glob('output/*.mp4')[0]
+cap = cv2.VideoCapture(video_path)
+cap.set(cv2.CAP_PROP_POS_FRAMES, 500) # jump to a frame with chords and hand
+ret, frame = cap.read()
+cap.release()
+
+if not ret:
+    print("Cannot read video frame.")
+    exit()
+
+gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+_, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY) # White text, black background
+
+# Morphological horizontal line detection
+horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
+detect_horizontal = cv2.morphologyEx(binary, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
+
+# Morphological vertical line detection
+vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 30))
+detect_vertical = cv2.morphologyEx(binary, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
+
+# Find staves
+row_sums = np.sum(detect_horizontal, axis=1) / 255
+y_staves = np.where(row_sums > binary.shape[1] * 0.4)[0]
+if len(y_staves) > 0:
+    print(f"Top staff line Y: {y_staves[0]}")
+    print(f"Bottom staff line Y: {y_staves[-1]}")
+    
+    # Restrict vertical detection to within the staff lines
+    staff_crop = detect_vertical[y_staves[0]:y_staves[-1], :]
+    col_sums = np.sum(staff_crop, axis=0) / 255
+    bars = np.where(col_sums > (y_staves[-1] - y_staves[0]) * 0.6)[0]
+    
+    # Filter bars that are too close (thickness)
+    clean_bars = []
+    for x in bars:
+        if not clean_bars or x - clean_bars[-1] > 10:
+            clean_bars.append(x)
+    print(f"Measure bars X: {clean_bars}")
+else:
+    print("No staves detected.")
+
+cv2.imwrite("C:/Users/Certes/Desktop/guitar_score/debug_morph_horiz.png", detect_horizontal)
+cv2.imwrite("C:/Users/Certes/Desktop/guitar_score/debug_morph_vert.png", detect_vertical)
--- a/scripts/debug/test_number_band.py
+++ b/scripts/debug/test_number_band.py
@@ -0,0 +1,33 @@
+import cv2
+import numpy as np
+from youtube_tab_to_pdf import extract_unique_scroll
+
+# We will read fast_test_pano.jpg
+img = cv2.imread('fast_test_pano.jpg', cv2.IMREAD_GRAYSCALE)
+
+# We want to find staff lines and number band
+_, bin_inv = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY_INV)
+row_sums = np.sum(bin_inv, axis=1) / 255.0
+staff_rows = np.where(row_sums > img.shape[1] * 0.4)[0]
+
+if len(staff_rows) >= 6:
+    staff_y_top = staff_rows[0]
+else:
+    staff_y_top = int(img.shape[0] * 0.3)
+
+# Number band
+band_y_top = max(0, staff_y_top - 25)
+band_y_bottom = staff_y_top
+
+band = img[band_y_top:band_y_bottom, :]
+
+# Save it to see if it correctly contains the numbers
+cv2.imwrite('debug_band.png', band)
+print(f"Band shape: {band.shape}")
+
+# Let's see if we can extract number boxes!
+band_inv = cv2.bitwise_not(band)
+col_sums = np.sum(band_inv, axis=0) / 255.0
+number_xs = np.where(col_sums > 5)[0] # at least 5 pixels of ink vertically
+
+print(f"Pixels with numbers: {len(number_xs)}")
--- a/scripts/debug/test_ocr_crop.py
+++ b/scripts/debug/test_ocr_crop.py
@@ -0,0 +1,44 @@
+import cv2
+import easyocr
+import numpy as np
+from youtube_tab_to_pdf import _extract_print_channel, _detect_measure_bars
+
+cap = cv2.VideoCapture(r"output/サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4")
+cap.set(cv2.CAP_PROP_POS_FRAMES, 50) # 1.6 seconds in
+ret, frame = cap.read()
+if not ret: exit()
+
+gray = np.max(frame, axis=2)
+_, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+row_sums = np.sum(thresh, axis=1) / 255
+staff_lines = np.where(row_sums > frame.shape[1] * 0.5)[0]
+
+y_staff = staff_lines[0] if len(staff_lines) > 0 else 100
+
+bar_coords = _detect_measure_bars(thresh)
+print(f"Detected Bars at X: {bar_coords}")
+
+reader = easyocr.Reader(['en'], gpu=False)
+
+for idx, x_bar in enumerate(bar_coords):
+    # Crop the tiny region above the bar where the number should be
+    crop_y1 = max(0, y_staff - 25)
+    crop_y2 = max(0, y_staff - 2)
+    crop_x1 = max(0, x_bar - 5)
+    crop_x2 = min(frame.shape[1], x_bar + 25)
+    
+    if crop_y2 <= crop_y1 or crop_x2 <= crop_x1:
+        continue
+        
+    sprite = frame[crop_y1:crop_y2, crop_x1:crop_x2]
+    cv2.imwrite(f"debug_sprite_{idx}.png", sprite)
+    
+    # Scale up for better OCR
+    scaled = cv2.resize(sprite, (0,0), fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
+    
+    gray_sprite = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)
+    _, binary_sprite = cv2.threshold(gray_sprite, 180, 255, cv2.THRESH_BINARY_INV)
+    cv2.imwrite(f"debug_sprite_bin_{idx}.png", binary_sprite)
+    
+    res = reader.readtext(gray_sprite, allowlist='0123456789')
+    print(f"Bar {idx} X={x_bar} OCR: {res}")
--- a/scripts/debug/test_ocr_on_real_boxes.py
+++ b/scripts/debug/test_ocr_on_real_boxes.py
@@ -0,0 +1,74 @@
+import cv2
+import pickle
+import numpy as np
+import easyocr
+import time
+import re
+
+# CPU-only English EasyOCR reader, created once at import time.
+reader = easyocr.Reader(['en'], gpu=False)
+
+# NOTE(review): pickle.load can execute arbitrary code from the file; only load
+# a unique_pages.pkl that was produced locally by this project.
+with open('unique_pages.pkl', 'rb') as f:
+    unique_pages = pickle.load(f)
+
+print(f"Loaded {len(unique_pages)} chunks. Running OCR on jump-cut boundaries...")
+
+def extract_measure_number(page_bgr):
+    # Same logic as before to find the first measure box
+    cw = min(page_bgr.shape[1], 1000)
+    page_gray = cv2.cvtColor(page_bgr[:, :cw], cv2.COLOR_BGR2GRAY)
+    _, bin_inv = cv2.threshold(page_gray, 200, 255, cv2.THRESH_BINARY_INV)
+    
+    row_sums = np.sum(bin_inv, axis=1) / 255.0
+    staff_rows = np.where(row_sums > cw * 0.4)[0]
+    
+    if len(staff_rows) >= 6:
+        staff_y_top, staff_y_bottom = staff_rows[0], staff_rows[-1]
+        for r in staff_rows:
+            if r - staff_y_top > 100: break
+            staff_y_bottom = r
+    else:
+        return -1
+        
+    expected_h = max(10, staff_y_bottom - staff_y_top + 1)
+    staff_region = bin_inv[staff_y_top:staff_y_bottom+1, :]
+    col_sums = np.sum(staff_region, axis=0) / 255.0
+    bar_xs = np.where(col_sums >= expected_h * 0.8)[0]
+    
+    if len(bar_xs) == 0: return -1
+    x_bar = bar_xs[0]
+    
+    box_y1 = max(0, staff_y_top - 25)
+    box_y2 = staff_y_top
+    box_x1 = x_bar
+    box_x2 = min(page_gray.shape[1], x_bar + 35)
+    
+    num_box = page_gray[box_y1:box_y2, box_x1:box_x2]
+    
+    # Preprocess for OCR
+    _, num_inv = cv2.threshold(num_box, 200, 255, cv2.THRESH_BINARY_INV)
+    
+    # Must pass white background with black text to EasyOCR! (Since it reads printed text)
+    num_for_ocr = cv2.bitwise_not(num_inv) 
+    
+    upscaled = cv2.resize(num_for_ocr, None, fx=4, fy=4, interpolation=cv2.INTER_CUBIC)
+    padded = cv2.copyMakeBorder(upscaled, 20, 20, 20, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255])
+    
+    results = reader.readtext(padded, allowlist="0123456789")
+    if not results: return -1
+    
+    text = results[0][1]
+    
+    digits = re.findall(r'\d+', text)
+    if digits:
+        return int(digits[0])
+    return -1
+
+results = []
+for i, page in enumerate(unique_pages):
+    t0 = time.time()
+    num = extract_measure_number(page)
+    tf = time.time()
+    print(f"Page {i:02d}: {num} (took {tf-t0:.2f}s)")
+    results.append(num)
+
+print(f"Sequential Detections: {results}")
--- a/scripts/debug/test_panorama.py
+++ b/scripts/debug/test_panorama.py
@@ -0,0 +1,137 @@
+import cv2
+import numpy as np
+import time
+from pathlib import Path
+
+def stitch_scrolling_video(video_path, start_sec=0, duration_sec=100, fps_sample_rate=15):
+    cap = cv2.VideoCapture(video_path)
+    video_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
+    
+    # Calculate frame skip
+    frame_skip = int(video_fps / fps_sample_rate)
+    if frame_skip < 1: frame_skip = 1
+    
+    start_frame = int(start_sec * video_fps)
+    max_frames = int(duration_sec * video_fps)
+    
+    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
+    
+    # Structural assumptions based on subagent analysis
+    # Y=103 to Y=435 is the white tablature bar
+    y_start = 103
+    y_end = 435
+    
+    panorama = None
+    prev_gray = None
+    
+    count = 0
+    while count < max_frames:
+        ret, frame = cap.read()
+        if not ret: break
+        
+        # We only process every `frame_skip` frames
+        if count % frame_skip != 0:
+            count += 1
+            continue
+            
+        scale = 1280 / frame.shape[1]
+        frame_resized = cv2.resize(frame, (1280, int(frame.shape[0] * scale)))
+        
+        # Crop to the exact white ribbon
+        ribbon = frame_resized[y_start:y_end, :]
+        gray = cv2.cvtColor(ribbon, cv2.COLOR_BGR2GRAY)
+        
+        # Binarize aggressively to vertical features only to kill horizontal staff lines aliases
+        # dx=1, dy=0 computes horizontal gradient (which highlights VERTICAL edges like note stems and bar lines)
+        sobelx = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
+        bin_float = np.abs(sobelx)
+        
+        if panorama is None:
+            # First frame is the initial panorama
+            panorama = ribbon.copy()
+            prev_gray = bin_float
+            continue
+            
+        # 1. Constrained Template Matching for dx
+        # Template is a 100px wide vertical slice from prev_gray at x=600
+        template = prev_gray[:, 600:700]
+        
+        # Search Region: from x=550 to x=710 in bin_float
+        search_region = bin_float[:, 550:710]
+        
+        res = cv2.matchTemplate(search_region, template, cv2.TM_CCOEFF_NORMED)
+        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
+        
+        # In search_region (starts at 550), the template's original position (600) is at index 50.
+        # If max_loc[0] == 50 -> no movement (dx=0).
+        # If max_loc[0] < 50 -> image moved left (dx > 0).
+        dx = 50 - max_loc[0]
+        
+        if count < 30: # Print first few shifts
+            print(f"Frame {count}: dx={dx}, max_val={max_val:.3f}")
+        shift_x = int(dx)
+        # dx is typically POSITIVE if the camera moves right, meaning the image content moves LEFT.
+        # dx will be positive or negative depending on parameter order.
+        # Let's enforce that we only append new pixels from the RIGHT edge of the 'new' frame.
+        shift_x = int(round(dx))
+        
+        # In a left-scrolling video, the content moves left. 
+        # phaseCorrelate(prev, curr) -> to overlap curr onto prev, we shift curr by +dx.
+        # The new pixels entering from the right are exactly the `dx` rightmost columns of the current ribbon!
+        # If shift_x > 0...
+        
+        # Let's verify shift_x sign.
+        # If curr is moved left by 10 pixels compared to prev, then prev[x] == curr[x-10].
+        # So curr must be shifted by +10 to match prev. Thus dx > 0.
+        # We need to append the NEWest 10 pixels from the right side of curr.
+        
+        if shift_x > 0 and shift_x < 300: # Sanity check to ignore massive glitches
+            # The new column is the absolute rightmost shift_x columns of the current ribbon
+            new_pixels = ribbon[:, -shift_x:]
+            panorama = np.hstack([panorama, new_pixels])
+            prev_gray = bin_float
+            
+    cap.release()
+    return panorama
+
+def slice_panorama_to_a4(panorama, slice_width=1280):
+    """Cuts the infinite 1D panorama into stacked A4 rows"""
+    h, w, c = panorama.shape
+    rows = []
+    
+    for start_x in range(0, w, slice_width):
+        end_x = start_x + slice_width
+        chunk = panorama[:, start_x:end_x]
+        
+        # Pad the last chunk with white if it's too short
+        if chunk.shape[1] < slice_width:
+            pad_w = slice_width - chunk.shape[1]
+            pad = np.ones((h, pad_w, c), dtype=np.uint8) * 255
+            chunk = np.hstack([chunk, pad])
+            
+        rows.append(chunk)
+        
+    final_image = np.vstack(rows)
+    return final_image
+
+if __name__ == "__main__":
+    video_path = "output/サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4"
+    if not Path(video_path).exists():
+        # Fallback to output/untitled.mp4 or whatever it might be named
+        for f in Path("output").glob("*.mp4"):
+            video_path = str(f)
+            break
+            
+    print(f"Stitching...")
+    
+    start_t = time.time()
+    panorama = stitch_scrolling_video(video_path, start_sec=0, duration_sec=100, fps_sample_rate=15)
+    print(f"Extraction took {time.time() - start_t:.2f}s. Panorama shape: {panorama.shape}")
+    
+    if panorama is not None:
+        final_sheet = slice_panorama_to_a4(panorama, slice_width=1280)
+        out_path = "C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/verify_panorama.png"
+        cv2.imwrite(out_path, final_sheet)
+        print(f"Saved stacked result to {out_path} with shape {final_sheet.shape}")
+    else:
+        print("Failed to generate panorama.")
--- a/scripts/debug/test_pipeline.py
+++ b/scripts/debug/test_pipeline.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+"""로컬 캐시된 mp4 파일로 파이프라인 테스트 (다운로드 스킵)
+1080p 다운로드 모드: python test_pipeline.py --download
+"""
+import sys
+import os
+from pathlib import Path
+import importlib.util
+import argparse
+import gc
+
+# Load youtube_tab_to_pdf.py from this script's directory and bind it as `pipeline`.
+# NOTE(review): the path is resolved next to this file — confirm that
+# youtube_tab_to_pdf.py actually lives in the same directory as this script.
+spec = importlib.util.spec_from_file_location(
+    "pipeline", str(Path(__file__).parent / "youtube_tab_to_pdf.py"))
+pipeline = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(pipeline)
+
+# YouTube URLs used for testing (trailing comments give the song titles)
+TEST_URLS = {
+    "video_1": "https://www.youtube.com/watch?v=x76IMSvWR0o",  # song title: 晴る
+    "video_2": "https://www.youtube.com/watch?v=90BWvJY6KbE",  # song title: 新宝島
+    "video_3": "https://www.youtube.com/watch?v=Ri9g4lwnrJQ",  # song title: 空奏列車
+}
+
+
+def test_video(mp4_path: Path, label: str):
+    """단일 영상 테스트 — 다운로드 없이 로컬 파일 직접 사용"""
+    print(f"\n{'='*60}")
+    print(f"테스트: {label}")
+    print(f"파일: {mp4_path.name}")
+    print(f"{'='*60}")
+
+    output_dir = Path("output")
+    debug_dir = output_dir / "debug_frames" / label
+    debug_dir.mkdir(parents=True, exist_ok=True)
+
+    # Step 2: 프레임 추출
+    frames = pipeline.extract_frames(mp4_path)
+
+    # Step 3: 패턴 감지
+    pattern = pipeline.detect_pattern(frames)
+
+    # Step 4: 고유 프레임 추출
+    if pattern == "scroll":
+        unique = pipeline.extract_unique_scroll(frames)
+    elif pattern == "split":
+        unique = pipeline.extract_unique_split(frames)
+    else:
+        unique = pipeline.extract_unique_overlay(frames)
+
+    # Step 5: PDF 생성
+    pdf_path = output_dir / f"test_{label}.pdf"
+    pipeline.generate_pdf(unique, pdf_path, debug_dir=debug_dir)
+
+    print(f"\n결과: {pattern} / {len(unique)}개 고유 프레임")
+    return pattern, len(unique)
+
+
+def download_test_videos():
+    """1080p로 테스트 영상 다운로드"""
+    output_dir = Path("output")
+    output_dir.mkdir(exist_ok=True)
+
+    for label, url in TEST_URLS.items():
+        print(f"\n--- {label} 다운로드 ---")
+        try:
+            video_path, title = pipeline.download_video(url, output_dir)
+            print(f"  → 완료: {video_path.name}")
+        except Exception as e:
+            print(f"  → 실패: {e}")
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--download", action="store_true",
+                        help="1080p로 테스트 영상 다운로드")
+    args = parser.parse_args()
+
+    if args.download:
+        download_test_videos()
+        return
+
+    output_dir = Path("output")
+    mp4_files = sorted(output_dir.glob("*.mp4"))
+    if not mp4_files:
+        print("테스트할 영상(mp4)이 output 폴더에 없습니다.")
+        print("  → python test_pipeline.py --download 로 영상 다운로드")
+        sys.exit(1)
+
+    print(f"캐시된 영상 {len(mp4_files)}개 발견:")
+    for f in mp4_files:
+        print(f"  - {f.name} ({f.stat().st_size / 1024 / 1024:.1f} MB)")
+
+    results = {}
+    for i, mp4 in enumerate(mp4_files):
+        label = f"video_{i+1}"
+        pattern, count = test_video(mp4, label)
+        results[label] = (mp4.name, pattern, count)
+        gc.collect()  # 1080p 프레임 메모리 해제
+
+    print(f"\n{'='*60}")
+    print("전체 결과 요약:")
+    print(f"{'='*60}")
+    for label, (name, pattern, count) in results.items():
+        print(f"  {label}: {pattern:8s} → {count:4d}개 프레임 | {name[:40]}")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/debug/test_score_extractor.py
+++ b/scripts/debug/test_score_extractor.py
@@ -0,0 +1,57 @@
+import cv2
+import numpy as np
+import time
+import glob
+from video_cv_tracker import TemporalTracker
+from score_extractor import ScoreExtractor
+
+def test_pipeline():
+    videos = glob.glob('output/*.mp4')
+    if not videos: return
+    cap = cv2.VideoCapture(videos[0])
+    
+    # 1. Tracker extracts median jump-cut pages flawlessly
+    tracker = TemporalTracker(diff_threshold=0.05)
+    
+    # Process 100 seconds
+    limit_frames = 3000
+    
+    count = 0
+    t0 = time.time()
+    while count < limit_frames:
+        ret, frame = cap.read()
+        if not ret: break
+        
+        # We only pass the lower tab bounding box if needed.
+        # But actually, finding the tab strip directly using robust median is safer.
+        # Let's just crop roughly the bottom 2/3rds where tab lives, reducing processing load.
+        h = frame.shape[0]
+        roi = frame[int(h*0.3):h, :]
+        
+        tracker.process_frame(roi)
+        count += 1
+        if count % 300 == 0:
+            print(f"Processed {count} frames...")
+            
+    cap.release()
+    unique_pages = tracker.get_unique_pages()
+    print(f"Tracker returned {len(unique_pages)} unique structural median pages. Took {time.time()-t0:.2f}s")
+    
+    # 2. Score Extractor applies the Ultimate Structure State Machine
+    t1 = time.time()
+    extractor = ScoreExtractor()
+    extractor.process_pages(unique_pages)
+    tiled_rows = extractor.tile_to_a4(chunk_width=1280)
+    print(f"Extraction & Tiling took {time.time()-t1:.2f}s")
+    
+    if tiled_rows:
+        final_img = np.vstack(tiled_rows)
+        # Invert back to black-on-white PDF format
+        pdf_img = cv2.bitwise_not(final_img)
+        cv2.imwrite("C:/Users/Certes/Desktop/guitar_score/debug_final_state_machine.png", pdf_img)
+        print("Wrote debug_final_state_machine.png")
+    else:
+        print("Failed to produce rows.")
+
+if __name__ == "__main__":
+    test_pipeline()
--- a/scripts/debug/test_stitch.py
+++ b/scripts/debug/test_stitch.py
@@ -0,0 +1,36 @@
+import cv2
+import numpy as np
+
+img0 = cv2.imread(r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\raw_chunk_00.png")
+img1 = cv2.imread(r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\raw_chunk_01.png")
+
+gray0 = cv2.cvtColor(img0, cv2.COLOR_BGR2GRAY)
+gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
+
+h, w = gray0.shape
+
+# The first 300px of img1 is our template
+template_w = 400
+template = gray1[:60, :template_w] # ONLY TOP 60 PIXELS
+ref = gray0[:60, :] # ONLY TOP 60 PIXELS
+
+# Find where 'template' is in 'gray0'
+res = cv2.matchTemplate(ref, template, cv2.TM_CCOEFF_NORMED)
+_, max_val, _, max_loc = cv2.minMaxLoc(res)
+
+print(f"Match value (Top 60px): {max_val:.3f}")
+if max_val > 0.8:
+    match_x_in_last = max_loc[0]
+    overlap_len = w - match_x_in_last
+    print(f"Overlap starts in last_chunk at x={match_x_in_last}.")
+    print(f"Length of overlap is {overlap_len}px.")
+    
+    if overlap_len < w:
+        new_slice = img1[:, overlap_len:]
+        stitched = np.hstack([img0, new_slice])
+        cv2.imwrite(r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\test_stitched_top60.png", stitched)
+        print("Exported test_stitched_top60.png")
+else:
+    print("No valid overlap found.")
+    
+
--- a/scripts/debug/test_temporal_median.py
+++ b/scripts/debug/test_temporal_median.py
@@ -0,0 +1,50 @@
+import cv2
+import numpy as np
+import glob
+
+videos = glob.glob('output/*.mp4')
+cap = cv2.VideoCapture(videos[0])
+
+# Collect 30 continuous frames (about 1 second of video)
+frames = []
+cap.set(cv2.CAP_PROP_POS_FRAMES, 500)
+for _ in range(30):
+    ret, frame = cap.read()
+    if not ret: break
+    frames.append(frame)
+cap.release()
+
+if len(frames) == 30:
+    # 1. Temporal Median to completely erase the live-action moving guitarist and background
+    median_frame = np.median(frames, axis=0).astype(np.uint8)
+    
+    gray = np.max(median_frame, axis=2)
+    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+    
+    # 2. Extract true staff lines from the pristine static overlay
+    row_sums = np.sum(binary, axis=1) / 255
+    y_staff = np.where(row_sums > binary.shape[1] * 0.4)[0]
+    
+    if len(y_staff) > 0:
+        print(f"Pristine staff lines detected at: {y_staff}")
+        y_top = y_staff[0]
+        y_bottom = y_staff[-1]
+        
+        # 3. Extract vertical bars perfectly
+        roi = binary[y_top:y_bottom, :]
+        col_sums = np.sum(roi, axis=0) / 255
+        
+        staff_h = y_bottom - y_top
+        bars = np.where(col_sums > staff_h * 0.5)[0]
+        
+        clean_bars = []
+        for x in bars:
+            if not clean_bars or x - clean_bars[-1] > 20:
+                clean_bars.append(int(x))
+                
+        print(f"Pristine Measure Boundaries: {clean_bars}")
+        
+    cv2.imwrite("C:/Users/Certes/Desktop/guitar_score/debug_temporal_median.png", median_frame)
+    cv2.imwrite("C:/Users/Certes/Desktop/guitar_score/debug_temporal_binary.png", binary)
+else:
+    print("Not enough frames.")
--- a/scripts/debug/test_y_crop.py
+++ b/scripts/debug/test_y_crop.py
@@ -0,0 +1,52 @@
+import cv2
+import numpy as np
+
+def find_white_tab_bounds(video_path):
+    cap = cv2.VideoCapture(video_path)
+    cap.set(cv2.CAP_PROP_POS_FRAMES, 30 * cap.get(cv2.CAP_PROP_FPS))
+    ret, frame = cap.read()
+    cap.release()
+    
+    if not ret: return None
+    
+    scale = 1280 / frame.shape[1]
+    frame = cv2.resize(frame, (1280, int(frame.shape[0] * scale)))
+    
+    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+    
+    # Calculate row-wise mean brightness
+    row_means = np.mean(gray, axis=1)
+    
+    # We are looking for the white paper background which has brightness > 230 on average
+    # Wait, notes and black lines reduce the mean of a row.
+    # A single black horizontal line on white reduces mean by (255 - 0) * (width/width) -> It drops to ~180 if it's thick.
+    # Let's say any row with mean > 180 is part of the white strip.
+    is_white_row = row_means > 180
+    
+    # Find contiguous blocks of True
+    # Pad with False to handle edges cleanly
+    padded = np.concatenate(([False], is_white_row, [False]))
+    diffs = np.diff(padded.astype(int))
+    
+    starts = np.where(diffs == 1)[0]
+    ends = np.where(diffs == -1)[0]
+    
+    best_start, best_end, max_len = 0, 0, 0
+    
+    for s, e in zip(starts, ends):
+        length = e - s
+        if length > max_len:
+            max_len = length
+            best_start = s
+            best_end = e
+            
+    return best_start, best_end, frame.shape[0]
+
+if __name__ == "__main__":
+    video_path = "output/サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4"
+    bounds = find_white_tab_bounds(video_path)
+    if bounds:
+        s, e, h = bounds
+        print(f"Mathematically found White Tab Strip: Y_START={s}, Y_END={e}. Total Height={h}")
+    else:
+        print("Failed to find bound")
--- a/scripts/debug/verify_chunk_0.jpg
+++ b/scripts/debug/verify_chunk_0.jpg
--- a/scripts/debug/verify_chunk_1.jpg
+++ b/scripts/debug/verify_chunk_1.jpg
--- a/scripts/debug/verify_chunk_2.jpg
+++ b/scripts/debug/verify_chunk_2.jpg
--- a/scripts/debug/verify_chunk_3.jpg
+++ b/scripts/debug/verify_chunk_3.jpg
--- a/scripts/debug/verify_chunk_4.jpg
+++ b/scripts/debug/verify_chunk_4.jpg
--- a/scripts/debug/verify_chunk_5.jpg
+++ b/scripts/debug/verify_chunk_5.jpg
--- a/scripts/debug/verify_chunk_6.jpg
+++ b/scripts/debug/verify_chunk_6.jpg
--- a/scripts/debug/verify_fixes.py
+++ b/scripts/debug/verify_fixes.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+"""
+수정된 버그 3개가 실제로 동작하는지 검증하는 재실행 시뮬레이션.
+youtube_tab_to_pdf.py의 수정된 함수들을 직접 임포트하여 사용합니다.
+"""
+import sys
+from pathlib import Path
+import cv2
+import numpy as np
+
+# On Windows, switch stdout/stderr to UTF-8 so the non-ASCII text printed by
+# this script does not raise an encoding error; characters that still cannot
+# be encoded are replaced.
+if sys.platform == "win32":
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+    sys.stderr.reconfigure(encoding="utf-8", errors="replace")
+
+# 메인 모듈 임포트 (수정된 코드 사용)
+sys.path.insert(0, str(Path(__file__).parent))
+from youtube_tab_to_pdf import (
+    _find_white_tab_strip, _has_tab_content,
+    _detect_scroll_offset, _extract_tracking_channel,
+    _merge_scroll_candidates, merge_panoramas_list,
+    _detect_measure_bars, compare_frames
+)
+
+FRAME_DIR = Path("output/temp_frames")
+OUT_DIR   = Path("output/sim_verify")
+OUT_DIR.mkdir(exist_ok=True)
+
+def main():
+    paths = sorted(FRAME_DIR.glob("f_0*.png"))
+    if not paths:
+        print("❌ 프레임 없음"); return
+
+    print(f"[VERIFY] {len(paths)}개 프레임 — 수정된 코드로 재검증")
+
+    # 스트립 Y범위
+    tops, bots = [], []
+    for p in paths[:30]:
+        f = cv2.imread(str(p))
+        if f is None: continue
+        s = _find_white_tab_strip(f)
+        if s: tops.append(s[0]); bots.append(s[1])
+    med_top = int(np.median(tops))
+    med_bot = int(np.median(bots))
+    print(f"  스트립 Y: {med_top}~{med_bot}")
+
+    # MSE 중복제거
+    THRESHOLD = 0.95
+    candidates, compared = [], []
+    for p in paths:
+        f = cv2.imread(str(p))
+        if f is None: continue
+        h = f.shape[0]
+        crop = f[max(0, med_top):min(h, med_bot), :]
+        if not _has_tab_content(crop): continue
+        cmp_img = cv2.resize(crop, (480, 120), interpolation=cv2.INTER_AREA)
+        if any(compare_frames(cmp_img, ref) >= THRESHOLD for ref in compared):
+            continue
+        candidates.append(crop)
+        compared.append(cmp_img)
+
+    print(f"\n[1] MSE 중복제거 후: {len(candidates)}개 후보")
+
+    # ── BUG1 검증: 씬전환 감지 횟수 ─────────────────────────────────────
+    print(f"\n[2] BUG1 검증 — 씬전환 감지 횟수 (기대: 1~3)")
+    stitched = _merge_scroll_candidates(candidates)
+    print(f"    _merge_scroll_candidates 결과: {len(stitched)}개 세그먼트 → 파노라마")
+    for i, s in enumerate(stitched):
+        print(f"    세그먼트 파노라마 {i}: {s.shape[1]}px")
+        cv2.imwrite(str(OUT_DIR / f"seg_pano_{i:02d}.png"), s)
+
+    # ── BUG2 검증: 파노라마 병합 ────────────────────────────────────────
+    print(f"\n[3] BUG2 검증 — 파노라마 병합 (기대: 1~2개)")
+    merged = merge_panoramas_list(stitched)
+    print(f"    merge_panoramas_list 결과: {len(merged)}개 최종 파노라마")
+    for i, m in enumerate(merged):
+        print(f"    최종 파노라마 {i}: {m.shape[1]}x{m.shape[0]}px")
+        cv2.imwrite(str(OUT_DIR / f"final_pano_{i:02d}.png"), m)
+
+    # ── BUG3 검증: 마디 구분선 탐지 ────────────────────────────────────
+    print(f"\n[4] BUG3 검증 — 마디 구분선 탐지 (기대: 간격 모두 ≥100px)")
+    total_measures = 0
+    all_ok = True
+    for i, m in enumerate(merged):
+        gray = m[:, :, 2]  # Red 채널
+        bars = _detect_measure_bars(gray)
+        total_measures += max(0, len(bars) - 1)  # 구분선 사이가 마디 수
+        print(f"    파노라마 {i}: {len(bars)}개 구분선 탐지", end="")
+        if bars:
+            gaps = [bars[j+1]-bars[j] for j in range(len(bars)-1)]
+            min_gap = min(gaps) if gaps else 0
+            ok = min_gap >= 100
+            if not ok: all_ok = False
+            print(f" | 최소간격: {min_gap}px {'✅' if ok else '❌ (오탐 여전히 존재)'}")
+            print(f"      첫5개 좌표: {bars[:5]}")
+        else:
+            print()
+
+    # ── 최종 판정 ───────────────────────────────────────────────────────
+    print(f"\n{'='*60}")
+    print("[검증 결과]")
+    seg_ok = len(stitched) <= 5          # 씬전환 5회 이하 (이전 8회 → 개선)
+    merge_ok = len(merged) <= 2          # 파노라마 2개 이하 (이전 3개 → 개선)
+    bar_ok = all_ok                      # 모든 마디선 간격 ≥100px
+    print(f"  BUG1 씬전환 오탐: {'✅ 개선됨' if seg_ok else '❌ 여전히 과다'} ({len(stitched)}개 세그먼트, 이전 9개)")
+    print(f"  BUG2 파노라마 분리: {'✅ 개선됨' if merge_ok else '❌ 여전히 분리'} ({len(merged)}개, 이전 3개)")
+    print(f"  BUG3 마디선 오탐: {'✅ 개선됨' if bar_ok else '❌ 여전히 오탐'}")
+    print(f"  탐지된 총 마디 수: {total_measures}개")
+    print(f"{'='*60}")
+
+    if seg_ok and merge_ok and bar_ok:
+        print("\n✅ 모든 버그 수정 확인 — 실제 파이프라인 실행 가능")
+    else:
+        print("\n⚠ 일부 문제 잔존 — 추가 파라미터 조정 필요")
+
+if __name__ == "__main__":
+    main()
--- a/scripts/debug/verify_log.txt
+++ b/scripts/debug/verify_log.txt
--- a/scripts/debug/verify_monotonic.py
+++ b/scripts/debug/verify_monotonic.py
@@ -0,0 +1,41 @@
+import sys
+sys.path.append(r"C:\Users\Certes\Desktop\guitar_score")
+import cv2
+import easyocr
+import numpy as np
+import os
+from youtube_tab_to_pdf import extract_frames, extract_unique_scroll
+
+video_file = r"C:\Users\Certes\Desktop\guitar_score\output\サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4"
+print("Extracting frames...")
+frames = extract_frames(video_file, fps=2)
+
+print("Running pipeline extraction...")
+unique = extract_unique_scroll(frames, threshold=0.95)
+
+print("Initializing OCR...")
+reader = easyocr.Reader(['en'])
+
+print(f"Generated {len(unique)} chunks.")
+detect_log = []
+
+for i, page in enumerate(unique):
+    # Image is A4 width
+    # We want to OCR the top 150 pixels of the whole chunk to find measure numbers
+    h, w = page.shape[:2]
+    top_area = page[:min(200, h), :]
+    
+    results = reader.readtext(top_area)
+    # filter for numbers
+    nums = []
+    for (bbox, text, prob) in results:
+        t = ''.join(filter(str.isdigit, text))
+        if t:
+            nums.append(int(t))
+            
+    print(f"Page {i} measure numbers detected: {nums}")
+    detect_log.append(nums)
+    
+    cv2.imwrite(f"output/verify_chunk_{i}.jpg", page)
+    if i > 5:
+        break
--- a/scripts/debug/verify_ocr.py
+++ b/scripts/debug/verify_ocr.py
@@ -0,0 +1,55 @@
+import cv2
+import numpy as np
+import glob
+
+def get_number_sprite(m_img):
+    gray = np.max(m_img, axis=2)
+    _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+    row_sums = np.sum(thresh, axis=1) / 255
+    staff_lines = np.where(row_sums > m_img.shape[1] * 0.5)[0]
+    y_staff = staff_lines[0] if len(staff_lines) > 0 else 50
+    crop_y1 = max(0, y_staff - 60)
+    crop_y2 = max(0, y_staff - 5)
+    crop_x1 = 0
+    crop_x2 = min(60, m_img.shape[1])
+    if crop_y2 <= crop_y1 or crop_x2 <= crop_x1: return None
+    sprite = thresh[crop_y1:crop_y2, crop_x1:crop_x2]
+    if np.count_nonzero(sprite > 127) < 8: return None
+    return sprite
+
+img_path = r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\final_check_100_sec.png"
+img = cv2.imread(img_path)
+
+h, w = img.shape[:2]
+gray = np.max(img, axis=2)
+col_sums = np.sum(gray < 100, axis=0) # white padding is 255, black measures are <100
+# ACTUALLY, final image has white padding for rows. And black background for music.
+# Let's just crop based on the stitched widths.
+# Better yet, just use a sliding window template match on the number sprite!
+# Even simpler: just visually save the sprites of the FIRST measure of every ROW!
+
+rows = []
+for y in range(0, h, 320): # assuming chunk height is around 320
+    chunk = img[y:y+320, :]
+    if np.max(chunk) > 200:
+        rows.append(chunk)
+
+print(f"Detected {len(rows)} A4 rows in final image.")
+
+for i, row in enumerate(rows):
+    gray_row = np.max(row, axis=2)
+    _, binary = cv2.threshold(gray_row, 200, 255, cv2.THRESH_BINARY)
+    
+    # Just save the first 100x100 box of the row where the number sprite usually is
+    row_sums = np.sum(binary, axis=1) / 255
+    staff_lines = np.where(row_sums > w * 0.4)[0]
+    if len(staff_lines) > 0:
+        y_staff = staff_lines[0]
+        crop_y1 = max(0, y_staff - 60)
+        crop_y2 = max(0, y_staff - 5)
+        sprite = binary[crop_y1:crop_y2, 10:80]
+        
+        cv2.imwrite(f"C:/Users/Certes/Desktop/guitar_score/debug_ocr_measure_{i}.png", sprite)
+        pixels = np.count_nonzero(sprite > 127)
+        print(f"Row {i} parsed. Sprite white pixels: {pixels}")
+    
--- a/scripts/debug/view_ascii.py
+++ b/scripts/debug/view_ascii.py
@@ -0,0 +1,32 @@
+import cv2
+import numpy as np
+
+def img_to_ascii(img_path, target_width=120):
+    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
+    if img is None:
+        print("Could not load image:", img_path)
+        return
+        
+    h, w = img.shape
+    aspect_ratio = h / w
+    # Terminal characters are roughly 2:1 height:width, so adjust aspect
+    target_height = int(target_width * aspect_ratio * 0.5)
+    
+    resized = cv2.resize(img, (target_width, target_height))
+    
+    # ASCII characters gradient from dark to light
+    chars = ["@", "%", "#", "*", "+", "=", "-", ":", ".", " "]
+    
+    # Normalize mapping
+    for y in range(target_height):
+        row_str = ""
+        for x in range(target_width):
+            pixel = resized[y, x]
+            # Map 0-255 to 0-9
+            char_idx = int((pixel / 255.0) * 9)
+            row_str += chars[char_idx]
+        print(row_str)
+
+if __name__ == "__main__":
+    print("=== debug_chunk_0.png ===")
+    img_to_ascii("C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/debug_chunk_0.png", 120)