# guitar_score/scripts/debug/patch_extractor.py

import re

# Load the full text of the module that is about to be patched.
with open('youtube_tab_to_pdf.py', mode='r', encoding='utf-8') as source_file:
    code = source_file.read()

# Replacement source text for ``extract_unique_scroll``. It is spliced into
# youtube_tab_to_pdf.py by the regex substitution further down, so it is data
# for this script, not code that runs here. It is a plain (non-raw, non-f)
# string: the ``{...}`` fields belong to f-strings of the *generated* code.
#
# What the generated function does, as written in the text below:
#   1. Takes the median top/bottom of the strips that _find_white_tab_strip
#      reports for the first 50 frames; returns [] if none is found.
#   2. For each frame, crops to that band, skips it unless _has_tab_content
#      accepts the crop, and cuts the crop at the x-coordinates returned by
#      _detect_measure_bars (segments narrower than 40 px are dropped).
#   3. Compares the frame's first segment with up to the last 9 kept measures
#      (only when widths differ by at most 25 px) using a thresholded
#      absolute pixel difference. If the best mismatch ratio is below 0.20,
#      only the segments from the matching offset onward are appended;
#      otherwise all of the frame's segments are appended.
#   4. Packs the kept measures left-to-right into rows of width 1280, padding
#      with white, and returns the list of row images.
#
# NOTE(review): the text references np, cv2, List, SIMILARITY_THRESHOLD and
# the helpers _find_white_tab_strip, _has_tab_content, _extract_print_channel
# and _detect_measure_bars; the target module is expected to provide them --
# confirm against youtube_tab_to_pdf.py. The ``threshold`` parameter is not
# used anywhere in this body.
new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
    print(f"[4/5] 순차 1FPS 타임라인 기반 마디 추출 중...")

    strip_tops, strip_bottoms = [], []
    for frame in frames[:50]:
        strip = _find_white_tab_strip(frame)
        if strip:
            strip_tops.append(strip[0])
            strip_bottoms.append(strip[1])

    if not strip_tops: return []

    median_top = int(np.median(strip_tops))
    median_bottom = int(np.median(strip_bottoms))

    unique_measures = []
    chunk_width = 1280

    def get_clean_binary(img):
        gray = np.max(img, axis=2)
        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
        return binary

    for frame_idx, frame in enumerate(frames):
        h = frame.shape[0]
        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
        if not _has_tab_content(tab_crop):
            continue

        gray_page = _extract_print_channel(tab_crop)
        bar_coords = _detect_measure_bars(gray_page)

        if not bar_coords:
            continue

        coords = [0] + bar_coords + [tab_crop.shape[1]]
        coords = sorted(list(set(coords)))

        page_measures = []
        for i in range(len(coords) - 1):
            x_start = coords[i]
            x_end = coords[i+1]
            if x_end - x_start < 40: continue
            page_measures.append(tab_crop[:, x_start:x_end])

        if not page_measures:
            continue

        if not unique_measures:
            unique_measures.extend(page_measures)
            continue

        first_m = page_measures[0]
        bin_first = get_clean_binary(first_m)

        best_error = 1.0
        best_offset = 0
        anchored = False

        for scan_dist in range(1, min(10, len(unique_measures) + 1)):
            past_idx = len(unique_measures) - scan_dist
            past_m = unique_measures[past_idx]

            bin_past = get_clean_binary(past_m)

            if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
                hs = min(bin_first.shape[0], bin_past.shape[0])
                ws = min(bin_first.shape[1], bin_past.shape[1])
                s1 = bin_first[:hs, :ws]
                s2 = bin_past[:hs, :ws]

                diff = cv2.absdiff(s1, s2)
                error_ratio = np.sum(diff > 0) / s1.size

                if error_ratio < best_error:
                    best_error = error_ratio
                    best_offset = len(unique_measures) - past_idx

        # Error ratio < 20% confirms identity for sparse structures
        if best_error < 0.20:
            new_start_offset = best_offset
            anchored = True
            print(f"    [Anchor] Frame {frame_idx} -> PDF offset {best_offset} (Best Error: {best_error:.4f})")
        else:
            print(f"    [New] Frame {frame_idx} -> No Match (Best Error was {best_error:.4f})")

        if anchored and new_start_offset < len(page_measures):
            unique_measures.extend(page_measures[new_start_offset:])
        elif not anchored:
            unique_measures.extend(page_measures)

    print(f"  -> 동기화 중복 제거 완료: 무손실 타임라인 기반 {len(unique_measures)}개 연속 마디 보존")

    final_chunks = []
    current_row_measures = []
    current_row_width = 0

    for measure_img in unique_measures:
        measure_w = measure_img.shape[1]

        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
            row_img = np.hstack(current_row_measures)
            pad_w = chunk_width - row_img.shape[1]
            if pad_w > 0:
                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
                row_img = np.hstack([row_img, pad_img])
            final_chunks.append(row_img)
            current_row_measures = [measure_img]
            current_row_width = measure_w
        else:
            current_row_measures.append(measure_img)
            current_row_width += measure_w

    if current_row_measures:
        row_img = np.hstack(current_row_measures)
        if row_img.shape[1] > chunk_width:
             row_img = row_img[:, :chunk_width]
        else:
            pad_w = chunk_width - row_img.shape[1]
            if pad_w > 0:
                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
                row_img = np.hstack([row_img, pad_img])
        final_chunks.append(row_img)

    print(f"  -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
    return final_chunks
"""

# Match the existing ``extract_unique_scroll`` definition: its exact signature
# followed (non-greedily; DOTALL lets ``.`` cross newlines) by everything up
# to the first ``return final_chunks``.
pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'

# The match ends right after ``return final_chunks`` and leaves the original
# line ending in place, so the replacement's own trailing newline is dropped;
# otherwise every run would insert one more blank line after the function.
replacement = new_func.rstrip('\n')

# A callable replacement is inserted verbatim, whereas a replacement *string*
# is scanned for backslash escapes and group references.
new_code, match_count = re.subn(pattern, lambda _match: replacement, code, flags=re.DOTALL)

# Refuse to rewrite the file (and to claim success) when nothing matched.
if match_count == 0:
    raise SystemExit("extract_unique_scroll not found in youtube_tab_to_pdf.py; nothing patched.")

with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
    f.write(new_code)
print("Patched.")