"""One-off patch script for ``youtube_tab_to_pdf.py``.

Replaces the existing ``extract_unique_scroll`` function in the target file
with the implementation stored in ``new_func``. The old function is located
with ``pattern``: everything from its ``def`` line up to the first
``return final_chunks`` that follows.

Run as a script (``python <this file>``) from the directory that contains
``youtube_tab_to_pdf.py``.
"""
import re
from typing import Tuple

# File that is read, patched in memory, and written back in place.
TARGET_FILE = 'youtube_tab_to_pdf.py'

# Source text of the replacement function. This is data written into the
# target file, not code executed by this script, so the names it uses
# (np, cv2, List, TemporalTracker, _find_white_tab_strip, ...) only have to
# exist inside the target module.
new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
    print(f"[4/5] 순차 Keyframe 페이지 추출 중...")

    strip_tops, strip_bottoms = [], []
    for frame in frames[:50]:
        strip = _find_white_tab_strip(frame)
        if strip:
            strip_tops.append(strip[0])
            strip_bottoms.append(strip[1])

    if not strip_tops:
        return []
    median_top = int(np.median(strip_tops))
    median_bottom = int(np.median(strip_bottoms))

    # 1. 0.05 threshold Tracker to completely ignore all fade/blur frames and extract EXACTLY 13 keyframes
    tracker = TemporalTracker(diff_threshold=0.05)

    for frame in frames:
        h = frame.shape[0]
        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
        if not _has_tab_content(tab_crop):
            continue
        tracker.process_frame(tab_crop)

    unique_pages = tracker.get_unique_pages()
    if not unique_pages:
        return []

    unique_measures = []
    chunk_width = 1280

    def get_clean_binary(img):
        gray = np.max(img, axis=2)
        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
        return binary

    for page_idx, page in enumerate(unique_pages):
        gray_page = _extract_print_channel(page)
        bar_coords = _detect_measure_bars(gray_page)

        if not bar_coords:
            continue

        coords = [0] + bar_coords + [page.shape[1]]
        coords = sorted(list(set(coords)))

        page_measures = []
        for i in range(len(coords) - 1):
            x_start = coords[i]
            x_end = coords[i+1]
            if x_end - x_start < 40:
                continue
            page_measures.append(page[:, x_start:x_end])

        if not page_measures:
            continue

        if not unique_measures:
            unique_measures.extend(page_measures)
            continue

        first_m = page_measures[0]
        bin_first = get_clean_binary(first_m)

        best_error = 1.0
        best_offset = 0
        anchored = False

        # 3. CRUCIAL FIX: scan_dist limited to exactly 3.
        # Preventing M40 from visually matching M9 because Chorus repeats.
        for scan_dist in range(1, min(4, len(unique_measures) + 1)):
            past_idx = len(unique_measures) - scan_dist
            past_m = unique_measures[past_idx]
            bin_past = get_clean_binary(past_m)

            # 2. Binary Absdiff error < 0.20 for subpixel-immune, noise-immune math overlap matching
            if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
                hs = min(bin_first.shape[0], bin_past.shape[0])
                ws = min(bin_first.shape[1], bin_past.shape[1])
                s1 = bin_first[:hs, :ws]
                s2 = bin_past[:hs, :ws]

                diff = cv2.absdiff(s1, s2)
                error_ratio = np.sum(diff > 0) / s1.size

                if error_ratio < best_error:
                    best_error = error_ratio
                    best_offset = len(unique_measures) - past_idx

        if best_error < 0.20:
            new_start_offset = best_offset
            anchored = True

        if anchored and new_start_offset < len(page_measures):
            # Overlapped exactly at this point, only append the truly NEW measures
            unique_measures.extend(page_measures[new_start_offset:])
        elif not anchored:
            # Completely discrete page flip with no overlap, append all measures
            unique_measures.extend(page_measures)

    print(f" -> 동기화 중복 제거 완료: 무손실 정적 페이지 기반 {len(unique_measures)}개 연속 마디 보존")

    final_chunks = []
    current_row_measures = []
    current_row_width = 0

    for measure_img in unique_measures:
        measure_w = measure_img.shape[1]
        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
            row_img = np.hstack(current_row_measures)
            pad_w = chunk_width - row_img.shape[1]
            if pad_w > 0:
                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
                row_img = np.hstack([row_img, pad_img])
            final_chunks.append(row_img)
            current_row_measures = [measure_img]
            current_row_width = measure_w
        else:
            current_row_measures.append(measure_img)
            current_row_width += measure_w

    if current_row_measures:
        row_img = np.hstack(current_row_measures)
        if row_img.shape[1] > chunk_width:
            row_img = row_img[:, :chunk_width]
        else:
            pad_w = chunk_width - row_img.shape[1]
            if pad_w > 0:
                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
                row_img = np.hstack([row_img, pad_img])
        final_chunks.append(row_img)

    print(f" -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
    return final_chunks
"""

# Matches the old function from its exact signature line through the first
# following ``return final_chunks`` (non-greedy; used with re.DOTALL so ``.``
# spans newlines).
pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'


def replace_extract_unique_scroll(code: str) -> Tuple[str, int]:
    """Substitute ``new_func`` for every match of ``pattern`` in *code*.

    Args:
        code: Full source text of the module to patch.

    Returns:
        ``(patched_text, match_count)``, mirroring ``re.subn``. A count of 0
        means the target function was not found and the text is unchanged.
    """
    # The match stops right after "return final_chunks", so the newline that
    # followed it in the file is kept. Dropping the literal's own trailing
    # newline avoids adding one extra blank line on every run.
    replacement = new_func.rstrip("\n")
    # A callable replacement is inserted verbatim. A plain string would be
    # treated as a template in which backslash sequences are interpreted.
    return re.subn(pattern, lambda _match: replacement, code, flags=re.DOTALL)


def main() -> None:
    """Patch ``TARGET_FILE`` in place, exiting with an error if nothing matched."""
    with open(TARGET_FILE, 'r', encoding='utf-8') as f:
        code = f.read()

    new_code, replaced = replace_extract_unique_scroll(code)
    if not replaced:
        # Do not rewrite the file or report success when nothing was patched.
        raise SystemExit(
            f"extract_unique_scroll was not found in {TARGET_FILE}; nothing was patched."
        )

    with open(TARGET_FILE, 'w', encoding='utf-8') as f:
        f.write(new_code)

    print("Final Truth Pipeline Patched.")


if __name__ == "__main__":
    main()