"""One-shot patch script that swaps in a new ``extract_unique_scroll``.

Reads ``youtube_tab_to_pdf.py``, replaces every span that starts at the
``extract_unique_scroll`` ``def`` line and ends at the first following
``return final_chunks`` with the source text held in ``new_func``, and writes
the result back to the same file.

The script aborts with a non-zero exit status, leaving the file untouched,
when the target definition cannot be found.
"""
import re

# Single source of truth for the file that is read and then overwritten.
TARGET_PATH = 'youtube_tab_to_pdf.py'

# Replacement source for ``extract_unique_scroll``.
# NOTE(review): the indentation of this embedded function was reconstructed
# from a whitespace-collapsed copy of this script; confirm the nesting
# (especially the trailing ``else`` and ``last_1fps_bin`` update) before use.
new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
    print(f"[4/5] 순차 Stable Content Trigger 방식 추출 중...")

    strip_tops, strip_bottoms = [], []
    for frame in frames[:50]:
        strip = _find_white_tab_strip(frame)
        if strip:
            strip_tops.append(strip[0])
            strip_bottoms.append(strip[1])

    if not strip_tops:
        return []
    median_top = int(np.median(strip_tops))
    median_bottom = int(np.median(strip_bottoms))

    def get_clean_binary(img):
        gray = np.max(img, axis=2)
        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
        return binary

    unique_measures = []
    chunk_width = 1280

    last_1fps_bin = None
    last_solid_page = None

    for frame_idx, frame in enumerate(frames):
        h = frame.shape[0]
        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]

        if not _has_tab_content(tab_crop):
            continue

        clean_bin = get_clean_binary(tab_crop)

        if last_1fps_bin is not None:
            # Check stability compared to 1 second ago
            diff = cv2.absdiff(clean_bin, last_1fps_bin)
            error = np.count_nonzero(diff) / clean_bin.size

            if error < 0.05:
                # Page is fully stabilized (not a fading transition)
                has_changed_since_last_solid = True
                if last_solid_page is not None:
                    s_diff = cv2.absdiff(clean_bin, last_solid_page)
                    s_err = np.count_nonzero(s_diff) / clean_bin.size
                    if s_err < 0.05:
                        has_changed_since_last_solid = False

                # We only process this page if it's securely stable AND we haven't already processed it
                if has_changed_since_last_solid:
                    last_solid_page = clean_bin.copy()

                    # Extract measures
                    gray_page = _extract_print_channel(tab_crop)
                    bar_coords = _detect_measure_bars(gray_page)

                    if bar_coords:
                        coords = [0] + bar_coords + [tab_crop.shape[1]]
                        coords = sorted(list(set(coords)))

                        page_measures = []
                        for i in range(len(coords) - 1):
                            x_start = coords[i]
                            x_end = coords[i+1]
                            if x_end - x_start < 40:
                                continue
                            page_measures.append(tab_crop[:, x_start:x_end])

                        if page_measures:
                            if not unique_measures:
                                unique_measures.extend(page_measures)
                            else:
                                first_m = page_measures[0]
                                bin_first = get_clean_binary(first_m)

                                best_error = 1.0
                                best_offset = 0
                                anchored = False

                                # scan_dist=4 ensures we never loop back to identical repeating choruses from 10 seconds ago!
                                for scan_dist in range(1, min(4, len(unique_measures) + 1)):
                                    past_idx = len(unique_measures) - scan_dist
                                    past_m = unique_measures[past_idx]
                                    bin_past = get_clean_binary(past_m)

                                    if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
                                        hs = min(bin_first.shape[0], bin_past.shape[0])
                                        ws = min(bin_first.shape[1], bin_past.shape[1])
                                        s1 = bin_first[:hs, :ws]
                                        s2 = bin_past[:hs, :ws]
                                        m_diff = cv2.absdiff(s1, s2)
                                        error_ratio = np.sum(m_diff > 0) / s1.size

                                        if error_ratio < best_error:
                                            best_error = error_ratio
                                            best_offset = len(unique_measures) - past_idx

                                if best_error < 0.15:
                                    new_start_offset = best_offset
                                    if new_start_offset < len(page_measures):
                                        unique_measures.extend(page_measures[new_start_offset:])
                                else:
                                    unique_measures.extend(page_measures)

        last_1fps_bin = clean_bin.copy()

    print(f" -> 동기화 중복 제거 완료: Stability 기반 {len(unique_measures)}개 마디 보존")

    final_chunks = []
    current_row_measures = []
    current_row_width = 0

    for measure_img in unique_measures:
        measure_w = measure_img.shape[1]
        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
            row_img = np.hstack(current_row_measures)
            pad_w = chunk_width - row_img.shape[1]
            if pad_w > 0:
                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
                row_img = np.hstack([row_img, pad_img])
            final_chunks.append(row_img)
            current_row_measures = [measure_img]
            current_row_width = measure_w
        else:
            current_row_measures.append(measure_img)
            current_row_width += measure_w

    if current_row_measures:
        row_img = np.hstack(current_row_measures)
        if row_img.shape[1] > chunk_width:
            row_img = row_img[:, :chunk_width]
        else:
            pad_w = chunk_width - row_img.shape[1]
            if pad_w > 0:
                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
                row_img = np.hstack([row_img, pad_img])
        final_chunks.append(row_img)

    print(f" -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
    return final_chunks
"""

# Non-greedy match from the exact ``def`` signature through the first
# ``return final_chunks``; DOTALL lets ``.`` span the function's line breaks.
pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'

with open(TARGET_PATH, 'r', encoding='utf-8') as f:
    code = f.read()

# The match ends at ``return final_chunks`` and the text after it (including
# its newline) is preserved, so drop the replacement's trailing whitespace to
# avoid adding an extra blank line on every run.
replacement = new_func.rstrip()

# Pass a callable rather than the string itself: a string replacement is
# treated as a template in which backslash escapes and group references are
# expanded, which would corrupt source code containing backslashes.
new_code, replaced_count = re.subn(
    pattern,
    lambda _match: replacement,
    code,
    flags=re.DOTALL,
)

# Refuse to report success (or touch the file) when nothing was replaced.
if replaced_count == 0:
    raise SystemExit(
        f"No 'extract_unique_scroll' definition matched in {TARGET_PATH}; "
        "file left unchanged."
    )

with open(TARGET_PATH, 'w', encoding='utf-8') as f:
    f.write(new_code)

print("Stable Content Trigger Patched.")