chore(docs): document ScoreExtractor tiling and refactor debug scripts (#563)

2026-03-29 17:57:40 +09:00
parent 39b55f2e9f
commit ac0c098259
698 changed files with 141180 additions and 195 deletions
--- a/scripts/debug/patch_stable_trigger.py
+++ b/scripts/debug/patch_stable_trigger.py
@@ -0,0 +1,153 @@
+import re
+
+with open('youtube_tab_to_pdf.py', 'r', encoding='utf-8') as f:
+    code = f.read()
+# Replacement source text for extract_unique_scroll; it is substituted into the loaded file contents by the regex step below.
+new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
+    print(f"[4/5] 순차 Stable Content Trigger 방식 추출 중...")
+    
+    strip_tops, strip_bottoms = [], []
+    for frame in frames[:50]:
+        strip = _find_white_tab_strip(frame)
+        if strip:
+            strip_tops.append(strip[0])
+            strip_bottoms.append(strip[1])
+            
+    if not strip_tops: return []
+        
+    median_top = int(np.median(strip_tops))
+    median_bottom = int(np.median(strip_bottoms))
+    
+    def get_clean_binary(img):
+        gray = np.max(img, axis=2)
+        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        return binary
+
+    unique_measures = []
+    chunk_width = 1280
+    
+    last_1fps_bin = None
+    last_solid_page = None
+
+    for frame_idx, frame in enumerate(frames):
+        h = frame.shape[0]
+        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
+        if not _has_tab_content(tab_crop): 
+            continue
+            
+        clean_bin = get_clean_binary(tab_crop)
+        
+        if last_1fps_bin is not None:
+            # Check stability compared to 1 second ago
+            diff = cv2.absdiff(clean_bin, last_1fps_bin)
+            error = np.count_nonzero(diff) / clean_bin.size
+            if error < 0.05: # Page is fully stabilized (not a fading transition)
+                has_changed_since_last_solid = True
+                
+                if last_solid_page is not None:
+                    s_diff = cv2.absdiff(clean_bin, last_solid_page)
+                    s_err = np.count_nonzero(s_diff) / clean_bin.size
+                    if s_err < 0.05:
+                        has_changed_since_last_solid = False
+                
+                # We only process this page if it's securely stable AND we haven't already processed it
+                if has_changed_since_last_solid:
+                    last_solid_page = clean_bin.copy()
+                    
+                    # Extract measures
+                    gray_page = _extract_print_channel(tab_crop)
+                    bar_coords = _detect_measure_bars(gray_page)
+                    
+                    if bar_coords:
+                        coords = [0] + bar_coords + [tab_crop.shape[1]]
+                        coords = sorted(list(set(coords)))
+                        
+                        page_measures = []
+                        for i in range(len(coords) - 1):
+                            x_start = coords[i]
+                            x_end = coords[i+1]
+                            if x_end - x_start < 40: continue
+                            page_measures.append(tab_crop[:, x_start:x_end])
+                            
+                        if page_measures:
+                            if not unique_measures:
+                                unique_measures.extend(page_measures)
+                            else:
+                                first_m = page_measures[0]
+                                bin_first = get_clean_binary(first_m)
+                                
+                                best_error = 1.0
+                                best_offset = 0
+                                anchored = False
+                                
+                                # scan_dist=4 ensures we never loop back to identical repeating choruses from 10 seconds ago!
+                                for scan_dist in range(1, min(4, len(unique_measures) + 1)):
+                                    past_idx = len(unique_measures) - scan_dist
+                                    past_m = unique_measures[past_idx]
+                                    bin_past = get_clean_binary(past_m)
+                                    
+                                    if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
+                                        hs = min(bin_first.shape[0], bin_past.shape[0])
+                                        ws = min(bin_first.shape[1], bin_past.shape[1])
+                                        s1 = bin_first[:hs, :ws]
+                                        s2 = bin_past[:hs, :ws]
+                                        
+                                        m_diff = cv2.absdiff(s1, s2)
+                                        error_ratio = np.sum(m_diff > 0) / s1.size
+                                        
+                                        if error_ratio < best_error:
+                                            best_error = error_ratio
+                                            best_offset = len(unique_measures) - past_idx
+                                            
+                                if best_error < 0.15:
+                                    new_start_offset = best_offset
+                                    if new_start_offset < len(page_measures):
+                                        unique_measures.extend(page_measures[new_start_offset:])
+                                else:
+                                    unique_measures.extend(page_measures)
+                                    
+        last_1fps_bin = clean_bin.copy()
+            
+    print(f"  -> 동기화 중복 제거 완료: Stability 기반 {len(unique_measures)}개 마디 보존")
+
+    final_chunks = []
+    current_row_measures = []
+    current_row_width = 0
+    
+    for measure_img in unique_measures:
+        measure_w = measure_img.shape[1]
+        
+        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
+            row_img = np.hstack(current_row_measures)
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+            final_chunks.append(row_img)
+            current_row_measures = [measure_img]
+            current_row_width = measure_w
+        else:
+            current_row_measures.append(measure_img)
+            current_row_width += measure_w
+            
+    if current_row_measures:
+        row_img = np.hstack(current_row_measures)
+        if row_img.shape[1] > chunk_width:
+             row_img = row_img[:, :chunk_width]
+        else:
+            pad_w = chunk_width - row_img.shape[1]
+            if pad_w > 0:
+                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
+                row_img = np.hstack([row_img, pad_img])
+        final_chunks.append(row_img)
+        
+    print(f"  -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
+    return final_chunks
+"""
+# NOTE(review): in the payload above, `threshold`, `frame_idx` and `anchored` are never read, and only the final row is cropped to chunk_width (rows flushed mid-loop are padded but never cropped) - confirm this is intended.
+pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'
+new_code = re.sub(pattern, new_func, code, flags=re.DOTALL)
+
+with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
+    f.write(new_code)
+print("Stable Content Trigger Patched.")