fix(pipeline): 스티칭 버그 3종 수정 + AI 마디번호 스탬프 제거

- [BUG1] _merge_scroll_candidates: 씬전환 가속도 조건 제거 (9→1 세그먼트)
- [BUG2] merge_panoramas_list: 매칭 임계치 0.60→0.50 (파노라마 3→1 병합)
- [BUG3] _detect_measure_bars: 마디선 최소간격 100px 필터 추가 (17px 오탐 제거)
- remove: _stamp_measure_number 호출 제거 (AI 임의 [1][2][3] 스탬프 삭제)
- add: sim_stitch.py, simulate_ocr_pipeline.py, verify_fixes.py (진단/검증 스크립트)
2026-03-28 09:17:22 +09:00
parent 52cbc5679a
commit 7c14f3c040
7 changed files with 767 additions and 13 deletions
--- a/sim_stitch.py
+++ b/sim_stitch.py
@@ -0,0 +1,296 @@
+#!/usr/bin/env python3
+"""
+파노라마 스티칭 정밀 진단 스크립트
+-----------------------------------
+실제 캐시 프레임(temp_frames/f_XXXX.png)을 이용해
+파노라마 스티칭의 각 단계를 정밀 추적합니다.
+
+목적:
+  1. 스크롤 오프셋이 제대로 감지되는가?
+  2. `_merge_scroll_candidates` 씬 전환 감지가 정확한가?
+  3. `merge_panoramas_list` 템플릿 매칭이 반복 구간을 제대로 이어붙이는가?
+  4. 최종 파노라마에 실제로 누락된 마디가 있는가?
+
+실행:
+  C:\\ProgramData\\miniforge3\\envs\\score\\python.exe sim_stitch.py
+"""
+
+import sys
+from pathlib import Path
+import cv2
+import numpy as np
+
+# The report below prints Korean text and emoji. On Windows, switch the
+# standard streams to UTF-8 and replace anything that still cannot be encoded
+# (presumably to avoid UnicodeEncodeError on legacy console code pages).
+if sys.platform == "win32":
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+    sys.stderr.reconfigure(encoding="utf-8", errors="replace")
+
+FRAME_DIR = Path("output/temp_frames")  # cached input frames (f_XXXX.png)
+OUT_DIR   = Path("output/sim_stitch")   # diagnostic images are written here
+# parents=True: also create "output/" if it is missing; without it mkdir
+# raises FileNotFoundError when the script runs in a fresh working directory.
+OUT_DIR.mkdir(parents=True, exist_ok=True)
+
+# ─── 기존 코드와 동일한 함수들 ────────────────────────────────────────────
+
+def _find_white_tab_strip(frame, min_strip_ratio=0.10):
+    """Locate the tallest band of mostly white/pastel rows in a BGR frame.
+
+    A row counts as "bright" when more than half of its pixels (ignoring a
+    10% margin on the left and on the right) are either near-white
+    (V > 180, S < 40) or a bright pastel (V > 200, S < 100) in HSV.
+    Consecutive bright rows form a band; a run of non-bright rows of up to
+    2% of the frame height is tolerated inside a band.
+
+    Args:
+        frame: BGR image (it is converted with cv2.COLOR_BGR2HSV).
+        min_strip_ratio: minimum band height as a fraction of frame height.
+
+    Returns:
+        (top, bottom) rows of the tallest qualifying band, padded by 3% of
+        the frame height and clamped to the frame, or None when no band is
+        tall enough.
+    """
+    frame_h, frame_w = frame.shape[:2]
+    side = int(frame_w * 0.1)
+    _, sat, val = cv2.split(cv2.cvtColor(frame, cv2.COLOR_BGR2HSV))
+    sat = sat[:, side:frame_w - side]
+    val = val[:, side:frame_w - side]
+
+    is_white = (val > 180) & (sat < 40)
+    is_pastel = (val > 200) & (sat < 100)
+    row_is_bright = (is_white | is_pastel).mean(axis=1) > 0.5
+
+    gap_limit = int(frame_h * 0.02)
+    min_len = frame_h * min_strip_ratio
+    bands = []
+    band_top = None
+    gap = 0
+    for y in range(frame_h):
+        if row_is_bright[y]:
+            if band_top is None:
+                band_top = y
+            gap = 0
+            continue
+        if band_top is None:
+            continue
+        gap += 1
+        if gap <= gap_limit:
+            continue
+        # The gap grew too long: close the band at its last bright row.
+        band_end = y - gap
+        if band_end - band_top >= min_len:
+            bands.append((band_top, band_end))
+        band_top = None
+
+    # A band still open at the bottom of the frame is closed here.
+    if band_top is not None:
+        band_end = frame_h - gap
+        if band_end - band_top >= min_len:
+            bands.append((band_top, band_end))
+
+    if not bands:
+        return None
+    top, bottom = max(bands, key=lambda b: b[1] - b[0])
+    margin = int(frame_h * 0.03)
+    return (max(0, top - margin), min(frame_h, bottom + margin))
+
+def _has_tab_content(region):
+    """Report whether a crop contains a plausible share of dark pixels.
+
+    Returns False for a missing or empty crop, or one smaller than 15 rows
+    by 50 columns. Otherwise returns whether the fraction of pixels with a
+    gray value below 180 lies strictly between 2% and 30%.
+    """
+    if region is None or region.size == 0:
+        return False
+    if len(region.shape) == 3:
+        gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
+    else:
+        gray = region
+    rows, cols = gray.shape
+    if rows < 15 or cols < 50:
+        return False
+    dark_fraction = np.sum(gray < 180) / gray.size
+    return 0.02 < dark_fraction < 0.30
+
+def compare_frames(f1, f2):
+    """Return a similarity score in [0, 1] for two frames (1.0 = identical).
+
+    Both inputs are reduced to grayscale, f2 is resized to f1's shape when
+    they differ, and both are shrunk to 480 px wide when f1 is wider. The
+    score is 1 - 8 * MSE of the 0..1-normalized pixels, clamped to [0, 1].
+    """
+    def _as_gray(img):
+        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img
+
+    a = _as_gray(f1)
+    b = _as_gray(f2)
+    if b.shape != a.shape:
+        b = cv2.resize(b, (a.shape[1], a.shape[0]))
+
+    max_w = 480
+    if a.shape[1] > max_w:
+        factor = max_w / a.shape[1]
+        small = (max_w, int(a.shape[0] * factor))
+        a, b = cv2.resize(a, small), cv2.resize(b, small)
+
+    delta = (a.astype(np.float32) - b.astype(np.float32)) / 255.0
+    mse = np.mean(delta ** 2)
+    return max(0.0, 1.0 - min(mse * 8.0, 1.0))
+
+def _extract_tracking_channel(frame):
+    """Return the single channel used for scroll tracking.
+
+    A three-dimensional input yields channel index 0 (blue for a BGR image);
+    any other input is returned unchanged.
+    """
+    is_multichannel = len(frame.shape) == 3
+    return frame[:, :, 0] if is_multichannel else frame
+
+def _detect_scroll_offset(frame_a, frame_b, min_confidence=0.1):
+    """Estimate how many pixels frame_b has scrolled relative to frame_a.
+
+    The rightmost half of frame_a's tracking channel is template-matched
+    (TM_CCOEFF_NORMED) inside frame_b; the scroll is the template's original
+    x position minus the x position of the best match.
+
+    Returns:
+        (scroll_px, confidence). scroll_px is 0 when the best match score is
+        below min_confidence or the computed shift is not positive.
+    """
+    _, width = frame_a.shape[:2]
+    chan_a = _extract_tracking_channel(frame_a)
+    chan_b = _extract_tracking_channel(frame_b)
+
+    half = int(width * 0.5)
+    origin_x = width - half
+    patch = chan_a[:, origin_x:]
+
+    scores = cv2.matchTemplate(chan_b, patch, cv2.TM_CCOEFF_NORMED)
+    _, best_score, _, best_loc = cv2.minMaxLoc(scores)
+
+    shift = origin_x - best_loc[0]
+    rejected = best_score < min_confidence or shift <= 0
+    return (0 if rejected else shift, best_score)
+
+
+# ─── Main ────────────────────────────────────────────────────────────────────
+
+def main():
+    """Trace every stage of panorama stitching on the cached frames.
+
+    Reads the ``f_0*.png`` frames in ``FRAME_DIR``, prints a report for each
+    stage and writes intermediate images to ``OUT_DIR``:
+
+      1. crop the tab strip from every frame and drop near-duplicate crops,
+      2. measure the scroll offset between consecutive candidates and flag
+         scene cuts,
+      3. stitch each cut-delimited segment into a raw panorama,
+      4. try to merge consecutive panoramas by template matching,
+      5. detect measure bar lines in the merged panoramas.
+
+    Returns None. Stops early, after printing a message, when there are no
+    frames, when no tab strip is found, or when no crop survives filtering.
+    """
+    paths = sorted(FRAME_DIR.glob("f_0*.png"))
+    if not paths:
+        print("❌ 프레임 없음:", FRAME_DIR)
+        return
+
+    print(f"[STITCH-SIM] {len(paths)}개 프레임")
+
+    # Estimate the strip's Y range from the first 30 frames; the median of
+    # the per-frame detections is used as the crop range for all frames.
+    strip_tops, strip_bottoms = [], []
+    for p in paths[:30]:
+        f = cv2.imread(str(p))
+        if f is None:
+            continue
+        s = _find_white_tab_strip(f)
+        if s:
+            strip_tops.append(s[0])
+            strip_bottoms.append(s[1])
+    if not strip_tops:
+        # Without this guard np.median([]) yields NaN and int(NaN) raises
+        # ValueError, hiding the real cause (no strip found in any frame).
+        print("❌ 탭 스트립 감지 실패:", FRAME_DIR)
+        return
+    med_top    = int(np.median(strip_tops))
+    med_bottom = int(np.median(strip_bottoms))
+    print(f"  스트립 Y: {med_top} ~ {med_bottom}")
+
+    # Stage 1: crop the strip from every frame and drop any crop that is a
+    # near-duplicate of a crop kept so far (existing pipeline logic).
+    SIMILARITY_THRESHOLD = 0.95
+    candidates, all_compared = [], []
+    for p in paths:
+        f = cv2.imread(str(p))
+        if f is None:
+            continue
+        h = f.shape[0]
+        crop = f[max(0, med_top):min(h, med_bottom), :]
+        if not _has_tab_content(crop):
+            continue
+        compare_img = cv2.resize(crop, (480, 120), interpolation=cv2.INTER_AREA)
+        if any(compare_frames(compare_img, ref) >= SIMILARITY_THRESHOLD for ref in all_compared):
+            continue
+        candidates.append(crop)
+        all_compared.append(compare_img)
+
+    print(f"\n[1단계] MSE 중복제거 후 후보: {len(candidates)}개 프레임")
+    if not candidates:
+        # Later stages index candidates[0]; stop here instead of crashing
+        # with IndexError.
+        print("❌ 탭 후보 프레임 없음:", FRAME_DIR)
+        return
+
+    # Stage 2: scroll offset between consecutive candidates.
+    print(f"\n[2단계] 연속 프레임 스크롤 오프셋 분석:")
+    print(f"  {'idx':>4}  {'scroll_px':>10}  {'conf':>6}  {'씬전환':>8}")
+    print(f"  {'-'*40}")
+
+    scroll_data = []
+    prev_s = 0
+    prev_conf = 1.0
+    for i in range(1, len(candidates)):
+        s, conf = _detect_scroll_offset(candidates[i-1], candidates[i])
+        # A cut is flagged on low confidence, a jump of more than 100 px in
+        # scroll amount versus the previous pair, or a confidence drop > 0.4.
+        is_cut = (conf <= 0.15) or (abs(s - prev_s) > 100) or (prev_conf - conf > 0.4)
+        scroll_data.append((i, s, conf, is_cut))
+        mark = "✂ CUT" if is_cut else ""
+        print(f"  {i:>4}  {s:>10}px  {conf:>6.3f}  {mark}")
+        prev_s = s
+        prev_conf = conf
+
+    n_cuts = sum(1 for _, _, _, cut in scroll_data if cut)
+    print(f"\n  → 씬 전환 감지 횟수: {n_cuts}개 (예상: 1~3개)")
+    print(f"     → 분절 세그먼트: {n_cuts+1}개")
+
+    # Stage 3: split the candidates at every cut and stitch each segment.
+    print(f"\n[3단계] 세그먼트 파노라마 스티칭:")
+    segments = []
+    current_seg = [candidates[0]]
+    for idx, _, _, is_cut in scroll_data:
+        if is_cut:
+            segments.append(current_seg)
+            current_seg = [candidates[idx]]
+        else:
+            current_seg.append(candidates[idx])
+    segments.append(current_seg)
+
+    panos = []
+    for seg_i, seg in enumerate(segments):
+        if len(seg) == 1:
+            panos.append(seg[0])
+            print(f"  세그먼트 {seg_i}: 1프레임 → 스티칭 불필요 ({seg[0].shape[1]}px)")
+            continue
+
+        min_h = min(f.shape[0] for f in seg)
+        panorama = seg[0][:min_h, :]
+        for i in range(1, len(seg)):
+            curr = seg[i][:min_h, :]
+            scroll_px, conf = _detect_scroll_offset(seg[i-1][:min_h, :], curr)
+            if scroll_px > 0 and conf > 0.15:
+                # Append only the newly revealed right-hand columns.
+                new_strip = curr[:, curr.shape[1] - scroll_px:]
+                panorama = np.hstack([panorama, new_strip])
+            else:
+                # No usable offset: append the whole frame.
+                panorama = np.hstack([panorama, curr])
+
+        panos.append(panorama)
+        print(f"  세그먼트 {seg_i}: {len(seg)}프레임 → 파노라마 {panorama.shape[1]}px")
+        cv2.imwrite(str(OUT_DIR / f"raw_pano_{seg_i:02d}.png"), panorama)
+
+    # Stage 4: diagnose the merge_panoramas_list step.
+    print(f"\n[4단계] 파노라마 병합 (merge_panoramas_list):")
+    print(f"  병합 전: {len(panos)}개 파노라마")
+
+    if len(panos) > 1:
+        merged_list = []
+        current_master = panos[0].copy()
+        for i in range(1, len(panos)):
+            next_pano = panos[i].copy()
+            # Look for the first (up to) 800 px of the next panorama inside
+            # the last (up to) 1500 px of the running master panorama.
+            head_w = min(800, next_pano.shape[1])
+            head = next_pano[:, :head_w]
+            search_w = min(1500, current_master.shape[1])
+            search_region = current_master[:, -search_w:]
+            h_gray = _extract_tracking_channel(head)
+            s_gray = _extract_tracking_channel(search_region)
+            matched = False
+            if h_gray.shape[1] <= s_gray.shape[1] and h_gray.shape[0] == s_gray.shape[0]:
+                res = cv2.matchTemplate(s_gray, h_gray, cv2.TM_CCOEFF_NORMED)
+                _, max_val, _, max_loc = cv2.minMaxLoc(res)
+                print(f"  파노라마 {i}: 템플릿 매칭 max_val={max_val:.3f}", end="")
+                if max_val > 0.60:
+                    match_x = max_loc[0]
+                    # Match position in master coordinates; everything from
+                    # there to the master's end overlaps the next panorama.
+                    abs_x = current_master.shape[1] - search_w + match_x
+                    skip = current_master.shape[1] - abs_x
+                    append_part = next_pano[:, skip:]
+                    if append_part.shape[1] > 0:
+                        current_master = np.hstack([current_master, append_part])
+                    matched = True
+                    print(f" → ✅ 매칭 성공 (이어붙임, skip={skip}px)")
+                else:
+                    print(f" → ❌ 매칭 실패 (score 낮음, 새 파노라마로 분리)")
+            else:
+                print(f"  파노라마 {i}: 크기 불일치로 매칭 불가")
+
+            if not matched:
+                merged_list.append(current_master)
+                current_master = next_pano
+        merged_list.append(current_master)
+    else:
+        merged_list = panos
+
+    print(f"\n  병합 후: {len(merged_list)}개 파노라마")
+    for i, m in enumerate(merged_list):
+        print(f"  최종 파노라마 {i}: {m.shape[1]}x{m.shape[0]}px")
+        cv2.imwrite(str(OUT_DIR / f"final_pano_{i:02d}.png"), m)
+
+    # Stage 5: diagnose measure bar line detection.
+    print(f"\n[5단계] 마디 구분선(|) 탐지:")
+    def _detect_measure_bars(gray_pano):
+        """Return the x coordinates of detected bar lines in a 2-D image.
+
+        Pixels at or below 200 are treated as ink. Rows that are more than
+        half ink are staff lines; the staff spans from the first such row to
+        the last one within 100 px of it. Columns that are at least 80% ink
+        over that span are bar columns; columns less than 10 px apart are
+        grouped and each group is reported as its mean x.
+        """
+        _, thresh = cv2.threshold(gray_pano, 200, 255, cv2.THRESH_BINARY_INV)
+        h, w = thresh.shape
+        row_sums = np.sum(thresh, axis=1) / 255
+        staff_rows = np.where(row_sums > w * 0.5)[0]
+        if len(staff_rows) < 2:
+            return []
+        top_line = staff_rows[0]
+        bottom_line = top_line
+        for r in staff_rows:
+            if r - top_line > 100:
+                break
+            bottom_line = r
+        staff_region = thresh[top_line:bottom_line+1, :]
+        expected_h = bottom_line - top_line + 1
+        if expected_h < 10:
+            return []
+        col_sums = np.sum(staff_region, axis=0) / 255
+        bar_cols = np.where(col_sums >= expected_h * 0.8)[0]
+        measures = []
+        curr = []
+        for c in bar_cols:
+            if not curr or c - curr[-1] < 10:
+                curr.append(c)
+            else:
+                measures.append(int(np.mean(curr)))
+                curr = [c]
+        if curr:
+            measures.append(int(np.mean(curr)))
+        return measures
+
+    # Keep the per-panorama counts so the summary below does not have to run
+    # the detection a second time.
+    bar_counts = []
+    for i, m in enumerate(merged_list):
+        # Red channel (index 2 of a BGR image), as used for output.
+        gray = m[:, :, 2]
+        bars = _detect_measure_bars(gray)
+        bar_counts.append(len(bars))
+        print(f"  파노라마 {i} ({m.shape[1]}px): {len(bars)}개 마디 구분선 탐지")
+        if bars:
+            intervals = [bars[j+1]-bars[j] for j in range(len(bars)-1)]
+            if intervals:
+                print(f"    마디 간격: min={min(intervals)}, max={max(intervals)}, mean={np.mean(intervals):.0f}px")
+            print(f"    처음 5개 좌표: {bars[:5]}")
+
+    print(f"\n[STITCH-SIM 완료]")
+    print(f"  결과 저장: {OUT_DIR}")
+    print(f"  핵심 체크포인트:")
+    print(f"  - 씬 전환 {n_cuts}회 → {n_cuts+1}개 세그먼트 분리")
+    print(f"  - 최종 병합 파노라마: {len(merged_list)}개")
+    total_bars = sum(bar_counts)
+    print(f"  - 총 탐지 마디 구분선: {total_bars}개")
+
+# Run the diagnostic only when this file is executed as a script.
+if __name__ == "__main__":
+    main()