chore(docs): document ScoreExtractor tiling and refactor debug scripts (#563)

This commit is contained in:
2026-03-29 17:57:40 +09:00
parent 39b55f2e9f
commit ac0c098259
698 changed files with 141180 additions and 195 deletions

View File

@@ -268,10 +268,11 @@ def _find_white_tab_strip(frame: np.ndarray, min_strip_ratio: float = 0.10) -> O
# 가장 넓은 흰색 스트립 반환
best = max(regions, key=lambda r: r[1] - r[0])
# 약간의 패딩 추가 (하단 짤림 방지)
pad = int(h * 0.03)
top = max(0, best[0] - pad)
bottom = min(h, best[1] + pad)
# 추가 패딩: 상단은 반복선 브래킷(┌─ 1.) 보존을 위해 크게 잡음
pad_top = int(h * 0.15)
pad_bottom = int(h * 0.03)
top = max(0, best[0] - pad_top)
bottom = min(h, best[1] + pad_bottom)
return (top, bottom)
@@ -658,88 +659,55 @@ def merge_panoramas_list(panoramas):
merged_list.append(current_master)
return merged_list
def extract_unique_scroll(frames: List[np.ndarray], scan_dist: int = 4) -> List[np.ndarray]:
    """Extract the unique tablature rows from a scrolling-tab video.

    Steps, in order:
      1. Probe every 30th frame with ``_find_white_tab_strip`` and keep the
         first vertical range it reports; fall back to the full frame height
         when no probe succeeds.
      2. Crop every frame to that range and feed the crop to a
         ``TemporalTracker``.
      3. Hand the tracker's unique pages to ``ScoreExtractor`` and tile them
         with ``tile_to_a4(chunk_width=1800)``.

    Args:
        frames: Video frames as arrays indexed ``[row, column, ...]``.
        scan_dist: Not used by this implementation; accepted only so call
            sites that pass it keep working.

    Returns:
        The rows produced by ``ScoreExtractor.tile_to_a4`` as a list, or an
        empty list when ``frames`` is empty.
    """
    # Without this guard the full-frame fallback below would index frames[0]
    # on an empty list and raise IndexError.
    if not frames:
        return []

    # Imported lazily so the module can be loaded without these project
    # modules unless this extraction path is actually used.
    from video_cv_tracker import TemporalTracker
    from score_extractor import ScoreExtractor

    print("[Pipeline] Isolating static structures via TemporalTracker")
    tracker = TemporalTracker(diff_threshold=0.05)

    # Sample sparsely: the first successful detection fixes the crop for the
    # whole video, so scanning every frame would be wasted work.
    tab_bounds = None
    for probe in frames[::30]:
        bounds = _find_white_tab_strip(probe)
        if bounds:
            tab_bounds = bounds
            break

    if tab_bounds:
        top, bottom = tab_bounds
        print(f"  -> Found precise sheet music bounds: Y={top} to Y={bottom}")
    else:
        top, bottom = 0, frames[0].shape[0]
        print(f"  -> Bounding box not found, fallback to full frame: Y={top} to Y={bottom}")

    # Restrict tracking to the tab strip so content outside it (e.g. the
    # player's hands) never reaches the tracker.
    for frame in frames:
        tracker.process_frame(frame[top:bottom, :])

    unique_pages = tracker.get_unique_pages()
    print(f"[Pipeline] Reduced down to {len(unique_pages)} static structural median pages.")

    extractor = ScoreExtractor()
    extractor.process_pages(unique_pages)

    # NOTE(review): the rows are presumably already dark-on-white, so no
    # inversion is applied here - confirm against ScoreExtractor.
    return list(extractor.tile_to_a4(chunk_width=1800))
def extract_unique_overlay(frames: List[np.ndarray],
threshold: float = OVERLAY_SIMILARITY_THRESHOLD) -> List[np.ndarray]:
@@ -804,8 +772,8 @@ def generate_pdf(frames: List[np.ndarray], output_path: Path,
print(" ⚠ 프레임 없음!")
return
page_w = int(PDF_PAGE_WIDTH_MM / 25.4 * PDF_DPI)
page_h = int(PDF_PAGE_HEIGHT_MM / 25.4 * PDF_DPI)
page_w = int(PDF_PAGE_HEIGHT_MM / 25.4 * PDF_DPI) # Landscape width
page_h = int(PDF_PAGE_WIDTH_MM / 25.4 * PDF_DPI) # Landscape height
margin = int(PDF_MARGIN_MM / 25.4 * PDF_DPI)
gap = int(TAB_GAP_MM / 25.4 * PDF_DPI)
content_w = page_w - 2 * margin
@@ -843,20 +811,34 @@ def generate_pdf(frames: List[np.ndarray], output_path: Path,
print(f" → PDF: {len(resized)} Tab → {len(pages)} 페이지, {output_path.stat().st_size // 1024} KB")
def generate_long_image(chunks: List[np.ndarray], output_path: str) -> None:
    """Stack image chunks vertically into one tall image and write it to disk.

    Chunks are placed top-to-bottom in the given order on a white canvas as
    wide as the widest chunk. Narrower chunks are left-aligned and keep their
    size; the remaining width stays white.

    Args:
        chunks: Images shaped ``(h, w)`` or ``(h, w, 3)``. The pixel data is
            assumed to be uint8 in OpenCV's BGR/gray layout - TODO confirm
            against the producers of these chunks.
        output_path: Destination file. Passed through ``str()``, so a
            ``pathlib.Path`` works as well as a plain string.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    if not chunks:
        return

    total_h = sum(chunk.shape[0] for chunk in chunks)
    max_w = max(chunk.shape[1] for chunk in chunks)

    # Use a colour canvas as soon as ANY chunk is colour. Looking only at the
    # first chunk would silently drop colour from later chunks whenever the
    # first one happens to be grayscale.
    use_color = any(chunk.ndim == 3 for chunk in chunks)
    canvas_shape = (total_h, max_w, 3) if use_color else (total_h, max_w)
    canvas = np.full(canvas_shape, 255, dtype=np.uint8)

    y_offset = 0
    for chunk in chunks:
        h, w = chunk.shape[:2]
        if use_color and chunk.ndim == 2:
            # Promote grayscale rows so they fit the 3-channel canvas.
            chunk = cv2.cvtColor(chunk, cv2.COLOR_GRAY2BGR)
        canvas[y_offset:y_offset + h, :w] = chunk
        y_offset += h

    # cv2.imwrite signals failure through its return value instead of
    # raising, so an unchecked call would lose the output silently.
    if not cv2.imwrite(str(output_path), canvas):
        raise OSError(f"Failed to write long image to {output_path}")
    print(f"  → 롱 이미지: {max_w}x{total_h}")
# ─── Main ─────────────────────────────────────────────────────────────────