chore(docs): document ScoreExtractor tiling and refactor debug scripts (#563)
This commit is contained in:
@@ -268,10 +268,11 @@ def _find_white_tab_strip(frame: np.ndarray, min_strip_ratio: float = 0.10) -> O
|
||||
# 가장 넓은 흰색 스트립 반환
|
||||
best = max(regions, key=lambda r: r[1] - r[0])
|
||||
|
||||
# 약간의 패딩 추가 (하단 짤림 방지)
|
||||
pad = int(h * 0.03)
|
||||
top = max(0, best[0] - pad)
|
||||
bottom = min(h, best[1] + pad)
|
||||
# 추가 패딩: 상단은 반복선 브래킷(┌─ 1.) 보존을 위해 크게 잡음
|
||||
pad_top = int(h * 0.15)
|
||||
pad_bottom = int(h * 0.03)
|
||||
top = max(0, best[0] - pad_top)
|
||||
bottom = min(h, best[1] + pad_bottom)
|
||||
|
||||
return (top, bottom)
|
||||
|
||||
@@ -658,88 +659,55 @@ def merge_panoramas_list(panoramas):
|
||||
merged_list.append(current_master)
|
||||
return merged_list
|
||||
|
||||
def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
|
||||
print(f"[4/5] 스크롤형 Tab 시계열 추적 추출 중...")
|
||||
def extract_unique_scroll(frames: List[np.ndarray], scan_dist: int = 4) -> List[np.ndarray]:
|
||||
"""
|
||||
Deprecated parameters kept for signature compatibility.
|
||||
Uses the new Object-Oriented Hybrid State Machine (ScoreExtractor)
|
||||
and robust TemporalTracker to guarantee pure monotonic structural extraction.
|
||||
"""
|
||||
from video_cv_tracker import TemporalTracker
|
||||
from score_extractor import ScoreExtractor
|
||||
|
||||
strip_tops, strip_bottoms = [], []
|
||||
for frame in frames[:50]:
|
||||
strip = _find_white_tab_strip(frame)
|
||||
if strip:
|
||||
strip_tops.append(strip[0])
|
||||
strip_bottoms.append(strip[1])
|
||||
print("[Pipeline] Isolating static structures via TemporalTracker")
|
||||
# Tracker handles Temporal Median to isolate sheet music overlays
|
||||
tracker = TemporalTracker(diff_threshold=0.05)
|
||||
|
||||
# Dynamically find the pristine white tablature strip bounding box to isolate it from background noise
|
||||
tab_bounds = None
|
||||
for f in frames[::30]:
|
||||
bounds = _find_white_tab_strip(f)
|
||||
if bounds:
|
||||
tab_bounds = bounds
|
||||
break
|
||||
|
||||
if not strip_tops:
|
||||
return []
|
||||
|
||||
median_top = int(np.median(strip_tops))
|
||||
median_bottom = int(np.median(strip_bottoms))
|
||||
|
||||
tracker = TemporalTracker()
|
||||
if tab_bounds:
|
||||
top, bottom = tab_bounds
|
||||
print(f" -> Found precise sheet music bounds: Y={top} to Y={bottom}")
|
||||
else:
|
||||
top, bottom = 0, frames[0].shape[0]
|
||||
print(f" -> Bounding box not found, fallback to full frame: Y={top} to Y={bottom}")
|
||||
|
||||
for frame in frames:
|
||||
h = frame.shape[0]
|
||||
tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
|
||||
if not _has_tab_content(tab_crop):
|
||||
continue
|
||||
tracker.process_frame(tab_crop)
|
||||
|
||||
panorama = tracker.get_final_panorama()
|
||||
if panorama is None:
|
||||
return []
|
||||
# Tightly constrain the region of interest to the sheet music.
|
||||
# This completely hides the guitarist's hands and guarantees pure static tracking.
|
||||
roi = frame[top:bottom, :]
|
||||
tracker.process_frame(roi)
|
||||
|
||||
print(f" -> 생성된 파노라마 길이: {panorama.shape[1]}px")
|
||||
unique_pages = tracker.get_unique_pages()
|
||||
print(f"[Pipeline] Reduced down to {len(unique_pages)} static structural median pages.")
|
||||
|
||||
chunk_width = 1280
|
||||
final_chunks = []
|
||||
# State Machine extraction
|
||||
extractor = ScoreExtractor()
|
||||
extractor.process_pages(unique_pages)
|
||||
tiled_rows = extractor.tile_to_a4(chunk_width=1800)
|
||||
|
||||
gray_pano = _extract_print_channel(panorama)
|
||||
bar_coords = _detect_measure_bars(gray_pano)
|
||||
|
||||
if not bar_coords:
|
||||
w = panorama.shape[1]
|
||||
start_x = 0
|
||||
while start_x < w:
|
||||
chunk = panorama[:, start_x:min(w, start_x + chunk_width)]
|
||||
if chunk.shape[1] < chunk_width:
|
||||
pad = np.full((chunk.shape[0], chunk_width - chunk.shape[1], 3), 255, dtype=np.uint8)
|
||||
chunk = np.hstack([chunk, pad])
|
||||
final_chunks.append(chunk)
|
||||
start_x += chunk_width
|
||||
else:
|
||||
coords = [0] + bar_coords + [panorama.shape[1]]
|
||||
coords = sorted(list(set(coords)))
|
||||
# Wait, the thresholding already produced a 255 White Background with 0 Black Text!
|
||||
# No need to invert!
|
||||
final_a4_chunks = []
|
||||
for row in tiled_rows:
|
||||
final_a4_chunks.append(row)
|
||||
|
||||
current_row = None
|
||||
for i in range(len(coords) - 1):
|
||||
x_start = coords[i]
|
||||
x_end = coords[i+1]
|
||||
if x_end - x_start < 50:
|
||||
continue
|
||||
|
||||
measure_img = panorama[:, x_start:x_end]
|
||||
|
||||
if current_row is None:
|
||||
current_row = measure_img
|
||||
else:
|
||||
if current_row.shape[1] + measure_img.shape[1] > chunk_width:
|
||||
pad_w = chunk_width - current_row.shape[1]
|
||||
if pad_w > 0:
|
||||
pad_img = np.full((current_row.shape[0], pad_w, 3), 255, dtype=np.uint8)
|
||||
current_row = np.hstack([current_row, pad_img])
|
||||
final_chunks.append(current_row)
|
||||
current_row = measure_img
|
||||
else:
|
||||
current_row = np.hstack([current_row, measure_img])
|
||||
|
||||
if current_row is not None:
|
||||
pad_w = chunk_width - current_row.shape[1]
|
||||
if pad_w > 0:
|
||||
pad_img = np.full((current_row.shape[0], pad_w, 3), 255, dtype=np.uint8)
|
||||
current_row = np.hstack([current_row, pad_img])
|
||||
final_chunks.append(current_row)
|
||||
|
||||
print(f" -> A4 분할 컷: {len(final_chunks)}개")
|
||||
return final_chunks
|
||||
return final_a4_chunks
|
||||
|
||||
def extract_unique_overlay(frames: List[np.ndarray],
|
||||
threshold: float = OVERLAY_SIMILARITY_THRESHOLD) -> List[np.ndarray]:
|
||||
@@ -804,8 +772,8 @@ def generate_pdf(frames: List[np.ndarray], output_path: Path,
|
||||
print(" ⚠ 프레임 없음!")
|
||||
return
|
||||
|
||||
page_w = int(PDF_PAGE_WIDTH_MM / 25.4 * PDF_DPI)
|
||||
page_h = int(PDF_PAGE_HEIGHT_MM / 25.4 * PDF_DPI)
|
||||
page_w = int(PDF_PAGE_HEIGHT_MM / 25.4 * PDF_DPI) # Landscape width
|
||||
page_h = int(PDF_PAGE_WIDTH_MM / 25.4 * PDF_DPI) # Landscape height
|
||||
margin = int(PDF_MARGIN_MM / 25.4 * PDF_DPI)
|
||||
gap = int(TAB_GAP_MM / 25.4 * PDF_DPI)
|
||||
content_w = page_w - 2 * margin
|
||||
@@ -843,20 +811,34 @@ def generate_pdf(frames: List[np.ndarray], output_path: Path,
|
||||
print(f" → PDF: {len(resized)} Tab → {len(pages)} 페이지, {output_path.stat().st_size // 1024} KB")
|
||||
|
||||
|
||||
def generate_long_image(frames: List[np.ndarray], output_path: Path) -> None:
|
||||
"""Tab을 하나의 긴 이미지로"""
|
||||
if not frames:
|
||||
def generate_long_image(chunks: List[np.ndarray], output_path: str):
|
||||
if not chunks:
|
||||
return
|
||||
max_w = max(f.shape[1] for f in frames)
|
||||
imgs = []
|
||||
for f in frames:
|
||||
if f.shape[1] != max_w:
|
||||
scale = max_w / f.shape[1]
|
||||
f = cv2.resize(f, (max_w, int(f.shape[0] * scale)))
|
||||
imgs.append(f)
|
||||
concat = np.vstack(imgs)
|
||||
Image.fromarray(cv2.cvtColor(concat, cv2.COLOR_BGR2RGB)).save(str(output_path))
|
||||
print(f" → 롱 이미지: {max_w}x{concat.shape[0]}")
|
||||
|
||||
print(f"DEBUG: First chunk shape = {chunks[0].shape}, dtype = {chunks[0].dtype}")
|
||||
# Calculate exact total height required
|
||||
total_h = sum(chunk.shape[0] for chunk in chunks)
|
||||
max_w = max(chunk.shape[1] for chunk in chunks)
|
||||
|
||||
# Ensure correct channel dimensions for the canvas to prevent squishing!
|
||||
if len(chunks[0].shape) == 3:
|
||||
canvas = np.full((total_h, max_w, 3), 255, dtype=np.uint8)
|
||||
else:
|
||||
canvas = np.full((total_h, max_w), 255, dtype=np.uint8)
|
||||
|
||||
y_offset = 0
|
||||
for chunk in chunks:
|
||||
h, w = chunk.shape[:2]
|
||||
if len(chunk.shape) == 3 and len(canvas.shape) == 2:
|
||||
canvas[y_offset:y_offset+h, :w] = cv2.cvtColor(chunk, cv2.COLOR_BGR2GRAY)
|
||||
elif len(chunk.shape) == 2 and len(canvas.shape) == 3:
|
||||
canvas[y_offset:y_offset+h, :w] = cv2.cvtColor(chunk, cv2.COLOR_GRAY2BGR)
|
||||
else:
|
||||
canvas[y_offset:y_offset+h, :w] = chunk
|
||||
|
||||
y_offset += h
|
||||
|
||||
cv2.imwrite(str(output_path), canvas)
|
||||
|
||||
|
||||
# ─── Main ─────────────────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user