chore(docs): document ScoreExtractor tiling and refactor debug scripts (#563)
This commit is contained in:
180
scripts/debug/patch_ocr_sprite.py
Normal file
180
scripts/debug/patch_ocr_sprite.py
Normal file
@@ -0,0 +1,180 @@
|
||||
import re
|
||||
|
||||
# Load the full text of the module that is about to be patched.
with open("youtube_tab_to_pdf.py", encoding="utf-8") as target_file:
    code = target_file.read()
|
||||
|
||||
# Replacement source for extract_unique_scroll(), spliced into
# youtube_tab_to_pdf.py by the regex substitution further down.
# NOTE: keep this text free of backslashes so it stays safe to use as an
# re.sub replacement string, and use ''' (not triple double quotes) inside it.
new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
    '''Collect every measure once from scrolling frames and tile them into rows.

    The tab strip location is taken as the median of the strips found in the
    first 50 frames. Each frame is cropped to that strip, split at the detected
    measure bars, and its first measure is aligned against the tail of the
    measures kept so far so that only measures after the overlap are appended.
    The kept measures are finally packed left-to-right into rows of 1280 px.

    Args:
        frames: Video frames; indexed as (height, width, channel) arrays.
        threshold: Not used by this implementation; kept so the signature
            stays compatible with existing callers.

    Returns:
        A list of row images, or an empty list when no tab strip was found
        in the first 50 frames.
    '''
    print(f"[4/5] 순차 Number Sprite Template 앵커 기반 마디 추출 중...")

    strip_tops, strip_bottoms = [], []
    for frame in frames[:50]:
        strip = _find_white_tab_strip(frame)
        if strip:
            strip_tops.append(strip[0])
            strip_bottoms.append(strip[1])

    if not strip_tops:
        return []

    median_top = int(np.median(strip_tops))
    median_bottom = int(np.median(strip_bottoms))

    unique_measures = []
    chunk_width = 1280

    def get_number_sprite(m_img):
        # Brightest channel per pixel, then a plain binary threshold: only
        # pixels brighter than 200 survive as 255.
        gray = np.max(m_img, axis=2)
        _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

        # A row counts as a staff line when more than half of its width is
        # bright; fall back to row 50 when no such row exists.
        row_sums = np.sum(thresh, axis=1) / 255
        staff_lines = np.where(row_sums > m_img.shape[1] * 0.5)[0]
        y_staff = staff_lines[0] if len(staff_lines) > 0 else 50

        # The number sprite is looked for just above the first staff line,
        # within the leftmost 60 columns of the measure.
        crop_y1 = max(0, y_staff - 35)
        crop_y2 = max(0, y_staff - 2)
        crop_x1 = 0
        crop_x2 = min(60, m_img.shape[1])

        if crop_y2 <= crop_y1 or crop_x2 <= crop_x1:
            return None
        sprite = thresh[crop_y1:crop_y2, crop_x1:crop_x2]

        # MUST BE STRICT: If there are fewer than 8 white pixels, it's a BLANK SPRITE.
        # Blank sprites caused the catastrophic 1->36 time-travel deletion!
        if np.count_nonzero(sprite > 127) < 8:
            return None
        return sprite

    for frame in frames:
        h = frame.shape[0]
        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
        if not _has_tab_content(tab_crop):
            continue

        gray_page = _extract_print_channel(tab_crop)
        bar_coords = _detect_measure_bars(gray_page)
        if not bar_coords:
            continue

        # Add both image edges as cut positions, then de-duplicate and sort.
        coords = sorted(set([0] + bar_coords + [tab_crop.shape[1]]))

        page_measures = []
        for x_start, x_end in zip(coords, coords[1:]):
            # Slices narrower than 40 px are dropped.
            if x_end - x_start < 40:
                continue
            page_measures.append(tab_crop[:, x_start:x_end])

        if not page_measures:
            continue

        if not unique_measures:
            # The first usable frame seeds the result list as-is.
            unique_measures.extend(page_measures)
            first_sprite = get_number_sprite(page_measures[0])
            has_pixels = np.count_nonzero(first_sprite > 127) if first_sprite is not None else 0
            print(f" -> [초기화] 첫 프레임 배열 등록: {len(page_measures)}개 마디 (Sprite Pixels: {has_pixels})")
            continue

        first_m = page_measures[0]
        first_sprite = get_number_sprite(first_m)

        anchored = False
        new_start_offset = 0
        best_val = 0.0

        # Only attempt anchor if the first measure explicitly displays a sequence number.
        # If it's blank, we DO NOT blindly match it to other blank measures!
        if first_sprite is not None:
            # Scan backwards over at most the 14 most recent kept measures
            # and remember the best-scoring number sprite.
            for scan_dist in range(1, min(15, len(unique_measures) + 1)):
                past_idx = len(unique_measures) - scan_dist
                past_m = unique_measures[past_idx]
                past_sprite = get_number_sprite(past_m)
                if past_sprite is None:
                    continue

                hs = min(first_sprite.shape[0], past_sprite.shape[0])
                ws = min(first_sprite.shape[1], past_sprite.shape[1])
                s1 = first_sprite[:hs, :ws]
                s2 = past_sprite[:hs, :ws]

                # Shrinking the template by 2 px per side lets matchTemplate
                # slide it by up to 2 px in each direction inside s2.
                template = s1[2:-2, 2:-2]
                if template.shape[0] >= 5 and template.shape[1] >= 5:
                    res = cv2.matchTemplate(s2, template, cv2.TM_CCOEFF_NORMED)
                    # Use the best score over all tested shifts; reading only
                    # the top-left cell scores a fixed 2 px misalignment.
                    _, max_val, _, _ = cv2.minMaxLoc(res)

                    if max_val > best_val:
                        best_val = max_val
                        new_start_offset = len(unique_measures) - past_idx

            if best_val > 0.85:
                anchored = True

        # If we failed to anchor via Sprite (maybe this page has no numbers at all),
        # fall back to strict whole-measure template matching on the print channel.
        if not anchored:
            bin_first = _extract_print_channel(first_m)
            # Only the 3 most recent kept measures are checked, to avoid
            # anchoring on repeated musical passages further back.
            for scan_dist in range(1, min(4, len(unique_measures) + 1)):
                past_idx = len(unique_measures) - scan_dist
                past_m = unique_measures[past_idx]
                bin_past = _extract_print_channel(past_m)

                # Measures whose widths differ by more than 30 px are not compared.
                if abs(bin_first.shape[1] - bin_past.shape[1]) > 30:
                    continue

                hs = min(bin_first.shape[0], bin_past.shape[0])
                ws = min(bin_first.shape[1], bin_past.shape[1])
                s1 = bin_first[:hs, :ws]
                s2 = bin_past[:hs, :ws]

                # A 10 px margin per side gives matchTemplate room to absorb
                # scroll drift of up to 10 px in each direction.
                template = s1[10:-10, 10:-10]
                if template.shape[0] >= 10 and template.shape[1] >= 10:
                    res = cv2.matchTemplate(s2, template, cv2.TM_CCOEFF_NORMED)
                    # Best score over all tested shifts (see the sprite pass).
                    _, max_val, _, _ = cv2.minMaxLoc(res)
                    if max_val > 0.85:
                        new_start_offset = len(unique_measures) - past_idx
                        anchored = True
                        break

        if anchored and new_start_offset < len(page_measures):
            # Skip the measures that overlap with what is already kept.
            unique_measures.extend(page_measures[new_start_offset:])
        elif not anchored:
            unique_measures.extend(page_measures)

    print(f" -> 동기화 중복 제거 완료: Number Sprite 시계열 기반 {len(unique_measures)}개 마디 보존")

    final_chunks = []
    current_row_measures = []
    current_row_width = 0

    for measure_img in unique_measures:
        measure_w = measure_img.shape[1]

        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
            # The row is full: flush it, right-padded with white up to chunk_width.
            row_img = np.hstack(current_row_measures)
            pad_w = chunk_width - row_img.shape[1]
            if pad_w > 0:
                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
                row_img = np.hstack([row_img, pad_img])
            final_chunks.append(row_img)
            current_row_measures = [measure_img]
            current_row_width = measure_w
        else:
            current_row_measures.append(measure_img)
            current_row_width += measure_w

    if current_row_measures:
        # Last row: crop when wider than chunk_width, otherwise pad with white.
        row_img = np.hstack(current_row_measures)
        if row_img.shape[1] > chunk_width:
            row_img = row_img[:, :chunk_width]
        else:
            pad_w = chunk_width - row_img.shape[1]
            if pad_w > 0:
                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
                row_img = np.hstack([row_img, pad_img])
        final_chunks.append(row_img)

    print(f" -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
    return final_chunks
"""
|
||||
|
||||
# Matches the existing definition from its exact signature through the first
# following `return final_chunks` (non-greedy; DOTALL lets `.` span newlines).
pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'

# A callable replacement makes `re` insert new_func verbatim; a plain string
# replacement would have its backslash escapes and group references expanded.
new_code, replaced = re.subn(pattern, lambda _match: new_func, code, flags=re.DOTALL)

if replaced == 0:
    # Nothing matched: leave the target file untouched instead of rewriting it
    # unchanged and reporting success.
    raise SystemExit("extract_unique_scroll definition not found in youtube_tab_to_pdf.py; nothing was patched.")

with open('youtube_tab_to_pdf.py', 'w', encoding='utf-8') as f:
    f.write(new_code)

print("Supreme Logic Embedded.")
|
||||
Reference in New Issue
Block a user