# File-viewer metadata captured with this script: 146 lines, 5.5 KiB, Python.
import re

# One-shot patch script: replaces the body of ``extract_unique_scroll`` inside
# ``youtube_tab_to_pdf.py`` with the implementation stored in ``new_func``.

# File that is read, patched in memory and written back in place.
TARGET_PATH = 'youtube_tab_to_pdf.py'

# Replacement source for ``extract_unique_scroll``.  This is runtime text that
# is spliced verbatim into the target file, so its comments and messages are
# kept exactly as authored.  It references names (np, cv2, TemporalTracker,
# _find_white_tab_strip, ...) that must already exist in the target module.
new_func = """def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
    print(f"[4/5] 순차 Keyframe 페이지 추출 중...")

    strip_tops, strip_bottoms = [], []
    for frame in frames[:50]:
        strip = _find_white_tab_strip(frame)
        if strip:
            strip_tops.append(strip[0])
            strip_bottoms.append(strip[1])

    if not strip_tops: return []

    median_top = int(np.median(strip_tops))
    median_bottom = int(np.median(strip_bottoms))

    # 1. 0.05 threshold Tracker to completely ignore all fade/blur frames and extract EXACTLY 13 keyframes
    tracker = TemporalTracker(diff_threshold=0.05)

    for frame in frames:
        h = frame.shape[0]
        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
        if not _has_tab_content(tab_crop):
            continue
        tracker.process_frame(tab_crop)

    unique_pages = tracker.get_unique_pages()
    if not unique_pages: return []

    unique_measures = []
    chunk_width = 1280

    def get_clean_binary(img):
        gray = np.max(img, axis=2)
        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
        return binary

    for page_idx, page in enumerate(unique_pages):
        gray_page = _extract_print_channel(page)
        bar_coords = _detect_measure_bars(gray_page)

        if not bar_coords: continue

        coords = [0] + bar_coords + [page.shape[1]]
        coords = sorted(list(set(coords)))

        page_measures = []
        for i in range(len(coords) - 1):
            x_start = coords[i]
            x_end = coords[i+1]
            if x_end - x_start < 40: continue
            page_measures.append(page[:, x_start:x_end])

        if not page_measures: continue

        if not unique_measures:
            unique_measures.extend(page_measures)
            continue

        first_m = page_measures[0]
        bin_first = get_clean_binary(first_m)

        best_error = 1.0
        best_offset = 0
        anchored = False

        # 3. CRUCIAL FIX: scan_dist limited to exactly 3.
        # Preventing M40 from visually matching M9 because Chorus repeats.
        for scan_dist in range(1, min(4, len(unique_measures) + 1)):
            past_idx = len(unique_measures) - scan_dist
            past_m = unique_measures[past_idx]
            bin_past = get_clean_binary(past_m)

            # 2. Binary Absdiff error < 0.20 for subpixel-immune, noise-immune math overlap matching
            if abs(bin_first.shape[1] - bin_past.shape[1]) <= 25:
                hs = min(bin_first.shape[0], bin_past.shape[0])
                ws = min(bin_first.shape[1], bin_past.shape[1])
                s1 = bin_first[:hs, :ws]
                s2 = bin_past[:hs, :ws]

                diff = cv2.absdiff(s1, s2)
                error_ratio = np.sum(diff > 0) / s1.size

                if error_ratio < best_error:
                    best_error = error_ratio
                    best_offset = len(unique_measures) - past_idx

        if best_error < 0.20:
            new_start_offset = best_offset
            anchored = True

        if anchored and new_start_offset < len(page_measures):
            # Overlapped exactly at this point, only append the truly NEW measures
            unique_measures.extend(page_measures[new_start_offset:])
        elif not anchored:
            # Completely discrete page flip with no overlap, append all measures
            unique_measures.extend(page_measures)

    print(f" -> 동기화 중복 제거 완료: 무손실 정적 페이지 기반 {len(unique_measures)}개 연속 마디 보존")

    final_chunks = []
    current_row_measures = []
    current_row_width = 0

    for measure_img in unique_measures:
        measure_w = measure_img.shape[1]

        if current_row_width + measure_w > chunk_width and len(current_row_measures) > 0:
            row_img = np.hstack(current_row_measures)
            pad_w = chunk_width - row_img.shape[1]
            if pad_w > 0:
                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
                row_img = np.hstack([row_img, pad_img])
            final_chunks.append(row_img)
            current_row_measures = [measure_img]
            current_row_width = measure_w
        else:
            current_row_measures.append(measure_img)
            current_row_width += measure_w

    if current_row_measures:
        row_img = np.hstack(current_row_measures)
        if row_img.shape[1] > chunk_width:
            row_img = row_img[:, :chunk_width]
        else:
            pad_w = chunk_width - row_img.shape[1]
            if pad_w > 0:
                pad_img = np.full((row_img.shape[0], pad_w, 3), 255, dtype=np.uint8)
                row_img = np.hstack([row_img, pad_img])
        final_chunks.append(row_img)

    print(f" -> A4 분할 컷: {len(final_chunks)}개 줄(Row)")
    return final_chunks
"""

# Matches the existing function from its exact signature line through the
# first following ``return final_chunks`` (non-greedy; used with re.DOTALL so
# ``.`` spans newlines).
pattern = r'def extract_unique_scroll\(frames: List\[np\.ndarray\], threshold: float = SIMILARITY_THRESHOLD\) -> List\[np\.ndarray\]:.*?return final_chunks'


def apply_patch(code: str) -> str:
    """Return *code* with ``extract_unique_scroll`` replaced by ``new_func``.

    Args:
        code: Full source text of the module to patch.

    Returns:
        The patched source text.

    Raises:
        ValueError: If ``pattern`` does not match anywhere in *code*, i.e.
            there is nothing to replace.
    """
    # The pattern stops at "return final_chunks" and leaves the newline that
    # follows it in place, so the replacement must not bring its own trailing
    # newline; otherwise every run would insert one more blank line.
    replacement = new_func.rstrip("\n")
    # A callable replacement is inserted literally.  Passing the text itself
    # would make ``re`` interpret any backslash sequence inside it as an
    # escape or group reference.
    patched, hits = re.subn(pattern, lambda _match: replacement, code, flags=re.DOTALL)
    if hits == 0:
        raise ValueError("extract_unique_scroll definition not found")
    return patched


def main() -> None:
    """Patch ``TARGET_PATH`` in place, or exit non-zero if nothing matched."""
    with open(TARGET_PATH, 'r', encoding='utf-8') as f:
        code = f.read()

    try:
        new_code = apply_patch(code)
    except ValueError as err:
        # Leave the file untouched and report failure instead of claiming
        # success for a patch that changed nothing.
        raise SystemExit(f"Patch failed for {TARGET_PATH}: {err}") from err

    with open(TARGET_PATH, 'w', encoding='utf-8') as f:
        f.write(new_code)
    print("Final Truth Pipeline Patched.")


if __name__ == "__main__":
    main()