chore(docs): document ScoreExtractor tiling and refactor debug scripts (#563)

This commit is contained in:
2026-03-29 17:57:40 +09:00
parent 39b55f2e9f
commit ac0c098259
698 changed files with 141180 additions and 195 deletions

View File

@@ -0,0 +1,73 @@
import cv2
import numpy as np
from score_extractor import ScoreExtractor
from youtube_tab_to_pdf import extract_unique_scroll, _detect_tab_overlay
# Simplified run script to dump all macro blocks and ignored pages
# Sample the source video at roughly four frames per second. The sampled
# frames are kept in memory in ``frames`` for the steps that follow.
frames = []
video_path = "sakanaction shintakarajima.mp4"
video = cv2.VideoCapture(video_path)
try:
    # A capture that failed to open reports isOpened() == False rather than
    # raising, so stop here with a clear message instead of carrying an
    # empty frame list forward.
    if not video.isOpened():
        raise SystemExit(f"Could not open video: {video_path!r}")
    fps_orig = video.get(cv2.CAP_PROP_FPS)
    # Keep one frame out of every ``stride``; max(1, ...) covers frame rates
    # below 4 fps (including a reported 0.0).
    stride = max(1, int(fps_orig / 4.0))
    count = 0
    while True:
        ret, frame = video.read()
        if not ret:
            break
        if count % stride == 0:
            frames.append(frame)
        count += 1
finally:
    # Release the capture handle even if reading fails part-way through.
    video.release()
if not frames:
    # Later steps index frames[0]; fail with a readable message instead.
    raise SystemExit(f"No frames could be read from {video_path!r}")
from video_cv_tracker import TemporalTracker
from youtube_tab_to_pdf import _find_white_tab_strip
# Build the tracker first, then work out which rows of each frame to feed it.
tracker = TemporalTracker(diff_threshold=0.05)

# Probe every 30th sampled frame and keep the first truthy result returned by
# _find_white_tab_strip; None means no probe produced one.
tab_bounds = next(
    (found for found in map(_find_white_tab_strip, frames[::30]) if found),
    None,
)
if tab_bounds:
    top, bottom = tab_bounds
else:
    # Nothing detected: fall back to the full height of the first frame.
    top, bottom = 0, frames[0].shape[0]

# Push the selected row range of every sampled frame through the tracker,
# then collect the pages it reports as unique.
for sampled in frames:
    tracker.process_frame(sampled[top:bottom, :])
unique = tracker.get_unique_pages()
# Replay the page-stitching pass by hand on a fresh ScoreExtractor, printing
# the decision taken for every page.

# Widths (in pixel columns) of the page head used as the template and of the
# tail of the current macro block that is searched, plus the minimum
# normalized correlation score that counts as a match.
HEAD_WIDTH = 800
SEARCH_WIDTH = 1500
MATCH_THRESHOLD = 0.50

# len() rather than truthiness so this works for a list or an array result.
if len(unique) == 0:
    raise SystemExit("Tracker returned no unique pages; nothing to stitch.")

ex = ScoreExtractor()
# Manually process them and print verbose output
ex.macro_blocks = [unique[0].copy()]
ex.history_pages = [unique[0]]
for i, page in enumerate(unique[1:], 1):
    current = ex.macro_blocks[-1]
    head_w = min(HEAD_WIDTH, page.shape[1])
    search_w = min(SEARCH_WIDTH, current.shape[1])
    # Template: leftmost columns of the new page.
    # Search area: rightmost columns of the macro block built so far.
    h_gray = cv2.cvtColor(page[:, :head_w], cv2.COLOR_BGR2GRAY)
    s_gray = cv2.cvtColor(current[:, -search_w:], cv2.COLOR_BGR2GRAY)
    res = cv2.matchTemplate(s_gray, h_gray, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(res)
    # NOTE(review): the source's indentation was lost; the ``else`` below is
    # paired with this threshold check because its message reports a jump.
    # Confirm against the original script.
    if max_val > MATCH_THRESHOLD:
        print(f"[Page {i}] Stitched! max_val={max_val:.2f}")
        # Column in ``current`` where the page head matched best.
        absolute_match_x = current.shape[1] - search_w + max_loc[0]
        # Number of leading page columns already present in ``current``.
        next_start_idx = current.shape[1] - absolute_match_x
        if next_start_idx < page.shape[1]:
            # Only the columns past the overlap are new; append those.
            append_part = page[:, next_start_idx:]
            ex.macro_blocks[-1] = np.hstack([ex.macro_blocks[-1], append_part])
            ex.history_pages.append(append_part)
    else:
        # Check repeat
        is_repeat = ex._is_historical_repeat(page)
        print(f"[Page {i}] Jump! max_val={max_val:.2f}, repeat={is_repeat}")
        if is_repeat:
            # We will save the rejected page to see if it was 22-29
            cv2.imwrite(f"rejected_page_{i}.png", page)
        else:
            # Not a repeat: the page starts a new macro block.
            ex.macro_blocks.append(page.copy())
            ex.history_pages.append(page)
# Write the leading columns (at most 1800) of every macro block to its own
# PNG so the start of each block can be inspected.
for j, block in enumerate(ex.macro_blocks):
    preview = block[:, :1800]
    cv2.imwrite(f"macro_block_{j}_start.png", preview)