"""Debug runner: dump all stitched macro blocks and every rejected page.

Pipeline:
1. Sample the input video at roughly ``TARGET_SAMPLE_FPS`` frames per second.
2. Crop every sampled frame to the row range returned by
   ``_find_white_tab_strip`` (full frame height if no probe frame yields one).
3. Feed the crops to ``TemporalTracker`` and collect its unique pages.
4. Stitch pages horizontally via template matching, printing one verbose line
   per page, and write rejected repeats and block starts to PNG files in the
   current working directory.
"""
import cv2
import numpy as np

from score_extractor import ScoreExtractor
# NOTE: extract_unique_scroll and _detect_tab_overlay are not used below; the
# imports are kept as in the original script.
from youtube_tab_to_pdf import (
    extract_unique_scroll,
    _detect_tab_overlay,
    _find_white_tab_strip,
)
from video_cv_tracker import TemporalTracker

VIDEO_PATH = "sakanaction shintakarajima.mp4"
TARGET_SAMPLE_FPS = 4.0     # keep about this many frames per second of video
BOUNDS_PROBE_STEP = 30      # probe every Nth sampled frame for the tab strip
DIFF_THRESHOLD = 0.05       # passed through to TemporalTracker
HEAD_WIDTH = 800            # max width of the new page's left edge (template)
SEARCH_WIDTH = 1500         # max width of the current block's right edge
MATCH_THRESHOLD = 0.50      # min normalized correlation to count as a stitch
BLOCK_PREVIEW_WIDTH = 1800  # columns of each macro block written to disk

# --- 1. Sample frames -------------------------------------------------------
frames = []
video = cv2.VideoCapture(VIDEO_PATH)
try:
    if not video.isOpened():
        # Fail with a clear message instead of an IndexError on frames[0].
        raise SystemExit(f"Could not open video: {VIDEO_PATH!r}")

    fps_orig = video.get(cv2.CAP_PROP_FPS)
    # A zero or NaN FPS fails the `> 0` test and falls back to keeping
    # every frame rather than crashing in int().
    stride = max(1, int(fps_orig / TARGET_SAMPLE_FPS)) if fps_orig > 0 else 1

    count = 0
    while True:
        ret, frame = video.read()
        if not ret:
            break
        if count % stride == 0:
            frames.append(frame)
        count += 1
finally:
    # Release the capture handle even if reading raised.
    video.release()

if not frames:
    raise SystemExit(f"No frames could be read from {VIDEO_PATH!r}")

# --- 2. Locate the vertical crop and collect unique pages -------------------
tracker = TemporalTracker(diff_threshold=DIFF_THRESHOLD)

tab_bounds = None
for f in frames[::BOUNDS_PROBE_STEP]:
    b = _find_white_tab_strip(f)
    if b:
        tab_bounds = b
        break

# Fall back to the full frame height when no probe frame produced bounds.
top, bottom = tab_bounds if tab_bounds else (0, frames[0].shape[0])

for f in frames:
    tracker.process_frame(f[top:bottom, :])

unique = tracker.get_unique_pages()
if len(unique) == 0:
    raise SystemExit("Tracker returned no unique pages; nothing to stitch")

# --- 3. Manually stitch the pages and print verbose output ------------------
ex = ScoreExtractor()
ex.macro_blocks = [unique[0].copy()]
ex.history_pages = [unique[0]]

for i, page in enumerate(unique[1:], 1):
    current = ex.macro_blocks[-1]

    head_w = min(HEAD_WIDTH, page.shape[1])
    search_w = min(SEARCH_WIDTH, current.shape[1])

    # Template: left edge of the new page. Search image: right edge of the
    # block currently being extended.
    h_gray = cv2.cvtColor(page[:, :head_w], cv2.COLOR_BGR2GRAY)
    s_gray = cv2.cvtColor(current[:, -search_w:], cv2.COLOR_BGR2GRAY)

    res = cv2.matchTemplate(s_gray, h_gray, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(res)

    if max_val > MATCH_THRESHOLD:
        print(f"[Page {i}] Stitched! max_val={max_val:.2f}")
        # Convert the match x from search-window to block coordinates.
        absolute_match_x = current.shape[1] - search_w + max_loc[0]
        # Columns of `page` already present at the end of the block.
        next_start_idx = current.shape[1] - absolute_match_x
        if next_start_idx < page.shape[1]:
            append_part = page[:, next_start_idx:]
            ex.macro_blocks[-1] = np.hstack([ex.macro_blocks[-1], append_part])
            ex.history_pages.append(append_part)
    else:
        # No overlap with the current block: check for a repeat.
        is_repeat = ex._is_historical_repeat(page)
        print(f"[Page {i}] Jump! max_val={max_val:.2f}, repeat={is_repeat}")
        if is_repeat:
            # Save the rejected page so it can be inspected (the original
            # goal was to see whether it was one of pages 22-29).
            cv2.imwrite(f"rejected_page_{i}.png", page)
        else:
            ex.macro_blocks.append(page.copy())
            ex.history_pages.append(page)

# --- 4. Dump the starts of the blocks ---------------------------------------
for j, b in enumerate(ex.macro_blocks):
    cv2.imwrite(f"macro_block_{j}_start.png", b[:, :BLOCK_PREVIEW_WIDTH])