"""Stitch a horizontally scrolling tablature video into one stacked image."""

import time
from pathlib import Path

import cv2
import numpy as np

# Every frame is resized to this width so the pixel constants below stay valid.
_FRAME_WIDTH = 1280
# Column span of the reference patch cut from the previous sampled frame.
_TEMPLATE_X0, _TEMPLATE_X1 = 600, 700
# Column span of the current frame that is searched for that patch.
_SEARCH_X0, _SEARCH_X1 = 550, 710
# Exclusive upper bound on an accepted per-step shift, in pixels.
_MAX_SHIFT = 300
# Only the first frames log their measured shift, to keep the output short.
_DEBUG_FRAMES = 30


def _vertical_edge_map(ribbon):
    """Return the absolute horizontal gradient of a BGR ribbon as float32."""
    gray = cv2.cvtColor(ribbon, cv2.COLOR_BGR2GRAY)
    # dx=1, dy=0 responds to vertical edges (stems, bar lines) and suppresses
    # the horizontal staff lines, which look the same at every x offset.
    return np.abs(cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3))


def _estimate_shift(prev_edges, edges):
    """Return (dx, score): how far content moved left between two edge maps."""
    template = prev_edges[:, _TEMPLATE_X0:_TEMPLATE_X1]
    search = edges[:, _SEARCH_X0:_SEARCH_X1]
    scores = cv2.matchTemplate(search, template, cv2.TM_CCOEFF_NORMED)
    _, best_score, _, best_loc = cv2.minMaxLoc(scores)
    # Inside the search window the template's own position is at this offset;
    # a match found further left means the content scrolled left (dx > 0).
    rest_offset = _TEMPLATE_X0 - _SEARCH_X0
    return rest_offset - best_loc[0], best_score


def stitch_scrolling_video(video_path, start_sec=0, duration_sec=100,
                           fps_sample_rate=15, *, y_start=103, y_end=435):
    """Build a wide panorama from a video whose content scrolls to the left.

    Frames are sampled at roughly ``fps_sample_rate`` per second, resized to
    1280 px wide and cropped to rows ``y_start:y_end``. The horizontal shift
    between consecutive sampled frames is measured by template matching on
    vertical-edge maps, and the columns that newly entered from the right
    edge are appended to the panorama.

    With the current template/search geometry a step can only measure shifts
    from -10 to +50 px; faster scrolling per sampled step cannot be tracked.

    Args:
        video_path: Path of the video, passed to ``cv2.VideoCapture``.
        start_sec: Position in seconds at which reading starts.
        duration_sec: Length in seconds of the section to read.
        fps_sample_rate: Target number of processed frames per video second.
        y_start: First row (after resizing) of the ribbon to keep. The
            default comes from the original hard-coded crop.
        y_end: Row (after resizing) at which the ribbon ends, exclusive.

    Returns:
        The panorama as a BGR array, or ``None`` when the video could not be
        opened or yielded no frame.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return None

        # Some containers report 0 fps; fall back to a common default.
        video_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        frame_skip = max(1, int(video_fps / fps_sample_rate))
        max_frames = int(duration_sec * video_fps)
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(start_sec * video_fps))

        strips = []
        prev_edges = None
        # The index advances for every frame read, so both the duration limit
        # and the sampling stride are honoured.
        for frame_index in range(max_frames):
            ok, frame = cap.read()
            if not ok:
                break
            if frame_index % frame_skip:
                continue

            scale = _FRAME_WIDTH / frame.shape[1]
            resized = cv2.resize(
                frame, (_FRAME_WIDTH, int(frame.shape[0] * scale))
            )
            ribbon = resized[y_start:y_end, :]
            edges = _vertical_edge_map(ribbon)

            if prev_edges is None:
                # The first sampled frame seeds the panorama in full.
                strips.append(ribbon.copy())
            else:
                dx, score = _estimate_shift(prev_edges, edges)
                if frame_index < _DEBUG_FRAMES:
                    print(f"Frame {frame_index}: dx={dx}, max_val={score:.3f}")
                # NOTE(review): the match score is not used to reject weak
                # matches; confirm whether a threshold is wanted.
                if 0 < dx < _MAX_SHIFT:
                    # Copy so the strip does not keep the whole frame alive.
                    strips.append(ribbon[:, -dx:].copy())
            prev_edges = edges

        # One concatenation at the end instead of regrowing the panorama on
        # every step.
        return np.hstack(strips) if strips else None
    finally:
        cap.release()


def slice_panorama_to_a4(panorama, slice_width=1280):
    """Cut a wide panorama into rows of ``slice_width`` and stack them.

    The last row is padded on the right with the value 255 when the panorama
    width is not a multiple of ``slice_width``. Both 2-D and 3-D (with a
    channel axis) arrays are accepted.

    Args:
        panorama: Image array of shape ``(h, w)`` or ``(h, w, c)``.
        slice_width: Width in pixels of every output row.

    Returns:
        An array of shape ``(h * n_rows, slice_width, ...)``.

    Raises:
        ValueError: If ``slice_width`` is not positive or the panorama has no
            columns.
    """
    if slice_width <= 0:
        raise ValueError("slice_width must be positive")
    height, width = panorama.shape[:2]
    if width == 0:
        raise ValueError("panorama has no columns to slice")

    n_rows = -(-width // slice_width)  # ceiling division
    pad_w = n_rows * slice_width - width
    if pad_w:
        pad_shape = (height, pad_w) + panorama.shape[2:]
        pad = np.full(pad_shape, 255, dtype=panorama.dtype)
        panorama = np.hstack([panorama, pad])

    rows = [
        panorama[:, left:left + slice_width]
        for left in range(0, n_rows * slice_width, slice_width)
    ]
    return np.vstack(rows)


def main():
    """Stitch the configured video and write the stacked sheet to disk."""
    video_path = "output/サカナクション/新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4"
    if not Path(video_path).exists():
        # Fall back to the first .mp4 found directly under output/, if any.
        fallback = next(Path("output").glob("*.mp4"), None)
        if fallback is not None:
            video_path = str(fallback)

    print("Stitching...")
    started = time.perf_counter()
    panorama = stitch_scrolling_video(
        video_path, start_sec=0, duration_sec=100, fps_sample_rate=15
    )
    elapsed = time.perf_counter() - started

    # Check for failure before touching panorama.shape.
    if panorama is None:
        print("Failed to generate panorama.")
        return
    print(f"Extraction took {elapsed:.2f}s. Panorama shape: {panorama.shape}")

    final_sheet = slice_panorama_to_a4(panorama, slice_width=1280)
    out_path = "C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/verify_panorama.png"
    # imwrite reports failure through its return value, e.g. when the target
    # directory does not exist.
    if not cv2.imwrite(out_path, final_sheet):
        print(f"Failed to write {out_path}")
        return
    print(f"Saved stacked result to {out_path} with shape {final_sheet.shape}")


if __name__ == "__main__":
    main()