138 lines
5.4 KiB
Python
138 lines
5.4 KiB
Python
import cv2
|
|
import numpy as np
|
|
import time
|
|
from pathlib import Path
|
|
|
|
def stitch_scrolling_video(video_path, start_sec=0, duration_sec=100, fps_sample_rate=15,
                           *, y_start=103, y_end=435):
    """Stitch a left-scrolling ribbon of a video into one wide panorama image.

    Frames are sampled at roughly ``fps_sample_rate`` frames per second,
    resized to a width of 1280 px and cropped to rows ``y_start:y_end``.
    The horizontal shift between consecutive sampled ribbons is estimated by
    template matching on their vertical-edge maps, and the columns that newly
    entered from the right edge are appended to the panorama.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        start_sec: Offset into the video, in seconds, at which to start.
        duration_sec: Length of the section to read, in seconds.
        fps_sample_rate: Target number of processed frames per second.
        y_start: First row (inclusive) of the ribbon in the resized frame.
        y_end: Last row (exclusive) of the ribbon in the resized frame.

    Returns:
        The panorama as a BGR ``numpy`` array whose height is that of the
        cropped ribbon, or ``None`` when the video could not be opened or no
        frame could be read.
    """
    # Geometry of the matcher, in columns of the 1280 px wide ribbon.
    target_width = 1280
    template_x0, template_x1 = 600, 700
    search_x0, search_x1 = 550, 710
    # Index inside the search region at which the template sits when nothing
    # moved between the two frames.
    zero_shift_index = template_x0 - search_x0

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return None

        # Some containers report 0 FPS; fall back to a common default.
        video_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        frame_skip = max(1, int(video_fps / fps_sample_rate))
        start_frame = int(start_sec * video_fps)
        max_frames = int(duration_sec * video_fps)
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        strips = []        # pieces of the panorama, concatenated once at the end
        prev_edges = None  # edge map of the last ribbon used as reference

        # `count` advances for EVERY frame read, so both the sampling stride
        # and the duration limit are honoured.
        for count in range(max_frames):
            ret, frame = cap.read()
            if not ret:
                break

            # Only every `frame_skip`-th frame is processed.
            if count % frame_skip != 0:
                continue

            scale = target_width / frame.shape[1]
            frame_resized = cv2.resize(frame, (target_width, int(frame.shape[0] * scale)))

            # Crop to the ribbon and reduce it to vertical-edge strength.
            ribbon = frame_resized[y_start:y_end, :]
            gray = cv2.cvtColor(ribbon, cv2.COLOR_BGR2GRAY)
            # dx=1, dy=0 is the horizontal gradient: it responds to vertical
            # edges (note stems, bar lines) and suppresses the horizontal
            # staff lines, which look identical at every horizontal offset.
            edges = np.abs(cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3))

            if prev_edges is None:
                # The first sampled ribbon seeds the panorama.
                strips.append(ribbon.copy())
                prev_edges = edges
                continue

            # Locate a 100 px slice of the reference inside a slightly wider
            # window of the current ribbon.
            template = prev_edges[:, template_x0:template_x1]
            search_region = edges[:, search_x0:search_x1]
            res = cv2.matchTemplate(search_region, template, cv2.TM_CCOEFF_NORMED)
            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

            # A best match left of the zero-shift index means the content
            # moved left by that many pixels (dx > 0).
            dx = zero_shift_index - max_loc[0]

            if count < 30:  # Print first few shifts
                print(f"Frame {count}: dx={dx}, max_val={max_val:.3f}")

            shift_x = int(round(dx))

            if 0 < shift_x < 300:  # Sanity check to ignore massive glitches
                # Content that scrolled in since the reference frame occupies
                # the rightmost `shift_x` columns of the current ribbon. Copy
                # it so the strip does not keep the whole resized frame alive.
                strips.append(ribbon[:, -shift_x:].copy())
                # Advance the reference only after an accepted shift, so a
                # scroll slower than one pixel per sample accumulates against
                # the same reference instead of being rounded away.
                prev_edges = edges
    finally:
        cap.release()

    if not strips:
        return None
    return np.hstack(strips)
|
|
|
|
def slice_panorama_to_a4(panorama, slice_width=1280):
    """Cut a wide panorama into fixed-width chunks and stack them vertically.

    The panorama is split left to right into chunks of ``slice_width``
    columns. The last chunk is padded with white (255) on the right when it
    is narrower than ``slice_width``. The chunks are then stacked top to
    bottom in reading order.

    Args:
        panorama: Image array of shape ``(h, w, c)`` (colour) or ``(h, w)``
            (grayscale).
        slice_width: Width in pixels of every row of the output.

    Returns:
        Array of shape ``(h * n, slice_width, c)`` (or ``(h * n, slice_width)``
        for grayscale input), where ``n = ceil(w / slice_width)``.

    Raises:
        ValueError: If ``slice_width`` is not positive, the panorama is not
            2-D or 3-D, or the panorama has zero width.
    """
    if slice_width <= 0:
        raise ValueError(f"slice_width must be positive, got {slice_width}")
    if panorama.ndim not in (2, 3):
        raise ValueError(f"panorama must be a 2-D or 3-D array, got {panorama.ndim}-D")

    height, width = panorama.shape[:2]
    if width == 0:
        raise ValueError("panorama has zero width; nothing to slice")

    n_rows = -(-width // slice_width)  # ceiling division
    pad_w = n_rows * slice_width - width
    if pad_w:
        # Pad once on the right with white so the width becomes an exact
        # multiple of slice_width; the trailing shape keeps any channel axis.
        pad = np.full((height, pad_w) + panorama.shape[2:], 255, dtype=np.uint8)
        panorama = np.hstack([panorama, pad])

    return np.vstack(np.hsplit(panorama, n_rows))
|
|
|
|
if __name__ == "__main__":
    # Script entry point: stitch the first 100 s of the video and save the
    # panorama as a stack of 1280 px wide rows.
    video_path = "output/サカナクション/新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4"
    if not Path(video_path).exists():
        # Fall back to the first .mp4 found directly under output/, if any.
        for f in Path("output").glob("*.mp4"):
            video_path = str(f)
            break

    print(f"Stitching...")

    start_t = time.time()
    panorama = stitch_scrolling_video(video_path, start_sec=0, duration_sec=100, fps_sample_rate=15)
    elapsed = time.time() - start_t

    if panorama is not None:
        # Report the shape only once the panorama is known to exist;
        # stitching returns None when no frame could be read.
        print(f"Extraction took {elapsed:.2f}s. Panorama shape: {panorama.shape}")
        final_sheet = slice_panorama_to_a4(panorama, slice_width=1280)
        out_path = "C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/verify_panorama.png"
        # cv2.imwrite reports failure through its return value rather than
        # by raising, so check it before claiming success.
        if cv2.imwrite(out_path, final_sheet):
            print(f"Saved stacked result to {out_path} with shape {final_sheet.shape}")
        else:
            print(f"Failed to write result to {out_path}")
    else:
        print("Failed to generate panorama.")
|