Files
guitar_score/scripts/debug/test_panorama.py

138 lines
5.4 KiB
Python

import cv2
import numpy as np
import time
from pathlib import Path
def stitch_scrolling_video(video_path, start_sec=0, duration_sec=100, fps_sample_rate=15):
    """Stitch a left-scrolling tablature video into one wide panorama image.

    Frames are sampled at roughly ``fps_sample_rate`` frames per second,
    resized to 1280 px wide and cropped to a fixed band of rows.  The
    horizontal shift between consecutive sampled frames is estimated by
    template matching on vertical-edge maps, and the columns that newly
    scrolled in at the right edge are appended to the panorama.

    Args:
        video_path: Path of the video, passed to ``cv2.VideoCapture``.
        start_sec: Offset in seconds at which reading starts.
        duration_sec: Length in seconds of the span to read.
        fps_sample_rate: Target number of frames per second to process.

    Returns:
        The stitched panorama as an array (cropped band height, 3 channels),
        or ``None`` when no frame could be read.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return None

        # Some containers report 0 fps; fall back to a common default.
        video_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

        # Process one frame out of every `frame_skip` frames read.
        frame_skip = max(1, int(video_fps / fps_sample_rate))
        start_frame = int(start_sec * video_fps)
        max_frames = int(duration_sec * video_fps)
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        # Structural assumption: rows 103..435 of the 1280-px-wide frame hold
        # the white tablature bar.
        y_start = 103
        y_end = 435

        # Collected pieces of the panorama; concatenated once at the end so
        # the growing image is not re-copied on every appended strip.
        strips = []
        prev_edges = None

        # `count` advances for EVERY frame read, so both the skip test and the
        # `max_frames` limit are honoured.
        for count in range(max_frames):
            ret, frame = cap.read()
            if not ret:
                break
            if count % frame_skip != 0:
                continue

            scale = 1280 / frame.shape[1]
            frame_resized = cv2.resize(frame, (1280, int(frame.shape[0] * scale)))

            # Crop to the tablature ribbon.
            ribbon = frame_resized[y_start:y_end, :]
            gray = cv2.cvtColor(ribbon, cv2.COLOR_BGR2GRAY)

            # dx=1, dy=0 gives the horizontal gradient, which highlights
            # VERTICAL edges (note stems, bar lines) and suppresses the
            # horizontal staff lines that would otherwise alias the match.
            edges = np.abs(cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3))

            if prev_edges is None:
                # The first processed frame seeds the panorama.
                strips.append(ribbon.copy())
                prev_edges = edges
                continue

            # Template: a 100-px-wide slice of the previous edge map at x=600.
            template = prev_edges[:, 600:700]
            # Search window: x=550..710 of the current edge map.
            search_region = edges[:, 550:710]
            res = cv2.matchTemplate(search_region, template, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, max_loc = cv2.minMaxLoc(res)

            # The search window starts at x=550, so the template's original
            # position (x=600) is index 50.  A match left of index 50 means
            # the content moved left by `dx` pixels.  With this window the
            # measurable range of dx is [-10, 50].
            dx = 50 - max_loc[0]

            if count < 30:  # Print first few shifts
                print(f"Frame {count}: dx={dx}, max_val={max_val:.3f}")

            # Only leftward scrolling adds content; the upper bound is a
            # sanity check against glitches.
            if 0 < dx < 300:
                # The columns that entered the view are the `dx` rightmost
                # columns of the current ribbon.  Copy so the strip does not
                # keep the whole resized frame alive.
                strips.append(ribbon[:, -dx:].copy())

            prev_edges = edges
    finally:
        # Release the capture even if OpenCV raises mid-loop.
        cap.release()

    if not strips:
        return None
    return np.hstack(strips)
def slice_panorama_to_a4(panorama, slice_width=1280):
    """Cut a wide panorama into fixed-width chunks and stack them vertically.

    The panorama is split left to right into chunks of ``slice_width``
    columns.  The last chunk is padded on the right with white (255) when it
    is narrower than ``slice_width``; the chunks are then stacked top to
    bottom in reading order.

    Args:
        panorama: Image array of shape ``(h, w)`` or ``(h, w, c)``.
        slice_width: Width in pixels of every output row.

    Returns:
        Array of shape ``(h * n, slice_width[, c])`` where ``n`` is the
        number of chunks, ``ceil(w / slice_width)``.

    Raises:
        ValueError: If ``slice_width`` is not positive, the panorama is not
            2-D or 3-D, or the panorama has zero width.
    """
    if slice_width < 1:
        raise ValueError(f"slice_width must be a positive integer, got {slice_width}")
    if panorama.ndim not in (2, 3):
        raise ValueError(
            f"panorama must be a 2-D or 3-D image array, got shape {panorama.shape}"
        )

    h, w = panorama.shape[:2]
    if w == 0:
        raise ValueError("panorama has zero width; nothing to slice")

    # Empty for grayscale input, (c,) for multi-channel input.
    channel_dims = panorama.shape[2:]

    rows = []
    for start_x in range(0, w, slice_width):
        chunk = panorama[:, start_x:start_x + slice_width]
        pad_w = slice_width - chunk.shape[1]
        if pad_w > 0:
            # Only the last chunk can be short; fill the remainder with white.
            pad = np.full((h, pad_w, *channel_dims), 255, dtype=np.uint8)
            chunk = np.hstack([chunk, pad])
        rows.append(chunk)

    return np.vstack(rows)
if __name__ == "__main__":
    # Debug entry point: stitch the first 100 s of a sample video and write the
    # stacked result to a fixed verification path.
    video_path = "output/サカナクション/新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4"
    if not Path(video_path).exists():
        # Fall back to the first .mp4 found directly under output/, if any.
        fallback = next(Path("output").glob("*.mp4"), None)
        if fallback is not None:
            video_path = str(fallback)

    print("Stitching...")
    start_t = time.time()
    panorama = stitch_scrolling_video(video_path, start_sec=0, duration_sec=100, fps_sample_rate=15)
    elapsed = time.time() - start_t

    # Check for failure BEFORE touching `panorama.shape`; the stitcher returns
    # None when no frame could be read.
    if panorama is not None:
        print(f"Extraction took {elapsed:.2f}s. Panorama shape: {panorama.shape}")
        final_sheet = slice_panorama_to_a4(panorama, slice_width=1280)
        out_path = "C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/verify_panorama.png"
        # cv2.imwrite reports failure via its return value rather than by
        # raising, so check it instead of claiming success.
        if cv2.imwrite(out_path, final_sheet):
            print(f"Saved stacked result to {out_path} with shape {final_sheet.shape}")
        else:
            print(f"Failed to write result to {out_path}")
    else:
        print("Failed to generate panorama.")