# guitar_score/scripts/debug/test_panorama.py

import cv2
import numpy as np
import time
from pathlib import Path

def stitch_scrolling_video(video_path, start_sec=0, duration_sec=100, fps_sample_rate=15,
                           y_start=103, y_end=435):
    """Stitch a left-scrolling tablature video into a single wide panorama.

    Frames are sampled at roughly ``fps_sample_rate`` frames per second,
    resized to a width of 1280 px and cropped to rows ``y_start:y_end``.
    The horizontal shift between consecutive accepted frames is estimated by
    template matching on the horizontal Sobel gradient, and the newly revealed
    right-hand columns are appended to the panorama.

    Args:
        video_path: Path of the video file (``str`` or ``pathlib.Path``).
        start_sec: Offset in seconds at which stitching starts.
        duration_sec: Length in seconds of the segment to process.
        fps_sample_rate: Target number of processed frames per second of
            video. Non-positive values mean "process every frame".
        y_start: First row (inclusive) of the crop in the resized frame.
        y_end: Last row (exclusive) of the crop in the resized frame.

    Returns:
        The stitched BGR panorama as a NumPy array, or ``None`` when no frame
        could be read.
    """
    # Template-matching geometry, in pixels of the 1280-wide resized frame.
    target_width = 1280
    template_x0, template_x1 = 600, 700   # slice taken from the previous frame
    search_x0, search_x1 = 550, 710       # window searched in the current frame
    zero_shift_index = template_x0 - search_x0  # match position meaning dx == 0
    max_shift = 300                       # sanity bound against glitches

    if isinstance(video_path, Path):
        video_path = str(video_path)

    cap = cv2.VideoCapture(video_path)
    strips = []        # column blocks that make up the panorama, left to right
    prev_edges = None  # edge map of the last frame that contributed pixels
    try:
        if not cap.isOpened():
            return None

        video_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

        # Process one frame out of every `frame_skip`.
        if fps_sample_rate > 0:
            frame_skip = max(1, int(video_fps / fps_sample_rate))
        else:
            frame_skip = 1

        start_frame = int(start_sec * video_fps)
        max_frames = int(duration_sec * video_fps)
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        # frame_idx advances for EVERY frame consumed, so both the sampling
        # rate and the duration limit are honoured.
        for frame_idx in range(max_frames):
            # grab() advances the stream without decoding; only sampled
            # frames pay for retrieve().
            if not cap.grab():
                break
            if frame_idx % frame_skip != 0:
                continue
            ok, frame = cap.retrieve()
            if not ok:
                break

            scale = target_width / frame.shape[1]
            frame_resized = cv2.resize(frame, (target_width, int(frame.shape[0] * scale)))

            # Crop to the tablature ribbon.
            ribbon = frame_resized[y_start:y_end, :]
            gray = cv2.cvtColor(ribbon, cv2.COLOR_BGR2GRAY)

            # dx=1, dy=0 gives the horizontal gradient, which highlights
            # VERTICAL edges (note stems, bar lines) and suppresses the
            # horizontal staff lines that would otherwise alias the match.
            edges = np.abs(cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3))

            if prev_edges is None:
                # The first sampled frame seeds the panorama.
                strips.append(ribbon.copy())
                prev_edges = edges
                continue

            template = prev_edges[:, template_x0:template_x1]
            search_region = edges[:, search_x0:search_x1]

            res = cv2.matchTemplate(search_region, template, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, max_loc = cv2.minMaxLoc(res)

            # A best match left of `zero_shift_index` means the content moved
            # left by that many pixels (dx > 0).
            dx = zero_shift_index - max_loc[0]

            if frame_idx < 30:  # Print first few shifts
                print(f"Frame {frame_idx}: dx={dx}, max_val={max_val:.3f}")

            if 0 < dx < max_shift:
                # Content that scrolled left by dx exposes exactly dx new
                # columns at the right edge of the current ribbon. Copy them so
                # the strip does not keep the whole resized frame alive.
                strips.append(ribbon[:, -dx:].copy())
                # The reference only advances on accepted shifts, so sub-pixel
                # or zero motion accumulates until it becomes measurable.
                prev_edges = edges
    finally:
        cap.release()

    if not strips:
        return None
    # A single concatenation avoids re-copying the panorama for every frame.
    return np.hstack(strips)

def slice_panorama_to_a4(panorama, slice_width=1280):
    """Cut a wide panorama into fixed-width slices and stack them vertically.

    The panorama is split left to right into chunks of ``slice_width``
    columns. The last chunk is right-padded with the value 255 (white for
    8-bit images) when it is narrower than ``slice_width``. Chunks are then
    stacked top to bottom in reading order.

    Args:
        panorama: Image array of shape ``(H, W)`` or ``(H, W, C)``.
        slice_width: Width in pixels of every output row; must be >= 1.

    Returns:
        Array of shape ``(H * n, slice_width[, C])`` where
        ``n = ceil(W / slice_width)``.

    Raises:
        ValueError: If ``slice_width`` is not positive or the panorama has
            zero width.
    """
    if slice_width < 1:
        raise ValueError(f"slice_width must be a positive integer, got {slice_width}")

    # Only the first two axes are unpacked so grayscale (2-D) input works too.
    height, width = panorama.shape[:2]
    if width == 0:
        raise ValueError("panorama has zero width; nothing to slice")

    rows = [panorama[:, x:x + slice_width] for x in range(0, width, slice_width)]

    # Only the final chunk can be short; pad it on the right with white.
    missing = slice_width - rows[-1].shape[1]
    if missing:
        pad_shape = (height, missing) + panorama.shape[2:]
        pad = np.full(pad_shape, 255, dtype=np.uint8)
        rows[-1] = np.hstack([rows[-1], pad])

    return np.vstack(rows)

if __name__ == "__main__":
    # Debug entry point: stitch the first 100 s of the sample video and write
    # the stacked result to a PNG for visual inspection.
    video_path = "output/サカナクション／新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4"
    if not Path(video_path).exists():
        # Fall back to the first .mp4 found in output/, if there is one.
        fallback = next(Path("output").glob("*.mp4"), None)
        if fallback is not None:
            video_path = str(fallback)

    print("Stitching...")

    start_t = time.time()
    panorama = stitch_scrolling_video(video_path, start_sec=0, duration_sec=100, fps_sample_rate=15)
    elapsed = time.time() - start_t

    # Check for failure BEFORE touching panorama.shape; the stitcher returns
    # None when no frame could be read.
    if panorama is not None:
        print(f"Extraction took {elapsed:.2f}s. Panorama shape: {panorama.shape}")
        final_sheet = slice_panorama_to_a4(panorama, slice_width=1280)
        out_path = "C:/Users/Certes/.gemini/antigravity/brain/975cea00-dd68-4689-9ee3-f1a2408b4ee6/verify_panorama.png"
        # imwrite reports failure (e.g. missing directory) via its return
        # value rather than raising, so it must be checked explicitly.
        if cv2.imwrite(out_path, final_sheet):
            print(f"Saved stacked result to {out_path} with shape {final_sheet.shape}")
        else:
            print(f"Failed to write {out_path}")
    else:
        print("Failed to generate panorama.")