"""Debug helper: OCR the number sprite printed above each TAB measure.

Reads the first frame of a hard-coded video, crops the TAB strip, splits it
at the detected measure bars, and for every measure dumps the thresholded
region just above the first staff line to a PNG and prints what EasyOCR
reads from it (digits only).
"""
import os
from pathlib import Path
from typing import Optional

import cv2
import easyocr
import numpy as np

from youtube_tab_to_pdf import (
    _detect_measure_bars,
    _extract_print_channel,
    _find_white_tab_strip,
    _has_tab_content,
)

# Measures narrower than this many pixels are skipped.
_MIN_MEASURE_WIDTH = 30
# Gray values at or below this become "ink" (255) in the inverted mask.
_INK_THRESHOLD = 200
# A row counts as a staff line when more than this fraction of it is ink.
_STAFF_LINE_COVERAGE = 0.5
# Size of the region cropped above the first staff line, in pixels.
_SPRITE_HEIGHT = 45
_SPRITE_WIDTH = 70
# Upscale factor applied to the sprite before OCR.
_OCR_UPSCALE = 4.0


def _read_first_frame(video_path: Path) -> np.ndarray:
    """Return the first frame of *video_path*, always releasing the capture.

    Raises:
        RuntimeError: if the video cannot be opened or yields no frame.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Could not open video: {video_path}")
        ok, frame = cap.read()
    finally:
        # Only the first frame is needed, so release the capture right away.
        cap.release()
    if not ok or frame is None:
        raise RuntimeError(f"Could not read a frame from: {video_path}")
    return frame


def _crop_number_sprite(measure_img: np.ndarray) -> Optional[np.ndarray]:
    """Crop the inverted-binary region just above the first staff line.

    The crop spans up to ``_SPRITE_HEIGHT`` rows above the first staff-line
    row and up to ``_SPRITE_WIDTH`` columns from the left edge.

    Returns:
        The cropped mask, or ``None`` when no staff line is found or when
        there is no room above it (first staff line at row 0), because an
        empty image cannot be written or resized by OpenCV.
    """
    gray = cv2.cvtColor(measure_img, cv2.COLOR_BGR2GRAY)
    _, ink = cv2.threshold(gray, _INK_THRESHOLD, 255, cv2.THRESH_BINARY_INV)

    # Number of ink pixels in each row of the mask.
    ink_per_row = np.sum(ink, axis=1) / 255
    width = measure_img.shape[1]
    staff_rows = np.where(ink_per_row > width * _STAFF_LINE_COVERAGE)[0]
    if staff_rows.size == 0:
        return None

    y_staff = int(staff_rows[0])
    # Crop the 45 px above the staff and the leftmost 70 px.
    y_top = max(0, y_staff - _SPRITE_HEIGHT)
    sprite = ink[y_top:y_staff, 0:min(_SPRITE_WIDTH, width)]
    if sprite.size == 0:
        return None
    return sprite


def main():
    """Dump and OCR the number sprite of every measure in the first frame.

    Side effects: writes ``debug_sprite_<i>.png`` files into the artifact
    directory (created if missing) and prints one line per OCR'd measure.
    """
    # Source video: https://youtu.be/tJq1n8TofM0
    video_path = Path("output/サカナクション/新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4")
    artifact_dir = Path(r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6")
    # cv2.imwrite does not create directories, so make sure the target exists.
    artifact_dir.mkdir(parents=True, exist_ok=True)

    frame = _read_first_frame(video_path)

    # NOTE(review): assumes _find_white_tab_strip always returns an indexable
    # (top, bottom, ...) result for this frame -- confirm against the helper.
    strip = _find_white_tab_strip(frame)
    top, bottom = strip[0], strip[1]
    tab_crop = frame[max(0, top):min(frame.shape[0], bottom), :]

    gray_page = _extract_print_channel(tab_crop)
    bar_coords = _detect_measure_bars(gray_page)

    # Measure boundaries: left edge, every detected bar, right edge.
    # NOTE(review): relies on bar_coords being a list (list concatenation).
    coords = sorted(set([0] + bar_coords + [tab_crop.shape[1]]))

    reader = easyocr.Reader(['en'], verbose=False)

    for i, (x_start, x_end) in enumerate(zip(coords, coords[1:])):
        if x_end - x_start < _MIN_MEASURE_WIDTH:
            continue

        # Extract Number Sprite precisely
        sprite = _crop_number_sprite(tab_crop[:, x_start:x_end])
        if sprite is None:
            continue

        out_file = artifact_dir / f"debug_sprite_{i}.png"
        cv2.imwrite(str(out_file), sprite)

        # OCR
        upscaled = cv2.resize(
            sprite,
            (0, 0),
            fx=_OCR_UPSCALE,
            fy=_OCR_UPSCALE,
            interpolation=cv2.INTER_CUBIC,
        )
        res = reader.readtext(upscaled, allowlist='0123456789', detail=0)
        print(f"Measure {i}: Found text = {res}")


if __name__ == "__main__":
    main()