"""Debug script: OCR the small region above each detected measure bar.

Grabs a single frame from a TAB video, locates the first staff line and the
measure bar x-positions, then crops the patch just above each bar, saves the
crop (raw and binarised) as PNG files for inspection, and prints what EasyOCR
reads from the upscaled grayscale crop.
"""
import sys

import cv2
import easyocr
import numpy as np

from youtube_tab_to_pdf import _extract_print_channel, _detect_measure_bars

VIDEO_PATH = r"output/サカナクション/新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4"
FRAME_INDEX = 50  # 1.6 seconds in (per the original note; depends on the video's fps)

# Read exactly one frame, and always release the capture handle afterwards so
# the video file / decoder is not kept open for the rest of the run.
cap = cv2.VideoCapture(VIDEO_PATH)
try:
    cap.set(cv2.CAP_PROP_POS_FRAMES, FRAME_INDEX)
    ret, frame = cap.read()
finally:
    cap.release()

if not ret:
    # Covers both "file could not be opened" and "frame index out of range".
    sys.exit(f"Could not read frame {FRAME_INDEX} from {VIDEO_PATH}")

frame_width = frame.shape[1]

# Per-pixel maximum over the colour channels, then keep only bright pixels.
gray = np.max(frame, axis=2)
_, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

# A row counts as a staff line when more than half of its pixels are bright.
row_sums = np.sum(thresh, axis=1) / 255
staff_lines = np.where(row_sums > frame_width * 0.5)[0]
# Fall back to y=100 when no such row is found.
y_staff = staff_lines[0] if len(staff_lines) > 0 else 100

bar_coords = _detect_measure_bars(thresh)
print(f"Detected Bars at X: {bar_coords}")

reader = easyocr.Reader(['en'], gpu=False)

# The vertical crop window depends only on the staff position, so compute it
# once: a thin strip from 25 px to 2 px above the first staff line.
crop_y1 = max(0, y_staff - 25)
crop_y2 = max(0, y_staff - 2)

for idx, x_bar in enumerate(bar_coords):
    # Crop the tiny region above the bar where the number should be
    crop_x1 = max(0, x_bar - 5)
    crop_x2 = min(frame_width, x_bar + 25)

    # Skip degenerate (empty) crops, e.g. a staff line at the very top of the
    # frame or a bar position outside the frame.
    if crop_y2 <= crop_y1 or crop_x2 <= crop_x1:
        continue

    sprite = frame[crop_y1:crop_y2, crop_x1:crop_x2]
    cv2.imwrite(f"debug_sprite_{idx}.png", sprite)

    # Scale up for better OCR
    scaled = cv2.resize(sprite, (0, 0), fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
    gray_sprite = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)

    # The inverted binary image is only written out for visual inspection;
    # OCR below is run on the grayscale upscale, as in the original script.
    _, binary_sprite = cv2.threshold(gray_sprite, 180, 255, cv2.THRESH_BINARY_INV)
    cv2.imwrite(f"debug_sprite_bin_{idx}.png", binary_sprite)

    # Restrict recognition to digits only.
    res = reader.readtext(gray_sprite, allowlist='0123456789')
    print(f"Bar {idx} X={x_bar} OCR: {res}")