45 lines
1.6 KiB
Python
45 lines
1.6 KiB
Python
import cv2
|
|
import easyocr
|
|
import numpy as np
|
|
from youtube_tab_to_pdf import _extract_print_channel, _detect_measure_bars
|
|
|
|
# Debug script: read one frame from a TAB video, find the staff row and the
# measure bars, then OCR the small region above each bar and print the result.
# Writes debug_sprite_<idx>.png / debug_sprite_bin_<idx>.png to the current
# working directory for visual inspection.

cap = cv2.VideoCapture(r"output/サカナクション/新宝島(エレキギターTAB) 難易度★★★ sakanaction shintakarajima.mp4")
try:
    # Seek to frame index 50 (about 1.6 s in at ~30 fps -- TODO confirm the
    # video's actual frame rate; it is not read here).
    cap.set(cv2.CAP_PROP_POS_FRAMES, 50)
    ret, frame = cap.read()
finally:
    # Only a single frame is needed, so release the capture handle right away
    # instead of leaking it until interpreter shutdown.
    cap.release()

if not ret:
    # Covers both "file could not be opened" and "seek/read failed".
    raise SystemExit("Could not read frame 50 from the input video")

# Per-pixel maximum over the colour channels: a pixel is bright if any channel is.
gray = np.max(frame, axis=2)
_, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

# thresh holds only 0 or 255, so dividing the row sums by 255 gives the number
# of bright pixels per row.
row_sums = np.sum(thresh, axis=1) / 255

# Rows in which more than half of the frame width is bright.
staff_lines = np.where(row_sums > frame.shape[1] * 0.5)[0]

# Use the topmost such row as the staff reference; fall back to y=100 when no
# row qualifies.
y_staff = int(staff_lines[0]) if len(staff_lines) > 0 else 100

# NOTE(review): presumably returns the x coordinates of the measure bars as
# integers -- verify against youtube_tab_to_pdf._detect_measure_bars.
bar_coords = _detect_measure_bars(thresh)
print(f"Detected Bars at X: {bar_coords}")

reader = easyocr.Reader(['en'], gpu=False)

# The vertical crop window depends only on the staff row, so compute it once:
# a strip from 25 px to 2 px above the staff, clamped to the top of the frame.
crop_y1 = max(0, y_staff - 25)
crop_y2 = max(0, y_staff - 2)

for idx, x_bar in enumerate(bar_coords):
    # Crop the tiny region above the bar where the number should be
    crop_x1 = max(0, x_bar - 5)
    crop_x2 = min(frame.shape[1], x_bar + 25)

    # Skip empty crops (staff too close to the top edge, or bar outside the frame).
    if crop_y2 <= crop_y1 or crop_x2 <= crop_x1:
        continue

    sprite = frame[crop_y1:crop_y2, crop_x1:crop_x2]
    cv2.imwrite(f"debug_sprite_{idx}.png", sprite)

    # Scale up for better OCR
    scaled = cv2.resize(sprite, (0, 0), fx=3, fy=3, interpolation=cv2.INTER_CUBIC)

    gray_sprite = cv2.cvtColor(scaled, cv2.COLOR_BGR2GRAY)
    _, binary_sprite = cv2.threshold(gray_sprite, 180, 255, cv2.THRESH_BINARY_INV)
    cv2.imwrite(f"debug_sprite_bin_{idx}.png", binary_sprite)

    # NOTE(review): OCR runs on the grayscale sprite; the inverted binary image
    # above is only written to disk for inspection. Confirm this is intended.
    res = reader.readtext(gray_sprite, allowlist='0123456789')
    print(f"Bar {idx} X={x_bar} OCR: {res}")
|