chore(docs): document ScoreExtractor tiling and refactor debug scripts (#563)
This commit is contained in:
75
scripts/debug/test_live_ocr.py
Normal file
75
scripts/debug/test_live_ocr.py
Normal file
@@ -0,0 +1,75 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import easyocr
|
||||
import re
|
||||
from youtube_tab_to_pdf import TemporalTracker
|
||||
|
||||
# Private helper used below to locate the tab strip in a sample frame.
from youtube_tab_to_pdf import _find_white_tab_strip

# NOTE: machine-specific absolute path; edit it to point at a local video.
VIDEO_PATH = r"C:\Users\Certes\Desktop\guitar_score\output\shintakarajima.mp4"

cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    # Fail early with a readable message instead of crashing later on a
    # ``None`` frame.
    raise SystemExit(f"Could not open video: {VIDEO_PATH}")

total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = cap.get(cv2.CAP_PROP_FPS)

tracker = TemporalTracker()

# Frame 500 is used as the sample frame for locating the tab strip.
cap.set(cv2.CAP_PROP_POS_FRAMES, 500)
ret, check_frame = cap.read()
if not ret or check_frame is None:
    cap.release()
    raise SystemExit("Could not read frame 500 from the video.")

# The strip detector is run on the frame rescaled to 1280 px wide, keeping
# the aspect ratio.
frame_h, frame_w = check_frame.shape[:2]
resized_frame = cv2.resize(check_frame, (1280, int(frame_h * (1280 / frame_w))))
bounds = _find_white_tab_strip(resized_frame)
if bounds:
    # Extend the crop 60 px above the detected strip, clamped to the frame top.
    crop_top = max(0, bounds[0] - 60)
    crop_bottom = bounds[1]
    tracker.set_crop(crop_top, crop_bottom)

# Process only first 95 seconds to get unique pages
print("Extracting unique pages from first 95 seconds...")
tracker.process_video(cap, start_sec=0, end_sec=95)
unique_pages = tracker.get_unique_pages()

# The capture is not used past this point, so release the handle.
# NOTE(review): assumes the tracker does not read from `cap` lazily after
# process_video() returns; confirm against TemporalTracker.
cap.release()

print(f"Extracted {len(unique_pages)} unique pages.")

# Try easyOCR
reader = easyocr.Reader(['en'], gpu=False)
|
||||
|
||||
def extract_measure_number(page_bgr):
    """Read the measure number printed above the first bar line of a page.

    The function finds the first staff from long dark horizontal runs,
    finds the leftmost bar line from tall dark vertical runs inside that
    staff, crops a small box directly above the staff starting at that
    bar line, and runs the module-level easyocr ``reader`` on the crop.

    Args:
        page_bgr: Page image as a BGR array (it is converted with
            ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The first run of digits in the first OCR result as an ``int``, or
        ``-1`` when no staff, no bar line, no room above the staff, or no
        digits are found.
    """
    # Only the leftmost 1000 px (at most) of the page is analysed.
    cw = min(page_bgr.shape[1], 1000)
    page_gray = cv2.cvtColor(page_bgr[:, :cw], cv2.COLOR_BGR2GRAY)
    # Inverted binarisation: pixels <= 200 (dark ink) become 255.
    _, bin_inv = cv2.threshold(page_gray, 200, 255, cv2.THRESH_BINARY_INV)

    # A row counts as a staff-line row when more than 40% of it is dark.
    row_sums = np.sum(bin_inv, axis=1) / 255.0
    staff_rows = np.where(row_sums > cw * 0.4)[0]
    if len(staff_rows) < 6:
        return -1

    # The first staff spans from the first line row to the last line row
    # within 100 px of it (np.where returns rows in ascending order).
    staff_y_top = staff_rows[0]
    staff_y_bottom = staff_rows[staff_rows - staff_y_top <= 100][-1]

    # A bar line is a column that is dark over at least 80% of the staff
    # height; the height is floored at 10 px.
    expected_h = max(10, staff_y_bottom - staff_y_top + 1)
    staff_region = bin_inv[staff_y_top:staff_y_bottom + 1, :]
    col_sums = np.sum(staff_region, axis=0) / 255.0
    bar_xs = np.where(col_sums >= expected_h * 0.8)[0]
    if len(bar_xs) == 0:
        return -1
    x_bar = bar_xs[0]

    # Number box: up to 25 px above the staff, 35 px wide from the bar line.
    box_y1 = max(0, staff_y_top - 25)
    box_y2 = staff_y_top
    box_x1 = x_bar
    box_x2 = min(page_gray.shape[1], x_bar + 35)
    num_box = page_gray[box_y1:box_y2, box_x1:box_x2]
    if num_box.size == 0:
        # The staff touches the top edge of the image, so there is nothing
        # above it to read; OpenCV would raise on an empty crop.
        return -1

    # Dark text on white background (same result as THRESH_BINARY_INV
    # followed by bitwise_not).
    _, num_for_ocr = cv2.threshold(num_box, 200, 255, cv2.THRESH_BINARY)

    # Upscale 4x and add a 20 px white margin before OCR.
    upscaled = cv2.resize(num_for_ocr, None, fx=4, fy=4, interpolation=cv2.INTER_CUBIC)
    padded = cv2.copyMakeBorder(upscaled, 20, 20, 20, 20, cv2.BORDER_CONSTANT, value=[255, 255, 255])

    results = reader.readtext(padded, allowlist="0123456789")
    if not results:
        return -1

    # Only the first OCR result is considered; index 1 is used as its text.
    digits = re.findall(r'\d+', results[0][1])
    return int(digits[0]) if digits else -1
|
||||
|
||||
# Print the OCR'd measure number (or -1) for every unique page, in order.
# map() is lazy, so each page is still processed right before its line prints.
for i, num in enumerate(map(extract_measure_number, unique_pages)):
    print(f"Page {i:02d}: {num}")
|
||||
Reference in New Issue
Block a user