chore(docs): document ScoreExtractor tiling and refactor debug scripts (#563)
This commit is contained in:
55
scripts/debug/verify_ocr.py
Normal file
55
scripts/debug/verify_ocr.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import glob
|
||||
|
||||
def get_number_sprite(m_img):
    """Return the binarized patch above the first detected staff line.

    The image is collapsed to one channel by taking the per-pixel maximum
    over axis 2, then thresholded at 200 so bright pixels become 255 and
    everything else 0. A row counts as a staff line when more than half of
    its pixels are lit; the first such row anchors the crop. When no row
    qualifies, row 50 is used as the anchor.

    Args:
        m_img: Image array indexed as (rows, columns, channels).
            Presumably a uint8 BGR image as loaded by OpenCV; confirm
            against the callers.

    Returns:
        The cropped region of the thresholded image spanning rows
        ``anchor - 60`` to ``anchor - 5`` (clamped at 0) and the first 60
        columns, or ``None`` when that region is empty or holds fewer than
        8 lit pixels.
    """
    brightness = np.max(m_img, axis=2)
    binary = cv2.threshold(brightness, 200, 255, cv2.THRESH_BINARY)[1]

    # Lit pixels are 255, so dividing the row sum by 255 gives a pixel count.
    width = m_img.shape[1]
    lit_per_row = np.sum(binary, axis=1) / 255
    candidate_rows = np.where(lit_per_row > width * 0.5)[0]

    if len(candidate_rows) > 0:
        anchor = candidate_rows[0]
    else:
        anchor = 50

    top = max(0, anchor - 60)
    bottom = max(0, anchor - 5)
    left = 0
    right = min(60, width)

    # Nothing to crop when the window collapses in either direction.
    if bottom <= top or right <= left:
        return None

    patch = binary[top:bottom, left:right]

    # Too few lit pixels: treat the patch as blank.
    if np.count_nonzero(patch > 127) < 8:
        return None

    return patch
|
||||
|
||||
# Debug driver: cut the stitched final image into fixed-height rows, locate
# the first staff line in each row, and dump the binarized region just above
# it (where the number sprite usually is) to a PNG for visual inspection.
# NOTE: the input and output paths below are machine-specific.
img_path = r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\final_check_100_sec.png"
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError(f"Could not read image: {img_path}")

h, w = img.shape[:2]

ROW_HEIGHT = 320  # assuming chunk height is around 320

# Keep only the chunks that contain at least one bright pixel.
rows = []
for y in range(0, h, ROW_HEIGHT):
    chunk = img[y:y + ROW_HEIGHT, :]
    if np.max(chunk) > 200:
        rows.append(chunk)

print(f"Detected {len(rows)} A4 rows in final image.")

for i, row in enumerate(rows):
    gray_row = np.max(row, axis=2)
    _, binary = cv2.threshold(gray_row, 200, 255, cv2.THRESH_BINARY)

    # A row of pixels counts as a staff line when more than 40% of the image
    # width is lit; lit pixels are 255, hence the division.
    row_sums = np.sum(binary, axis=1) / 255
    staff_lines = np.where(row_sums > w * 0.4)[0]
    if len(staff_lines) == 0:
        continue

    y_staff = staff_lines[0]
    crop_y1 = max(0, y_staff - 60)
    crop_y2 = max(0, y_staff - 5)
    sprite = binary[crop_y1:crop_y2, 10:80]

    if sprite.size == 0:
        # Happens when the staff line sits within 5 px of the row top or the
        # image is 10 px wide or less; cv2.imwrite raises on an empty image.
        print(f"Row {i} skipped. Empty crop above staff line at y={y_staff}.")
        continue

    out_path = f"C:/Users/Certes/Desktop/guitar_score/debug_ocr_measure_{i}.png"
    if not cv2.imwrite(out_path, sprite):
        # imwrite reports failure (e.g. missing directory) by returning False.
        print(f"Row {i}: failed to write {out_path}")

    pixels = np.count_nonzero(sprite > 127)
    print(f"Row {i} parsed. Sprite white pixels: {pixels}")
|
||||
|
||||
Reference in New Issue
Block a user