"""Debug helper: dump the thresholded region above the first staff line of each row.

The stitched image is cut into fixed-height chunks; for every chunk that
contains bright pixels, the area just above the first detected bright
horizontal line is written to a PNG so it can be inspected by eye.
"""

import cv2
import numpy as np
import glob

# Height in pixels of one row chunk in the stitched image.
# The original author's note: "assuming chunk height is around 320".
ROW_HEIGHT = 320

img_path = r"C:\Users\Certes\.gemini\antigravity\brain\975cea00-dd68-4689-9ee3-f1a2408b4ee6\final_check_100_sec.png"


def get_number_sprite(m_img):
    """Return the binarized patch above the first staff line of *m_img*.

    The image is reduced to one channel by taking the per-pixel maximum over
    axis 2 and thresholded at 200. A row counts as a staff line when more
    than half of its pixels are above the threshold; if no such row exists,
    y = 50 is used as a fallback position.

    The patch spans from 60 px to 5 px above that row (clamped at the top
    edge) and the leftmost 60 columns (or fewer if the image is narrower).

    Args:
        m_img: 3-dimensional image array (rows, columns, channels).

    Returns:
        The cropped slice of the thresholded image, or ``None`` when the
        crop is empty or contains fewer than 8 bright pixels.
    """
    gray = np.max(m_img, axis=2)
    _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

    # thresh holds 0/255, so dividing the row sum by 255 gives a pixel count.
    row_sums = np.sum(thresh, axis=1) / 255
    staff_lines = np.where(row_sums > m_img.shape[1] * 0.5)[0]
    y_staff = staff_lines[0] if len(staff_lines) > 0 else 50

    crop_y1 = max(0, y_staff - 60)
    crop_y2 = max(0, y_staff - 5)
    crop_x1 = 0
    crop_x2 = min(60, m_img.shape[1])
    if crop_y2 <= crop_y1 or crop_x2 <= crop_x1:
        return None

    sprite = thresh[crop_y1:crop_y2, crop_x1:crop_x2]
    # Too few bright pixels: treat the patch as blank.
    if np.count_nonzero(sprite > 127) < 8:
        return None
    return sprite


def main():
    """Save one debug sprite per detected row of the stitched image.

    Raises:
        FileNotFoundError: if the image at ``img_path`` cannot be read.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read or decode image: {img_path}")
    h, w = img.shape[:2]

    # Author's notes: the final image has white padding for rows and a black
    # background for the music. Rather than cropping by stitched widths or
    # template matching, simply save the sprite at the start of every row.
    chunks = (img[y:y + ROW_HEIGHT, :] for y in range(0, h, ROW_HEIGHT))
    rows = [chunk for chunk in chunks if np.max(chunk) > 200]
    print(f"Detected {len(rows)} A4 rows in final image.")

    for i, row in enumerate(rows):
        gray_row = np.max(row, axis=2)
        _, binary = cv2.threshold(gray_row, 200, 255, cv2.THRESH_BINARY)

        # Rows in which more than 40% of the pixels are bright are taken as
        # staff lines; the sprite is expected just above the first of them.
        row_sums = np.sum(binary, axis=1) / 255
        staff_lines = np.where(row_sums > w * 0.4)[0]
        if len(staff_lines) == 0:
            continue

        y_staff = staff_lines[0]
        crop_y1 = max(0, y_staff - 60)
        crop_y2 = max(0, y_staff - 5)
        sprite = binary[crop_y1:crop_y2, 10:80]
        if sprite.size == 0:
            # A staff line within 5 px of the chunk top (or a very narrow
            # image) yields an empty crop, which cv2.imwrite rejects.
            print(f"Row {i} skipped: empty sprite region.")
            continue

        out_path = f"C:/Users/Certes/Desktop/guitar_score/debug_ocr_measure_{i}.png"
        if not cv2.imwrite(out_path, sprite):
            print(f"Warning: could not write {out_path}")
        pixels = np.count_nonzero(sprite > 127)
        print(f"Row {i} parsed. Sprite white pixels: {pixels}")


if __name__ == "__main__":
    main()