# File-viewer metadata captured with this script: 33 lines, 1.2 KiB, Python.
import sys

import cv2
import easyocr
import numpy as np


# Debug script: crop the band just above the first long horizontal line of an
# image (presumably the top staff line of a score slice -- inferred from the
# variable names) and print what EasyOCR reads there, digits only, under three
# preprocessing variants: original, upscaled x2, and upscaled + binarized.

# Default input image; can be overridden by the first command-line argument.
IMG_PATH = r"C:\Users\Certes\.gemini\antigravity\brain\5805a1e3-c776-4325-8538-351d54b5e0a0\ai_slice_3.png"
# Where the cropped band is dumped for visual inspection.
DEBUG_BAND_PATH = r"C:\Users\Certes\Desktop\guitar_score\scripts\debug\upper_band.png"

# Characters EasyOCR is allowed to return.
DIGIT_ALLOWLIST = '0123456789'


def find_staff_top(bin_inv: np.ndarray, min_fill: float = 0.4) -> int:
    """Return the index of the first row that is mostly "ink".

    Args:
        bin_inv: 2-D inverted binary image whose ink pixels are 255 and whose
            background pixels are 0 (as produced by ``cv2.THRESH_BINARY_INV``).
        min_fill: Fraction of the image width that a row's ink count must
            exceed for the row to qualify.

    Returns:
        The smallest qualifying row index.

    Raises:
        ValueError: If no row exceeds the requested coverage.
    """
    # Dividing the per-row sum by 255 turns it into a count of ink pixels.
    ink_per_row = np.sum(bin_inv, axis=1) / 255.0
    candidate_rows = np.where(ink_per_row > bin_inv.shape[1] * min_fill)[0]
    if candidate_rows.size == 0:
        raise ValueError(
            f"no row has more than {min_fill:.0%} ink coverage; "
            "cannot locate the staff"
        )
    return int(candidate_rows[0])


def crop_upper_band(
    gray: np.ndarray, staff_top: int, above: int = 60, below: int = 10
) -> np.ndarray:
    """Return the horizontal strip of ``gray`` surrounding ``staff_top``.

    The strip starts ``above`` rows over ``staff_top`` (clamped to the image
    top) and extends ``below`` rows past it, so it includes slightly below
    the top line.
    """
    first_row = max(0, staff_top - above)
    return gray[first_row : staff_top + below, :]


def print_digit_detections(reader, label: str, image: np.ndarray) -> None:
    """Print ``label`` followed by every EasyOCR detection found in ``image``."""
    print(label)
    for detection in reader.readtext(image, allowlist=DIGIT_ALLOWLIST):
        print(detection)


def main(argv=None) -> int:
    """Run the three OCR experiments and return a process exit status.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. An optional first argument replaces the default
            input image path.

    Returns:
        0 on success, 1 if the image cannot be read or no staff row is found.
    """
    args = sys.argv[1:] if argv is None else argv
    img_path = args[0] if args else IMG_PATH

    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        print(f"error: could not read image: {img_path}", file=sys.stderr)
        return 1

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, bin_inv = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)

    try:
        staff_top = find_staff_top(bin_inv)
    except ValueError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1

    upper_band = crop_upper_band(gray, staff_top)

    # The dump is only a debugging aid, so a failed write is reported but
    # does not abort the experiments.
    if not cv2.imwrite(DEBUG_BAND_PATH, upper_band):
        print(
            f"warning: could not write debug image: {DEBUG_BAND_PATH}",
            file=sys.stderr,
        )

    reader = easyocr.Reader(['en'], gpu=False)

    # Test 1: Original
    print_digit_detections(reader, "Testing Original Upper Band...", upper_band)

    # Test 2: Upscaled
    band_height, band_width = upper_band.shape[:2]
    lg2 = cv2.resize(
        upper_band,
        (band_width * 2, band_height * 2),
        interpolation=cv2.INTER_CUBIC,
    )
    print_digit_detections(reader, "\nTesting Upscaled x2...", lg2)

    # Test 3: Binarized
    _, bin_lg = cv2.threshold(lg2, 180, 255, cv2.THRESH_BINARY_INV)
    print_digit_detections(reader, "\nTesting Binarized Upscaled...", bin_lg)

    return 0


if __name__ == "__main__":
    sys.exit(main())