fix(cv): resolve infinite page duplication bug caused by playback cursor
This commit is contained in:
65
scripts/debug/test_ocr_band.py
Normal file
65
scripts/debug/test_ocr_band.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import easyocr
|
||||
import sys
|
||||
|
||||
# Initialize EasyOCR reader
|
||||
# Module-level OCR reader reused by extract_measure_numbers(): English
# recognition only, with GPU use disabled via gpu=False.
reader = easyocr.Reader(['en'], gpu=False)
|
||||
|
||||
def _group_adjacent_columns(xs, max_gap=10):
    """Collapse ascending column indices into one mean x-position per run.

    Two consecutive indices belong to the same run when they are fewer than
    ``max_gap`` pixels apart. Returns a list of Python ints (the truncated
    mean of each run); an empty input yields an empty list.
    """
    if len(xs) == 0:
        return []
    # A new run starts right after every gap of at least ``max_gap`` pixels.
    run_starts = np.where(np.diff(xs) >= max_gap)[0] + 1
    return [int(np.mean(run)) for run in np.split(xs, run_starts)]


def extract_measure_numbers(img_path, *, max_bars=5, min_confidence=0.5):
    """OCR the digits printed just above the staff next to each bar line.

    The image is binarised, columns containing more than 30 ink pixels are
    treated as bar-line candidates, and neighbouring candidate columns are
    merged into single bars. For each of the first ``max_bars`` bars, a strip
    up to 40 px tall and 60 px wide, located directly above the first inked
    row found beside the bar, is upscaled 3x and passed to the module-level
    EasyOCR ``reader`` restricted to digits.

    Args:
        img_path: Path of the image to analyse.
        max_bars: How many of the leftmost detected bars to inspect.
        min_confidence: OCR results at or below this confidence are dropped.

    Returns:
        A list of ``(bar_x, text, confidence)`` tuples for every accepted
        OCR hit, in detection order. The list is empty when the image cannot
        be read or no bar line is found; both cases are reported on stderr.
        Accepted hits are also printed to stdout.
    """
    img = cv2.imread(img_path)
    if img is None:
        # Report the failure: a silent return is indistinguishable from
        # "OCR found nothing" in a print-only debug tool.
        print(f"Could not read image: {img_path}", file=sys.stderr)
        return []

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Inverted threshold: pixels darker than 200 become 255 ("ink").
    _, bin_inv = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)

    # Ink pixels per column; a bar line shows up as a tall dark column.
    col_sums = np.sum(bin_inv, axis=0) / 255.0
    bars = _group_adjacent_columns(np.where(col_sums > 30)[0])
    if not bars:
        print(f"No bar lines found in: {img_path}", file=sys.stderr)
        return []

    print(f"File: {img_path}")
    detections = []
    for bar_x in bars[:max_bars]:
        # Approximate the top of the staff as the first row that has more
        # than 2 ink pixels within the 10 columns starting at the bar.
        # NOTE(review): if the measure number itself overlaps these columns,
        # this row may be the top of the number rather than the staff, and
        # the crop below would then miss it - confirm on real slices.
        row_sums = np.sum(bin_inv[:, bar_x:bar_x + 10], axis=1) / 255.0
        staff_rows = np.where(row_sums > 2)[0]
        if len(staff_rows) == 0:
            continue
        staff_top = int(staff_rows[0])

        # Region expected to hold the number: up to 40 px above the staff,
        # up to 60 px to the right of the bar, clamped to the image.
        y1 = max(0, staff_top - 40)
        x2 = min(img.shape[1], bar_x + 60)
        crop = gray[y1:staff_top, bar_x:x2]
        if crop.shape[0] < 10 or crop.shape[1] < 10:
            continue

        # Upscale for OCR.
        crop_lg = cv2.resize(
            crop,
            (crop.shape[1] * 3, crop.shape[0] * 3),
            interpolation=cv2.INTER_CUBIC,
        )

        for _bbox, text, prob in reader.readtext(crop_lg, allowlist='0123456789'):
            if prob > min_confidence:
                print(f" Bar at x={bar_x}: Measure {text} (conf: {prob:.2f})")
                detections.append((bar_x, text, float(prob)))

    return detections
|
||||
|
||||
if __name__ == "__main__":
    # Image paths given on the command line take precedence; with no
    # arguments, fall back to the two sample slices this script was
    # originally written against.
    default_paths = (
        r"C:\Users\Certes\.gemini\antigravity\brain\5805a1e3-c776-4325-8538-351d54b5e0a0\ai_slice_0.png",
        r"C:\Users\Certes\.gemini\antigravity\brain\5805a1e3-c776-4325-8538-351d54b5e0a0\ai_slice_3.png",
    )
    for path in sys.argv[1:] or default_paths:
        extract_measure_numbers(path)
|
||||
Reference in New Issue
Block a user