Files
guitar_score/verify_pdf.py

71 lines
2.3 KiB
Python

import fitz
import cv2
import numpy as np
def _get_ocr_reader():
    """Build and return an EasyOCR reader configured for English.

    ``easyocr`` is imported inside the function rather than at module
    level, so importing this module does not itself import ``easyocr``.
    """
    from easyocr import Reader

    languages = ['en']
    return Reader(languages)
# Pixel-based thresholds below are tuned for pages rendered at _RENDER_DPI.
_RENDER_DPI = 150
_STAFF_INK_THRESHOLD = 200    # gray values at or below this count as ink
_STAFF_ROW_COVERAGE = 0.4     # fraction of the page width that must be inked
_STAFF_ROW_MAX_GAP = 10       # rows closer than this belong to the same block
_DEFAULT_TOP_Y = 100          # fallback staff position when no staff is found
_CROP_ABOVE_STAFF = 60        # rows kept above the first staff row
_CROP_BELOW_STAFF = 10        # rows kept below the first staff row
_CROP_WIDTH_FRACTION = 0.15   # left-hand share of the page that is searched
_OCR_UPSCALE = 3.0            # enlargement factor applied before OCR
_OCR_INK_THRESHOLD = 150      # binarisation threshold for the OCR crop


def _pixmap_to_gray(pix):
    """Convert a PyMuPDF pixmap into a 2-D ``uint8`` grayscale array.

    PyMuPDF stores pixel samples in RGB(A) order, so the RGB conversion
    codes are used here; the BGR codes would swap the red and blue
    luminance weights.
    """
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
        pix.height, pix.width, pix.n
    )
    if pix.n == 1:
        # Already grayscale: drop the channel axis and take a writable copy.
        return img.reshape(pix.height, pix.width).copy()
    if pix.n == 4:
        return cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)


def _group_staff_rows(rows, max_gap=_STAFF_ROW_MAX_GAP):
    """Split sorted row indices into runs whose neighbours differ by < max_gap."""
    if rows.size == 0:
        return []
    # A new block starts wherever the jump to the next row is max_gap or more.
    breaks = np.flatnonzero(np.diff(rows) >= max_gap) + 1
    return np.split(rows, breaks)


def verify_pdf_with_ocr(pdf_path):
    """Print a per-page staff-line count and OCR'd measure-number candidates.

    Each page is rendered at 150 dpi and converted to grayscale. Rows whose
    dark pixels cover more than 40% of the page width are treated as staff
    rows and grouped into blocks. A strip just above the first block, taken
    from the left 15% of the page, is upscaled, binarised and passed to
    EasyOCR restricted to digits.

    Args:
        pdf_path: Path of the PDF to inspect; passed straight to ``fitz.open``.

    Returns:
        None. All findings are written to standard output.
    """
    print(f"Opening PDF for OCR Verification: {pdf_path}")
    # The context manager closes the document even if a page fails mid-way.
    with fitz.open(pdf_path) as doc:
        print(f"Total Pages: {len(doc)}")
        reader = _get_ocr_reader()
        for page_number, page in enumerate(doc, start=1):
            gray = _pixmap_to_gray(page.get_pixmap(dpi=_RENDER_DPI))
            h, w = gray.shape

            # Count staff lines: a row is a staff row when enough of it is ink.
            _, ink = cv2.threshold(
                gray, _STAFF_INK_THRESHOLD, 255, cv2.THRESH_BINARY_INV
            )
            ink_per_row = np.count_nonzero(ink, axis=1)
            staff_rows = np.flatnonzero(ink_per_row > w * _STAFF_ROW_COVERAGE)
            staff_blocks = _group_staff_rows(staff_rows)

            print(f"\n[Page {page_number}] (Shape: {w}x{h})")
            print(f" - Found {len(staff_blocks)} horizontal staff lines/blocks.")

            # Read measure number using OCR from top left.
            top_y = int(staff_blocks[0][0]) if staff_blocks else _DEFAULT_TOP_Y
            crop = gray[
                max(0, top_y - _CROP_ABOVE_STAFF):top_y + _CROP_BELOW_STAFF,
                :int(w * _CROP_WIDTH_FRACTION),
            ]
            if crop.size == 0:
                # Page too small to contain the search strip; cv2.resize
                # would raise on an empty image, so report and move on.
                print(" - No Measure Number Detected.")
                continue

            upscaled = cv2.resize(
                crop, (0, 0), fx=_OCR_UPSCALE, fy=_OCR_UPSCALE,
                interpolation=cv2.INTER_CUBIC,
            )
            _, upscaled_thresh = cv2.threshold(
                upscaled, _OCR_INK_THRESHOLD, 255, cv2.THRESH_BINARY_INV
            )
            results = reader.readtext(upscaled_thresh, allowlist='0123456789')
            if results:
                # Index 1 of each OCR result is what gets reported as the
                # candidate number.
                print(f" - OCR Candidate Measure Numbers: {[r[1] for r in results]}")
            else:
                print(" - No Measure Number Detected.")
if __name__ == "__main__":
    import sys

    # Verify the PDF named by the first command-line argument; with no
    # argument, fall back to the sample score this script was written for.
    target_pdf = sys.argv[1] if len(sys.argv) > 1 else "output/shintakarajima_perfect.pdf"
    verify_pdf_with_ocr(target_pdf)