"""Render each page of a PDF, count staff-line blocks and OCR the measure number."""

import fitz
import cv2
import numpy as np


def _get_ocr_reader():
    """Create an English EasyOCR reader.

    ``easyocr`` is imported lazily so that importing this module does not
    require (or pay the start-up cost of) the OCR stack.
    """
    import easyocr

    return easyocr.Reader(['en'])


def _pixmap_to_gray(pix):
    """Return the pixmap's pixels as a 2-D ``uint8`` grayscale array.

    PyMuPDF stores samples in RGB(A) order, so the RGB conversion codes are
    used here rather than the BGR ones OpenCV assumes for its own images.

    Raises:
        ValueError: if the pixmap has a channel count other than 1-4.
    """
    # NOTE: assumes rows are tightly packed (stride == width * n), as the
    # reshape below requires.
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
        pix.height, pix.width, pix.n
    )
    if pix.n in (1, 2):
        # Gray or gray+alpha: the first channel already is the luminance.
        return img[:, :, 0].copy()
    if pix.n == 3:
        return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    if pix.n == 4:
        return cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    raise ValueError(f"Unsupported pixmap channel count: {pix.n}")


def _find_staff_blocks(gray, coverage=0.4, max_gap=10):
    """Group dark, page-wide rows of ``gray`` into vertically adjacent blocks.

    A row counts as a staff row when more than ``coverage`` of its pixels are
    darker than the threshold (200). Consecutive staff rows whose distance is
    below ``max_gap`` pixels are merged into one block.

    Returns:
        A list of blocks, each a non-empty list of row indices (ints) in
        ascending order. Empty when no row qualifies.
    """
    width = gray.shape[1]
    _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
    # thresh holds only 0 or 255, so sum / 255 is the dark-pixel count per row.
    dark_per_row = np.sum(thresh, axis=1) / 255.0
    staff_rows = np.where(dark_per_row > width * coverage)[0]

    blocks = []
    for row in staff_rows.tolist():
        if blocks and row - blocks[-1][-1] < max_gap:
            blocks[-1].append(row)
        else:
            blocks.append([row])
    return blocks


def _read_measure_numbers(reader, gray, top_y):
    """OCR digits from the top-left region just above row ``top_y``.

    The region spans from 60 px above ``top_y`` to 10 px below it and covers
    the left 15% of the page width. It is upscaled 3x and binarised (inverted)
    before being handed to the OCR reader.

    Returns:
        The list returned by ``reader.readtext``; empty if the crop region
        has no pixels.
    """
    width = gray.shape[1]
    crop_y1 = max(0, top_y - 60)
    crop_y2 = top_y + 10
    crop_x2 = int(width * 0.15)
    crop = gray[crop_y1:crop_y2, :crop_x2]
    if crop.size == 0:
        # cv2.resize raises on empty input; treat it as "nothing detected".
        return []

    upscaled = cv2.resize(
        crop, (0, 0), fx=3.0, fy=3.0, interpolation=cv2.INTER_CUBIC
    )
    _, upscaled_thresh = cv2.threshold(
        upscaled, 150, 255, cv2.THRESH_BINARY_INV
    )
    return reader.readtext(upscaled_thresh, allowlist='0123456789')


def verify_pdf_with_ocr(pdf_path):
    """Print a per-page report of staff-line blocks and OCR'd measure numbers.

    Each page is rendered at 150 dpi, converted to grayscale, scanned for
    page-wide dark rows (staff lines), and the area above the first staff
    block in the top-left corner is OCR'd for digits.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF.

    Returns:
        None. All results are written to stdout.
    """
    print(f"Opening PDF for OCR Verification: {pdf_path}")
    # The context manager guarantees the document is closed even on errors.
    with fitz.open(pdf_path) as doc:
        print(f"Total Pages: {len(doc)}")

        reader = _get_ocr_reader()

        for page_number, page in enumerate(doc, start=1):
            pix = page.get_pixmap(dpi=150)
            gray = _pixmap_to_gray(pix)
            h, w = gray.shape

            # Count staff lines
            staff_blocks = _find_staff_blocks(gray)

            print(f"\n[Page {page_number}] (Shape: {w}x{h})")
            print(f" - Found {len(staff_blocks)} horizontal staff lines/blocks.")

            # Read measure number using OCR from top left; fall back to a
            # fixed offset when no staff line was found on the page.
            top_y = staff_blocks[0][0] if staff_blocks else 100
            results = _read_measure_numbers(reader, gray, top_y)

            if results:
                print(f" - OCR Candidate Measure Numbers: {[r[1] for r in results]}")
            else:
                print(" - No Measure Number Detected.")


if __name__ == "__main__":
    verify_pdf_with_ocr("output/shintakarajima_perfect.pdf")