wip: [01-stabilize] paused at task 1/1 - OCR Hallucination Immune logic via Semantic delta window and fret-isolation
This commit is contained in:
70
verify_pdf.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import fitz
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def _get_ocr_reader():
    """Build and return an ``easyocr.Reader`` for the ``'en'`` language list.

    ``easyocr`` is imported inside the function, so importing this module
    does not require (or pay for) the OCR package until a reader is needed.
    """
    from easyocr import Reader

    languages = ['en']
    return Reader(languages)
|
||||
|
||||
def _pixmap_to_gray(pix):
    """Return the pixmap's pixels as a 2-D ``uint8`` grayscale array.

    The sample buffer is viewed as ``(height, width, n)``. PyMuPDF stores the
    samples in colorspace order, i.e. RGB / RGBA for the default colorspace,
    so the RGB* conversion codes are used rather than the BGR* ones (the BGR
    codes would swap the red and blue luminance weights).

    Raises:
        ValueError: if the pixmap has a channel count other than 1-4.
    """
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
        pix.height, pix.width, pix.n
    )
    if pix.n in (1, 2):
        # Gray (optionally with alpha): the first channel already is the gray
        # value. Copy so later OpenCV calls get a writable, contiguous array.
        return img[:, :, 0].copy()
    if pix.n == 3:
        return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    if pix.n == 4:
        # NOTE(review): assumes RGBA (default RGB colorspace plus alpha), not
        # CMYK -- confirm if pixmaps are ever rendered with another colorspace.
        return cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    raise ValueError(f"Unsupported pixmap channel count: {pix.n}")


def _group_staff_rows(staff_rows, max_gap=10):
    """Group sorted row indices into runs whose neighbours are < max_gap apart."""
    blocks = []
    for row in staff_rows:
        if blocks and row - blocks[-1][-1] < max_gap:
            blocks[-1].append(row)
        else:
            blocks.append([row])
    return blocks


def verify_pdf_with_ocr(pdf_path):
    """Print a per-page staff-line count and OCR'd measure-number candidates.

    For every page of the PDF at ``pdf_path`` the page is rendered at 150 dpi,
    rows whose dark-pixel count exceeds 40% of the page width are treated as
    staff-line rows and grouped into blocks, and a strip in the left 15% of
    the page just above the first block is upscaled, thresholded and passed
    to the OCR reader restricted to digits. Results are printed; nothing is
    returned.

    Args:
        pdf_path: path of the PDF to open with ``fitz.open``.
    """
    print(f"Opening PDF for OCR Verification: {pdf_path}")
    # The context manager closes the document even if rendering or OCR raises.
    with fitz.open(pdf_path) as doc:
        print(f"Total Pages: {len(doc)}")

        reader = _get_ocr_reader()

        for page_number, page in enumerate(doc, start=1):
            pix = page.get_pixmap(dpi=150)
            gray = _pixmap_to_gray(pix)
            h, w = gray.shape

            # Count staff lines: invert-threshold so dark pixels become 255,
            # then count dark pixels per row.
            _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
            row_sums = np.sum(thresh, axis=1) / 255.0
            staff_rows = np.where(row_sums > w * 0.4)[0]

            # Group adjacent rows into blocks.
            staff_blocks = _group_staff_rows(staff_rows)

            print(f"\n[Page {page_number}] (Shape: {w}x{h})")
            print(f" - Found {len(staff_blocks)} horizontal staff lines/blocks.")

            # Read measure number using OCR from top left; fall back to
            # y=100 when no staff rows were detected.
            top_y = staff_blocks[0][0] if staff_blocks else 100

            crop_y1 = max(0, top_y - 60)
            crop_y2 = top_y + 10
            crop_x2 = int(w * 0.15)

            crop = gray[crop_y1:crop_y2, :crop_x2]
            if crop.size == 0:
                # A very small page can leave nothing to crop; cv2.resize
                # would raise on an empty image, so report and move on.
                print(" - No Measure Number Detected.")
                continue

            upscaled = cv2.resize(
                crop, (0, 0), fx=3.0, fy=3.0, interpolation=cv2.INTER_CUBIC
            )
            _, upscaled_thresh = cv2.threshold(
                upscaled, 150, 255, cv2.THRESH_BINARY_INV
            )

            results = reader.readtext(upscaled_thresh, allowlist='0123456789')
            if results:
                print(f" - OCR Candidate Measure Numbers: {[r[1] for r in results]}")
            else:
                print(" - No Measure Number Detected.")
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # Allow the PDF path to be given on the command line; with no argument the
    # script falls back to the original hard-coded sample, so a bare
    # invocation behaves as before.
    target_pdf = sys.argv[1] if len(sys.argv) > 1 else "output/shintakarajima_perfect.pdf"
    verify_pdf_with_ocr(target_pdf)
|
||||
Reference in New Issue
Block a user