wip: [01-stabilize] paused at task 1/1 - OCR Hallucination Immune logic via Semantic delta window and fret-isolation

This commit is contained in:
2026-03-29 22:08:40 +09:00
parent aca7bf592a
commit 2507de45d3
4289 changed files with 732689 additions and 28672 deletions

70
verify_pdf.py Normal file
View File

@@ -0,0 +1,70 @@
import fitz
import cv2
import numpy as np
def _get_ocr_reader():
    """Create and return an EasyOCR reader configured for English.

    ``easyocr`` is imported inside the function, so the dependency is only
    loaded when a reader is actually requested.
    """
    from easyocr import Reader

    languages = ['en']
    return Reader(languages)
def _pixmap_to_gray(pix):
    """Return the pixmap's pixels as a 2-D ``uint8`` grayscale array.

    PyMuPDF stores rendered samples in RGB(A) channel order, so the RGB
    conversion codes are used; the BGR codes would swap the red and blue
    luminance weights.
    """
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
        pix.height, pix.width, pix.n
    )
    if pix.n == 1:
        # Already single-channel: drop the channel axis. Copy so the result
        # is writable (frombuffer yields a read-only view of the samples).
        return img[:, :, 0].copy()
    if pix.n == 4:
        return cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)


def _group_staff_rows(staff_rows, max_gap=10):
    """Split ascending row indices into runs of neighbours < ``max_gap`` apart.

    Returns a list of index arrays; the list is empty when no rows are given.
    """
    if len(staff_rows) == 0:
        return []
    # A new block starts wherever two consecutive rows are max_gap or more apart.
    breaks = np.flatnonzero(np.diff(staff_rows) >= max_gap) + 1
    return np.split(staff_rows, breaks)


def verify_pdf_with_ocr(pdf_path):
    """Print a per-page staff-line count and OCR'd measure-number candidates.

    Each page is rendered at 150 dpi and converted to grayscale. Rows in
    which more than 40% of the pixels are dark are treated as staff-line
    rows and grouped into blocks. A strip in the left 15% of the page,
    from 60 px above to 10 px below the first staff row (or around y=100
    when no staff row is found), is upscaled 3x, binarised and passed to
    the OCR reader restricted to digits.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        None. All findings are printed to stdout.

    Raises:
        Whatever ``fitz.open`` raises when the file cannot be opened.
    """
    print(f"Opening PDF for OCR Verification: {pdf_path}")
    # The context manager guarantees the document is closed, even on error.
    with fitz.open(pdf_path) as doc:
        print(f"Total Pages: {len(doc)}")
        reader = _get_ocr_reader()
        for page_number, page in enumerate(doc, start=1):
            gray = _pixmap_to_gray(page.get_pixmap(dpi=150))
            h, w = gray.shape

            # Count staff lines: a row qualifies when > 40% of it is dark.
            _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
            dark_per_row = np.count_nonzero(thresh, axis=1)
            staff_rows = np.where(dark_per_row > w * 0.4)[0]
            staff_blocks = _group_staff_rows(staff_rows)

            print(f"\n[Page {page_number}] (Shape: {w}x{h})")
            print(f"  - Found {len(staff_blocks)} horizontal staff lines/blocks.")

            # Read measure number using OCR from top left.
            top_y = staff_blocks[0][0] if staff_blocks else 100
            crop_y1 = max(0, top_y - 60)
            crop_y2 = top_y + 10
            crop_x2 = int(w * 0.15)
            crop = gray[crop_y1:crop_y2, :crop_x2]

            results = []
            # cv2.resize raises on an empty image (very small page, or the
            # fallback top_y lying beyond the page height), so skip OCR then.
            if crop.size > 0:
                upscaled = cv2.resize(
                    crop, (0, 0), fx=3.0, fy=3.0, interpolation=cv2.INTER_CUBIC
                )
                _, upscaled_thresh = cv2.threshold(
                    upscaled, 150, 255, cv2.THRESH_BINARY_INV
                )
                results = reader.readtext(upscaled_thresh, allowlist='0123456789')

            if results:
                print(f"  - OCR Candidate Measure Numbers: {[r[1] for r in results]}")
            else:
                print("  - No Measure Number Detected.")
if __name__ == "__main__":
    # Script entry point: verify the default rendered score.
    default_pdf = "output/shintakarajima_perfect.pdf"
    verify_pdf_with_ocr(default_pdf)