# guitar_score/scripts/debug/test_iou_math.py

import cv2
import numpy as np

def _render_strokes(strokes):
    """Return a black 60x100 uint8 canvas with every stroke painted white.

    Each stroke is ``((row_start, row_stop), (col_start, col_stop))`` using
    half-open bounds, exactly like a numpy slice.
    """
    canvas = np.zeros((60, 100), dtype=np.uint8)
    for (row_start, row_stop), (col_start, col_stop) in strokes:
        canvas[row_start:row_stop, col_start:col_stop] = 255
    return canvas


# Synthetic frame containing a thin "1" followed by a thin "2".
img_12 = _render_strokes([
    ((10, 50), (40, 45)),  # the "1"
    ((10, 15), (60, 80)),  # top of "2"
    ((15, 45), (75, 80)),  # right of "2"
    ((45, 50), (60, 80)),  # bottom of "2"
])

# Synthetic frame containing a thin "3" followed by a thin "7".
img_37 = _render_strokes([
    ((10, 15), (30, 50)),  # top of "3"
    ((25, 30), (30, 50)),  # middle of "3"
    ((45, 50), (30, 50)),  # bottom of "3"
    ((10, 15), (60, 80)),  # top of "7"
    ((15, 50), (75, 80)),  # right of "7"
])

# The same "12" strokes moved 2 pixels down and 2 pixels right, imitating
# frame-to-frame video wobble.
img_12_shifted = _render_strokes([
    ((12, 52), (42, 47)),
    ((12, 17), (62, 82)),
    ((17, 47), (77, 82)),
    ((47, 52), (62, 82)),
])

def compute_iou(s1, s2):
    """Return the intersection-over-union of two masks.

    A pixel counts as foreground when its value is greater than zero.  The
    denominator is clamped to at least 1, so two empty masks give 0.0
    rather than a division by zero.
    """
    foreground_a = s1 > 0
    foreground_b = s2 > 0
    overlap_pixels = np.count_nonzero(foreground_a & foreground_b)
    covered_pixels = np.count_nonzero(foreground_a | foreground_b)
    return overlap_pixels / max(1, covered_pixels)

def robust_match(s1, s2, kernel_size=5, shifts=(-2, 0, 2)):
    """Return the best IoU between two masks after dilation and small shifts.

    Both masks are dilated with a square ``kernel_size`` x ``kernel_size``
    kernel of ones so that thin strokes still overlap when they are slightly
    misaligned.  The dilated ``s2`` is then translated by every ``(dx, dy)``
    pair drawn from ``shifts`` and compared against the dilated ``s1`` with
    ``compute_iou``; the highest score wins.

    Args:
        s1: Reference mask (2-D image array accepted by ``cv2.dilate``).
        s2: Candidate mask; expected to have the same shape as ``s1`` because
            the two are compared element-wise.
        kernel_size: Side length of the square dilation kernel.  The default
            of 5 thickens every stroke by 2 pixels on each side.
        shifts: Offsets in pixels tried independently along x and y.  The
            defaults are whole-pixel steps, so no interpolation blur occurs.

    Returns:
        The best IoU found, always a float; 0.0 when nothing overlaps or
        when ``shifts`` is empty.
    """
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    d1 = cv2.dilate(s1, kernel, iterations=1)
    d2 = cv2.dilate(s2, kernel, iterations=1)

    # warpAffine wants the output size as (width, height), the reverse of
    # numpy's (rows, cols) shape order.
    height, width = s2.shape[:2]

    # Start from a float so the return type does not depend on whether any
    # candidate overlapped.
    best_iou = 0.0
    for dy in shifts:
        for dx in shifts:
            # Pure translation; pixels shifted in from outside the frame take
            # warpAffine's default border fill.
            M = np.float32([[1, 0, dx], [0, 1, dy]])
            shifted_d2 = cv2.warpAffine(d2, M, (width, height))
            best_iou = max(best_iou, compute_iou(d1, shifted_d2))

    return best_iou

# Shift-tolerant IoU: two different numbers first, then the same number wobbled.
iou_different = robust_match(img_12, img_37)
print("IoU (12 vs 37):", iou_different)
iou_same = robust_match(img_12, img_12_shifted)
print("IoU (12 vs 12_shifted):", iou_same)

# Baseline for comparison: normalised cross-correlation template matching.
# The template is img_12 with a 5-pixel border trimmed on every side, which
# leaves it smaller than the frames it is slid across.
template_12 = img_12[5:-5, 5:-5]

res = cv2.matchTemplate(img_37, template_12, cv2.TM_CCOEFF_NORMED)
max_val_diff = cv2.minMaxLoc(res)[1]  # index 1 is the maximum score

res2 = cv2.matchTemplate(img_12_shifted, template_12, cv2.TM_CCOEFF_NORMED)
max_val_same = cv2.minMaxLoc(res2)[1]

print("\nTM_CCOEFF_NORMED (12 vs 37):", max_val_diff)
print("TM_CCOEFF_NORMED (12 vs 12_shifted):", max_val_same)