Files
guitar_score/video_cv_tracker.py

59 lines
2.5 KiB
Python

import cv2
import numpy as np
from typing import List, Tuple, Optional
class TemporalTracker:
    """Split a stream of video frames into distinct "pages".

    Each incoming frame is compared with a reference grayscale frame. While
    the fraction of strongly changed pixels stays at or below
    ``diff_threshold`` the page is considered stable and every
    ``_SAMPLE_EVERY``-th stable frame is buffered. When the fraction exceeds
    the threshold, the buffered frames are collapsed into one per-pixel
    median image, stored in ``unique_pages``, and the reference frame is
    replaced by the current frame.

    NOTE: the buffer has no upper bound, so a page that stays on screen for
    a long time keeps accumulating full frame copies until the next flip or
    until ``get_unique_pages`` is called.
    """

    # Brightness cut-off used by `_extract_print_channel`.
    _PRINT_LEVEL = 230
    # Minimum per-pixel grayscale delta that counts as "changed".
    _PIXEL_DELTA = 50
    # Every N-th stable frame is kept for the median.
    _SAMPLE_EVERY = 3

    def __init__(self, diff_threshold: float = 0.05):
        """Create an empty tracker.

        Args:
            diff_threshold: Fraction of pixels (0..1) that must change
                relative to the reference frame for a page flip to be
                reported.
        """
        self.diff_threshold = diff_threshold
        # Grayscale reference frame; replaced only on the first frame and
        # whenever a flip is detected.
        self.last_frame: Optional[np.ndarray] = None
        # Copies of the frames sampled for the page currently on screen.
        self.current_page_frames: List[np.ndarray] = []
        # One median image per finished page, in order of appearance.
        self.unique_pages: List[np.ndarray] = []
        # Total number of frames passed to `process_frame`.
        self.frame_count = 0
        # Frames seen since the last flip (the very first frame counts as 1).
        self.stable_frame_count = 0

    def _extract_print_channel(self, bgr: np.ndarray) -> np.ndarray:
        """Return a binary mask of the bright pixels of a colour image.

        The per-pixel maximum over the last axis is thresholded: pixels
        whose brightest channel is above ``_PRINT_LEVEL`` become 255, all
        others become 0.
        """
        # The channel maximum serves as the single-channel image; the
        # original author's intent was to make the fixed printed marks of
        # the score (lines, digits, symbols) stand out clearly.
        brightest = np.max(bgr, axis=2)
        _, mask = cv2.threshold(
            brightest, self._PRINT_LEVEL, 255, cv2.THRESH_BINARY
        )
        return mask

    def _flush_current_page(self) -> bool:
        """Turn the buffered frames into one median page and reset the buffer.

        Returns:
            True if a page was appended to ``unique_pages``; False if the
            buffer was empty and nothing changed.
        """
        if not self.current_page_frames:
            return False
        # Per-pixel median over the buffered frames; per the original
        # author, this is meant to keep the true page colours while
        # discarding transient content that moves between frames.
        median_page = np.median(self.current_page_frames, axis=0).astype(np.uint8)
        self.unique_pages.append(median_page)
        # Clearing here is what keeps repeated flushes from storing the
        # same page more than once.
        self.current_page_frames = []
        return True

    def process_frame(self, frame: np.ndarray) -> None:
        """Feed the next video frame to the tracker.

        Args:
            frame: Colour frame; it is converted with ``COLOR_BGR2GRAY``,
                so BGR channel order is expected.
        """
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        self.frame_count += 1

        # The very first frame becomes the reference and is always buffered.
        if self.last_frame is None:
            self.last_frame = frame_gray.copy()
            self.current_page_frames.append(frame.copy())
            self.stable_frame_count = 1
            return

        # Fraction of pixels whose grayscale value moved by more than
        # _PIXEL_DELTA relative to the reference frame.
        diff = cv2.absdiff(self.last_frame, frame_gray)
        _, thresh = cv2.threshold(diff, self._PIXEL_DELTA, 255, cv2.THRESH_BINARY)
        diff_ratio = np.count_nonzero(thresh) / thresh.size

        if diff_ratio > self.diff_threshold:
            self.stable_frame_count = 0
            if self.current_page_frames:
                print(f"[Tracker] Page Flip Detected! (Change: {diff_ratio*100:.1f}%) -> Saving Median Page {len(self.unique_pages)+1}")
                self._flush_current_page()
            # The changed frame is the new reference, even when there was
            # nothing buffered to save.
            self.last_frame = frame_gray.copy()
        else:
            self.stable_frame_count += 1
            if self.stable_frame_count % self._SAMPLE_EVERY == 0:
                self.current_page_frames.append(frame.copy())

    def get_final_panorama(self) -> Optional[np.ndarray]:
        """Always return None.

        Kept only so existing callers of the old "panorama" API keep
        working; the logic behind it is no longer needed.
        """
        return None

    def get_unique_pages(self) -> List[np.ndarray]:
        """Return every page collected so far, flushing the page in progress.

        Frames still buffered for the current page are collapsed into a
        final median page first. The buffer is cleared afterwards, so
        calling this method again does not append the same page twice.

        Returns:
            The tracker's own ``unique_pages`` list (not a copy).
        """
        if self._flush_current_page():
            print(f"[Tracker] Saving Final Median Page {len(self.unique_pages)}")
        return self.unique_pages