fix(cv): resolve measure duplication by isolating playhead and enforcing 1D sliding correlations

This commit is contained in:
2026-03-29 00:06:38 +09:00
parent 64ecc12d35
commit cd159c2a99
5 changed files with 207 additions and 58 deletions

View File

@@ -18,6 +18,7 @@ from pathlib import Path
from typing import List, Tuple, Optional
import cv2
from video_cv_tracker import TemporalTracker
import numpy as np
import img2pdf
from PIL import Image
@@ -156,12 +157,11 @@ def download_video(url: str, output_dir: Path) -> Tuple[Path, str]:
print(f" → 이미 다운로드됨: {video_path.name}")
return video_path, safe_title
# 720p 우선 (다운스케일링 부하 원천 차단)
# 영상 추출 처리(CV)만 필요하므로, ffmpeg 병합이 불필요한 video-only 고화질 포맷(720p)을 직접 요청하여 360p 강등을 방지
subprocess.run(
[yt_dlp,
"-f", "bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/"
"best[height<=720]/best",
"--merge-output-format", "mp4",
"-f", "bestvideo[ext=mp4]",
"-S", "res:720",
"-o", str(video_path), url],
encoding="utf-8", errors="replace", check=True
)
@@ -659,90 +659,86 @@ def merge_panoramas_list(panoramas):
return merged_list
def _pad_right_white(row: np.ndarray, target_width: int) -> np.ndarray:
    """Return *row* right-padded with white (255) pixels up to *target_width*.

    Rows that are already at least *target_width* wide are returned unchanged,
    so an over-wide row is emitted as-is rather than cropped.
    """
    pad_w = target_width - row.shape[1]
    if pad_w <= 0:
        return row
    # Mirror the trailing (channel) dimensions of the row so the stack works
    # for 3-channel images and for single-channel ones alike.
    pad = np.full((row.shape[0], pad_w) + row.shape[2:], 255, dtype=np.uint8)
    return np.hstack([row, pad])


def extract_unique_scroll(frames: List[np.ndarray], threshold: float = SIMILARITY_THRESHOLD) -> List[np.ndarray]:
    """Build a panorama from scrolling-tab frames and cut it into fixed-width rows.

    Steps:
      1. Locate the tab strip's vertical bounds with ``_find_white_tab_strip``
         on the leading frames and take the median top/bottom.
      2. Crop every frame to that band and feed each crop that passes
         ``_has_tab_content`` to a ``TemporalTracker``.
      3. Split the tracker's final panorama at the x-coordinates returned by
         ``_detect_measure_bars`` and pack consecutive segments into rows of
         at most ``chunk_width`` pixels, right-padding each row with white.
         When no bar is detected, the panorama is cut into plain
         ``chunk_width``-wide slices instead.

    Args:
        frames: Video frames as image arrays.
        threshold: Not used by this implementation; kept so existing callers
            that pass it keep working.

    Returns:
        The list of row images, or an empty list when no tab strip is found
        or the tracker yields no panorama.
    """
    probe_frame_count = 50   # only the leading frames are scanned for the strip
    min_segment_px = 50      # segments narrower than this are dropped
    chunk_width = 1280       # width of every emitted row

    print("[4/5] 스크롤형 Tab 시계열 추적 추출 중...")

    strip_tops, strip_bottoms = [], []
    for frame in frames[:probe_frame_count]:
        strip = _find_white_tab_strip(frame)
        if strip:
            strip_tops.append(strip[0])
            strip_bottoms.append(strip[1])

    if not strip_tops:
        return []

    median_top = int(np.median(strip_tops))
    median_bottom = int(np.median(strip_bottoms))

    tracker = TemporalTracker()
    for frame in frames:
        h = frame.shape[0]
        tab_crop = frame[max(0, median_top):min(h, median_bottom), :]
        if not _has_tab_content(tab_crop):
            continue
        tracker.process_frame(tab_crop)

    panorama = tracker.get_final_panorama()
    if panorama is None:
        return []

    panorama_width = panorama.shape[1]
    print(f" -> 생성된 파노라마 길이: {panorama_width}px")

    final_chunks = []
    gray_pano = _extract_print_channel(panorama)
    bar_coords = _detect_measure_bars(gray_pano)

    if not bar_coords:
        # No bar lines found: fall back to uniform slices; numpy clamps the
        # slice end, so only the last slice can come out short and get padded.
        for start_x in range(0, panorama_width, chunk_width):
            chunk = panorama[:, start_x:start_x + chunk_width]
            final_chunks.append(_pad_right_white(chunk, chunk_width))
    else:
        # Add both panorama edges as cut points, then de-duplicate and order.
        coords = sorted(set([0] + bar_coords + [panorama_width]))

        current_row = None
        for x_start, x_end in zip(coords, coords[1:]):
            if x_end - x_start < min_segment_px:
                continue

            measure_img = panorama[:, x_start:x_end]
            if current_row is None:
                current_row = measure_img
            elif current_row.shape[1] + measure_img.shape[1] > chunk_width:
                # The segment does not fit: flush the row and start a new one.
                final_chunks.append(_pad_right_white(current_row, chunk_width))
                current_row = measure_img
            else:
                current_row = np.hstack([current_row, measure_img])

        # Flush the trailing, possibly partial, row.
        if current_row is not None:
            final_chunks.append(_pad_right_white(current_row, chunk_width))

    print(f" -> A4 분할 컷: {len(final_chunks)}")
    return final_chunks
def extract_unique_overlay(frames: List[np.ndarray],