fix(anime): 파이프라인 5건 수정 — 에피소드 정규식(v2/S01E), 릴리스 그룹 필터, 자막 보호, 배치 다운로드, 타임아웃

This commit is contained in:
2026-03-15 08:27:08 +09:00
parent 63818999d9
commit 9f74812710
40 changed files with 2759 additions and 815 deletions

View File

@@ -110,11 +110,102 @@ class AnissiaClient:
]
async def search_anime(self, keyword: str) -> list[AnimeInfo]:
    """Search the full schedule for entries whose title matches *keyword*.

    Each entry is tested in tiers; the first tier that hits accepts it:

    1. Case-insensitive substring match against ``subject`` and
       ``original_subject``, first on the raw text and then with
       punctuation stripped from both sides (quotes, brackets, ...).
    2. Substring match against ``japanese_to_romaji(original_subject)``.
    3. Fuzzy match: ``title_similarity`` between the keyword and the
       romaji string, taken per whitespace-separated word and for the
       whole string; the entry becomes a candidate at a score >= 0.6.

    Tiers 2 and 3 are skipped when ``tools.title_matcher`` cannot be
    imported. Fuzzy candidates are used only when tiers 1-2 found nothing.

    Args:
        keyword: Text to look for. Because tier 1 is a plain substring
            test, an empty keyword matches every entry.

    Returns:
        Tier 1-2 matches in schedule order; otherwise at most 10 fuzzy
        candidates ordered by descending similarity; otherwise ``[]``.
    """
    import re as _re

    all_anime = await self.get_all_schedule()
    keyword_lower = keyword.lower()

    # Compile once: the pattern is applied twice per schedule entry.
    strip_punct = _re.compile(r'[^\w\s]').sub
    keyword_norm = strip_punct('', keyword_lower)
    # A keyword made only of punctuation normalizes to an empty/blank
    # string, which is a substring of every title. Disable the normalized
    # comparison in that case so such a keyword does not match everything.
    match_normalized = bool(keyword_norm.strip())

    try:
        from tools.title_matcher import japanese_to_romaji, title_similarity
        use_romaji = True
    except ImportError:
        # Optional helper is missing: fall back to substring matching only.
        use_romaji = False

    results = []
    fuzzy_candidates = []

    for a in all_anime:
        subj_lower = a.subject.lower()
        orig_lower = a.original_subject.lower()

        # Tier 1a: raw substring match.
        if keyword_lower in subj_lower or keyword_lower in orig_lower:
            results.append(a)
            continue

        # Tier 1b: substring match with punctuation removed on both sides.
        if match_normalized and (
            keyword_norm in strip_punct('', subj_lower)
            or keyword_norm in strip_punct('', orig_lower)
        ):
            results.append(a)
            continue

        if not use_romaji:
            continue

        romaji = japanese_to_romaji(a.original_subject).lower()

        # Tier 2: substring match against the romaji form.
        if keyword_lower in romaji:
            results.append(a)
            continue

        # Tier 3: fuzzy match. romaji is lowercased, so the lowercased
        # keyword is compared to keep both operands in the same case.
        best_word_sim = max(
            (title_similarity(keyword_lower, w) for w in romaji.split()),
            default=0.0,
        )
        full_sim = title_similarity(keyword_lower, romaji)
        best_sim = max(best_word_sim, full_sim)
        if best_sim >= 0.6:
            fuzzy_candidates.append((best_sim, a))

    # Fuzzy results are a fallback only when nothing matched outright.
    if not results and fuzzy_candidates:
        fuzzy_candidates.sort(key=lambda x: x[0], reverse=True)
        results = [a for _, a in fuzzy_candidates[:10]]

    return results
# ── CLI entry point ──
if __name__ == "__main__":
    import sys
    import asyncio

    client = AnissiaClient()
    args = sys.argv[1:]

    async def main():
        """Run the sub-command given on the command line.

        Supported sub-commands are ``schedule [week]``, ``search <keyword...>``
        and ``captions <anime_no>``. Anything else, including ``search`` or
        ``captions`` without an operand, prints the usage line.
        """
        usage = "사용법: python tools/anissia_client.py [schedule|search|captions] [인자]"

        if not args:
            print(usage)
            return

        command = args[0]
        has_operand = len(args) > 1

        if command == "schedule":
            # e.g. python tools/anissia_client.py schedule 3
            # The week index defaults to 0 when no operand is given.
            if has_operand:
                week = int(args[1])
            else:
                week = 0
            anime_list = await client.get_schedule(week)
            day = WEEK_NAMES.get(week, "?")
            print(f"📺 {day}요일 편성표 ({len(anime_list)}개):")
            for entry in anime_list:
                if entry.caption_count:
                    cap = f"자막 {entry.caption_count}"
                else:
                    cap = "자막 없음"
                print(f" {entry.time} {entry.subject} ({entry.original_subject}) [{cap}]")
            return

        if command == "search" and has_operand:
            # e.g. python tools/anissia_client.py search "프리렌"
            # All remaining arguments are joined into one keyword.
            keyword = " ".join(args[1:])
            results = await client.search_anime(keyword)
            print(f"🔍 '{keyword}' 검색 결과 ({len(results)}개):")
            for entry in results:
                print(f" [{entry.anime_no}] {entry.subject} ({entry.original_subject}) | {WEEK_NAMES.get(entry.week, '?')} {entry.time}")
            return

        if command == "captions" and has_operand:
            # e.g. python tools/anissia_client.py captions 12345
            anime_no = int(args[1])
            captions = await client.get_captions(anime_no)
            print(f"📝 자막 목록 ({len(captions)}건):")
            for caption in captions:
                print(f" {caption.episode}화 | {caption.name} | {caption.website} | {caption.updated}")
            return

        # Unknown sub-command, or a sub-command missing its operand.
        print(usage)

    asyncio.run(main())