feat(anime): 자막/토렌트 파이프라인 대폭 개선

- Blogspot Atom Feed API로 전체 에피소드 자막 URL 발견
- AniList prequel 체인 기반 시즌 에피소드 오프셋 자동 감지
- Nyaa S-tag 감지 → 절대/시즌 번호 체계 자동 판별
- 기존 자막 에피소드 스킵 (URL 페치 전 pre-skip)
- 오프셋 적용 자막 리네임 (시즌번호→절대번호 매칭)
- ASW HEVC 토렌트 우선 정렬 (truncation 방지)
- 토렌트 완료 대기 → 자동 삭제 라이프사이클
- 중복 자막 자동 삭제
- .smi 자막 확장자 지원
2026-03-15 18:23:57 +09:00
parent 9f74812710
commit 3618387b8e
8 changed files with 1386 additions and 532 deletions
--- a/api/discord_bot.py
+++ b/api/discord_bot.py
@@ -142,7 +142,7 @@ async def _agent_call(text: str, history: str, project_path: str) -> str:
    )
    response = await gemini.call_agent(
-        "agent", context, cwd=project_path, timeout=300,
+        "agent", context, cwd=project_path, timeout=1200,
    )
    return response
@@ -291,6 +291,39 @@ async def on_message(message: discord.Message):
            )
            async with message.channel.typing():
                # 1단계: NLU 빠른 분류 (Gemini text 모드, ~5초)
                intent = await _classify_intent(user_text, ws.path)
                logger.info(f"NLU 분류: '{user_text[:50]}' → {intent}")
                # 2단계: anime 의도면 직접 실행 (Gemini agent 우회)
                if intent and intent.get("mode") == "anime":
                    if progress_msg:
                        await progress_msg.edit(embed=discord.Embed(
                            title="📦 애니 작업 실행 중...",
                            description=f"```{user_text[:200]}```",
                            color=0x3498DB,
                        ))
                    await _handle_anime(message, intent)
                    if progress_msg:
                        try: await progress_msg.delete()
                        except Exception: pass
                    return
                # 2-2단계: research 의도면 조사+위키 등록
                if intent and intent.get("mode") == "research":
                    if progress_msg:
                        await progress_msg.edit(embed=discord.Embed(
                            title="🔍 리서치 진행 중...",
                            description=f"주제: **{intent.get('topic', user_text[:50])}**",
                            color=0x9B59B6,
                        ))
                    await _handle_research(message, intent, ws.path)
                    if progress_msg:
                        try: await progress_msg.delete()
                        except Exception: pass
                    return
                # 3단계: 일반 → Gemini agent 모드
                history = await _get_channel_history(message.channel, limit=10)
                response = await _agent_call(user_text, history, ws.path)
@@ -337,7 +370,47 @@ async def on_message(message: discord.Message):
 # ──────────────────────────────────────────────
-#  Anime 핸들러 (AI가 분류한 의도 실행)
+#  NLU 빠른 분류 (Gemini text 모드)
 # ──────────────────────────────────────────────
 # Classification prompt used by _classify_intent(): it is prepended to the
 # user's message and sent to Gemini. It tells the model to reply with JSON
 # only, choosing mode "anime", "research" or "chat", and lists the extra
 # fields expected for the anime and research modes plus a few examples.
 # The text is runtime data for the model — edit wording with care.
 _NLU_PROMPT = """\
 사용자 메시지를 분류하여 JSON으로 응답하세요. 반드시 JSON만 출력.
 분류:
 - 애니메이션 다운로드/검색/상태 → mode: "anime"
 - 조사+위키 등록/정리 요청 → mode: "research"
 - 그 외 → mode: "chat"
 anime 필드: action ("batch"|"download"|"search"|"status"|"schedule"|"list"), title, episode, filter
 research 필드:
 - action: "research" (조사+등록), "organize" (기존 정리), "update" (페이지 수정)
 - topic: 주제
 예시:
 "이번분기 애니 자막 다운받아줘" → {"mode":"anime","action":"batch","filter":"자막"}
 "FX Forward 조사해서 위키에 정리해줘" → {"mode":"research","action":"research","topic":"FX Forward"}
 "위키에 서버 관련 내용 정리해줘" → {"mode":"research","action":"organize","topic":"서버"}
 "GraphQL이 뭐야?" → {"mode":"chat"}
 """
 async def _classify_intent(text: str, project_path: str) -> dict | None:
    """Classify the user's intent with a quick Gemini text-mode call.

    The model is asked (via ``_NLU_PROMPT``) to answer with a JSON object.
    The reply may still be wrapped in prose or a code fence, so the first
    decodable JSON object found anywhere in the reply is used.

    Args:
        text: Raw user message to classify.
        project_path: Passed to ``GeminiCaller``.

    Returns:
        The decoded JSON object as a dict, or ``None`` when the call fails or
        the reply contains no decodable JSON object. ``None`` lets the caller
        fall back to the general agent path.
    """
    import json as _json

    try:
        gemini = GeminiCaller(project_path)
        raw = await gemini.call("unified", f"{_NLU_PROMPT}\n\n사용자: {text}", timeout=30)

        # Decode starting at each "{" instead of regex-matching "{...}":
        # a regex that stops at the first "}" truncates nested objects and
        # string values containing "}", which then fail to parse.
        decoder = _json.JSONDecoder()
        last_error = None
        start = raw.find("{")
        while start != -1:
            try:
                parsed, _end = decoder.raw_decode(raw, start)
            except _json.JSONDecodeError as decode_error:
                last_error = decode_error
            else:
                if isinstance(parsed, dict):
                    return parsed
            start = raw.find("{", start + 1)

        # Braces were present but nothing decoded: report it through the
        # same warning path a parse failure always used.
        if last_error is not None:
            raise last_error
    except Exception as e:
        # Best-effort classification: any failure means "unknown intent".
        logger.warning(f"NLU 분류 실패 (agent fallback): {e}")
    return None
 # ──────────────────────────────────────────────
 #  Anime 핸들러 (NLU 분류 결과로 직접 실행)
 # ──────────────────────────────────────────────
 async def _handle_anime(message: discord.Message, parsed: dict):
@@ -362,6 +435,9 @@ async def _handle_anime(message: discord.Message, parsed: dict):
        elif action == "list":
            await _anime_list(message, pipeline, filter_str)
        elif action == "batch":
            await _anime_batch_direct(message, filter_str or "자막 배치")
        elif action in ("download", "sub_only", "video_only"):
            # 필터에 batch 조건이 있으면 복수 다운로드
            if not title and filter_str:
@@ -437,66 +513,236 @@ async def _anime_download(message, pipeline, title, mode, episode):
 async def _anime_batch(message, pipeline, action, filter_str):
-    """필터 기반 복수 애니 다운로드 (이번분기 자막있는것 등)."""
+    """필터 기반 복수 애니 다운로드 — batch_download() 사용."""
-    embed = discord.Embed(title="⏳ 편성표 분석 중...", description="조건에 맞는 애니 검색", color=0xF39C12)
+    await _anime_batch_direct(message, f"{filter_str} {action}")
 async def _anime_batch_direct(message, user_text: str):
    """애니 배치 다운로드 — Gemini 없이 직접 실행."""
    from tools.anime_pipeline import AnimePipeline
    t = user_text.lower()
    mode = "auto"
    if "자막만" in t or "sub_only" in t:
        mode = "sub_only"
    elif "영상만" in t or "video_only" in t:
        mode = "video_only"
    sub_filter = "자막" in t or "sub:yes" in t  # 자막 언급 시 자막 필터 활성
    embed = discord.Embed(
        title="📦 배치 다운로드 시작",
        description=f"모드: `{mode}` | 자막 필터: `{'ON' if sub_filter else 'OFF'}`\n⏳ NAS 스캔 + Anissia 로딩 중...",
        color=0xF39C12,
    )
    status_msg = await message.channel.send(embed=embed)
-    # 전체 편성표 로드
+    pipeline = AnimePipeline()
    all_anime = await pipeline.anissia.get_all_schedule()
-    # 필터 적용
+    try:
-    filtered = all_anime
+        results = await pipeline.batch_download(mode=mode, sub_filter=sub_filter)
-    if "sub:yes" in filter_str or "자막" in filter_str:
+    except Exception as e:
-        filtered = [a for a in filtered if a.caption_count > 0]
+        logger.error(f"배치 다운로드 오류: {e}", exc_info=True)
-    if "quarter:current" in filter_str or "이번" in filter_str:
+        embed.title = "❌ 배치 다운로드 실패"
-        from datetime import date
+        embed.description = str(e)[:500]
-        today = date.today()
+        embed.color = 0xE74C3C
        current_q = (today.month - 1) // 3 + 1
        current_year = today.year
        def _in_current_quarter(a):
            if not a.start_date:
                return False
            parts = a.start_date.split("-")
            y, m = int(parts[0]), int(parts[1])
            q = (m - 1) // 3 + 1
            return y == current_year and q == current_q
        filtered = [a for a in filtered if _in_current_quarter(a)]
    if "status:on" in filter_str:
        filtered = [a for a in filtered if a.status == "ON"]
    else:
        # 기본: ON 상태만
        filtered = [a for a in filtered if a.status == "ON"]
    embed.title = f"📋 조건 매칭: {len(filtered)}개"
    embed.description = "\n".join(f"• {a.subject} (자막 {a.caption_count}명)" for a in filtered[:15])
    if len(filtered) > 15:
        embed.description += f"\n... 외 {len(filtered)-15}개"
    embed.color = 0x3498DB
        await status_msg.edit(embed=embed)
    if not filtered:
        return
-    # 다운로드 실행
+    # 결과 정리
-    success_count = 0
+    success = [r for r in results if r.success]
-    fail_count = 0
+    lines = []
-    for anime in filtered:
+    for r in results:
-        try:
+        icon = "✅" if r.success else "❌"
-            result = await pipeline.download(anime.subject, mode=action)
+        title = r.anime.subject if r.anime else "?"
-            if result.torrent_added or result.subtitles:
+        detail = ""
-                success_count += 1
+        if r.torrent_added:
-            else:
+            detail += " 🎬토렌트추가"
-                fail_count += 1
+        if r.subtitles:
-        except Exception as e:
+            detail += f" 📝자막{len(r.subtitles)}"
-            logger.error(f"배치 다운로드 오류 ({anime.subject}): {e}")
+        if r.errors:
-            fail_count += 1
+            detail += f" ⚠️{r.errors[0][:50]}"
        lines.append(f"{icon} {title}{detail}")
-    result_embed = discord.Embed(
+    embed.title = f"📊 배치 결과: {len(success)}/{len(results)}건 성공"
-        title=f"📊 배치 다운로드 결과",
+    embed.description = "\n".join(lines) or "처리할 애니가 없습니다."
-        description=f"✅ 성공: {success_count}개\n⚠️ 실패/보류: {fail_count}개",
+    embed.color = 0x2ECC71 if success else 0xF39C12
-        color=0x2ECC71 if success_count > 0 else 0xF39C12,
+    await status_msg.edit(embed=embed)
 # ──────────────────────────────────────────────
 #  Research 핸들러 (조사+위키 등록 / 기존 정리)
 # ──────────────────────────────────────────────
 async def _handle_research(message: discord.Message, parsed: dict, project_path: str):
    """Dispatch a research request produced by the intent classifier.

    Reads ``action`` (default ``"research"``) and ``topic`` from ``parsed``.
    With no topic the user is asked for one. Otherwise ``"research"`` and
    ``"update"`` both run the research-and-publish flow, ``"organize"`` runs
    the wiki reorganisation flow, and any other action is reported as unknown.
    Any exception from the flows is logged and reported back to the user.

    Args:
        message: Discord message that triggered the request.
        parsed: Classifier output containing ``action`` / ``topic``.
        project_path: Forwarded to the research flows.
    """
    from tools.wiki_client import WikiClient

    requested_action = parsed.get("action", "research")
    subject = parsed.get("topic", "")
    wiki_client = WikiClient()

    # Nothing to research without a topic — ask and stop.
    if not subject:
        await message.reply("🔍 어떤 주제를 조사할까요?")
        return

    try:
        if requested_action in ("research", "update"):
            await _research_and_publish(message, subject, wiki_client, project_path)
        elif requested_action == "organize":
            await _organize_wiki(message, subject, wiki_client, project_path)
        else:
            await message.reply(f"❓ 알 수 없는 리서치 액션: {requested_action}")
    except Exception as exc:
        # Top-level boundary for the research flows: log with traceback and
        # tell the user instead of letting the handler crash.
        logger.error(f"리서치 핸들러 오류: {exc}", exc_info=True)
        await message.reply(f"❌ 리서치 오류: {str(exc)[:300]}")
 async def _research_and_publish(
    message: discord.Message, topic: str,
    wiki, project_path: str,
 ):
    """주제 조사 → 위키 페이지 등록."""
    from tools.wiki_client import WikiClient
    status_msg = await message.channel.send(
        embed=discord.Embed(
            title="🔍 조사 중...",
            description=f"**{topic}**\n\nGemini가 웹 검색 + 자료를 수집하고 있습니다.",
            color=0x9B59B6,
        )
-    await message.channel.send(embed=result_embed)
+    )
    # Gemini agent로 조사 (google_web_search 자동 사용)
    gemini = GeminiCaller(project_path)
    research_prompt = (
        f"다음 주제에 대해 깊이 있게 조사하고, 위키 페이지용 마크다운으로 정리하세요.\n\n"
        f"주제: {topic}\n\n"
        f"요구사항:\n"
        f"1. 반드시 웹 검색을 통해 최신 정보를 확인하세요\n"
        f"2. 핵심 개념, 장단점, 실무 활용법을 포함하세요\n"
        f"3. 출처 URL을 반드시 포함하세요\n"
        f"4. 마크다운 형식으로 작성하세요 (# 제목부터)\n"
        f"5. 한국어로 작성하세요\n"
    )
    try:
        content = await gemini.call_agent(
            "agent", research_prompt, cwd=project_path, timeout=300,
        )
    except GeminiCallError as e:
        await status_msg.edit(embed=discord.Embed(
            title="❌ 조사 실패", description=str(e)[:500], color=0xE74C3C,
        ))
        return
    if not content or len(content) < 50:
        await status_msg.edit(embed=discord.Embed(
            title="⚠️ 결과 부족", description="충분한 정보를 수집하지 못했습니다.",
            color=0xF39C12,
        ))
        return
    # 위키 등록
    slug = WikiClient.slugify(topic)
    path = f"research/{slug}"
    page = await wiki.upsert_page(path, topic, content, description=f"리서치: {topic}")
    # 대시보드 갱신
    await wiki.update_dashboard()
    await status_msg.edit(embed=discord.Embed(
        title=f"✅ 위키 등록 완료: {topic}",
        description=(
            f"📄 [{path}](https://wiki.variet.net/{path})\n"
            f"📊 대시보드 갱신됨\n"
            f"📝 {len(content)}자 작성"
        ),
        color=0x2ECC71,
    ))
 async def _organize_wiki(
    message: discord.Message, topic: str,
    wiki, project_path: str,
 ):
    """Merge the existing wiki pages related to ``topic`` into one page.

    Steps: list all pages, keep those whose title or path contains the topic
    (case-insensitive), fetch their content, ask the Gemini agent to produce
    one consolidated markdown page, upsert it at ``research/<slug>`` and
    refresh the dashboard. Progress and the outcome are shown by editing a
    single status message in the channel.

    Args:
        message: Discord message that triggered the request.
        topic: Topic keyword used to select related pages.
        wiki: Wiki client providing ``list_pages``, ``get_page``,
            ``upsert_page`` and ``update_dashboard``.
        project_path: Passed to ``GeminiCaller`` and used as the agent cwd.
    """
    from tools.wiki_client import WikiClient

    status_msg = await message.channel.send(
        embed=discord.Embed(
            title="📋 위키 분석 중...",
            description=f"**{topic}** 관련 페이지를 수집하고 있습니다.",
            color=0x9B59B6,
        )
    )
    # Collect pages whose title or path mentions the topic.
    all_pages = await wiki.list_pages()
    topic_lower = topic.lower()
    related = [p for p in all_pages if topic_lower in p.title.lower() or topic_lower in p.path.lower()]
    if not related:
        await status_msg.edit(embed=discord.Embed(
            title="⚠️ 관련 페이지 없음",
            description=f"'{topic}' 관련 위키 페이지를 찾지 못했습니다.",
            color=0xF39C12,
        ))
        return
    # Fetch the full content of every related page.
    contents = []
    for p in related:
        full_page = await wiki.get_page(p.id)
        contents.append(f"## 페이지: {full_page.title} (/{full_page.path})\n{full_page.content}\n")
    combined = "\n---\n".join(contents)
    await status_msg.edit(embed=discord.Embed(
        title="🔄 재정리 중...",
        description=f"{len(related)}개 페이지를 Gemini가 통합 정리합니다.",
        color=0x9B59B6,
    ))
    # Ask Gemini to consolidate; the source text is capped at 15000 chars
    # to bound the prompt size.
    gemini = GeminiCaller(project_path)
    organize_prompt = (
        f"다음은 '{topic}' 관련 기존 위키 페이지들입니다.\n"
        f"이 내용을 하나의 통합된 위키 페이지로 재정리하세요.\n\n"
        f"요구사항:\n"
        f"1. 중복 제거, 논리적 구조화\n"
        f"2. 누락된 정보가 있으면 웹 검색으로 보완\n"
        f"3. 마크다운 형식, 한국어\n\n"
        f"=== 기존 페이지들 ===\n{combined[:15000]}"
    )
    try:
        content = await gemini.call_agent(
            "agent", organize_prompt, cwd=project_path, timeout=300,
        )
    except GeminiCallError as e:
        await status_msg.edit(embed=discord.Embed(
            title="❌ 정리 실패", description=str(e)[:500], color=0xE74C3C,
        ))
        return
    # Same guard as the research flow: never overwrite a wiki page with an
    # empty or near-empty agent reply (and avoid len(None) further down).
    if not content or len(content) < 50:
        await status_msg.edit(embed=discord.Embed(
            title="⚠️ 결과 부족", description="충분한 정보를 수집하지 못했습니다.",
            color=0xF39C12,
        ))
        return
    # Publish the consolidated page and refresh the dashboard.
    slug = WikiClient.slugify(topic)
    path = f"research/{slug}"
    await wiki.upsert_page(path, f"{topic} (통합)", content, description=f"통합 정리: {topic}")
    await wiki.update_dashboard()
    await status_msg.edit(embed=discord.Embed(
        title=f"✅ 위키 정리 완료: {topic}",
        description=(
            f"📄 [{path}](https://wiki.variet.net/{path})\n"
            f"🔗 원본 {len(related)}개 → 통합 1개\n"
            f"📝 {len(content)}자"
        ),
        color=0x2ECC71,
    ))
 async def _anime_schedule(message, pipeline, filter_str):
--- a/config.py
+++ b/config.py
@@ -59,3 +59,8 @@ QBIT_PASSWORD: str = os.getenv("QBIT_PASSWORD", "")
 NAS_ANIME_PATH: str = os.getenv(
    "NAS_ANIME_PATH", r"\\192.168.10.10\NasData\Video\Animation"
 )
 # === Wiki.js ===
 WIKI_URL: str = os.getenv("WIKI_URL", "https://wiki.variet.net")
 WIKI_API_KEY: str = os.getenv("WIKI_API_KEY", "")
--- a/prompts/agent.md
+++ b/prompts/agent.md
@@ -29,12 +29,30 @@
 - `gitea_commits`, `gitea_prs`, `gitea_issues`, `gitea_branches` — Git 관리
 - `vikunja_tasks`, `vikunja_create_task`, `vikunja_complete_task` — 태스크 관리
-## 복수 작품 처리 방법
+## ⚠️ 복수 작품 처리 — 반드시 전부 완료할 것
-사용자가 "여러 작품 다운로드" 등 복수 작업을 요청하면:
+사용자가 "이번 분기 애니 다운받아줘" 등 **복수 작업**을 요청하면:
-1. 먼저 `anime_nas_list`로 대상 목록을 확인하세요
+
-2. 각 작품마다 `anime_download`를 **개별 호출**하세요
+1. `anime_nas_list(current_quarter=True)`로 이번 분기 애니 **전체 목록** 확인
-3. 진행 상황과 결과를 정리하여 보고하세요
+2. 목록의 **모든 작품**에 대해 `anime_download`를 **하나씩 순서대로 호출**
 3. **1개만 하고 멈추지 마세요** — 목록 끝까지 전부 처리해야 합니다
 4. 도중에 개별 실패가 있어도 **다음 작품으로 넘어가세요**
 5. 전부 완료한 뒤 결과를 정리하여 보고하세요
 ### 예시 흐름
 ```
 → anime_nas_list(current_quarter=True)
  "5개 애니 확인: A, B, C, D, E"
 → anime_download("A") → 결과 기록
 → anime_download("B") → 결과 기록
 → anime_download("C") → 결과 기록
 → anime_download("D") → 결과 기록
 → anime_download("E") → 결과 기록
 → 최종 보고: "5개 중 3개 성공, 2개 보류"
 ```
 ## 응답 규칙
--- a/prompts/operator.md
+++ b/prompts/operator.md
@@ -39,6 +39,12 @@ C:\ProgramData\miniforge3\envs\agent_chat\python.exe tools/nyaa_client.py search
 C:\ProgramData\miniforge3\envs\agent_chat\python.exe tools/qbit_client.py status
 C:\ProgramData\miniforge3\envs\agent_chat\python.exe tools/qbit_client.py add "magnet:..." --path "경로"
 C:\ProgramData\miniforge3\envs\agent_chat\python.exe tools/qbit_client.py delete <hash>
 # Wiki.js 도구
 C:\ProgramData\miniforge3\envs\agent_chat\python.exe tools/wiki_client.py list [prefix]
 C:\ProgramData\miniforge3\envs\agent_chat\python.exe tools/wiki_client.py get <path>
 C:\ProgramData\miniforge3\envs\agent_chat\python.exe tools/wiki_client.py create <path> <title> [content]
 C:\ProgramData\miniforge3\envs\agent_chat\python.exe tools/wiki_client.py dashboard
 ```
 ## 실행 패턴 예시
--- a/tools/anime_pipeline.py
+++ b/tools/anime_pipeline.py
--- a/tools/anissia_client.py
+++ b/tools/anissia_client.py
@@ -49,6 +49,7 @@ class AnissiaClient:
    def __init__(self, timeout: float = 15.0):
        """Store the timeout and start with an empty schedule cache.

        Args:
            timeout: Saved as ``self._timeout``. NOTE(review): presumably the
                HTTP request timeout in seconds — confirm against the request code.
        """
        self._timeout = timeout
        # None means "not loaded yet"; search_anime() fills it on first use
        # and reuses the cached list afterwards.
        self._schedule_cache: list[AnimeInfo] | None = None
    async def get_schedule(self, week: int) -> list[AnimeInfo]:
        """요일별 편성표 조회 (week: 0=일 ~ 6=토, 7=기타)."""
@@ -110,10 +111,17 @@ class AnissiaClient:
        ]
    async def search_anime(self, keyword: str) -> list[AnimeInfo]:
-        """키워드로 전체 편성표에서 검색 (한글/일어/영문 fuzzy 매칭)."""
+        """키워드로 전체 편성표에서 검색 (한글/일어/영문 fuzzy 매칭).
        스케줄은 세션당 1회만 API 호출, 이후 캐시 사용.
        """
        import re as _re
-        all_anime = await self.get_all_schedule()
+        # 캐시 사용
        if self._schedule_cache is None:
            self._schedule_cache = await self.get_all_schedule()
            logger.info(f"스케줄 캐시 로드: {len(self._schedule_cache)}개")
        all_anime = self._schedule_cache
        keyword_lower = keyword.lower()
        # 특수문자 제거 버전 (따옴표, 괄호 등)
        keyword_norm = _re.sub(r'[^\w\s]', '', keyword_lower)
@@ -133,9 +141,13 @@ class AnissiaClient:
            # 특수문자 제거 버전
            subj_norm = _re.sub(r'[^\w\s]', '', subj_lower)
            orig_norm = _re.sub(r'[^\w\s]', '', orig_lower)
            # 공백까지 제거 버전 (NAS 폴더명→Anissia 매칭용)
            subj_compact = _re.sub(r'\s+', '', subj_norm)
            keyword_compact = _re.sub(r'\s+', '', keyword_norm)
-            # 1차: substring 매칭 (원본 + 정규화)
+            # 1차: substring 매칭 (원본 + 정규화 + 공백제거)
-            if (keyword_lower in subj_lower or keyword_norm in subj_norm):
+            if (keyword_lower in subj_lower or keyword_norm in subj_norm
                    or keyword_compact in subj_compact):
                results.append(a)
            elif (keyword_lower in orig_lower or keyword_norm in orig_norm):
                results.append(a)
--- a/tools/subtitle_downloader.py
+++ b/tools/subtitle_downloader.py
@@ -218,7 +218,7 @@ class SubtitleDownloader:
        seen_urls = {r.download_url for r in results}
        # HTML <a href="...">
-        for gurl in re.findall(r'href="([^"]+\.(?:ass|srt|ssa|sub|zip|7z)(?:\?[^"]*)?)"', html, re.IGNORECASE):
+        for gurl in re.findall(r'href="([^"]+\.(?:ass|srt|ssa|sub|smi|zip|7z)(?:\?[^"]*)?)"', html, re.IGNORECASE):
            if gurl not in seen_urls:
                seen_urls.add(gurl)
                filename = unquote(gurl.split("/")[-1].split("?")[0])
@@ -230,7 +230,7 @@ class SubtitleDownloader:
                ))
        # 마크다운 [텍스트](url) — Blogspot 등
-        for text, gurl in re.findall(r'\[([^\]]+)\]\((https?://[^)]+\.(?:ass|srt|ssa|sub|zip|7z)[^)]*)\)', html, re.IGNORECASE):
+        for text, gurl in re.findall(r'\[([^\]]+)\]\((https?://[^)]+\.(?:ass|srt|ssa|sub|smi|zip|7z)[^)]*)\)', html, re.IGNORECASE):
            if gurl not in seen_urls:
                seen_urls.add(gurl)
                results.append(SubtitleFile(
@@ -307,7 +307,7 @@ class SubtitleDownloader:
                        if name.endswith("/"):
                            continue
                        ext = Path(name).suffix.lower()
-                        if ext in (".ass", ".srt", ".ssa", ".sub"):
+                        if ext in (".ass", ".srt", ".ssa", ".sub", ".smi"):
                            # 중첩 폴더 무시, 파일만 추출
                            out_name = Path(name).name
                            out_path = target_dir / out_name
--- a/tools/title_matcher.py
+++ b/tools/title_matcher.py
@@ -19,6 +19,79 @@ logger = logging.getLogger("variet.tools.matcher")
 #  영어 제목 조회 (Jikan API / MyAnimeList)
 # ──────────────────────────────────────────────
 async def web_search_anime_title(query: str) -> list[str]:
    """Look up anime title candidates with a DuckDuckGo HTML search.

    Searches for ``"<query> 애니"`` and extracts Korean title candidates from
    the result titles and snippets: first the quoted titles found in the
    combined text, then the first five result titles. A candidate is kept
    when, after clean-up, it has at least 4 characters, contains at least one
    Hangul syllable, and has not been seen yet.

    Args:
        query: Search text, e.g. a compacted Korean folder name.

    Returns:
        Up to five candidate titles in discovery order; an empty list when
        the search or parsing fails (the failure is logged as a warning).
    """
    import re as _re
    from html import unescape as _unescape

    search_query = f"{query} 애니"
    found = []

    # Trailing noise removed from a candidate, applied in this order:
    # leftover tags, ellipsis, date suffix, "N화" suffix, site-name suffix.
    noise_patterns = (
        r'<[^>]+>',
        r'\s*\.{2,}$',
        r'\s*\d+월\s*\d+일.*$',
        r'\s*\d+화.*$',
        r'\s*[-–—]\s*(나무위키|위키백과|namu\.wiki|Wikipedia|onnada).*$',
    )

    def _clean_candidate(text: str) -> str:
        """Unescape entities, strip noise suffixes and surrounding quotes."""
        cleaned = _unescape(text)
        for pattern in noise_patterns:
            cleaned = _re.sub(pattern, '', cleaned)
        cleaned = cleaned.strip().strip('"\'「」『』')
        return cleaned.strip()

    def _is_korean_title(title: str) -> bool:
        """True for titles of 4+ chars that contain a Hangul syllable."""
        return len(title) >= 4 and any('\uAC00' <= ch <= '\uD7A3' for ch in title)

    try:
        # DuckDuckGo's HTML endpoint needs no API key.
        async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client:
            resp = await client.get(
                "https://html.duckduckgo.com/html/",
                params={"q": search_query},
                headers={"User-Agent": "Mozilla/5.0"},
            )
            resp.raise_for_status()
            page_html = resp.text

        title_matches = _re.findall(
            r'class="result__a"[^>]*>([^<]+)</a>', page_html
        )
        snippet_matches = _re.findall(
            r'class="result__snippet"[^>]*>(.*?)</a>', page_html, _re.DOTALL
        )

        # Flatten titles + snippets into plain text so quoted titles can be
        # picked out of it.
        all_text = " ".join(title_matches + snippet_matches)
        all_text = _unescape(_re.sub(r'<[^>]+>', ' ', all_text))
        quoted = _re.findall(r'[「『"]([\w\s~·!?,가-힣]+?)[」』"]', all_text)

        # Quoted titles take priority, then the top five result titles.
        for raw_title in quoted + title_matches[:5]:
            candidate = _clean_candidate(raw_title)
            if _is_korean_title(candidate) and candidate not in found:
                found.append(candidate)

        logger.info(f"웹 검색 '{search_query}' → {len(found)}개 후보: {found[:3]}")
        return found[:5]
    except Exception as e:
        # Best-effort lookup: callers get an empty list on any failure.
        logger.warning(f"웹 검색 실패: {e}")
        return []
 async def fetch_english_title(japanese_title: str) -> dict[str, str]:
    """Jikan API로 일본어 원제의 영어/로마자 제목 조회.
@@ -28,11 +101,27 @@ async def fetch_english_title(japanese_title: str) -> dict[str, str]:
         "synonyms": ["Frieren at the Funeral Season 2"]}
        실패 시 빈 dict.
    """
    return await fetch_title_via_jikan(japanese_title)
 async def fetch_title_via_jikan(query: str) -> dict[str, str]:
    """Jikan API로 제목 조회 — 한글/일본어/영어 어떤 검색어든 가능.
    NAS 폴더명(한글 압축) → Jikan 검색 → 정확한 제목들을 반환.
    Anissia 직접 검색 실패 시 fallback으로 사용.
    Returns:
        {"default": "로마자/영어 제목",
         "english": "영어 제목",
         "japanese": "일본어 제목",
         "synonyms": ["동의어1", ...]}
        실패 시 빈 dict.
    """
    try:
        async with httpx.AsyncClient(timeout=10) as client:
            resp = await client.get(
                "https://api.jikan.moe/v4/anime",
-                params={"q": japanese_title, "limit": 5},
+                params={"q": query, "limit": 5},
            )
            resp.raise_for_status()
            data = resp.json()
@@ -41,22 +130,36 @@ async def fetch_english_title(japanese_title: str) -> dict[str, str]:
        if not items:
            return {}
-        # 원제와 가장 잘 매칭되는 항목 선택
+        # 검색어와 가장 잘 매칭되는 항목 선택
        # 한글 검색이면 title_japanese와 비교, 그 외에는 title과 비교
        best = None
        best_score = 0.0
        for item in items:
-            jp = item.get("title_japanese", "")
+            # 여러 제목 필드와 비교하여 최고 유사도 채택
-            score = SequenceMatcher(None, japanese_title, jp).ratio()
+            candidates = [
                item.get("title_japanese", ""),
                item.get("title", ""),
                item.get("title_english", "") or "",
            ]
            for t in item.get("titles", []):
                candidates.append(t.get("title", ""))
            score = max(
                SequenceMatcher(None, query, c).ratio()
                for c in candidates if c
            ) if candidates else 0.0
            if score > best_score:
                best_score = score
                best = item
-        if not best or best_score < 0.5:
+        if not best or best_score < 0.3:
            return {}
        result = {
            "default": best.get("title", ""),
            "english": best.get("title_english") or "",
            "japanese": best.get("title_japanese") or "",
            "synonyms": [],
        }
        for t in best.get("titles", []):
@@ -64,8 +167,8 @@ async def fetch_english_title(japanese_title: str) -> dict[str, str]:
                result["synonyms"].append(t["title"])
        logger.info(
-            f"Jikan 영어 제목 조회: {japanese_title} → "
+            f"Jikan 제목 조회: '{query}' → "
-            f"default={result['default']}, english={result['english']}"
+            f"default={result['default']}, jp={result['japanese']}"
        )
        return result