From d2776fc327271fc0f9d43d9c91c082b8c4e7266d Mon Sep 17 00:00:00 2001 From: Variet Date: Sat, 7 Mar 2026 21:32:19 +0900 Subject: [PATCH] fix(scrape): filter out style tags and CSS code from chat messages --- server/cdp-client.js | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/server/cdp-client.js b/server/cdp-client.js index 406161f..0bad1f1 100644 --- a/server/cdp-client.js +++ b/server/cdp-client.js @@ -244,14 +244,22 @@ class CDPClient { } // --- 일반 텍스트 --- - const text = block.textContent.trim(); + // style 태그 내용을 제외한 순수 텍스트만 추출 + const cloned = block.cloneNode(true); + cloned.querySelectorAll('style').forEach(s => s.remove()); + const text = cloned.textContent.trim(); if (text.length > 0) { - // CSS 코드나 내부 스타일은 건너뛰기 - if (text.startsWith('/*') || text.startsWith('@media') || text.startsWith('.') && text.includes('{')) continue; + // CSS 코드/내부 스타일 건너뛰기 + if (text.startsWith('/*') || text.startsWith('@media') || + text.includes('prefers-color-scheme') || + text.includes('{') && text.includes('}') && text.includes(':') && text.includes(';') && text.length < 2000 || + text.startsWith('.markdown-alert')) continue; // leading-relaxed select-text → 마크다운 렌더링 텍스트 const mkEl = block.querySelector('.leading-relaxed.select-text'); - const htmlContent = mkEl ? mkEl.innerHTML : block.innerHTML; + let htmlContent = mkEl ? mkEl.innerHTML : block.innerHTML; + // HTML에서도 style 태그 제거 + htmlContent = htmlContent.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, ''); messages.push({ type: 'text',