fix(scrape): filter out style tags and CSS code from chat messages

This commit is contained in:
2026-03-07 21:32:19 +09:00
parent 6ed5b33caa
commit d2776fc327

View File

@@ -244,14 +244,22 @@ class CDPClient {
}
// --- 일반 텍스트 ---
const text = block.textContent.trim();
// style 태그 내용을 제외한 순수 텍스트만 추출
const cloned = block.cloneNode(true);
cloned.querySelectorAll('style').forEach(s => s.remove());
const text = cloned.textContent.trim();
if (text.length > 0) {
// CSS 코드내부 스타일 건너뛰기
if (text.startsWith('/*') || text.startsWith('@media') || text.startsWith('.') && text.includes('{')) continue;
// CSS 코드/내부 스타일 건너뛰기
if (text.startsWith('/*') || text.startsWith('@media') ||
text.includes('prefers-color-scheme') ||
text.includes('{') && text.includes('}') && text.includes(':') && text.includes(';') && text.length < 2000 ||
text.startsWith('.markdown-alert')) continue;
// leading-relaxed select-text → 마크다운 렌더링 텍스트
const mkEl = block.querySelector('.leading-relaxed.select-text');
const htmlContent = mkEl ? mkEl.innerHTML : block.innerHTML;
let htmlContent = mkEl ? mkEl.innerHTML : block.innerHTML;
// HTML에서도 style 태그 제거
htmlContent = htmlContent.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '');
messages.push({
type: 'text',