From 9281c6b45d0c53fa2a34827e7c747edf753644e8 Mon Sep 17 00:00:00 2001 From: Variet Date: Sat, 7 Mar 2026 22:57:04 +0900 Subject: [PATCH] refactor(scrape): complete rewrite of content extraction - per-turn parsing for all types --- public/css/style.css | 119 ++++++++++++++++++ public/js/chat-panel.js | 22 ++++ server/cdp-client.js | 273 +++++++++++++++++++--------------------- 3 files changed, 268 insertions(+), 146 deletions(-) diff --git a/public/css/style.css b/public/css/style.css index 27ebbf4..2b00ce5 100644 --- a/public/css/style.css +++ b/public/css/style.css @@ -643,6 +643,125 @@ body { } /* Chat Input */ + +/* --- 사용자 메시지 --- */ +.msg-user { + background: var(--accent-primary); + color: white; + padding: 8px 14px; + border-radius: 12px 12px 4px 12px; + margin: 4px 0; + margin-left: auto; + max-width: 80%; + font-size: 13px; + line-height: 1.5; + width: fit-content; +} + +/* --- 상태 표시 --- */ +.msg-status { + font-size: 12px; + color: var(--text-muted, var(--text-secondary)); + opacity: 0.6; + padding: 2px 0; + font-style: italic; +} + +/* --- 마크다운 렌더링 콘텐츠 --- */ +.msg-text table { + border-collapse: collapse; + width: 100%; + margin: 8px 0; + font-size: 12px; +} + +.msg-text table th, +.msg-text table td { + border: 1px solid var(--border-subtle); + padding: 6px 10px; + text-align: left; +} + +.msg-text table th { + background: var(--bg-tertiary); + font-weight: 600; + color: var(--text-primary); +} + +.msg-text table td { + color: var(--text-secondary); +} + +.msg-text blockquote { + border-left: 3px solid var(--accent-primary); + padding: 4px 12px; + margin: 8px 0; + color: var(--text-secondary); + font-style: italic; +} + +.msg-text ul, +.msg-text ol { + padding-left: 20px; + margin: 4px 0; +} + +.msg-text li { + margin: 2px 0; +} + +.msg-text h1, +.msg-text h2, +.msg-text h3, +.msg-text h4 { + margin: 8px 0 4px; + color: var(--text-primary); +} + +.msg-text h3 { + font-size: 14px; +} + +.msg-text h4 { + font-size: 13px; +} + +.msg-text a { + color: var(--accent-primary); + text-decoration: underline; +} + +.msg-text code { + background: var(--bg-tertiary); + padding: 1px 4px; + border-radius: 3px; + font-family: var(--font-mono); + font-size: 12px; +} + +.msg-text pre { + background: var(--bg-tertiary); + padding: 10px; + border-radius: 6px; + overflow-x: auto; + margin: 6px 0; +} + +.msg-text pre code { + background: none; + padding: 0; +} + +.msg-text hr { + border: none; + border-top: 1px solid var(--border-subtle); + margin: 8px 0; +} + +.msg-text strong { + color: var(--text-primary); +} + .chat-input-area { display: flex; align-items: flex-end; diff --git a/public/js/chat-panel.js b/public/js/chat-panel.js index 4e5dd52..90e2d54 100644 --- a/public/js/chat-panel.js +++ b/public/js/chat-panel.js @@ -103,6 +103,8 @@ class ChatPanel { case 'code': return this._renderCode(msg); case 'image': return this._renderImage(msg); case 'actions': return this._renderActions(msg); + case 'user': return this._renderUser(msg); + case 'status': return this._renderStatus(msg); default: return null; } } @@ -353,4 +355,24 @@ class ChatPanel { _scrollToBottom() { this.messagesEl.scrollTop = this.messagesEl.scrollHeight; } + + /** + * 사용자 메시지 렌더링 + */ + _renderUser(msg) { + const wrapper = document.createElement('div'); + wrapper.className = 'msg-user'; + wrapper.textContent = msg.content; + return wrapper; + } + + /** + * 상태 표시 (Running, Generating 등) + */ + _renderStatus(msg) { + const wrapper = document.createElement('div'); + wrapper.className = 'msg-status'; + wrapper.textContent = msg.content; + return wrapper; + } } diff --git a/server/cdp-client.js b/server/cdp-client.js index 867a75d..401632f 100644 --- a/server/cdp-client.js +++ b/server/cdp-client.js @@ -121,171 +121,152 @@ class CDPClient { if (!scrollEl) return JSON.stringify([]); const messages = []; - - // 뷰포트에 실제 렌더링된 최상위 컨테이너 찾기 const topContainer = scrollEl.querySelector('.mx-auto.w-full > div > div'); if (!topContainer) return JSON.stringify([]); - // 각 turn(대화 턴)을 순회 + const actionKeywords = ['Proceed','Cancel','Open','View','Review','Approve','Reject','Yes','No','Accept','Deny','Allow','Skip']; + + // 유틸: 액션 버튼 추출 + function extractActions(container) { + return Array.from(container.querySelectorAll('button')).map(b => { + const label = b.textContent.trim(); + const rect = b.getBoundingClientRect(); + return { + label, + x: Math.round(rect.left + rect.width / 2), + y: Math.round(rect.top + rect.height / 2), + w: Math.round(rect.width), + h: Math.round(rect.height), + }; + }).filter(b => b.label && b.w > 0 && actionKeywords.some(k => b.label.includes(k))); + } + + // 유틸: 마크다운 영역에서 콘텐츠를 추출 + function extractContentBlocks(container) { + // select-text 또는 leading-relaxed 마크다운 렌더링 영역 찾기 + const mkEls = container.querySelectorAll('.select-text .leading-relaxed, .leading-relaxed.select-text'); + + for (const mkEl of mkEls) { + // style 태그 제거 + const clone = mkEl.cloneNode(true); + clone.querySelectorAll('style').forEach(s => s.remove()); + + const html = clone.innerHTML; + const text = clone.textContent.trim(); + + if (!text || text.length < 2) continue; + // CSS 필터 + if (text.startsWith('/*') || text.includes('prefers-color-scheme')) continue; + + messages.push({ + type: 'text', + content: text.substring(0, 5000), + html: html.substring(0, 10000), + }); + } + } + + // 각 turn을 순회 const turns = topContainer.children; for (let i = 0; i < turns.length; i++) { const turn = turns[i]; - // placeholder 블록 건너뛰기 (가상 스크롤) - const isPlaceholder = turn.children.length > 0 && + // placeholder 건너뛰기 + if (turn.children.length > 0 && Array.from(turn.children).every(c => c.classList.contains('rounded-lg') && c.classList.contains('bg-gray-500/10') && c.textContent.trim() === '' - ); - if (isPlaceholder) continue; + )) continue; - // 턴 내부의 각 메시지 블록 순회 - const blocks = turn.querySelectorAll(':scope > *'); - for (const block of blocks) { - // placeholder 개별 블록도 건너뛰기 - if (block.classList.contains('bg-gray-500/10') && block.textContent.trim() === '') continue; + // style 태그 미리 제거 + turn.querySelectorAll('style').forEach(s => s.remove()); - // 블록 내 style 태그 제거 (CSS 코드 누출 방지) - block.querySelectorAll('style').forEach(s => s.remove()); + // --- 사용자 메시지 감지 (bg-gray-500/15 + select-text) --- + const userMsgEl = turn.querySelector('.bg-gray-500\\\\/15.select-text, .bg-gray-500\\\\/15 .select-text'); + if (userMsgEl) { + const text = userMsgEl.textContent.trim(); + if (text) { + messages.push({ type: 'user', content: text.substring(0, 2000) }); + } + } - // --- 작업 카드 (task boundary) --- - const taskCard = block.querySelector('.isolate'); - if (taskCard || block.classList.contains('isolate')) { - const card = taskCard || block; - const titleEl = card.querySelector('.font-semibold'); - const summaryEl = card.querySelector('.text-sm .leading-relaxed'); - const expanded = card.querySelector('[aria-expanded]'); - - // 하위 항목들 추출 - const steps = []; - card.querySelectorAll('.flex.items-center.gap-2, .flex.w-full.items-center.gap-2').forEach(step => { - const txt = step.textContent.trim(); - if (txt && txt.length > 2) { - const svg = step.querySelector('svg'); - let icon = ''; - if (svg) { - const cls = svg.getAttribute('class') || ''; - if (cls.includes('check')) icon = '✓'; - else if (cls.includes('loader') || cls.includes('spin')) icon = '⟳'; - else if (cls.includes('x-circle') || cls.includes('alert')) icon = '⚠'; - } - steps.push({ icon, text: txt.substring(0, 200) }); + // --- isolate 카드들 (task boundary) --- + const isolates = turn.querySelectorAll('.isolate'); + for (const card of isolates) { + const titleEl = card.querySelector('.font-semibold'); + const summaryEl = card.querySelector('.text-sm .leading-relaxed'); + const expanded = card.querySelector('[aria-expanded]'); + + const steps = []; + card.querySelectorAll('.flex.items-center.gap-2, .flex.w-full.items-center.gap-2').forEach(step => { + const txt = step.textContent.trim(); + if (txt && txt.length > 2) { + const svg = step.querySelector('svg'); + let icon = ''; + if (svg) { + const cls = svg.getAttribute('class') || ''; + if (cls.includes('check')) icon = '✓'; + else if (cls.includes('loader') || cls.includes('spin')) icon = '⟳'; + else if (cls.includes('x-circle') || cls.includes('alert')) icon = '⚠'; } - }); - - // 카드 내부 액션 버튼 추출 (Cancel, Review Changes 등) - const actionKeywords = ['Proceed','Cancel','Open','View','Review','Approve','Reject','Yes','No','Accept','Deny','Allow','Skip']; - const cardBtns = Array.from(card.querySelectorAll('button')).map(b => { - const label = b.textContent.trim(); - const rect = b.getBoundingClientRect(); - return { - label, - x: Math.round(rect.left + rect.width / 2), - y: Math.round(rect.top + rect.height / 2), - w: Math.round(rect.width), - h: Math.round(rect.height), - }; - }).filter(b => b.label && b.w > 0 && actionKeywords.some(k => b.label.includes(k))); - - messages.push({ - type: 'task', - title: titleEl ? titleEl.textContent.trim() : '', - summary: summaryEl ? summaryEl.textContent.trim().substring(0, 500) : '', - collapsed: expanded ? expanded.getAttribute('aria-expanded') === 'false' : true, - steps: steps.slice(0, 20), - actions: cardBtns.slice(0, 5), - }); - continue; - } - - // --- Thought Process --- - const thoughtBtn = block.querySelector('button'); - if (thoughtBtn && thoughtBtn.textContent.includes('Thought for')) { - messages.push({ - type: 'thought', - label: thoughtBtn.textContent.trim(), - collapsed: true, - }); - continue; - } - - // --- 코드 블록 --- - const pre = block.querySelector('pre'); - if (pre && !block.querySelector('.isolate')) { - const codeEl = pre.querySelector('code'); - const lang = codeEl ? (codeEl.className.match(/language-(\\w+)/) || [])[1] || '' : ''; - messages.push({ - type: 'code', - language: lang, - content: (codeEl || pre).textContent.substring(0, 2000), - }); - continue; - } - - // --- 이미지 --- - const img = block.querySelector('img'); - if (img && img.src) { - messages.push({ - type: 'image', - src: img.src, - alt: img.alt || '', - width: img.naturalWidth || img.width || 200, - height: img.naturalHeight || img.height || 150, - }); - continue; - } - - // --- 버튼 영역 (Proceed, Cancel 등) --- - const actionBtns = block.querySelectorAll('button'); - if (actionBtns.length > 0) { - const actionKeywords = ['Proceed','Cancel','Open','View','Review','Approve','Reject','Yes','No','Accept','Deny','Allow','Skip']; - const buttons = Array.from(actionBtns).map(b => { - const label = b.textContent.trim(); - const rect = b.getBoundingClientRect(); - return { - label, - x: Math.round(rect.left + rect.width / 2), - y: Math.round(rect.top + rect.height / 2), - w: Math.round(rect.width), - h: Math.round(rect.height), - }; - }).filter(b => b.label && b.w > 0); - - if (buttons.length > 0 && buttons.some(b => actionKeywords.some(k => b.label.includes(k)))) { - messages.push({ - type: 'actions', - buttons: buttons.slice(0, 8), - }); - continue; + steps.push({ icon, text: txt.substring(0, 200) }); } + }); + + const cardBtns = extractActions(card); + + messages.push({ + type: 'task', + title: titleEl ? titleEl.textContent.trim() : '', + summary: summaryEl ? summaryEl.textContent.trim().substring(0, 500) : '', + collapsed: expanded ? expanded.getAttribute('aria-expanded') === 'false' : true, + steps: steps.slice(0, 20), + actions: cardBtns.slice(0, 5), + }); + } + + // --- Thought Process --- + const thoughtBtns = turn.querySelectorAll('button'); + for (const btn of thoughtBtns) { + if (btn.textContent.includes('Thought for')) { + messages.push({ type: 'thought', label: btn.textContent.trim(), collapsed: true }); } + } - // --- 일반 텍스트 --- - // style 태그 내용을 제외한 순수 텍스트만 추출 - const cloned = block.cloneNode(true); - cloned.querySelectorAll('style').forEach(s => s.remove()); - const text = cloned.textContent.trim(); - if (text.length > 0) { - // CSS 코드/내부 스타일 건너뛰기 - if (text.startsWith('/*') || text.startsWith('@media') || - text.includes('prefers-color-scheme') || - text.includes('{') && text.includes('}') && text.includes(':') && text.includes(';') && text.length < 2000 || - text.startsWith('.markdown-alert')) continue; + // --- isolate 바깥의 마크다운 콘텐츠 --- + // (isolate 내부가 아닌 마크다운 블록) + const allMkEls = turn.querySelectorAll('.leading-relaxed.select-text, .select-text .leading-relaxed'); + for (const mkEl of allMkEls) { + // isolate 내부면 건너뛰기 (이미 task로 처리) + if (mkEl.closest('.isolate')) continue; - // leading-relaxed select-text → 마크다운 렌더링 텍스트 - const mkEl = block.querySelector('.leading-relaxed.select-text'); - // HTML에서도 style 태그를 DOM으로 제거 - const htmlSrc = mkEl || block; - const htmlClone = htmlSrc.cloneNode(true); - htmlClone.querySelectorAll('style').forEach(s => s.remove()); - const htmlContent = htmlClone.innerHTML; - - messages.push({ - type: 'text', - content: text.substring(0, 3000), - html: htmlContent.substring(0, 5000), - }); + const clone = mkEl.cloneNode(true); + clone.querySelectorAll('style').forEach(s => s.remove()); + + const html = clone.innerHTML; + const text = clone.textContent.trim(); + + if (!text || text.length < 2) continue; + if (text.startsWith('/*') || text.includes('prefers-color-scheme')) continue; + + messages.push({ + type: 'text', + content: text.substring(0, 5000), + html: html.substring(0, 10000), + }); + } + + // --- isolate 바깥 독립 코드/이미지/상태 --- + const turnBlocks = turn.querySelectorAll(':scope > *'); + for (const block of turnBlocks) { + if (block.querySelector('.isolate') || block.classList.contains('isolate')) continue; + + // 상태 텍스트 (Running, Generating 등) + if (block.classList.contains('whitespace-nowrap')) { + const st = block.textContent.trim(); + if (st) messages.push({ type: 'status', content: st }); + continue; } } }