feat(bridge): DOM Markdown parser restoration (v0.5.56) + code noise filter fix + user msg relay #task-634

This commit is contained in:
Variet Worker
2026-04-17 08:06:53 +09:00
parent 13e569f426
commit 6b9f1188c3
5 changed files with 134 additions and 47 deletions

View File

@@ -544,8 +544,10 @@ function _handleChatSnapshot(req: any, res: any, ctx: HttpBridgeContext) {
try {
const data = JSON.parse(body);
if (data.text && typeof ctx.writeChatSnapshot === 'function') {
ctx.writeChatSnapshot(`💬 **[DOM 추출] AI 응답**\n\n${data.text}`);
ctx.logToFile(`[HTTP] chat snapshot written (${data.text.length} chars)`);
const isUser = data.role === 'user';
const prefix = isUser ? '🧑‍💻 **[DOM 추출] 사용자 요청**' : '💬 **[DOM 추출] AI 응답**';
ctx.writeChatSnapshot(`${prefix}\n\n${data.text}`);
ctx.logToFile(`[HTTP] chat snapshot written (${data.text.length} chars, role: ${data.role || 'bot'})`);
}
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ ok: true }));

View File

@@ -484,15 +484,69 @@ export function generateApprovalObserverScript(_port: number): string {
var _lastStepTextSent = false;
var _lastResponseBlockCount = 0; // track number of response blocks for AG Native
function convertNodeToMarkdown(node) {
if (!node) return '';
if (node.nodeType === 3) return node.textContent; // Text node
if (node.nodeType !== 1) return ''; // Skip other node types
var tag = node.tagName.toLowerCase();
// Skip hidden or UI elements
if (tag === 'style' || tag === 'script' || tag === 'noscript' || tag === 'button' || tag === 'svg') return '';
var cls = '';
if (typeof node.className === 'string') cls = node.className;
else if (node.className && node.className.baseVal) cls = node.className.baseVal;
if (cls && (cls.indexOf('google-symbols') !== -1 || cls.indexOf('material-icons') !== -1 || cls.indexOf('copy') !== -1 || cls.indexOf('codicon') !== -1)) return '';
var childrenMd = '';
for (var i = 0; i < node.childNodes.length; i++) {
childrenMd += convertNodeToMarkdown(node.childNodes[i]);
}
switch (tag) {
case 'h1': return '\\n# ' + childrenMd.trim() + '\\n';
case 'h2': return '\\n## ' + childrenMd.trim() + '\\n';
case 'h3': return '\\n### ' + childrenMd.trim() + '\\n';
case 'h4': return '\\n#### ' + childrenMd.trim() + '\\n';
case 'p': return '\\n' + childrenMd.trim() + '\\n';
case 'div':
// Treat specific divs as blocks if they end up behaving like paragraphs
if (cls.indexOf('block') !== -1 || cls.indexOf('message') !== -1) return '\\n' + childrenMd.trim() + '\\n';
return childrenMd;
case 'br': return '\\n';
case 'strong':
case 'b': return '**' + childrenMd + '**';
case 'em':
case 'i': return '*' + childrenMd + '*';
case 'a':
var href = node.getAttribute('href') || '';
return '[' + childrenMd + '](' + href + ')';
case 'code': return (node.parentNode && node.parentNode.tagName === 'PRE') ? childrenMd : (String.fromCharCode(96) + childrenMd + String.fromCharCode(96));
case 'pre': return '\\n' + String.fromCharCode(96,96,96) + '\\n' + childrenMd.trim() + '\\n' + String.fromCharCode(96,96,96) + '\\n';
case 'li':
var prefix = '- ';
if (node.parentNode && node.parentNode.tagName.toLowerCase() === 'ol') {
var idx = 1;
var curr = node.previousSibling;
while(curr) { if (curr.nodeType === 1 && curr.tagName.toLowerCase() === 'li') idx++; curr = curr.previousSibling; }
prefix = idx + '. ';
}
return '\\n' + prefix + childrenMd.trim();
case 'ul':
case 'ol': return '\\n' + childrenMd + '\\n';
case 'blockquote': return '\\n> ' + childrenMd.trim().split('\\n').join('\\n> ') + '\\n';
default: return childrenMd;
}
}
function extractCleanStepText(stepEl) {
if (!stepEl) return '';
// Clone the step element so we can strip UI elements without affecting the DOM
var clone = stepEl.cloneNode(true);
// v16: Remove style/script/noscript elements FIRST — AG Native markdown injects <style> blocks
// that contain CSS rules (e.g. remark-github-blockquote-alert/alert.css) whose textContent
// gets captured as AI response text
// v16: Remove style/script/noscript elements FIRST
var styleEls = clone.querySelectorAll('style, script, noscript, link[rel="stylesheet"]');
for (var si = 0; si < styleEls.length; si++) {
if (styleEls[si].parentNode) styleEls[si].parentNode.removeChild(styleEls[si]);
@@ -518,34 +572,34 @@ export function generateApprovalObserverScript(_port: number): string {
// Try to get text from markdown rendering area first
// AG Native uses .leading-relaxed.select-text, Cascade uses .markdown-body/.prose
var mdEl = clone.querySelector('.markdown-body, .prose, [class*="markdown"], [class*="rendered"]');
var mdEl = clone.querySelector('.markdown-body, .prose, [class*="markdown"], [class*="rendered"]') || clone;
// Use our custom DOM-to-Markdown parser instead of innerText
var rawText = convertNodeToMarkdown(mdEl).trim();
// v18 FIX: DO NOT apply cleanLines to full markdown content, it destroys valid code blocks
// Safely remove "Thought for X" lines only
rawText = rawText.replace(/Thought for \\d+s?/gi, '');
rawText = rawText.replace(/Thought for a few seconds/gi, '');
// v18 FIX: Temporarily attach to DOM to force layout computation for .innerText
// Without this, .innerText on unattached node behaves exactly like .textContent (loses block newlines)
var container = document.createElement('div');
container.style.position = 'absolute';
container.style.left = '-9999px';
container.style.top = '-9999px';
container.style.opacity = '0';
container.style.width = '800px';
container.appendChild(clone);
document.body.appendChild(container);
var targetEl = mdEl || clone;
var rawText = '';
try {
if (targetEl.innerText && targetEl.innerText.trim().length > 10) {
rawText = targetEl.innerText.trim();
} else {
// Fallback: get all text but filter aggressively
rawText = (targetEl.innerText || targetEl.textContent || '').trim();
}
} finally {
if (container.parentNode) container.parentNode.removeChild(container);
// Cleanup multiple empty lines
var lines = rawText.split('\\n');
var finalLines = [];
var lastEmpty = false;
for (var i = 0; i < lines.length; i++) {
var line = lines[i].replace(/\\s+$/, '');
if (line.length === 0) {
if (!lastEmpty && finalLines.length > 0) {
finalLines.push('');
lastEmpty = true;
}
} else {
finalLines.push(line);
lastEmpty = false;
}
}
// Apply line-by-line noise filter
return cleanLines(rawText).substring(0, 3500);
return finalLines.join('\\n').substring(0, 3500);
}
function scanChatBodies() {
@@ -561,9 +615,8 @@ export function generateApprovalObserverScript(_port: number): string {
}
if (cv) {
// AG Native path: find AI response blocks by class pattern
// DOM structure: #conversation > ... > .leading-relaxed.select-text (AI response text)
var responseBlocks = cv.querySelectorAll('.leading-relaxed.select-text');
// AG Native path: find AI and User response blocks by class pattern
var responseBlocks = cv.querySelectorAll('.leading-relaxed.select-text, .text-ide-message-block-user-color, .text-ide-message-block-bot-color, .bg-ide-message-block-user-background');
if (responseBlocks.length > 0) {
// Process the LAST (most recent) response block
@@ -587,13 +640,21 @@ export function generateApprovalObserverScript(_port: number): string {
}
var blockText = extractCleanStepText(lastBlock);
if (blockText && blockText.length > 30) {
// QUALITY CHECK: Skip if the text is mostly short lines (UI artifacts)
var lines = blockText.split('\\n').filter(function(l) { return l.trim().length > 0; });
var longLines = lines.filter(function(l) { return l.trim().length > 20; });
if (longLines.length === 0) {
log('AG-Native: skipped (no long lines, likely UI noise)');
return;
var clsStr = (typeof lastBlock.className === 'string') ? lastBlock.className : '';
var isUser = clsStr.indexOf('user-color') !== -1 || clsStr.indexOf('user-background') !== -1 || clsStr.indexOf('user-message') !== -1;
var role = isUser ? 'user' : 'bot';
// Bot messages often start empty and stream in. User messages are usually immediate.
if (blockText && (blockText.length > 30 || isUser && blockText.length > 0)) {
// QUALITY CHECK: Skip if the text is mostly short lines (UI artifacts), BUT skip this check for user messages
if (!isUser) {
var lines = blockText.split('\\n').filter(function(l) { return l.trim().length > 0; });
var longLines = lines.filter(function(l) { return l.trim().length > 20; });
if (longLines.length === 0) {
log('AG-Native: skipped (no long lines, likely UI noise)');
return;
}
}
// Wait for content to stabilize (3s no change)
@@ -605,22 +666,24 @@ export function generateApprovalObserverScript(_port: number): string {
}
if (_lastStepTextSent) return;
if (Date.now() - _lastStepTextTime < 3000) return; // Still waiting
// Bot needs 3s to stabilize, User just needs 500ms
var waitTime = isUser ? 500 : 3000;
if (Date.now() - _lastStepTextTime < waitTime) return; // Still waiting
// Content is stable — send it
_lastStepTextSent = true;
_lastResponseBlockCount = responseBlocks.length;
lastBlock.dataset.agChatScraped = 'pending';
log('AG-Native chat relay: blocks=' + responseBlocks.length + ' text=' + blockText.length + ' chars');
(function(el, txt, count) {
log('AG-Native chat relay [' + role + ']: blocks=' + responseBlocks.length + ' text=' + blockText.length + ' chars');
(function(el, txt, count, r) {
fetch(BASE + '/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ text: txt, source: 'ag_native_block_' + count, block_index: count })
body: JSON.stringify({ text: txt, source: 'ag_native_block_' + count, block_index: count, role: r })
}).then(function() { el.dataset.agChatScraped = 'true'; log('AG-Native chat sent OK'); })
.catch(function(e) { el.dataset.agChatScraped = 'false'; log('AG-Native chat send error: ' + e.message); });
})(lastBlock, blockText, responseBlocks.length);
})(lastBlock, blockText, responseBlocks.length, role);
}
return; // AG Native path handled — don't fall through to Cascade path
}