feat(bridge): DOM Markdown parser restoration (v0.5.56) + code noise filter fix + user msg relay #task-634
This commit is contained in:
@@ -484,15 +484,69 @@ export function generateApprovalObserverScript(_port: number): string {
|
||||
var _lastStepTextSent = false;
|
||||
var _lastResponseBlockCount = 0; // track number of response blocks for AG Native
|
||||
|
||||
function convertNodeToMarkdown(node) {
|
||||
if (!node) return '';
|
||||
if (node.nodeType === 3) return node.textContent; // Text node
|
||||
if (node.nodeType !== 1) return ''; // Skip other node types
|
||||
|
||||
var tag = node.tagName.toLowerCase();
|
||||
|
||||
// Skip hidden or UI elements
|
||||
if (tag === 'style' || tag === 'script' || tag === 'noscript' || tag === 'button' || tag === 'svg') return '';
|
||||
var cls = '';
|
||||
if (typeof node.className === 'string') cls = node.className;
|
||||
else if (node.className && node.className.baseVal) cls = node.className.baseVal;
|
||||
|
||||
if (cls && (cls.indexOf('google-symbols') !== -1 || cls.indexOf('material-icons') !== -1 || cls.indexOf('copy') !== -1 || cls.indexOf('codicon') !== -1)) return '';
|
||||
|
||||
var childrenMd = '';
|
||||
for (var i = 0; i < node.childNodes.length; i++) {
|
||||
childrenMd += convertNodeToMarkdown(node.childNodes[i]);
|
||||
}
|
||||
|
||||
switch (tag) {
|
||||
case 'h1': return '\\n# ' + childrenMd.trim() + '\\n';
|
||||
case 'h2': return '\\n## ' + childrenMd.trim() + '\\n';
|
||||
case 'h3': return '\\n### ' + childrenMd.trim() + '\\n';
|
||||
case 'h4': return '\\n#### ' + childrenMd.trim() + '\\n';
|
||||
case 'p': return '\\n' + childrenMd.trim() + '\\n';
|
||||
case 'div':
|
||||
// Treat specific divs as blocks if they end up behaving like paragraphs
|
||||
if (cls.indexOf('block') !== -1 || cls.indexOf('message') !== -1) return '\\n' + childrenMd.trim() + '\\n';
|
||||
return childrenMd;
|
||||
case 'br': return '\\n';
|
||||
case 'strong':
|
||||
case 'b': return '**' + childrenMd + '**';
|
||||
case 'em':
|
||||
case 'i': return '*' + childrenMd + '*';
|
||||
case 'a':
|
||||
var href = node.getAttribute('href') || '';
|
||||
return '[' + childrenMd + '](' + href + ')';
|
||||
case 'code': return (node.parentNode && node.parentNode.tagName === 'PRE') ? childrenMd : (String.fromCharCode(96) + childrenMd + String.fromCharCode(96));
|
||||
case 'pre': return '\\n' + String.fromCharCode(96,96,96) + '\\n' + childrenMd.trim() + '\\n' + String.fromCharCode(96,96,96) + '\\n';
|
||||
case 'li':
|
||||
var prefix = '- ';
|
||||
if (node.parentNode && node.parentNode.tagName.toLowerCase() === 'ol') {
|
||||
var idx = 1;
|
||||
var curr = node.previousSibling;
|
||||
while(curr) { if (curr.nodeType === 1 && curr.tagName.toLowerCase() === 'li') idx++; curr = curr.previousSibling; }
|
||||
prefix = idx + '. ';
|
||||
}
|
||||
return '\\n' + prefix + childrenMd.trim();
|
||||
case 'ul':
|
||||
case 'ol': return '\\n' + childrenMd + '\\n';
|
||||
case 'blockquote': return '\\n> ' + childrenMd.trim().split('\\n').join('\\n> ') + '\\n';
|
||||
default: return childrenMd;
|
||||
}
|
||||
}
|
||||
|
||||
function extractCleanStepText(stepEl) {
|
||||
if (!stepEl) return '';
|
||||
|
||||
// Clone the step element so we can strip UI elements without affecting the DOM
|
||||
var clone = stepEl.cloneNode(true);
|
||||
|
||||
// v16: Remove style/script/noscript elements FIRST — AG Native markdown injects <style> blocks
|
||||
// that contain CSS rules (e.g. remark-github-blockquote-alert/alert.css) whose textContent
|
||||
// gets captured as AI response text
|
||||
// v16: Remove style/script/noscript elements FIRST
|
||||
var styleEls = clone.querySelectorAll('style, script, noscript, link[rel="stylesheet"]');
|
||||
for (var si = 0; si < styleEls.length; si++) {
|
||||
if (styleEls[si].parentNode) styleEls[si].parentNode.removeChild(styleEls[si]);
|
||||
@@ -518,34 +572,34 @@ export function generateApprovalObserverScript(_port: number): string {
|
||||
|
||||
// Try to get text from markdown rendering area first
|
||||
// AG Native uses .leading-relaxed.select-text, Cascade uses .markdown-body/.prose
|
||||
var mdEl = clone.querySelector('.markdown-body, .prose, [class*="markdown"], [class*="rendered"]');
|
||||
var mdEl = clone.querySelector('.markdown-body, .prose, [class*="markdown"], [class*="rendered"]') || clone;
|
||||
|
||||
// Use our custom DOM-to-Markdown parser instead of innerText
|
||||
var rawText = convertNodeToMarkdown(mdEl).trim();
|
||||
|
||||
// v18 FIX: DO NOT apply cleanLines to full markdown content, it destroys valid code blocks
|
||||
// Safely remove "Thought for X" lines only
|
||||
rawText = rawText.replace(/Thought for \\d+s?/gi, '');
|
||||
rawText = rawText.replace(/Thought for a few seconds/gi, '');
|
||||
|
||||
// v18 FIX: Temporarily attach to DOM to force layout computation for .innerText
|
||||
// Without this, .innerText on unattached node behaves exactly like .textContent (loses block newlines)
|
||||
var container = document.createElement('div');
|
||||
container.style.position = 'absolute';
|
||||
container.style.left = '-9999px';
|
||||
container.style.top = '-9999px';
|
||||
container.style.opacity = '0';
|
||||
container.style.width = '800px';
|
||||
container.appendChild(clone);
|
||||
document.body.appendChild(container);
|
||||
|
||||
var targetEl = mdEl || clone;
|
||||
var rawText = '';
|
||||
try {
|
||||
if (targetEl.innerText && targetEl.innerText.trim().length > 10) {
|
||||
rawText = targetEl.innerText.trim();
|
||||
} else {
|
||||
// Fallback: get all text but filter aggressively
|
||||
rawText = (targetEl.innerText || targetEl.textContent || '').trim();
|
||||
}
|
||||
} finally {
|
||||
if (container.parentNode) container.parentNode.removeChild(container);
|
||||
// Cleanup multiple empty lines
|
||||
var lines = rawText.split('\\n');
|
||||
var finalLines = [];
|
||||
var lastEmpty = false;
|
||||
for (var i = 0; i < lines.length; i++) {
|
||||
var line = lines[i].replace(/\\s+$/, '');
|
||||
if (line.length === 0) {
|
||||
if (!lastEmpty && finalLines.length > 0) {
|
||||
finalLines.push('');
|
||||
lastEmpty = true;
|
||||
}
|
||||
} else {
|
||||
finalLines.push(line);
|
||||
lastEmpty = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Apply line-by-line noise filter
|
||||
return cleanLines(rawText).substring(0, 3500);
|
||||
|
||||
return finalLines.join('\\n').substring(0, 3500);
|
||||
}
|
||||
|
||||
function scanChatBodies() {
|
||||
@@ -561,9 +615,8 @@ export function generateApprovalObserverScript(_port: number): string {
|
||||
}
|
||||
|
||||
if (cv) {
|
||||
// AG Native path: find AI response blocks by class pattern
|
||||
// DOM structure: #conversation > ... > .leading-relaxed.select-text (AI response text)
|
||||
var responseBlocks = cv.querySelectorAll('.leading-relaxed.select-text');
|
||||
// AG Native path: find AI and User response blocks by class pattern
|
||||
var responseBlocks = cv.querySelectorAll('.leading-relaxed.select-text, .text-ide-message-block-user-color, .text-ide-message-block-bot-color, .bg-ide-message-block-user-background');
|
||||
|
||||
if (responseBlocks.length > 0) {
|
||||
// Process the LAST (most recent) response block
|
||||
@@ -587,13 +640,21 @@ export function generateApprovalObserverScript(_port: number): string {
|
||||
}
|
||||
|
||||
var blockText = extractCleanStepText(lastBlock);
|
||||
if (blockText && blockText.length > 30) {
|
||||
// QUALITY CHECK: Skip if the text is mostly short lines (UI artifacts)
|
||||
var lines = blockText.split('\\n').filter(function(l) { return l.trim().length > 0; });
|
||||
var longLines = lines.filter(function(l) { return l.trim().length > 20; });
|
||||
if (longLines.length === 0) {
|
||||
log('AG-Native: skipped (no long lines, likely UI noise)');
|
||||
return;
|
||||
|
||||
var clsStr = (typeof lastBlock.className === 'string') ? lastBlock.className : '';
|
||||
var isUser = clsStr.indexOf('user-color') !== -1 || clsStr.indexOf('user-background') !== -1 || clsStr.indexOf('user-message') !== -1;
|
||||
var role = isUser ? 'user' : 'bot';
|
||||
|
||||
// Bot messages often start empty and stream in. User messages are usually immediate.
|
||||
if (blockText && (blockText.length > 30 || isUser && blockText.length > 0)) {
|
||||
// QUALITY CHECK: Skip if the text is mostly short lines (UI artifacts), BUT skip this check for user messages
|
||||
if (!isUser) {
|
||||
var lines = blockText.split('\\n').filter(function(l) { return l.trim().length > 0; });
|
||||
var longLines = lines.filter(function(l) { return l.trim().length > 20; });
|
||||
if (longLines.length === 0) {
|
||||
log('AG-Native: skipped (no long lines, likely UI noise)');
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for content to stabilize (3s no change)
|
||||
@@ -605,22 +666,24 @@ export function generateApprovalObserverScript(_port: number): string {
|
||||
}
|
||||
|
||||
if (_lastStepTextSent) return;
|
||||
if (Date.now() - _lastStepTextTime < 3000) return; // Still waiting
|
||||
// Bot needs 3s to stabilize, User just needs 500ms
|
||||
var waitTime = isUser ? 500 : 3000;
|
||||
if (Date.now() - _lastStepTextTime < waitTime) return; // Still waiting
|
||||
|
||||
// Content is stable — send it
|
||||
_lastStepTextSent = true;
|
||||
_lastResponseBlockCount = responseBlocks.length;
|
||||
lastBlock.dataset.agChatScraped = 'pending';
|
||||
|
||||
log('AG-Native chat relay: blocks=' + responseBlocks.length + ' text=' + blockText.length + ' chars');
|
||||
(function(el, txt, count) {
|
||||
log('AG-Native chat relay [' + role + ']: blocks=' + responseBlocks.length + ' text=' + blockText.length + ' chars');
|
||||
(function(el, txt, count, r) {
|
||||
fetch(BASE + '/chat', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ text: txt, source: 'ag_native_block_' + count, block_index: count })
|
||||
body: JSON.stringify({ text: txt, source: 'ag_native_block_' + count, block_index: count, role: r })
|
||||
}).then(function() { el.dataset.agChatScraped = 'true'; log('AG-Native chat sent OK'); })
|
||||
.catch(function(e) { el.dataset.agChatScraped = 'false'; log('AG-Native chat send error: ' + e.message); });
|
||||
})(lastBlock, blockText, responseBlocks.length);
|
||||
})(lastBlock, blockText, responseBlocks.length, role);
|
||||
}
|
||||
return; // AG Native path handled — don't fall through to Cascade path
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user