fix(bridge): approval flow robustness — pending cleanup, MERGE dedup, false positive filter, auto_resolve, 30min timeout

2026-03-10 00:35:41 +09:00
parent 7fdefb0c63
commit 373c0f7ddc
8 changed files with 340 additions and 22 deletions
--- a/extension/src/extension.ts
+++ b/extension/src/extension.ts
@@ -402,6 +402,7 @@ const pendingResponses = new Map<string, { approved: boolean } | null>();

 // Click trigger: extension sets this, renderer polls and clicks button
 let clickTrigger: { action: 'approve' | 'reject'; timestamp: number } | null = null;
+let sessionStalled = false; // true when session is stalled waiting for approval

 // Deep inspect trigger: curl sets this, renderer picks it up and POSTs results back
 let deepInspectRequested = false;
@@ -437,6 +438,24 @@ function startObserverHttpBridge(): Promise<number> {
                    req.on('end', () => {
                        try {
                            const data = JSON.parse(body);
+
+                            // ── Server-side false positive filter ──
+                            const cmd = (data.command || '').trim();
+                            const FALSE_POSITIVE_RE = /^(Proceed|Continue|Open|Close|OK|Yes|No|Save|Undo|Redo|Back|Next|More|Less|Got it)$/i;
+                            if (FALSE_POSITIVE_RE.test(cmd)) {
+                                logToFile(`[HTTP] filtered false positive: "${cmd}"`);
+                                res.writeHead(200, { 'Content-Type': 'application/json' });
+                                res.end(JSON.stringify({ ok: false, filtered: true }));
+                                return;
+                            }
+                            // Any command starting with "Run" (e.g. the Run button label) → only accept if session is actually stalled (waiting for approval)
+                            if (/^Run/i.test(cmd) && !sessionStalled) {
+                                logToFile(`[HTTP] filtered "Run" — session not stalled`);
+                                res.writeHead(200, { 'Content-Type': 'application/json' });
+                                res.end(JSON.stringify({ ok: false, filtered: true }));
+                                return;
+                            }
+
                            const rid = data.request_id || Date.now().toString();
                            // Write pending file for Discord bot
                            const pendingDir = path.join(bridgePath, 'pending');
@@ -866,8 +885,6 @@ function generateApprovalObserverScript(_port: number): string {
    {re:/^Accept$/i,       type:'agent_step'},
    {re:/^Allow/i,         type:'permission'},
    {re:/^Approve/i,       type:'agent_step'},
-    {re:/^Continue$/i,     type:'continue'},
-    {re:/^Proceed$/i,      type:'continue'},
    {re:/^Retry$/i,        type:'error_recovery'},
    {re:/^Dismiss$/i,      type:'error_recovery'},
  ];
@@ -1379,6 +1396,26 @@ function setupMonitor() {
            }

            if (delta > 0) {
+                sessionStalled = false;
+                // Steps progressed — if we had a pending approval, it was handled in AG directly
+                if (!sawRunningAfterPending && lastPendingStepIndex >= 0) {
+                    // Mark pending as auto_resolved so bot can update Discord message
+                    try {
+                        const pendingFiles = fs.readdirSync(path.join(bridgePath, 'pending'))
+                            .filter((f: string) => f.endsWith('.json'));
+                        for (const pf of pendingFiles) {
+                            const pfPath = path.join(bridgePath, 'pending', pf);
+                            const pd = JSON.parse(fs.readFileSync(pfPath, 'utf-8'));
+                            if (pd.status === 'pending' && pd.step_index === lastPendingStepIndex) {
+                                pd.status = 'auto_resolved';
+                                fs.writeFileSync(pfPath, JSON.stringify(pd, null, 2), 'utf-8');
+                                logToFile(`[AUTO-RESOLVE] step=${lastPendingStepIndex} progressed → marked ${pf}`);
+                                break;
+                            }
+                        }
+                    } catch (e: any) { logToFile(`[AUTO-RESOLVE] error: ${e.message}`); }
+                    lastPendingStepIndex = -1;
+                }
                consecutiveIdleCount = 0;
                sawRunningAfterPending = true;
                stallProbed = false; // allow re-probe on next stall
@@ -1394,6 +1431,7 @@ function setupMonitor() {
                } else {
                    // lastModifiedTime frozen = real stall (approval waiting)
                    consecutiveIdleCount++;
+                    if (consecutiveIdleCount >= 1) sessionStalled = true;
                }
                lastModTime = currentModTime;

@@ -1474,6 +1512,53 @@ function setupMonitor() {
                // Stall fallback REMOVED — step probe is sole fallback source
                // (stall fallback was generating false positives and is now redundant)
            } else if (!isRunning) {
+                // ── Error detection: probe when session transitions from RUNNING→idle ──
+                if (consecutiveIdleCount > 0 && !stallProbed) {
+                    // Was running, now idle — possible error. Probe once.
+                    try {
+                        const stepsResp = await sdk.ls.rawRPC('GetCascadeTrajectorySteps', {
+                            cascadeId: bestSessionId,
+                        });
+                        if (stepsResp?.steps?.length > 0) {
+                            const steps = stepsResp.steps;
+                            // Check last 3 steps for error/failed status
+                            for (let si = steps.length - 1; si >= Math.max(0, steps.length - 3); si--) {
+                                const step = steps[si];
+                                const stepStatus = step?.status || '';
+                                const stepType = step?.type || '';
+                                if (stepStatus.includes('ERROR') || stepStatus.includes('FAILED')) {
+                                    const toolCall = step?.metadata?.toolCall;
+                                    const toolName = toolCall?.name || stepType.replace('CORTEX_STEP_TYPE_', '').toLowerCase();
+                                    let command = `⚠️ Error: ${toolName}`;
+                                    if (toolCall?.argumentsJson) {
+                                        try {
+                                            const args = JSON.parse(toolCall.argumentsJson);
+                                            if (args.CommandLine) command = `⚠️ Error: ${args.CommandLine.substring(0, 100)}`;
+                                            else if (args.TargetFile) command = `⚠️ Error: ${args.TargetFile.split(/[\\/]/).pop()}`;
+                                        } catch { }
+                                    }
+                                    const description = `Step #${si} ${stepStatus} — Retry?`;
+                                    logToFile(`[STEP-PROBE] ★ ERROR! step=${si} status=${stepStatus} type=${stepType}`);
+                                    if (si !== lastPendingStepIndex) {
+                                        stallProbed = true;
+                                        lastPendingStepIndex = si;
+                                        writePendingApproval({
+                                            conversation_id: activeSessionId,
+                                            command,
+                                            description,
+                                            step_type: 'error_recovery',
+                                            step_index: si,
+                                            source: 'step_probe_error',
+                                        });
+                                    }
+                                    break;
+                                }
+                            }
+                        }
+                    } catch (e: any) {
+                        logToFile(`[STEP-PROBE-ERR] error check: ${e.message}`);
+                    }
+                }
                consecutiveIdleCount = 0;
                lastModTime = currentModTime;
            }
@@ -1728,7 +1813,7 @@ function writePendingApproval(data: { conversation_id: string; command: string;
        const pendingDir = path.join(bridgePath, 'pending');
        if (!fs.existsSync(pendingDir)) { fs.mkdirSync(pendingDir, { recursive: true }); }

-        // ── Dedup: skip if DOM observer already created a pending for same action recently ──
+        // ── Dedup: if DOM observer already created a "Run"-only pending, MERGE detailed info into it ──
        const nowMs = Date.now();
        const DEDUP_WINDOW_MS = 15_000; // 15 second dedup window
        try {
@@ -1739,7 +1824,14 @@ function writePendingApproval(data: { conversation_id: string; command: string;
                if (existing.source === 'dom_observer' && existing.status === 'pending') {
                    const age = nowMs - (existing.timestamp * 1000);
                    if (age < DEDUP_WINDOW_MS && age >= 0) {
-                        logToFile(`[DEDUP] skip step_probe pending — DOM observer pending exists: ${ef} (${Math.round(age/1000)}s ago)`);
+                        // MERGE: update DOM observer pending with detailed step_probe info
+                        existing.command = data.command;
+                        existing.description = data.description;
+                        if (data.step_type) existing.step_type = data.step_type;
+                        if (data.step_index !== undefined) existing.step_index = data.step_index;
+                        existing.source = 'dom_observer+step_probe';  // mark as merged
+                        fs.writeFileSync(efPath, JSON.stringify(existing, null, 2), 'utf-8');
+                        logToFile(`[DEDUP] MERGED step_probe info into DOM pending: ${ef} cmd="${data.command.substring(0, 60)}"`);
                        return;
                    }
                }