wip: [01-stabilize] paused at task 1/1 - OCR Hallucination Immune logic via Semantic delta window and fret-isolation

This commit is contained in:
2026-03-29 22:08:40 +09:00
parent aca7bf592a
commit 2507de45d3
4289 changed files with 732689 additions and 28672 deletions

View File

@@ -0,0 +1,36 @@
{
"name": "brainstorm-server-tests",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "brainstorm-server-tests",
"version": "1.0.0",
"dependencies": {
"ws": "^8.19.0"
}
},
"node_modules/ws": {
"version": "8.19.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz",
"integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==",
"license": "MIT",
"engines": {
"node": ">=10.0.0"
},
"peerDependencies": {
"bufferutil": "^4.0.1",
"utf-8-validate": ">=5.0.2"
},
"peerDependenciesMeta": {
"bufferutil": {
"optional": true
},
"utf-8-validate": {
"optional": true
}
}
}
}
}

View File

@@ -0,0 +1,10 @@
{
"name": "brainstorm-server-tests",
"version": "1.0.0",
"scripts": {
"test": "node server.test.js"
},
"dependencies": {
"ws": "^8.19.0"
}
}

View File

@@ -0,0 +1,427 @@
/**
* Integration tests for the brainstorm server.
*
* Tests the full server behavior: HTTP serving, WebSocket communication,
* file watching, and the brainstorming workflow.
*
* Uses the `ws` npm package as a test client (test-only dependency,
* not shipped to end users).
*/
const { spawn } = require('child_process');
const http = require('http');
const WebSocket = require('ws');
const fs = require('fs');
const path = require('path');
const assert = require('assert');
// Path of the server script that startServer() launches, resolved relative to this test file.
const SERVER_PATH = path.join(__dirname, '../../skills/brainstorming/scripts/server.cjs');
// Port handed to the spawned server via BRAINSTORM_PORT and used for every HTTP/WebSocket request below.
const TEST_PORT = 3334;
// Scratch directory handed to the spawned server via BRAINSTORM_DIR; cleanup() deletes it recursively.
const TEST_DIR = '/tmp/brainstorm-test';
// Directory the tests write .html screens (and non-HTML decoys) into.
const CONTENT_DIR = path.join(TEST_DIR, 'content');
// Directory in which the tests look for the server-info and events files.
const STATE_DIR = path.join(TEST_DIR, 'state');
/**
 * Removes the scratch directory tree used by the tests.
 *
 * `force: true` turns a missing directory into a no-op, which replaces the
 * separate existence check (and the window between check and removal).
 * `maxRetries`/`retryDelay` let the removal retry when the directory is still
 * busy, e.g. because the server process was killed only moments earlier.
 */
function cleanup() {
  fs.rmSync(TEST_DIR, {
    recursive: true,
    force: true,
    maxRetries: 3,
    retryDelay: 100,
  });
}
/**
 * Pauses the calling async function.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) once the delay has elapsed.
 */
async function sleep(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Minimal HTTP GET helper built on the `http` module.
 *
 * @param {string} url - Absolute http:// URL to request.
 * @returns {Promise<{status: number, headers: object, body: string}>}
 *   Status code, response headers and the complete body decoded as UTF-8.
 *   Rejects if the request or the response stream emits an error.
 */
async function fetch(url) {
  return new Promise((resolve, reject) => {
    http.get(url, (res) => {
      // Decode on the stream so a multibyte character split across two
      // chunks is reassembled instead of being decoded chunk by chunk.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => resolve({
        status: res.statusCode,
        headers: res.headers,
        body: data,
      }));
    }).on('error', reject);
  });
}
/**
 * Launches the server script as a child `node` process.
 *
 * The child inherits the current environment, with BRAINSTORM_PORT and
 * BRAINSTORM_DIR overridden by the test port and scratch directory.
 *
 * @returns {import('child_process').ChildProcess} The spawned process.
 */
function startServer() {
  const childEnv = Object.assign({}, process.env, {
    BRAINSTORM_PORT: TEST_PORT,
    BRAINSTORM_DIR: TEST_DIR,
  });
  const spawnOptions = { env: childEnv };
  return spawn('node', [SERVER_PATH], spawnOptions);
}
/**
 * Waits until the child process has printed its `server-started` message.
 *
 * @param {import('child_process').ChildProcess} server - Process whose stdout
 *   and stderr are observed.
 * @returns {Promise<{stdout: string, stderr: string, getStdout: function(): string}>}
 *   `stdout`/`stderr` are snapshots taken at the moment the marker was seen;
 *   `getStdout()` returns everything captured on stdout so far, including
 *   output that arrives later.
 *   Rejects when the process fails to spawn, closes before announcing itself,
 *   or stays silent for 5 seconds.
 */
async function waitForServer(server) {
  let stdout = '';
  let stderr = '';
  return new Promise((resolve, reject) => {
    const timer = setTimeout(
      () => reject(new Error(`Server didn't start. stderr: ${stderr}`)),
      5000
    );
    // Every settle path clears the timer so it cannot keep the event loop
    // alive after the outcome is already known.
    const fail = (err) => {
      clearTimeout(timer);
      reject(err);
    };
    server.stdout.on('data', (data) => {
      stdout += data.toString();
      if (stdout.includes('server-started')) {
        clearTimeout(timer);
        resolve({ stdout, stderr, getStdout: () => stdout });
      }
    });
    server.stderr.on('data', (data) => { stderr += data.toString(); });
    server.on('error', fail);
    // 'close' fires after the stdio streams have ended, so stderr is complete
    // here. After a successful start this handler is a harmless no-op.
    server.on('close', (code, signal) => {
      fail(new Error(
        `Server exited before starting (code ${code}, signal ${signal}). stderr: ${stderr}`
      ));
    });
  });
}
/**
 * Runs the whole integration suite against a freshly spawned brainstorm server.
 *
 * Cases run one after another through the local `test` helper, which prints
 * PASS/FAIL and tallies the outcome. The server is killed and the scratch
 * directory removed on every path (startup failure, thrown error, failed
 * cases) before the function returns or the process exits.
 *
 * @returns {Promise<void>} Settles after all cases ran and cleanup finished.
 *   The process exits with status 1 when at least one case failed.
 */
async function runTests() {
  cleanup();
  const server = startServer();
  let stdoutAccum = '';
  server.stdout.on('data', (data) => { stdoutAccum += data.toString(); });
  let passed = 0;
  let failed = 0;

  // Runs a single case. Awaiting inside try/catch records both rejected
  // promises and assertions thrown synchronously by non-async callbacks.
  async function test(name, fn) {
    try {
      await fn();
      console.log(` PASS: ${name}`);
      passed++;
    } catch (e) {
      console.log(` FAIL: ${name}`);
      console.log(` ${e.message}`);
      failed++;
    }
  }

  // Opens a WebSocket client to the test server; rejects instead of hanging
  // forever when the connection cannot be established.
  function connect() {
    return new Promise((resolve, reject) => {
      const socket = new WebSocket(`ws://localhost:${TEST_PORT}`);
      socket.once('open', () => resolve(socket));
      socket.once('error', reject);
    });
  }

  try {
    // Inside the try block so a failed startup still kills the child process.
    const { stdout: initialStdout } = await waitForServer(server);

    // ========== Server Startup ==========
    console.log('\n--- Server Startup ---');
    await test('outputs server-started JSON on startup', () => {
      const msg = JSON.parse(initialStdout.trim());
      assert.strictEqual(msg.type, 'server-started');
      assert.strictEqual(msg.port, TEST_PORT);
      assert(msg.url, 'Should include URL');
      assert(msg.screen_dir, 'Should include screen_dir');
    });
    await test('writes server-info to state/', () => {
      const infoPath = path.join(STATE_DIR, 'server-info');
      assert(fs.existsSync(infoPath), 'state/server-info should exist');
      const info = JSON.parse(fs.readFileSync(infoPath, 'utf-8').trim());
      assert.strictEqual(info.type, 'server-started');
      assert.strictEqual(info.port, TEST_PORT);
      assert.strictEqual(info.screen_dir, CONTENT_DIR, 'screen_dir should point to content/');
      assert.strictEqual(info.state_dir, STATE_DIR, 'state_dir should point to state/');
    });

    // ========== HTTP Serving ==========
    console.log('\n--- HTTP Serving ---');
    await test('serves waiting page when no screens exist', async () => {
      const res = await fetch(`http://localhost:${TEST_PORT}/`);
      assert.strictEqual(res.status, 200);
      assert(res.body.includes('Waiting for the agent'), 'Should show waiting message');
    });
    await test('injects helper.js into waiting page', async () => {
      const res = await fetch(`http://localhost:${TEST_PORT}/`);
      assert(res.body.includes('WebSocket'), 'Should have helper.js injected');
      assert(res.body.includes('toggleSelect'), 'Should have toggleSelect from helper');
      assert(res.body.includes('brainstorm'), 'Should have brainstorm API from helper');
    });
    await test('returns Content-Type text/html', async () => {
      const res = await fetch(`http://localhost:${TEST_PORT}/`);
      assert(res.headers['content-type'].includes('text/html'), 'Should be text/html');
    });
    await test('serves full HTML documents as-is (not wrapped)', async () => {
      const fullDoc = '<!DOCTYPE html>\n<html><head><title>Custom</title></head><body><h1>Custom Page</h1></body></html>';
      fs.writeFileSync(path.join(CONTENT_DIR, 'full-doc.html'), fullDoc);
      await sleep(300);
      const res = await fetch(`http://localhost:${TEST_PORT}/`);
      assert(res.body.includes('<h1>Custom Page</h1>'), 'Should contain original content');
      assert(res.body.includes('WebSocket'), 'Should still inject helper.js');
      assert(!res.body.includes('indicator-bar'), 'Should NOT wrap in frame template');
    });
    await test('wraps content fragments in frame template', async () => {
      const fragment = '<h2>Pick a layout</h2>\n<div class="options"><div class="option" data-choice="a"><div class="letter">A</div></div></div>';
      fs.writeFileSync(path.join(CONTENT_DIR, 'fragment.html'), fragment);
      await sleep(300);
      const res = await fetch(`http://localhost:${TEST_PORT}/`);
      assert(res.body.includes('indicator-bar'), 'Fragment should get indicator bar');
      assert(!res.body.includes('<!-- CONTENT -->'), 'Placeholder should be replaced');
      assert(res.body.includes('Pick a layout'), 'Fragment content should be present');
      assert(res.body.includes('data-choice="a"'), 'Fragment interactive elements intact');
    });
    await test('serves newest file by mtime', async () => {
      fs.writeFileSync(path.join(CONTENT_DIR, 'older.html'), '<h2>Older</h2>');
      await sleep(100);
      fs.writeFileSync(path.join(CONTENT_DIR, 'newer.html'), '<h2>Newer</h2>');
      await sleep(300);
      const res = await fetch(`http://localhost:${TEST_PORT}/`);
      assert(res.body.includes('Newer'), 'Should serve newest file');
    });
    await test('ignores non-html files for serving', async () => {
      // Write a newer non-HTML file — should still serve newest .html
      fs.writeFileSync(path.join(CONTENT_DIR, 'data.json'), '{"not": "html"}');
      await sleep(300);
      const res = await fetch(`http://localhost:${TEST_PORT}/`);
      assert(res.body.includes('Newer'), 'Should still serve newest HTML');
      assert(!res.body.includes('"not"'), 'Should not serve JSON');
    });
    await test('returns 404 for non-root paths', async () => {
      const res = await fetch(`http://localhost:${TEST_PORT}/other`);
      assert.strictEqual(res.status, 404);
    });

    // ========== WebSocket Communication ==========
    console.log('\n--- WebSocket Communication ---');
    await test('accepts WebSocket upgrade on /', async () => {
      const ws = await connect();
      ws.close();
    });
    await test('relays user events to stdout with source field', async () => {
      stdoutAccum = '';
      const ws = await connect();
      ws.send(JSON.stringify({ type: 'click', text: 'Test Button' }));
      await sleep(300);
      assert(stdoutAccum.includes('"source":"user-event"'), 'Should tag with source');
      assert(stdoutAccum.includes('Test Button'), 'Should include event data');
      ws.close();
    });
    await test('writes choice events to state/events', async () => {
      // Clean up events from prior tests
      const eventsFile = path.join(STATE_DIR, 'events');
      if (fs.existsSync(eventsFile)) fs.unlinkSync(eventsFile);
      const ws = await connect();
      ws.send(JSON.stringify({ type: 'click', choice: 'b', text: 'Option B' }));
      await sleep(300);
      assert(fs.existsSync(eventsFile), '.events should exist');
      const lines = fs.readFileSync(eventsFile, 'utf-8').trim().split('\n');
      const event = JSON.parse(lines[lines.length - 1]);
      assert.strictEqual(event.choice, 'b');
      assert.strictEqual(event.text, 'Option B');
      ws.close();
    });
    await test('does NOT write non-choice events to state/events', async () => {
      const eventsFile = path.join(STATE_DIR, 'events');
      if (fs.existsSync(eventsFile)) fs.unlinkSync(eventsFile);
      const ws = await connect();
      ws.send(JSON.stringify({ type: 'hover', text: 'Something' }));
      await sleep(300);
      // Non-choice events should not create .events file
      assert(!fs.existsSync(eventsFile), '.events should not exist for non-choice events');
      ws.close();
    });
    await test('handles multiple concurrent WebSocket clients', async () => {
      const [ws1, ws2] = await Promise.all([connect(), connect()]);
      let ws1Reload = false;
      let ws2Reload = false;
      ws1.on('message', (data) => {
        if (JSON.parse(data.toString()).type === 'reload') ws1Reload = true;
      });
      ws2.on('message', (data) => {
        if (JSON.parse(data.toString()).type === 'reload') ws2Reload = true;
      });
      fs.writeFileSync(path.join(CONTENT_DIR, 'multi-client.html'), '<h2>Multi</h2>');
      await sleep(500);
      assert(ws1Reload, 'Client 1 should receive reload');
      assert(ws2Reload, 'Client 2 should receive reload');
      ws1.close();
      ws2.close();
    });
    await test('cleans up closed clients from broadcast list', async () => {
      const ws1 = await connect();
      ws1.close();
      await sleep(100);
      // This should not throw even though ws1 is closed
      fs.writeFileSync(path.join(CONTENT_DIR, 'after-close.html'), '<h2>After</h2>');
      await sleep(300);
      // If we got here without error, the test passes
    });
    await test('handles malformed JSON from client gracefully', async () => {
      const ws = await connect();
      // Send invalid JSON — server should not crash
      ws.send('not json at all {{{');
      await sleep(300);
      // Verify server is still responsive
      const res = await fetch(`http://localhost:${TEST_PORT}/`);
      assert.strictEqual(res.status, 200, 'Server should still be running');
      ws.close();
    });

    // ========== File Watching ==========
    console.log('\n--- File Watching ---');
    await test('sends reload on new .html file', async () => {
      const ws = await connect();
      let gotReload = false;
      ws.on('message', (data) => {
        if (JSON.parse(data.toString()).type === 'reload') gotReload = true;
      });
      fs.writeFileSync(path.join(CONTENT_DIR, 'watch-new.html'), '<h2>New</h2>');
      await sleep(500);
      assert(gotReload, 'Should send reload on new file');
      ws.close();
    });
    await test('sends reload on .html file change', async () => {
      const filePath = path.join(CONTENT_DIR, 'watch-change.html');
      fs.writeFileSync(filePath, '<h2>Original</h2>');
      await sleep(500);
      const ws = await connect();
      let gotReload = false;
      ws.on('message', (data) => {
        if (JSON.parse(data.toString()).type === 'reload') gotReload = true;
      });
      fs.writeFileSync(filePath, '<h2>Modified</h2>');
      await sleep(500);
      assert(gotReload, 'Should send reload on file change');
      ws.close();
    });
    await test('does NOT send reload for non-.html files', async () => {
      const ws = await connect();
      let gotReload = false;
      ws.on('message', (data) => {
        if (JSON.parse(data.toString()).type === 'reload') gotReload = true;
      });
      fs.writeFileSync(path.join(CONTENT_DIR, 'data.txt'), 'not html');
      await sleep(500);
      assert(!gotReload, 'Should NOT reload for non-HTML files');
      ws.close();
    });
    await test('clears state/events on new screen', async () => {
      // Create an events file
      const eventsFile = path.join(STATE_DIR, 'events');
      fs.writeFileSync(eventsFile, '{"choice":"a"}\n');
      assert(fs.existsSync(eventsFile));
      fs.writeFileSync(path.join(CONTENT_DIR, 'clear-events.html'), '<h2>New screen</h2>');
      await sleep(500);
      assert(!fs.existsSync(eventsFile), 'state/events should be cleared on new screen');
    });
    await test('logs screen-added on new file', async () => {
      stdoutAccum = '';
      fs.writeFileSync(path.join(CONTENT_DIR, 'log-test.html'), '<h2>Log</h2>');
      await sleep(500);
      assert(stdoutAccum.includes('screen-added'), 'Should log screen-added');
    });
    await test('logs screen-updated on file change', async () => {
      const filePath = path.join(CONTENT_DIR, 'log-update.html');
      fs.writeFileSync(filePath, '<h2>V1</h2>');
      await sleep(500);
      stdoutAccum = '';
      fs.writeFileSync(filePath, '<h2>V2</h2>');
      await sleep(500);
      assert(stdoutAccum.includes('screen-updated'), 'Should log screen-updated');
    });

    // ========== Helper.js Content ==========
    console.log('\n--- Helper.js Verification ---');
    await test('helper.js defines required APIs', () => {
      const helperContent = fs.readFileSync(
        path.join(__dirname, '../../skills/brainstorming/scripts/helper.js'), 'utf-8'
      );
      assert(helperContent.includes('toggleSelect'), 'Should define toggleSelect');
      assert(helperContent.includes('sendEvent'), 'Should define sendEvent');
      assert(helperContent.includes('selectedChoice'), 'Should track selectedChoice');
      assert(helperContent.includes('brainstorm'), 'Should expose brainstorm API');
    });

    // ========== Frame Template ==========
    console.log('\n--- Frame Template Verification ---');
    await test('frame template has required structure', () => {
      const template = fs.readFileSync(
        path.join(__dirname, '../../skills/brainstorming/scripts/frame-template.html'), 'utf-8'
      );
      assert(template.includes('indicator-bar'), 'Should have indicator bar');
      assert(template.includes('indicator-text'), 'Should have indicator text');
      assert(template.includes('<!-- CONTENT -->'), 'Should have content placeholder');
      assert(template.includes('claude-content'), 'Should have content container');
    });

    // ========== Summary ==========
    console.log(`\n--- Results: ${passed} passed, ${failed} failed ---`);
  } finally {
    server.kill();
    await sleep(100);
    cleanup();
  }
  // Exit only after the finally block ran: process.exit() inside the try
  // would terminate immediately and skip killing the server and cleanup.
  if (failed > 0) process.exit(1);
}
// Script entry point: any error escaping the suite is reported and turned
// into a non-zero exit status.
runTests().then(undefined, (fatal) => {
  console.error('Test failed:', fatal);
  process.exit(1);
});

View File

@@ -0,0 +1,351 @@
#!/usr/bin/env bash
# Windows lifecycle tests for the brainstorm server.
#
# Verifies that the brainstorm server survives the 60-second lifecycle
# check on Windows, where OWNER_PID monitoring is disabled because the
# MSYS2 PID namespace is invisible to Node.js.
#
# Requirements:
# - Node.js in PATH
# - Run from the repository root, or set SUPERPOWERS_ROOT
# - On Windows: Git Bash (OSTYPE=msys*)
#
# Usage:
# bash tests/brainstorm-server/windows-lifecycle.test.sh
# Fail on unset variables and on failures inside pipelines. `-e` is not set,
# so a failing check is recorded through fail() rather than aborting the run.
set -uo pipefail
# ========== Configuration ==========
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="${SUPERPOWERS_ROOT:-$(cd "$SCRIPT_DIR/../.." && pwd)}"
START_SCRIPT="$REPO_ROOT/skills/brainstorming/scripts/start-server.sh"
STOP_SCRIPT="$REPO_ROOT/skills/brainstorming/scripts/stop-server.sh"

# Prints the server entry point inside the scripts directory given as $1.
# server.js is preferred; server.cjs (the name the JavaScript tests in this
# directory load) is used only when server.js is missing and server.cjs exists.
resolve_server_js() {
  local scripts_dir="$1"
  if [[ ! -f "$scripts_dir/server.js" && -f "$scripts_dir/server.cjs" ]]; then
    printf '%s\n' "$scripts_dir/server.cjs"
  else
    printf '%s\n' "$scripts_dir/server.js"
  fi
}

SERVER_JS="$(resolve_server_js "$REPO_ROOT/skills/brainstorming/scripts")"
# Per-run scratch directory; the shell PID suffix keeps concurrent runs apart.
TEST_DIR="${TMPDIR:-/tmp}/brainstorm-win-test-$$"
# Result counters updated by pass(), fail() and skip().
passed=0
failed=0
skipped=0
# ========== Helpers ==========
# Exit handler: stops every server this script may still have running and
# deletes the scratch directory.
cleanup() {
  local child
  # Kill any server processes we started; empty slots are skipped.
  for child in "${SERVER_PID:-}" "${CONTROL_PID:-}" "${STOP_TEST_PID:-}"; do
    [[ -z "$child" ]] && continue
    kill "$child" 2>/dev/null || true
    wait "$child" 2>/dev/null || true
  done
  if [[ -z "${TEST_DIR:-}" || ! -d "$TEST_DIR" ]]; then
    return 0
  fi
  rm -rf "$TEST_DIR"
}
trap cleanup EXIT
# Records a passing check: prints the label ($1) and bumps the pass counter.
pass() {
  local label="$1"
  printf ' PASS: %s\n' "$label"
  passed=$((passed + 1))
}
# Records a failing check: prints the label ($1) followed by a detail line
# ($2) and bumps the failure counter.
fail() {
  local label="$1"
  local detail="$2"
  printf ' FAIL: %s\n %s\n' "$label" "$detail"
  failed=$((failed + 1))
}
# Records a skipped check: prints the label ($1) with the reason ($2) in
# parentheses and bumps the skip counter.
skip() {
  local label="$1"
  local reason="$2"
  printf ' SKIP: %s (%s)\n' "$label" "$reason"
  skipped=$((skipped + 1))
}
# Polls for the server's info file for up to ~5 seconds (50 tries, 0.1s apart).
#
# $1: the directory passed to the server as BRAINSTORM_DIR.
# Returns 0 as soon as the file exists, 1 if it never appears.
#
# Two locations are accepted: "$1/.server-info" and "$1/state/server-info".
# The latter is the location the JavaScript integration test in this directory
# asserts on.
wait_for_server_info() {
  local dir="$1"
  local attempt
  for ((attempt = 0; attempt < 50; attempt++)); do
    if [[ -f "$dir/.server-info" || -f "$dir/state/server-info" ]]; then
      return 0
    fi
    sleep 0.1
  done
  return 1
}
# Prints the port recorded in the server's info file.
#
# $1: the directory passed to the server as BRAINSTORM_DIR.
#
# Reads "$1/.server-info", falling back to "$1/state/server-info" when only
# that file exists. Uses grep instead of Node.js to avoid MSYS2-to-Windows
# path translation issues. Whitespace around the colon is tolerated so both
# compact and pretty-printed JSON are handled; only the first match is used.
get_port_from_info() {
  local info_file="$1/.server-info"
  if [[ ! -f "$info_file" && -f "$1/state/server-info" ]]; then
    info_file="$1/state/server-info"
  fi
  grep -Eo '"port"[[:space:]]*:[[:space:]]*[0-9]+' "$info_file" | head -1 | grep -Eo '[0-9]+$'
}
# Succeeds (exit 0) only if GET http://localhost:<port>/ answers with 200.
#
# $1: TCP port to probe.
#
# A port that is not purely numeric is rejected up front: it is interpolated
# into the JavaScript source below, and an empty value would produce a URL
# without a port, which would probe the default HTTP port instead of the server.
# The request is abandoned after 5 seconds so a hung server cannot stall the run.
http_check() {
  local port="$1"
  if [[ ! "$port" =~ ^[0-9]+$ ]]; then
    return 1
  fi
  node -e "
    const http = require('http');
    const req = http.get('http://localhost:$port/', (res) => {
      process.exit(res.statusCode === 200 ? 0 : 1);
    });
    req.on('error', () => process.exit(1));
    req.setTimeout(5000, () => process.exit(1));
  " 2>/dev/null
}
# ========== Platform Detection ==========
# Banner describing the environment the tests run in.
printf '\n=== Brainstorm Server Windows Lifecycle Tests ===\n'
printf 'Platform: %s\n' "${OSTYPE:-unknown}"
printf 'MSYSTEM: %s\n' "${MSYSTEM:-unset}"
printf 'Node: %s\n' "$(node --version 2>/dev/null || echo 'not found')"
printf '\n'

# Treated as Windows when MSYSTEM is set or OSTYPE names an MSYS/Cygwin/MinGW shell.
if [[ -n "${MSYSTEM:-}" || "${OSTYPE:-}" == msys* || "${OSTYPE:-}" == cygwin* || "${OSTYPE:-}" == mingw* ]]; then
  is_windows="true"
else
  is_windows="false"
fi

if [[ "$is_windows" == "false" ]]; then
  printf 'NOTE: Not running on Windows/MSYS2 (OSTYPE=%s).\n' "${OSTYPE:-unset}"
  printf 'Windows-specific tests will be skipped. Tests 4-6 still run.\n'
  printf '\n'
fi

mkdir -p "$TEST_DIR"
# PID slots inspected by cleanup(); empty means "nothing to kill".
SERVER_PID=""
CONTROL_PID=""
STOP_TEST_PID=""
# ========== Test 1: OWNER_PID is empty on Windows ==========
# Checks a local copy of the owner-PID resolution (not start-server.sh itself;
# Test 2 runs the real script). Skipped entirely when not on Windows.
echo "--- Owner PID Resolution ---"
if [[ "$is_windows" == "true" ]]; then
# Replicate the PID resolution logic from start-server.sh lines 104-112
# Look up the parent of this shell's parent; a failed lookup yields an empty string.
TEST_OWNER_PID="$(ps -o ppid= -p "$PPID" 2>/dev/null | tr -d ' ' || true)"
# Fall back to the direct parent when the lookup gave nothing or PID 1.
if [[ -z "$TEST_OWNER_PID" || "$TEST_OWNER_PID" == "1" ]]; then
TEST_OWNER_PID="$PPID"
fi
# The fix: clear on Windows
case "${OSTYPE:-}" in
msys*|cygwin*|mingw*) TEST_OWNER_PID="" ;;
esac
# NOTE(review): when Windows was detected only through MSYSTEM (OSTYPE not
# msys*/cygwin*/mingw*), the value is not cleared and this check fails.
if [[ -z "$TEST_OWNER_PID" ]]; then
pass "OWNER_PID is empty on Windows after fix"
else
fail "OWNER_PID is empty on Windows after fix" \
"Expected empty, got '$TEST_OWNER_PID'"
fi
else
skip "OWNER_PID is empty on Windows" "not on Windows"
fi
# ========== Test 2: start-server.sh passes empty BRAINSTORM_OWNER_PID ==========
# Runs the real start script with a stub `node` first on PATH and inspects the
# BRAINSTORM_OWNER_PID value the stub was started with.
if [[ "$is_windows" == "true" ]]; then
# Use a fake 'node' that captures the env var and exits
FAKE_NODE_DIR="$TEST_DIR/fake-bin"
mkdir -p "$FAKE_NODE_DIR"
# The stub prints __UNSET__ when the variable is unset or empty.
cat > "$FAKE_NODE_DIR/node" <<'FAKENODE'
#!/usr/bin/env bash
echo "CAPTURED_OWNER_PID=${BRAINSTORM_OWNER_PID:-__UNSET__}"
exit 0
FAKENODE
chmod +x "$FAKE_NODE_DIR/node"
# stderr is discarded and a non-zero exit from the start script is tolerated;
# only the captured stdout matters.
captured=$(PATH="$FAKE_NODE_DIR:$PATH" bash "$START_SCRIPT" --project-dir "$TEST_DIR/session" --foreground 2>/dev/null || true)
# Take the first marker line and strip the prefix, leaving just the value.
owner_pid_value=$(echo "$captured" | grep "CAPTURED_OWNER_PID=" | head -1 | sed 's/CAPTURED_OWNER_PID=//')
# NOTE(review): an empty value also results when the marker line is missing
# altogether (stub never ran), so that case is reported as a pass too.
if [[ "$owner_pid_value" == "" || "$owner_pid_value" == "__UNSET__" ]]; then
pass "start-server.sh passes empty BRAINSTORM_OWNER_PID on Windows"
else
fail "start-server.sh passes empty BRAINSTORM_OWNER_PID on Windows" \
"Expected empty or unset, got '$owner_pid_value'"
fi
# Remove the stub so later tests use the real node again.
rm -rf "$FAKE_NODE_DIR" "$TEST_DIR/session"
else
skip "start-server.sh passes empty BRAINSTORM_OWNER_PID" "not on Windows"
fi
# ========== Test 3: Auto-foreground detection on Windows ==========
# Runs the start script without --foreground, again with a stub `node` first on
# PATH, and passes when the stub's marker shows up on the script's stdout.
echo ""
echo "--- Foreground Mode Detection ---"
if [[ "$is_windows" == "true" ]]; then
FAKE_NODE_DIR="$TEST_DIR/fake-bin"
mkdir -p "$FAKE_NODE_DIR"
# The stub prints the marker whenever it is invoked, whatever its arguments.
cat > "$FAKE_NODE_DIR/node" <<'FAKENODE'
#!/usr/bin/env bash
echo "FOREGROUND_MODE=true"
exit 0
FAKENODE
chmod +x "$FAKE_NODE_DIR/node"
# Run WITHOUT --foreground flag — Windows should auto-detect
# NOTE(review): presumably the background code path keeps node's output off
# the script's stdout, which is what makes the marker a foreground signal —
# confirm against start-server.sh.
captured=$(PATH="$FAKE_NODE_DIR:$PATH" bash "$START_SCRIPT" --project-dir "$TEST_DIR/session2" 2>/dev/null || true)
if echo "$captured" | grep -q "FOREGROUND_MODE=true"; then
pass "Windows auto-detects foreground mode"
else
fail "Windows auto-detects foreground mode" \
"Expected foreground code path, output: $captured"
fi
rm -rf "$FAKE_NODE_DIR" "$TEST_DIR/session2"
else
skip "Windows auto-detects foreground mode" "not on Windows"
fi
# ========== Test 4: Server survives past 60-second lifecycle check ==========
# Runs on every platform. Starts the real server with an empty
# BRAINSTORM_OWNER_PID, waits 75 seconds, then verifies that the process is
# alive, answers HTTP, and did not log an owner-exit shutdown.
echo ""
echo "--- Server Survival (lifecycle check) ---"
mkdir -p "$TEST_DIR/survival"
echo " Starting server (will wait ~75s to verify survival past lifecycle check)..."
# Server runs in the background on a random port in 49152-65534; stdout and
# stderr both go to the log file inspected further down.
BRAINSTORM_DIR="$TEST_DIR/survival" \
BRAINSTORM_HOST="127.0.0.1" \
BRAINSTORM_URL_HOST="localhost" \
BRAINSTORM_OWNER_PID="" \
BRAINSTORM_PORT=$((49152 + RANDOM % 16383)) \
node "$SERVER_JS" > "$TEST_DIR/survival/.server.log" 2>&1 &
# Stored in SERVER_PID so cleanup() can kill the server if the script exits early.
SERVER_PID=$!
if ! wait_for_server_info "$TEST_DIR/survival"; then
fail "Server starts successfully" "Server did not write .server-info within 5 seconds"
kill "$SERVER_PID" 2>/dev/null || true
SERVER_PID=""
else
pass "Server starts successfully with empty OWNER_PID"
# The port is read back from the info file rather than remembered from above.
SERVER_PORT=$(get_port_from_info "$TEST_DIR/survival")
# Sleep past the 60-second lifecycle check named in this test's title.
sleep 75
# kill -0 sends no signal; it only reports whether the process still exists.
if kill -0 "$SERVER_PID" 2>/dev/null; then
pass "Server is still alive after 75 seconds"
else
fail "Server is still alive after 75 seconds" \
"Server died. Log tail: $(tail -5 "$TEST_DIR/survival/.server.log" 2>/dev/null)"
fi
if http_check "$SERVER_PORT"; then
pass "Server responds to HTTP after lifecycle check window"
else
fail "Server responds to HTTP after lifecycle check window" \
"HTTP request to port $SERVER_PORT failed"
fi
# The same log message is expected to appear in the control run (Test 5).
if grep -q "owner process exited" "$TEST_DIR/survival/.server.log" 2>/dev/null; then
fail "No 'owner process exited' in logs" \
"Found spurious owner-exit shutdown in log"
else
pass "No 'owner process exited' in logs"
fi
# Stop the server and clear the slot so cleanup() does not touch it again.
kill "$SERVER_PID" 2>/dev/null || true
wait "$SERVER_PID" 2>/dev/null || true
SERVER_PID=""
fi
# ========== Test 5: Bad OWNER_PID causes shutdown (control) ==========
# Control for Test 4: with BRAINSTORM_OWNER_PID set to a PID that does not
# exist, the server is expected to terminate itself within 75 seconds and to
# log "owner process exited".
echo ""
echo "--- Control: Bad OWNER_PID causes shutdown ---"
mkdir -p "$TEST_DIR/control"
# Find a PID that does not exist
# Starts at 99999 and counts upward until kill -0 reports no such process.
BAD_PID=99999
while kill -0 "$BAD_PID" 2>/dev/null; do
BAD_PID=$((BAD_PID + 1))
done
# Same launch as Test 4 except for the owner PID and the working directory.
BRAINSTORM_DIR="$TEST_DIR/control" \
BRAINSTORM_HOST="127.0.0.1" \
BRAINSTORM_URL_HOST="localhost" \
BRAINSTORM_OWNER_PID="$BAD_PID" \
BRAINSTORM_PORT=$((49152 + RANDOM % 16383)) \
node "$SERVER_JS" > "$TEST_DIR/control/.server.log" 2>&1 &
# Stored in CONTROL_PID so cleanup() can kill the server if the script exits early.
CONTROL_PID=$!
if ! wait_for_server_info "$TEST_DIR/control"; then
fail "Control server starts" "Server did not write .server-info within 5 seconds"
kill "$CONTROL_PID" 2>/dev/null || true
CONTROL_PID=""
else
pass "Control server starts with bad OWNER_PID=$BAD_PID"
echo " Waiting ~75s for lifecycle check to kill server..."
sleep 75
# Here a live process is the failure case; it is killed so it does not linger.
if kill -0 "$CONTROL_PID" 2>/dev/null; then
fail "Control server self-terminates with bad OWNER_PID" \
"Server is still alive (expected it to die)"
kill "$CONTROL_PID" 2>/dev/null || true
else
pass "Control server self-terminates with bad OWNER_PID"
fi
if grep -q "owner process exited" "$TEST_DIR/control/.server.log" 2>/dev/null; then
pass "Control server logs 'owner process exited'"
else
fail "Control server logs 'owner process exited'" \
"Log tail: $(tail -5 "$TEST_DIR/control/.server.log" 2>/dev/null)"
fi
fi
# Reap the child; errors (including an already-cleared PID) are ignored.
wait "$CONTROL_PID" 2>/dev/null || true
CONTROL_PID=""
# ========== Test 6: stop-server.sh cleanly stops the server ==========
# Starts a server, writes its PID to .server.pid in the session directory,
# runs stop-server.sh on that directory and expects the process to be gone
# one second later.
echo ""
echo "--- Clean Shutdown ---"
mkdir -p "$TEST_DIR/stop-test"
BRAINSTORM_DIR="$TEST_DIR/stop-test" \
BRAINSTORM_HOST="127.0.0.1" \
BRAINSTORM_URL_HOST="localhost" \
BRAINSTORM_OWNER_PID="" \
BRAINSTORM_PORT=$((49152 + RANDOM % 16383)) \
node "$SERVER_JS" > "$TEST_DIR/stop-test/.server.log" 2>&1 &
# Stored in STOP_TEST_PID so cleanup() can kill the server if the script exits early.
STOP_TEST_PID=$!
# NOTE(review): presumably stop-server.sh reads this PID file to find the
# process to stop — confirm against the script.
echo "$STOP_TEST_PID" > "$TEST_DIR/stop-test/.server.pid"
if ! wait_for_server_info "$TEST_DIR/stop-test"; then
fail "Stop-test server starts" "Server did not start"
kill "$STOP_TEST_PID" 2>/dev/null || true
STOP_TEST_PID=""
else
# Output and exit status of the stop script are ignored; the verdict is based
# solely on whether the process still exists afterwards.
bash "$STOP_SCRIPT" "$TEST_DIR/stop-test" >/dev/null 2>&1 || true
sleep 1
if ! kill -0 "$STOP_TEST_PID" 2>/dev/null; then
pass "stop-server.sh cleanly stops the server"
else
fail "stop-server.sh cleanly stops the server" \
"Server PID $STOP_TEST_PID is still alive after stop"
kill "$STOP_TEST_PID" 2>/dev/null || true
fi
fi
# Reap the child; errors (including an already-cleared PID) are ignored.
wait "$STOP_TEST_PID" 2>/dev/null || true
STOP_TEST_PID=""
# ========== Summary ==========
# Print the tallies; the exit status is 1 when any check failed, 0 otherwise.
printf '\n=== Results: %s passed, %s failed, %s skipped ===\n' "$passed" "$failed" "$skipped"
if (( failed > 0 )); then
  exit 1
fi
exit 0

View File

@@ -0,0 +1,392 @@
/**
* Unit tests for the zero-dependency WebSocket protocol implementation.
*
* Tests the WebSocket frame encoding/decoding, handshake computation,
* and protocol-level behavior independent of the HTTP server.
*
* The module under test exports:
* - computeAcceptKey(clientKey) -> string
* - encodeFrame(opcode, payload) -> Buffer
* - decodeFrame(buffer) -> { opcode, payload, bytesConsumed } | null
* - OPCODES: { TEXT, CLOSE, PING, PONG }
*/
const assert = require('assert');
const crypto = require('crypto');
const path = require('path');
// The module under test — will be the new zero-dep server file
// (path is resolved relative to this test file).
const SERVER_PATH = path.join(__dirname, '../../skills/brainstorming/scripts/server.cjs');
// Exports of the module under test; the tests below call ws.computeAcceptKey,
// ws.encodeFrame and ws.decodeFrame and read ws.OPCODES.
let ws;
try {
ws = require(SERVER_PATH);
} catch (e) {
// Module doesn't exist yet (TDD — tests written before implementation)
// Any load error is reported and ends the process with status 1 before a
// single test runs.
console.error(`Cannot load ${SERVER_PATH}: ${e.message}`);
console.error('This is expected if running tests before implementation.');
process.exit(1);
}
function runTests() {
let passed = 0;
let failed = 0;
/**
 * Runs one synchronous test case and records the outcome.
 *
 * @param {string} name - Label printed next to PASS/FAIL.
 * @param {function(): void} fn - Test body; throwing marks the case as failed.
 */
function test(name, fn) {
  let threw = false;
  let problem;
  try {
    fn();
  } catch (err) {
    threw = true;
    problem = err;
  }
  if (threw) {
    console.log(` FAIL: ${name}`);
    console.log(` ${problem.message}`);
    failed++;
    return;
  }
  console.log(` PASS: ${name}`);
  passed++;
}
// ========== Handshake ==========
console.log('\n--- WebSocket Handshake ---');
test('computeAcceptKey produces correct RFC 6455 accept value', () => {
// RFC 6455 Section 4.2.2 example
// The magic GUID is "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
const clientKey = 'dGhlIHNhbXBsZSBub25jZQ==';
const expected = 's3pPLMBiTxaQ9kYGzzhZRbK+xOo=';
assert.strictEqual(ws.computeAcceptKey(clientKey), expected);
});
test('computeAcceptKey produces valid base64 for random keys', () => {
for (let i = 0; i < 10; i++) {
const randomKey = crypto.randomBytes(16).toString('base64');
const result = ws.computeAcceptKey(randomKey);
// Result should be valid base64
assert.strictEqual(Buffer.from(result, 'base64').toString('base64'), result);
// SHA-1 output is 20 bytes, base64 encoded = 28 chars
assert.strictEqual(result.length, 28);
}
});
// ========== Frame Encoding ==========
console.log('\n--- Frame Encoding (server -> client) ---');
test('encodes small text frame (< 126 bytes)', () => {
const payload = 'Hello';
const frame = ws.encodeFrame(ws.OPCODES.TEXT, Buffer.from(payload));
// FIN bit + TEXT opcode = 0x81, length = 5
assert.strictEqual(frame[0], 0x81);
assert.strictEqual(frame[1], 5);
assert.strictEqual(frame.slice(2).toString(), 'Hello');
assert.strictEqual(frame.length, 7);
});
test('encodes empty text frame', () => {
const frame = ws.encodeFrame(ws.OPCODES.TEXT, Buffer.alloc(0));
assert.strictEqual(frame[0], 0x81);
assert.strictEqual(frame[1], 0);
assert.strictEqual(frame.length, 2);
});
test('encodes medium text frame (126-65535 bytes)', () => {
const payload = Buffer.alloc(200, 0x41); // 200 'A's
const frame = ws.encodeFrame(ws.OPCODES.TEXT, payload);
assert.strictEqual(frame[0], 0x81);
assert.strictEqual(frame[1], 126); // extended length marker
assert.strictEqual(frame.readUInt16BE(2), 200);
assert.strictEqual(frame.slice(4).toString(), payload.toString());
assert.strictEqual(frame.length, 204);
});
test('encodes frame at exactly 126 bytes (boundary)', () => {
const payload = Buffer.alloc(126, 0x42);
const frame = ws.encodeFrame(ws.OPCODES.TEXT, payload);
assert.strictEqual(frame[1], 126); // extended length marker
assert.strictEqual(frame.readUInt16BE(2), 126);
assert.strictEqual(frame.length, 130);
});
test('encodes frame at exactly 125 bytes (max small)', () => {
const payload = Buffer.alloc(125, 0x43);
const frame = ws.encodeFrame(ws.OPCODES.TEXT, payload);
assert.strictEqual(frame[1], 125);
assert.strictEqual(frame.length, 127);
});
test('encodes large frame (> 65535 bytes)', () => {
const payload = Buffer.alloc(70000, 0x44);
const frame = ws.encodeFrame(ws.OPCODES.TEXT, payload);
assert.strictEqual(frame[0], 0x81);
assert.strictEqual(frame[1], 127); // 64-bit length marker
// 8-byte extended length at offset 2
const len = Number(frame.readBigUInt64BE(2));
assert.strictEqual(len, 70000);
assert.strictEqual(frame.length, 10 + 70000);
});
test('encodes close frame', () => {
const frame = ws.encodeFrame(ws.OPCODES.CLOSE, Buffer.alloc(0));
assert.strictEqual(frame[0], 0x88); // FIN + CLOSE
assert.strictEqual(frame[1], 0);
});
test('encodes pong frame with payload', () => {
const payload = Buffer.from('ping-data');
const frame = ws.encodeFrame(ws.OPCODES.PONG, payload);
assert.strictEqual(frame[0], 0x8A); // FIN + PONG
assert.strictEqual(frame[1], payload.length);
assert.strictEqual(frame.slice(2).toString(), 'ping-data');
});
test('server frames are never masked (per RFC 6455)', () => {
const frame = ws.encodeFrame(ws.OPCODES.TEXT, Buffer.from('test'));
// Bit 7 of byte 1 is the mask bit — must be 0 for server frames
assert.strictEqual(frame[1] & 0x80, 0);
});
// ========== Frame Decoding ==========
console.log('\n--- Frame Decoding (client -> server) ---');
/**
 * Builds a masked WebSocket frame the way a client would send it.
 *
 * Layout: one byte of FIN flag + opcode, the length field with the mask bit
 * set (1, 3 or 9 bytes depending on payload size), a random 4-byte masking
 * key, then the payload XOR-ed with that key.
 *
 * @param {number} opcode - Frame opcode placed in the low bits of byte 0.
 * @param {string|Buffer} payload - Unmasked payload.
 * @param {boolean} [fin=true] - Whether to set the FIN bit.
 * @returns {Buffer} The complete frame.
 */
function makeClientFrame(opcode, payload, fin = true) {
  const data = Buffer.from(payload);
  const maskKey = crypto.randomBytes(4);
  const firstByte = Buffer.from([(fin ? 0x80 : 0x00) | opcode]);

  // Length field; 0x80 marks the frame as masked.
  let lengthField;
  if (data.length <= 125) {
    lengthField = Buffer.from([0x80 | data.length]);
  } else if (data.length <= 0xffff) {
    lengthField = Buffer.alloc(3);
    lengthField[0] = 0x80 | 126;
    lengthField.writeUInt16BE(data.length, 1);
  } else {
    lengthField = Buffer.alloc(9);
    lengthField[0] = 0x80 | 127;
    lengthField.writeBigUInt64BE(BigInt(data.length), 1);
  }

  const scrambled = Buffer.from(data.map((byte, i) => byte ^ maskKey[i % 4]));
  return Buffer.concat([firstByte, lengthField, maskKey, scrambled]);
}
test('decodes small masked text frame', () => {
const frame = makeClientFrame(0x01, 'Hello');
const result = ws.decodeFrame(frame);
assert(result, 'Should return a result');
assert.strictEqual(result.opcode, ws.OPCODES.TEXT);
assert.strictEqual(result.payload.toString(), 'Hello');
assert.strictEqual(result.bytesConsumed, frame.length);
});
test('decodes empty masked text frame', () => {
const frame = makeClientFrame(0x01, '');
const result = ws.decodeFrame(frame);
assert(result, 'Should return a result');
assert.strictEqual(result.opcode, ws.OPCODES.TEXT);
assert.strictEqual(result.payload.length, 0);
});
test('decodes medium masked text frame (126-65535 bytes)', () => {
const payload = 'A'.repeat(200);
const frame = makeClientFrame(0x01, payload);
const result = ws.decodeFrame(frame);
assert(result, 'Should return a result');
assert.strictEqual(result.payload.toString(), payload);
});
test('decodes large masked text frame (> 65535 bytes)', () => {
const payload = 'B'.repeat(70000);
const frame = makeClientFrame(0x01, payload);
const result = ws.decodeFrame(frame);
assert(result, 'Should return a result');
assert.strictEqual(result.payload.length, 70000);
assert.strictEqual(result.payload.toString(), payload);
});
test('decodes masked close frame', () => {
const frame = makeClientFrame(0x08, '');
const result = ws.decodeFrame(frame);
assert(result, 'Should return a result');
assert.strictEqual(result.opcode, ws.OPCODES.CLOSE);
});
test('decodes masked ping frame', () => {
const frame = makeClientFrame(0x09, 'ping!');
const result = ws.decodeFrame(frame);
assert(result, 'Should return a result');
assert.strictEqual(result.opcode, ws.OPCODES.PING);
assert.strictEqual(result.payload.toString(), 'ping!');
});
test('returns null for incomplete frame (not enough header bytes)', () => {
const result = ws.decodeFrame(Buffer.from([0x81]));
assert.strictEqual(result, null, 'Should return null for 1-byte buffer');
});
test('returns null for incomplete frame (header ok, payload truncated)', () => {
// Create a valid frame then truncate it
const frame = makeClientFrame(0x01, 'Hello World');
const truncated = frame.slice(0, frame.length - 3);
const result = ws.decodeFrame(truncated);
assert.strictEqual(result, null, 'Should return null for truncated frame');
});
test('returns null for incomplete extended-length header', () => {
// Frame claiming 16-bit length but only 3 bytes total
const buf = Buffer.alloc(3);
buf[0] = 0x81;
buf[1] = 0x80 | 126; // masked, 16-bit extended
// Missing the 2 length bytes + mask
const result = ws.decodeFrame(buf);
assert.strictEqual(result, null);
});
test('rejects unmasked client frame', () => {
// Server MUST reject unmasked client frames per RFC 6455 Section 5.1
const buf = Buffer.alloc(7);
buf[0] = 0x81; // FIN + TEXT
buf[1] = 5; // length 5, NO mask bit
Buffer.from('Hello').copy(buf, 2);
assert.throws(() => ws.decodeFrame(buf), /mask/i, 'Should reject unmasked client frame');
});
test('handles multiple frames in a single buffer', () => {
const frame1 = makeClientFrame(0x01, 'first');
const frame2 = makeClientFrame(0x01, 'second');
const combined = Buffer.concat([frame1, frame2]);
const result1 = ws.decodeFrame(combined);
assert(result1, 'Should decode first frame');
assert.strictEqual(result1.payload.toString(), 'first');
assert.strictEqual(result1.bytesConsumed, frame1.length);
const result2 = ws.decodeFrame(combined.slice(result1.bytesConsumed));
assert(result2, 'Should decode second frame');
assert.strictEqual(result2.payload.toString(), 'second');
});
test('correctly unmasks with all mask byte values', () => {
  // A fixed mask key mixing all-ones, all-zeros and alternating bit patterns,
  // so an error in the XOR arithmetic or in the mask index shows up in the payload.
  const maskKey = Buffer.from([0xFF, 0x00, 0xAA, 0x55]);
  const plain = Buffer.from('ABCDEFGH');
  const scrambled = Buffer.from(Array.from(plain, (byte, idx) => byte ^ maskKey[idx % 4]));
  // Hand-assembled frame: FIN + TEXT, mask bit + 7-bit length, mask key, masked payload.
  const frame = Buffer.concat([
    Buffer.from([0x81, 0x80 | plain.length]),
    maskKey,
    scrambled,
  ]);
  const result = ws.decodeFrame(frame);
  assert.strictEqual(result.payload.toString(), 'ABCDEFGH');
});
// ========== Frame Encoding Boundary at 65535/65536 ==========
console.log('\n--- Frame Size Boundaries ---');
test('encodes frame at exactly 65535 bytes (max 16-bit)', () => {
const payload = Buffer.alloc(65535, 0x45);
const frame = ws.encodeFrame(ws.OPCODES.TEXT, payload);
assert.strictEqual(frame[1], 126);
assert.strictEqual(frame.readUInt16BE(2), 65535);
assert.strictEqual(frame.length, 4 + 65535);
});
test('encodes frame at exactly 65536 bytes (min 64-bit)', () => {
const payload = Buffer.alloc(65536, 0x46);
const frame = ws.encodeFrame(ws.OPCODES.TEXT, payload);
assert.strictEqual(frame[1], 127);
assert.strictEqual(Number(frame.readBigUInt64BE(2)), 65536);
assert.strictEqual(frame.length, 10 + 65536);
});
test('decodes frame at 65535 bytes boundary', () => {
const payload = 'X'.repeat(65535);
const frame = makeClientFrame(0x01, payload);
const result = ws.decodeFrame(frame);
assert(result);
assert.strictEqual(result.payload.length, 65535);
});
test('decodes frame at 65536 bytes boundary', () => {
const payload = 'Y'.repeat(65536);
const frame = makeClientFrame(0x01, payload);
const result = ws.decodeFrame(frame);
assert(result);
assert.strictEqual(result.payload.length, 65536);
});
// ========== Close Frame with Status Code ==========
console.log('\n--- Close Frame Details ---');
test('decodes close frame with status code', () => {
// Close frame payload: 2-byte status code + optional reason
const statusBuf = Buffer.alloc(2);
statusBuf.writeUInt16BE(1000); // Normal closure
const frame = makeClientFrame(0x08, statusBuf);
const result = ws.decodeFrame(frame);
assert.strictEqual(result.opcode, ws.OPCODES.CLOSE);
assert.strictEqual(result.payload.readUInt16BE(0), 1000);
});
test('decodes close frame with status code and reason', () => {
const reason = 'Normal shutdown';
const payload = Buffer.alloc(2 + reason.length);
payload.writeUInt16BE(1000);
payload.write(reason, 2);
const frame = makeClientFrame(0x08, payload);
const result = ws.decodeFrame(frame);
assert.strictEqual(result.opcode, ws.OPCODES.CLOSE);
assert.strictEqual(result.payload.slice(2).toString(), reason);
});
// ========== JSON Roundtrip ==========
console.log('\n--- JSON Message Roundtrip ---');
test('roundtrip encode/decode of JSON message', () => {
  const msg = { type: 'reload' };
  const serverFrame = ws.encodeFrame(ws.OPCODES.TEXT, Buffer.from(JSON.stringify(msg)));
  // decodeFrame expects masked (client) frames, so the unmasked server frame is
  // checked by skipping its header by hand: 2 bytes for a 7-bit length,
  // 4 bytes when the length marker is 126, 10 bytes otherwise.
  const lengthMarker = serverFrame[1];
  const headerSize = lengthMarker < 126 ? 2 : lengthMarker === 126 ? 4 : 10;
  const decoded = JSON.parse(serverFrame.slice(headerSize).toString());
  assert.deepStrictEqual(decoded, msg);
});
test('roundtrip masked client JSON message', () => {
const msg = { type: 'click', choice: 'a', text: 'Option A', timestamp: 1706000101 };
const frame = makeClientFrame(0x01, JSON.stringify(msg));
const result = ws.decodeFrame(frame);
const decoded = JSON.parse(result.payload.toString());
assert.deepStrictEqual(decoded, msg);
});
// ========== Summary ==========
console.log(`\n--- Results: ${passed} passed, ${failed} failed ---`);
if (failed > 0) process.exit(1);
}
runTests();

View File

@@ -0,0 +1,158 @@
# Claude Code Skills Tests
Automated tests for superpowers skills using Claude Code CLI.
## Overview
This test suite verifies that skills are loaded correctly and Claude follows them as expected. Tests invoke Claude Code in headless mode (`claude -p`) and verify the behavior.
## Requirements
- Claude Code CLI installed and in PATH (`claude --version` should work)
- Local superpowers plugin installed (see main README for installation)
## Running Tests
### Run all fast tests (recommended):
```bash
./run-skill-tests.sh
```
### Run integration tests (slow, 10-30 minutes):
```bash
./run-skill-tests.sh --integration
```
### Run specific test:
```bash
./run-skill-tests.sh --test test-subagent-driven-development.sh
```
### Run with verbose output:
```bash
./run-skill-tests.sh --verbose
```
### Set custom timeout:
```bash
./run-skill-tests.sh --timeout 1800 # 30 minutes for integration tests
```
## Test Structure
### test-helpers.sh
Common functions for skills testing:
- `run_claude "prompt" [timeout] [allowed_tools]` - Run Claude in headless mode with the prompt (timeout in seconds, default 60)
- `assert_contains output pattern name` - Verify pattern exists
- `assert_not_contains output pattern name` - Verify pattern absent
- `assert_count output pattern count name` - Verify exact count
- `assert_order output pattern_a pattern_b name` - Verify order
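- `create_test_project` - Create temp test directory
- `cleanup_test_project test_dir` - Remove a temp test directory
- `create_test_plan project_dir [plan_name]` - Create sample plan file (default name: `test-plan`)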
### Test Files
Each test file:
1. Sources `test-helpers.sh`
2. Runs Claude Code with specific prompts
3. Verifies expected behavior using assertions
4. Returns 0 on success, non-zero on failure
## Example Test
```bash
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
source "$SCRIPT_DIR/test-helpers.sh"
echo "=== Test: My Skill ==="
# Ask Claude about the skill
output=$(run_claude "What does the my-skill skill do?" 30)
# Verify response
assert_contains "$output" "expected behavior" "Skill describes behavior"
echo "=== All tests passed ==="
```
## Current Tests
### Fast Tests (run by default)
#### test-subagent-driven-development.sh
Tests skill content and requirements (~2 minutes):
- Skill loading and accessibility
- Workflow ordering (spec compliance before code quality)
- Self-review requirements documented
- Plan reading efficiency documented
- Spec compliance reviewer skepticism documented
- Review loops documented
- Task context provision documented
### Integration Tests (use --integration flag)
#### test-subagent-driven-development-integration.sh
Full workflow execution test (~10-30 minutes):
- Creates real test project with Node.js setup
- Creates implementation plan with 2 tasks
- Executes plan using subagent-driven-development
- Verifies actual behaviors:
- Plan read once at start (not per task)
- Full task text provided in subagent prompts
- Subagents perform self-review before reporting
- Spec compliance review happens before code quality
- Spec reviewer reads code independently
- Working implementation is produced
- Tests pass
- Proper git commits created
**What it tests:**
- The workflow actually works end-to-end
- Our improvements are actually applied
- Subagents follow the skill correctly
- Final code is functional and tested
## Adding New Tests
1. Create new test file: `test-<skill-name>.sh`
2. Source test-helpers.sh
3. Write tests using `run_claude` and assertions
4. Add to test list in `run-skill-tests.sh`
5. Make executable: `chmod +x test-<skill-name>.sh`
## Timeout Considerations
- Default timeout: 5 minutes per test
- Claude Code may take time to respond
- Adjust with `--timeout` if needed
- Tests should be focused to avoid long runs
## Debugging Failed Tests
With `--verbose`, you'll see full Claude output:
```bash
./run-skill-tests.sh --verbose --test test-subagent-driven-development.sh
```
Without verbose, only failures show output.
## CI/CD Integration
To run in CI:
```bash
# Run with explicit timeout for CI environments
./run-skill-tests.sh --timeout 900
# Exit code 0 = success, non-zero = failure
```
## Notes
- Fast tests verify skill *instructions*, not full execution
- Full workflow tests are very slow, so they only run with `--integration`
- Focus on verifying key skill requirements
- Tests should be deterministic
- Avoid testing implementation details

View File

@@ -0,0 +1,168 @@
#!/usr/bin/env python3
"""
Analyze token usage from Claude Code session transcripts.
Breaks down usage by main session and individual subagents.
"""
import json
import sys
from pathlib import Path
from collections import defaultdict
def analyze_main_session(filepath):
    """Analyze a session file and return token usage broken down by agent.

    The file is read as JSON Lines. Two kinds of records contribute:

    * ``type == 'assistant'`` records with a ``message`` key are counted
      against the main session, using ``message['usage']``.
    * ``type == 'user'`` records whose ``toolUseResult`` carries both
      ``usage`` and ``agentId`` are counted against that subagent.

    Args:
        filepath: Path to the ``.jsonl`` session transcript.

    Returns:
        A ``(main_usage, subagent_usage)`` tuple. ``main_usage`` is a dict with
        the keys ``input_tokens``, ``output_tokens``, ``cache_creation``,
        ``cache_read`` and ``messages``. ``subagent_usage`` maps each agent id
        to a dict with the same keys plus ``description`` (the first line of
        the subagent prompt, without a leading "You are ", capped at 60 chars).
    """
    main_usage = {
        'input_tokens': 0,
        'output_tokens': 0,
        'cache_creation': 0,
        'cache_read': 0,
        'messages': 0
    }

    # Track usage per subagent
    subagent_usage = defaultdict(lambda: {
        'input_tokens': 0,
        'output_tokens': 0,
        'cache_creation': 0,
        'cache_read': 0,
        'messages': 0,
        'description': None
    })

    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # encoding; undecodable bytes are replaced rather than aborting the analysis.
    with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            if not line.strip():
                continue  # blank lines carry no record

            try:
                data = json.loads(line)

                # Main session assistant messages
                if data.get('type') == 'assistant' and 'message' in data:
                    main_usage['messages'] += 1
                    msg_usage = data['message'].get('usage', {})
                    main_usage['input_tokens'] += msg_usage.get('input_tokens', 0)
                    main_usage['output_tokens'] += msg_usage.get('output_tokens', 0)
                    main_usage['cache_creation'] += msg_usage.get('cache_creation_input_tokens', 0)
                    main_usage['cache_read'] += msg_usage.get('cache_read_input_tokens', 0)

                # Subagent tool results
                if data.get('type') == 'user' and 'toolUseResult' in data:
                    result = data['toolUseResult']
                    if 'usage' in result and 'agentId' in result:
                        agent_id = result['agentId']
                        usage = result['usage']

                        # Get description from prompt if available
                        if subagent_usage[agent_id]['description'] is None:
                            prompt = result.get('prompt', '')
                            # Extract first line as description
                            first_line = prompt.split('\n')[0] if prompt else f"agent-{agent_id}"
                            if first_line.startswith('You are '):
                                first_line = first_line[8:]  # Remove "You are "
                            subagent_usage[agent_id]['description'] = first_line[:60]

                        subagent_usage[agent_id]['messages'] += 1
                        subagent_usage[agent_id]['input_tokens'] += usage.get('input_tokens', 0)
                        subagent_usage[agent_id]['output_tokens'] += usage.get('output_tokens', 0)
                        subagent_usage[agent_id]['cache_creation'] += usage.get('cache_creation_input_tokens', 0)
                        subagent_usage[agent_id]['cache_read'] += usage.get('cache_read_input_tokens', 0)
            except (ValueError, TypeError, AttributeError, KeyError):
                # Best-effort parsing: a line that is not valid JSON (ValueError
                # covers json.JSONDecodeError) or whose record has an unexpected
                # shape is skipped so one bad line does not abort the report.
                continue

    return main_usage, dict(subagent_usage)
def format_tokens(n):
    """Render a token count with comma thousands separators (1234567 -> '1,234,567')."""
    return format(n, ',')
def calculate_cost(usage, input_cost_per_m=3.0, output_cost_per_m=15.0):
    """Estimate the dollar cost of a usage record.

    Plain input, cache-creation and cache-read tokens are all charged at the
    input rate; output tokens are charged at the output rate. Both rates are
    expressed in dollars per million tokens.
    """
    billable_input = sum(
        usage[key] for key in ('input_tokens', 'cache_creation', 'cache_read')
    )
    input_dollars = billable_input * input_cost_per_m / 1_000_000
    output_dollars = usage['output_tokens'] * output_cost_per_m / 1_000_000
    return input_dollars + output_dollars
def main():
    """Command-line entry point: print a token usage report for one session file.

    Expects the session transcript path as the first command-line argument.
    Exits with status 1 (message on stderr) when the argument is missing or the
    file does not exist. Otherwise prints a per-agent table followed by totals
    and an estimated cost.
    """
    if len(sys.argv) < 2:
        # Diagnostics go to stderr so they never mix with report output.
        print("Usage: analyze-token-usage.py <session-file.jsonl>", file=sys.stderr)
        sys.exit(1)

    main_session_file = sys.argv[1]

    if not Path(main_session_file).exists():
        print(f"Error: Session file not found: {main_session_file}", file=sys.stderr)
        sys.exit(1)

    # Analyze the session
    main_usage, subagent_usage = analyze_main_session(main_session_file)

    print("=" * 100)
    print("TOKEN USAGE ANALYSIS")
    print("=" * 100)
    print()

    # Print breakdown
    print("Usage Breakdown:")
    print("-" * 100)
    print(f"{'Agent':<15} {'Description':<35} {'Msgs':>5} {'Input':>10} {'Output':>10} {'Cache':>10} {'Cost':>8}")
    print("-" * 100)

    # Main session (the "Cache" column shows cache-read tokens)
    cost = calculate_cost(main_usage)
    print(f"{'main':<15} {'Main session (coordinator)':<35} "
          f"{main_usage['messages']:>5} "
          f"{format_tokens(main_usage['input_tokens']):>10} "
          f"{format_tokens(main_usage['output_tokens']):>10} "
          f"{format_tokens(main_usage['cache_read']):>10} "
          f"${cost:>7.2f}")

    # Subagents (sorted by agent ID)
    for agent_id in sorted(subagent_usage.keys()):
        usage = subagent_usage[agent_id]
        cost = calculate_cost(usage)
        # Descriptions may be up to 60 characters long; clip them to the
        # 35-character column so the numeric columns stay aligned.
        desc = (usage['description'] or f"agent-{agent_id}")[:35]
        print(f"{agent_id:<15} {desc:<35} "
              f"{usage['messages']:>5} "
              f"{format_tokens(usage['input_tokens']):>10} "
              f"{format_tokens(usage['output_tokens']):>10} "
              f"{format_tokens(usage['cache_read']):>10} "
              f"${cost:>7.2f}")

    print("-" * 100)

    # Calculate totals
    total_usage = {
        'input_tokens': main_usage['input_tokens'],
        'output_tokens': main_usage['output_tokens'],
        'cache_creation': main_usage['cache_creation'],
        'cache_read': main_usage['cache_read'],
        'messages': main_usage['messages']
    }

    for usage in subagent_usage.values():
        total_usage['input_tokens'] += usage['input_tokens']
        total_usage['output_tokens'] += usage['output_tokens']
        total_usage['cache_creation'] += usage['cache_creation']
        total_usage['cache_read'] += usage['cache_read']
        total_usage['messages'] += usage['messages']

    total_input = total_usage['input_tokens'] + total_usage['cache_creation'] + total_usage['cache_read']
    total_tokens = total_input + total_usage['output_tokens']
    total_cost = calculate_cost(total_usage)

    print()
    print("TOTALS:")
    print(f" Total messages: {format_tokens(total_usage['messages'])}")
    print(f" Input tokens: {format_tokens(total_usage['input_tokens'])}")
    print(f" Output tokens: {format_tokens(total_usage['output_tokens'])}")
    print(f" Cache creation tokens: {format_tokens(total_usage['cache_creation'])}")
    print(f" Cache read tokens: {format_tokens(total_usage['cache_read'])}")
    print()
    print(f" Total input (incl cache): {format_tokens(total_input)}")
    print(f" Total tokens: {format_tokens(total_tokens)}")
    print()
    print(f" Estimated cost: ${total_cost:.2f}")
    print(" (at $3/$15 per M tokens for input/output)")
    print()
    print("=" * 100)
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,187 @@
#!/usr/bin/env bash
# Test runner for Claude Code skills
# Tests skills by invoking Claude Code CLI and verifying behavior
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"
echo "========================================"
echo " Claude Code Skills Test Suite"
echo "========================================"
echo ""
echo "Repository: $(cd ../.. && pwd)"
echo "Test time: $(date)"
echo "Claude version: $(claude --version 2>/dev/null || echo 'not found')"
echo ""
# Check if Claude Code is available
if ! command -v claude &> /dev/null; then
echo "ERROR: Claude Code CLI not found"
echo "Install Claude Code first: https://code.claude.com"
exit 1
fi
# Parse command line arguments
VERBOSE=false
SPECIFIC_TEST=""
TIMEOUT=300  # Default 5 minute timeout per test
RUN_INTEGRATION=false

while [[ $# -gt 0 ]]; do
    case $1 in
        --verbose|-v)
            VERBOSE=true
            shift
            ;;
        --test|-t)
            # Without this guard, `set -u` aborts on the missing "$2" with an
            # opaque "unbound variable" error instead of a usage hint.
            if [[ $# -lt 2 ]]; then
                echo "ERROR: $1 requires a test file name" >&2
                echo "Use --help for usage information" >&2
                exit 1
            fi
            SPECIFIC_TEST="$2"
            shift 2
            ;;
        --timeout)
            if [[ $# -lt 2 ]]; then
                echo "ERROR: $1 requires a number of seconds" >&2
                echo "Use --help for usage information" >&2
                exit 1
            fi
            TIMEOUT="$2"
            shift 2
            ;;
        --integration|-i)
            RUN_INTEGRATION=true
            shift
            ;;
        --help|-h)
            echo "Usage: $0 [options]"
            echo ""
            echo "Options:"
            echo " --verbose, -v Show verbose output"
            echo " --test, -t NAME Run only the specified test"
            echo " --timeout SECONDS Set timeout per test (default: 300)"
            echo " --integration, -i Run integration tests (slow, 10-30 min)"
            echo " --help, -h Show this help"
            echo ""
            echo "Tests:"
            echo " test-subagent-driven-development.sh Test skill loading and requirements"
            echo ""
            echo "Integration Tests (use --integration):"
            echo " test-subagent-driven-development-integration.sh Full workflow execution"
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            echo "Use --help for usage information"
            exit 1
            ;;
    esac
done
# List of skill tests to run (fast unit tests)
tests=(
"test-subagent-driven-development.sh"
)
# Integration tests (slow, full execution)
integration_tests=(
"test-subagent-driven-development-integration.sh"
)
# Add integration tests if requested
if [ "$RUN_INTEGRATION" = true ]; then
tests+=("${integration_tests[@]}")
fi
# Filter to specific test if requested
if [ -n "$SPECIFIC_TEST" ]; then
tests=("$SPECIFIC_TEST")
fi
# Track results
passed=0
failed=0
skipped=0
# Run each test
for test in "${tests[@]}"; do
    printf '%s\n' \
        "----------------------------------------" \
        "Running: $test" \
        "----------------------------------------"

    script_file="$SCRIPT_DIR/$test"

    # A listed test with no file on disk is counted as skipped, not failed.
    if [ ! -f "$script_file" ]; then
        echo " [SKIP] Test file not found: $test"
        skipped=$((skipped + 1))
        continue
    fi

    if [ ! -x "$script_file" ]; then
        echo " Making $test executable..."
        chmod +x "$script_file"
    fi

    began=$(date +%s)
    status=0

    if [ "$VERBOSE" = true ]; then
        # Verbose: the test writes straight to the terminal and the result
        # line repeats the test name.
        timeout "$TIMEOUT" bash "$script_file" || status=$?
        subject="$test "
        echo ""
    else
        # Quiet: capture the output and show it only if the test fails.
        output=$(timeout "$TIMEOUT" bash "$script_file" 2>&1) || status=$?
        subject=""
    fi

    elapsed=$(( $(date +%s) - began ))

    if [ "$status" -eq 0 ]; then
        echo " [PASS] ${subject}(${elapsed}s)"
        passed=$((passed + 1))
    else
        # timeout(1) exits with 124 when it had to stop the command.
        if [ "$status" -eq 124 ]; then
            echo " [FAIL] ${subject}(timeout after ${TIMEOUT}s)"
        else
            echo " [FAIL] ${subject}(${elapsed}s)"
        fi
        if [ "$VERBOSE" != true ]; then
            echo ""
            echo " Output:"
            echo "$output" | sed 's/^/ /'
        fi
        failed=$((failed + 1))
    fi

    echo ""
done
# Print summary
printf '%s\n' \
    "========================================" \
    " Test Results Summary" \
    "========================================" \
    "" \
    " Passed: $passed" \
    " Failed: $failed" \
    " Skipped: $skipped" \
    ""

# Remind the user about the slow suite when it was left out of this run.
if [ "$RUN_INTEGRATION" = false ] && [ ${#integration_tests[@]} -gt 0 ]; then
    printf '%s\n' \
        "Note: Integration tests were not run (they take 10-30 minutes)." \
        "Use --integration flag to run full workflow execution tests." \
        ""
fi

# The exit status reflects whether any test failed.
if [ $failed -le 0 ]; then
    echo "STATUS: PASSED"
    exit 0
fi
echo "STATUS: FAILED"
exit 1

View File

@@ -0,0 +1,177 @@
#!/usr/bin/env bash
# Integration Test: Document Review System
# Actually runs spec/plan review and verifies reviewers catch issues
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
source "$SCRIPT_DIR/test-helpers.sh"
echo "========================================"
echo " Integration Test: Document Review System"
echo "========================================"
echo ""
echo "This test verifies the document review system by:"
echo " 1. Creating a spec with intentional errors"
echo " 2. Running the spec document reviewer"
echo " 3. Verifying the reviewer catches the errors"
echo ""
# Create test project
TEST_PROJECT=$(create_test_project)
echo "Test project: $TEST_PROJECT"
# Trap to cleanup.
# Single quotes defer expansion until the trap fires and keep the path quoted,
# so a temp directory containing spaces or glob characters is handled intact.
# On a non-zero exit the project is kept: the failure summary points the user
# at the saved Claude output, which lives inside this directory.
trap 'status=$?; if [ "$status" -eq 0 ]; then cleanup_test_project "$TEST_PROJECT"; else echo "Test project preserved at: $TEST_PROJECT"; fi' EXIT
cd "$TEST_PROJECT"
# Create directory structure
mkdir -p docs/superpowers/specs
# Create a spec document WITH INTENTIONAL ERRORS for the reviewer to catch
cat > docs/superpowers/specs/test-feature-design.md <<'EOF'
# Test Feature Design
## Overview
This is a test feature that does something useful.
## Requirements
1. The feature should work correctly
2. It should be fast
3. TODO: Add more requirements here
## Architecture
The feature will use a simple architecture with:
- A frontend component
- A backend service
- Error handling will be specified later once we understand the failure modes better
## Data Flow
Data flows from the frontend to the backend.
## Testing Strategy
Tests will be written to cover the main functionality.
EOF
# Initialize git repo
git init --quiet
git config user.email "test@test.com"
git config user.name "Test User"
git add .
git commit -m "Initial commit with test spec" --quiet
echo ""
echo "Created test spec with intentional errors:"
echo " - TODO placeholder in Requirements section"
echo " - 'specified later' deferral in Architecture section"
echo ""
echo "Running spec document reviewer..."
echo ""
# Run Claude to review the spec
OUTPUT_FILE="$TEST_PROJECT/claude-output.txt"
PROMPT="You are testing the spec document reviewer.
Read the spec-document-reviewer-prompt.md template in skills/brainstorming/ to understand the review format.
Then review the spec at $TEST_PROJECT/docs/superpowers/specs/test-feature-design.md using the criteria from that template.
Look for:
- TODOs, placeholders, 'TBD', incomplete sections
- Sections saying 'to be defined later' or 'will spec when X is done'
- Sections noticeably less detailed than others
Output your review in the format specified in the template."
echo "================================================================================"
# Record the pipeline status immediately. Reading $? after the echo commands
# would report the status of the last echo (always 0), not of claude/timeout.
claude_status=0
cd "$SCRIPT_DIR/../.." && timeout 120 claude -p "$PROMPT" --permission-mode bypassPermissions 2>&1 | tee "$OUTPUT_FILE" || claude_status=$?
if [ "$claude_status" -ne 0 ]; then
    echo ""
    echo "================================================================================"
    echo "EXECUTION FAILED (exit code: $claude_status)"
    exit 1
fi
echo "================================================================================"
echo ""
echo "Analyzing reviewer output..."
echo ""
# Verification tests: each check greps the saved reviewer output and bumps
# FAILED when the expected signal is missing.
FAILED=0
echo "=== Verification Tests ==="
echo ""

# Test 1: Reviewer found the TODO
echo "Test 1: Reviewer found TODO..."
if ! grep -qi "TODO" "$OUTPUT_FILE" || ! grep -qi "requirements\|Requirements" "$OUTPUT_FILE"; then
    echo " [FAIL] Reviewer did not identify TODO"
    FAILED=$((FAILED + 1))
else
    echo " [PASS] Reviewer identified TODO in Requirements section"
fi
echo ""

# Test 2: Reviewer found the "specified later" deferral
echo "Test 2: Reviewer found 'specified later' deferral..."
if ! grep -qi "specified later\|later\|defer\|incomplete\|error handling" "$OUTPUT_FILE"; then
    echo " [FAIL] Reviewer did not identify deferred content"
    FAILED=$((FAILED + 1))
else
    echo " [PASS] Reviewer identified deferred content"
fi
echo ""

# Test 3: Reviewer output includes Issues section
echo "Test 3: Review output format..."
if ! grep -qi "issues\|Issues" "$OUTPUT_FILE"; then
    echo " [FAIL] Review missing Issues section"
    FAILED=$((FAILED + 1))
else
    echo " [PASS] Review includes Issues section"
fi
echo ""

# Test 4: Reviewer did NOT approve (found issues)
# Only an approval with no "issues found" marker counts as a failure;
# output with neither signal is treated as a pass.
echo "Test 4: Reviewer verdict..."
if grep -qi "Issues Found\|❌\|not approved\|issues found" "$OUTPUT_FILE"; then
    echo " [PASS] Reviewer correctly found issues (not approved)"
elif ! grep -qi "Approved\|✅" "$OUTPUT_FILE" || grep -qi "Issues Found\|❌" "$OUTPUT_FILE"; then
    echo " [PASS] Reviewer identified problems (ambiguous format but found issues)"
else
    echo " [FAIL] Reviewer incorrectly approved spec with errors"
    FAILED=$((FAILED + 1))
fi
echo ""
# Summary
echo "========================================"
echo " Test Summary"
echo "========================================"
echo ""
if [ $FAILED -eq 0 ]; then
echo "STATUS: PASSED"
echo "All verification tests passed!"
echo ""
echo "The spec document reviewer correctly:"
echo " ✓ Found TODO placeholder"
echo " ✓ Found 'specified later' deferral"
echo " ✓ Produced properly formatted review"
echo " ✓ Did not approve spec with errors"
exit 0
else
echo "STATUS: FAILED"
echo "Failed $FAILED verification tests"
echo ""
echo "Output saved to: $OUTPUT_FILE"
echo ""
echo "Review the output to see what went wrong."
exit 1
fi

View File

@@ -0,0 +1,202 @@
#!/usr/bin/env bash
# Helper functions for Claude Code skill tests
# Run Claude Code with a prompt and capture output
# Usage: run_claude "prompt text" [timeout_seconds] [allowed_tools]
run_claude() {
    # Runs `claude -p` in headless mode under a timeout.
    #   $1 - prompt text
    #   $2 - timeout in seconds (default 60)
    #   $3 - value for --allowed-tools (optional)
    # On success the captured output goes to stdout and the function returns 0.
    # On failure the captured output goes to stderr and the failing exit
    # status is returned.
    local prompt="$1"
    local timeout="${2:-60}"
    local allowed_tools="${3:-}"
    local output_file
    output_file=$(mktemp) || return 1

    # Build the command as an argument array, not a string re-parsed by
    # `bash -c`: the prompt is passed verbatim, so quotes, `$` or backticks in
    # it can neither break the command line nor be executed by the shell.
    local -a cmd=(claude -p "$prompt")
    if [ -n "$allowed_tools" ]; then
        cmd+=("--allowed-tools=$allowed_tools")
    fi

    # Run Claude in headless mode with timeout
    local exit_code=0
    timeout "$timeout" "${cmd[@]}" > "$output_file" 2>&1 || exit_code=$?

    if [ "$exit_code" -eq 0 ]; then
        cat "$output_file"
    else
        cat "$output_file" >&2
    fi
    rm -f "$output_file"
    return "$exit_code"
}
# Check if output contains a pattern
# Usage: assert_contains "output" "pattern" "test name"
assert_contains() {
    # Passes (returns 0) when the grep pattern $2 matches somewhere in $1;
    # otherwise prints the expectation and the full output, and returns 1.
    local output="$1"
    local pattern="$2"
    local test_name="${3:-test}"

    # Feed grep from a here-string rather than `echo ... | grep -q`: under
    # `set -o pipefail`, grep -q exiting on an early match can kill the writer
    # with SIGPIPE on large output and turn a real match into a failure.
    # `--` keeps patterns that start with a dash from being read as options.
    if grep -q -- "$pattern" <<< "$output"; then
        echo " [PASS] $test_name"
        return 0
    else
        echo " [FAIL] $test_name"
        echo " Expected to find: $pattern"
        echo " In output:"
        echo "$output" | sed 's/^/ /'
        return 1
    fi
}
# Check if output does NOT contain a pattern
# Usage: assert_not_contains "output" "pattern" "test name"
assert_not_contains() {
    # Passes (returns 0) when the grep pattern $2 matches nowhere in $1;
    # otherwise prints the unexpected pattern and the full output, and returns 1.
    local output="$1"
    local pattern="$2"
    local test_name="${3:-test}"

    # Feed grep from a here-string rather than `echo ... | grep -q`: under
    # `set -o pipefail`, a SIGPIPE on the writer makes the pipeline look like
    # "no match", which would report a false PASS here.
    # `--` keeps patterns that start with a dash from being read as options.
    if grep -q -- "$pattern" <<< "$output"; then
        echo " [FAIL] $test_name"
        echo " Did not expect to find: $pattern"
        echo " In output:"
        echo "$output" | sed 's/^/ /'
        return 1
    else
        echo " [PASS] $test_name"
        return 0
    fi
}
# Check if output matches a count
# Usage: assert_count "output" "pattern" expected_count "test name"
assert_count() {
    # Passes (returns 0) when exactly $3 lines of $1 match the grep pattern $2.
    # Note that `grep -c` counts matching lines, not individual occurrences.
    local output="$1"
    local pattern="$2"
    local expected="$3"
    local test_name="${4:-test}"

    # `grep -c` already prints 0 when nothing matches, but exits with status 1.
    # Appending `|| echo "0"` therefore produced the two-line value "0\n0" and
    # broke the numeric comparison below; `|| true` only neutralizes the status.
    local actual
    actual=$(grep -c -- "$pattern" <<< "$output" || true)
    actual=${actual:-0}  # grep prints nothing if it fails outright (e.g. bad pattern)

    if [ "$actual" -eq "$expected" ]; then
        echo " [PASS] $test_name (found $actual instances)"
        return 0
    else
        echo " [FAIL] $test_name"
        echo " Expected $expected instances of: $pattern"
        echo " Found $actual instances"
        echo " In output:"
        echo "$output" | sed 's/^/ /'
        return 1
    fi
}
# Check if pattern A appears before pattern B
# Usage: assert_order "output" "pattern_a" "pattern_b" "test name"
assert_order() {
    # Passes (returns 0) when the first line of $1 matching pattern $2 comes
    # strictly before the first line matching pattern $3. Fails if either
    # pattern is missing or if both first matches are on the same line.
    local output="$1"
    local pattern_a="$2"
    local pattern_b="$3"
    local test_name="${4:-test}"

    # Get the line number of the first match of each pattern.
    # `-m 1` stops grep at the first match (no `head` closing the pipe early),
    # `--` protects patterns that start with a dash, and `|| true` keeps a
    # missing pattern from aborting callers that run under `set -e`.
    local line_a line_b
    line_a=$(grep -n -m 1 -- "$pattern_a" <<< "$output" | cut -d: -f1 || true)
    line_b=$(grep -n -m 1 -- "$pattern_b" <<< "$output" | cut -d: -f1 || true)

    if [ -z "$line_a" ]; then
        echo " [FAIL] $test_name: pattern A not found: $pattern_a"
        return 1
    fi
    if [ -z "$line_b" ]; then
        echo " [FAIL] $test_name: pattern B not found: $pattern_b"
        return 1
    fi

    if [ "$line_a" -lt "$line_b" ]; then
        echo " [PASS] $test_name (A at line $line_a, B at line $line_b)"
        return 0
    else
        echo " [FAIL] $test_name"
        echo " Expected '$pattern_a' before '$pattern_b'"
        echo " But found A at line $line_a, B at line $line_b"
        return 1
    fi
}
# Create a temporary test project directory
# Usage: test_project=$(create_test_project)
create_test_project() {
    # Creates a fresh temporary directory and prints its path on stdout.
    # Returns non-zero if the directory could not be created.
    local test_dir
    # Declare and assign separately: `local var=$(cmd)` always succeeds, which
    # would hide a mktemp failure and hand the caller an empty path.
    test_dir=$(mktemp -d) || return 1
    echo "$test_dir"
}
# Cleanup test project
# Usage: cleanup_test_project "$test_dir"
cleanup_test_project() {
    # Recursively removes the directory given as $1; does nothing (and
    # succeeds) when $1 is not an existing directory.
    local doomed_dir="$1"
    [ ! -d "$doomed_dir" ] || rm -rf "$doomed_dir"
}
# Create a simple plan file for testing
# Usage: create_test_plan "$project_dir" "$plan_name"
# Writes a fixed two-task Markdown plan to
# <project_dir>/docs/superpowers/plans/<plan_name>.md, creating the directory
# if needed, and prints the path of the written file on stdout.
# plan_name is optional and defaults to "test-plan".
create_test_plan() {
local project_dir="$1"
local plan_name="${2:-test-plan}"
local plan_file="$project_dir/docs/superpowers/plans/$plan_name.md"
mkdir -p "$(dirname "$plan_file")"
# The heredoc delimiter is quoted ('EOF'), so the plan text is written
# literally: backticks and ${name} are not expanded by the shell.
cat > "$plan_file" <<'EOF'
# Test Implementation Plan
## Task 1: Create Hello Function
Create a simple hello function that returns "Hello, World!".
**File:** `src/hello.js`
**Implementation:**
```javascript
export function hello() {
return "Hello, World!";
}
```
**Tests:** Write a test that verifies the function returns the expected string.
**Verification:** `npm test`
## Task 2: Create Goodbye Function
Create a goodbye function that takes a name and returns a goodbye message.
**File:** `src/goodbye.js`
**Implementation:**
```javascript
export function goodbye(name) {
return `Goodbye, ${name}!`;
}
```
**Tests:** Write tests for:
- Default name
- Custom name
- Edge cases (empty string, null)
**Verification:** `npm test`
EOF
# Report where the plan was written so callers can capture the path.
echo "$plan_file"
}
# Export functions for use in tests, so child bash processes started by a test
# can call the helpers too.
export -f \
    run_claude \
    assert_contains \
    assert_not_contains \
    assert_count \
    assert_order \
    create_test_project \
    cleanup_test_project \
    create_test_plan

View File

@@ -0,0 +1,314 @@
#!/usr/bin/env bash
# Integration Test: subagent-driven-development workflow
# Actually executes a plan and verifies the new workflow behaviors
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
source "$SCRIPT_DIR/test-helpers.sh"
echo "========================================"
echo " Integration Test: subagent-driven-development"
echo "========================================"
echo ""
echo "This test executes a real plan using the skill and verifies:"
echo " 1. Plan is read once (not per task)"
echo " 2. Full task text provided to subagents"
echo " 3. Subagents perform self-review"
echo " 4. Spec compliance review before code quality"
echo " 5. Review loops when issues found"
echo " 6. Spec reviewer reads code independently"
echo ""
echo "WARNING: This test may take 10-30 minutes to complete."
echo ""
# Create test project
TEST_PROJECT=$(create_test_project)
echo "Test project: $TEST_PROJECT"
# Trap to cleanup.
# Single quotes defer expansion until the trap fires and keep the path quoted,
# so a temp directory containing spaces or glob characters is removed intact.
trap 'cleanup_test_project "$TEST_PROJECT"' EXIT
# Set up minimal Node.js project
cd "$TEST_PROJECT"
cat > package.json <<'EOF'
{
"name": "test-project",
"version": "1.0.0",
"type": "module",
"scripts": {
"test": "node --test"
}
}
EOF
mkdir -p src test docs/superpowers/plans
# Create a simple implementation plan
cat > docs/superpowers/plans/implementation-plan.md <<'EOF'
# Test Implementation Plan
This is a minimal plan to test the subagent-driven-development workflow.
## Task 1: Create Add Function
Create a function that adds two numbers.
**File:** `src/math.js`
**Requirements:**
- Function named `add`
- Takes two parameters: `a` and `b`
- Returns the sum of `a` and `b`
- Export the function
**Implementation:**
```javascript
export function add(a, b) {
return a + b;
}
```
**Tests:** Create `test/math.test.js` that verifies:
- `add(2, 3)` returns `5`
- `add(0, 0)` returns `0`
- `add(-1, 1)` returns `0`
**Verification:** `npm test`
## Task 2: Create Multiply Function
Create a function that multiplies two numbers.
**File:** `src/math.js` (add to existing file)
**Requirements:**
- Function named `multiply`
- Takes two parameters: `a` and `b`
- Returns the product of `a` and `b`
- Export the function
- DO NOT add any extra features (like power, divide, etc.)
**Implementation:**
```javascript
export function multiply(a, b) {
return a * b;
}
```
**Tests:** Add to `test/math.test.js`:
- `multiply(2, 3)` returns `6`
- `multiply(0, 5)` returns `0`
- `multiply(-2, 3)` returns `-6`
**Verification:** `npm test`
EOF
# Put the scaffold under version control with a throwaway identity so the
# commits made during the run can be inspected afterwards.
git init --quiet
git config user.name "Test User"
git config user.email "test@test.com"
git add .
git commit --quiet -m "Initial commit"

# Blank line, status message, blank line.
printf '\n%s\n\n' "Project setup complete. Starting execution..."
# Run Claude with subagent-driven-development
# Capture full output to analyze
OUTPUT_FILE="$TEST_PROJECT/claude-output.txt"
# Create prompt file
# NOTE(review): nothing in the visible remainder of this script reads
# prompt.txt; the run below is driven by the PROMPT variable instead. Confirm
# whether this file is only kept as an artifact.
cat > "$TEST_PROJECT/prompt.txt" <<'EOF'
I want you to execute the implementation plan at docs/superpowers/plans/implementation-plan.md using the subagent-driven-development skill.
IMPORTANT: Follow the skill exactly. I will be verifying that you:
1. Read the plan once at the beginning
2. Provide full task text to subagents (don't make them read files)
3. Ensure subagents do self-review before reporting
4. Run spec compliance review before code quality review
5. Use review loops when issues are found
Begin now. Execute the plan.
EOF
# Note: We use a longer timeout since this is integration testing
# Use --allowed-tools to enable tool usage in headless mode
# IMPORTANT: Run from superpowers directory so local dev skills are available
# The prompt passed on the command line. Unlike prompt.txt it is double-quoted,
# so $TEST_PROJECT is expanded here, and it starts by telling Claude to change
# into the test project because the command is launched from another directory.
PROMPT="Change to directory $TEST_PROJECT and then execute the implementation plan at docs/superpowers/plans/implementation-plan.md using the subagent-driven-development skill.
IMPORTANT: Follow the skill exactly. I will be verifying that you:
1. Read the plan once at the beginning
2. Provide full task text to subagents (don't make them read files)
3. Ensure subagents do self-review before reporting
4. Run spec compliance review before code quality review
5. Use review loops when issues are found
Begin now. Execute the plan."
echo "Running Claude (output will be shown below and saved to $OUTPUT_FILE)..."
echo "================================================================================"
# Launch Claude headless from two directories above SCRIPT_DIR with a
# 30-minute limit, mirroring stdout and stderr to both the terminal and
# OUTPUT_FILE.
# NOTE(review): the status seen by `||` is the pipeline's; whether that
# reflects claude/timeout or only tee depends on pipefail being enabled earlier
# in the script (not visible here).
cd "$SCRIPT_DIR/../.." && timeout 1800 claude -p "$PROMPT" --allowed-tools=all --add-dir "$TEST_PROJECT" --permission-mode bypassPermissions 2>&1 | tee "$OUTPUT_FILE" || {
    # Save the status before anything else runs: every command resets $?, so
    # reading it after the echo calls below would always report 0.
    exit_code=$?
    echo ""
    echo "================================================================================"
    echo "EXECUTION FAILED (exit code: $exit_code)"
    exit 1
}
echo "================================================================================"
echo ""
echo "Execution complete. Analyzing results..."
echo ""
# Find the session transcript
# Session files are in ~/.claude/projects/-<working-dir>/<session-id>.jsonl
# NOTE(review): the directory name below is derived from the unresolved
# "$SCRIPT_DIR/../.." string (so it still contains ".." segments) and has its
# leading dash stripped. Confirm this matches the directory Claude writes to.
WORKING_DIR_ESCAPED=$(echo "$SCRIPT_DIR/../.." | sed 's/\//-/g' | sed 's/^-//')
SESSION_DIR="$HOME/.claude/projects/$WORKING_DIR_ESCAPED"

# Find the most recent session file (created during this test run).
# `ls -t` orders the candidates by modification time, newest first. Sorting the
# paths themselves would order them by session id, which says nothing about
# recency. `|| true` keeps a missing directory from aborting the script under
# errexit/pipefail, so the explicit error below is what the user sees.
SESSION_FILE=$(find "$SESSION_DIR" -name "*.jsonl" -type f -mmin -60 -exec ls -t {} + 2>/dev/null | head -1 || true)
if [ -z "$SESSION_FILE" ]; then
    echo "ERROR: Could not find session transcript file"
    echo "Looked in: $SESSION_DIR"
    exit 1
fi
echo "Analyzing session transcript: $(basename "$SESSION_FILE")"
echo ""
# Verification tests
# FAILED counts failed checks; the summary at the end of the script reads it.
FAILED=0
printf '%s\n\n' "=== Verification Tests ==="

# Test 1: the transcript must contain a Skill tool call naming this skill.
echo "Test 1: Skill tool invoked..."
skill_invocation='"name":"Skill".*"skill":"superpowers:subagent-driven-development"'
if ! grep -q "$skill_invocation" "$SESSION_FILE"; then
    echo " [FAIL] Skill was not invoked"
    FAILED=$((FAILED + 1))
else
    echo " [PASS] subagent-driven-development skill was invoked"
fi
echo ""
# Test 2: Subagents were used (Task tool)
echo "Test 2: Subagents dispatched..."
# grep -c always prints a count and exits 1 when that count is 0. Appending
# `|| echo "0"` therefore produced the two-line value "0<newline>0", which made
# the numeric comparison error out and garbled the failure message.
# `|| true` only neutralises the exit status; the default covers the case where
# grep printed nothing at all (for example an unreadable file).
task_count=$(grep -c '"name":"Task"' "$SESSION_FILE" || true)
task_count=${task_count:-0}
if [ "$task_count" -ge 2 ]; then
    echo " [PASS] $task_count subagents dispatched"
else
    echo " [FAIL] Only $task_count subagent(s) dispatched (expected >= 2)"
    FAILED=$((FAILED + 1))
fi
echo ""
# Test 3: TodoWrite was used for tracking
echo "Test 3: Task tracking..."
# grep -c prints "0" itself and exits 1 when nothing matches, so the fallback
# must not print a second "0" (that yields "0<newline>0" and breaks the numeric
# test). `|| true` swallows the status; the default handles empty output.
todo_count=$(grep -c '"name":"TodoWrite"' "$SESSION_FILE" || true)
todo_count=${todo_count:-0}
if [ "$todo_count" -ge 1 ]; then
    echo " [PASS] TodoWrite used $todo_count time(s) for task tracking"
else
    echo " [FAIL] TodoWrite not used"
    FAILED=$((FAILED + 1))
fi
echo ""
# Test 6: the files requested by the plan exist, export both functions, and
# the project's own test suite passes.
echo "Test 6: Implementation verification..."
math_module="$TEST_PROJECT/src/math.js"
math_tests="$TEST_PROJECT/test/math.test.js"

if [ -f "$math_module" ]; then
    echo " [PASS] src/math.js created"
    # Same check for each function the plan asks for, in plan order.
    for fn in add multiply; do
        if grep -q "export function $fn" "$math_module"; then
            echo " [PASS] $fn function exists"
        else
            echo " [FAIL] $fn function missing"
            FAILED=$((FAILED + 1))
        fi
    done
else
    echo " [FAIL] src/math.js not created"
    FAILED=$((FAILED + 1))
fi

if [ -f "$math_tests" ]; then
    echo " [PASS] test/math.test.js created"
else
    echo " [FAIL] test/math.test.js not created"
    FAILED=$((FAILED + 1))
fi

# Try running tests
# The cd is deliberately not in a subshell: the failure branch reads
# test-output.txt by relative path.
if cd "$TEST_PROJECT" && npm test > test-output.txt 2>&1; then
    echo " [PASS] Tests pass"
else
    echo " [FAIL] Tests failed"
    cat test-output.txt
    FAILED=$((FAILED + 1))
fi
echo ""
# Test 7: the run must have added commits on top of the initial one
# (initial commit + at least 2 task commits => more than 2 in total).
echo "Test 7: Git commit history..."
commit_count=$(git -C "$TEST_PROJECT" log --oneline | wc -l)
if ! [ "$commit_count" -gt 2 ]; then
    echo " [FAIL] Too few commits ($commit_count, expected >2)"
    FAILED=$((FAILED + 1))
else
    echo " [PASS] Multiple commits created ($commit_count total)"
fi
echo ""
# Test 8: look for functions the plan explicitly forbade. A hit is reported as
# a warning only and does not increment FAILED, because it measures how
# effective the spec reviewer was rather than whether the workflow ran.
echo "Test 8: No extra features added (spec compliance)..."
forbidden_exports="export function divide\|export function power\|export function subtract"
if ! grep -q "$forbidden_exports" "$TEST_PROJECT/src/math.js" 2>/dev/null; then
    echo " [PASS] No extra features added"
else
    echo " [WARN] Extra features found (spec review should have caught this)"
fi
echo ""
# Token Usage Analysis
# The per-session report is produced by the companion Python script.
cat <<'EOF'
=========================================
 Token Usage Analysis
=========================================

EOF
python3 "$SCRIPT_DIR/analyze-token-usage.py" "$SESSION_FILE"
echo ""

# Summary
cat <<'EOF'
========================================
 Test Summary
========================================

EOF

# Failure path first: report how many checks failed and where the log is.
if [ "$FAILED" -ne 0 ]; then
    echo "STATUS: FAILED"
    echo "Failed $FAILED verification tests"
    echo ""
    echo "Output saved to: $OUTPUT_FILE"
    echo ""
    echo "Review the output to see what went wrong."
    exit 1
fi

cat <<'EOF'
STATUS: PASSED
All verification tests passed!

The subagent-driven-development skill correctly:
 ✓ Reads plan once at start
 ✓ Provides full task text to subagents
 ✓ Enforces self-review
 ✓ Runs spec compliance before code quality
 ✓ Spec reviewer verifies independently
 ✓ Produces working implementation
EOF
exit 0

View File

@@ -0,0 +1,165 @@
#!/usr/bin/env bash
# Test: subagent-driven-development skill
# Verifies that the skill is loaded and follows correct workflow
#
# Each check sends one question through run_claude and pattern-matches the
# answer with the assert_* helpers (both sourced from test-helpers.sh). The
# first failed assertion stops the script with status 1.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
source "$SCRIPT_DIR/test-helpers.sh"

echo "=== Test: subagent-driven-development skill ==="
echo ""

# Test 1: the skill is known and its first step (loading the plan) is described.
echo "Test 1: Skill loading..."
output=$(run_claude "What is the subagent-driven-development skill? Describe its key steps briefly." 30)
assert_contains "$output" "subagent-driven-development\|Subagent-Driven Development\|Subagent Driven" "Skill is recognized" || exit 1
assert_contains "$output" "Load Plan\|read.*plan\|extract.*tasks" "Mentions loading plan" || exit 1
echo ""

# Test 2: spec compliance review is named before code quality review.
echo "Test 2: Workflow ordering..."
output=$(run_claude "In the subagent-driven-development skill, what comes first: spec compliance review or code quality review? Be specific about the order." 30)
assert_order "$output" "spec.*compliance" "code.*quality" "Spec compliance before code quality" || exit 1
echo ""

# Test 3: implementers are required to self-review, including completeness.
echo "Test 3: Self-review requirement..."
output=$(run_claude "Does the subagent-driven-development skill require implementers to do self-review? What should they check?" 30)
assert_contains "$output" "self-review\|self review" "Mentions self-review" || exit 1
assert_contains "$output" "completeness\|Completeness" "Checks completeness" || exit 1
echo ""

# Test 4: the plan is read a single time, at the start.
echo "Test 4: Plan reading efficiency..."
output=$(run_claude "In subagent-driven-development, how many times should the controller read the plan file? When does this happen?" 30)
assert_contains "$output" "once\|one time\|single" "Read plan once" || exit 1
assert_contains "$output" "Step 1\|beginning\|start\|Load Plan" "Read at beginning" || exit 1
echo ""

# Test 5: the spec reviewer distrusts the implementer's report and reads code.
echo "Test 5: Spec compliance reviewer mindset..."
output=$(run_claude "What is the spec compliance reviewer's attitude toward the implementer's report in subagent-driven-development?" 30)
assert_contains "$output" "not trust\|don't trust\|skeptical\|verify.*independently\|suspiciously" "Reviewer is skeptical" || exit 1
assert_contains "$output" "read.*code\|inspect.*code\|verify.*code" "Reviewer reads code" || exit 1
echo ""

# Test 6: reviews repeat until issues are fixed by the implementer.
echo "Test 6: Review loop requirements..."
output=$(run_claude "In subagent-driven-development, what happens if a reviewer finds issues? Is it a one-time review or a loop?" 30)
assert_contains "$output" "loop\|again\|repeat\|until.*approved\|until.*compliant" "Review loops mentioned" || exit 1
assert_contains "$output" "implementer.*fix\|fix.*issues" "Implementer fixes issues" || exit 1
echo ""

# Test 7: task text is handed to the subagent directly, not via a file read.
echo "Test 7: Task context provision..."
output=$(run_claude "In subagent-driven-development, how does the controller provide task information to the implementer subagent? Does it make them read a file or provide it directly?" 30)
assert_contains "$output" "provide.*directly\|full.*text\|paste\|include.*prompt" "Provides text directly" || exit 1
assert_not_contains "$output" "read.*file\|open.*file" "Doesn't make subagent read file" || exit 1
echo ""

# Test 8: the worktree prerequisite is mentioned.
echo "Test 8: Worktree requirement..."
output=$(run_claude "What workflow skills are required before using subagent-driven-development? List any prerequisites or required skills." 30)
assert_contains "$output" "using-git-worktrees\|worktree" "Mentions worktree requirement" || exit 1
echo ""

# Test 9: working directly on the main branch is discouraged.
echo "Test 9: Main branch red flag..."
output=$(run_claude "In subagent-driven-development, is it okay to start implementation directly on the main branch?" 30)
assert_contains "$output" "worktree\|feature.*branch\|not.*main\|never.*main\|avoid.*main\|don't.*main\|consent\|permission" "Warns against main branch" || exit 1
echo ""

echo "=== All subagent-driven-development skill tests passed ==="

View File

@@ -0,0 +1,3 @@
The plan is done. docs/superpowers/plans/auth-system.md has everything.
Do subagent-driven development on this - start with Task 1, dispatch a subagent, then we'll review.

View File

@@ -0,0 +1,17 @@
Great, the plan is complete. I've saved it to docs/superpowers/plans/auth-system.md.
Here's a summary of what we designed:
- Task 1: Add User Model with email/password fields
- Task 2: Create auth routes for login/register
- Task 3: Add JWT middleware for protected routes
- Task 4: Write tests for all auth functionality
Two execution options:
1. Subagent-Driven (this session) - dispatch a fresh subagent per task
2. Parallel Session (separate) - open new Claude Code session
Which approach do you want?
---
subagent-driven-development, please

View File

@@ -0,0 +1,11 @@
[Previous assistant message]:
Plan complete and saved to docs/superpowers/plans/auth-system.md.
Two execution options:
1. Subagent-Driven (this session) - I dispatch a fresh subagent per task, review between tasks, fast iteration within this conversation
2. Parallel Session (separate) - Open a new Claude Code session with the execute-plan skill, batch execution with review checkpoints
Which approach do you want to use for implementation?
[Your response]:
subagent-driven-development, please

View File

@@ -0,0 +1,8 @@
I have my implementation plan ready at docs/superpowers/plans/auth-system.md.
I want to use subagent-driven-development to execute it. That means:
- Dispatch a fresh subagent for each task in the plan
- Review the output between tasks
- Keep iteration fast within this conversation
Let's start - please read the plan and begin dispatching subagents for each task.

View File

@@ -0,0 +1,3 @@
I have a plan at docs/superpowers/plans/auth-system.md that's ready to implement.
subagent-driven-development, please

View File

@@ -0,0 +1 @@
please use the brainstorming skill to help me think through this feature

View File

@@ -0,0 +1,3 @@
Plan is at docs/superpowers/plans/auth-system.md.
subagent-driven-development, please. Don't waste time - just read the plan and start dispatching subagents immediately.

View File

@@ -0,0 +1 @@
subagent-driven-development, please

View File

@@ -0,0 +1 @@
use systematic-debugging to figure out what's wrong

View File

@@ -0,0 +1,70 @@
#!/usr/bin/env bash
# Run all explicit skill request tests
# Usage: ./run-all.sh
#
# Exits 1 if any case fails, after printing a per-case and total summary.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROMPTS_DIR="$SCRIPT_DIR/prompts"

echo "=== Running All Explicit Skill Request Tests ==="
echo ""

PASSED=0
FAILED=0
RESULTS=""

# Run one case through run-test.sh and record the outcome.
#   $1 - ordinal shown in the heading
#   $2 - case label; also the prompt file's base name under PROMPTS_DIR
#   $3 - skill name the case expects to be invoked
run_case() {
    local ordinal="$1" label="$2" skill="$3"

    echo ">>> Test $ordinal: $label"
    if "$SCRIPT_DIR/run-test.sh" "$skill" "$PROMPTS_DIR/$label.txt"; then
        PASSED=$((PASSED + 1))
        RESULTS="$RESULTS\nPASS: $label"
    else
        FAILED=$((FAILED + 1))
        RESULTS="$RESULTS\nFAIL: $label"
    fi
    echo ""
}

run_case 1 "subagent-driven-development-please" "subagent-driven-development"
run_case 2 "use-systematic-debugging" "systematic-debugging"
run_case 3 "please-use-brainstorming" "brainstorming"
run_case 4 "mid-conversation-execute-plan" "subagent-driven-development"

echo "=== Summary ==="
# RESULTS holds literal "\n" separators; -e turns them into line breaks.
echo -e "$RESULTS"
echo ""
echo "Passed: $PASSED"
echo "Failed: $FAILED"
echo "Total: $((PASSED + FAILED))"

if [ "$FAILED" -gt 0 ]; then
    exit 1
fi

View File

@@ -0,0 +1,100 @@
#!/usr/bin/env bash
# Test where Claude explicitly describes subagent-driven-development before user requests it
# This mimics the original failure scenario
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/claude-describes"
PROJECT_DIR="$OUTPUT_DIR/project"
mkdir -p "$OUTPUT_DIR"
mkdir -p "$PROJECT_DIR/docs/superpowers/plans"

echo "=== Test: Claude Describes SDD First ==="
echo "Output dir: $OUTPUT_DIR"
echo ""

cd "$PROJECT_DIR"

# Create a plan
cat > "$PROJECT_DIR/docs/superpowers/plans/auth-system.md" << 'EOF'
# Auth System Implementation Plan
## Task 1: Add User Model
Create user model with email and password fields.
## Task 2: Add Auth Routes
Create login and register endpoints.
## Task 3: Add JWT Middleware
Protect routes with JWT validation.
EOF

# Run one headless turn with the haiku model and capture the JSON stream.
#   $1 - log file, $2 - --max-turns value, $3 - prompt,
#   remaining args - extra flags placed right after the prompt (e.g. --continue)
# A failing claude invocation is tolerated; the log is inspected afterwards.
ask_claude() {
    local log_path="$1" turn_limit="$2" prompt_text="$3"
    shift 3
    claude -p "$prompt_text" "$@" \
        --model haiku \
        --plugin-dir "$PLUGIN_DIR" \
        --dangerously-skip-permissions \
        --max-turns "$turn_limit" \
        --output-format stream-json \
        > "$log_path" 2>&1 || true
}

# Turn 1: Have Claude describe execution options including SDD
echo ">>> Turn 1: Ask Claude to describe execution options..."
ask_claude "$OUTPUT_DIR/turn1.json" 3 "I have a plan at docs/superpowers/plans/auth-system.md. Tell me about my options for executing it, including what subagent-driven-development means and how it works."
echo "Done."

# Turn 2: THE CRITICAL TEST - now that Claude has explained it
echo ">>> Turn 2: Request subagent-driven-development..."
FINAL_LOG="$OUTPUT_DIR/turn2.json"
ask_claude "$FINAL_LOG" 2 "subagent-driven-development, please" --continue
echo "Done."
echo ""
echo "=== Results ==="

# Print stdin when it has content, otherwise print the placeholder in $1.
# The earlier `pipeline || echo placeholder` form could never show the
# placeholder: without pipefail a pipeline's status is that of its last command
# (head/sort here), which succeeds even when it received no input.
show_or() {
    local captured
    captured=$(cat)
    if [ -n "$captured" ]; then
        printf '%s\n' "$captured"
    else
        echo "$1"
    fi
}

# Print up to 800 bytes of the first assistant message recorded in log $1.
assistant_excerpt() {
    grep '"type":"assistant"' "$1" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 800
}

# Check Turn 1 to see if Claude described SDD
echo "Turn 1 - Claude's description of options (excerpt):"
assistant_excerpt "$OUTPUT_DIR/turn1.json" | show_or " (could not extract)"
echo ""
echo "---"
echo ""

# Check final turn: a Skill tool call must be present and must name the skill,
# with or without a "<namespace>:" prefix.
SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'
if grep -q '"name":"Skill"' "$FINAL_LOG" && grep -qE "$SKILL_PATTERN" "$FINAL_LOG"; then
    echo "PASS: Skill was triggered after Claude described it"
    TRIGGERED=true
else
    echo "FAIL: Skill was NOT triggered (Claude may have thought it already knew)"
    TRIGGERED=false
    echo ""
    echo "Tools invoked in final turn:"
    grep '"type":"tool_use"' "$FINAL_LOG" | grep -o '"name":"[^"]*"' | sort -u | head -10 | show_or " (none)"
    echo ""
    echo "Final turn response:"
    assistant_excerpt "$FINAL_LOG" | show_or " (could not extract)"
fi

echo ""
echo "Skills triggered in final turn:"
grep -o '"skill":"[^"]*"' "$FINAL_LOG" 2>/dev/null | sort -u | show_or " (none)"
echo ""
echo "Logs in: $OUTPUT_DIR"

if [ "$TRIGGERED" = "true" ]; then
    exit 0
else
    exit 1
fi

View File

@@ -0,0 +1,113 @@
#!/usr/bin/env bash
# Extended multi-turn test with more conversation history
# This tries to reproduce the failure by building more context
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/extended-multiturn"
PROJECT_DIR="$OUTPUT_DIR/project"
mkdir -p "$OUTPUT_DIR"
mkdir -p "$PROJECT_DIR/docs/superpowers/plans"

echo "=== Extended Multi-Turn Test ==="
echo "Output dir: $OUTPUT_DIR"
echo "Plugin dir: $PLUGIN_DIR"
echo ""

cd "$PROJECT_DIR"

# Run one headless turn and capture the JSON stream.
#   $1 - log file, $2 - --max-turns value, $3 - prompt,
#   remaining args - extra flags placed right after the prompt (e.g. --continue)
# A failing claude invocation is tolerated; only the final log is judged.
run_turn() {
    local log_path="$1" turn_limit="$2" prompt_text="$3"
    shift 3
    claude -p "$prompt_text" "$@" \
        --plugin-dir "$PLUGIN_DIR" \
        --dangerously-skip-permissions \
        --max-turns "$turn_limit" \
        --output-format stream-json \
        > "$log_path" 2>&1 || true
}

# Turn 1: Start brainstorming
echo ">>> Turn 1: Brainstorming request..."
run_turn "$OUTPUT_DIR/turn1.json" 3 "I want to add user authentication to my app. Help me think through this."
echo "Done."

# Turn 2: Answer a brainstorming question
echo ">>> Turn 2: Answering questions..."
run_turn "$OUTPUT_DIR/turn2.json" 3 "Let's use JWT tokens with 24-hour expiry. Email/password registration." --continue
echo "Done."

# Turn 3: Ask to write a plan
echo ">>> Turn 3: Requesting plan..."
run_turn "$OUTPUT_DIR/turn3.json" 3 "Great, write this up as an implementation plan." --continue
echo "Done."

# Turn 4: Confirm plan looks good
echo ">>> Turn 4: Confirming plan..."
run_turn "$OUTPUT_DIR/turn4.json" 2 "The plan looks good. What are my options for executing it?" --continue
echo "Done."

# Turn 5: THE CRITICAL TEST
echo ">>> Turn 5: Requesting subagent-driven-development..."
FINAL_LOG="$OUTPUT_DIR/turn5.json"
run_turn "$FINAL_LOG" 2 "subagent-driven-development, please" --continue
echo "Done."
echo ""
echo "=== Results ==="

# Print stdin when it has content, otherwise print the placeholder in $1.
# `pipeline || echo placeholder` cannot do this job: without pipefail the
# pipeline's status comes from its last command (head/sort), which succeeds
# on empty input, so the placeholder was never shown.
show_or() {
    local captured
    captured=$(cat)
    if [ -n "$captured" ]; then
        printf '%s\n' "$captured"
    else
        echo "$1"
    fi
}

# Check final turn: a Skill tool call must be present and must name the skill,
# with or without a "<namespace>:" prefix.
SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'
if grep -q '"name":"Skill"' "$FINAL_LOG" && grep -qE "$SKILL_PATTERN" "$FINAL_LOG"; then
    echo "PASS: Skill was triggered"
    TRIGGERED=true
else
    echo "FAIL: Skill was NOT triggered"
    TRIGGERED=false
    # Show what was invoked instead
    echo ""
    echo "Tools invoked in final turn:"
    # Prefer tool names parsed with jq; if that yields nothing (jq missing or
    # the JSON shape differs), fall back to a plain-text scan for "name"
    # fields. The results are captured so each fallback actually runs.
    invoked_tools=$(grep '"type":"tool_use"' "$FINAL_LOG" | jq -r '.content[] | select(.type=="tool_use") | .name' 2>/dev/null | head -10 || true)
    if [ -z "$invoked_tools" ]; then
        invoked_tools=$(grep -o '"name":"[^"]*"' "$FINAL_LOG" | head -10 || true)
    fi
    if [ -n "$invoked_tools" ]; then
        echo "$invoked_tools"
    else
        echo " (none found)"
    fi
fi

echo ""
echo "Skills triggered:"
grep -o '"skill":"[^"]*"' "$FINAL_LOG" 2>/dev/null | sort -u | show_or " (none)"
echo ""
echo "Final turn response (first 500 chars):"
grep '"type":"assistant"' "$FINAL_LOG" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 500 | show_or " (could not extract)"
echo ""
echo "Logs in: $OUTPUT_DIR"

if [ "$TRIGGERED" = "true" ]; then
    exit 0
else
    exit 1
fi

View File

@@ -0,0 +1,144 @@
#!/usr/bin/env bash
# Test with haiku model and user's CLAUDE.md
# This tests whether a cheaper/faster model fails more easily
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/haiku"
PROJECT_DIR="$OUTPUT_DIR/project"
mkdir -p "$OUTPUT_DIR"
mkdir -p "$PROJECT_DIR/docs/superpowers/plans"
mkdir -p "$PROJECT_DIR/.claude"

echo "=== Haiku Model Test with User CLAUDE.md ==="
echo "Output dir: $OUTPUT_DIR"
echo "Plugin dir: $PLUGIN_DIR"
echo ""

cd "$PROJECT_DIR"

# Copy user's CLAUDE.md to simulate real environment
user_claude_md="$HOME/.claude/CLAUDE.md"
if [ ! -f "$user_claude_md" ]; then
    echo "No user CLAUDE.md found, proceeding without"
else
    cp "$user_claude_md" "$PROJECT_DIR/.claude/CLAUDE.md"
    echo "Copied user CLAUDE.md"
fi

# Create a dummy plan file
cat > "$PROJECT_DIR/docs/superpowers/plans/auth-system.md" << 'EOF'
# Auth System Implementation Plan
## Task 1: Add User Model
Create user model with email and password fields.
## Task 2: Add Auth Routes
Create login and register endpoints.
## Task 3: Add JWT Middleware
Protect routes with JWT validation.
## Task 4: Write Tests
Add comprehensive test coverage.
EOF
echo ""

# Run one headless turn with the haiku model and capture the JSON stream.
#   $1 - log file, $2 - --max-turns value, $3 - prompt,
#   remaining args - extra flags placed right after the prompt (e.g. --continue)
# A failing claude invocation is tolerated; only the final log is judged.
run_turn() {
    local log_path="$1" turn_limit="$2" prompt_text="$3"
    shift 3
    claude -p "$prompt_text" "$@" \
        --model haiku \
        --plugin-dir "$PLUGIN_DIR" \
        --dangerously-skip-permissions \
        --max-turns "$turn_limit" \
        --output-format stream-json \
        > "$log_path" 2>&1 || true
}

# Turn 1: Start brainstorming
echo ">>> Turn 1: Brainstorming request..."
run_turn "$OUTPUT_DIR/turn1.json" 3 "I want to add user authentication to my app. Help me think through this."
echo "Done."

# Turn 2: Answer questions
echo ">>> Turn 2: Answering questions..."
run_turn "$OUTPUT_DIR/turn2.json" 3 "Let's use JWT tokens with 24-hour expiry. Email/password registration." --continue
echo "Done."

# Turn 3: Ask to write a plan
echo ">>> Turn 3: Requesting plan..."
run_turn "$OUTPUT_DIR/turn3.json" 3 "Great, write this up as an implementation plan." --continue
echo "Done."

# Turn 4: Confirm plan looks good
echo ">>> Turn 4: Confirming plan..."
run_turn "$OUTPUT_DIR/turn4.json" 2 "The plan looks good. What are my options for executing it?" --continue
echo "Done."

# Turn 5: THE CRITICAL TEST
echo ">>> Turn 5: Requesting subagent-driven-development..."
FINAL_LOG="$OUTPUT_DIR/turn5.json"
run_turn "$FINAL_LOG" 2 "subagent-driven-development, please" --continue
echo "Done."
echo ""
echo "=== Results (Haiku) ==="

# Print stdin when it has content, otherwise print the placeholder in $1.
# `pipeline || echo placeholder` cannot do this job: without pipefail the
# pipeline's status comes from its last command (head/sort), which succeeds
# on empty input, so the placeholder was never shown.
show_or() {
    local captured
    captured=$(cat)
    if [ -n "$captured" ]; then
        printf '%s\n' "$captured"
    else
        echo "$1"
    fi
}

# Check final turn: a Skill tool call must be present and must name the skill,
# with or without a "<namespace>:" prefix.
SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'
if grep -q '"name":"Skill"' "$FINAL_LOG" && grep -qE "$SKILL_PATTERN" "$FINAL_LOG"; then
    echo "PASS: Skill was triggered"
    TRIGGERED=true
else
    echo "FAIL: Skill was NOT triggered"
    TRIGGERED=false
    echo ""
    echo "Tools invoked in final turn:"
    grep '"type":"tool_use"' "$FINAL_LOG" | grep -o '"name":"[^"]*"' | head -10 | show_or " (none)"
fi

echo ""
echo "Skills triggered:"
grep -o '"skill":"[^"]*"' "$FINAL_LOG" 2>/dev/null | sort -u | show_or " (none)"
echo ""
echo "Final turn response (first 500 chars):"
grep '"type":"assistant"' "$FINAL_LOG" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 500 | show_or " (could not extract)"
echo ""
echo "Logs in: $OUTPUT_DIR"

if [ "$TRIGGERED" = "true" ]; then
    exit 0
else
    exit 1
fi

View File

@@ -0,0 +1,143 @@
#!/usr/bin/env bash
# Test explicit skill requests in multi-turn conversations
# Usage: ./run-multiturn-test.sh
#
# This test builds actual conversation history to reproduce the failure mode
# where Claude skips skill invocation after extended conversation
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/multiturn"
mkdir -p "$OUTPUT_DIR"

# Create project directory (conversation is cwd-based)
PROJECT_DIR="$OUTPUT_DIR/project"
mkdir -p "$PROJECT_DIR/docs/superpowers/plans"

echo "=== Multi-Turn Explicit Skill Request Test ==="
echo "Output dir: $OUTPUT_DIR"
echo "Project dir: $PROJECT_DIR"
echo "Plugin dir: $PLUGIN_DIR"
echo ""

cd "$PROJECT_DIR"

# Create a dummy plan file
cat > "$PROJECT_DIR/docs/superpowers/plans/auth-system.md" << 'EOF'
# Auth System Implementation Plan
## Task 1: Add User Model
Create user model with email and password fields.
## Task 2: Add Auth Routes
Create login and register endpoints.
## Task 3: Add JWT Middleware
Protect routes with JWT validation.
## Task 4: Write Tests
Add comprehensive test coverage.
EOF

# Run one headless turn (at most 2 agent turns) and capture the JSON stream.
#   $1 - log file, $2 - prompt,
#   remaining args - extra flags placed right after the prompt (e.g. --continue)
# A failing claude invocation is tolerated; the Turn 3 log is judged later.
run_turn() {
    local log_path="$1" prompt_text="$2"
    shift 2
    claude -p "$prompt_text" "$@" \
        --plugin-dir "$PLUGIN_DIR" \
        --dangerously-skip-permissions \
        --max-turns 2 \
        --output-format stream-json \
        > "$log_path" 2>&1 || true
}

# Turn 1: Start a planning conversation
echo ">>> Turn 1: Starting planning conversation..."
TURN1_LOG="$OUTPUT_DIR/turn1.json"
run_turn "$TURN1_LOG" "I need to implement an authentication system. Let's plan this out. The requirements are: user registration with email/password, JWT tokens, and protected routes."
echo "Turn 1 complete."
echo ""

# Turn 2: Continue with more planning detail
echo ">>> Turn 2: Continuing planning..."
TURN2_LOG="$OUTPUT_DIR/turn2.json"
run_turn "$TURN2_LOG" "Good analysis. I've already written the plan to docs/superpowers/plans/auth-system.md. Now I'm ready to implement. What are my options for execution?" --continue
echo "Turn 2 complete."
echo ""

# Turn 3: The critical test - ask for subagent-driven-development
echo ">>> Turn 3: Requesting subagent-driven-development..."
TURN3_LOG="$OUTPUT_DIR/turn3.json"
run_turn "$TURN3_LOG" "subagent-driven-development, please" --continue
echo "Turn 3 complete."
echo ""
echo "=== Results ==="

# Print stdin when it has content, otherwise print the placeholder in $1.
# `pipeline || echo placeholder` cannot do this job: without pipefail the
# pipeline's status comes from its last command (head/sort), which succeeds
# on empty input, so the placeholder was never shown.
show_or() {
    local captured
    captured=$(cat)
    if [ -n "$captured" ]; then
        printf '%s\n' "$captured"
    else
        echo "$1"
    fi
}

# Check if skill was triggered in Turn 3: a Skill tool call must be present and
# must name the skill, with or without a "<namespace>:" prefix.
SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'
if grep -q '"name":"Skill"' "$TURN3_LOG" && grep -qE "$SKILL_PATTERN" "$TURN3_LOG"; then
    echo "PASS: Skill 'subagent-driven-development' was triggered in Turn 3"
    TRIGGERED=true
else
    echo "FAIL: Skill 'subagent-driven-development' was NOT triggered in Turn 3"
    TRIGGERED=false
fi

# Show what skills were triggered
echo ""
echo "Skills triggered in Turn 3:"
grep -o '"skill":"[^"]*"' "$TURN3_LOG" 2>/dev/null | sort -u | show_or " (none)"

# Check for premature action in Turn 3: any tool call logged at or before the
# first Skill line, other than Skill itself and TodoWrite, is reported.
echo ""
echo "Checking for premature action in Turn 3..."
FIRST_SKILL_LINE=$(grep -n '"name":"Skill"' "$TURN3_LOG" | head -1 | cut -d: -f1)
if [ -n "$FIRST_SKILL_LINE" ]; then
    # `|| true`: the final grep exits 1 when nothing is left, which is the
    # good case here and must not trip `set -e`.
    PREMATURE_TOOLS=$(head -n "$FIRST_SKILL_LINE" "$TURN3_LOG" | \
        grep '"type":"tool_use"' | \
        grep -v '"name":"Skill"' | \
        grep -v '"name":"TodoWrite"' || true)
    if [ -n "$PREMATURE_TOOLS" ]; then
        echo "WARNING: Tools invoked BEFORE Skill tool in Turn 3:"
        echo "$PREMATURE_TOOLS" | head -5
    else
        echo "OK: No premature tool invocations detected"
    fi
else
    echo "WARNING: No Skill invocation found in Turn 3"
    # Show what WAS invoked
    echo ""
    echo "Tools invoked in Turn 3:"
    grep '"type":"tool_use"' "$TURN3_LOG" | grep -o '"name":"[^"]*"' | head -10 | show_or " (none)"
fi

# Show Turn 3 assistant response
echo ""
echo "Turn 3 first assistant response (truncated):"
grep '"type":"assistant"' "$TURN3_LOG" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 500 | show_or " (could not extract)"
echo ""
echo "Logs:"
echo " Turn 1: $TURN1_LOG"
echo " Turn 2: $TURN2_LOG"
echo " Turn 3: $TURN3_LOG"
echo "Timestamp: $TIMESTAMP"

if [ "$TRIGGERED" = "true" ]; then
    exit 0
else
    exit 1
fi

View File

@@ -0,0 +1,136 @@
#!/usr/bin/env bash
# Test explicit skill requests (user names a skill directly)
# Usage: ./run-test.sh <skill-name> <prompt-file> [max-turns]
#
# Tests whether Claude invokes a skill when the user explicitly requests it by name
# (without using the plugin namespace prefix).
#
# The run happens inside a freshly created project directory under
# /tmp/superpowers-tests/<timestamp>/explicit-skill-requests/<skill-name>/.
# NOTE(review): an earlier header said "Uses isolated HOME", but this script never
# overrides HOME - confirm whether the caller is expected to provide that isolation.
#
# Exit status: 0 if the requested skill was triggered, 1 otherwise.

set -e

SKILL_NAME="$1"
PROMPT_FILE="$2"
MAX_TURNS="${3:-3}"

if [ -z "$SKILL_NAME" ] || [ -z "$PROMPT_FILE" ]; then
    echo "Usage: $0 <skill-name> <prompt-file> [max-turns]"
    echo "Example: $0 subagent-driven-development ./prompts/subagent-driven-development-please.txt"
    exit 1
fi

# Fail early with a clear message instead of a bare `cat` error after the
# output directory has already been created.
if [ ! -f "$PROMPT_FILE" ]; then
    echo "Error: prompt file not found: $PROMPT_FILE" >&2
    exit 1
fi

# Get the directory where this script lives
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Get the superpowers plugin root (two levels up)
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/${SKILL_NAME}"
mkdir -p "$OUTPUT_DIR"

# Read prompt from file (before any `cd`, so a relative path still resolves)
PROMPT=$(cat "$PROMPT_FILE")

echo "=== Explicit Skill Request Test ==="
echo "Skill: $SKILL_NAME"
echo "Prompt file: $PROMPT_FILE"
echo "Max turns: $MAX_TURNS"
echo "Output dir: $OUTPUT_DIR"
echo ""

# Copy prompt for reference
cp "$PROMPT_FILE" "$OUTPUT_DIR/prompt.txt"

# Create a minimal project directory for the test
PROJECT_DIR="$OUTPUT_DIR/project"
mkdir -p "$PROJECT_DIR/docs/superpowers/plans"

# Create a dummy plan file for mid-conversation tests
cat > "$PROJECT_DIR/docs/superpowers/plans/auth-system.md" << 'EOF'
# Auth System Implementation Plan
## Task 1: Add User Model
Create user model with email and password fields.
## Task 2: Add Auth Routes
Create login and register endpoints.
## Task 3: Add JWT Middleware
Protect routes with JWT validation.
EOF

# Run Claude from inside the throwaway project directory
LOG_FILE="$OUTPUT_DIR/claude-output.json"
cd "$PROJECT_DIR"

echo "Plugin dir: $PLUGIN_DIR"
echo "Running claude -p with explicit skill request..."
echo "Prompt: $PROMPT"
echo ""

# `|| true`: a non-zero exit (including the 300s timeout) must not abort the
# script; the verdict is derived from the captured log below.
timeout 300 claude -p "$PROMPT" \
    --plugin-dir "$PLUGIN_DIR" \
    --dangerously-skip-permissions \
    --max-turns "$MAX_TURNS" \
    --output-format stream-json \
    > "$LOG_FILE" 2>&1 || true

echo ""
echo "=== Results ==="

# Check if skill was triggered (look for Skill tool invocation)
# Match either "skill":"skillname" or "skill":"namespace:skillname"
SKILL_PATTERN='"skill":"([^"]*:)?'"${SKILL_NAME}"'"'
if grep -q '"name":"Skill"' "$LOG_FILE" && grep -qE "$SKILL_PATTERN" "$LOG_FILE"; then
    echo "PASS: Skill '$SKILL_NAME' was triggered"
    TRIGGERED=true
else
    echo "FAIL: Skill '$SKILL_NAME' was NOT triggered"
    TRIGGERED=false
fi

# Show what skills WERE triggered.
# The result is captured first: in `grep | sort -u || echo`, the fallback is tied
# to sort's exit status (always 0), so "(none)" would never be printed.
echo ""
echo "Skills triggered in this run:"
TRIGGERED_SKILLS=$(grep -o '"skill":"[^"]*"' "$LOG_FILE" 2>/dev/null | sort -u || true)
if [ -n "$TRIGGERED_SKILLS" ]; then
    echo "$TRIGGERED_SKILLS"
else
    echo " (none)"
fi

# Check if Claude took action BEFORE invoking the skill (the failure mode)
echo ""
echo "Checking for premature action..."

# Look for tool invocations before the Skill invocation
# This detects the failure mode where Claude starts doing work without loading the skill
FIRST_SKILL_LINE=$(grep -n '"name":"Skill"' "$LOG_FILE" | head -1 | cut -d: -f1)
if [ -n "$FIRST_SKILL_LINE" ]; then
    # Check if any non-Skill tools were invoked up to the first Skill invocation.
    # TodoWrite is filtered out as well (planning is ok).
    PREMATURE_TOOLS=$(head -n "$FIRST_SKILL_LINE" "$LOG_FILE" | \
        grep '"type":"tool_use"' | \
        grep -v '"name":"Skill"' | \
        grep -v '"name":"TodoWrite"' || true)
    if [ -n "$PREMATURE_TOOLS" ]; then
        echo "WARNING: Tools invoked BEFORE Skill tool:"
        echo "$PREMATURE_TOOLS" | head -5
        echo ""
        echo "This indicates Claude started working before loading the requested skill."
    else
        echo "OK: No premature tool invocations detected"
    fi
else
    echo "WARNING: No Skill invocation found at all"
fi

# Show first assistant message (captured first so the fallback can actually fire)
echo ""
echo "First assistant response (truncated):"
FIRST_RESPONSE=$(grep '"type":"assistant"' "$LOG_FILE" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 500 || true)
if [ -n "$FIRST_RESPONSE" ]; then
    printf '%s\n' "$FIRST_RESPONSE"
else
    echo " (could not extract)"
fi

echo ""
echo "Full log: $LOG_FILE"
echo "Timestamp: $TIMESTAMP"

if [ "$TRIGGERED" = "true" ]; then
    exit 0
else
    exit 1
fi

View File

@@ -0,0 +1,163 @@
#!/usr/bin/env bash
# Main test runner for OpenCode plugin test suite
# Runs all tests and reports results
#
# Options are parsed below; see --help. Exit status is 1 if any test failed,
# 0 otherwise (skipped tests do not affect the exit status).

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

echo "========================================"
echo " OpenCode Plugin Test Suite"
echo "========================================"
echo ""
echo "Repository: $(cd ../.. && pwd)"
echo "Test time: $(date)"
echo ""

# Parse command line arguments
RUN_INTEGRATION=false
VERBOSE=false
SPECIFIC_TEST=""

while [[ $# -gt 0 ]]; do
    case $1 in
        --integration|-i)
            RUN_INTEGRATION=true
            shift
            ;;
        --verbose|-v)
            VERBOSE=true
            shift
            ;;
        --test|-t)
            # Under `set -u`, reading "$2" when it is absent aborts with an
            # opaque "unbound variable" error; report the real problem instead.
            if [[ $# -lt 2 ]]; then
                echo "Option $1 requires a test name"
                echo "Use --help for usage information"
                exit 1
            fi
            SPECIFIC_TEST="$2"
            shift 2
            ;;
        --help|-h)
            echo "Usage: $0 [options]"
            echo ""
            echo "Options:"
            echo " --integration, -i Run integration tests (requires OpenCode)"
            echo " --verbose, -v Show verbose output"
            echo " --test, -t NAME Run only the specified test"
            echo " --help, -h Show this help"
            echo ""
            echo "Tests:"
            echo " test-plugin-loading.sh Verify plugin installation and structure"
            echo " test-tools.sh Test use_skill and find_skills tools (integration)"
            echo " test-priority.sh Test skill priority resolution (integration)"
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            echo "Use --help for usage information"
            exit 1
            ;;
    esac
done

# List of tests to run (no external dependencies)
tests=(
    "test-plugin-loading.sh"
)

# Integration tests (require OpenCode)
integration_tests=(
    "test-tools.sh"
    "test-priority.sh"
)

# Add integration tests if requested
if [ "$RUN_INTEGRATION" = true ]; then
    tests+=("${integration_tests[@]}")
fi

# Filter to specific test if requested (replaces the whole list)
if [ -n "$SPECIFIC_TEST" ]; then
    tests=("$SPECIFIC_TEST")
fi

# Track results
passed=0
failed=0
skipped=0

# Run each test
for test in "${tests[@]}"; do
    echo "----------------------------------------"
    echo "Running: $test"
    echo "----------------------------------------"

    test_path="$SCRIPT_DIR/$test"

    if [ ! -f "$test_path" ]; then
        echo " [SKIP] Test file not found: $test"
        skipped=$((skipped + 1))
        continue
    fi

    if [ ! -x "$test_path" ]; then
        echo " Making $test executable..."
        chmod +x "$test_path"
    fi

    # Run the test once, remembering its exit status; verbose mode streams the
    # output live, non-verbose mode captures it and only shows it on failure.
    start_time=$(date +%s)
    status=0
    output=""
    if [ "$VERBOSE" = true ]; then
        bash "$test_path" || status=$?
    else
        output=$(bash "$test_path" 2>&1) || status=$?
    fi
    end_time=$(date +%s)
    duration=$((end_time - start_time))

    if [ "$status" -eq 0 ]; then
        if [ "$VERBOSE" = true ]; then
            echo ""
            echo " [PASS] $test (${duration}s)"
        else
            echo " [PASS] (${duration}s)"
        fi
        passed=$((passed + 1))
    else
        if [ "$VERBOSE" = true ]; then
            echo ""
            echo " [FAIL] $test (${duration}s)"
        else
            echo " [FAIL] (${duration}s)"
            echo ""
            echo " Output:"
            echo "$output" | sed 's/^/ /'
        fi
        failed=$((failed + 1))
    fi

    echo ""
done

# Print summary
echo "========================================"
echo " Test Results Summary"
echo "========================================"
echo ""
echo " Passed: $passed"
echo " Failed: $failed"
echo " Skipped: $skipped"
echo ""

if [ "$RUN_INTEGRATION" = false ] && [ ${#integration_tests[@]} -gt 0 ]; then
    echo "Note: Integration tests were not run."
    echo "Use --integration flag to run tests that require OpenCode."
    echo ""
fi

if [ "$failed" -gt 0 ]; then
    echo "STATUS: FAILED"
    exit 1
else
    echo "STATUS: PASSED"
    exit 0
fi

View File

@@ -0,0 +1,73 @@
#!/usr/bin/env bash
# Setup script for OpenCode plugin tests
# Creates an isolated test environment with proper plugin installation
#
# Intended to be sourced by the test scripts in this directory. After it runs,
# the caller has HOME, XDG_CONFIG_HOME, OPENCODE_CONFIG_DIR, TEST_HOME and
# REPO_ROOT exported, plus the cleanup_test_env function.

set -euo pipefail

# Get the repository root (two levels up from tests/opencode/).
# BASH_SOURCE[0] is this file's own path even when it is sourced, whereas $0
# would be the path of whichever script sourced it.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Create temp home directory for isolation.
# Assign and export separately: `export VAR=$(cmd)` returns export's status,
# which would hide a mktemp failure from `set -e` and leave HOME empty.
TEST_HOME="$(mktemp -d)"
export TEST_HOME
export HOME="$TEST_HOME"
export XDG_CONFIG_HOME="$TEST_HOME/.config"
export OPENCODE_CONFIG_DIR="$TEST_HOME/.config/opencode"

# Install plugin to test location
mkdir -p "$HOME/.config/opencode/superpowers"
cp -r "$REPO_ROOT/lib" "$HOME/.config/opencode/superpowers/"
cp -r "$REPO_ROOT/skills" "$HOME/.config/opencode/superpowers/"

# Copy plugin directory
mkdir -p "$HOME/.config/opencode/superpowers/.opencode/plugins"
cp "$REPO_ROOT/.opencode/plugins/superpowers.js" "$HOME/.config/opencode/superpowers/.opencode/plugins/"

# Register plugin via symlink
mkdir -p "$HOME/.config/opencode/plugins"
ln -sf "$HOME/.config/opencode/superpowers/.opencode/plugins/superpowers.js" \
    "$HOME/.config/opencode/plugins/superpowers.js"

# Create test skills in different locations for testing

# Personal test skill
mkdir -p "$HOME/.config/opencode/skills/personal-test"
cat > "$HOME/.config/opencode/skills/personal-test/SKILL.md" <<'EOF'
---
name: personal-test
description: Test personal skill for verification
---
# Personal Test Skill
This is a personal skill used for testing.
PERSONAL_SKILL_MARKER_12345
EOF

# Create a project directory for project-level skill tests
mkdir -p "$TEST_HOME/test-project/.opencode/skills/project-test"
cat > "$TEST_HOME/test-project/.opencode/skills/project-test/SKILL.md" <<'EOF'
---
name: project-test
description: Test project skill for verification
---
# Project Test Skill
This is a project skill used for testing.
PROJECT_SKILL_MARKER_67890
EOF

echo "Setup complete: $TEST_HOME"
echo "Plugin installed to: $HOME/.config/opencode/superpowers/.opencode/plugins/superpowers.js"
echo "Plugin registered at: $HOME/.config/opencode/plugins/superpowers.js"
echo "Test project at: $TEST_HOME/test-project"

# Helper function for cleanup (call from tests or trap).
# Removes the temporary home directory; a no-op if TEST_HOME is unset/empty or
# no longer exists, so it is safe to call more than once.
cleanup_test_env() {
    if [ -n "${TEST_HOME:-}" ] && [ -d "$TEST_HOME" ]; then
        rm -rf "$TEST_HOME"
    fi
}

# Export for use in tests
export -f cleanup_test_env
export REPO_ROOT

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env bash
# Test: Plugin Loading
# Verifies that the superpowers plugin loads correctly in OpenCode
#
# Every check below only inspects files created by setup.sh (plus a
# `node --check` syntax pass); any failed check exits 1 immediately.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

echo "=== Test: Plugin Loading ==="

# Source setup to create isolated environment
source "$SCRIPT_DIR/setup.sh"

# Trap to cleanup on exit
trap cleanup_test_env EXIT

plugin_link="$HOME/.config/opencode/plugins/superpowers.js"

# Test 1: Verify plugin file exists and is registered
echo "Test 1: Checking plugin registration..."
if [ -L "$plugin_link" ]; then
    echo " [PASS] Plugin symlink exists"
else
    echo " [FAIL] Plugin symlink not found at $HOME/.config/opencode/plugins/superpowers.js"
    exit 1
fi

# Verify symlink target exists.
# `[ -f ]` follows symlinks itself, so it is true only when the link resolves
# to a regular file; this avoids `readlink -f`, which not every readlink
# implementation supports.
if [ -f "$plugin_link" ]; then
    echo " [PASS] Plugin symlink target exists"
else
    echo " [FAIL] Plugin symlink target does not exist"
    exit 1
fi

# Test 2: Verify skills directory is populated
echo "Test 2: Checking skills directory..."
skill_count=$(find "$HOME/.config/opencode/superpowers/skills" -name "SKILL.md" | wc -l)
# Arithmetic expansion drops the leading padding some `wc` implementations emit,
# so the count prints cleanly in the message below.
skill_count=$((skill_count))
if [ "$skill_count" -gt 0 ]; then
    echo " [PASS] Found $skill_count skills installed"
else
    echo " [FAIL] No skills found in installed location"
    exit 1
fi

# NOTE: test numbering jumps from 2 to 4; labels are kept as-is so the
# script's output stays unchanged.

# Test 4: Check using-superpowers skill exists (critical for bootstrap)
echo "Test 4: Checking using-superpowers skill (required for bootstrap)..."
if [ -f "$HOME/.config/opencode/superpowers/skills/using-superpowers/SKILL.md" ]; then
    echo " [PASS] using-superpowers skill exists"
else
    echo " [FAIL] using-superpowers skill not found (required for bootstrap)"
    exit 1
fi

# Test 5: Verify plugin JavaScript syntax (basic check)
echo "Test 5: Checking plugin JavaScript syntax..."
plugin_file="$HOME/.config/opencode/superpowers/.opencode/plugins/superpowers.js"
# Distinguish "node is not installed" from "the plugin failed the syntax check";
# both still fail the test, but the message now names the real cause.
if ! command -v node &> /dev/null; then
    echo " [FAIL] node not found; cannot check plugin JavaScript syntax"
    exit 1
fi
if node --check "$plugin_file" 2>/dev/null; then
    echo " [PASS] Plugin JavaScript syntax is valid"
else
    echo " [FAIL] Plugin has JavaScript syntax errors"
    exit 1
fi

# Test 6: Verify personal test skill was created
echo "Test 6: Checking test fixtures..."
if [ -f "$HOME/.config/opencode/skills/personal-test/SKILL.md" ]; then
    echo " [PASS] Personal test skill fixture created"
else
    echo " [FAIL] Personal test skill fixture not found"
    exit 1
fi

echo ""
echo "=== All plugin loading tests passed ==="

View File

@@ -0,0 +1,198 @@
#!/usr/bin/env bash
# Test: Skill Priority Resolution
# Verifies that skills are resolved with correct priority: project > personal > superpowers
# NOTE: These tests require OpenCode to be installed and configured
#
# Creates a "priority-test" skill in all three locations, each carrying a
# distinct PRIORITY_MARKER_* string, then asks OpenCode to load it from
# different working directories and checks which marker shows up in the output.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

echo "=== Test: Skill Priority Resolution ==="

# Source setup to create isolated environment
source "$SCRIPT_DIR/setup.sh"

# Trap to cleanup on exit
trap cleanup_test_env EXIT

# Combined stdout/stderr of the most recent OpenCode run (set by run_opencode).
output=""

# Run OpenCode with the given prompt (60s limit) and store its combined output
# in $output. Exits the script with status 1 on timeout; any other non-zero
# exit status is tolerated and left to the marker checks.
run_opencode() {
    local prompt="$1"
    local exit_code=0
    output=$(timeout 60s opencode run --print-logs "$prompt" 2>&1) || exit_code=$?
    if [ "$exit_code" -eq 124 ]; then
        echo " [FAIL] OpenCode timed out after 60s"
        exit 1
    fi
}

# Succeeds when $output contains the given (case-insensitive) grep pattern.
# A here-string is used instead of `echo | grep -q`: with pipefail, grep -q
# exiting on its first match can kill echo with SIGPIPE on large output, which
# would turn a successful match into a failed pipeline.
output_has() {
    grep -qi "$1" <<<"$output"
}

# Print up to 10 output lines matching the pattern, for diagnostics only.
# `|| true`: under `set -e -o pipefail` a grep with no matches would otherwise
# abort the script from inside a mere warning branch.
show_output_snippet() {
    grep -i "$1" <<<"$output" | head -10 || true
}

# Create same skill "priority-test" in all three locations with different markers
echo "Setting up priority test fixtures..."

# 1. Create in superpowers location (lowest priority)
mkdir -p "$HOME/.config/opencode/superpowers/skills/priority-test"
cat > "$HOME/.config/opencode/superpowers/skills/priority-test/SKILL.md" <<'EOF'
---
name: priority-test
description: Superpowers version of priority test skill
---
# Priority Test Skill (Superpowers Version)
This is the SUPERPOWERS version of the priority test skill.
PRIORITY_MARKER_SUPERPOWERS_VERSION
EOF

# 2. Create in personal location (medium priority)
mkdir -p "$HOME/.config/opencode/skills/priority-test"
cat > "$HOME/.config/opencode/skills/priority-test/SKILL.md" <<'EOF'
---
name: priority-test
description: Personal version of priority test skill
---
# Priority Test Skill (Personal Version)
This is the PERSONAL version of the priority test skill.
PRIORITY_MARKER_PERSONAL_VERSION
EOF

# 3. Create in project location (highest priority)
mkdir -p "$TEST_HOME/test-project/.opencode/skills/priority-test"
cat > "$TEST_HOME/test-project/.opencode/skills/priority-test/SKILL.md" <<'EOF'
---
name: priority-test
description: Project version of priority test skill
---
# Priority Test Skill (Project Version)
This is the PROJECT version of the priority test skill.
PRIORITY_MARKER_PROJECT_VERSION
EOF

echo " Created priority-test skill in all three locations"

# Test 1: Verify fixture setup
echo ""
echo "Test 1: Verifying test fixtures..."

if [ -f "$HOME/.config/opencode/superpowers/skills/priority-test/SKILL.md" ]; then
    echo " [PASS] Superpowers version exists"
else
    echo " [FAIL] Superpowers version missing"
    exit 1
fi

if [ -f "$HOME/.config/opencode/skills/priority-test/SKILL.md" ]; then
    echo " [PASS] Personal version exists"
else
    echo " [FAIL] Personal version missing"
    exit 1
fi

if [ -f "$TEST_HOME/test-project/.opencode/skills/priority-test/SKILL.md" ]; then
    echo " [PASS] Project version exists"
else
    echo " [FAIL] Project version missing"
    exit 1
fi

# Check if opencode is available for integration tests
if ! command -v opencode &> /dev/null; then
    echo ""
    echo " [SKIP] OpenCode not installed - skipping integration tests"
    echo " To run these tests, install OpenCode: https://opencode.ai"
    echo ""
    echo "=== Priority fixture tests passed (integration tests skipped) ==="
    exit 0
fi

# Test 2: Test that personal overrides superpowers
echo ""
echo "Test 2: Testing personal > superpowers priority..."
echo " Running from outside project directory..."

# Run from HOME (not in project) - should get personal version
cd "$HOME"
run_opencode "Use the use_skill tool to load the priority-test skill. Show me the exact content including any PRIORITY_MARKER text."

if output_has "PRIORITY_MARKER_PERSONAL_VERSION"; then
    echo " [PASS] Personal version loaded (overrides superpowers)"
elif output_has "PRIORITY_MARKER_SUPERPOWERS_VERSION"; then
    echo " [FAIL] Superpowers version loaded instead of personal"
    exit 1
else
    # Inconclusive output is reported but deliberately does not fail the test.
    echo " [WARN] Could not verify priority marker in output"
    echo " Output snippet:"
    show_output_snippet "priority\|personal\|superpowers"
fi

# Test 3: Test that project overrides both personal and superpowers
echo ""
echo "Test 3: Testing project > personal > superpowers priority..."
echo " Running from project directory..."

# Run from project directory - should get project version
cd "$TEST_HOME/test-project"
run_opencode "Use the use_skill tool to load the priority-test skill. Show me the exact content including any PRIORITY_MARKER text."

if output_has "PRIORITY_MARKER_PROJECT_VERSION"; then
    echo " [PASS] Project version loaded (highest priority)"
elif output_has "PRIORITY_MARKER_PERSONAL_VERSION"; then
    echo " [FAIL] Personal version loaded instead of project"
    exit 1
elif output_has "PRIORITY_MARKER_SUPERPOWERS_VERSION"; then
    echo " [FAIL] Superpowers version loaded instead of project"
    exit 1
else
    echo " [WARN] Could not verify priority marker in output"
    echo " Output snippet:"
    show_output_snippet "priority\|project\|personal"
fi

# Test 4: Test explicit superpowers: prefix bypasses priority
echo ""
echo "Test 4: Testing superpowers: prefix forces superpowers version..."

cd "$TEST_HOME/test-project"
run_opencode "Use the use_skill tool to load superpowers:priority-test specifically. Show me the exact content including any PRIORITY_MARKER text."

if output_has "PRIORITY_MARKER_SUPERPOWERS_VERSION"; then
    echo " [PASS] superpowers: prefix correctly forces superpowers version"
elif output_has "PRIORITY_MARKER_PROJECT_VERSION\|PRIORITY_MARKER_PERSONAL_VERSION"; then
    echo " [FAIL] superpowers: prefix did not force superpowers version"
    exit 1
else
    echo " [WARN] Could not verify priority marker in output"
fi

# Test 5: Test explicit project: prefix
echo ""
echo "Test 5: Testing project: prefix forces project version..."

cd "$HOME" # Run from outside project but with project: prefix
run_opencode "Use the use_skill tool to load project:priority-test specifically. Show me the exact content."

# Note: This may fail since we're not in the project directory
# The project: prefix only works when in a project context
# Neither branch below fails the test; this check is informational only.
if output_has "not found\|error"; then
    echo " [PASS] project: prefix correctly fails when not in project context"
else
    echo " [INFO] project: prefix behavior outside project context may vary"
fi

echo ""
echo "=== All priority tests passed ==="

View File

@@ -0,0 +1,104 @@
#!/usr/bin/env bash
# Test: Tools Functionality
# Verifies that use_skill and find_skills tools work correctly
# NOTE: These tests require OpenCode to be installed and configured
#
# Each test sends one prompt to `opencode run` and greps the combined output
# for strings that indicate the tool returned the expected skill content.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

echo "=== Test: Tools Functionality ==="

# Source setup to create isolated environment
source "$SCRIPT_DIR/setup.sh"

# Trap to cleanup on exit
trap cleanup_test_env EXIT

# Check if opencode is available
if ! command -v opencode &> /dev/null; then
    echo " [SKIP] OpenCode not installed - skipping integration tests"
    echo " To run these tests, install OpenCode: https://opencode.ai"
    exit 0
fi

# Combined stdout/stderr of the most recent OpenCode run (set by run_opencode).
output=""

# Run OpenCode with the given prompt, using timeout to prevent hanging, and
# capture both stdout and stderr into $output. A timeout (exit 124) fails the
# script; any other non-zero exit status only produces a warning.
run_opencode() {
    local prompt="$1"
    local exit_code=0
    output=$(timeout 60s opencode run --print-logs "$prompt" 2>&1) || exit_code=$?
    if [ "$exit_code" -eq 124 ]; then
        echo " [FAIL] OpenCode timed out after 60s"
        exit 1
    fi
    if [ "$exit_code" -ne 0 ]; then
        echo " [WARN] OpenCode returned non-zero exit code: $exit_code"
    fi
}

# Succeeds when $output contains the given (case-insensitive) grep pattern.
# A here-string is used instead of `echo | grep -q`: with pipefail, grep -q
# exiting on its first match can kill echo with SIGPIPE on large output, which
# would make a successful match look like a failure.
output_has() {
    grep -qi "$1" <<<"$output"
}

# Print the first 50 lines of $output for failure diagnostics.
show_output_head() {
    head -50 <<<"$output"
}

# Test 1: Test find_skills tool via direct invocation
echo "Test 1: Testing find_skills tool..."
echo " Running opencode with find_skills request..."

run_opencode "Use the find_skills tool to list available skills. Just call the tool and show me the raw output."

# Check for expected patterns in output
if output_has "superpowers:brainstorming\|superpowers:using-superpowers\|Available skills"; then
    echo " [PASS] find_skills tool discovered superpowers skills"
else
    echo " [FAIL] find_skills did not return expected skills"
    echo " Output was:"
    show_output_head
    exit 1
fi

# Check if personal test skill was found
if output_has "personal-test"; then
    echo " [PASS] find_skills found personal test skill"
else
    echo " [WARN] personal test skill not found in output (may be ok if tool returned subset)"
fi

# Test 2: Test use_skill tool
echo ""
echo "Test 2: Testing use_skill tool..."
echo " Running opencode with use_skill request..."

run_opencode "Use the use_skill tool to load the personal-test skill and show me what you get."

# Check for the skill marker we embedded
if output_has "PERSONAL_SKILL_MARKER_12345\|Personal Test Skill\|Launching skill"; then
    echo " [PASS] use_skill loaded personal-test skill content"
else
    echo " [FAIL] use_skill did not load personal-test skill correctly"
    echo " Output was:"
    show_output_head
    exit 1
fi

# Test 3: Test use_skill with superpowers: prefix
echo ""
echo "Test 3: Testing use_skill with superpowers: prefix..."
echo " Running opencode with superpowers:brainstorming skill..."

run_opencode "Use the use_skill tool to load superpowers:brainstorming and tell me the first few lines of what you received."

# Check for expected content from brainstorming skill
if output_has "brainstorming\|Launching skill\|skill.*loaded"; then
    echo " [PASS] use_skill loaded superpowers:brainstorming skill"
else
    echo " [FAIL] use_skill did not load superpowers:brainstorming correctly"
    echo " Output was:"
    show_output_head
    exit 1
fi

echo ""
echo "=== All tools tests passed ==="

View File

@@ -0,0 +1,8 @@
I have 4 independent test failures happening in different modules:
1. tests/auth/login.test.ts - "should redirect after login" is failing
2. tests/api/users.test.ts - "should return user list" returns 500
3. tests/components/Button.test.tsx - snapshot mismatch
4. tests/utils/date.test.ts - timezone handling broken
These are unrelated issues in different parts of the codebase. Can you investigate all of them?

View File

@@ -0,0 +1 @@
I have a plan document at docs/superpowers/plans/2024-01-15-auth-system.md that needs to be executed. Please implement it.

View File

@@ -0,0 +1,3 @@
I just finished implementing the user authentication feature. All the code is committed. Can you review the changes before I merge to main?
The commits are between abc123 and def456.

View File

@@ -0,0 +1,11 @@
The tests are failing with this error:
```
FAIL src/utils/parser.test.ts
● Parser should handle nested objects
TypeError: Cannot read property 'value' of undefined
at parse (src/utils/parser.ts:42:18)
at Object.<anonymous> (src/utils/parser.test.ts:28:20)
```
Can you figure out what's going wrong and fix it?

View File

@@ -0,0 +1,7 @@
I need to add a new feature to validate email addresses. It should:
- Check that there's an @ symbol
- Check that there's at least one character before the @
- Check that there's a dot in the domain part
- Return true/false
Can you implement this?

View File

@@ -0,0 +1,10 @@
Here's the spec for our new authentication system:
Requirements:
- Users can register with email/password
- Users can log in and receive a JWT token
- Protected routes require valid JWT
- Tokens expire after 24 hours
- Support password reset via email
We need to implement this. There are multiple steps involved - user model, auth routes, middleware, email service integration.

View File

@@ -0,0 +1,60 @@
#!/usr/bin/env bash
# Run all skill triggering tests
# Usage: ./run-all.sh
#
# Runs run-test.sh once per skill listed in SKILLS, using
# prompts/<skill>.txt as the prompt. Exits 1 if any test failed.

# pipefail is required: each test is piped through `tee`, and without it the
# pipeline's status would be tee's (always 0), so failures were never counted.
set -eo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROMPTS_DIR="$SCRIPT_DIR/prompts"

SKILLS=(
    "systematic-debugging"
    "test-driven-development"
    "writing-plans"
    "dispatching-parallel-agents"
    "executing-plans"
    "requesting-code-review"
)

echo "=== Running Skill Triggering Tests ==="
echo ""

PASSED=0
FAILED=0
RESULTS=()

for skill in "${SKILLS[@]}"; do
    prompt_file="$PROMPTS_DIR/${skill}.txt"

    # Skills without a prompt file are skipped and not counted either way.
    if [ ! -f "$prompt_file" ]; then
        echo "⚠️ SKIP: No prompt file for $skill"
        continue
    fi

    echo "Testing: $skill"

    # Output is shown live and also saved to a per-skill log file.
    if "$SCRIPT_DIR/run-test.sh" "$skill" "$prompt_file" 3 2>&1 | tee "/tmp/skill-test-$skill.log"; then
        PASSED=$((PASSED + 1))
        RESULTS+=("✅ $skill")
    else
        FAILED=$((FAILED + 1))
        RESULTS+=("❌ $skill")
    fi

    echo ""
    echo "---"
    echo ""
done

echo ""
echo "=== Summary ==="
# Each entry is prefixed with its outcome so the summary distinguishes
# passing skills from failing ones.
for result in "${RESULTS[@]}"; do
    echo " $result"
done

echo ""
echo "Passed: $PASSED"
echo "Failed: $FAILED"

if [ "$FAILED" -gt 0 ]; then
    exit 1
fi

View File

@@ -0,0 +1,88 @@
#!/usr/bin/env bash
# Test skill triggering with naive prompts
# Usage: ./run-test.sh <skill-name> <prompt-file> [max-turns]
#
# Tests whether Claude triggers a skill based on a natural prompt
# (without explicitly mentioning the skill)
#
# Exit status: 0 if the expected skill was triggered, 1 otherwise.

set -e

SKILL_NAME="$1"
PROMPT_FILE="$2"
MAX_TURNS="${3:-3}"

if [ -z "$SKILL_NAME" ] || [ -z "$PROMPT_FILE" ]; then
    echo "Usage: $0 <skill-name> <prompt-file> [max-turns]"
    echo "Example: $0 systematic-debugging ./test-prompts/debugging.txt"
    exit 1
fi

# Fail early with a clear message instead of a bare `cat` error after the
# output directory has already been created.
if [ ! -f "$PROMPT_FILE" ]; then
    echo "Error: prompt file not found: $PROMPT_FILE" >&2
    exit 1
fi

# Get the directory where this script lives (should be tests/skill-triggering)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Get the superpowers plugin root (two levels up from tests/skill-triggering)
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/skill-triggering/${SKILL_NAME}"
mkdir -p "$OUTPUT_DIR"

# Read prompt from file (before any `cd`, so a relative path still resolves)
PROMPT=$(cat "$PROMPT_FILE")

echo "=== Skill Triggering Test ==="
echo "Skill: $SKILL_NAME"
echo "Prompt file: $PROMPT_FILE"
echo "Max turns: $MAX_TURNS"
echo "Output dir: $OUTPUT_DIR"
echo ""

# Copy prompt for reference
cp "$PROMPT_FILE" "$OUTPUT_DIR/prompt.txt"

# Run Claude from inside the output directory
LOG_FILE="$OUTPUT_DIR/claude-output.json"
cd "$OUTPUT_DIR"

echo "Plugin dir: $PLUGIN_DIR"
echo "Running claude -p with naive prompt..."

# `|| true`: a non-zero exit (including the 300s timeout) must not abort the
# script; the verdict is derived from the captured log below.
timeout 300 claude -p "$PROMPT" \
    --plugin-dir "$PLUGIN_DIR" \
    --dangerously-skip-permissions \
    --max-turns "$MAX_TURNS" \
    --output-format stream-json \
    > "$LOG_FILE" 2>&1 || true

echo ""
echo "=== Results ==="

# Check if skill was triggered (look for Skill tool invocation)
# In stream-json, tool invocations have "name":"Skill" (not "tool":"Skill")
# Match either "skill":"skillname" or "skill":"namespace:skillname"
SKILL_PATTERN='"skill":"([^"]*:)?'"${SKILL_NAME}"'"'
if grep -q '"name":"Skill"' "$LOG_FILE" && grep -qE "$SKILL_PATTERN" "$LOG_FILE"; then
    echo "✅ PASS: Skill '$SKILL_NAME' was triggered"
    TRIGGERED=true
else
    echo "❌ FAIL: Skill '$SKILL_NAME' was NOT triggered"
    TRIGGERED=false
fi

# Show what skills WERE triggered.
# The result is captured first: in `grep | sort -u || echo`, the fallback is tied
# to sort's exit status (always 0), so "(none)" would never be printed.
echo ""
echo "Skills triggered in this run:"
TRIGGERED_SKILLS=$(grep -o '"skill":"[^"]*"' "$LOG_FILE" 2>/dev/null | sort -u || true)
if [ -n "$TRIGGERED_SKILLS" ]; then
    echo "$TRIGGERED_SKILLS"
else
    echo " (none)"
fi

# Show first assistant message (captured first so the fallback can actually fire)
echo ""
echo "First assistant response (truncated):"
FIRST_RESPONSE=$(grep '"type":"assistant"' "$LOG_FILE" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 500 || true)
if [ -n "$FIRST_RESPONSE" ]; then
    printf '%s\n' "$FIRST_RESPONSE"
else
    echo " (could not extract)"
fi

echo ""
echo "Full log: $LOG_FILE"
echo "Timestamp: $TIMESTAMP"

if [ "$TRIGGERED" = "true" ]; then
    exit 0
else
    exit 1
fi

View File

@@ -0,0 +1,81 @@
# Go Fractals CLI - Design
## Overview
A command-line tool that generates ASCII art fractals. Supports two fractal types with configurable output.
## Usage
```bash
# Sierpinski triangle
fractals sierpinski --size 32 --depth 5
# Mandelbrot set
fractals mandelbrot --width 80 --height 24 --iterations 100
# Custom character
fractals sierpinski --size 16 --char '#'
# Help
fractals --help
fractals sierpinski --help
```
## Commands
### `sierpinski`
Generates a Sierpinski triangle using recursive subdivision.
Flags:
- `--size` (default: 32) - Width of the triangle base in characters
- `--depth` (default: 5) - Recursion depth
- `--char` (default: '*') - Character to use for filled points
Output: Triangle printed to stdout, one line per row.
### `mandelbrot`
Renders the Mandelbrot set as ASCII art. Maps iteration count to characters.
Flags:
- `--width` (default: 80) - Output width in characters
- `--height` (default: 24) - Output height in characters
- `--iterations` (default: 100) - Maximum iterations for escape calculation
- `--char` (default: gradient) - Single character, or omit for gradient " .:-=+*#%@"
Output: Rectangle printed to stdout.
## Architecture
```
cmd/
fractals/
main.go # Entry point, CLI setup
internal/
sierpinski/
sierpinski.go # Algorithm
sierpinski_test.go
mandelbrot/
mandelbrot.go # Algorithm
mandelbrot_test.go
cli/
root.go # Root command, help
sierpinski.go # Sierpinski subcommand
mandelbrot.go # Mandelbrot subcommand
```
## Dependencies
- Go 1.21+
- `github.com/spf13/cobra` for CLI
## Acceptance Criteria
1. `fractals --help` shows usage
2. `fractals sierpinski` outputs a recognizable triangle
3. `fractals mandelbrot` outputs a recognizable Mandelbrot set
4. `--size`, `--width`, `--height`, `--depth`, `--iterations` flags work
5. `--char` customizes output character
6. Invalid inputs produce clear error messages
7. All tests pass

View File

@@ -0,0 +1,172 @@
# Go Fractals CLI - Implementation Plan
Execute this plan using the `superpowers:subagent-driven-development` skill.
## Context
Building a CLI tool that generates ASCII fractals. See `design.md` for full specification.
## Tasks
### Task 1: Project Setup
Create the Go module and directory structure.
**Do:**
- Initialize `go.mod` with module name `github.com/superpowers-test/fractals`
- Create directory structure: `cmd/fractals/`, `internal/sierpinski/`, `internal/mandelbrot/`, `internal/cli/`
- Create minimal `cmd/fractals/main.go` that prints "fractals cli"
- Add `github.com/spf13/cobra` dependency
**Verify:**
- `go build ./cmd/fractals` succeeds
- `./fractals` prints "fractals cli"
---
### Task 2: CLI Framework with Help
Set up Cobra root command with help output.
**Do:**
- Create `internal/cli/root.go` with root command
- Configure help text showing available subcommands
- Wire root command into `main.go`
**Verify:**
- `./fractals --help` shows usage with "sierpinski" and "mandelbrot" listed as available commands
- `./fractals` (no args) shows help
---
### Task 3: Sierpinski Algorithm
Implement the Sierpinski triangle generation algorithm.
**Do:**
- Create `internal/sierpinski/sierpinski.go`
- Implement `Generate(size, depth int, char rune) []string` that returns lines of the triangle
- Use recursive midpoint subdivision algorithm
- Create `internal/sierpinski/sierpinski_test.go` with tests:
  - Small triangle (size=4, depth=2) matches expected output
  - Size=1 returns single character
  - Depth=0 returns filled triangle
**Verify:**
- `go test ./internal/sierpinski/...` passes
---
### Task 4: Sierpinski CLI Integration
Wire the Sierpinski algorithm to a CLI subcommand.
**Do:**
- Create `internal/cli/sierpinski.go` with `sierpinski` subcommand
- Add flags: `--size` (default 32), `--depth` (default 5), `--char` (default '*')
- Call `sierpinski.Generate()` and print result to stdout
**Verify:**
- `./fractals sierpinski` outputs a triangle
- `./fractals sierpinski --size 16 --depth 3` outputs smaller triangle
- `./fractals sierpinski --help` shows flag documentation
---
### Task 5: Mandelbrot Algorithm
Implement the Mandelbrot set ASCII renderer.
**Do:**
- Create `internal/mandelbrot/mandelbrot.go`
- Implement `Render(width, height, maxIter int, char string) []string`
- Map complex plane region (-2.5 to 1.0 real, -1.0 to 1.0 imaginary) to output dimensions
- Map iteration count to the character gradient `" .:-=+*#%@"` (or to the single character, if one is provided)
- Create `internal/mandelbrot/mandelbrot_test.go` with tests:
  - Output dimensions match requested width/height
  - Known point inside set (0,0) maps to max-iteration character
  - Known point outside set (2,0) maps to low-iteration character
**Verify:**
- `go test ./internal/mandelbrot/...` passes
---
### Task 6: Mandelbrot CLI Integration
Wire the Mandelbrot algorithm to a CLI subcommand.
**Do:**
- Create `internal/cli/mandelbrot.go` with `mandelbrot` subcommand
- Add flags: `--width` (default 80), `--height` (default 24), `--iterations` (default 100), `--char` (default "")
- Call `mandelbrot.Render()` and print result to stdout
**Verify:**
- `./fractals mandelbrot` outputs recognizable Mandelbrot set
- `./fractals mandelbrot --width 40 --height 12` outputs smaller version
- `./fractals mandelbrot --help` shows flag documentation
---
### Task 7: Character Set Configuration
Ensure `--char` flag works consistently across both commands.
**Do:**
- Verify Sierpinski `--char` flag passes character to algorithm
- For Mandelbrot, `--char` should use single character instead of gradient
- Add tests for custom character output
**Verify:**
- `./fractals sierpinski --char '#'` uses '#' character
- `./fractals mandelbrot --char '.'` uses '.' for all filled points
- Tests pass
---
### Task 8: Input Validation and Error Handling
Add validation for invalid inputs.
**Do:**
- Sierpinski: size must be > 0, depth must be >= 0
- Mandelbrot: width/height must be > 0, iterations must be > 0
- Return clear error messages for invalid inputs
- Add tests for error cases
**Verify:**
- `./fractals sierpinski --size 0` prints error, exits non-zero
- `./fractals mandelbrot --width -1` prints error, exits non-zero
- Error messages are clear and helpful
---
### Task 9: Integration Tests
Add integration tests that invoke the CLI.
**Do:**
- Create `cmd/fractals/main_test.go` or `test/integration_test.go`
- Test full CLI invocation for both commands
- Verify output format and exit codes
- Test error cases return non-zero exit
**Verify:**
- `go test ./...` passes all tests including integration tests
---
### Task 10: README
Document usage and examples.
**Do:**
- Create `README.md` with:
  - Project description
  - Installation: `go install ./cmd/fractals`
  - Usage examples for both commands
  - Example output (small samples)
**Verify:**
- README accurately describes the tool
- Examples in README actually work

View File

@@ -0,0 +1,45 @@
#!/usr/bin/env bash
#
# scaffold.sh - set up a fresh Go Fractals project for the
# subagent-driven-development test.
#
# Usage: ./scaffold.sh /path/to/target/directory
#
# Steps performed, in order:
#   1. create the target directory and enter it
#   2. initialize a git repository there
#   3. copy design.md and plan.md from the directory holding this script
#   4. write .claude/settings.local.json with the permission allow-list
#   5. make an initial commit and print how to start the test run

# Stop at the first command that fails.
set -o errexit

# The target directory is mandatory; bash prints the usage text and exits
# when $1 is missing or empty.
project_dir="${1:?Usage: $0 <target-directory>}"

# Absolute path of the directory this script lives in (source of the docs).
script_location="$(dirname "$0")"
source_dir="$(cd "$script_location" && pwd)"

# Create the project directory and work from inside it from here on.
mkdir -p "$project_dir"
cd "$project_dir"

# Start a new repository for the project.
git init

# Bring in the specification documents.
for doc in design.md plan.md; do
  cp "$source_dir/$doc" .
done

# Project-local Claude settings: allow file reads/edits/writes plus the
# go, mkdir and git shell commands.
mkdir -p .claude
cat > .claude/settings.local.json << 'SETTINGS'
{
"permissions": {
"allow": [
"Read(**)",
"Edit(**)",
"Write(**)",
"Bash(go:*)",
"Bash(mkdir:*)",
"Bash(git:*)"
]
}
}
SETTINGS

# Record everything created so far as the initial commit.
git add .
git commit -m "Initial project setup with design and plan"

# Tell the user where the project is and how to launch the test.
printf '%s\n' "Scaffolded Go Fractals project at: $project_dir"
printf '\n'
printf '%s\n' "To run the test:"
printf '%s\n' " claude -p \"Execute this plan using superpowers:subagent-driven-development. Plan: $project_dir/plan.md\" --plugin-dir /path/to/superpowers"

View File

@@ -0,0 +1,106 @@
#!/usr/bin/env bash
# Run a subagent-driven-development test
# Usage: ./run-test.sh <test-name> [--plugin-dir <path>]
#
# Example:
#   ./run-test.sh go-fractals
#   ./run-test.sh svelte-todo --plugin-dir /path/to/superpowers
#
# What it does:
#   1. Scaffolds the named test (a directory next to this script) into
#      /tmp/superpowers-tests/<epoch>/subagent-driven-development/<test-name>/project
#   2. Runs `claude -p` in that project, writing its stdout and stderr to
#      claude-output.json in the output directory
#   3. Prints token usage (when jq is installed) and manual follow-up steps
#
# The claude run is best-effort: the script still exits 0 when claude fails,
# but it prints a warning with the exit status so the failure is not silent.
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
TEST_NAME="${1:?Usage: $0 <test-name> [--plugin-dir <path>]}"
shift

# Parse optional arguments
PLUGIN_DIR=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    --plugin-dir)
      # Without this check `shift 2` fails under `set -e` when the value is
      # missing, and the script would exit without any message.
      if [[ $# -lt 2 ]]; then
        echo "Error: --plugin-dir requires a path argument" >&2
        exit 1
      fi
      PLUGIN_DIR="$2"
      shift 2
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
done

# Default plugin dir to parent of tests directory
if [[ -z "$PLUGIN_DIR" ]]; then
  PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
fi

# Verify test exists
TEST_DIR="$SCRIPT_DIR/$TEST_NAME"
if [[ ! -d "$TEST_DIR" ]]; then
  {
    echo "Error: Test '$TEST_NAME' not found at $TEST_DIR"
    echo "Available tests:"
    # A test must be a directory (see the -d check above), so list only the
    # subdirectories of the script directory instead of parsing `ls` output.
    for candidate in "$SCRIPT_DIR"/*/; do
      [[ -d "$candidate" ]] || continue
      basename "$candidate"
    done
  } >&2
  exit 1
fi

# Create timestamped output directory
TIMESTAMP=$(date +%s)
OUTPUT_BASE="/tmp/superpowers-tests/$TIMESTAMP/subagent-driven-development"
OUTPUT_DIR="$OUTPUT_BASE/$TEST_NAME"
mkdir -p "$OUTPUT_DIR"

echo "=== Subagent-Driven Development Test ==="
echo "Test: $TEST_NAME"
echo "Output: $OUTPUT_DIR"
echo "Plugin: $PLUGIN_DIR"
echo ""

# Scaffold the project
echo ">>> Scaffolding project..."
"$TEST_DIR/scaffold.sh" "$OUTPUT_DIR/project"
echo ""

# Prepare the prompt
PLAN_PATH="$OUTPUT_DIR/project/plan.md"
PROMPT="Execute this plan using superpowers:subagent-driven-development. The plan is at: $PLAN_PATH"

# Run Claude with JSON output for token tracking
LOG_FILE="$OUTPUT_DIR/claude-output.json"
echo ">>> Running Claude..."
echo "Prompt: $PROMPT"
echo "Log file: $LOG_FILE"
echo ""

# Run claude and capture output
# Using stream-json to get token usage stats
# --dangerously-skip-permissions for automated testing (subagents don't inherit parent settings)
# A failing run must not abort the script (set -e), so the exit status is
# captured here and reported below rather than discarded.
cd "$OUTPUT_DIR/project"
CLAUDE_STATUS=0
claude -p "$PROMPT" \
  --plugin-dir "$PLUGIN_DIR" \
  --dangerously-skip-permissions \
  --output-format stream-json \
  --verbose \
  > "$LOG_FILE" 2>&1 || CLAUDE_STATUS=$?

# Extract final stats
echo ""
echo ">>> Test complete"
if [[ "$CLAUDE_STATUS" -ne 0 ]]; then
  echo "Warning: claude exited with status $CLAUDE_STATUS; see $LOG_FILE" >&2
fi
echo "Project directory: $OUTPUT_DIR/project"
echo "Claude log: $LOG_FILE"
echo ""

# Show token usage if available
if command -v jq &> /dev/null; then
  echo ">>> Token usage:"
  # Extract usage from the last message with usage info.
  # The log also holds claude's stderr, so it may not be pure JSON; in that
  # case jq fails and the fallback message is printed instead.
  jq -s '[.[] | select(.type == "result")] | last | .usage' "$LOG_FILE" 2>/dev/null || echo "(could not parse usage)"
  echo ""
fi

echo ">>> Next steps:"
echo "1. Review the project: cd $OUTPUT_DIR/project"
echo "2. Review Claude's log: less $LOG_FILE"
echo "3. Check if tests pass:"
if [[ "$TEST_NAME" == "go-fractals" ]]; then
  echo " cd $OUTPUT_DIR/project && go test ./..."
elif [[ "$TEST_NAME" == "svelte-todo" ]]; then
  echo " cd $OUTPUT_DIR/project && npm test && npx playwright test"
fi

View File

@@ -0,0 +1,70 @@
# Svelte Todo List - Design
## Overview
A simple todo list application built with Svelte. Supports creating, completing, and deleting todos with localStorage persistence.
## Features
- Add new todos
- Mark todos as complete/incomplete
- Delete todos
- Filter by: All / Active / Completed
- Clear all completed todos
- Persist to localStorage
- Show count of remaining items
## User Interface
```
┌─────────────────────────────────────────┐
│  Svelte Todos                           │
├─────────────────────────────────────────┤
│  [________________________] [Add]       │
├─────────────────────────────────────────┤
│  [ ] Buy groceries                 [x]  │
│  [✓] Walk the dog                  [x]  │
│  [ ] Write code                    [x]  │
├─────────────────────────────────────────┤
│  2 items left                           │
│  [All] [Active] [Completed]  [Clear ✓]  │
└─────────────────────────────────────────┘
```
## Components
```
src/
  App.svelte            # Main app, state management
  lib/
    TodoInput.svelte    # Text input + Add button
    TodoList.svelte     # List container
    TodoItem.svelte     # Single todo with checkbox, text, delete
    FilterBar.svelte    # Filter buttons + clear completed
    store.ts            # Svelte store for todos
    storage.ts          # localStorage persistence
```
## Data Model
```typescript
interface Todo {
  id: string;          // UUID
  text: string;        // Todo text
  completed: boolean;
}
type Filter = 'all' | 'active' | 'completed';
```
## Acceptance Criteria
1. Can add a todo by typing and pressing Enter or clicking Add
2. Can toggle a todo's completion by clicking its checkbox
3. Can delete a todo by clicking its X button
4. Filter buttons show the correct subset of todos
5. "X items left" shows the count of incomplete todos
6. "Clear completed" removes all completed todos
7. Todos persist across page refresh (localStorage)
8. Empty state shows a helpful message
9. All tests pass

View File

@@ -0,0 +1,222 @@
# Svelte Todo List - Implementation Plan
Execute this plan using the `superpowers:subagent-driven-development` skill.
## Context
Building a todo list app with Svelte. See `design.md` for full specification.
## Tasks
### Task 1: Project Setup
Create the Svelte project with Vite.
**Do:**
- Run `npm create vite@latest . -- --template svelte-ts`
- Install dependencies with `npm install`
- Verify dev server works
- Clean up default Vite template content from App.svelte
**Verify:**
- `npm run dev` starts server
- App shows minimal "Svelte Todos" heading
- `npm run build` succeeds
---
### Task 2: Todo Store
Create the Svelte store for todo state management.
**Do:**
- Create `src/lib/store.ts`
- Define `Todo` interface with id, text, completed
- Create writable store with initial empty array
- Export functions: `addTodo(text)`, `toggleTodo(id)`, `deleteTodo(id)`, `clearCompleted()`
- Create `src/lib/store.test.ts` with tests for each function
**Verify:**
- Tests pass: `npm run test` (install Vitest and add a `test` script to `package.json` if they are not already set up)
---
### Task 3: localStorage Persistence
Add persistence layer for todos.
**Do:**
- Create `src/lib/storage.ts`
- Implement `loadTodos(): Todo[]` and `saveTodos(todos: Todo[])`
- Handle JSON parse errors gracefully (return empty array)
- Integrate with store: load on init, save on change
- Add tests for load/save/error handling
**Verify:**
- Tests pass
- Manual test: add todo, refresh page, todo persists
---
### Task 4: TodoInput Component
Create the input component for adding todos.
**Do:**
- Create `src/lib/TodoInput.svelte`
- Text input bound to local state
- Add button calls `addTodo()` and clears input
- Enter key also submits
- Disable Add button when input is empty
- Add component tests
**Verify:**
- Tests pass
- Component renders input and button
---
### Task 5: TodoItem Component
Create the single todo item component.
**Do:**
- Create `src/lib/TodoItem.svelte`
- Props: `todo: Todo`
- Checkbox toggles completion (calls `toggleTodo`)
- Text with strikethrough when completed
- Delete button (X) calls `deleteTodo`
- Add component tests
**Verify:**
- Tests pass
- Component renders checkbox, text, delete button
---
### Task 6: TodoList Component
Create the list container component.
**Do:**
- Create `src/lib/TodoList.svelte`
- Props: `todos: Todo[]`
- Renders TodoItem for each todo
- Shows "No todos yet" when empty
- Add component tests
**Verify:**
- Tests pass
- Component renders list of TodoItems
---
### Task 7: FilterBar Component
Create the filter and status bar component.
**Do:**
- Create `src/lib/FilterBar.svelte`
- Props: `todos: Todo[]`, `filter: Filter`, `onFilterChange: (f: Filter) => void`
- Show count: "X items left" (incomplete count)
- Three filter buttons: All, Active, Completed
- Active filter is visually highlighted
- "Clear completed" button (hidden when no completed todos)
- Add component tests
**Verify:**
- Tests pass
- Component renders count, filters, clear button
---
### Task 8: App Integration
Wire all components together in App.svelte.
**Do:**
- Import all components and store
- Add filter state (default: 'all')
- Compute filtered todos based on filter state
- Render: heading, TodoInput, TodoList, FilterBar
- Pass appropriate props to each component
**Verify:**
- App renders all components
- Adding todos works
- Toggling works
- Deleting works
---
### Task 9: Filter Functionality
Ensure filtering works end-to-end.
**Do:**
- Verify filter buttons change displayed todos:
  - 'all' shows all todos
  - 'active' shows only incomplete todos
  - 'completed' shows only completed todos
- Clear completed removes completed todos and resets filter if needed
- Add integration tests
**Verify:**
- Filter tests pass
- Manual verification of all filter states
---
### Task 10: Styling and Polish
Add CSS styling for usability.
**Do:**
- Style the app to match the design mockup
- Completed todos have strikethrough and muted color
- Active filter button is highlighted
- Input has focus styles
- Delete button appears on hover (or always on mobile)
- Responsive layout
**Verify:**
- App is visually usable
- Styles don't break functionality
---
### Task 11: End-to-End Tests
Add Playwright tests for full user flows.
**Do:**
- Install Playwright: `npm init playwright@latest`
- Create `tests/todo.spec.ts`
- Test flows:
  - Add a todo
  - Complete a todo
  - Delete a todo
  - Filter todos
  - Clear completed
  - Persistence (add, reload, verify)
**Verify:**
- `npx playwright test` passes
---
### Task 12: README
Document the project.
**Do:**
- Create `README.md` with:
  - Project description
  - Setup: `npm install`
  - Development: `npm run dev`
  - Testing: `npm test` and `npx playwright test`
  - Build: `npm run build`
**Verify:**
- README accurately describes the project
- Instructions work

View File

@@ -0,0 +1,46 @@
#!/usr/bin/env bash
#
# scaffold.sh - set up a fresh Svelte Todo project for the
# subagent-driven-development test.
#
# Usage: ./scaffold.sh /path/to/target/directory
#
# Steps performed, in order:
#   1. create the target directory and enter it
#   2. initialize a git repository there
#   3. copy design.md and plan.md from the directory holding this script
#   4. write .claude/settings.local.json with the permission allow-list
#   5. make an initial commit and print how to start the test run

# Stop at the first command that fails.
set -o errexit

# The target directory is mandatory; bash prints the usage text and exits
# when $1 is missing or empty.
project_dir="${1:?Usage: $0 <target-directory>}"

# Absolute path of the directory this script lives in (source of the docs).
script_location="$(dirname "$0")"
source_dir="$(cd "$script_location" && pwd)"

# Create the project directory and work from inside it from here on.
mkdir -p "$project_dir"
cd "$project_dir"

# Start a new repository for the project.
git init

# Bring in the specification documents.
for doc in design.md plan.md; do
  cp "$source_dir/$doc" .
done

# Project-local Claude settings: allow file reads/edits/writes plus the
# npm, npx, mkdir and git shell commands.
mkdir -p .claude
cat > .claude/settings.local.json << 'SETTINGS'
{
"permissions": {
"allow": [
"Read(**)",
"Edit(**)",
"Write(**)",
"Bash(npm:*)",
"Bash(npx:*)",
"Bash(mkdir:*)",
"Bash(git:*)"
]
}
}
SETTINGS

# Record everything created so far as the initial commit.
git add .
git commit -m "Initial project setup with design and plan"

# Tell the user where the project is and how to launch the test.
printf '%s\n' "Scaffolded Svelte Todo project at: $project_dir"
printf '\n'
printf '%s\n' "To run the test:"
printf '%s\n' " claude -p \"Execute this plan using superpowers:subagent-driven-development. Plan: $project_dir/plan.md\" --plugin-dir /path/to/superpowers"