import { spawn, exec } from 'child_process';

// Health-probe tuning: up to 20 polls, 2 s apart, each request capped at 2 s.
// The URL's port must agree with the `--port` value passed to llama-server below.
const HEALTH_URL = 'http://127.0.0.1:8000/health';
const HEALTH_ATTEMPTS = 20;
const HEALTH_INTERVAL_MS = 2000;
const HEALTH_TIMEOUT_MS = 2000;

// Lower-cased stderr substrings that are treated as an allocation failure.
const OOM_MARKERS = ['out of memory', 'failed to allocate'];
// Characters of previous stderr output kept so a marker split across two
// chunks is still matched (longer than the longest marker).
const OOM_TAIL_LENGTH = 32;

/**
 * Resolves after the given number of milliseconds.
 * @param {number} ms
 * @returns {Promise<void>}
 */
const delay = ms => new Promise(res => setTimeout(res, ms));

/**
 * Force-kills every `llama-server.exe` process via Windows `taskkill`, then
 * waits 2 seconds before resolving.
 *
 * The taskkill result is deliberately ignored: the command fails whenever no
 * server is running, which is an expected situation here.
 * @returns {Promise<void>}
 */
async function killServer() {
  await new Promise(resolve => {
    exec('taskkill /F /IM llama-server.exe', () => resolve());
  });
  await delay(2000);
}

/**
 * Starts llama-server for one model at one context size, waits for its
 * /health endpoint to answer 200, and, if it boots, runs the benchmark script
 * and prints its output.
 *
 * @param {string} modelPath - Model file name, resolved under `models\`.
 * @param {number} contextSize - Value passed to the server's `-c` flag.
 * @returns {Promise<boolean>} `true` when the server booted (the benchmark was
 *   then attempted), `false` on an allocation failure, spawn failure, early
 *   exit, or when the health endpoint never answered 200.
 */
async function testContextSize(modelPath, contextSize) {
  console.log(`\nTesting ${modelPath} with -c ${contextSize}...`);
  await killServer();

  const args = [
    '--model', `models\\${modelPath}`,
    '-ngl', '999',
    '-c', String(contextSize),
    '-fa', 'on',
    '--cache-type-k', 'q4_0',
    '--cache-type-v', 'q4_0',
    '-ub', '512',
    '-b', '2048',
    '-t', '6',
    '-tb', '6',
    '--split-mode', 'row',
    '--prio', '3',
    '--fit', 'off',
    '--port', '8000',
    '--host', '0.0.0.0',
  ];

  // Only stderr is piped because it is the only stream that is read. Leaving
  // stdout piped but unread would let the pipe buffer fill up and stall the
  // server once it has logged enough.
  const server = spawn('llama_bin_run\\llama-server.exe', args, {
    stdio: ['ignore', 'ignore', 'pipe'],
  });

  let booted = false;
  let oomed = false;
  let exited = false;
  let spawnError = null;
  let stderrTail = '';

  // Without an 'error' listener a missing executable would throw an unhandled
  // 'error' event and take the whole script down.
  server.on('error', err => {
    spawnError = err;
    exited = true;
  });
  // 'close' (rather than 'exit') fires after stderr has been fully delivered,
  // so the allocation-failure check has seen all output before polling stops.
  server.on('close', () => {
    exited = true;
  });
  server.stderr.on('data', chunk => {
    const text = (stderrTail + chunk.toString()).toLowerCase();
    if (OOM_MARKERS.some(marker => text.includes(marker))) {
      oomed = true;
    }
    stderrTail = text.slice(-OOM_TAIL_LENGTH);
  });

  for (let attempt = 0; attempt < HEALTH_ATTEMPTS; attempt++) {
    if (oomed || exited) break;
    try {
      // fetch() has no `timeout` option; an abort signal is what actually
      // bounds a request that hangs.
      const res = await fetch(HEALTH_URL, {
        signal: AbortSignal.timeout(HEALTH_TIMEOUT_MS),
      });
      if (res.status === 200) {
        booted = true;
        break;
      }
    } catch {
      // Refused connections and timeouts are expected while the server is
      // still starting; fall through and poll again.
    }
    await delay(HEALTH_INTERVAL_MS);
  }

  if (oomed || !booted) {
    let reason = 'Server did not become healthy in time';
    if (oomed) {
      reason = 'Out of Memory';
    } else if (spawnError) {
      reason = `Could not start server (${spawnError.message})`;
    } else if (exited) {
      reason = 'Server exited before becoming healthy';
    }
    console.log(`❌ Failed: ${reason} at -c ${contextSize}`);
    server.kill('SIGKILL');
    await killServer();
    return false;
  }

  console.log(`✅ Booted! \nRunning Benchmark...`);

  try {
    // The benchmark's output is printed as-is: stdout when there is any,
    // otherwise stderr. A failing benchmark does not change the return value.
    const bench = await new Promise(resolve => {
      exec('node scripts/quick_pptest.mjs', (err, stdout, stderr) => {
        if (err) {
          console.error(`Benchmark command failed: ${err.message}`);
        }
        resolve(stdout || stderr);
      });
    });
    console.log(bench);
  } finally {
    // Always leave the port free for the next run.
    await killServer();
  }
  return true;
}

/**
 * Tries context sizes from largest to smallest and reports the first one at
 * which the server boots.
 *
 * @param {string} modelName - Model file name forwarded to testContextSize.
 * @returns {Promise<number|null>} The first working context size, or `null`
 *   when none of the candidates booted.
 */
async function findMaxContext(modelName) {
  const contexts = [262144, 131072, 65536, 32768, 16384, 8192];

  for (const c of contexts) {
    if (await testContextSize(modelName, c)) {
      console.log(`\n🎉 MAX STABLE CONTEXT FOR ${modelName}: ${c}`);
      return c;
    }
  }

  console.log(`\n❌ Failed to find any working context size for ${modelName}`);
  return null;
}

/**
 * Runs the context-size search for each model in turn.
 * @returns {Promise<void>}
 */
async function main() {
  // Running `set CUDA_VISIBLE_DEVICES=` through exec() only changes a
  // short-lived shell. Removing the variable from this process's environment
  // is what the spawned server and benchmark actually inherit.
  delete process.env.CUDA_VISIBLE_DEVICES;

  console.log("============= QWEN 27B Q4_K_M =============");
  await findMaxContext('Qwen3.5-27B-Q4_K_M.gguf');

  console.log("\n============= GEMMA 4 31B Q4_K_M =============");
  await findMaxContext('gemma-4-31B-it-Q4_K_M.gguf');
}

main().catch(err => {
  console.error(err);
  process.exitCode = 1;
});