// Quick PP+TG speed test.
//
// Sends a handful of chat-completion requests to the server at BASE, one after
// another, and prints token counts and tokens-per-second for each.
//
// NOTE: the requests are non-streaming, so only the total round-trip time is
// known. Both the PP (prompt) and TG (generated) rates below are therefore
// "tokens / total wall time" — end-to-end figures, not isolated prefill or
// decode speeds. Compare runs against each other rather than reading the
// numbers as absolute throughput.

const BASE = "http://127.0.0.1:8000";

// Per-request abort deadline (10 minutes); long prompts can take a while.
const REQUEST_TIMEOUT_MS = 600000;

/**
 * Formats a tokens-per-second rate with one decimal place.
 *
 * @param {number} tokens - Token count reported by the server.
 * @param {number} seconds - Elapsed wall-clock time in seconds.
 * @returns {string} The rate, or "?" when no tokens were reported or no
 *   measurable time elapsed (avoids printing "Infinity").
 */
function formatRate(tokens, seconds) {
  return tokens > 0 && seconds > 0 ? (tokens / seconds).toFixed(1) : "?";
}

/**
 * Runs one timed chat-completion request and logs a one-line summary.
 *
 * @param {string} label - Name printed at the start of the result line.
 * @param {string} prompt - Content of the single user message.
 * @param {number} maxTok - Value sent as `max_tokens`.
 * @returns {Promise<void>}
 * @throws {Error} If the server answers with a non-2xx status, if the request
 *   is aborted by the timeout, or if the body is not valid JSON.
 */
async function test(label, prompt, maxTok) {
  const t0 = Date.now();
  const r = await fetch(`${BASE}/v1/chat/completions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: "m",
      messages: [{ role: "user", content: prompt }],
      max_tokens: maxTok,
      temperature: 0,
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
  });

  // Without this check an error response would be reported as a "successful"
  // run with 0 tokens, or fail later with an unhelpful JSON parse error.
  if (!r.ok) {
    const detail = await r.text().catch(() => "");
    throw new Error(
      `${label}: HTTP ${r.status} ${r.statusText} ${detail.slice(0, 200)}`.trim(),
    );
  }

  const d = await r.json();
  const dt = (Date.now() - t0) / 1000;

  // Servers that omit `usage` are reported as 0 tokens / "?" rate.
  const u = d?.usage ?? {};
  const pp = u.prompt_tokens ?? 0;
  const tg = u.completion_tokens ?? 0;
  const ppSpeed = formatRate(pp, dt);
  const tgSpeed = formatRate(tg, dt);

  console.log(`${label} | PP:${pp}tok ${ppSpeed}t/s | TG:${tg}tok ${tgSpeed}t/s | ${dt.toFixed(1)}s`);
}

// Prompts. The size hints in the labels below refer to prompt length in
// characters (3000 filler chars; 200 lines of ~50 chars each), not tokens.
const short = "Count 1 to 20.";
const long = "x".repeat(3000) + " Summarize above in 3 words.";
const code =
  Array(200).fill("function foo(x) { return x * 2 + Math.random(); }").join("\n") +
  "\n\nRefactor above to arrow functions. Show first 5 lines.";

// [label, prompt, max_tokens] — executed strictly in order so that runs do not
// compete with each other for the server. The first entry is a warm-up.
const runs = [
  ["warmup", short, 20],
  ["SHORT", short, 200],
  ["3K-PP", long, 100],
  ["10K-CODE", code, 100],
  ["TG-500", short, 500],
];

for (const [label, prompt, maxTok] of runs) {
  await test(label, prompt, maxTok);
}

console.log("DONE");