feat: Variet Engine v1.0 + 5-model tuning complete
Phase 01 (LLM Tuning):
- Gemma4 26B: 74.65 t/s (fast)
- Qwen 35B: 61.62 t/s (balanced)
- Gemma4 31B: 16.0 t/s (deep-coder)
- Qwen 27B: 16.7 t/s (deep-logic)
- Qwen 122B: 8.95 t/s (ultra, GPU 1 only)

Phase 02 (API Engine):
- FastAPI reverse proxy on port 8000
- /engine/switch hot-swap with 503 protection
- config/engine_models.json as single source of truth
- Replaced 4 individual .bat files with unified engine

File cleanup:
- scripts/ 85 files -> 9 + _archive/
- Root .bat files -> _archive/
This commit is contained in:
31
scripts/_archive/benchmarks/quick_pptest.mjs
Normal file
31
scripts/_archive/benchmarks/quick_pptest.mjs
Normal file
@@ -0,0 +1,31 @@
|
||||
// Quick PP+TG speed test
|
||||
// Base URL of the server under test; requests go to `${BASE}/v1/chat/completions`.
const BASE = "http://127.0.0.1:8000";

/**
 * Send one non-streaming chat completion request and print a one-line summary
 * of the token counts reported in the response's `usage` object.
 *
 * Both rates are computed against the total wall-clock time of the request
 * (prompt processing + generation + transfer), so they are lower bounds on
 * the real PP and TG speeds rather than per-phase measurements.
 *
 * @param {string} label - Tag printed at the start of the result line.
 * @param {string} prompt - Content of the single user message.
 * @param {number} maxTok - Value sent as `max_tokens`.
 * @returns {Promise<void>} Resolves after the result line has been logged.
 * @throws {Error} If the server answers with a non-2xx status; also rejects
 *   if the request fails, exceeds the 600 s timeout, or the body is not JSON.
 */
async function test(label, prompt, maxTok) {
  const url = `${BASE}/v1/chat/completions`;
  const t0 = Date.now();
  const r = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model: "m", messages: [{ role: "user", content: prompt }], max_tokens: maxTok, temperature: 0 }),
    signal: AbortSignal.timeout(600000),
  });

  // An error response carries no usage data; fail loudly instead of printing
  // a "0tok ?t/s" line that looks like a (bad) measurement.
  if (!r.ok) {
    const detail = (await r.text().catch(() => "")).slice(0, 200);
    throw new Error(`${label}: HTTP ${r.status} from ${url}${detail ? ` - ${detail}` : ""}`);
  }

  const d = await r.json();
  const dt = (Date.now() - t0) / 1000;
  const u = d?.usage ?? {};
  const pp = u.prompt_tokens ?? 0;
  const tg = u.completion_tokens ?? 0;
  // "?" when the count is missing or the elapsed time rounds to zero
  // (avoids printing "Infinity").
  const ppSpeed = pp > 0 && dt > 0 ? (pp / dt).toFixed(1) : "?";
  const tgSpeed = tg > 0 && dt > 0 ? (tg / dt).toFixed(1) : "?";
  console.log(`${label} | PP:${pp}tok ${ppSpeed}t/s | TG:${tg}tok ${tgSpeed}t/s | ${dt.toFixed(1)}s`);
}
|
||||
|
||||
// Prompt fixtures. The labels used below refer to approximate prompt size in
// characters: 3000 "x" characters, and 200 copies of a 49-character line
// joined by newlines (roughly 10K characters).
const shortPrompt = "Count 1 to 20.";
const longPrompt = "x".repeat(3000) + " Summarize above in 3 words.";
const codeLine = "function foo(x) { return x * 2 + Math.random(); }";
const codePrompt = Array.from({ length: 200 }, () => codeLine).join("\n") + "\n\nRefactor above to arrow functions. Show first 5 lines.";

// [label, prompt, max_tokens] for each run, in execution order. The first
// entry is a small request issued before the measured ones.
const runs = [
  ["warmup", shortPrompt, 20],
  ["SHORT", shortPrompt, 200],
  ["3K-PP", longPrompt, 100],
  ["10K-CODE", codePrompt, 100],
  ["TG-500", shortPrompt, 500],
];

// Run strictly one at a time so each request is timed on its own.
for (const [label, prompt, maxTok] of runs) {
  await test(label, prompt, maxTok);
}

console.log("DONE");
|
||||
Reference in New Issue
Block a user