Files
variet_llm/scripts/_archive/results/tune_results_gemma4_ncpumoe.json
Variet-Worker c111b3a9b0 feat: Variet Engine v1.0 + 5-model tuning complete
Phase 01 (LLM Tuning):
- Gemma4 26B: 74.65 t/s (fast)
- Qwen 35B: 61.62 t/s (balanced)
- Gemma4 31B: 16.0 t/s (deep-coder)
- Qwen 27B: 16.7 t/s (deep-logic)
- Qwen 122B: 8.95 t/s (ultra, GPU 1 only)

Phase 02 (API Engine):
- FastAPI reverse proxy on port 8000
- /engine/switch hot-swap with 503 protection
- config/engine_models.json as single source of truth
- Replaced 4 individual .bat files with unified engine

File cleanup:
- scripts/ 85 files -> 9 + _archive/
- Root .bat files -> _archive/
2026-04-07 18:08:58 +09:00

201 lines
3.6 KiB
JSON

[
{
"label": "ncpumoe=0",
"ncpumoe": 0,
"avg": 15.396949591766335,
"best": 20.220093309883133,
"vram": 12011,
"nommap": false
},
{
"label": "ncpumoe=5",
"ncpumoe": 5,
"avg": 4.853957926040404,
"best": 4.9029479257524216,
"vram": 11945,
"nommap": false
},
{
"label": "ncpumoe=10",
"ncpumoe": 10,
"avg": 20.64137159193706,
"best": 26.474940718957154,
"vram": 12020,
"nommap": false
},
{
"label": "ncpumoe=15",
"ncpumoe": 15,
"avg": 13.424368433101165,
"best": 13.698684361880598,
"vram": 12018,
"nommap": false
},
{
"label": "ncpumoe=20",
"ncpumoe": 20,
"avg": 10.338449574838693,
"best": 13.495275411319872,
"vram": 11530,
"nommap": true
},
{
"label": "ncpumoe=25",
"ncpumoe": 25,
"avg": 12.920348175328435,
"best": 12.99923042323437,
"vram": 11625,
"nommap": true
},
{
"label": "ncpumoe=30",
"ncpumoe": 30,
"avg": 13.251690836275145,
"best": 13.253697466971921,
"vram": 9064,
"nommap": true
},
{
"label": "ncpumoe=7",
"ncpumoe": 7,
"avg": 16.31796299658782,
"best": 23.160760806218782,
"vram": 11994,
"nommap": false
},
{
"label": "ncpumoe=9",
"ncpumoe": 9,
"avg": 7.469651892205037,
"best": 10.875064047449284,
"vram": 11941,
"nommap": false
},
{
"label": "ncpumoe=11",
"ncpumoe": 11,
"avg": 14.814740144776437,
"best": 15.199641279675724,
"vram": 11984,
"nommap": false
},
{
"label": "ncpumoe=13",
"ncpumoe": 13,
"avg": 14.183175252947136,
"best": 14.427257794639086,
"vram": 12003,
"nommap": false
},
{
"label": "t=2",
"ncpumoe": 10,
"avg": 28.551811207068425,
"best": 28.688565545389164,
"vram": 11968,
"t": 2,
"nommap": false
},
{
"label": "t=4",
"ncpumoe": 10,
"avg": 30.8619310622166,
"best": 31.17677746690393,
"vram": 11972,
"t": 4,
"nommap": false
},
{
"label": "t=6",
"ncpumoe": 10,
"avg": 30.578454576249854,
"best": 30.971792125516313,
"vram": 11983,
"t": 6,
"nommap": false
},
{
"label": "t=8",
"ncpumoe": 10,
"avg": 30.529393512116172,
"best": 30.954830478128166,
"vram": 11982,
"t": 8,
"nommap": false
},
{
"label": "t=10",
"ncpumoe": 10,
"avg": 30.773041112229503,
"best": 31.00899077264753,
"vram": 11972,
"t": 10,
"nommap": false
},
{
"label": "ub=256,b=1024",
"ncpumoe": 10,
"avg": 30.49319055490045,
"best": 30.691055921541377,
"vram": 11993,
"t": 4,
"ub": 256,
"b": 1024,
"nommap": false
},
{
"label": "ub=512,b=2048",
"ncpumoe": 10,
"avg": 30.923573731331718,
"best": 31.902272031660825,
"vram": 11995,
"t": 4,
"ub": 512,
"b": 2048,
"nommap": false
},
{
"label": "ub=512,b=4096",
"ncpumoe": 10,
"avg": 30.723820162954862,
"best": 31.065476003548053,
"vram": 11966,
"t": 4,
"ub": 512,
"b": 4096,
"nommap": false
},
{
"label": "ub=1024,b=2048",
"ncpumoe": 10,
"avg": 30.489888387093156,
"best": 30.982074615885946,
"vram": 11964,
"t": 4,
"ub": 1024,
"b": 2048,
"nommap": false
},
{
"label": "kv=q4_0",
"ncpumoe": 10,
"avg": 30.63156129571348,
"best": 31.088674795634944,
"vram": 11988,
"t": 4,
"ctk": "q4_0",
"ctv": "q4_0",
"nommap": false
},
{
"label": "kv=q8_0",
"ncpumoe": 10,
"avg": 29.6114222576863,
"best": 30.580427895917573,
"vram": 11980,
"t": 4,
"ctk": "q8_0",
"ctv": "q8_0",
"nommap": false
}
]