Files
variet_llm/scripts/_archive/results/qwen_fullgpu_results.json
Variet-Worker c111b3a9b0 feat: Variet Engine v1.0 + 5-model tuning complete
Phase 01 (LLM Tuning):
- Gemma4 26B: 74.65 t/s (fast)
- Qwen 35B: 61.62 t/s (balanced)
- Gemma4 31B: 16.0 t/s (deep-coder)
- Qwen 27B: 16.7 t/s (deep-logic)
- Qwen 122B: 8.95 t/s (ultra, GPU 1 only)

Phase 02 (API Engine):
- FastAPI reverse proxy on port 8000
- /engine/switch hot-swap with 503 protection
- config/engine_models.json as single source of truth
- Replaced 4 individual .bat files with unified engine

File cleanup:
- scripts/ 85 files -> 9 + _archive/
- Root .bat files -> _archive/
2026-04-07 18:08:58 +09:00

834 lines
14 KiB
JSON

[
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "pure-GPU minbatch",
"avg_tps": 65.11,
"best_tps": 65.49,
"boot": 9,
"vram_total": 19177,
"vram": [
{
"gpu": 0,
"used": 10039,
"total": 12288
},
{
"gpu": 1,
"used": 9138,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "pure-GPU nommap small",
"avg_tps": 65.01,
"best_tps": 65.36,
"boot": 6,
"vram_total": 19672,
"vram": [
{
"gpu": 0,
"used": 10342,
"total": 12288
},
{
"gpu": 1,
"used": 9330,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "pure-GPU row-split",
"avg_tps": 13.65,
"best_tps": 14.82,
"boot": 9,
"vram_total": 19427,
"vram": [
{
"gpu": 0,
"used": 10311,
"total": 12288
},
{
"gpu": 1,
"used": 9116,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"splitMode": "row",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "pure-GPU ts=0.5,0.5",
"avg_tps": 64.92,
"best_tps": 65.23,
"boot": 9,
"vram_total": 19664,
"vram": [
{
"gpu": 0,
"used": 10334,
"total": 12288
},
{
"gpu": 1,
"used": 9330,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"tensorSplit": "0.5,0.5",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "pure-GPU all-tricks",
"avg_tps": 64.72,
"best_tps": 64.89,
"boot": 6,
"vram_total": 19171,
"vram": [
{
"gpu": 0,
"used": 10033,
"total": 12288
},
{
"gpu": 1,
"used": 9138,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"noMmap": true,
"defragThold": 0.1,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune t=2",
"avg_tps": 64.87,
"best_tps": 65.13,
"boot": 9,
"vram_total": 19170,
"vram": [
{
"gpu": 0,
"used": 10032,
"total": 12288
},
{
"gpu": 1,
"used": 9138,
"total": 12288
}
],
"params": {
"t": 2,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune t=6",
"avg_tps": 64.88,
"best_tps": 65.17,
"boot": 9,
"vram_total": 19168,
"vram": [
{
"gpu": 0,
"used": 10030,
"total": 12288
},
{
"gpu": 1,
"used": 9138,
"total": 12288
}
],
"params": {
"t": 6,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune t=8",
"avg_tps": 64.5,
"best_tps": 64.77,
"boot": 9,
"vram_total": 19168,
"vram": [
{
"gpu": 0,
"used": 10030,
"total": 12288
},
{
"gpu": 1,
"used": 9138,
"total": 12288
}
],
"params": {
"t": 8,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune ub=256 b=1024",
"avg_tps": 64.73,
"best_tps": 64.98,
"boot": 9,
"vram_total": 20640,
"vram": [
{
"gpu": 0,
"used": 10928,
"total": 12288
},
{
"gpu": 1,
"used": 9712,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 256,
"b": 1024,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune ub=256 b=2048",
"avg_tps": 63.69,
"best_tps": 64.94,
"boot": 12,
"vram_total": 20614,
"vram": [
{
"gpu": 0,
"used": 10902,
"total": 12288
},
{
"gpu": 1,
"used": 9712,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 256,
"b": 2048,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune kv=q8_0/q8_0",
"avg_tps": 64.78,
"best_tps": 65.08,
"boot": 9,
"vram_total": 20422,
"vram": [
{
"gpu": 0,
"used": 10644,
"total": 12288
},
{
"gpu": 1,
"used": 9778,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "q8_0",
"ctv": "q8_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune kv=f16/f16",
"avg_tps": 65.53,
"best_tps": 65.81,
"boot": 9,
"vram_total": 22812,
"vram": [
{
"gpu": 0,
"used": 11846,
"total": 12288
},
{
"gpu": 1,
"used": 10966,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "f16",
"ctv": "f16"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "FINAL",
"avg_tps": 66.31,
"best_tps": 66.53,
"boot": 9,
"vram_total": 22811,
"vram": [
{
"gpu": 0,
"used": 11845,
"total": 12288
},
{
"gpu": 1,
"used": 10966,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "f16",
"ctv": "f16"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "pure-GPU minbatch",
"avg_tps": 63.06,
"best_tps": 64.16,
"boot": 12,
"vram_total": 22747,
"vram": [
{
"gpu": 0,
"used": 11895,
"total": 12288
},
{
"gpu": 1,
"used": 10852,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "pure-GPU nommap small",
"avg_tps": 63.75,
"best_tps": 63.98,
"boot": 9,
"vram_total": 22579,
"vram": [
{
"gpu": 0,
"used": 11797,
"total": 12288
},
{
"gpu": 1,
"used": 10782,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "pure-GPU ts=0.5,0.5",
"avg_tps": 62.88,
"best_tps": 63.9,
"boot": 12,
"vram_total": 22578,
"vram": [
{
"gpu": 0,
"used": 11796,
"total": 12288
},
{
"gpu": 1,
"used": 10782,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"tensorSplit": "0.5,0.5",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "pure-GPU all-tricks",
"avg_tps": 62.55,
"best_tps": 63.71,
"boot": 9,
"vram_total": 22743,
"vram": [
{
"gpu": 0,
"used": 11891,
"total": 12288
},
{
"gpu": 1,
"used": 10852,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"noMmap": true,
"defragThold": 0.1,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "tune t=2",
"avg_tps": 63.07,
"best_tps": 64.08,
"boot": 9,
"vram_total": 22601,
"vram": [
{
"gpu": 0,
"used": 11819,
"total": 12288
},
{
"gpu": 1,
"used": 10782,
"total": 12288
}
],
"params": {
"t": 2,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "tune t=6",
"avg_tps": 63.58,
"best_tps": 64.04,
"boot": 9,
"vram_total": 22583,
"vram": [
{
"gpu": 0,
"used": 11801,
"total": 12288
},
{
"gpu": 1,
"used": 10782,
"total": 12288
}
],
"params": {
"t": 6,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "tune t=8",
"avg_tps": 62.92,
"best_tps": 63.73,
"boot": 9,
"vram_total": 22536,
"vram": [
{
"gpu": 0,
"used": 11754,
"total": 12288
},
{
"gpu": 1,
"used": 10782,
"total": 12288
}
],
"params": {
"t": 8,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "tune ub=256 b=1024",
"avg_tps": 62.76,
"best_tps": 63.86,
"boot": 9,
"vram_total": 22874,
"vram": [
{
"gpu": 0,
"used": 11968,
"total": 12288
},
{
"gpu": 1,
"used": 10906,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 256,
"b": 1024,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "tune ub=256 b=2048",
"avg_tps": 62.74,
"best_tps": 63.9,
"boot": 9,
"vram_total": 22912,
"vram": [
{
"gpu": 0,
"used": 12006,
"total": 12288
},
{
"gpu": 1,
"used": 10906,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 256,
"b": 2048,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "FINAL",
"avg_tps": 63.71,
"best_tps": 64.39,
"boot": 9,
"vram_total": 22566,
"vram": [
{
"gpu": 0,
"used": 11784,
"total": 12288
},
{
"gpu": 1,
"used": 10782,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 Q4_K_M",
"label": "pure-GPU nommap small",
"avg_tps": 62.29,
"best_tps": 63.03,
"boot": 9,
"vram_total": 22975,
"vram": [
{
"gpu": 0,
"used": 12007,
"total": 12288
},
{
"gpu": 1,
"used": 10968,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 Q4_K_M",
"label": "pure-GPU ts=0.5,0.5",
"avg_tps": 63.89,
"best_tps": 64.91,
"boot": 12,
"vram_total": 23002,
"vram": [
{
"gpu": 0,
"used": 12034,
"total": 12288
},
{
"gpu": 1,
"used": 10968,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"tensorSplit": "0.5,0.5",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 Q4_K_M",
"label": "tune t=2",
"avg_tps": 64.1,
"best_tps": 64.54,
"boot": 12,
"vram_total": 22980,
"vram": [
{
"gpu": 0,
"used": 12012,
"total": 12288
},
{
"gpu": 1,
"used": 10968,
"total": 12288
}
],
"params": {
"t": 2,
"ub": 128,
"b": 512,
"tensorSplit": "0.5,0.5",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 Q4_K_M",
"label": "tune t=6",
"avg_tps": 64.18,
"best_tps": 64.72,
"boot": 12,
"vram_total": 22982,
"vram": [
{
"gpu": 0,
"used": 12014,
"total": 12288
},
{
"gpu": 1,
"used": 10968,
"total": 12288
}
],
"params": {
"t": 6,
"ub": 128,
"b": 512,
"tensorSplit": "0.5,0.5",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 Q4_K_M",
"label": "tune t=8",
"avg_tps": 63.11,
"best_tps": 64.02,
"boot": 12,
"vram_total": 22980,
"vram": [
{
"gpu": 0,
"used": 12012,
"total": 12288
},
{
"gpu": 1,
"used": 10968,
"total": 12288
}
],
"params": {
"t": 8,
"ub": 128,
"b": 512,
"tensorSplit": "0.5,0.5",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
}
]