Files
variet_llm/scripts/_archive/results/dual_gpu_results.json
Variet-Worker c111b3a9b0 feat: Variet Engine v1.0 + 5-model tuning complete
Phase 01 (LLM Tuning):
- Gemma4 26B: 74.65 t/s (fast)
- Qwen 35B: 61.62 t/s (balanced)
- Gemma4 31B: 16.0 t/s (deep-coder)
- Qwen 27B: 16.7 t/s (deep-logic)
- Qwen 122B: 8.95 t/s (ultra, GPU 1 only)

Phase 02 (API Engine):
- FastAPI reverse proxy on port 8000
- /engine/switch hot-swap with 503 protection
- config/engine_models.json as single source of truth
- Replaced 4 individual .bat files with unified engine

File cleanup:
- scripts/ 85 files -> 9 + _archive/
- Root .bat files -> _archive/
2026-04-07 18:08:58 +09:00

1654 lines
27 KiB
JSON

[
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "ngl=999 pure-GPU",
"avg_tps": 63.21,
"best_tps": 63.78,
"boot": 9.1,
"vram": [
{
"gpu": 0,
"used": 11770,
"total": 12288
},
{
"gpu": 1,
"used": 10411,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "compare: cpu-moe",
"avg_tps": 12.92,
"best_tps": 14.21,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 3096,
"total": 12288
},
{
"gpu": 1,
"used": 3497,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"cpuMoe": true
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "t=2",
"avg_tps": 64.1,
"best_tps": 64.27,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11728,
"total": 12288
},
{
"gpu": 1,
"used": 10411,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "t=4",
"avg_tps": 64,
"best_tps": 64.39,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11728,
"total": 12288
},
{
"gpu": 1,
"used": 10411,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 4,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "t=8",
"avg_tps": 63.75,
"best_tps": 63.9,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11728,
"total": 12288
},
{
"gpu": 1,
"used": 10411,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 8,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "t=10",
"avg_tps": 64.01,
"best_tps": 64.14,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11728,
"total": 12288
},
{
"gpu": 1,
"used": 10411,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 10,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "t=12",
"avg_tps": 63.86,
"best_tps": 63.98,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11728,
"total": 12288
},
{
"gpu": 1,
"used": 10411,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 12,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "ub=256 b=1024",
"avg_tps": 63.8,
"best_tps": 64.12,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 10504,
"total": 12288
},
{
"gpu": 1,
"used": 9619,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 256,
"b": 1024,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "ub=256 b=2048",
"avg_tps": 63.88,
"best_tps": 64.04,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 10504,
"total": 12288
},
{
"gpu": 1,
"used": 9619,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 256,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "ub=512 b=4096",
"avg_tps": 63.91,
"best_tps": 64.18,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11728,
"total": 12288
},
{
"gpu": 1,
"used": 10411,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 512,
"b": 4096,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "ub=1024 b=2048",
"avg_tps": 63.86,
"best_tps": 64.1,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 10956,
"total": 12288
},
{
"gpu": 1,
"used": 9907,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 1024,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "ub=1024 b=4096",
"avg_tps": 63.85,
"best_tps": 64.06,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 10956,
"total": 12288
},
{
"gpu": 1,
"used": 9907,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 1024,
"b": 4096,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "kv=q8_0/q8_0",
"avg_tps": 64.14,
"best_tps": 64.39,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 10670,
"total": 12288
},
{
"gpu": 1,
"used": 10169,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 512,
"b": 2048,
"ctk": "q8_0",
"ctv": "q8_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "kv=q4_0/q8_0",
"avg_tps": 37.52,
"best_tps": 37.86,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 10394,
"total": 12288
},
{
"gpu": 1,
"used": 9753,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q8_0"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "kv=f16/f16",
"avg_tps": 63.48,
"best_tps": 64.31,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11700,
"total": 12288
},
{
"gpu": 1,
"used": 11667,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 512,
"b": 2048,
"ctk": "f16",
"ctv": "f16"
}
},
{
"model": "Gemma4-26B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "FINAL",
"avg_tps": 64.05,
"best_tps": 64.29,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 10667,
"total": 12288
},
{
"gpu": 1,
"used": 10169,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 512,
"b": 2048,
"ctk": "q8_0",
"ctv": "q8_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "ngl=999 pure-GPU",
"avg_tps": 76.01,
"best_tps": 76.31,
"boot": 12.1,
"vram": [
{
"gpu": 0,
"used": 11784,
"total": 12288
},
{
"gpu": 1,
"used": 10454,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "compare: cpu-moe",
"avg_tps": 10.19,
"best_tps": 10.49,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 2652,
"total": 12288
},
{
"gpu": 1,
"used": 2982,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"cpuMoe": true
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "t=2",
"avg_tps": 75.67,
"best_tps": 75.87,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11783,
"total": 12288
},
{
"gpu": 1,
"used": 10454,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "t=4",
"avg_tps": 75.61,
"best_tps": 75.87,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11783,
"total": 12288
},
{
"gpu": 1,
"used": 10454,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 4,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "t=8",
"avg_tps": 75.42,
"best_tps": 75.59,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11783,
"total": 12288
},
{
"gpu": 1,
"used": 10454,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 8,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "t=10",
"avg_tps": 75.71,
"best_tps": 75.82,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11783,
"total": 12288
},
{
"gpu": 1,
"used": 10454,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 10,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "t=12",
"avg_tps": 75.08,
"best_tps": 75.7,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11783,
"total": 12288
},
{
"gpu": 1,
"used": 10454,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 12,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "ub=256 b=1024",
"avg_tps": 75.16,
"best_tps": 75.64,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 10559,
"total": 12288
},
{
"gpu": 1,
"used": 9662,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 256,
"b": 1024,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "ub=256 b=2048",
"avg_tps": 75.68,
"best_tps": 76.05,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 10559,
"total": 12288
},
{
"gpu": 1,
"used": 9662,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 256,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "ub=512 b=4096",
"avg_tps": 75.92,
"best_tps": 76.16,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11784,
"total": 12288
},
{
"gpu": 1,
"used": 10454,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 4096,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "ub=1024 b=2048",
"avg_tps": 75.7,
"best_tps": 75.9,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11012,
"total": 12288
},
{
"gpu": 1,
"used": 9950,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 1024,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "ub=1024 b=4096",
"avg_tps": 75.77,
"best_tps": 75.99,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11011,
"total": 12288
},
{
"gpu": 1,
"used": 9950,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 1024,
"b": 4096,
"ctk": "q4_0",
"ctv": "q4_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "kv=q8_0/q8_0",
"avg_tps": 76.3,
"best_tps": 76.69,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 10725,
"total": 12288
},
{
"gpu": 1,
"used": 10212,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 2048,
"ctk": "q8_0",
"ctv": "q8_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "kv=q4_0/q8_0",
"avg_tps": 42.88,
"best_tps": 44.58,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 10439,
"total": 12288
},
{
"gpu": 1,
"used": 9796,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q8_0"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "kv=f16/f16",
"avg_tps": 76.36,
"best_tps": 76.78,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11761,
"total": 12288
},
{
"gpu": 1,
"used": 11710,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 2048,
"ctk": "f16",
"ctv": "f16"
}
},
{
"model": "Gemma4-26B Q4_K_M",
"quant": "Q4_K_M",
"label": "FINAL",
"avg_tps": 76.4,
"best_tps": 76.75,
"boot": 9,
"vram": [
{
"gpu": 0,
"used": 11761,
"total": 12288
},
{
"gpu": 1,
"used": 11710,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 2048,
"ctk": "f16",
"ctv": "f16"
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "n-cpu-moe=5",
"avg_tps": 51.43,
"best_tps": 52.07,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10365,
"total": 12288
},
{
"gpu": 1,
"used": 11152,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "t=2",
"avg_tps": 43.8,
"best_tps": 46.4,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10365,
"total": 12288
},
{
"gpu": 1,
"used": 11152,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "t=4",
"avg_tps": 49.21,
"best_tps": 52.78,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10353,
"total": 12288
},
{
"gpu": 1,
"used": 11152,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 4,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "t=8",
"avg_tps": 46.43,
"best_tps": 50.49,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10397,
"total": 12288
},
{
"gpu": 1,
"used": 11152,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 8,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "t=10",
"avg_tps": 46.12,
"best_tps": 50.06,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10351,
"total": 12288
},
{
"gpu": 1,
"used": 11152,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 10,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "t=12",
"avg_tps": 45.23,
"best_tps": 47.1,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10337,
"total": 12288
},
{
"gpu": 1,
"used": 11152,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 12,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "ub=256 b=1024",
"avg_tps": 48.9,
"best_tps": 52.3,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 9834,
"total": 12288
},
{
"gpu": 1,
"used": 10906,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 256,
"b": 1024,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "ub=256 b=2048",
"avg_tps": 49.62,
"best_tps": 52.52,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 9833,
"total": 12288
},
{
"gpu": 1,
"used": 10906,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 256,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "ub=512 b=4096",
"avg_tps": 48.78,
"best_tps": 52.14,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10337,
"total": 12288
},
{
"gpu": 1,
"used": 11152,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 4096,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "ub=1024 b=2048",
"avg_tps": 49.95,
"best_tps": 52.53,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 11124,
"total": 12288
},
{
"gpu": 1,
"used": 11644,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 1024,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "ub=1024 b=4096",
"avg_tps": 48.75,
"best_tps": 52.06,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 11123,
"total": 12288
},
{
"gpu": 1,
"used": 11644,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 1024,
"b": 4096,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "kv=q4_0/q8_0",
"avg_tps": 42.81,
"best_tps": 44.14,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10681,
"total": 12288
},
{
"gpu": 1,
"used": 11472,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q8_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B MXFP4_MOE",
"quant": "MXFP4_MOE",
"label": "FINAL",
"avg_tps": 46.66,
"best_tps": 47.09,
"boot": 15,
"vram": [
{
"gpu": 0,
"used": 10476,
"total": 12288
},
{
"gpu": 1,
"used": 11152,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "n-cpu-moe=5",
"avg_tps": 49.01,
"best_tps": 53.09,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10606,
"total": 12288
},
{
"gpu": 1,
"used": 11338,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 6,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "t=2",
"avg_tps": 45.73,
"best_tps": 47.87,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10599,
"total": 12288
},
{
"gpu": 1,
"used": 11338,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 2,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "t=4",
"avg_tps": 50.98,
"best_tps": 54.33,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10601,
"total": 12288
},
{
"gpu": 1,
"used": 11338,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 4,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "t=8",
"avg_tps": 48.45,
"best_tps": 52.1,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10596,
"total": 12288
},
{
"gpu": 1,
"used": 11338,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 8,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "t=10",
"avg_tps": 47.83,
"best_tps": 51.45,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10595,
"total": 12288
},
{
"gpu": 1,
"used": 11338,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 10,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "t=12",
"avg_tps": 43.77,
"best_tps": 46.79,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10589,
"total": 12288
},
{
"gpu": 1,
"used": 11338,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 12,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "ub=256 b=1024",
"avg_tps": 52.14,
"best_tps": 53.82,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10089,
"total": 12288
},
{
"gpu": 1,
"used": 11092,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 4,
"ub": 256,
"b": 1024,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "ub=256 b=2048",
"avg_tps": 50.23,
"best_tps": 53.66,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10091,
"total": 12288
},
{
"gpu": 1,
"used": 11092,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 4,
"ub": 256,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "ub=512 b=2048",
"avg_tps": 49.89,
"best_tps": 53.89,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10595,
"total": 12288
},
{
"gpu": 1,
"used": 11338,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 4,
"ub": 512,
"b": 2048,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "ub=512 b=4096",
"avg_tps": 50.4,
"best_tps": 54.19,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10564,
"total": 12288
},
{
"gpu": 1,
"used": 11338,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 4,
"ub": 512,
"b": 4096,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "kv=q8_0/q8_0",
"avg_tps": 51.84,
"best_tps": 53.53,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10726,
"total": 12288
},
{
"gpu": 1,
"used": 11732,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 4,
"ub": 256,
"b": 1024,
"ctk": "q8_0",
"ctv": "q8_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "kv=q4_0/q8_0",
"avg_tps": 43.22,
"best_tps": 45.99,
"boot": 12,
"vram": [
{
"gpu": 0,
"used": 10410,
"total": 12288
},
{
"gpu": 1,
"used": 11412,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 4,
"ub": 256,
"b": 1024,
"ctk": "q4_0",
"ctv": "q8_0",
"nCpuMoe": 5
}
},
{
"model": "Qwen3.5-35B Q4_K_M",
"quant": "Q4_K_M",
"label": "FINAL",
"avg_tps": 52.05,
"best_tps": 54.48,
"boot": 12.1,
"vram": [
{
"gpu": 0,
"used": 10062,
"total": 12288
},
{
"gpu": 1,
"used": 11092,
"total": 12288
}
],
"params": {
"ngl": 999,
"t": 4,
"ub": 256,
"b": 1024,
"ctk": "q4_0",
"ctv": "q4_0",
"nCpuMoe": 5
}
}
]