Phase 01 (LLM Tuning): - Gemma4 26B: 74.65 t/s (fast) - Qwen 35B: 61.62 t/s (balanced) - Gemma4 31B: 16.0 t/s (deep-coder) - Qwen 27B: 16.7 t/s (deep-logic) - Qwen 122B: 8.95 t/s (ultra, GPU 1 only) Phase 02 (API Engine): - FastAPI reverse proxy on port 8000 - /engine/switch hot-swap with 503 protection - config/engine_models.json as single source of truth - Replaced 4 individual .bat files with unified engine File cleanup: - scripts/ 85 files -> 9 + _archive/ - Root .bat files -> _archive/
1654 lines
27 KiB
JSON
1654 lines
27 KiB
JSON
[
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "ngl=999 pure-GPU",
|
|
"avg_tps": 63.21,
|
|
"best_tps": 63.78,
|
|
"boot": 9.1,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11770,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10411,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "compare: cpu-moe",
|
|
"avg_tps": 12.92,
|
|
"best_tps": 14.21,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 3096,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 3497,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"cpuMoe": true
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "t=2",
|
|
"avg_tps": 64.1,
|
|
"best_tps": 64.27,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11728,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10411,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "t=4",
|
|
"avg_tps": 64,
|
|
"best_tps": 64.39,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11728,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10411,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 4,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "t=8",
|
|
"avg_tps": 63.75,
|
|
"best_tps": 63.9,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11728,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10411,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 8,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "t=10",
|
|
"avg_tps": 64.01,
|
|
"best_tps": 64.14,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11728,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10411,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 10,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "t=12",
|
|
"avg_tps": 63.86,
|
|
"best_tps": 63.98,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11728,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10411,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 12,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "ub=256 b=1024",
|
|
"avg_tps": 63.8,
|
|
"best_tps": 64.12,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10504,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9619,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 256,
|
|
"b": 1024,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "ub=256 b=2048",
|
|
"avg_tps": 63.88,
|
|
"best_tps": 64.04,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10504,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9619,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 256,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "ub=512 b=4096",
|
|
"avg_tps": 63.91,
|
|
"best_tps": 64.18,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11728,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10411,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 512,
|
|
"b": 4096,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "ub=1024 b=2048",
|
|
"avg_tps": 63.86,
|
|
"best_tps": 64.1,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10956,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9907,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "ub=1024 b=4096",
|
|
"avg_tps": 63.85,
|
|
"best_tps": 64.06,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10956,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9907,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 1024,
|
|
"b": 4096,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "kv=q8_0/q8_0",
|
|
"avg_tps": 64.14,
|
|
"best_tps": 64.39,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10670,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10169,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q8_0",
|
|
"ctv": "q8_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "kv=q4_0/q8_0",
|
|
"avg_tps": 37.52,
|
|
"best_tps": 37.86,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10394,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9753,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q8_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "kv=f16/f16",
|
|
"avg_tps": 63.48,
|
|
"best_tps": 64.31,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11700,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11667,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "f16",
|
|
"ctv": "f16"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "FINAL",
|
|
"avg_tps": 64.05,
|
|
"best_tps": 64.29,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10667,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10169,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q8_0",
|
|
"ctv": "q8_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "ngl=999 pure-GPU",
|
|
"avg_tps": 76.01,
|
|
"best_tps": 76.31,
|
|
"boot": 12.1,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11784,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10454,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "compare: cpu-moe",
|
|
"avg_tps": 10.19,
|
|
"best_tps": 10.49,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 2652,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 2982,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"cpuMoe": true
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "t=2",
|
|
"avg_tps": 75.67,
|
|
"best_tps": 75.87,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11783,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10454,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "t=4",
|
|
"avg_tps": 75.61,
|
|
"best_tps": 75.87,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11783,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10454,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 4,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "t=8",
|
|
"avg_tps": 75.42,
|
|
"best_tps": 75.59,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11783,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10454,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 8,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "t=10",
|
|
"avg_tps": 75.71,
|
|
"best_tps": 75.82,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11783,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10454,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 10,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "t=12",
|
|
"avg_tps": 75.08,
|
|
"best_tps": 75.7,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11783,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10454,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 12,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "ub=256 b=1024",
|
|
"avg_tps": 75.16,
|
|
"best_tps": 75.64,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10559,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9662,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 256,
|
|
"b": 1024,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "ub=256 b=2048",
|
|
"avg_tps": 75.68,
|
|
"best_tps": 76.05,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10559,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9662,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 256,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "ub=512 b=4096",
|
|
"avg_tps": 75.92,
|
|
"best_tps": 76.16,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11784,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10454,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 4096,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "ub=1024 b=2048",
|
|
"avg_tps": 75.7,
|
|
"best_tps": 75.9,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11012,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9950,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "ub=1024 b=4096",
|
|
"avg_tps": 75.77,
|
|
"best_tps": 75.99,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11011,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9950,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 1024,
|
|
"b": 4096,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "kv=q8_0/q8_0",
|
|
"avg_tps": 76.3,
|
|
"best_tps": 76.69,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10725,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10212,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q8_0",
|
|
"ctv": "q8_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "kv=q4_0/q8_0",
|
|
"avg_tps": 42.88,
|
|
"best_tps": 44.58,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10439,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9796,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q8_0"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "kv=f16/f16",
|
|
"avg_tps": 76.36,
|
|
"best_tps": 76.78,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11761,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11710,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "f16",
|
|
"ctv": "f16"
|
|
}
|
|
},
|
|
{
|
|
"model": "Gemma4-26B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "FINAL",
|
|
"avg_tps": 76.4,
|
|
"best_tps": 76.75,
|
|
"boot": 9,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11761,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11710,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "f16",
|
|
"ctv": "f16"
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "n-cpu-moe=5",
|
|
"avg_tps": 51.43,
|
|
"best_tps": 52.07,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10365,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11152,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "t=2",
|
|
"avg_tps": 43.8,
|
|
"best_tps": 46.4,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10365,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11152,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "t=4",
|
|
"avg_tps": 49.21,
|
|
"best_tps": 52.78,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10353,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11152,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 4,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "t=8",
|
|
"avg_tps": 46.43,
|
|
"best_tps": 50.49,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10397,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11152,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 8,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "t=10",
|
|
"avg_tps": 46.12,
|
|
"best_tps": 50.06,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10351,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11152,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 10,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "t=12",
|
|
"avg_tps": 45.23,
|
|
"best_tps": 47.1,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10337,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11152,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 12,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "ub=256 b=1024",
|
|
"avg_tps": 48.9,
|
|
"best_tps": 52.3,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 9834,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10906,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 256,
|
|
"b": 1024,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "ub=256 b=2048",
|
|
"avg_tps": 49.62,
|
|
"best_tps": 52.52,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 9833,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10906,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 256,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "ub=512 b=4096",
|
|
"avg_tps": 48.78,
|
|
"best_tps": 52.14,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10337,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11152,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 4096,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "ub=1024 b=2048",
|
|
"avg_tps": 49.95,
|
|
"best_tps": 52.53,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11124,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11644,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "ub=1024 b=4096",
|
|
"avg_tps": 48.75,
|
|
"best_tps": 52.06,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11123,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11644,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 1024,
|
|
"b": 4096,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "kv=q4_0/q8_0",
|
|
"avg_tps": 42.81,
|
|
"best_tps": 44.14,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10681,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11472,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q8_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B MXFP4_MOE",
|
|
"quant": "MXFP4_MOE",
|
|
"label": "FINAL",
|
|
"avg_tps": 46.66,
|
|
"best_tps": 47.09,
|
|
"boot": 15,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10476,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11152,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "n-cpu-moe=5",
|
|
"avg_tps": 49.01,
|
|
"best_tps": 53.09,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10606,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11338,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "t=2",
|
|
"avg_tps": 45.73,
|
|
"best_tps": 47.87,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10599,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11338,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 2,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "t=4",
|
|
"avg_tps": 50.98,
|
|
"best_tps": 54.33,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10601,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11338,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 4,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "t=8",
|
|
"avg_tps": 48.45,
|
|
"best_tps": 52.1,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10596,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11338,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 8,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "t=10",
|
|
"avg_tps": 47.83,
|
|
"best_tps": 51.45,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10595,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11338,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 10,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "t=12",
|
|
"avg_tps": 43.77,
|
|
"best_tps": 46.79,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10589,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11338,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 12,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "ub=256 b=1024",
|
|
"avg_tps": 52.14,
|
|
"best_tps": 53.82,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10089,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11092,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 4,
|
|
"ub": 256,
|
|
"b": 1024,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "ub=256 b=2048",
|
|
"avg_tps": 50.23,
|
|
"best_tps": 53.66,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10091,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11092,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 4,
|
|
"ub": 256,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "ub=512 b=2048",
|
|
"avg_tps": 49.89,
|
|
"best_tps": 53.89,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10595,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11338,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 4,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "ub=512 b=4096",
|
|
"avg_tps": 50.4,
|
|
"best_tps": 54.19,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10564,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11338,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 4,
|
|
"ub": 512,
|
|
"b": 4096,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "kv=q8_0/q8_0",
|
|
"avg_tps": 51.84,
|
|
"best_tps": 53.53,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10726,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11732,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 4,
|
|
"ub": 256,
|
|
"b": 1024,
|
|
"ctk": "q8_0",
|
|
"ctv": "q8_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "kv=q4_0/q8_0",
|
|
"avg_tps": 43.22,
|
|
"best_tps": 45.99,
|
|
"boot": 12,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10410,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11412,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 4,
|
|
"ub": 256,
|
|
"b": 1024,
|
|
"ctk": "q4_0",
|
|
"ctv": "q8_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
},
|
|
{
|
|
"model": "Qwen3.5-35B Q4_K_M",
|
|
"quant": "Q4_K_M",
|
|
"label": "FINAL",
|
|
"avg_tps": 52.05,
|
|
"best_tps": 54.48,
|
|
"boot": 12.1,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10062,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 11092,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"ngl": 999,
|
|
"t": 4,
|
|
"ub": 256,
|
|
"b": 1024,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"nCpuMoe": 5
|
|
}
|
|
}
|
|
] |