Files
variet_llm/scripts/qwen_fullgpu_results.json

834 lines
14 KiB
JSON

[
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "pure-GPU minbatch",
"avg_tps": 65.11,
"best_tps": 65.49,
"boot": 9,
"vram_total": 19177,
"vram": [
{
"gpu": 0,
"used": 10039,
"total": 12288
},
{
"gpu": 1,
"used": 9138,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "pure-GPU nommap small",
"avg_tps": 65.01,
"best_tps": 65.36,
"boot": 6,
"vram_total": 19672,
"vram": [
{
"gpu": 0,
"used": 10342,
"total": 12288
},
{
"gpu": 1,
"used": 9330,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "pure-GPU row-split",
"avg_tps": 13.65,
"best_tps": 14.82,
"boot": 9,
"vram_total": 19427,
"vram": [
{
"gpu": 0,
"used": 10311,
"total": 12288
},
{
"gpu": 1,
"used": 9116,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"splitMode": "row",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "pure-GPU ts=0.5,0.5",
"avg_tps": 64.92,
"best_tps": 65.23,
"boot": 9,
"vram_total": 19664,
"vram": [
{
"gpu": 0,
"used": 10334,
"total": 12288
},
{
"gpu": 1,
"used": 9330,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"tensorSplit": "0.5,0.5",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "pure-GPU all-tricks",
"avg_tps": 64.72,
"best_tps": 64.89,
"boot": 6,
"vram_total": 19171,
"vram": [
{
"gpu": 0,
"used": 10033,
"total": 12288
},
{
"gpu": 1,
"used": 9138,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"noMmap": true,
"defragThold": 0.1,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune t=2",
"avg_tps": 64.87,
"best_tps": 65.13,
"boot": 9,
"vram_total": 19170,
"vram": [
{
"gpu": 0,
"used": 10032,
"total": 12288
},
{
"gpu": 1,
"used": 9138,
"total": 12288
}
],
"params": {
"t": 2,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune t=6",
"avg_tps": 64.88,
"best_tps": 65.17,
"boot": 9,
"vram_total": 19168,
"vram": [
{
"gpu": 0,
"used": 10030,
"total": 12288
},
{
"gpu": 1,
"used": 9138,
"total": 12288
}
],
"params": {
"t": 6,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune t=8",
"avg_tps": 64.5,
"best_tps": 64.77,
"boot": 9,
"vram_total": 19168,
"vram": [
{
"gpu": 0,
"used": 10030,
"total": 12288
},
{
"gpu": 1,
"used": 9138,
"total": 12288
}
],
"params": {
"t": 8,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune ub=256 b=1024",
"avg_tps": 64.73,
"best_tps": 64.98,
"boot": 9,
"vram_total": 20640,
"vram": [
{
"gpu": 0,
"used": 10928,
"total": 12288
},
{
"gpu": 1,
"used": 9712,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 256,
"b": 1024,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune ub=256 b=2048",
"avg_tps": 63.69,
"best_tps": 64.94,
"boot": 12,
"vram_total": 20614,
"vram": [
{
"gpu": 0,
"used": 10902,
"total": 12288
},
{
"gpu": 1,
"used": 9712,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 256,
"b": 2048,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune kv=q8_0/q8_0",
"avg_tps": 64.78,
"best_tps": 65.08,
"boot": 9,
"vram_total": 20422,
"vram": [
{
"gpu": 0,
"used": 10644,
"total": 12288
},
{
"gpu": 1,
"used": 9778,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "q8_0",
"ctv": "q8_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "tune kv=f16/f16",
"avg_tps": 65.53,
"best_tps": 65.81,
"boot": 9,
"vram_total": 22812,
"vram": [
{
"gpu": 0,
"used": 11846,
"total": 12288
},
{
"gpu": 1,
"used": 10966,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "f16",
"ctv": "f16"
},
"gpu_only": true
},
{
"model": "Qwen3.5 UD-IQ4_NL",
"label": "FINAL",
"avg_tps": 66.31,
"best_tps": 66.53,
"boot": 9,
"vram_total": 22811,
"vram": [
{
"gpu": 0,
"used": 11845,
"total": 12288
},
{
"gpu": 1,
"used": 10966,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "f16",
"ctv": "f16"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "pure-GPU minbatch",
"avg_tps": 63.06,
"best_tps": 64.16,
"boot": 12,
"vram_total": 22747,
"vram": [
{
"gpu": 0,
"used": 11895,
"total": 12288
},
{
"gpu": 1,
"used": 10852,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "pure-GPU nommap small",
"avg_tps": 63.75,
"best_tps": 63.98,
"boot": 9,
"vram_total": 22579,
"vram": [
{
"gpu": 0,
"used": 11797,
"total": 12288
},
{
"gpu": 1,
"used": 10782,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "pure-GPU ts=0.5,0.5",
"avg_tps": 62.88,
"best_tps": 63.9,
"boot": 12,
"vram_total": 22578,
"vram": [
{
"gpu": 0,
"used": 11796,
"total": 12288
},
{
"gpu": 1,
"used": 10782,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"tensorSplit": "0.5,0.5",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "pure-GPU all-tricks",
"avg_tps": 62.55,
"best_tps": 63.71,
"boot": 9,
"vram_total": 22743,
"vram": [
{
"gpu": 0,
"used": 11891,
"total": 12288
},
{
"gpu": 1,
"used": 10852,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 64,
"b": 256,
"noMmap": true,
"defragThold": 0.1,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "tune t=2",
"avg_tps": 63.07,
"best_tps": 64.08,
"boot": 9,
"vram_total": 22601,
"vram": [
{
"gpu": 0,
"used": 11819,
"total": 12288
},
{
"gpu": 1,
"used": 10782,
"total": 12288
}
],
"params": {
"t": 2,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "tune t=6",
"avg_tps": 63.58,
"best_tps": 64.04,
"boot": 9,
"vram_total": 22583,
"vram": [
{
"gpu": 0,
"used": 11801,
"total": 12288
},
{
"gpu": 1,
"used": 10782,
"total": 12288
}
],
"params": {
"t": 6,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "tune t=8",
"avg_tps": 62.92,
"best_tps": 63.73,
"boot": 9,
"vram_total": 22536,
"vram": [
{
"gpu": 0,
"used": 11754,
"total": 12288
},
{
"gpu": 1,
"used": 10782,
"total": 12288
}
],
"params": {
"t": 8,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "tune ub=256 b=1024",
"avg_tps": 62.76,
"best_tps": 63.86,
"boot": 9,
"vram_total": 22874,
"vram": [
{
"gpu": 0,
"used": 11968,
"total": 12288
},
{
"gpu": 1,
"used": 10906,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 256,
"b": 1024,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "tune ub=256 b=2048",
"avg_tps": 62.74,
"best_tps": 63.9,
"boot": 9,
"vram_total": 22912,
"vram": [
{
"gpu": 0,
"used": 12006,
"total": 12288
},
{
"gpu": 1,
"used": 10906,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 256,
"b": 2048,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 MXFP4_MOE",
"label": "FINAL",
"avg_tps": 63.71,
"best_tps": 64.39,
"boot": 9,
"vram_total": 22566,
"vram": [
{
"gpu": 0,
"used": 11784,
"total": 12288
},
{
"gpu": 1,
"used": 10782,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 Q4_K_M",
"label": "pure-GPU nommap small",
"avg_tps": 62.29,
"best_tps": 63.03,
"boot": 9,
"vram_total": 22975,
"vram": [
{
"gpu": 0,
"used": 12007,
"total": 12288
},
{
"gpu": 1,
"used": 10968,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"noMmap": true,
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 Q4_K_M",
"label": "pure-GPU ts=0.5,0.5",
"avg_tps": 63.89,
"best_tps": 64.91,
"boot": 12,
"vram_total": 23002,
"vram": [
{
"gpu": 0,
"used": 12034,
"total": 12288
},
{
"gpu": 1,
"used": 10968,
"total": 12288
}
],
"params": {
"t": 4,
"ub": 128,
"b": 512,
"tensorSplit": "0.5,0.5",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 Q4_K_M",
"label": "tune t=2",
"avg_tps": 64.1,
"best_tps": 64.54,
"boot": 12,
"vram_total": 22980,
"vram": [
{
"gpu": 0,
"used": 12012,
"total": 12288
},
{
"gpu": 1,
"used": 10968,
"total": 12288
}
],
"params": {
"t": 2,
"ub": 128,
"b": 512,
"tensorSplit": "0.5,0.5",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 Q4_K_M",
"label": "tune t=6",
"avg_tps": 64.18,
"best_tps": 64.72,
"boot": 12,
"vram_total": 22982,
"vram": [
{
"gpu": 0,
"used": 12014,
"total": 12288
},
{
"gpu": 1,
"used": 10968,
"total": 12288
}
],
"params": {
"t": 6,
"ub": 128,
"b": 512,
"tensorSplit": "0.5,0.5",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
},
{
"model": "Qwen3.5 Q4_K_M",
"label": "tune t=8",
"avg_tps": 63.11,
"best_tps": 64.02,
"boot": 12,
"vram_total": 22980,
"vram": [
{
"gpu": 0,
"used": 12012,
"total": 12288
},
{
"gpu": 1,
"used": 10968,
"total": 12288
}
],
"params": {
"t": 8,
"ub": 128,
"b": 512,
"tensorSplit": "0.5,0.5",
"ngl": 999,
"ctk": "q4_0",
"ctv": "q4_0"
},
"gpu_only": true
}
]