834 lines
14 KiB
JSON
834 lines
14 KiB
JSON
[
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "pure-GPU minbatch",
|
|
"avg_tps": 65.11,
|
|
"best_tps": 65.49,
|
|
"boot": 9,
|
|
"vram_total": 19177,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10039,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9138,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 64,
|
|
"b": 256,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "pure-GPU nommap small",
|
|
"avg_tps": 65.01,
|
|
"best_tps": 65.36,
|
|
"boot": 6,
|
|
"vram_total": 19672,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10342,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9330,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"noMmap": true,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "pure-GPU row-split",
|
|
"avg_tps": 13.65,
|
|
"best_tps": 14.82,
|
|
"boot": 9,
|
|
"vram_total": 19427,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10311,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9116,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"splitMode": "row",
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "pure-GPU ts=0.5,0.5",
|
|
"avg_tps": 64.92,
|
|
"best_tps": 65.23,
|
|
"boot": 9,
|
|
"vram_total": 19664,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10334,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9330,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"tensorSplit": "0.5,0.5",
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "pure-GPU all-tricks",
|
|
"avg_tps": 64.72,
|
|
"best_tps": 64.89,
|
|
"boot": 6,
|
|
"vram_total": 19171,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10033,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9138,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 64,
|
|
"b": 256,
|
|
"noMmap": true,
|
|
"defragThold": 0.1,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "tune t=2",
|
|
"avg_tps": 64.87,
|
|
"best_tps": 65.13,
|
|
"boot": 9,
|
|
"vram_total": 19170,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10032,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9138,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 2,
|
|
"ub": 64,
|
|
"b": 256,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "tune t=6",
|
|
"avg_tps": 64.88,
|
|
"best_tps": 65.17,
|
|
"boot": 9,
|
|
"vram_total": 19168,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10030,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9138,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 6,
|
|
"ub": 64,
|
|
"b": 256,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "tune t=8",
|
|
"avg_tps": 64.5,
|
|
"best_tps": 64.77,
|
|
"boot": 9,
|
|
"vram_total": 19168,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10030,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9138,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 8,
|
|
"ub": 64,
|
|
"b": 256,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "tune ub=256 b=1024",
|
|
"avg_tps": 64.73,
|
|
"best_tps": 64.98,
|
|
"boot": 9,
|
|
"vram_total": 20640,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10928,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9712,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 256,
|
|
"b": 1024,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "tune ub=256 b=2048",
|
|
"avg_tps": 63.69,
|
|
"best_tps": 64.94,
|
|
"boot": 12,
|
|
"vram_total": 20614,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10902,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9712,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 256,
|
|
"b": 2048,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "tune kv=q8_0/q8_0",
|
|
"avg_tps": 64.78,
|
|
"best_tps": 65.08,
|
|
"boot": 9,
|
|
"vram_total": 20422,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 10644,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 9778,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 64,
|
|
"b": 256,
|
|
"ngl": 999,
|
|
"ctk": "q8_0",
|
|
"ctv": "q8_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "tune kv=f16/f16",
|
|
"avg_tps": 65.53,
|
|
"best_tps": 65.81,
|
|
"boot": 9,
|
|
"vram_total": 22812,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11846,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10966,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 64,
|
|
"b": 256,
|
|
"ngl": 999,
|
|
"ctk": "f16",
|
|
"ctv": "f16"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 UD-IQ4_NL",
|
|
"label": "FINAL",
|
|
"avg_tps": 66.31,
|
|
"best_tps": 66.53,
|
|
"boot": 9,
|
|
"vram_total": 22811,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11845,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10966,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 64,
|
|
"b": 256,
|
|
"ngl": 999,
|
|
"ctk": "f16",
|
|
"ctv": "f16"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 MXFP4_MOE",
|
|
"label": "pure-GPU minbatch",
|
|
"avg_tps": 63.06,
|
|
"best_tps": 64.16,
|
|
"boot": 12,
|
|
"vram_total": 22747,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11895,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10852,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 64,
|
|
"b": 256,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 MXFP4_MOE",
|
|
"label": "pure-GPU nommap small",
|
|
"avg_tps": 63.75,
|
|
"best_tps": 63.98,
|
|
"boot": 9,
|
|
"vram_total": 22579,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11797,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10782,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"noMmap": true,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 MXFP4_MOE",
|
|
"label": "pure-GPU ts=0.5,0.5",
|
|
"avg_tps": 62.88,
|
|
"best_tps": 63.9,
|
|
"boot": 12,
|
|
"vram_total": 22578,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11796,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10782,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"tensorSplit": "0.5,0.5",
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 MXFP4_MOE",
|
|
"label": "pure-GPU all-tricks",
|
|
"avg_tps": 62.55,
|
|
"best_tps": 63.71,
|
|
"boot": 9,
|
|
"vram_total": 22743,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11891,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10852,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 64,
|
|
"b": 256,
|
|
"noMmap": true,
|
|
"defragThold": 0.1,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 MXFP4_MOE",
|
|
"label": "tune t=2",
|
|
"avg_tps": 63.07,
|
|
"best_tps": 64.08,
|
|
"boot": 9,
|
|
"vram_total": 22601,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11819,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10782,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 2,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"noMmap": true,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 MXFP4_MOE",
|
|
"label": "tune t=6",
|
|
"avg_tps": 63.58,
|
|
"best_tps": 64.04,
|
|
"boot": 9,
|
|
"vram_total": 22583,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11801,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10782,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 6,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"noMmap": true,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 MXFP4_MOE",
|
|
"label": "tune t=8",
|
|
"avg_tps": 62.92,
|
|
"best_tps": 63.73,
|
|
"boot": 9,
|
|
"vram_total": 22536,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11754,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10782,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 8,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"noMmap": true,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 MXFP4_MOE",
|
|
"label": "tune ub=256 b=1024",
|
|
"avg_tps": 62.76,
|
|
"best_tps": 63.86,
|
|
"boot": 9,
|
|
"vram_total": 22874,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11968,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10906,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 256,
|
|
"b": 1024,
|
|
"noMmap": true,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 MXFP4_MOE",
|
|
"label": "tune ub=256 b=2048",
|
|
"avg_tps": 62.74,
|
|
"best_tps": 63.9,
|
|
"boot": 9,
|
|
"vram_total": 22912,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 12006,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10906,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 256,
|
|
"b": 2048,
|
|
"noMmap": true,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 MXFP4_MOE",
|
|
"label": "FINAL",
|
|
"avg_tps": 63.71,
|
|
"best_tps": 64.39,
|
|
"boot": 9,
|
|
"vram_total": 22566,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 11784,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10782,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"noMmap": true,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 Q4_K_M",
|
|
"label": "pure-GPU nommap small",
|
|
"avg_tps": 62.29,
|
|
"best_tps": 63.03,
|
|
"boot": 9,
|
|
"vram_total": 22975,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 12007,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10968,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"noMmap": true,
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 Q4_K_M",
|
|
"label": "pure-GPU ts=0.5,0.5",
|
|
"avg_tps": 63.89,
|
|
"best_tps": 64.91,
|
|
"boot": 12,
|
|
"vram_total": 23002,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 12034,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10968,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 4,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"tensorSplit": "0.5,0.5",
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 Q4_K_M",
|
|
"label": "tune t=2",
|
|
"avg_tps": 64.1,
|
|
"best_tps": 64.54,
|
|
"boot": 12,
|
|
"vram_total": 22980,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 12012,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10968,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 2,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"tensorSplit": "0.5,0.5",
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 Q4_K_M",
|
|
"label": "tune t=6",
|
|
"avg_tps": 64.18,
|
|
"best_tps": 64.72,
|
|
"boot": 12,
|
|
"vram_total": 22982,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 12014,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10968,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 6,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"tensorSplit": "0.5,0.5",
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
},
|
|
{
|
|
"model": "Qwen3.5 Q4_K_M",
|
|
"label": "tune t=8",
|
|
"avg_tps": 63.11,
|
|
"best_tps": 64.02,
|
|
"boot": 12,
|
|
"vram_total": 22980,
|
|
"vram": [
|
|
{
|
|
"gpu": 0,
|
|
"used": 12012,
|
|
"total": 12288
|
|
},
|
|
{
|
|
"gpu": 1,
|
|
"used": 10968,
|
|
"total": 12288
|
|
}
|
|
],
|
|
"params": {
|
|
"t": 8,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"tensorSplit": "0.5,0.5",
|
|
"ngl": 999,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0"
|
|
},
|
|
"gpu_only": true
|
|
}
|
|
] |