[ { "model": "Qwen3.5 UD-IQ4_NL", "label": "pure-GPU minbatch", "avg_tps": 65.11, "best_tps": 65.49, "boot": 9, "vram_total": 19177, "vram": [ { "gpu": 0, "used": 10039, "total": 12288 }, { "gpu": 1, "used": 9138, "total": 12288 } ], "params": { "t": 4, "ub": 64, "b": 256, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 UD-IQ4_NL", "label": "pure-GPU nommap small", "avg_tps": 65.01, "best_tps": 65.36, "boot": 6, "vram_total": 19672, "vram": [ { "gpu": 0, "used": 10342, "total": 12288 }, { "gpu": 1, "used": 9330, "total": 12288 } ], "params": { "t": 4, "ub": 128, "b": 512, "noMmap": true, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 UD-IQ4_NL", "label": "pure-GPU row-split", "avg_tps": 13.65, "best_tps": 14.82, "boot": 9, "vram_total": 19427, "vram": [ { "gpu": 0, "used": 10311, "total": 12288 }, { "gpu": 1, "used": 9116, "total": 12288 } ], "params": { "t": 4, "ub": 128, "b": 512, "splitMode": "row", "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 UD-IQ4_NL", "label": "pure-GPU ts=0.5,0.5", "avg_tps": 64.92, "best_tps": 65.23, "boot": 9, "vram_total": 19664, "vram": [ { "gpu": 0, "used": 10334, "total": 12288 }, { "gpu": 1, "used": 9330, "total": 12288 } ], "params": { "t": 4, "ub": 128, "b": 512, "tensorSplit": "0.5,0.5", "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 UD-IQ4_NL", "label": "pure-GPU all-tricks", "avg_tps": 64.72, "best_tps": 64.89, "boot": 6, "vram_total": 19171, "vram": [ { "gpu": 0, "used": 10033, "total": 12288 }, { "gpu": 1, "used": 9138, "total": 12288 } ], "params": { "t": 4, "ub": 64, "b": 256, "noMmap": true, "defragThold": 0.1, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 UD-IQ4_NL", "label": "tune t=2", "avg_tps": 64.87, "best_tps": 65.13, "boot": 9, "vram_total": 19170, "vram": [ { "gpu": 0, "used": 10032, "total": 12288 }, { "gpu": 1, "used": 9138, "total": 12288 } ], "params": { "t": 2, "ub": 64, "b": 256, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 UD-IQ4_NL", "label": "tune t=6", "avg_tps": 64.88, "best_tps": 65.17, "boot": 9, "vram_total": 19168, "vram": [ { "gpu": 0, "used": 10030, "total": 12288 }, { "gpu": 1, "used": 9138, "total": 12288 } ], "params": { "t": 6, "ub": 64, "b": 256, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 UD-IQ4_NL", "label": "tune t=8", "avg_tps": 64.5, "best_tps": 64.77, "boot": 9, "vram_total": 19168, "vram": [ { "gpu": 0, "used": 10030, "total": 12288 }, { "gpu": 1, "used": 9138, "total": 12288 } ], "params": { "t": 8, "ub": 64, "b": 256, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 UD-IQ4_NL", "label": "tune ub=256 b=1024", "avg_tps": 64.73, "best_tps": 64.98, "boot": 9, "vram_total": 20640, "vram": [ { "gpu": 0, "used": 10928, "total": 12288 }, { "gpu": 1, "used": 9712, "total": 12288 } ], "params": { "t": 4, "ub": 256, "b": 1024, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 UD-IQ4_NL", "label": "tune ub=256 b=2048", "avg_tps": 63.69, "best_tps": 64.94, "boot": 12, "vram_total": 20614, "vram": [ { "gpu": 0, "used": 10902, "total": 12288 }, { "gpu": 1, "used": 9712, "total": 12288 } ], "params": { "t": 4, "ub": 256, "b": 2048, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 UD-IQ4_NL", "label": "tune kv=q8_0/q8_0", "avg_tps": 64.78, "best_tps": 65.08, "boot": 9, "vram_total": 20422, "vram": [ { "gpu": 0, "used": 10644, "total": 12288 }, { "gpu": 1, "used": 9778, "total": 12288 } ], "params": { "t": 4, "ub": 64, "b": 256, "ngl": 999, "ctk": "q8_0", "ctv": "q8_0" }, "gpu_only": true }, { "model": "Qwen3.5 UD-IQ4_NL", "label": "tune kv=f16/f16", "avg_tps": 65.53, "best_tps": 65.81, "boot": 9, "vram_total": 22812, "vram": [ { "gpu": 0, "used": 11846, "total": 12288 }, { "gpu": 1, "used": 10966, "total": 12288 } ], "params": { "t": 4, "ub": 64, "b": 256, "ngl": 999, "ctk": "f16", "ctv": "f16" }, "gpu_only": true }, { "model": "Qwen3.5 UD-IQ4_NL", "label": "FINAL", "avg_tps": 66.31, "best_tps": 66.53, "boot": 9, "vram_total": 22811, "vram": [ { "gpu": 0, "used": 11845, "total": 12288 }, { "gpu": 1, "used": 10966, "total": 12288 } ], "params": { "t": 4, "ub": 64, "b": 256, "ngl": 999, "ctk": "f16", "ctv": "f16" }, "gpu_only": true }, { "model": "Qwen3.5 MXFP4_MOE", "label": "pure-GPU minbatch", "avg_tps": 63.06, "best_tps": 64.16, "boot": 12, "vram_total": 22747, "vram": [ { "gpu": 0, "used": 11895, "total": 12288 }, { "gpu": 1, "used": 10852, "total": 12288 } ], "params": { "t": 4, "ub": 64, "b": 256, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 MXFP4_MOE", "label": "pure-GPU nommap small", "avg_tps": 63.75, "best_tps": 63.98, "boot": 9, "vram_total": 22579, "vram": [ { "gpu": 0, "used": 11797, "total": 12288 }, { "gpu": 1, "used": 10782, "total": 12288 } ], "params": { "t": 4, "ub": 128, "b": 512, "noMmap": true, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 MXFP4_MOE", "label": "pure-GPU ts=0.5,0.5", "avg_tps": 62.88, "best_tps": 63.9, "boot": 12, "vram_total": 22578, "vram": [ { "gpu": 0, "used": 11796, "total": 12288 }, { "gpu": 1, "used": 10782, "total": 12288 } ], "params": { "t": 4, "ub": 128, "b": 512, "tensorSplit": "0.5,0.5", "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 MXFP4_MOE", "label": "pure-GPU all-tricks", "avg_tps": 62.55, "best_tps": 63.71, "boot": 9, "vram_total": 22743, "vram": [ { "gpu": 0, "used": 11891, "total": 12288 }, { "gpu": 1, "used": 10852, "total": 12288 } ], "params": { "t": 4, "ub": 64, "b": 256, "noMmap": true, "defragThold": 0.1, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 MXFP4_MOE", "label": "tune t=2", "avg_tps": 63.07, "best_tps": 64.08, "boot": 9, "vram_total": 22601, "vram": [ { "gpu": 0, "used": 11819, "total": 12288 }, { "gpu": 1, "used": 10782, "total": 12288 } ], "params": { "t": 2, "ub": 128, "b": 512, "noMmap": true, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 MXFP4_MOE", "label": "tune t=6", "avg_tps": 63.58, "best_tps": 64.04, "boot": 9, "vram_total": 22583, "vram": [ { "gpu": 0, "used": 11801, "total": 12288 }, { "gpu": 1, "used": 10782, "total": 12288 } ], "params": { "t": 6, "ub": 128, "b": 512, "noMmap": true, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 MXFP4_MOE", "label": "tune t=8", "avg_tps": 62.92, "best_tps": 63.73, "boot": 9, "vram_total": 22536, "vram": [ { "gpu": 0, "used": 11754, "total": 12288 }, { "gpu": 1, "used": 10782, "total": 12288 } ], "params": { "t": 8, "ub": 128, "b": 512, "noMmap": true, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 MXFP4_MOE", "label": "tune ub=256 b=1024", "avg_tps": 62.76, "best_tps": 63.86, "boot": 9, "vram_total": 22874, "vram": [ { "gpu": 0, "used": 11968, "total": 12288 }, { "gpu": 1, "used": 10906, "total": 12288 } ], "params": { "t": 4, "ub": 256, "b": 1024, "noMmap": true, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 MXFP4_MOE", "label": "tune ub=256 b=2048", "avg_tps": 62.74, "best_tps": 63.9, "boot": 9, "vram_total": 22912, "vram": [ { "gpu": 0, "used": 12006, "total": 12288 }, { "gpu": 1, "used": 10906, "total": 12288 } ], "params": { "t": 4, "ub": 256, "b": 2048, "noMmap": true, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 MXFP4_MOE", "label": "FINAL", "avg_tps": 63.71, "best_tps": 64.39, "boot": 9, "vram_total": 22566, "vram": [ { "gpu": 0, "used": 11784, "total": 12288 }, { "gpu": 1, "used": 10782, "total": 12288 } ], "params": { "t": 4, "ub": 128, "b": 512, "noMmap": true, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 Q4_K_M", "label": "pure-GPU nommap small", "avg_tps": 62.29, "best_tps": 63.03, "boot": 9, "vram_total": 22975, "vram": [ { "gpu": 0, "used": 12007, "total": 12288 }, { "gpu": 1, "used": 10968, "total": 12288 } ], "params": { "t": 4, "ub": 128, "b": 512, "noMmap": true, "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 Q4_K_M", "label": "pure-GPU ts=0.5,0.5", "avg_tps": 63.89, "best_tps": 64.91, "boot": 12, "vram_total": 23002, "vram": [ { "gpu": 0, "used": 12034, "total": 12288 }, { "gpu": 1, "used": 10968, "total": 12288 } ], "params": { "t": 4, "ub": 128, "b": 512, "tensorSplit": "0.5,0.5", "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 Q4_K_M", "label": "tune t=2", "avg_tps": 64.1, "best_tps": 64.54, "boot": 12, "vram_total": 22980, "vram": [ { "gpu": 0, "used": 12012, "total": 12288 }, { "gpu": 1, "used": 10968, "total": 12288 } ], "params": { "t": 2, "ub": 128, "b": 512, "tensorSplit": "0.5,0.5", "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 Q4_K_M", "label": "tune t=6", "avg_tps": 64.18, "best_tps": 64.72, "boot": 12, "vram_total": 22982, "vram": [ { "gpu": 0, "used": 12014, "total": 12288 }, { "gpu": 1, "used": 10968, "total": 12288 } ], "params": { "t": 6, "ub": 128, "b": 512, "tensorSplit": "0.5,0.5", "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true }, { "model": "Qwen3.5 Q4_K_M", "label": "tune t=8", "avg_tps": 63.11, "best_tps": 64.02, "boot": 12, "vram_total": 22980, "vram": [ { "gpu": 0, "used": 12012, "total": 12288 }, { "gpu": 1, "used": 10968, "total": 12288 } ], "params": { "t": 8, "ub": 128, "b": 512, "tensorSplit": "0.5,0.5", "ngl": 999, "ctk": "q4_0", "ctv": "q4_0" }, "gpu_only": true } ]