[ { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "ngl=999 pure-GPU", "avg_tps": 63.21, "best_tps": 63.78, "boot": 9.1, "vram": [ { "gpu": 0, "used": 11770, "total": 12288 }, { "gpu": 1, "used": 10411, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "compare: cpu-moe", "avg_tps": 12.92, "best_tps": 14.21, "boot": 12, "vram": [ { "gpu": 0, "used": 3096, "total": 12288 }, { "gpu": 1, "used": 3497, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "cpuMoe": true } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "t=2", "avg_tps": 64.1, "best_tps": 64.27, "boot": 9, "vram": [ { "gpu": 0, "used": 11728, "total": 12288 }, { "gpu": 1, "used": 10411, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "t=4", "avg_tps": 64, "best_tps": 64.39, "boot": 9, "vram": [ { "gpu": 0, "used": 11728, "total": 12288 }, { "gpu": 1, "used": 10411, "total": 12288 } ], "params": { "ngl": 999, "t": 4, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "t=8", "avg_tps": 63.75, "best_tps": 63.9, "boot": 9, "vram": [ { "gpu": 0, "used": 11728, "total": 12288 }, { "gpu": 1, "used": 10411, "total": 12288 } ], "params": { "ngl": 999, "t": 8, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "t=10", "avg_tps": 64.01, "best_tps": 64.14, "boot": 9, "vram": [ { "gpu": 0, "used": 11728, "total": 12288 }, { "gpu": 1, "used": 10411, "total": 12288 } ], "params": { "ngl": 999, "t": 10, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "t=12", "avg_tps": 63.86, "best_tps": 63.98, "boot": 9, "vram": [ { "gpu": 0, "used": 11728, "total": 12288 }, { "gpu": 1, "used": 10411, "total": 12288 } ], "params": { "ngl": 999, "t": 12, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "ub=256 b=1024", "avg_tps": 63.8, "best_tps": 64.12, "boot": 9, "vram": [ { "gpu": 0, "used": 10504, "total": 12288 }, { "gpu": 1, "used": 9619, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 256, "b": 1024, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "ub=256 b=2048", "avg_tps": 63.88, "best_tps": 64.04, "boot": 9, "vram": [ { "gpu": 0, "used": 10504, "total": 12288 }, { "gpu": 1, "used": 9619, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 256, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "ub=512 b=4096", "avg_tps": 63.91, "best_tps": 64.18, "boot": 9, "vram": [ { "gpu": 0, "used": 11728, "total": 12288 }, { "gpu": 1, "used": 10411, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 512, "b": 4096, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "ub=1024 b=2048", "avg_tps": 63.86, "best_tps": 64.1, "boot": 9, "vram": [ { "gpu": 0, "used": 10956, "total": 12288 }, { "gpu": 1, "used": 9907, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 1024, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "ub=1024 b=4096", "avg_tps": 63.85, "best_tps": 64.06, "boot": 9, "vram": [ { "gpu": 0, "used": 10956, "total": 12288 }, { "gpu": 1, "used": 9907, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 1024, "b": 4096, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "kv=q8_0/q8_0", "avg_tps": 64.14, "best_tps": 64.39, "boot": 9, "vram": [ { "gpu": 0, "used": 10670, "total": 12288 }, { "gpu": 1, "used": 10169, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 512, "b": 2048, "ctk": "q8_0", "ctv": "q8_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "kv=q4_0/q8_0", "avg_tps": 37.52, "best_tps": 37.86, "boot": 9, "vram": [ { "gpu": 0, "used": 10394, "total": 12288 }, { "gpu": 1, "used": 9753, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q8_0" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "kv=f16/f16", "avg_tps": 63.48, "best_tps": 64.31, "boot": 9, "vram": [ { "gpu": 0, "used": 11700, "total": 12288 }, { "gpu": 1, "used": 11667, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 512, "b": 2048, "ctk": "f16", "ctv": "f16" } }, { "model": "Gemma4-26B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "FINAL", "avg_tps": 64.05, "best_tps": 64.29, "boot": 9, "vram": [ { "gpu": 0, "used": 10667, "total": 12288 }, { "gpu": 1, "used": 10169, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 512, "b": 2048, "ctk": "q8_0", "ctv": "q8_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "ngl=999 pure-GPU", "avg_tps": 76.01, "best_tps": 76.31, "boot": 12.1, "vram": [ { "gpu": 0, "used": 11784, "total": 12288 }, { "gpu": 1, "used": 10454, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "compare: cpu-moe", "avg_tps": 10.19, "best_tps": 10.49, "boot": 12, "vram": [ { "gpu": 0, "used": 2652, "total": 12288 }, { "gpu": 1, "used": 2982, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "cpuMoe": true } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "t=2", "avg_tps": 75.67, "best_tps": 75.87, "boot": 9, "vram": [ { "gpu": 0, "used": 11783, "total": 12288 }, { "gpu": 1, "used": 10454, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "t=4", "avg_tps": 75.61, "best_tps": 75.87, "boot": 9, "vram": [ { "gpu": 0, "used": 11783, "total": 12288 }, { "gpu": 1, "used": 10454, "total": 12288 } ], "params": { "ngl": 999, "t": 4, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "t=8", "avg_tps": 75.42, "best_tps": 75.59, "boot": 9, "vram": [ { "gpu": 0, "used": 11783, "total": 12288 }, { "gpu": 1, "used": 10454, "total": 12288 } ], "params": { "ngl": 999, "t": 8, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "t=10", "avg_tps": 75.71, "best_tps": 75.82, "boot": 9, "vram": [ { "gpu": 0, "used": 11783, "total": 12288 }, { "gpu": 1, "used": 10454, "total": 12288 } ], "params": { "ngl": 999, "t": 10, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "t=12", "avg_tps": 75.08, "best_tps": 75.7, "boot": 9, "vram": [ { "gpu": 0, "used": 11783, "total": 12288 }, { "gpu": 1, "used": 10454, "total": 12288 } ], "params": { "ngl": 999, "t": 12, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "ub=256 b=1024", "avg_tps": 75.16, "best_tps": 75.64, "boot": 9, "vram": [ { "gpu": 0, "used": 10559, "total": 12288 }, { "gpu": 1, "used": 9662, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 256, "b": 1024, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "ub=256 b=2048", "avg_tps": 75.68, "best_tps": 76.05, "boot": 9, "vram": [ { "gpu": 0, "used": 10559, "total": 12288 }, { "gpu": 1, "used": 9662, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 256, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "ub=512 b=4096", "avg_tps": 75.92, "best_tps": 76.16, "boot": 9, "vram": [ { "gpu": 0, "used": 11784, "total": 12288 }, { "gpu": 1, "used": 10454, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 4096, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "ub=1024 b=2048", "avg_tps": 75.7, "best_tps": 75.9, "boot": 9, "vram": [ { "gpu": 0, "used": 11012, "total": 12288 }, { "gpu": 1, "used": 9950, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 1024, "b": 2048, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "ub=1024 b=4096", "avg_tps": 75.77, "best_tps": 75.99, "boot": 9, "vram": [ { "gpu": 0, "used": 11011, "total": 12288 }, { "gpu": 1, "used": 9950, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 1024, "b": 4096, "ctk": "q4_0", "ctv": "q4_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "kv=q8_0/q8_0", "avg_tps": 76.3, "best_tps": 76.69, "boot": 9, "vram": [ { "gpu": 0, "used": 10725, "total": 12288 }, { "gpu": 1, "used": 10212, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 2048, "ctk": "q8_0", "ctv": "q8_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "kv=q4_0/q8_0", "avg_tps": 42.88, "best_tps": 44.58, "boot": 9, "vram": [ { "gpu": 0, "used": 10439, "total": 12288 }, { "gpu": 1, "used": 9796, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q8_0" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "kv=f16/f16", "avg_tps": 76.36, "best_tps": 76.78, "boot": 9, "vram": [ { "gpu": 0, "used": 11761, "total": 12288 }, { "gpu": 1, "used": 11710, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 2048, "ctk": "f16", "ctv": "f16" } }, { "model": "Gemma4-26B Q4_K_M", "quant": "Q4_K_M", "label": "FINAL", "avg_tps": 76.4, "best_tps": 76.75, "boot": 9, "vram": [ { "gpu": 0, "used": 11761, "total": 12288 }, { "gpu": 1, "used": 11710, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 2048, "ctk": "f16", "ctv": "f16" } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "n-cpu-moe=5", "avg_tps": 51.43, "best_tps": 52.07, "boot": 12, "vram": [ { "gpu": 0, "used": 10365, "total": 12288 }, { "gpu": 1, "used": 11152, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "t=2", "avg_tps": 43.8, "best_tps": 46.4, "boot": 12, "vram": [ { "gpu": 0, "used": 10365, "total": 12288 }, { "gpu": 1, "used": 11152, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "t=4", "avg_tps": 49.21, "best_tps": 52.78, "boot": 12, "vram": [ { "gpu": 0, "used": 10353, "total": 12288 }, { "gpu": 1, "used": 11152, "total": 12288 } ], "params": { "ngl": 999, "t": 4, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "t=8", "avg_tps": 46.43, "best_tps": 50.49, "boot": 12, "vram": [ { "gpu": 0, "used": 10397, "total": 12288 }, { "gpu": 1, "used": 11152, "total": 12288 } ], "params": { "ngl": 999, "t": 8, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "t=10", "avg_tps": 46.12, "best_tps": 50.06, "boot": 12, "vram": [ { "gpu": 0, "used": 10351, "total": 12288 }, { "gpu": 1, "used": 11152, "total": 12288 } ], "params": { "ngl": 999, "t": 10, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "t=12", "avg_tps": 45.23, "best_tps": 47.1, "boot": 12, "vram": [ { "gpu": 0, "used": 10337, "total": 12288 }, { "gpu": 1, "used": 11152, "total": 12288 } ], "params": { "ngl": 999, "t": 12, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "ub=256 b=1024", "avg_tps": 48.9, "best_tps": 52.3, "boot": 12, "vram": [ { "gpu": 0, "used": 9834, "total": 12288 }, { "gpu": 1, "used": 10906, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 256, "b": 1024, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "ub=256 b=2048", "avg_tps": 49.62, "best_tps": 52.52, "boot": 12, "vram": [ { "gpu": 0, "used": 9833, "total": 12288 }, { "gpu": 1, "used": 10906, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 256, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "ub=512 b=4096", "avg_tps": 48.78, "best_tps": 52.14, "boot": 12, "vram": [ { "gpu": 0, "used": 10337, "total": 12288 }, { "gpu": 1, "used": 11152, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 4096, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "ub=1024 b=2048", "avg_tps": 49.95, "best_tps": 52.53, "boot": 12, "vram": [ { "gpu": 0, "used": 11124, "total": 12288 }, { "gpu": 1, "used": 11644, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 1024, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "ub=1024 b=4096", "avg_tps": 48.75, "best_tps": 52.06, "boot": 12, "vram": [ { "gpu": 0, "used": 11123, "total": 12288 }, { "gpu": 1, "used": 11644, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 1024, "b": 4096, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "kv=q4_0/q8_0", "avg_tps": 42.81, "best_tps": 44.14, "boot": 12, "vram": [ { "gpu": 0, "used": 10681, "total": 12288 }, { "gpu": 1, "used": 11472, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q8_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B MXFP4_MOE", "quant": "MXFP4_MOE", "label": "FINAL", "avg_tps": 46.66, "best_tps": 47.09, "boot": 15, "vram": [ { "gpu": 0, "used": 10476, "total": 12288 }, { "gpu": 1, "used": 11152, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "n-cpu-moe=5", "avg_tps": 49.01, "best_tps": 53.09, "boot": 12, "vram": [ { "gpu": 0, "used": 10606, "total": 12288 }, { "gpu": 1, "used": 11338, "total": 12288 } ], "params": { "ngl": 999, "t": 6, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "t=2", "avg_tps": 45.73, "best_tps": 47.87, "boot": 12, "vram": [ { "gpu": 0, "used": 10599, "total": 12288 }, { "gpu": 1, "used": 11338, "total": 12288 } ], "params": { "ngl": 999, "t": 2, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "t=4", "avg_tps": 50.98, "best_tps": 54.33, "boot": 12, "vram": [ { "gpu": 0, "used": 10601, "total": 12288 }, { "gpu": 1, "used": 11338, "total": 12288 } ], "params": { "ngl": 999, "t": 4, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "t=8", "avg_tps": 48.45, "best_tps": 52.1, "boot": 12, "vram": [ { "gpu": 0, "used": 10596, "total": 12288 }, { "gpu": 1, "used": 11338, "total": 12288 } ], "params": { "ngl": 999, "t": 8, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "t=10", "avg_tps": 47.83, "best_tps": 51.45, "boot": 12, "vram": [ { "gpu": 0, "used": 10595, "total": 12288 }, { "gpu": 1, "used": 11338, "total": 12288 } ], "params": { "ngl": 999, "t": 10, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "t=12", "avg_tps": 43.77, "best_tps": 46.79, "boot": 12, "vram": [ { "gpu": 0, "used": 10589, "total": 12288 }, { "gpu": 1, "used": 11338, "total": 12288 } ], "params": { "ngl": 999, "t": 12, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "ub=256 b=1024", "avg_tps": 52.14, "best_tps": 53.82, "boot": 12, "vram": [ { "gpu": 0, "used": 10089, "total": 12288 }, { "gpu": 1, "used": 11092, "total": 12288 } ], "params": { "ngl": 999, "t": 4, "ub": 256, "b": 1024, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "ub=256 b=2048", "avg_tps": 50.23, "best_tps": 53.66, "boot": 12, "vram": [ { "gpu": 0, "used": 10091, "total": 12288 }, { "gpu": 1, "used": 11092, "total": 12288 } ], "params": { "ngl": 999, "t": 4, "ub": 256, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "ub=512 b=2048", "avg_tps": 49.89, "best_tps": 53.89, "boot": 12, "vram": [ { "gpu": 0, "used": 10595, "total": 12288 }, { "gpu": 1, "used": 11338, "total": 12288 } ], "params": { "ngl": 999, "t": 4, "ub": 512, "b": 2048, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "ub=512 b=4096", "avg_tps": 50.4, "best_tps": 54.19, "boot": 12, "vram": [ { "gpu": 0, "used": 10564, "total": 12288 }, { "gpu": 1, "used": 11338, "total": 12288 } ], "params": { "ngl": 999, "t": 4, "ub": 512, "b": 4096, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "kv=q8_0/q8_0", "avg_tps": 51.84, "best_tps": 53.53, "boot": 12, "vram": [ { "gpu": 0, "used": 10726, "total": 12288 }, { "gpu": 1, "used": 11732, "total": 12288 } ], "params": { "ngl": 999, "t": 4, "ub": 256, "b": 1024, "ctk": "q8_0", "ctv": "q8_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "kv=q4_0/q8_0", "avg_tps": 43.22, "best_tps": 45.99, "boot": 12, "vram": [ { "gpu": 0, "used": 10410, "total": 12288 }, { "gpu": 1, "used": 11412, "total": 12288 } ], "params": { "ngl": 999, "t": 4, "ub": 256, "b": 1024, "ctk": "q4_0", "ctv": "q8_0", "nCpuMoe": 5 } }, { "model": "Qwen3.5-35B Q4_K_M", "quant": "Q4_K_M", "label": "FINAL", "avg_tps": 52.05, "best_tps": 54.48, "boot": 12.1, "vram": [ { "gpu": 0, "used": 10062, "total": 12288 }, { "gpu": 1, "used": 11092, "total": 12288 } ], "params": { "ngl": 999, "t": 4, "ub": 256, "b": 1024, "ctk": "q4_0", "ctv": "q4_0", "nCpuMoe": 5 } } ]