Phase 01 (LLM Tuning): - Gemma4 26B: 74.65 t/s (fast) - Qwen 35B: 61.62 t/s (balanced) - Gemma4 31B: 16.0 t/s (deep-coder) - Qwen 27B: 16.7 t/s (deep-logic) - Qwen 122B: 8.95 t/s (ultra, GPU 1 only) Phase 02 (API Engine): - FastAPI reverse proxy on port 8000 - /engine/switch hot-swap with 503 protection - config/engine_models.json as single source of truth - Replaced 4 individual .bat files with unified engine File cleanup: - scripts/ 85 files -> 9 + _archive/ - Root .bat files -> _archive/
591 lines
11 KiB
JSON
591 lines
11 KiB
JSON
[
|
|
{
|
|
"ngl": 22,
|
|
"t": 8,
|
|
"tb": 8,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.22049935826915,
|
|
"best_tps": 25.971732307567606,
|
|
"vram_used": 11953,
|
|
"vram_total": 12288,
|
|
"label": "ngl=22"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 8,
|
|
"tb": 8,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.805518952775174,
|
|
"best_tps": 25.953896683689454,
|
|
"vram_used": 11942,
|
|
"vram_total": 12288,
|
|
"label": "ngl=21"
|
|
},
|
|
{
|
|
"ngl": 20,
|
|
"t": 8,
|
|
"tb": 8,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 23.537353232262834,
|
|
"best_tps": 24.32109262330477,
|
|
"vram_used": 11972,
|
|
"vram_total": 12288,
|
|
"label": "ngl=20"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 2,
|
|
"tb": 2,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 20.167581352340264,
|
|
"best_tps": 20.701192443418005,
|
|
"vram_used": 11969,
|
|
"vram_total": 12288,
|
|
"label": "t=2 | tb=2"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.689104997668554,
|
|
"best_tps": 26.328541632880874,
|
|
"vram_used": 11975,
|
|
"vram_total": 12288,
|
|
"label": "t=4 | tb=4"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 8,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.294470150452725,
|
|
"best_tps": 26.541251363470614,
|
|
"vram_used": 11984,
|
|
"vram_total": 12288,
|
|
"label": "t=4 | tb=8"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 6,
|
|
"tb": 6,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.307859289404675,
|
|
"best_tps": 26.292208504543133,
|
|
"vram_used": 11984,
|
|
"vram_total": 12288,
|
|
"label": "t=6 | tb=6"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 6,
|
|
"tb": 8,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.230599923243314,
|
|
"best_tps": 26.366065850165732,
|
|
"vram_used": 11983,
|
|
"vram_total": 12288,
|
|
"label": "t=6 | tb=8"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 8,
|
|
"tb": 8,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.113108026759278,
|
|
"best_tps": 26.123872617669583,
|
|
"vram_used": 11984,
|
|
"vram_total": 12288,
|
|
"label": "t=8 | tb=8"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 8,
|
|
"tb": 12,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.05545428888364,
|
|
"best_tps": 26.06377500079152,
|
|
"vram_used": 11983,
|
|
"vram_total": 12288,
|
|
"label": "t=8 | tb=12"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 10,
|
|
"tb": 10,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 24.706926870374986,
|
|
"best_tps": 25.03033604251865,
|
|
"vram_used": 11984,
|
|
"vram_total": 12288,
|
|
"label": "t=10 | tb=10"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 12,
|
|
"tb": 12,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 22.468055564001904,
|
|
"best_tps": 23.425983251691825,
|
|
"vram_used": 11989,
|
|
"vram_total": 12288,
|
|
"label": "t=12 | tb=12"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 16,
|
|
"tb": 16,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 21.176973905195442,
|
|
"best_tps": 21.482429642395456,
|
|
"vram_used": 12021,
|
|
"vram_total": 12288,
|
|
"label": "t=16 | tb=16"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 128,
|
|
"b": 512,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.545748810106186,
|
|
"best_tps": 26.344547829145817,
|
|
"vram_used": 11986,
|
|
"vram_total": 12288,
|
|
"label": "ub=128 | b=512"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 256,
|
|
"b": 1024,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.503875205368377,
|
|
"best_tps": 26.393548686102108,
|
|
"vram_used": 11981,
|
|
"vram_total": 12288,
|
|
"label": "ub=256 | b=1024"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 256,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.46500292415627,
|
|
"best_tps": 26.2726382287537,
|
|
"vram_used": 11981,
|
|
"vram_total": 12288,
|
|
"label": "ub=256 | b=2048"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 512,
|
|
"b": 1024,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.50982209452459,
|
|
"best_tps": 26.292282671074723,
|
|
"vram_used": 12020,
|
|
"vram_total": 12288,
|
|
"label": "ub=512 | b=1024"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 512,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.39646674356899,
|
|
"best_tps": 26.28106356028714,
|
|
"vram_used": 12020,
|
|
"vram_total": 12288,
|
|
"label": "ub=512 | b=2048"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 512,
|
|
"b": 4096,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.471945933724726,
|
|
"best_tps": 26.268422652962233,
|
|
"vram_used": 12021,
|
|
"vram_total": 12288,
|
|
"label": "ub=512 | b=4096"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.722119623856702,
|
|
"best_tps": 26.497264927416403,
|
|
"vram_used": 12019,
|
|
"vram_total": 12288,
|
|
"label": "ub=1024 | b=2048"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 1024,
|
|
"b": 4096,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.665819493145943,
|
|
"best_tps": 26.301163428594148,
|
|
"vram_used": 12019,
|
|
"vram_total": 12288,
|
|
"label": "ub=1024 | b=4096"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q4_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.464915272955533,
|
|
"best_tps": 26.40667691713752,
|
|
"vram_used": 12019,
|
|
"vram_total": 12288,
|
|
"label": "ctk=q4_0 | ctv=q4_0"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q8_0",
|
|
"ctv": "q8_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.489715990281564,
|
|
"best_tps": 25.884133821146627,
|
|
"vram_used": 12011,
|
|
"vram_total": 12288,
|
|
"label": "ctk=q8_0 | ctv=q8_0"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q4_0",
|
|
"ctv": "q8_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 22.751034104721082,
|
|
"best_tps": 22.91250972782414,
|
|
"vram_used": 12017,
|
|
"vram_total": 12288,
|
|
"label": "ctk=q4_0 | ctv=q8_0"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "f16",
|
|
"ctv": "f16",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 24.745831571513975,
|
|
"best_tps": 25.53926086004382,
|
|
"vram_used": 11985,
|
|
"vram_total": 12288,
|
|
"label": "ctk=f16 | ctv=f16"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q8_0",
|
|
"ctv": "q8_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 25.21575943186602,
|
|
"best_tps": 25.796865637378264,
|
|
"vram_used": 12013,
|
|
"vram_total": 12288,
|
|
"label": "mmap=True | poll=50 | prio=2"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q8_0",
|
|
"ctv": "q8_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": false,
|
|
"prio": 2,
|
|
"poll": 50,
|
|
"avg_tps": 23.88172807693179,
|
|
"best_tps": 24.803356430302312,
|
|
"vram_used": 12016,
|
|
"vram_total": 12288,
|
|
"label": "mmap=False | poll=50 | prio=2"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q8_0",
|
|
"ctv": "q8_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 0,
|
|
"avg_tps": 25.041321207287698,
|
|
"best_tps": 25.88479834694897,
|
|
"vram_used": 12017,
|
|
"vram_total": 12288,
|
|
"label": "mmap=True | poll=0 | prio=2"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q8_0",
|
|
"ctv": "q8_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 2,
|
|
"poll": 100,
|
|
"avg_tps": 25.27990666474703,
|
|
"best_tps": 26.034861156695197,
|
|
"vram_used": 12017,
|
|
"vram_total": 12288,
|
|
"label": "mmap=True | poll=100 | prio=2"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q8_0",
|
|
"ctv": "q8_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": true,
|
|
"prio": 3,
|
|
"poll": 50,
|
|
"avg_tps": 25.360977804679788,
|
|
"best_tps": 26.0705565191107,
|
|
"vram_used": 12022,
|
|
"vram_total": 12288,
|
|
"label": "mmap=True | poll=50 | prio=3"
|
|
},
|
|
{
|
|
"ngl": 21,
|
|
"t": 4,
|
|
"tb": 4,
|
|
"ub": 1024,
|
|
"b": 2048,
|
|
"ctk": "q8_0",
|
|
"ctv": "q8_0",
|
|
"fa": "on",
|
|
"mlock": true,
|
|
"mmap": false,
|
|
"prio": 3,
|
|
"poll": 0,
|
|
"avg_tps": 24.156893523381967,
|
|
"best_tps": 24.840307911026144,
|
|
"vram_used": 12021,
|
|
"vram_total": 12288,
|
|
"label": "mmap=False | poll=0 | prio=3"
|
|
}
|
|
] |