Update tuning scripts and add task creation to sync_vikunja.js

2026-04-06 21:49:56 +09:00
parent 626a089b6b
commit 7c7a899fd5
61 changed files with 8705 additions and 1566 deletions
--- a/scripts/dual_gpu_summary.txt
+++ b/scripts/dual_gpu_summary.txt
@@ -0,0 +1,31 @@
+Dual-GPU Benchmark v2 — 2026-04-06T06:52:08.868Z
+2x RTX 3060 12GB | 256K Context | 58 configs | 69.4 min
+
+=======================================================
+  RANKING
+=======================================================
+
+  🥇 #1: Gemma4-26B Q4_K_M
+      AVG: 76.4 t/s | BEST: 76.75 t/s | Boot: 9s
+      ngl=999 t=6 ub=512 b=2048 ctk=f16 ctv=f16
+
+  🥈 #2: Gemma4-26B MXFP4_MOE
+      AVG: 64.05 t/s | BEST: 64.29 t/s | Boot: 9s
+      ngl=999 t=2 ub=512 b=2048 ctk=q8_0 ctv=q8_0
+
+  🥉 #3: Qwen3.5-35B Q4_K_M
+      AVG: 52.05 t/s | BEST: 54.48 t/s | Boot: 12.1s
+      ngl=999 t=4 ub=256 b=1024 ctk=q4_0 ctv=q4_0
+      --n-cpu-moe 5
+
+     #4: Qwen3.5-35B MXFP4_MOE
+      AVG: 46.66 t/s | BEST: 47.09 t/s | Boot: 15s
+      ngl=999 t=6 ub=512 b=2048 ctk=q4_0 ctv=q4_0
+      --n-cpu-moe 5
+
+=======================================================
+  ★ CHAMPION: Gemma4-26B Q4_K_M — 76.4 t/s
+=======================================================
+
+  Recommended:
+    llama-server --model models\gemma-4-26B-A4B-it-Q4_K_M.gguf -ngl 999 -c 262144 -t 6 -tb 6 -ub 512 -b 2048 -fa on --cache-type-k f16 --cache-type-v f16 --prio 3 --poll 50 --mlock --port 8000 --host 0.0.0.0