feat: Variet Engine v1.0 + 5-model tuning complete
Phase 01 (LLM Tuning): - Gemma4 26B: 74.65 t/s (fast) - Qwen 35B: 61.62 t/s (balanced) - Gemma4 31B: 16.0 t/s (deep-coder) - Qwen 27B: 16.7 t/s (deep-logic) - Qwen 122B: 8.95 t/s (ultra, GPU 1 only) Phase 02 (API Engine): - FastAPI reverse proxy on port 8000 - /engine/switch hot-swap with 503 protection - config/engine_models.json as single source of truth - Replaced 4 individual .bat files with unified engine File cleanup: - scripts/ 85 files -> 9 + _archive/ - Root .bat files -> _archive/
This commit is contained in:
47
scripts/_archive/results/122b_final_results.json
Normal file
47
scripts/_archive/results/122b_final_results.json
Normal file
@@ -0,0 +1,47 @@
|
||||
[
|
||||
{
|
||||
"name": "Baseline: all expert CPU",
|
||||
"avg_tps": 8.72,
|
||||
"best_tps": 8.74,
|
||||
"vram_gpu0": 620,
|
||||
"vram_gpu1": 6493,
|
||||
"vram_total": 7113,
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "n-cpu-moe=60 (4 layers expert GPU)",
|
||||
"avg_tps": 8.72,
|
||||
"best_tps": 8.77,
|
||||
"vram_gpu0": 638,
|
||||
"vram_gpu1": 6493,
|
||||
"vram_total": 7131,
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "n-cpu-moe=56 (8 layers expert GPU)",
|
||||
"avg_tps": 8.72,
|
||||
"best_tps": 8.8,
|
||||
"vram_gpu0": 624,
|
||||
"vram_gpu1": 6493,
|
||||
"vram_total": 7117,
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "n-cpu-moe=52 (12 layers expert GPU)",
|
||||
"avg_tps": 8.76,
|
||||
"best_tps": 8.79,
|
||||
"vram_gpu0": 634,
|
||||
"vram_gpu1": 6493,
|
||||
"vram_total": 7127,
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "n-cpu-moe=48 (16 layers expert GPU)",
|
||||
"avg_tps": 8.81,
|
||||
"best_tps": 8.95,
|
||||
"vram_gpu0": 632,
|
||||
"vram_gpu1": 6493,
|
||||
"vram_total": 7125,
|
||||
"status": "OK"
|
||||
}
|
||||
]
|
||||
52
scripts/_archive/results/122b_gpu1_results.json
Normal file
52
scripts/_archive/results/122b_gpu1_results.json
Normal file
@@ -0,0 +1,52 @@
|
||||
[
|
||||
{
|
||||
"name": "GPU1 only + Expert CPU + 8t",
|
||||
"avg_tps": 8.74,
|
||||
"best_tps": 8.75,
|
||||
"vram_gpu0": 618,
|
||||
"vram_gpu1": 6493,
|
||||
"vram_total": 7111,
|
||||
"pcie": "1, 4 | 4, 16",
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "GPU1 only + Expert CPU + 16t",
|
||||
"avg_tps": 8.0,
|
||||
"best_tps": 8.02,
|
||||
"vram_gpu0": 619,
|
||||
"vram_gpu1": 6493,
|
||||
"vram_total": 7112,
|
||||
"pcie": "1, 4 | 4, 16",
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "Both GPU (main=1) + Expert CPU + 8t",
|
||||
"avg_tps": 4.71,
|
||||
"best_tps": 4.75,
|
||||
"vram_gpu0": 4220,
|
||||
"vram_gpu1": 3779,
|
||||
"vram_total": 7999,
|
||||
"pcie": "3, 4 | 4, 16",
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "Both GPU (ts 0.2,0.8) + Expert CPU + 8t",
|
||||
"avg_tps": 4.53,
|
||||
"best_tps": 4.6,
|
||||
"vram_gpu0": 2666,
|
||||
"vram_gpu1": 5333,
|
||||
"vram_total": 7999,
|
||||
"pcie": "2, 4 | 4, 16",
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "GPU1 only + Expert CPU + 8t + b4096",
|
||||
"avg_tps": 8.73,
|
||||
"best_tps": 8.77,
|
||||
"vram_gpu0": 615,
|
||||
"vram_gpu1": 6895,
|
||||
"vram_total": 7510,
|
||||
"pcie": "1, 4 | 4, 16",
|
||||
"status": "OK"
|
||||
}
|
||||
]
|
||||
37
scripts/_archive/results/122b_ncpumoe_results.json
Normal file
37
scripts/_archive/results/122b_ncpumoe_results.json
Normal file
@@ -0,0 +1,37 @@
|
||||
[
|
||||
{
|
||||
"name": "n-cpu-moe=64 (all CPU)",
|
||||
"n_cpu_moe": 64,
|
||||
"speed_tps": 4.87,
|
||||
"vram_gpu0": 4257,
|
||||
"vram_gpu1": 3793,
|
||||
"vram_total": 8050,
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "n-cpu-moe=56 (8 layers GPU expert)",
|
||||
"n_cpu_moe": 56,
|
||||
"speed_tps": 4.78,
|
||||
"vram_gpu0": 4233,
|
||||
"vram_gpu1": 3793,
|
||||
"vram_total": 8026,
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "n-cpu-moe=48 (16 layers GPU expert)",
|
||||
"n_cpu_moe": 48,
|
||||
"speed_tps": 4.82,
|
||||
"vram_gpu0": 4233,
|
||||
"vram_gpu1": 3793,
|
||||
"vram_total": 8026,
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "n-cpu-moe=40 (24 layers GPU expert)",
|
||||
"status": "BOOT_FAIL"
|
||||
},
|
||||
{
|
||||
"name": "n-cpu-moe=32 (32 layers GPU expert)",
|
||||
"status": "BOOT_FAIL"
|
||||
}
|
||||
]
|
||||
43
scripts/_archive/results/122b_optimization_results.json
Normal file
43
scripts/_archive/results/122b_optimization_results.json
Normal file
@@ -0,0 +1,43 @@
|
||||
[
|
||||
{
|
||||
"name": "ngl=999 + expert CPU + no-mmap",
|
||||
"ngl": 999,
|
||||
"avg_tps": 4.8,
|
||||
"best_tps": 4.84,
|
||||
"vram_gpu0": 4225,
|
||||
"vram_gpu1": 3779,
|
||||
"vram_total": 8004,
|
||||
"pcie": "3, 4\r | 4, 16",
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "ngl=10 (pure, no expert override)",
|
||||
"ngl": 10,
|
||||
"avg_tps": 2.52,
|
||||
"best_tps": 2.56,
|
||||
"vram_gpu0": 10309,
|
||||
"vram_gpu1": 5871,
|
||||
"vram_total": 16180,
|
||||
"pcie": "1, 4\r | 1, 16",
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "ngl=12 (pure)",
|
||||
"ngl": 12,
|
||||
"avg_tps": 2.86,
|
||||
"best_tps": 2.86,
|
||||
"vram_gpu0": 11807,
|
||||
"vram_gpu1": 7377,
|
||||
"vram_total": 19184,
|
||||
"pcie": "2, 4\r | 2, 16",
|
||||
"status": "OK"
|
||||
},
|
||||
{
|
||||
"name": "ngl=14 (pure)",
|
||||
"status": "BOOT_FAIL"
|
||||
},
|
||||
{
|
||||
"name": "ngl=999 + upper expert CPU (blk 32-63)",
|
||||
"status": "BOOT_FAIL"
|
||||
}
|
||||
]
|
||||
68
scripts/_archive/results/deep_tier_auto_results.json
Normal file
68
scripts/_archive/results/deep_tier_auto_results.json
Normal file
@@ -0,0 +1,68 @@
|
||||
[
|
||||
{
|
||||
"name": "Qwen 27B - 256K (q4_0)",
|
||||
"status": "Success",
|
||||
"vram": [
|
||||
"0, 10853 MiB, 12288 MiB",
|
||||
"1, 10951 MiB, 12288 MiB"
|
||||
],
|
||||
"tests": {
|
||||
"code": {
|
||||
"time": 17.89,
|
||||
"tokens": 300,
|
||||
"tps": 16.77,
|
||||
"res": "..."
|
||||
},
|
||||
"logical": {
|
||||
"time": 17.96,
|
||||
"tokens": 300,
|
||||
"tps": 16.71,
|
||||
"res": "..."
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Gemma 31B - 32K (q4_0)",
|
||||
"status": "Success",
|
||||
"vram": [
|
||||
"0, 9834 MiB, 12288 MiB",
|
||||
"1, 9963 MiB, 12288 MiB"
|
||||
],
|
||||
"tests": {
|
||||
"code": {
|
||||
"time": 18.75,
|
||||
"tokens": 300,
|
||||
"tps": 16.0,
|
||||
"res": "```python\nfrom typing import List, Any\n\ndef merge_sorted_lists(list1: List[Any], list2: List[Any]) -> List[Any]:\n \"\"\"\n Merges two sorted lists i..."
|
||||
},
|
||||
"logical": {
|
||||
"time": 18.82,
|
||||
"tokens": 300,
|
||||
"tps": 15.94,
|
||||
"res": "..."
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Gemma 31B - 64K (q4_0)",
|
||||
"status": "Success",
|
||||
"vram": [
|
||||
"0, 10346 MiB, 12288 MiB",
|
||||
"1, 10387 MiB, 12288 MiB"
|
||||
],
|
||||
"tests": {
|
||||
"code": {
|
||||
"time": 18.75,
|
||||
"tokens": 300,
|
||||
"tps": 16.0,
|
||||
"res": "```python\nfrom typing import List, Any\n\ndef merge_sorted_lists(list1: List[Any], list2: List[Any]) -> List[Any]:\n \"\"\"\n Merges two sorted lists i..."
|
||||
},
|
||||
"logical": {
|
||||
"time": 18.83,
|
||||
"tokens": 300,
|
||||
"tps": 15.93,
|
||||
"res": "..."
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
68
scripts/_archive/results/deep_tier_extreme_results.json
Normal file
68
scripts/_archive/results/deep_tier_extreme_results.json
Normal file
@@ -0,0 +1,68 @@
|
||||
[
|
||||
{
|
||||
"name": "Qwen 27B - 256K 극한 (q4_0, ub=512)",
|
||||
"status": "Success",
|
||||
"vram": [
|
||||
"0, 11120 MiB, 12288 MiB",
|
||||
"1, 11081 MiB, 12288 MiB"
|
||||
],
|
||||
"tests": {
|
||||
"code": {
|
||||
"time": 17.97,
|
||||
"tokens": 300,
|
||||
"tps": 16.7,
|
||||
"res": "..."
|
||||
},
|
||||
"logical": {
|
||||
"time": 18.01,
|
||||
"tokens": 300,
|
||||
"tps": 16.65,
|
||||
"res": "..."
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Gemma 31B - 128K 확장 (q4_0)",
|
||||
"status": "Success",
|
||||
"vram": [
|
||||
"0, 11437 MiB, 12288 MiB",
|
||||
"1, 11259 MiB, 12288 MiB"
|
||||
],
|
||||
"tests": {
|
||||
"code": {
|
||||
"time": 18.75,
|
||||
"tokens": 300,
|
||||
"tps": 16.0,
|
||||
"res": "```python\nfrom typing import List, Any\n\ndef merge_sorted_lists(list1: List[Any], list2: List[Any]) -> List[Any]:\n \"\"\"\n Merges two sorted lists i..."
|
||||
},
|
||||
"logical": {
|
||||
"time": 18.79,
|
||||
"tokens": 300,
|
||||
"tps": 15.97,
|
||||
"res": "..."
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Gemma 31B - 192K 극한 (q4_0)",
|
||||
"status": "Success",
|
||||
"vram": [
|
||||
"0, 11888 MiB, 12288 MiB",
|
||||
"1, 11754 MiB, 12288 MiB"
|
||||
],
|
||||
"tests": {
|
||||
"code": {
|
||||
"time": 18.69,
|
||||
"tokens": 300,
|
||||
"tps": 16.05,
|
||||
"res": "```python\nfrom typing import List, Any\n\ndef merge_sorted_lists(list1: List[Any], list2: List[Any]) -> List[Any]:\n \"\"\"\n Merges two sorted lists i..."
|
||||
},
|
||||
"logical": {
|
||||
"time": 18.77,
|
||||
"tokens": 300,
|
||||
"tps": 15.98,
|
||||
"res": "..."
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
1654
scripts/_archive/results/dual_gpu_results.json
Normal file
1654
scripts/_archive/results/dual_gpu_results.json
Normal file
File diff suppressed because it is too large
Load Diff
31
scripts/_archive/results/dual_gpu_summary.txt
Normal file
31
scripts/_archive/results/dual_gpu_summary.txt
Normal file
@@ -0,0 +1,31 @@
|
||||
Dual-GPU Benchmark v2 — 2026-04-06T06:52:08.868Z
|
||||
2x RTX 3060 12GB | 256K Context | 58 configs | 69.4 min
|
||||
|
||||
=======================================================
|
||||
RANKING
|
||||
=======================================================
|
||||
|
||||
🥇 #1: Gemma4-26B Q4_K_M
|
||||
AVG: 76.4 t/s | BEST: 76.75 t/s | Boot: 9s
|
||||
ngl=999 t=6 ub=512 b=2048 ctk=f16 ctv=f16
|
||||
|
||||
🥈 #2: Gemma4-26B MXFP4_MOE
|
||||
AVG: 64.05 t/s | BEST: 64.29 t/s | Boot: 9s
|
||||
ngl=999 t=2 ub=512 b=2048 ctk=q8_0 ctv=q8_0
|
||||
|
||||
🥉 #3: Qwen3.5-35B Q4_K_M
|
||||
AVG: 52.05 t/s | BEST: 54.48 t/s | Boot: 12.1s
|
||||
ngl=999 t=4 ub=256 b=1024 ctk=q4_0 ctv=q4_0
|
||||
--n-cpu-moe 5
|
||||
|
||||
#4: Qwen3.5-35B MXFP4_MOE
|
||||
AVG: 46.66 t/s | BEST: 47.09 t/s | Boot: 15s
|
||||
ngl=999 t=6 ub=512 b=2048 ctk=q4_0 ctv=q4_0
|
||||
--n-cpu-moe 5
|
||||
|
||||
=======================================================
|
||||
★ CHAMPION: Gemma4-26B Q4_K_M — 76.4 t/s
|
||||
=======================================================
|
||||
|
||||
Recommended:
|
||||
llama-server --model models\gemma-4-26B-A4B-it-Q4_K_M.gguf -ngl 999 -c 262144 -t 6 -tb 6 -ub 512 -b 2048 -fa on --cache-type-k f16 --cache-type-v f16 --prio 3 --poll 50 --mlock --port 8000 --host 0.0.0.0
|
||||
8
scripts/_archive/results/gemma4_test_result.txt
Normal file
8
scripts/_archive/results/gemma4_test_result.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
|
||||
==================================================
|
||||
Gemma4 26B Q4_K_M 5-Run Results:
|
||||
AVG: 74.65 t/s
|
||||
BEST: 75.07 t/s
|
||||
MIN: 74.27 t/s
|
||||
Runs: ['74.59', '74.68', '74.65', '75.07', '74.27']
|
||||
==================================================
|
||||
12
scripts/_archive/results/llm_judge_answers.json
Normal file
12
scripts/_archive/results/llm_judge_answers.json
Normal file
File diff suppressed because one or more lines are too long
124
scripts/_archive/results/quality_result_gemma4.json
Normal file
124
scripts/_archive/results/quality_result_gemma4.json
Normal file
@@ -0,0 +1,124 @@
|
||||
[
|
||||
{
|
||||
"id": "code_generate",
|
||||
"category": "coding",
|
||||
"name": "Python 함수 생성",
|
||||
"model": "gemma4",
|
||||
"response": "```python\nfrom typing import List\n\ndef merge_sorted",
|
||||
"tokens": 800,
|
||||
"time": 11.21,
|
||||
"tps": 71.34,
|
||||
"eval_criteria": [
|
||||
"correctness",
|
||||
"type_hints",
|
||||
"docstring",
|
||||
"edge_cases"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "code_debug",
|
||||
"category": "coding",
|
||||
"name": "버그 찾기 & 수정",
|
||||
"model": "gemma4",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 11.2,
|
||||
"tps": 71.4,
|
||||
"eval_criteria": [
|
||||
"bug_identified",
|
||||
"correct_fix",
|
||||
"clean_code"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "code_refactor",
|
||||
"category": "coding",
|
||||
"name": "TypeScript 리팩토링",
|
||||
"model": "gemma4",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 11.23,
|
||||
"tps": 71.26,
|
||||
"eval_criteria": [
|
||||
"types",
|
||||
"error_handling",
|
||||
"backoff",
|
||||
"production_quality"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "korean_schedule",
|
||||
"category": "assistant_kr",
|
||||
"name": "한국어 일정 관리",
|
||||
"model": "gemma4",
|
||||
"response": "요청하신 내일 일정을 정리하고, 서울 시내 이동 시간을 고려하여 현실적인 가능성을 분석해",
|
||||
"tokens": 800,
|
||||
"time": 11.2,
|
||||
"tps": 71.43,
|
||||
"eval_criteria": [
|
||||
"korean_fluency",
|
||||
"schedule_analysis",
|
||||
"practical_advice"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "korean_email",
|
||||
"category": "assistant_kr",
|
||||
"name": "한국어 이메일 요약",
|
||||
"model": "gemma4",
|
||||
"response": "요청하신 내용을 다음과 같이 요약 및 정리해 드립니다.\n\n**[3줄 요약]**\n1. 본부장님 지시로 Q2 마케팅 예산이 기존 대비 15% 삭감되었습니다.\n2. 이에 따라 ROI가 낮은 채널(인스타그램 등)을 중심으로 예산 조정이 필요합니다.\n3. 수요일 수정안 제출을 위해 채널별 삭감 우선순위 결정이 시급합니다.\n\n**[필요 액션]**\n* **채널별 삭감 우선순위 정리 및 회신** (기한: **화요일 오전까지**)",
|
||||
"tokens": 686,
|
||||
"time": 9.67,
|
||||
"tps": 70.95,
|
||||
"eval_criteria": [
|
||||
"korean_summary",
|
||||
"action_items",
|
||||
"conciseness"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "tool_calling",
|
||||
"category": "tool_use",
|
||||
"name": "Function Calling (JSON)",
|
||||
"model": "gemma4",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 11.19,
|
||||
"tps": 71.49,
|
||||
"eval_criteria": [
|
||||
"correct_sequence",
|
||||
"valid_json",
|
||||
"complete_args"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "structured_output",
|
||||
"category": "tool_use",
|
||||
"name": "구조화 출력 (JSON)",
|
||||
"model": "gemma4",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 11.41,
|
||||
"tps": 70.12,
|
||||
"eval_criteria": [
|
||||
"correct_parsing",
|
||||
"valid_json",
|
||||
"completeness"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "reasoning",
|
||||
"category": "reasoning",
|
||||
"name": "논리 추론",
|
||||
"model": "gemma4",
|
||||
"response": "To find the amount of wheat produced by each field, we can use algebra. We will express the production of all fields in terms of one variable.\n\n### Step 1: Define the variables\nLet **$B$** be the amount of wheat produced by **Field B**.\n\nBased on the problem description, we can define the other fields relative to $B$:\n* **Field A** produces 20% more than Field B:\n $A = B + 0.20B = 1.2B$\n* **Field C** produces ",
|
||||
"tokens": 800,
|
||||
"time": 11.2,
|
||||
"tps": 71.44,
|
||||
"eval_criteria": [
|
||||
"correct_answer",
|
||||
"clear_steps",
|
||||
"math_accuracy"
|
||||
]
|
||||
}
|
||||
]
|
||||
124
scripts/_archive/results/quality_result_qwen27b.json
Normal file
124
scripts/_archive/results/quality_result_qwen27b.json
Normal file
@@ -0,0 +1,124 @@
|
||||
[
|
||||
{
|
||||
"id": "code_generate",
|
||||
"category": "coding",
|
||||
"name": "Python 함수 생성",
|
||||
"model": "qwen27b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 47.56,
|
||||
"tps": 16.82,
|
||||
"eval_criteria": [
|
||||
"correctness",
|
||||
"type_hints",
|
||||
"docstring",
|
||||
"edge_cases"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "code_debug",
|
||||
"category": "coding",
|
||||
"name": "버그 찾기 & 수정",
|
||||
"model": "qwen27b",
|
||||
"response": "# Fixed Code\n\n```python\ndef find_duplicates(arr):\n count = {}\n for item in arr:\n count[item] = count.get(item, 0) + 1\n \n # Filter to only include items that appear more than once\n duplicates = {item: count for item, count in count.items() if count > 1}\n return duplicates\n```\n\n## What Was Fixed\n\n1. **Changed return type**: Now returns a dictionary `{value: count}` instead of a list\n2. **Fixed counting logic**: Counts all occurrences of each item, not just marks as seen\n3. **Handles empty lists**: Returns `{}` instead of crashing\n4. **Removed unnecessary set conversion**: No need to convert to set since we're returning a dict\n\n## Test Cases\n\n```python\nprint(find_duplicates([1, 2, 2, 3, 3, 3])) # {2: 2, 3: 3}\nprint(find_duplicates([])) # {}\nprint(find_duplicates([1, 1, 1])) # {1: 3}\nprint(find_duplicates([1, 2, 3])) # {}\nprint(find_duplicates([5])) # {}\n```\n\n## Alternative (More Pythonic)\n\n```python\nfrom collections import Counter\n\ndef find_duplicates(arr):\n count = Counter(arr)\n return {item: count for item, count in count.items() if count > 1}\n```\n\nThis version uses `Counter` from the `collections`",
|
||||
"tokens": 800,
|
||||
"time": 48.01,
|
||||
"tps": 16.66,
|
||||
"eval_criteria": [
|
||||
"bug_identified",
|
||||
"correct_fix",
|
||||
"clean_code"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "code_refactor",
|
||||
"category": "coding",
|
||||
"name": "TypeScript 리팩토링",
|
||||
"model": "qwen27b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 47.99,
|
||||
"tps": 16.67,
|
||||
"eval_criteria": [
|
||||
"types",
|
||||
"error_handling",
|
||||
"backoff",
|
||||
"production_quality"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "korean_schedule",
|
||||
"category": "assistant_kr",
|
||||
"name": "한국어 일정 관리",
|
||||
"model": "qwen27b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 47.75,
|
||||
"tps": 16.75,
|
||||
"eval_criteria": [
|
||||
"korean_fluency",
|
||||
"schedule_analysis",
|
||||
"practical_advice"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "korean_email",
|
||||
"category": "assistant_kr",
|
||||
"name": "한국어 이메일 요약",
|
||||
"model": "qwen27b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 48.05,
|
||||
"tps": 16.65,
|
||||
"eval_criteria": [
|
||||
"korean_summary",
|
||||
"action_items",
|
||||
"conciseness"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "tool_calling",
|
||||
"category": "tool_use",
|
||||
"name": "Function Calling (JSON)",
|
||||
"model": "qwen27b",
|
||||
"response": "[{\"tool\": \"get_calendar\", \"args\": {\"date\": \"tomorrow\"}}, {\"tool\": \"search_web\", \"args\": {\"query\": \"latest quarterly report\"}}, {\"tool\": \"send_email\", \"args\": {\"to\": \"john@example.com\", \"subject\": \"Quarterly Report Summary\", \"body\": \"Summary of the latest quarterly report attached for your review.\"}}]",
|
||||
"tokens": 719,
|
||||
"time": 43.06,
|
||||
"tps": 16.7,
|
||||
"eval_criteria": [
|
||||
"correct_sequence",
|
||||
"valid_json",
|
||||
"complete_args"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "structured_output",
|
||||
"category": "tool_use",
|
||||
"name": "구조화 출력 (JSON)",
|
||||
"model": "qwen27b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 48.01,
|
||||
"tps": 16.66,
|
||||
"eval_criteria": [
|
||||
"correct_parsing",
|
||||
"valid_json",
|
||||
"completeness"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "reasoning",
|
||||
"category": "reasoning",
|
||||
"name": "논리 추론",
|
||||
"model": "qwen27b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 47.67,
|
||||
"tps": 16.78,
|
||||
"eval_criteria": [
|
||||
"correct_answer",
|
||||
"clear_steps",
|
||||
"math_accuracy"
|
||||
]
|
||||
}
|
||||
]
|
||||
124
scripts/_archive/results/quality_result_qwen35b.json
Normal file
124
scripts/_archive/results/quality_result_qwen35b.json
Normal file
@@ -0,0 +1,124 @@
|
||||
[
|
||||
{
|
||||
"id": "code_generate",
|
||||
"category": "coding",
|
||||
"name": "Python 함수 생성",
|
||||
"model": "qwen35b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 12.7,
|
||||
"tps": 62.97,
|
||||
"eval_criteria": [
|
||||
"correctness",
|
||||
"type_hints",
|
||||
"docstring",
|
||||
"edge_cases"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "code_debug",
|
||||
"category": "coding",
|
||||
"name": "버그 찾기 & 수정",
|
||||
"model": "qwen35b",
|
||||
"response": "Here's the corrected version of the function that returns a dictionary of duplicate values with their occurrence counts:\n\n```python\ndef find_duplicates(arr):\n count = {}\n for item in arr:\n count[item] = count.get(item, 0) + 1\n \n return {item: cnt for item, cnt in count.items() if cnt > 1}\n```\n\n### Explanation of Fixes:\n\n1. **Correct Output Format:** The function now returns a dictionary where each key is a duplicate value and the corresponding value is its total count of occurrences.\n2. **Handles Empty Lists:** The function works correctly with an empty list, returning an empty dictionary `{}`.\n3. **Accurate Duplicate Detection:** Only values that appear more than once are included in the result.\n\n### Example Usage:\n\n```python\nprint(find_duplicates([1, 2, 2, 3, 3, 3])) # Output: {2: 2, 3: 3}\nprint(find_duplicates([])) # Output: {}\nprint(find_duplicates([1, 1, 1])) # Output: {1: 3}\nprint(find_duplicates([1, 2, 3])) # Output: {}\n```\n\nThis implementation is efficient, readable, and meets the specified requirements.",
|
||||
"tokens": 788,
|
||||
"time": 12.8,
|
||||
"tps": 61.58,
|
||||
"eval_criteria": [
|
||||
"bug_identified",
|
||||
"correct_fix",
|
||||
"clean_code"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "code_refactor",
|
||||
"category": "coding",
|
||||
"name": "TypeScript 리팩토링",
|
||||
"model": "qwen35b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 13.05,
|
||||
"tps": 61.3,
|
||||
"eval_criteria": [
|
||||
"types",
|
||||
"error_handling",
|
||||
"backoff",
|
||||
"production_quality"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "korean_schedule",
|
||||
"category": "assistant_kr",
|
||||
"name": "한국어 일정 관리",
|
||||
"model": "qwen35b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 12.89,
|
||||
"tps": 62.08,
|
||||
"eval_criteria": [
|
||||
"korean_fluency",
|
||||
"schedule_analysis",
|
||||
"practical_advice"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "korean_email",
|
||||
"category": "assistant_kr",
|
||||
"name": "한국어 이메일 요약",
|
||||
"model": "qwen35b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 13.06,
|
||||
"tps": 61.27,
|
||||
"eval_criteria": [
|
||||
"korean_summary",
|
||||
"action_items",
|
||||
"conciseness"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "tool_calling",
|
||||
"category": "tool_use",
|
||||
"name": "Function Calling (JSON)",
|
||||
"model": "qwen35b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 12.92,
|
||||
"tps": 61.9,
|
||||
"eval_criteria": [
|
||||
"correct_sequence",
|
||||
"valid_json",
|
||||
"complete_args"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "structured_output",
|
||||
"category": "tool_use",
|
||||
"name": "구조화 출력 (JSON)",
|
||||
"model": "qwen35b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 13.04,
|
||||
"tps": 61.34,
|
||||
"eval_criteria": [
|
||||
"correct_parsing",
|
||||
"valid_json",
|
||||
"completeness"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "reasoning",
|
||||
"category": "reasoning",
|
||||
"name": "논리 추론",
|
||||
"model": "qwen35b",
|
||||
"response": "",
|
||||
"tokens": 800,
|
||||
"time": 12.86,
|
||||
"tps": 62.21,
|
||||
"eval_criteria": [
|
||||
"correct_answer",
|
||||
"clear_steps",
|
||||
"math_accuracy"
|
||||
]
|
||||
}
|
||||
]
|
||||
834
scripts/_archive/results/qwen_fullgpu_results.json
Normal file
834
scripts/_archive/results/qwen_fullgpu_results.json
Normal file
@@ -0,0 +1,834 @@
|
||||
[
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "pure-GPU minbatch",
|
||||
"avg_tps": 65.11,
|
||||
"best_tps": 65.49,
|
||||
"boot": 9,
|
||||
"vram_total": 19177,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 10039,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 9138,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 64,
|
||||
"b": 256,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "pure-GPU nommap small",
|
||||
"avg_tps": 65.01,
|
||||
"best_tps": 65.36,
|
||||
"boot": 6,
|
||||
"vram_total": 19672,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 10342,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 9330,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"noMmap": true,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "pure-GPU row-split",
|
||||
"avg_tps": 13.65,
|
||||
"best_tps": 14.82,
|
||||
"boot": 9,
|
||||
"vram_total": 19427,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 10311,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 9116,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"splitMode": "row",
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "pure-GPU ts=0.5,0.5",
|
||||
"avg_tps": 64.92,
|
||||
"best_tps": 65.23,
|
||||
"boot": 9,
|
||||
"vram_total": 19664,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 10334,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 9330,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"tensorSplit": "0.5,0.5",
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "pure-GPU all-tricks",
|
||||
"avg_tps": 64.72,
|
||||
"best_tps": 64.89,
|
||||
"boot": 6,
|
||||
"vram_total": 19171,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 10033,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 9138,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 64,
|
||||
"b": 256,
|
||||
"noMmap": true,
|
||||
"defragThold": 0.1,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "tune t=2",
|
||||
"avg_tps": 64.87,
|
||||
"best_tps": 65.13,
|
||||
"boot": 9,
|
||||
"vram_total": 19170,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 10032,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 9138,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 2,
|
||||
"ub": 64,
|
||||
"b": 256,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "tune t=6",
|
||||
"avg_tps": 64.88,
|
||||
"best_tps": 65.17,
|
||||
"boot": 9,
|
||||
"vram_total": 19168,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 10030,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 9138,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 6,
|
||||
"ub": 64,
|
||||
"b": 256,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "tune t=8",
|
||||
"avg_tps": 64.5,
|
||||
"best_tps": 64.77,
|
||||
"boot": 9,
|
||||
"vram_total": 19168,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 10030,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 9138,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 8,
|
||||
"ub": 64,
|
||||
"b": 256,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "tune ub=256 b=1024",
|
||||
"avg_tps": 64.73,
|
||||
"best_tps": 64.98,
|
||||
"boot": 9,
|
||||
"vram_total": 20640,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 10928,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 9712,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 256,
|
||||
"b": 1024,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "tune ub=256 b=2048",
|
||||
"avg_tps": 63.69,
|
||||
"best_tps": 64.94,
|
||||
"boot": 12,
|
||||
"vram_total": 20614,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 10902,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 9712,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 256,
|
||||
"b": 2048,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "tune kv=q8_0/q8_0",
|
||||
"avg_tps": 64.78,
|
||||
"best_tps": 65.08,
|
||||
"boot": 9,
|
||||
"vram_total": 20422,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 10644,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 9778,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 64,
|
||||
"b": 256,
|
||||
"ngl": 999,
|
||||
"ctk": "q8_0",
|
||||
"ctv": "q8_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "tune kv=f16/f16",
|
||||
"avg_tps": 65.53,
|
||||
"best_tps": 65.81,
|
||||
"boot": 9,
|
||||
"vram_total": 22812,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 11846,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10966,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 64,
|
||||
"b": 256,
|
||||
"ngl": 999,
|
||||
"ctk": "f16",
|
||||
"ctv": "f16"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 UD-IQ4_NL",
|
||||
"label": "FINAL",
|
||||
"avg_tps": 66.31,
|
||||
"best_tps": 66.53,
|
||||
"boot": 9,
|
||||
"vram_total": 22811,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 11845,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10966,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 64,
|
||||
"b": 256,
|
||||
"ngl": 999,
|
||||
"ctk": "f16",
|
||||
"ctv": "f16"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 MXFP4_MOE",
|
||||
"label": "pure-GPU minbatch",
|
||||
"avg_tps": 63.06,
|
||||
"best_tps": 64.16,
|
||||
"boot": 12,
|
||||
"vram_total": 22747,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 11895,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10852,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 64,
|
||||
"b": 256,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 MXFP4_MOE",
|
||||
"label": "pure-GPU nommap small",
|
||||
"avg_tps": 63.75,
|
||||
"best_tps": 63.98,
|
||||
"boot": 9,
|
||||
"vram_total": 22579,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 11797,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10782,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"noMmap": true,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 MXFP4_MOE",
|
||||
"label": "pure-GPU ts=0.5,0.5",
|
||||
"avg_tps": 62.88,
|
||||
"best_tps": 63.9,
|
||||
"boot": 12,
|
||||
"vram_total": 22578,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 11796,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10782,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"tensorSplit": "0.5,0.5",
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 MXFP4_MOE",
|
||||
"label": "pure-GPU all-tricks",
|
||||
"avg_tps": 62.55,
|
||||
"best_tps": 63.71,
|
||||
"boot": 9,
|
||||
"vram_total": 22743,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 11891,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10852,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 64,
|
||||
"b": 256,
|
||||
"noMmap": true,
|
||||
"defragThold": 0.1,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 MXFP4_MOE",
|
||||
"label": "tune t=2",
|
||||
"avg_tps": 63.07,
|
||||
"best_tps": 64.08,
|
||||
"boot": 9,
|
||||
"vram_total": 22601,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 11819,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10782,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 2,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"noMmap": true,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 MXFP4_MOE",
|
||||
"label": "tune t=6",
|
||||
"avg_tps": 63.58,
|
||||
"best_tps": 64.04,
|
||||
"boot": 9,
|
||||
"vram_total": 22583,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 11801,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10782,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 6,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"noMmap": true,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 MXFP4_MOE",
|
||||
"label": "tune t=8",
|
||||
"avg_tps": 62.92,
|
||||
"best_tps": 63.73,
|
||||
"boot": 9,
|
||||
"vram_total": 22536,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 11754,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10782,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 8,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"noMmap": true,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 MXFP4_MOE",
|
||||
"label": "tune ub=256 b=1024",
|
||||
"avg_tps": 62.76,
|
||||
"best_tps": 63.86,
|
||||
"boot": 9,
|
||||
"vram_total": 22874,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 11968,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10906,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 256,
|
||||
"b": 1024,
|
||||
"noMmap": true,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 MXFP4_MOE",
|
||||
"label": "tune ub=256 b=2048",
|
||||
"avg_tps": 62.74,
|
||||
"best_tps": 63.9,
|
||||
"boot": 9,
|
||||
"vram_total": 22912,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 12006,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10906,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 256,
|
||||
"b": 2048,
|
||||
"noMmap": true,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 MXFP4_MOE",
|
||||
"label": "FINAL",
|
||||
"avg_tps": 63.71,
|
||||
"best_tps": 64.39,
|
||||
"boot": 9,
|
||||
"vram_total": 22566,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 11784,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10782,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"noMmap": true,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 Q4_K_M",
|
||||
"label": "pure-GPU nommap small",
|
||||
"avg_tps": 62.29,
|
||||
"best_tps": 63.03,
|
||||
"boot": 9,
|
||||
"vram_total": 22975,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 12007,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10968,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"noMmap": true,
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 Q4_K_M",
|
||||
"label": "pure-GPU ts=0.5,0.5",
|
||||
"avg_tps": 63.89,
|
||||
"best_tps": 64.91,
|
||||
"boot": 12,
|
||||
"vram_total": 23002,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 12034,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10968,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 4,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"tensorSplit": "0.5,0.5",
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 Q4_K_M",
|
||||
"label": "tune t=2",
|
||||
"avg_tps": 64.1,
|
||||
"best_tps": 64.54,
|
||||
"boot": 12,
|
||||
"vram_total": 22980,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 12012,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10968,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 2,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"tensorSplit": "0.5,0.5",
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 Q4_K_M",
|
||||
"label": "tune t=6",
|
||||
"avg_tps": 64.18,
|
||||
"best_tps": 64.72,
|
||||
"boot": 12,
|
||||
"vram_total": 22982,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 12014,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10968,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 6,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"tensorSplit": "0.5,0.5",
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
},
|
||||
{
|
||||
"model": "Qwen3.5 Q4_K_M",
|
||||
"label": "tune t=8",
|
||||
"avg_tps": 63.11,
|
||||
"best_tps": 64.02,
|
||||
"boot": 12,
|
||||
"vram_total": 22980,
|
||||
"vram": [
|
||||
{
|
||||
"gpu": 0,
|
||||
"used": 12012,
|
||||
"total": 12288
|
||||
},
|
||||
{
|
||||
"gpu": 1,
|
||||
"used": 10968,
|
||||
"total": 12288
|
||||
}
|
||||
],
|
||||
"params": {
|
||||
"t": 8,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"tensorSplit": "0.5,0.5",
|
||||
"ngl": 999,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0"
|
||||
},
|
||||
"gpu_only": true
|
||||
}
|
||||
]
|
||||
12
scripts/_archive/results/qwen_intermediate.csv
Normal file
12
scripts/_archive/results/qwen_intermediate.csv
Normal file
@@ -0,0 +1,12 @@
|
||||
model,label,avg,best,mode,vram,t,ub,b,kv,split,mmap
|
||||
UD-IQ4_NL,pure-GPU minbatch,65.11,65.49,GPU,19177,t4,ub64,b256,q4_0/q4_0,,
|
||||
UD-IQ4_NL,pure-GPU nommap small,65.01,65.36,GPU,19672,t4,ub128,b512,q4_0/q4_0,,nommap
|
||||
UD-IQ4_NL,pure-GPU row-split,13.65,14.82,GPU,19427,t4,ub128,b512,q4_0/q4_0,row,
|
||||
UD-IQ4_NL,pure-GPU ts=0.5,0.5,64.92,65.23,GPU,19664,t4,ub128,b512,q4_0/q4_0,,
|
||||
UD-IQ4_NL,pure-GPU all-tricks,64.72,64.89,GPU,19171,t4,ub64,b256,q4_0/q4_0,,nommap
|
||||
UD-IQ4_NL,tune t=2,64.87,65.13,GPU,19170,t2,ub64,b256,q4_0/q4_0,,
|
||||
UD-IQ4_NL,tune t=6,64.88,65.17,GPU,19168,t6,ub64,b256,q4_0/q4_0,,
|
||||
UD-IQ4_NL,tune t=8,64.5,64.77,GPU,19168,t8,ub64,b256,q4_0/q4_0,,
|
||||
UD-IQ4_NL,tune ub=256 b=1024,64.73,64.98,GPU,20640,t4,ub256,b1024,q4_0/q4_0,,
|
||||
UD-IQ4_NL,tune ub=256 b=2048,63.69,64.94,GPU,20614,t4,ub256,b2048,q4_0/q4_0,,
|
||||
UD-IQ4_NL,tune kv=q8_0/q8_0,64.78,65.08,GPU,20422,t4,ub64,b256,q8_0/q8_0,,
|
||||
|
8
scripts/_archive/results/split_test_result.txt
Normal file
8
scripts/_archive/results/split_test_result.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
|
||||
==================================================
|
||||
TS=0.5,0.5 5-Run Results (with --mlock --poll 50):
|
||||
AVG: 61.94 t/s
|
||||
BEST: 62.06 t/s
|
||||
MIN: 61.74 t/s
|
||||
Runs: ['62.06', '61.74', '61.92', '62.00', '61.96']
|
||||
==================================================
|
||||
591
scripts/_archive/results/tune_results_gemma4_256k.json
Normal file
591
scripts/_archive/results/tune_results_gemma4_256k.json
Normal file
@@ -0,0 +1,591 @@
|
||||
[
|
||||
{
|
||||
"ngl": 22,
|
||||
"t": 8,
|
||||
"tb": 8,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.22049935826915,
|
||||
"best_tps": 25.971732307567606,
|
||||
"vram_used": 11953,
|
||||
"vram_total": 12288,
|
||||
"label": "ngl=22"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 8,
|
||||
"tb": 8,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.805518952775174,
|
||||
"best_tps": 25.953896683689454,
|
||||
"vram_used": 11942,
|
||||
"vram_total": 12288,
|
||||
"label": "ngl=21"
|
||||
},
|
||||
{
|
||||
"ngl": 20,
|
||||
"t": 8,
|
||||
"tb": 8,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 23.537353232262834,
|
||||
"best_tps": 24.32109262330477,
|
||||
"vram_used": 11972,
|
||||
"vram_total": 12288,
|
||||
"label": "ngl=20"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 2,
|
||||
"tb": 2,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 20.167581352340264,
|
||||
"best_tps": 20.701192443418005,
|
||||
"vram_used": 11969,
|
||||
"vram_total": 12288,
|
||||
"label": "t=2 | tb=2"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.689104997668554,
|
||||
"best_tps": 26.328541632880874,
|
||||
"vram_used": 11975,
|
||||
"vram_total": 12288,
|
||||
"label": "t=4 | tb=4"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 8,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.294470150452725,
|
||||
"best_tps": 26.541251363470614,
|
||||
"vram_used": 11984,
|
||||
"vram_total": 12288,
|
||||
"label": "t=4 | tb=8"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 6,
|
||||
"tb": 6,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.307859289404675,
|
||||
"best_tps": 26.292208504543133,
|
||||
"vram_used": 11984,
|
||||
"vram_total": 12288,
|
||||
"label": "t=6 | tb=6"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 6,
|
||||
"tb": 8,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.230599923243314,
|
||||
"best_tps": 26.366065850165732,
|
||||
"vram_used": 11983,
|
||||
"vram_total": 12288,
|
||||
"label": "t=6 | tb=8"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 8,
|
||||
"tb": 8,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.113108026759278,
|
||||
"best_tps": 26.123872617669583,
|
||||
"vram_used": 11984,
|
||||
"vram_total": 12288,
|
||||
"label": "t=8 | tb=8"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 8,
|
||||
"tb": 12,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.05545428888364,
|
||||
"best_tps": 26.06377500079152,
|
||||
"vram_used": 11983,
|
||||
"vram_total": 12288,
|
||||
"label": "t=8 | tb=12"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 10,
|
||||
"tb": 10,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 24.706926870374986,
|
||||
"best_tps": 25.03033604251865,
|
||||
"vram_used": 11984,
|
||||
"vram_total": 12288,
|
||||
"label": "t=10 | tb=10"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 12,
|
||||
"tb": 12,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 22.468055564001904,
|
||||
"best_tps": 23.425983251691825,
|
||||
"vram_used": 11989,
|
||||
"vram_total": 12288,
|
||||
"label": "t=12 | tb=12"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 16,
|
||||
"tb": 16,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 21.176973905195442,
|
||||
"best_tps": 21.482429642395456,
|
||||
"vram_used": 12021,
|
||||
"vram_total": 12288,
|
||||
"label": "t=16 | tb=16"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.545748810106186,
|
||||
"best_tps": 26.344547829145817,
|
||||
"vram_used": 11986,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=128 | b=512"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 256,
|
||||
"b": 1024,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.503875205368377,
|
||||
"best_tps": 26.393548686102108,
|
||||
"vram_used": 11981,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=256 | b=1024"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 256,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.46500292415627,
|
||||
"best_tps": 26.2726382287537,
|
||||
"vram_used": 11981,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=256 | b=2048"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 1024,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.50982209452459,
|
||||
"best_tps": 26.292282671074723,
|
||||
"vram_used": 12020,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=512 | b=1024"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.39646674356899,
|
||||
"best_tps": 26.28106356028714,
|
||||
"vram_used": 12020,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=512 | b=2048"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 4096,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.471945933724726,
|
||||
"best_tps": 26.268422652962233,
|
||||
"vram_used": 12021,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=512 | b=4096"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.722119623856702,
|
||||
"best_tps": 26.497264927416403,
|
||||
"vram_used": 12019,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=1024 | b=2048"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 4096,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.665819493145943,
|
||||
"best_tps": 26.301163428594148,
|
||||
"vram_used": 12019,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=1024 | b=4096"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.464915272955533,
|
||||
"best_tps": 26.40667691713752,
|
||||
"vram_used": 12019,
|
||||
"vram_total": 12288,
|
||||
"label": "ctk=q4_0 | ctv=q4_0"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"ctk": "q8_0",
|
||||
"ctv": "q8_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.489715990281564,
|
||||
"best_tps": 25.884133821146627,
|
||||
"vram_used": 12011,
|
||||
"vram_total": 12288,
|
||||
"label": "ctk=q8_0 | ctv=q8_0"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q8_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 22.751034104721082,
|
||||
"best_tps": 22.91250972782414,
|
||||
"vram_used": 12017,
|
||||
"vram_total": 12288,
|
||||
"label": "ctk=q4_0 | ctv=q8_0"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"ctk": "f16",
|
||||
"ctv": "f16",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 24.745831571513975,
|
||||
"best_tps": 25.53926086004382,
|
||||
"vram_used": 11985,
|
||||
"vram_total": 12288,
|
||||
"label": "ctk=f16 | ctv=f16"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"ctk": "q8_0",
|
||||
"ctv": "q8_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.21575943186602,
|
||||
"best_tps": 25.796865637378264,
|
||||
"vram_used": 12013,
|
||||
"vram_total": 12288,
|
||||
"label": "mmap=True | poll=50 | prio=2"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"ctk": "q8_0",
|
||||
"ctv": "q8_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": false,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 23.88172807693179,
|
||||
"best_tps": 24.803356430302312,
|
||||
"vram_used": 12016,
|
||||
"vram_total": 12288,
|
||||
"label": "mmap=False | poll=50 | prio=2"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"ctk": "q8_0",
|
||||
"ctv": "q8_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 0,
|
||||
"avg_tps": 25.041321207287698,
|
||||
"best_tps": 25.88479834694897,
|
||||
"vram_used": 12017,
|
||||
"vram_total": 12288,
|
||||
"label": "mmap=True | poll=0 | prio=2"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"ctk": "q8_0",
|
||||
"ctv": "q8_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 100,
|
||||
"avg_tps": 25.27990666474703,
|
||||
"best_tps": 26.034861156695197,
|
||||
"vram_used": 12017,
|
||||
"vram_total": 12288,
|
||||
"label": "mmap=True | poll=100 | prio=2"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"ctk": "q8_0",
|
||||
"ctv": "q8_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 3,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.360977804679788,
|
||||
"best_tps": 26.0705565191107,
|
||||
"vram_used": 12022,
|
||||
"vram_total": 12288,
|
||||
"label": "mmap=True | poll=50 | prio=3"
|
||||
},
|
||||
{
|
||||
"ngl": 21,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"ctk": "q8_0",
|
||||
"ctv": "q8_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": false,
|
||||
"prio": 3,
|
||||
"poll": 0,
|
||||
"avg_tps": 24.156893523381967,
|
||||
"best_tps": 24.840307911026144,
|
||||
"vram_used": 12021,
|
||||
"vram_total": 12288,
|
||||
"label": "mmap=False | poll=0 | prio=3"
|
||||
}
|
||||
]
|
||||
201
scripts/_archive/results/tune_results_gemma4_ncpumoe.json
Normal file
201
scripts/_archive/results/tune_results_gemma4_ncpumoe.json
Normal file
@@ -0,0 +1,201 @@
|
||||
[
|
||||
{
|
||||
"label": "ncpumoe=0",
|
||||
"ncpumoe": 0,
|
||||
"avg": 15.396949591766335,
|
||||
"best": 20.220093309883133,
|
||||
"vram": 12011,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "ncpumoe=5",
|
||||
"ncpumoe": 5,
|
||||
"avg": 4.853957926040404,
|
||||
"best": 4.9029479257524216,
|
||||
"vram": 11945,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "ncpumoe=10",
|
||||
"ncpumoe": 10,
|
||||
"avg": 20.64137159193706,
|
||||
"best": 26.474940718957154,
|
||||
"vram": 12020,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "ncpumoe=15",
|
||||
"ncpumoe": 15,
|
||||
"avg": 13.424368433101165,
|
||||
"best": 13.698684361880598,
|
||||
"vram": 12018,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "ncpumoe=20",
|
||||
"ncpumoe": 20,
|
||||
"avg": 10.338449574838693,
|
||||
"best": 13.495275411319872,
|
||||
"vram": 11530,
|
||||
"nommap": true
|
||||
},
|
||||
{
|
||||
"label": "ncpumoe=25",
|
||||
"ncpumoe": 25,
|
||||
"avg": 12.920348175328435,
|
||||
"best": 12.99923042323437,
|
||||
"vram": 11625,
|
||||
"nommap": true
|
||||
},
|
||||
{
|
||||
"label": "ncpumoe=30",
|
||||
"ncpumoe": 30,
|
||||
"avg": 13.251690836275145,
|
||||
"best": 13.253697466971921,
|
||||
"vram": 9064,
|
||||
"nommap": true
|
||||
},
|
||||
{
|
||||
"label": "ncpumoe=7",
|
||||
"ncpumoe": 7,
|
||||
"avg": 16.31796299658782,
|
||||
"best": 23.160760806218782,
|
||||
"vram": 11994,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "ncpumoe=9",
|
||||
"ncpumoe": 9,
|
||||
"avg": 7.469651892205037,
|
||||
"best": 10.875064047449284,
|
||||
"vram": 11941,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "ncpumoe=11",
|
||||
"ncpumoe": 11,
|
||||
"avg": 14.814740144776437,
|
||||
"best": 15.199641279675724,
|
||||
"vram": 11984,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "ncpumoe=13",
|
||||
"ncpumoe": 13,
|
||||
"avg": 14.183175252947136,
|
||||
"best": 14.427257794639086,
|
||||
"vram": 12003,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "t=2",
|
||||
"ncpumoe": 10,
|
||||
"avg": 28.551811207068425,
|
||||
"best": 28.688565545389164,
|
||||
"vram": 11968,
|
||||
"t": 2,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "t=4",
|
||||
"ncpumoe": 10,
|
||||
"avg": 30.8619310622166,
|
||||
"best": 31.17677746690393,
|
||||
"vram": 11972,
|
||||
"t": 4,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "t=6",
|
||||
"ncpumoe": 10,
|
||||
"avg": 30.578454576249854,
|
||||
"best": 30.971792125516313,
|
||||
"vram": 11983,
|
||||
"t": 6,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "t=8",
|
||||
"ncpumoe": 10,
|
||||
"avg": 30.529393512116172,
|
||||
"best": 30.954830478128166,
|
||||
"vram": 11982,
|
||||
"t": 8,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "t=10",
|
||||
"ncpumoe": 10,
|
||||
"avg": 30.773041112229503,
|
||||
"best": 31.00899077264753,
|
||||
"vram": 11972,
|
||||
"t": 10,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "ub=256,b=1024",
|
||||
"ncpumoe": 10,
|
||||
"avg": 30.49319055490045,
|
||||
"best": 30.691055921541377,
|
||||
"vram": 11993,
|
||||
"t": 4,
|
||||
"ub": 256,
|
||||
"b": 1024,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "ub=512,b=2048",
|
||||
"ncpumoe": 10,
|
||||
"avg": 30.923573731331718,
|
||||
"best": 31.902272031660825,
|
||||
"vram": 11995,
|
||||
"t": 4,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "ub=512,b=4096",
|
||||
"ncpumoe": 10,
|
||||
"avg": 30.723820162954862,
|
||||
"best": 31.065476003548053,
|
||||
"vram": 11966,
|
||||
"t": 4,
|
||||
"ub": 512,
|
||||
"b": 4096,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "ub=1024,b=2048",
|
||||
"ncpumoe": 10,
|
||||
"avg": 30.489888387093156,
|
||||
"best": 30.982074615885946,
|
||||
"vram": 11964,
|
||||
"t": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "kv=q4_0",
|
||||
"ncpumoe": 10,
|
||||
"avg": 30.63156129571348,
|
||||
"best": 31.088674795634944,
|
||||
"vram": 11988,
|
||||
"t": 4,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"nommap": false
|
||||
},
|
||||
{
|
||||
"label": "kv=q8_0",
|
||||
"ncpumoe": 10,
|
||||
"avg": 29.6114222576863,
|
||||
"best": 30.580427895917573,
|
||||
"vram": 11980,
|
||||
"t": 4,
|
||||
"ctk": "q8_0",
|
||||
"ctv": "q8_0",
|
||||
"nommap": false
|
||||
}
|
||||
]
|
||||
522
scripts/_archive/results/tune_results_qwen35b_256k.json
Normal file
522
scripts/_archive/results/tune_results_qwen35b_256k.json
Normal file
@@ -0,0 +1,522 @@
|
||||
[
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 6,
|
||||
"tb": 6,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 26.169961832638464,
|
||||
"best_tps": 26.533887071573073,
|
||||
"vram_used": 4994,
|
||||
"vram_total": 12288,
|
||||
"label": "cpu_moe=True"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": false,
|
||||
"t": 6,
|
||||
"tb": 6,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 11.065030380022206,
|
||||
"best_tps": 11.083028272674314,
|
||||
"vram_used": 11949,
|
||||
"vram_total": 12288,
|
||||
"label": "cpu_moe=False"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 2,
|
||||
"tb": 2,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 21.473286428302767,
|
||||
"best_tps": 21.746637577851104,
|
||||
"vram_used": 4994,
|
||||
"vram_total": 12288,
|
||||
"label": "t=2 | tb=2"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 26.552358479030676,
|
||||
"best_tps": 27.314237654089343,
|
||||
"vram_used": 4991,
|
||||
"vram_total": 12288,
|
||||
"label": "t=4 | tb=4"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 6,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 26.347068485327956,
|
||||
"best_tps": 26.87924726131441,
|
||||
"vram_used": 4993,
|
||||
"vram_total": 12288,
|
||||
"label": "t=4 | tb=6"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 6,
|
||||
"tb": 6,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 26.331286039513458,
|
||||
"best_tps": 26.81427299445741,
|
||||
"vram_used": 5001,
|
||||
"vram_total": 12288,
|
||||
"label": "t=6 | tb=6"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 6,
|
||||
"tb": 8,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 26.391160513711274,
|
||||
"best_tps": 26.735573238878736,
|
||||
"vram_used": 5001,
|
||||
"vram_total": 12288,
|
||||
"label": "t=6 | tb=8"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 8,
|
||||
"tb": 8,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 25.32340666199144,
|
||||
"best_tps": 25.87949347494079,
|
||||
"vram_used": 4995,
|
||||
"vram_total": 12288,
|
||||
"label": "t=8 | tb=8"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 10,
|
||||
"tb": 10,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 23.752277317850815,
|
||||
"best_tps": 24.98242898809555,
|
||||
"vram_used": 5011,
|
||||
"vram_total": 12288,
|
||||
"label": "t=10 | tb=10"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 12,
|
||||
"tb": 12,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 21.75032196383532,
|
||||
"best_tps": 23.18963400077116,
|
||||
"vram_used": 5104,
|
||||
"vram_total": 12288,
|
||||
"label": "t=12 | tb=12"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 128,
|
||||
"b": 512,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 13.27593572827031,
|
||||
"best_tps": 13.337407402920235,
|
||||
"vram_used": 4391,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=128 | b=512"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 256,
|
||||
"b": 1024,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 26.638687188233188,
|
||||
"best_tps": 27.361082444434413,
|
||||
"vram_used": 4495,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=256 | b=1024"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 256,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 26.29069503392877,
|
||||
"best_tps": 26.63368832924803,
|
||||
"vram_used": 4490,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=256 | b=2048"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 1024,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 26.518331831441134,
|
||||
"best_tps": 26.972021321271527,
|
||||
"vram_used": 4984,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=512 | b=1024"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 26.401541912276873,
|
||||
"best_tps": 26.46530849236633,
|
||||
"vram_used": 4990,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=512 | b=2048"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 4096,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 26.892711500590455,
|
||||
"best_tps": 26.892711500590455,
|
||||
"vram_used": 5006,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=512 | b=4096"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 2048,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 12.600209659679201,
|
||||
"best_tps": 12.759356030807627,
|
||||
"vram_used": 12020,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=1024 | b=2048"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 1024,
|
||||
"b": 4096,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 6.023959262370547,
|
||||
"best_tps": 8.284882268188156,
|
||||
"vram_used": 11931,
|
||||
"vram_total": 12288,
|
||||
"label": "ub=1024 | b=4096"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 4096,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 12.96992950856374,
|
||||
"best_tps": 12.96992950856374,
|
||||
"vram_used": 12022,
|
||||
"vram_total": 12288,
|
||||
"label": "ctk=q4_0 | ctv=q4_0"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 4096,
|
||||
"ctk": "q8_0",
|
||||
"ctv": "q8_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 11.420078920350697,
|
||||
"best_tps": 13.524778595767653,
|
||||
"vram_used": 12030,
|
||||
"vram_total": 12288,
|
||||
"label": "ctk=q8_0 | ctv=q8_0"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 4096,
|
||||
"ctk": "f16",
|
||||
"ctv": "f16",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 11.978106511464183,
|
||||
"best_tps": 13.729190013094977,
|
||||
"vram_used": 11518,
|
||||
"vram_total": 12288,
|
||||
"label": "ctk=f16 | ctv=f16"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 4096,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 16.164278220452957,
|
||||
"best_tps": 22.645890325274323,
|
||||
"vram_used": 11623,
|
||||
"vram_total": 12288,
|
||||
"label": "mmap=True | poll=50 | prio=2"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 4096,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": false,
|
||||
"prio": 2,
|
||||
"poll": 50,
|
||||
"avg_tps": 16.555542780023114,
|
||||
"best_tps": 23.333815015033892,
|
||||
"vram_used": 9062,
|
||||
"vram_total": 12288,
|
||||
"label": "mmap=False | poll=50 | prio=2"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 4096,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 0,
|
||||
"avg_tps": 13.003619379106329,
|
||||
"best_tps": 13.031594557134142,
|
||||
"vram_used": 11994,
|
||||
"vram_total": 12288,
|
||||
"label": "mmap=True | poll=0 | prio=2"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 4096,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 2,
|
||||
"poll": 100,
|
||||
"avg_tps": 5.7762452690702935,
|
||||
"best_tps": 5.795560155803046,
|
||||
"vram_used": 11953,
|
||||
"vram_total": 12288,
|
||||
"label": "mmap=True | poll=100 | prio=2"
|
||||
},
|
||||
{
|
||||
"ngl": 999,
|
||||
"cpu_moe": true,
|
||||
"t": 4,
|
||||
"tb": 4,
|
||||
"ub": 512,
|
||||
"b": 4096,
|
||||
"ctk": "q4_0",
|
||||
"ctv": "q4_0",
|
||||
"fa": "on",
|
||||
"mlock": true,
|
||||
"mmap": true,
|
||||
"prio": 3,
|
||||
"poll": 50,
|
||||
"avg_tps": 12.59406799687573,
|
||||
"best_tps": 14.966737641114795,
|
||||
"vram_used": 11996,
|
||||
"vram_total": 12288,
|
||||
"label": "mmap=True | poll=50 | prio=3"
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user