From f3e9e9f053323635798530224b243aa0085276bb Mon Sep 17 00:00:00 2001 From: Variet Worker Date: Sun, 12 Apr 2026 23:44:15 +0900 Subject: [PATCH] =?UTF-8?q?feat(engine):=20balanced=20=EC=97=AD=ED=95=A0?= =?UTF-8?q?=20jinja=20thinking=20+=20checkpoint=20RAM=20=EC=98=A4=ED=94=84?= =?UTF-8?q?=EB=A1=9C=EB=93=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - --jinja + --chat-template-kwargs '{"enable_thinking":true}' 추가 - -cram 8192: context checkpoint를 GPU 대신 CPU RAM에 저장, 상한 8192 MiB (GPU CUDA OOM 크래시 방지 — device:1에서 cuMemSetAccess 실패) Co-Authored-By: Claude Opus 4.6 (1M context) --- config/engine_models.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/config/engine_models.json b/config/engine_models.json index 3bd0b64..cc2cf7e 100644 --- a/config/engine_models.json +++ b/config/engine_models.json @@ -46,6 +46,9 @@ "--mmproj", "models/mmproj-F16.gguf", "--no-mmproj-offload", + "--jinja", + "--chat-template-kwargs", + "{\"enable_thinking\":true}", "-ngl", "999", "-c", @@ -63,7 +66,9 @@ "-b", "512", "-ts", - "0.48,0.52" + "0.48,0.52", + "-cram", + "8192" ] }, "deep-coder": {