feat: Variet Engine v1.0 + 5-model tuning complete

Phase 01 (LLM Tuning):
- Gemma4 26B: 74.65 t/s (fast)
- Qwen 35B: 61.62 t/s (balanced)
- Gemma4 31B: 16.0 t/s (deep-coder)
- Qwen 27B: 16.7 t/s (deep-logic)
- Qwen 122B: 8.95 t/s (ultra, GPU 1 only)

Phase 02 (API Engine):
- FastAPI reverse proxy on port 8000
- /engine/switch hot-swap with 503 protection
- config/engine_models.json as single source of truth
- Replaced 4 individual .bat files with unified engine

File cleanup:
- scripts/ 85 files -> 9 + _archive/
- Root .bat files -> _archive/
This commit is contained in:
Variet-Worker
2026-04-07 18:08:58 +09:00
parent 7c7a899fd5
commit c111b3a9b0
414 changed files with 3402 additions and 68598 deletions

View File

@@ -0,0 +1,36 @@
import urllib.request
import json
import traceback
# Address of the local API server that the smoke test sends its request to.
BASE_URL = "http://127.0.0.1:8000"

# Korean-language system-design question used as the user message.
# Adjacent string literals are concatenated by the parser, so the value is a
# single unbroken string.
prompt = (
    "수백만 건의 실시간 주식 틱 데이터를 수집하고, "
    "이를 가공하여 초당 수천 명의 클라이언트에게 웹소켓으로 지연 없이 브로드캐스팅하는 시스템을 설계해야 합니다. "
    "언어, 메시지 큐, 데이터베이스, 캐싱 전략 등을 포함해 구체적인 아키텍처를 제안하고, "
    "병목 현상에 대비한 해결책을 설명하세요."
)
def test():
    """Send one chat-completion request to the engine and print the reply.

    Builds a JSON body containing a fixed system message and the module-level
    ``prompt`` as the user message, POSTs it to
    ``{BASE_URL}/v1/chat/completions`` with a 300-second timeout, and prints
    ``choices[0].message.content`` from the JSON response.

    Returns:
        None. Results are written to stdout only.

    Raises:
        Nothing. Any ``Exception`` (connection failure, HTTP error, malformed
        or unexpected JSON) is caught and reported on stdout/stderr together
        with its traceback, because this is a manual diagnostic script.
    """
    try:
        payload = json.dumps({
            # NOTE(review): "m" looks like a placeholder model name; presumably
            # the server picks the active model itself - confirm.
            "model": "m",
            "messages": [
                {"role": "system", "content": "You are a world-class IT system architect and developer. Please output your response in Korean."},
                {"role": "user", "content": prompt},
            ],
            "max_tokens": 4096,
            "temperature": 0.1,
        }).encode('utf-8')
        # Supplying ``data`` makes urllib issue a POST request.
        req = urllib.request.Request(
            f"{BASE_URL}/v1/chat/completions",
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        print("전송 중... (타임아웃 300초)")
        # The context manager closes the HTTP response (and its socket) even
        # when read() raises; the previous one-liner never closed it.
        with urllib.request.urlopen(req, timeout=300) as http_resp:
            resp = http_resp.read()
        # json.loads accepts the raw bytes directly.
        res_json = json.loads(resp)
        print("\n=== 결과 ===")
        print(res_json["choices"][0]["message"]["content"])
    except Exception as e:
        # Top-level boundary of a manual test script: report, do not re-raise.
        print("\n=== 에러 발생 ===")
        print(e)
        traceback.print_exc()
# Run the smoke test only when executed as a script, not when imported.
if __name__ == "__main__":
    test()