# variet_llm/scripts/_archive/benchmarks/test_qwen.py

import urllib.request
import json
import traceback

# Base address of the HTTP server that test() sends its request to
# (loopback interface, port 8000).
BASE_URL = "http://127.0.0.1:8000"

# Korean-language user prompt sent to the model. It asks for a concrete
# architecture (language, message queue, database, caching strategy) for
# ingesting real-time stock tick data and broadcasting it to clients over
# WebSocket, including how to deal with bottlenecks. Adjacent literals are
# concatenated at compile time, so the value is one single-line string.
prompt = (
    "수백만 건의 실시간 주식 틱 데이터를 수집하고, "
    "이를 가공하여 초당 수천 명의 클라이언트에게 웹소켓으로 지연 없이 브로드캐스팅하는 시스템을 설계해야 합니다. "
    "언어, 메시지 큐, 데이터베이스, 캐싱 전략 등을 포함해 구체적인 아키텍처를 제안하고, "
    "병목 현상에 대비한 해결책을 설명하세요."
)

def test():
    """Send one chat-completion request to the server and print the reply.

    Builds a JSON payload containing a fixed system message, the
    module-level ``prompt`` as the user message, ``max_tokens=4096`` and
    ``temperature=0.1``, sends it to ``{BASE_URL}/v1/chat/completions``
    with a 300-second timeout, and prints
    ``choices[0].message.content`` from the decoded JSON response.

    Returns:
        None. All output goes to stdout.

    Any ``Exception`` raised while building, sending, or decoding the
    request is caught here and reported (message plus traceback) instead
    of propagating, since this function is the script's top-level entry.
    """
    try:
        payload = json.dumps({
            "model": "m",
            "messages": [
                {"role": "system", "content": "You are a world-class IT system architect and developer. Please output your response in Korean."},
                {"role": "user", "content": prompt}
            ],
            "max_tokens": 4096,
            "temperature": 0.1
        }).encode('utf-8')

        # Supplying ``data`` makes urllib issue a POST request.
        req = urllib.request.Request(
            f"{BASE_URL}/v1/chat/completions",
            data=payload,
            headers={"Content-Type": "application/json"}
        )
        print("전송 중... (타임아웃 300초)")
        # Use the response as a context manager so the underlying
        # connection is closed even if reading the body fails.
        with urllib.request.urlopen(req, timeout=300) as resp:
            body = resp.read()
        res_json = json.loads(body)
        print("\n=== 결과 ===")
        print(res_json["choices"][0]["message"]["content"])
    except Exception as e:
        # Top-level boundary of the script: report the failure and return.
        print("\n=== 에러 발생 ===")
        print(e)
        traceback.print_exc()

# Fire the request only when this file is executed as a script, so that
# importing the module does not trigger network traffic.
if __name__ == "__main__":
    test()