chore: initial project setup with agent guide

2026-04-05 00:43:39 +09:00
commit 7890ff6644
1368 changed files with 213076 additions and 0 deletions
--- a/scripts/perf_test.py
+++ b/scripts/perf_test.py
@@ -0,0 +1,123 @@
+import time
+import json
+import urllib.request
+import sys
+
+# Switch stdout to UTF-8 so printed text is encoded consistently. If
+# sys.stdout has no reconfigure() method, the resulting AttributeError is
+# ignored and the stream keeps whatever encoding it already had.
+try:
+    sys.stdout.reconfigure(encoding='utf-8')
+except AttributeError:
+    pass
+
+# Root URL of the server under test; the /health and /v1/chat/completions
+# paths requested by the functions below are appended to it.
+BASE_URL = "http://127.0.0.1:8000"
+
+def check_server():
+    """Check if server is up"""
+    try:
+        req = urllib.request.Request(f"{BASE_URL}/health")
+        with urllib.request.urlopen(req, timeout=5) as resp:
+            data = json.loads(resp.read())
+            return data.get("status") == "ok"
+    except:
+        return False
+
+def run_benchmark(prompt, max_tokens=100, label="Test"):
+    """Run a single benchmark request and return results"""
+    payload = json.dumps({
+        "model": "local-model",
+        "messages": [{"role": "user", "content": prompt}],
+        "max_tokens": max_tokens,
+        "temperature": 0.0
+    }).encode("utf-8")
+
+    req = urllib.request.Request(
+        f"{BASE_URL}/v1/chat/completions",
+        data=payload,
+        headers={"Content-Type": "application/json"}
+    )
+
+    start = time.time()
+    with urllib.request.urlopen(req, timeout=300) as resp:
+        result = json.loads(resp.read())
+    elapsed = time.time() - start
+
+    content = result["choices"][0]["message"].get("content", "")
+    usage = result.get("usage", {})
+    prompt_tokens = usage.get("prompt_tokens", 0)
+    completion_tokens = usage.get("completion_tokens", 0)
+
+    gen_tps = completion_tokens / elapsed if elapsed > 0 else 0
+
+    return {
+        "label": label,
+        "prompt_tokens": prompt_tokens,
+        "completion_tokens": completion_tokens,
+        "elapsed": elapsed,
+        "gen_tps_approx": gen_tps,
+        "content_preview": content[:100]
+    }
+
+def main():
+    print("=" * 60)
+    print("  LLM Performance Benchmark Tool")
+    print("=" * 60)
+    print()
+
+    # Wait for server
+    print("[1/3] Checking server health...")
+    for i in range(30):
+        if check_server():
+            print("  -> Server is ready!")
+            break
+        print(f"  -> Waiting for server... ({i+1}/30)")
+        time.sleep(2)
+    else:
+        print("  -> ERROR: Server not responding after 60s")
+        return
+
+    # Warmup
+    print()
+    print("[2/3] Warmup run (short)...")
+    try:
+        warmup = run_benchmark("Say hello in 5 words.", max_tokens=20, label="Warmup")
+        print(f"  -> Warmup done: {warmup['completion_tokens']} tokens in {warmup['elapsed']:.2f}s")
+    except Exception as e:
+        print(f"  -> Warmup failed: {e}")
+
+    # Main benchmark
+    print()
+    print("[3/3] Running main benchmark...")
+    print("-" * 60)
+
+    test_prompt = "Count from 1 to 50, writing each number on a new line."
+    
+    results = []
+    for i in range(3):
+        print(f"  Run {i+1}/3...")
+        try:
+            r = run_benchmark(test_prompt, max_tokens=200, label=f"Run {i+1}")
+            results.append(r)
+            print(f"    Tokens: {r['completion_tokens']} | "
+                  f"Time: {r['elapsed']:.2f}s | "
+                  f"Speed: {r['gen_tps_approx']:.2f} t/s (approx)")
+        except Exception as e:
+            print(f"    ERROR: {e}")
+
+    if results:
+        print()
+        print("=" * 60)
+        print("  RESULTS SUMMARY")
+        print("=" * 60)
+        avg_tps = sum(r["gen_tps_approx"] for r in results) / len(results)
+        max_tps = max(r["gen_tps_approx"] for r in results)
+        min_tps = min(r["gen_tps_approx"] for r in results)
+        print(f"  Runs:     {len(results)}")
+        print(f"  Avg TPS:  {avg_tps:.2f} t/s (approx, includes prompt eval)")
+        print(f"  Min TPS:  {min_tps:.2f} t/s")
+        print(f"  Max TPS:  {max_tps:.2f} t/s")
+        print()
+        print("  NOTE: Check server console for exact generation t/s")
+        print("  (the 'eval time' line shows pure token generation speed)")
+        print("=" * 60)
+
+# Run the benchmark only when this file is executed as a script, not when
+# it is imported as a module.
+if __name__ == "__main__":
+    main()