"""Phase 01 style short-prompt benchmark using llama.cpp internal timings.

Usage::

    python <script> [label] [model] [--image]

``label`` (default ``"run"``) only tags the output header, ``model``
(default ``"fast"``) is sent as the ``model`` field of every request, and
``--image`` additionally runs the vision prompts against
``logs/vision_test/sample.jpg``.  All requests go to the chat-completions
endpoint on ``127.0.0.1:8000``; throughput numbers are read from the
``timings`` object of each response rather than measured client-side.
"""
import base64
import json
import sys
import urllib.request
from pathlib import PurePath

# Best-effort: force UTF-8 output so printing never fails on consoles with a
# narrower default encoding.  Deliberately broad -- sys.stdout may have been
# replaced by an object that lacks (or rejects) reconfigure().
try:
    sys.stdout.reconfigure(encoding="utf-8")
except Exception:
    pass

CHAT_URL = "http://127.0.0.1:8000/v1/chat/completions"
VISION_SAMPLE = "logs/vision_test/sample.jpg"

# Extension -> MIME type used in the image data URL.  A fixed table keeps the
# result independent of the host's MIME registry.
IMAGE_MIME_BY_SUFFIX = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
}

# Timing fields that main() prints for text and vision runs respectively.
TEXT_TIMING_FIELDS = (
    "predicted_per_second",
    "predicted_n",
    "predicted_ms",
    "prompt_per_second",
    "prompt_n",
)
VISION_TIMING_FIELDS = (
    "prompt_n",
    "prompt_ms",
    "prompt_per_second",
    "predicted_n",
    "predicted_per_second",
)


def post_chat(body, timeout):
    """POST *body* as JSON to ``CHAT_URL`` and return its ``timings`` dict.

    Args:
        body: JSON-serialisable chat-completions request.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        The ``timings`` object of the decoded response, or ``{}`` when the
        response has no such key.

    Raises:
        urllib.error.URLError: The server is unreachable or returns an HTTP
            error status.
    """
    req = urllib.request.Request(
        CHAT_URL,
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req, timeout=timeout) as r:
        return json.loads(r.read()).get("timings", {})


def image_mime_type(image_path):
    """Return the MIME type for *image_path* based on its file extension.

    Unknown or missing extensions fall back to ``image/jpeg``, which is what
    every image was labelled as before the type was derived from the path.
    """
    suffix = PurePath(str(image_path)).suffix.lower()
    return IMAGE_MIME_BY_SUFFIX.get(suffix, "image/jpeg")


def bench_text(model_name, n=200):
    """Run one deterministic text completion and return the server timings.

    Args:
        model_name: Value of the request's ``model`` field.
        n: ``max_tokens`` limit for the completion.

    Returns:
        The response's ``timings`` dict (``{}`` if the server sent none).
    """
    return post_chat(
        {
            "model": model_name,
            "messages": [
                {
                    "role": "user",
                    "content": "Count from 1 to 50, each number on a new line.",
                }
            ],
            "max_tokens": n,
            "temperature": 0,
        },
        timeout=120,
    )


def bench_image(model_name, image_path, prompt):
    """Run one vision completion on an image file and return the timings.

    The file is embedded in the request as a base64 ``data:`` URL.

    Args:
        model_name: Value of the request's ``model`` field.
        image_path: Path of the image file to send.
        prompt: Text question sent alongside the image.

    Returns:
        The response's ``timings`` dict (``{}`` if the server sent none).

    Raises:
        OSError: *image_path* cannot be read.
    """
    with open(image_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    data_url = f"data:{image_mime_type(image_path)};base64,{b64}"
    return post_chat(
        {
            "model": model_name,
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }],
            "max_tokens": 100,
            "temperature": 0.3,
        },
        # Much longer than the text timeout (120 s).
        timeout=600,
    )


def parse_args(argv):
    """Split *argv* into ``(label, model, do_image)``.

    ``--image`` may appear at any position; it is removed before the
    positional arguments are read so it can never be mistaken for the label
    or the model name.

    Args:
        argv: Full argument vector including the program name at index 0.

    Returns:
        Tuple ``(label, model, do_image)`` with defaults ``"run"``,
        ``"fast"`` and ``False``.
    """
    args = argv[1:]
    do_image = "--image" in args
    positional = [a for a in args if a != "--image"]
    label = positional[0] if positional else "run"
    model = positional[1] if len(positional) > 1 else "fast"
    return label, model, do_image


def require_timings(timings, fields):
    """Exit with a clear message if *timings* lacks any of *fields*.

    Without this check a response that carries no llama.cpp ``timings``
    object would surface as a bare ``KeyError`` while formatting results.
    """
    missing = [name for name in fields if name not in timings]
    if missing:
        sys.exit(
            "error: server response lacks llama.cpp timing field(s): "
            + ", ".join(missing)
        )


def main():
    """Run the warmup, five text runs and (optionally) three vision runs."""
    label, model, do_image = parse_args(sys.argv)
    print(f"=== [{label}] model={model} do_image={do_image} ===")

    # A failed warmup is reported but does not abort the benchmark.
    print("warmup...")
    try:
        bench_text(model, 10)
    except Exception as e:
        print(f"warmup err: {e}")

    print("text 5-run:")
    runs = []
    for i in range(5):
        t = bench_text(model, 200)
        require_timings(t, TEXT_TIMING_FIELDS)
        runs.append(t["predicted_per_second"])
        print(f" Run {i+1}: gen {t['predicted_per_second']:.2f} t/s ({t['predicted_n']} tok, {t['predicted_ms']:.0f}ms) | prompt {t['prompt_per_second']:.1f} t/s ({t['prompt_n']} tok)")
    avg = sum(runs) / len(runs)
    print(f" TEXT AVG: {avg:.2f} t/s BEST: {max(runs):.2f} MIN: {min(runs):.2f}")

    if do_image:
        # A different prompt is sent on each run.
        prompts = [
            "What do you see in this image? One sentence.",
            "Describe the subject and background in one sentence.",
            "What is the most prominent feature? One sentence.",
        ]
        print("vision 3-run (640x640 cat):")
        for i, p in enumerate(prompts):
            t = bench_image(model, VISION_SAMPLE, p)
            require_timings(t, VISION_TIMING_FIELDS)
            print(f" Run {i+1}: prompt {t['prompt_n']} tok ({t['prompt_ms']:.0f}ms, {t['prompt_per_second']:.1f} t/s) | gen {t['predicted_n']} tok ({t['predicted_per_second']:.1f} t/s)")


if __name__ == "__main__":
    main()