import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
import { dirname, join } from "node:path";
import { homedir } from "node:os";
import { performance } from "node:perf_hooks";
import { OLLAMA_URL, OLLAMA_PORT } from "../config.js";
import { getActiveChatModel } from "../runtime-config.js";
import { log, err } from "../log.js";
import type { ModelDef } from "../registry/models.js";

// ANSI escape sequences used to style terminal output.
const BOLD = "\x1b[1m";
const DIM = "\x1b[2m";
const RESET = "\x1b[0m";

/** Location of the JSON file that accumulates benchmark runs. */
const BENCHMARKS_PATH = join(
  homedir(),
  ".config",
  "localcode",
  "benchmarks.json",
);

/** A single chat prompt sent to the model during a benchmark run. */
interface BenchPrompt {
  label: string;
  system: string;
  user: string;
}

/** Fixed prompt set; every benchmark run sends all of these, in order. */
const PROMPTS: BenchPrompt[] = [
  {
    label: "fizzbuzz",
    system: "You are an expert programmer.",
    user: "Write a fizzbuzz function in Python.",
  },
  {
    label: "BST class",
    system: "You are an expert programmer.",
    user: "Write a binary search tree implementation in TypeScript with insert, delete, and search methods.",
  },
  {
    label: "code review",
    system: "You are an expert code reviewer.",
    user: `Review this code and suggest improvements: function processData(data) { var result = []; for (var i = 0; i < data.length; i++) { if (data[i].active == true) { var item = {}; item.name = data[i].firstName + " " + data[i].lastName; item.email = data[i].email; item.score = data[i].points / data[i].maxPoints * 100; if (item.score >= 90) { item.grade = "A"; } else if (item.score >= 80) { item.grade = "B"; } else if (item.score >= 70) { item.grade = "C"; } else if (item.score >= 60) { item.grade = "D"; } else { item.grade = "F"; } result.push(item); } } result.sort(function(a, b) { return b.score - a.score; }); return result; }`,
  },
];

/** Measurements collected for one prompt. */
interface PromptResult {
  label: string;
  promptTokens: number;
  completionTokens: number;
  elapsedMs: number;
  tokensPerSec: number;
}

/** One persisted benchmark run, as stored in the history file. */
interface BenchmarkEntry {
  timestamp: string;
  model: string;
  modelName: string;
  results: PromptResult[];
  avgTokPerSec: number;
}

/**
 * Probes the server's `/api/tags` endpoint.
 *
 * @returns `true` when the request succeeds with an OK status, `false` when
 *   it fails or the server answers with a non-OK status.
 */
async function checkHealth(): Promise<boolean> {
  try {
    const res = await fetch(`${OLLAMA_URL}/api/tags`);
    return res.ok;
  } catch {
    return false;
  }
}

/**
 * Sends one non-streaming chat completion request and measures it.
 *
 * @param model - Model whose `ollamaTag` is placed in the request body.
 * @param prompt - System and user messages to send.
 * @returns Token counts reported by the server plus timing for this prompt.
 * @throws Error when the server responds with a non-OK status.
 */
async function runPrompt(
  model: ModelDef,
  prompt: BenchPrompt,
): Promise<PromptResult> {
  const body = JSON.stringify({
    model: model.ollamaTag,
    messages: [
      { role: "system", content: prompt.system },
      { role: "user", content: prompt.user },
    ],
    stream: false,
  });

  const start = performance.now();
  const res = await fetch(`${OLLAMA_URL}/v1/chat/completions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body,
  });

  if (!res.ok) {
    const text = await res.text();
    throw new Error(`Server returned ${res.status}: ${text}`);
  }

  // The clock stops once the response object is available, before the body
  // is read and parsed.
  const elapsed = performance.now() - start;

  const data = (await res.json()) as {
    usage?: { prompt_tokens?: number; completion_tokens?: number };
  };
  const promptTokens = data.usage?.prompt_tokens ?? 0;
  const completionTokens = data.usage?.completion_tokens ?? 0;

  // Report 0 tok/s when the server returned no completion-token count.
  const tokPerSec =
    completionTokens > 0 ? completionTokens / (elapsed / 1000) : 0;

  return {
    label: prompt.label,
    promptTokens,
    completionTokens,
    elapsedMs: elapsed,
    tokensPerSec: tokPerSec,
  };
}

/**
 * Mean of `tokensPerSec` across the given results.
 *
 * @returns The arithmetic mean, or 0 for an empty list (avoids a NaN from
 *   dividing by zero).
 */
function averageTokPerSec(results: readonly PromptResult[]): number {
  if (results.length === 0) {
    return 0;
  }
  const total = results.reduce((sum, r) => sum + r.tokensPerSec, 0);
  return total / results.length;
}

/**
 * Reads the benchmark history file.
 *
 * @returns The stored entries, or an empty array when the file is missing,
 *   unreadable, not valid JSON, or does not contain a JSON array. Individual
 *   entries are not validated.
 */
function loadHistory(): BenchmarkEntry[] {
  try {
    const parsed: unknown = JSON.parse(readFileSync(BENCHMARKS_PATH, "utf-8"));
    // Guard against a file holding some other JSON value (object, string, ...),
    // which would otherwise make callers throw on push / iteration.
    return Array.isArray(parsed) ? (parsed as BenchmarkEntry[]) : [];
  } catch {
    // No existing (or readable) file
    return [];
  }
}

/**
 * Appends `entry` to the history file, creating the parent directory when
 * needed. The whole file is rewritten as pretty-printed JSON.
 */
function saveBenchmark(entry: BenchmarkEntry): void {
  const history = loadHistory();
  history.push(entry);
  mkdirSync(dirname(BENCHMARKS_PATH), { recursive: true });
  writeFileSync(BENCHMARKS_PATH, JSON.stringify(history, null, 2) + "\n");
}

/**
 * Prints a per-prompt results table followed by the average tok/s.
 *
 * @param model - Model that was benchmarked (name and tag are shown).
 * @param results - One row per successfully completed prompt.
 */
function printResults(model: ModelDef, results: PromptResult[]): void {
  console.log("");
  console.log(`${BOLD}Model:${RESET} ${model.name} (${model.ollamaTag})`);
  console.log(`${BOLD}Port:${RESET} ${OLLAMA_PORT}`);
  console.log("");

  // Table header
  const hdr = [
    "Prompt".padEnd(16),
    "Prompt Tok".padStart(10),
    "Compl Tok".padStart(10),
    "Time (s)".padStart(10),
    "Tok/s".padStart(8),
  ].join(" ");
  console.log(` ${BOLD}${hdr}${RESET}`);
  console.log(` ${"─".repeat(hdr.length)}`);

  for (const r of results) {
    const row = [
      r.label.padEnd(16),
      String(r.promptTokens).padStart(10),
      String(r.completionTokens).padStart(10),
      (r.elapsedMs / 1000).toFixed(1).padStart(10),
      r.tokensPerSec.toFixed(1).padStart(8),
    ].join(" ");
    console.log(` ${row}`);
  }

  const avgTokSec = averageTokPerSec(results);
  console.log("");
  console.log(` ${BOLD}Average: ${avgTokSec.toFixed(1)} tok/s${RESET}`);
  console.log("");
}

/**
 * Prints one line per stored benchmark run (date, model name, average tok/s),
 * or a notice when there is no history to show.
 */
function printHistory(): void {
  const history = loadHistory();
  if (history.length === 0) {
    console.log("No benchmark history found.");
    return;
  }

  console.log(`\n${BOLD}Benchmark History:${RESET}\n`);
  const hdr = [
    "Date".padEnd(20),
    "Model".padEnd(24),
    "Avg Tok/s".padStart(10),
  ].join(" ");
  console.log(` ${BOLD}${hdr}${RESET}`);
  console.log(` ${"─".repeat(hdr.length)}`);

  for (const entry of history) {
    // Turn the stored ISO timestamp into "YYYY-MM-DD HH:MM:SS".
    const date = entry.timestamp.replace("T", " ").slice(0, 19);
    const row = [
      date.padEnd(20),
      entry.modelName.padEnd(24),
      entry.avgTokPerSec.toFixed(1).padStart(10),
    ].join(" ");
    console.log(` ${row}`);
  }
  console.log("");
}

/**
 * Entry point for the benchmark command.
 *
 * With `--history` in `args`, prints the stored history and returns.
 * Otherwise checks that the server responds, runs every prompt in `PROMPTS`
 * sequentially against the active chat model, prints the results table and
 * appends the run to the history file. A prompt that fails is reported and
 * skipped; the run is only abandoned when every prompt fails.
 *
 * @param args - Command-line arguments for this subcommand.
 */
export async function runBench(args: string[]): Promise<void> {
  if (args.includes("--history")) {
    printHistory();
    return;
  }

  const healthy = await checkHealth();
  if (!healthy) {
    err("Ollama not running. Start it with: localcode start");
    // Stop here even if err() only reports and does not end the process.
    return;
  }

  const model = getActiveChatModel();
  log(`Benchmarking ${model.name} (${model.ollamaTag})...`);
  console.log(`${DIM}Running ${PROMPTS.length} prompts (this may take a minute)...${RESET}`);

  const results: PromptResult[] = [];
  // Prompts run one at a time on purpose: concurrent requests would compete
  // for the same server and distort the per-prompt timings.
  for (const prompt of PROMPTS) {
    process.stdout.write(` ${prompt.label}...`);
    try {
      const result = await runPrompt(model, prompt);
      results.push(result);
      console.log(` ${result.tokensPerSec.toFixed(1)} tok/s`);
    } catch (e) {
      console.log(` FAILED: ${e instanceof Error ? e.message : e}`);
    }
  }

  if (results.length === 0) {
    err("All prompts failed.");
    // Nothing to print or persist; avoids saving an empty run.
    return;
  }

  printResults(model, results);

  // Save to history
  const avgTokPerSec = averageTokPerSec(results);
  saveBenchmark({
    timestamp: new Date().toISOString(),
    model: model.id,
    modelName: model.name,
    results,
    avgTokPerSec,
  });
  log(`Results saved to ${BENCHMARKS_PATH}`);
}