Script for easily configuring, using, switching between, and comparing local offline coding models.
at main 273 lines 7.0 kB view raw
import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
import { dirname, join } from "node:path";
import { homedir } from "node:os";
import { performance } from "node:perf_hooks";
import { OLLAMA_URL, OLLAMA_PORT } from "../config.js";
import { getActiveChatModel } from "../runtime-config.js";
import { log, err } from "../log.js";
import type { ModelDef } from "../registry/models.js";

// ANSI escape sequences used to style terminal output.
const BOLD = "\x1b[1m";
const DIM = "\x1b[2m";
const RESET = "\x1b[0m";

/** JSON file, under the user's home directory, that accumulates benchmark runs. */
const BENCHMARKS_PATH = join(
  homedir(),
  ...[".config", "localcode", "benchmarks.json"],
);

/** A single benchmark prompt: a display label plus the system and user messages. */
interface BenchPrompt {
  label: string;
  system: string;
  user: string;
}

/** System message shared by the code-generation prompts. */
const PROGRAMMER_PERSONA = "You are an expert programmer.";

/** Deliberately dated JavaScript that the "code review" prompt asks the model to critique. */
const REVIEW_SNIPPET = `function processData(data) {
  var result = [];
  for (var i = 0; i < data.length; i++) {
    if (data[i].active == true) {
      var item = {};
      item.name = data[i].firstName + " " + data[i].lastName;
      item.email = data[i].email;
      item.score = data[i].points / data[i].maxPoints * 100;
      if (item.score >= 90) {
        item.grade = "A";
      } else if (item.score >= 80) {
        item.grade = "B";
      } else if (item.score >= 70) {
        item.grade = "C";
      } else if (item.score >= 60) {
        item.grade = "D";
      } else {
        item.grade = "F";
      }
      result.push(item);
    }
  }
  result.sort(function(a, b) { return b.score - a.score; });
  return result;
}`;

/** The fixed prompt set sent to the model, in the order listed. */
const PROMPTS: BenchPrompt[] = [
  {
    label: "fizzbuzz",
    system: PROGRAMMER_PERSONA,
    user: "Write a fizzbuzz function in Python.",
  },
  {
    label: "BST class",
    system: PROGRAMMER_PERSONA,
    user: "Write a binary search tree implementation in TypeScript with insert, delete, and search methods.",
  },
  {
    label: "code review",
    system: "You are an expert code reviewer.",
    user: `Review this code and suggest improvements:\n\n${REVIEW_SNIPPET}`,
  },
];

/** Measurements recorded for one prompt. */
interface PromptResult {
  label: string;
  promptTokens: number;
  completionTokens: number;
  elapsedMs: number;
  tokensPerSec: number;
}
/** One persisted benchmark run: when it ran, which model, and what was measured. */
interface BenchmarkEntry {
  timestamp: string;
  model: string;
  modelName: string;
  results: PromptResult[];
  avgTokPerSec: number;
}

/**
 * Probes the Ollama server.
 *
 * @returns `true` when `GET {OLLAMA_URL}/api/tags` answers with a 2xx status;
 *   `false` on any other status or when the request itself fails.
 */
async function checkHealth(): Promise<boolean> {
  try {
    const res = await fetch(`${OLLAMA_URL}/api/tags`);
    return res.ok;
  } catch {
    return false;
  }
}

/**
 * Sends one prompt to the chat-completions endpoint (non-streaming) and
 * measures throughput.
 *
 * @param model - Model definition; its `ollamaTag` is sent as the `model` field.
 * @param prompt - System and user messages to send.
 * @returns Token counts taken from the response's `usage` object (0 when
 *   absent), the elapsed wall-clock time, and completion tokens per second.
 * @throws Error when the server answers with a non-2xx status; the message
 *   contains the status code and the response body.
 */
async function runPrompt(
  model: ModelDef,
  prompt: BenchPrompt,
): Promise<PromptResult> {
  const body = JSON.stringify({
    model: model.ollamaTag,
    messages: [
      { role: "system", content: prompt.system },
      { role: "user", content: prompt.user },
    ],
    stream: false,
  });

  const start = performance.now();
  const res = await fetch(`${OLLAMA_URL}/v1/chat/completions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body,
  });

  if (!res.ok) {
    const text = await res.text();
    throw new Error(`Server returned ${res.status}: ${text}`);
  }

  // The clock stops once the response has arrived, before the JSON body is parsed.
  const elapsed = performance.now() - start;
  const data = (await res.json()) as {
    usage?: { prompt_tokens?: number; completion_tokens?: number };
  };

  const promptTokens = data.usage?.prompt_tokens ?? 0;
  const completionTokens = data.usage?.completion_tokens ?? 0;
  // Report 0 rather than a misleading rate when the server gave no completion count.
  const tokPerSec =
    completionTokens > 0 ? completionTokens / (elapsed / 1000) : 0;

  return {
    label: prompt.label,
    promptTokens,
    completionTokens,
    elapsedMs: elapsed,
    tokensPerSec: tokPerSec,
  };
}

/**
 * Reads the benchmark history file.
 *
 * @returns The stored entries, or an empty array when the file is missing,
 *   unreadable, not valid JSON, or valid JSON that is not an array.
 */
function loadHistory(): BenchmarkEntry[] {
  try {
    const parsed: unknown = JSON.parse(readFileSync(BENCHMARKS_PATH, "utf-8"));
    // A non-array payload (e.g. `{}`) would otherwise break `push` / `for…of`
    // in the callers, so treat it like a missing file.
    return Array.isArray(parsed) ? (parsed as BenchmarkEntry[]) : [];
  } catch {
    // No existing (or no readable) history file.
    return [];
  }
}

/**
 * Arithmetic mean of `tokensPerSec` over the given results.
 *
 * @returns The mean, or 0 for an empty list (avoids dividing by zero).
 */
function averageTokensPerSec(results: readonly PromptResult[]): number {
  if (results.length === 0) {
    return 0;
  }
  const total = results.reduce((sum, r) => sum + r.tokensPerSec, 0);
  return total / results.length;
}

/**
 * Appends one entry to the history file, creating the parent directory when
 * needed. The whole file is rewritten as pretty-printed JSON with a trailing
 * newline.
 *
 * @param entry - The benchmark run to record.
 */
function saveBenchmark(entry: BenchmarkEntry): void {
  const history = loadHistory();
  history.push(entry);
  mkdirSync(dirname(BENCHMARKS_PATH), { recursive: true });
  writeFileSync(BENCHMARKS_PATH, JSON.stringify(history, null, 2) + "\n");
}

/**
 * Prints a per-prompt results table followed by the average tokens/sec.
 *
 * @param model - Model whose name and tag are shown in the heading.
 * @param results - One row per prompt that completed.
 */
function printResults(model: ModelDef, results: PromptResult[]): void {
  console.log("");
  console.log(`${BOLD}Model:${RESET} ${model.name} (${model.ollamaTag})`);
  console.log(`${BOLD}Port:${RESET} ${OLLAMA_PORT}`);
  console.log("");

  // Table header
  const hdr = [
    "Prompt".padEnd(16),
    "Prompt Tok".padStart(10),
    "Compl Tok".padStart(10),
    "Time (s)".padStart(10),
    "Tok/s".padStart(8),
  ].join(" ");
  console.log(` ${BOLD}${hdr}${RESET}`);
  console.log(` ${"─".repeat(hdr.length)}`);

  for (const r of results) {
    const row = [
      r.label.padEnd(16),
      String(r.promptTokens).padStart(10),
      String(r.completionTokens).padStart(10),
      (r.elapsedMs / 1000).toFixed(1).padStart(10),
      r.tokensPerSec.toFixed(1).padStart(8),
    ].join(" ");
    console.log(` ${row}`);
  }

  const avgTokSec = averageTokensPerSec(results);
  console.log("");
  console.log(` ${BOLD}Average: ${avgTokSec.toFixed(1)} tok/s${RESET}`);
  console.log("");
}

/**
 * Prints a table of every stored benchmark run (date, model name, average
 * tokens/sec), or a notice when there is no history to show.
 */
function printHistory(): void {
  const history = loadHistory();
  if (history.length === 0) {
    console.log("No benchmark history found.");
    return;
  }

  console.log(`\n${BOLD}Benchmark History:${RESET}\n`);
  const hdr = [
    "Date".padEnd(20),
    "Model".padEnd(24),
    "Avg Tok/s".padStart(10),
  ].join(" ");
  console.log(` ${BOLD}${hdr}${RESET}`);
  console.log(` ${"─".repeat(hdr.length)}`);

  for (const entry of history) {
    // ISO timestamp "YYYY-MM-DDTHH:MM:SS.sssZ" -> "YYYY-MM-DD HH:MM:SS"
    const date = entry.timestamp.replace("T", " ").slice(0, 19);
    const row = [
      date.padEnd(20),
      entry.modelName.padEnd(24),
      entry.avgTokPerSec.toFixed(1).padStart(10),
    ].join(" ");
    console.log(` ${row}`);
  }
  console.log("");
}

/**
 * Entry point of the benchmark command.
 *
 * With `--history` in `args`, prints the stored runs and returns. Otherwise
 * checks that the server is reachable, runs every prompt in `PROMPTS`
 * sequentially against the active chat model, prints a results table, and
 * appends the run to the history file. A prompt that fails is reported and
 * skipped; the remaining prompts still run.
 *
 * @param args - Command-line arguments for this command.
 */
export async function runBench(args: string[]): Promise<void> {
  if (args.includes("--history")) {
    printHistory();
    return;
  }

  const healthy = await checkHealth();
  if (!healthy) {
    err("Ollama not running. Start it with: localcode start");
    // err() is defined elsewhere; return explicitly in case it does not
    // terminate the process, so we never benchmark against a dead server.
    return;
  }

  const model = getActiveChatModel();
  log(`Benchmarking ${model.name} (${model.ollamaTag})...`);
  console.log(`${DIM}Running ${PROMPTS.length} prompts (this may take a minute)...${RESET}`);

  // Prompts run one at a time on purpose: concurrent requests would compete
  // for the same model and distort the throughput numbers.
  const results: PromptResult[] = [];
  for (const prompt of PROMPTS) {
    process.stdout.write(` ${prompt.label}...`);
    try {
      const result = await runPrompt(model, prompt);
      results.push(result);
      console.log(` ${result.tokensPerSec.toFixed(1)} tok/s`);
    } catch (e) {
      console.log(` FAILED: ${e instanceof Error ? e.message : String(e)}`);
    }
  }

  if (results.length === 0) {
    err("All prompts failed.");
    // Nothing to print or save; also guards against recording an empty run
    // if err() returns.
    return;
  }

  printResults(model, results);

  // Save to history
  saveBenchmark({
    timestamp: new Date().toISOString(),
    model: model.id,
    modelName: model.name,
    results,
    avgTokPerSec: averageTokensPerSec(results),
  });
  log(`Results saved to ${BENCHMARKS_PATH}`);
}