// Script for easily configuring, using, switching and comparing local offline coding models
1import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
2import { dirname, join } from "node:path";
3import { homedir } from "node:os";
4import { performance } from "node:perf_hooks";
5import { OLLAMA_URL, OLLAMA_PORT } from "../config.js";
6import { getActiveChatModel } from "../runtime-config.js";
7import { log, err } from "../log.js";
8import type { ModelDef } from "../registry/models.js";
9
// ANSI escape sequences for terminal emphasis in benchmark output.
const BOLD = "\x1b[1m";
const DIM = "\x1b[2m";
const RESET = "\x1b[0m";

// Persisted benchmark history: ~/.config/localcode/benchmarks.json
const BENCHMARKS_PATH = join(
  homedir(),
  ".config",
  "localcode",
  "benchmarks.json",
);

// One benchmark prompt: a short display label plus the system/user
// messages sent to the chat-completions endpoint.
interface BenchPrompt {
  label: string;
  system: string;
  user: string;
}
26
// Fixed prompt suite used for every benchmark run, ordered roughly by
// expected completion length (short snippet → class → long review).
// Keeping the suite stable makes tok/s numbers comparable across runs.
const PROMPTS: BenchPrompt[] = [
  {
    label: "fizzbuzz",
    system: "You are an expert programmer.",
    user: "Write a fizzbuzz function in Python.",
  },
  {
    label: "BST class",
    system: "You are an expert programmer.",
    user: "Write a binary search tree implementation in TypeScript with insert, delete, and search methods.",
  },
  {
    // Deliberately unidiomatic JS sample so the model has plenty to critique.
    label: "code review",
    system: "You are an expert code reviewer.",
    user: `Review this code and suggest improvements:

function processData(data) {
  var result = [];
  for (var i = 0; i < data.length; i++) {
    if (data[i].active == true) {
      var item = {};
      item.name = data[i].firstName + " " + data[i].lastName;
      item.email = data[i].email;
      item.score = data[i].points / data[i].maxPoints * 100;
      if (item.score >= 90) {
        item.grade = "A";
      } else if (item.score >= 80) {
        item.grade = "B";
      } else if (item.score >= 70) {
        item.grade = "C";
      } else if (item.score >= 60) {
        item.grade = "D";
      } else {
        item.grade = "F";
      }
      result.push(item);
    }
  }
  result.sort(function(a, b) { return b.score - a.score; });
  return result;
}`,
  },
];
70
// Measured outcome of a single prompt run.
interface PromptResult {
  label: string;
  promptTokens: number;      // usage.prompt_tokens reported by the server (0 if absent)
  completionTokens: number;  // usage.completion_tokens reported by the server (0 if absent)
  elapsedMs: number;         // wall-clock request time measured client-side
  tokensPerSec: number;      // completionTokens / elapsed seconds; 0 when no completion tokens
}

// One persisted history record: a full run of the prompt suite for a model.
interface BenchmarkEntry {
  timestamp: string;         // ISO-8601, from new Date().toISOString()
  model: string;             // model.id
  modelName: string;         // model.name (display name)
  results: PromptResult[];
  avgTokPerSec: number;      // mean tokensPerSec over results
}
86
87async function checkHealth(): Promise<boolean> {
88 try {
89 const res = await fetch(`${OLLAMA_URL}/api/tags`);
90 return res.ok;
91 } catch {
92 return false;
93 }
94}
95
96async function runPrompt(
97 model: ModelDef,
98 prompt: BenchPrompt,
99): Promise<PromptResult> {
100 const body = JSON.stringify({
101 model: model.ollamaTag,
102 messages: [
103 { role: "system", content: prompt.system },
104 { role: "user", content: prompt.user },
105 ],
106 stream: false,
107 });
108
109 const start = performance.now();
110 const res = await fetch(
111 `${OLLAMA_URL}/v1/chat/completions`,
112 {
113 method: "POST",
114 headers: { "Content-Type": "application/json" },
115 body,
116 },
117 );
118
119 if (!res.ok) {
120 const text = await res.text();
121 throw new Error(`Server returned ${res.status}: ${text}`);
122 }
123
124 const elapsed = performance.now() - start;
125 const data = (await res.json()) as {
126 usage?: { prompt_tokens?: number; completion_tokens?: number };
127 };
128
129 const promptTokens = data.usage?.prompt_tokens ?? 0;
130 const completionTokens = data.usage?.completion_tokens ?? 0;
131 const tokPerSec =
132 completionTokens > 0 ? completionTokens / (elapsed / 1000) : 0;
133
134 return {
135 label: prompt.label,
136 promptTokens,
137 completionTokens,
138 elapsedMs: elapsed,
139 tokensPerSec: tokPerSec,
140 };
141}
142
143function saveBenchmark(entry: BenchmarkEntry): void {
144 let history: BenchmarkEntry[] = [];
145 try {
146 history = JSON.parse(
147 readFileSync(BENCHMARKS_PATH, "utf-8"),
148 ) as BenchmarkEntry[];
149 } catch {
150 // No existing file
151 }
152 history.push(entry);
153 mkdirSync(dirname(BENCHMARKS_PATH), { recursive: true });
154 writeFileSync(BENCHMARKS_PATH, JSON.stringify(history, null, 2) + "\n");
155}
156
157function printResults(model: ModelDef, results: PromptResult[]): void {
158 console.log("");
159 console.log(`${BOLD}Model:${RESET} ${model.name} (${model.ollamaTag})`);
160 console.log(`${BOLD}Port:${RESET} ${OLLAMA_PORT}`);
161 console.log("");
162
163 // Table header
164 const hdr = [
165 "Prompt".padEnd(16),
166 "Prompt Tok".padStart(10),
167 "Compl Tok".padStart(10),
168 "Time (s)".padStart(10),
169 "Tok/s".padStart(8),
170 ].join(" ");
171 console.log(` ${BOLD}${hdr}${RESET}`);
172 console.log(` ${"─".repeat(hdr.length)}`);
173
174 for (const r of results) {
175 const row = [
176 r.label.padEnd(16),
177 String(r.promptTokens).padStart(10),
178 String(r.completionTokens).padStart(10),
179 (r.elapsedMs / 1000).toFixed(1).padStart(10),
180 r.tokensPerSec.toFixed(1).padStart(8),
181 ].join(" ");
182 console.log(` ${row}`);
183 }
184
185 const avgTokSec =
186 results.reduce((s, r) => s + r.tokensPerSec, 0) / results.length;
187 console.log("");
188 console.log(` ${BOLD}Average: ${avgTokSec.toFixed(1)} tok/s${RESET}`);
189 console.log("");
190}
191
192function printHistory(): void {
193 let history: BenchmarkEntry[] = [];
194 try {
195 history = JSON.parse(
196 readFileSync(BENCHMARKS_PATH, "utf-8"),
197 ) as BenchmarkEntry[];
198 } catch {
199 console.log("No benchmark history found.");
200 return;
201 }
202
203 if (history.length === 0) {
204 console.log("No benchmark history found.");
205 return;
206 }
207
208 console.log(`\n${BOLD}Benchmark History:${RESET}\n`);
209 const hdr = [
210 "Date".padEnd(20),
211 "Model".padEnd(24),
212 "Avg Tok/s".padStart(10),
213 ].join(" ");
214 console.log(` ${BOLD}${hdr}${RESET}`);
215 console.log(` ${"─".repeat(hdr.length)}`);
216
217 for (const entry of history) {
218 const date = entry.timestamp.replace("T", " ").slice(0, 19);
219 const row = [
220 date.padEnd(20),
221 entry.modelName.padEnd(24),
222 entry.avgTokPerSec.toFixed(1).padStart(10),
223 ].join(" ");
224 console.log(` ${row}`);
225 }
226 console.log("");
227}
228
229export async function runBench(args: string[]): Promise<void> {
230 if (args.includes("--history")) {
231 printHistory();
232 return;
233 }
234
235 const healthy = await checkHealth();
236 if (!healthy) {
237 err("Ollama not running. Start it with: localcode start");
238 }
239
240 const model = getActiveChatModel();
241 log(`Benchmarking ${model.name} (${model.ollamaTag})...`);
242 console.log(`${DIM}Running ${PROMPTS.length} prompts (this may take a minute)...${RESET}`);
243
244 const results: PromptResult[] = [];
245 for (const prompt of PROMPTS) {
246 process.stdout.write(` ${prompt.label}...`);
247 try {
248 const result = await runPrompt(model, prompt);
249 results.push(result);
250 console.log(` ${result.tokensPerSec.toFixed(1)} tok/s`);
251 } catch (e) {
252 console.log(` FAILED: ${e instanceof Error ? e.message : e}`);
253 }
254 }
255
256 if (results.length === 0) {
257 err("All prompts failed.");
258 }
259
260 printResults(model, results);
261
262 // Save to history
263 const avgTokPerSec =
264 results.reduce((s, r) => s + r.tokensPerSec, 0) / results.length;
265 saveBenchmark({
266 timestamp: new Date().toISOString(),
267 model: model.id,
268 modelName: model.name,
269 results,
270 avgTokPerSec,
271 });
272 log(`Results saved to ${BENCHMARKS_PATH}`);
273}