src/commands/bench.ts at main · burrito.space/localcode

burrito.space / localcode
fork atom
Script for easily configuring, using, switching and comparing local offline coding models
fork atom
localcode / src / commands / bench.ts
at main 273 lines 7.0 kB view raw
wrap content
burrito.tngl.sh Replace llama.cpp backend with Ollama 15d ago
e450ff34
  1import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
  2import { dirname, join } from "node:path";
  3import { homedir } from "node:os";
  4import { performance } from "node:perf_hooks";
  5import { OLLAMA_URL, OLLAMA_PORT } from "../config.js";
  6import { getActiveChatModel } from "../runtime-config.js";
  7import { log, err } from "../log.js";
  8import type { ModelDef } from "../registry/models.js";
  9
// ANSI escape sequences used to style terminal output in this module.
const BOLD = "\x1b[1m";
const DIM = "\x1b[2m";
const RESET = "\x1b[0m";

// JSON file under the user's home directory holding the benchmark history;
// saveBenchmark appends to it and printHistory reads it back.
const BENCHMARKS_PATH = join(
  homedir(),
  ".config",
  "localcode",
  "benchmarks.json",
);
 20
/** One benchmark case: the messages sent to the model plus a display label. */
interface BenchPrompt {
  /** Short name shown in progress output and in the results table. */
  label: string;
  /** Content of the "system" message sent with the request. */
  system: string;
  /** Content of the "user" message sent with the request. */
  user: string;
}
 26
/**
 * Fixed set of prompts run, in order, by every benchmark. Each entry is sent
 * as one system + user message pair; the label identifies it in the output.
 */
const PROMPTS: BenchPrompt[] = [
  // Short code-generation request.
  {
    label: "fizzbuzz",
    system: "You are an expert programmer.",
    user: "Write a fizzbuzz function in Python.",
  },
  // Larger code-generation request.
  {
    label: "BST class",
    system: "You are an expert programmer.",
    user: "Write a binary search tree implementation in TypeScript with insert, delete, and search methods.",
  },
  // Review request whose user message embeds a JavaScript snippet verbatim.
  {
    label: "code review",
    system: "You are an expert code reviewer.",
    user: `Review this code and suggest improvements:

function processData(data) {
  var result = [];
  for (var i = 0; i < data.length; i++) {
    if (data[i].active == true) {
      var item = {};
      item.name = data[i].firstName + " " + data[i].lastName;
      item.email = data[i].email;
      item.score = data[i].points / data[i].maxPoints * 100;
      if (item.score >= 90) {
        item.grade = "A";
      } else if (item.score >= 80) {
        item.grade = "B";
      } else if (item.score >= 70) {
        item.grade = "C";
      } else if (item.score >= 60) {
        item.grade = "D";
      } else {
        item.grade = "F";
      }
      result.push(item);
    }
  }
  result.sort(function(a, b) { return b.score - a.score; });
  return result;
}`,
  },
];
 70
/** Measurements collected by runPrompt for a single prompt. */
interface PromptResult {
  /** Label of the BenchPrompt that produced this result. */
  label: string;
  /** `usage.prompt_tokens` from the response, or 0 when absent. */
  promptTokens: number;
  /** `usage.completion_tokens` from the response, or 0 when absent. */
  completionTokens: number;
  /** Elapsed time for the request, in milliseconds (from `performance.now()`). */
  elapsedMs: number;
  /** completionTokens divided by the elapsed time in seconds; 0 when no completion tokens were reported. */
  tokensPerSec: number;
}
 78
/** One benchmark run as stored in the history file at BENCHMARKS_PATH. */
interface BenchmarkEntry {
  /** ISO-8601 time of the run (runBench writes `new Date().toISOString()`). */
  timestamp: string;
  /** The benchmarked model's `id`. */
  model: string;
  /** The benchmarked model's `name`; this is what printHistory displays. */
  modelName: string;
  /** Per-prompt measurements for the prompts that completed. */
  results: PromptResult[];
  /** Arithmetic mean of `tokensPerSec` over `results`. */
  avgTokPerSec: number;
}
 86
 87async function checkHealth(): Promise<boolean> {
 88  try {
 89    const res = await fetch(`${OLLAMA_URL}/api/tags`);
 90    return res.ok;
 91  } catch {
 92    return false;
 93  }
 94}
 95
 96async function runPrompt(
 97  model: ModelDef,
 98  prompt: BenchPrompt,
 99): Promise<PromptResult> {
100  const body = JSON.stringify({
101    model: model.ollamaTag,
102    messages: [
103      { role: "system", content: prompt.system },
104      { role: "user", content: prompt.user },
105    ],
106    stream: false,
107  });
108
109  const start = performance.now();
110  const res = await fetch(
111    `${OLLAMA_URL}/v1/chat/completions`,
112    {
113      method: "POST",
114      headers: { "Content-Type": "application/json" },
115      body,
116    },
117  );
118
119  if (!res.ok) {
120    const text = await res.text();
121    throw new Error(`Server returned ${res.status}: ${text}`);
122  }
123
124  const elapsed = performance.now() - start;
125  const data = (await res.json()) as {
126    usage?: { prompt_tokens?: number; completion_tokens?: number };
127  };
128
129  const promptTokens = data.usage?.prompt_tokens ?? 0;
130  const completionTokens = data.usage?.completion_tokens ?? 0;
131  const tokPerSec =
132    completionTokens > 0 ? completionTokens / (elapsed / 1000) : 0;
133
134  return {
135    label: prompt.label,
136    promptTokens,
137    completionTokens,
138    elapsedMs: elapsed,
139    tokensPerSec: tokPerSec,
140  };
141}
142
/**
 * Appends one benchmark run to the JSON history file at BENCHMARKS_PATH,
 * creating the parent directory and the file when they do not exist yet.
 *
 * A missing, unreadable, or unparsable file, and a file whose top-level JSON
 * value is not an array, are all treated as an empty history so the new entry
 * can still be stored.
 *
 * @param entry The run to append.
 */
function saveBenchmark(entry: BenchmarkEntry): void {
  let history: BenchmarkEntry[] = [];
  try {
    const parsed: unknown = JSON.parse(
      readFileSync(BENCHMARKS_PATH, "utf-8"),
    );
    // Valid JSON is not necessarily an array (e.g. `null` or `{}`); calling
    // push() on such a value would throw after the benchmark already ran.
    if (Array.isArray(parsed)) {
      history = parsed as BenchmarkEntry[];
    }
  } catch {
    // No existing (or readable) history file: start a new one.
  }
  history.push(entry);
  mkdirSync(dirname(BENCHMARKS_PATH), { recursive: true });
  writeFileSync(BENCHMARKS_PATH, JSON.stringify(history, null, 2) + "\n");
}
156
/**
 * Prints the results of one benchmark run to stdout: the model and port,
 * a table with one row per prompt, and the average throughput.
 *
 * @param model   Model that was benchmarked (its `name` and `ollamaTag` are shown).
 * @param results Per-prompt measurements; may be empty, in which case only
 *   the table header is printed and the average is reported as 0.0.
 */
function printResults(model: ModelDef, results: PromptResult[]): void {
  console.log("");
  console.log(`${BOLD}Model:${RESET} ${model.name} (${model.ollamaTag})`);
  console.log(`${BOLD}Port:${RESET}  ${OLLAMA_PORT}`);
  console.log("");

  // Table header
  const hdr = [
    "Prompt".padEnd(16),
    "Prompt Tok".padStart(10),
    "Compl Tok".padStart(10),
    "Time (s)".padStart(10),
    "Tok/s".padStart(8),
  ].join("  ");
  console.log(`  ${BOLD}${hdr}${RESET}`);
  // Underline as wide as the header text.
  console.log(`  ${"─".repeat(hdr.length)}`);

  for (const r of results) {
    const row = [
      r.label.padEnd(16),
      String(r.promptTokens).padStart(10),
      String(r.completionTokens).padStart(10),
      (r.elapsedMs / 1000).toFixed(1).padStart(10),
      r.tokensPerSec.toFixed(1).padStart(8),
    ].join("  ");
    console.log(`  ${row}`);
  }

  // An empty list would make the mean 0/0 = NaN; report 0 instead.
  const avgTokSec =
    results.length > 0
      ? results.reduce((s, r) => s + r.tokensPerSec, 0) / results.length
      : 0;
  console.log("");
  console.log(`  ${BOLD}Average: ${avgTokSec.toFixed(1)} tok/s${RESET}`);
  console.log("");
}
191
/**
 * Prints every stored benchmark run (date, model name, average tok/s) from
 * the history file at BENCHMARKS_PATH as a table on stdout.
 *
 * Prints "No benchmark history found." when the file is missing, cannot be
 * parsed, does not contain a JSON array, or contains an empty array.
 */
function printHistory(): void {
  let history: BenchmarkEntry[] = [];
  try {
    const parsed: unknown = JSON.parse(
      readFileSync(BENCHMARKS_PATH, "utf-8"),
    );
    // Valid JSON that is not an array (e.g. `null`, `{}`) cannot be iterated
    // below; treat it the same as having no history.
    if (Array.isArray(parsed)) {
      history = parsed as BenchmarkEntry[];
    }
  } catch {
    console.log("No benchmark history found.");
    return;
  }

  if (history.length === 0) {
    console.log("No benchmark history found.");
    return;
  }

  console.log(`\n${BOLD}Benchmark History:${RESET}\n`);
  const hdr = [
    "Date".padEnd(20),
    "Model".padEnd(24),
    "Avg Tok/s".padStart(10),
  ].join("  ");
  console.log(`  ${BOLD}${hdr}${RESET}`);
  console.log(`  ${"─".repeat(hdr.length)}`);

  for (const entry of history) {
    // "2024-01-02T03:04:05.678Z" -> "2024-01-02 03:04:05"
    const date = entry.timestamp.replace("T", " ").slice(0, 19);
    const row = [
      date.padEnd(20),
      entry.modelName.padEnd(24),
      entry.avgTokPerSec.toFixed(1).padStart(10),
    ].join("  ");
    console.log(`  ${row}`);
  }
  console.log("");
}
228
/**
 * Entry point of the `bench` command.
 *
 * With `--history` it only prints the stored benchmark history. Otherwise it
 * checks that the server is reachable, runs every prompt in PROMPTS against
 * the active chat model, prints a results table, and appends the run to the
 * history file. A prompt that fails is reported and skipped; the average is
 * taken over the prompts that succeeded.
 *
 * @param args Command-line arguments following the command name.
 */
export async function runBench(args: string[]): Promise<void> {
  if (args.includes("--history")) {
    printHistory();
    return;
  }

  const healthy = await checkHealth();
  if (!healthy) {
    // Return explicitly so the benchmark never proceeds against a dead
    // server, whether or not err() terminates the process.
    return err("Ollama not running. Start it with: localcode start");
  }

  const model = getActiveChatModel();
  log(`Benchmarking ${model.name} (${model.ollamaTag})...`);
  console.log(`${DIM}Running ${PROMPTS.length} prompts (this may take a minute)...${RESET}`);

  const results: PromptResult[] = [];
  // Prompts run one at a time on purpose: running them concurrently would
  // make them compete for the model and distort the timings.
  for (const prompt of PROMPTS) {
    process.stdout.write(`  ${prompt.label}...`);
    try {
      const result = await runPrompt(model, prompt);
      results.push(result);
      console.log(` ${result.tokensPerSec.toFixed(1)} tok/s`);
    } catch (e) {
      console.log(` FAILED: ${e instanceof Error ? e.message : e}`);
    }
  }

  if (results.length === 0) {
    // Return explicitly: continuing would divide by zero below and store a
    // NaN average in the history file.
    return err("All prompts failed.");
  }

  printResults(model, results);

  // Save to history
  const avgTokPerSec =
    results.reduce((s, r) => s + r.tokensPerSec, 0) / results.length;
  saveBenchmark({
    timestamp: new Date().toISOString(),
    model: model.id,
    modelName: model.name,
    results,
    avgTokPerSec,
  });
  log(`Results saved to ${BENCHMARKS_PATH}`);
}