src/classifier-llm.ts at main · chadfowler.com/phoenix

chadfowler.com / phoenix
fork atom
Reference implementation for the Phoenix Architecture. Work in progress. aicoding.leaflet.pub/
ai coding crazy
fork atom
phoenix / src / classifier-llm.ts
at main 144 lines 4.6 kB view raw
wrap content
Chad Fowler LLM-enhanced canonicalization & classification + E2E success criteria tests 6w ago
f7b13024
  1/**
  2 * LLM-Enhanced Change Classifier
  3 *
  4 * When the rule-based classifier produces a D (uncertain) classification,
  5 * optionally escalates to an LLM for a more informed decision.
  6 *
  7 * This reduces the D-rate by providing semantic understanding that
  8 * heuristics alone cannot achieve.
  9 */
 10
 11import type { ClauseDiff } from './models/clause.js';
 12import type { CanonicalNode } from './models/canonical.js';
 13import type { ChangeClassification } from './models/classification.js';
 14import { ChangeClass } from './models/classification.js';
 15import type { LLMProvider } from './llm/provider.js';
 16import { classifyChange } from './classifier.js';
 17
/**
 * System prompt for LLM-based change classification.
 *
 * Passed as the `system` option when `resolveWithLLM` calls the provider.
 * It defines the four categories (A-D), asks for a single-letter answer,
 * and nudges the model to prefer B over D when the change is reasonably
 * clear, so that D is reserved for genuinely ambiguous changes.
 */
const CLASSIFY_SYSTEM_PROMPT = `You are a change classification expert for a version control system.

Classify the following spec change into exactly one category:
- A: Trivial change (formatting, whitespace, rewording with identical meaning)
- B: Local semantic change (meaning changed but only affects this clause)
- C: Contextual semantic shift (change affects meaning of related clauses/requirements)
- D: Truly uncertain (cannot determine impact without more context)

Respond with ONLY a single letter: A, B, C, or D.
Be conservative: prefer B over D when there's reasonable clarity.
Only use D when the change is genuinely ambiguous.`;
 29
/**
 * Options that enable and tune LLM escalation in `classifyChangeWithLLM`
 * and `classifyChangesWithLLM`. When no options (or no `llm`) are supplied,
 * those functions return the rule-based classification unchanged.
 */
export interface LLMClassifierOptions {
  /** LLM provider used to re-classify a change when escalation applies. */
  llm: LLMProvider;
  /**
   * Only escalate D-class to LLM. Default: true.
   * Results of every class are escalated only when this is explicitly `false`;
   * leaving it `undefined` behaves the same as `true`.
   */
  dClassOnly?: boolean;
}
 36
 37/**
 38 * Classify a change, optionally using LLM for uncertain (D) results.
 39 */
 40export async function classifyChangeWithLLM(
 41  diff: ClauseDiff,
 42  canonBefore: CanonicalNode[],
 43  canonAfter: CanonicalNode[],
 44  warmBefore: string | undefined,
 45  warmAfter: string | undefined,
 46  options?: LLMClassifierOptions,
 47): Promise<ChangeClassification> {
 48  // First: rule-based classification
 49  const result = classifyChange(diff, canonBefore, canonAfter, warmBefore, warmAfter);
 50
 51  // If no LLM or not D-class, return as-is
 52  if (!options?.llm) return result;
 53  if (options.dClassOnly !== false && result.change_class !== ChangeClass.D) {
 54    return result;
 55  }
 56
 57  // Escalate to LLM
 58  try {
 59    const llmClass = await resolveWithLLM(diff, options.llm);
 60    return {
 61      ...result,
 62      change_class: llmClass,
 63      confidence: llmClass === ChangeClass.D ? result.confidence : Math.max(result.confidence, 0.75),
 64      llm_resolved: true,
 65    };
 66  } catch {
 67    // LLM failed — keep the rule-based result
 68    return result;
 69  }
 70}
 71
 72/**
 73 * Batch-classify changes, escalating D-class to LLM.
 74 */
 75export async function classifyChangesWithLLM(
 76  diffs: ClauseDiff[],
 77  canonBefore: CanonicalNode[],
 78  canonAfter: CanonicalNode[],
 79  warmBefore: Map<string, string> | undefined,
 80  warmAfter: Map<string, string> | undefined,
 81  options?: LLMClassifierOptions,
 82): Promise<ChangeClassification[]> {
 83  const results: ChangeClassification[] = [];
 84
 85  for (const diff of diffs) {
 86    const wb = diff.clause_id_before ? warmBefore?.get(diff.clause_id_before) : undefined;
 87    const wa = diff.clause_id_after ? warmAfter?.get(diff.clause_id_after) : undefined;
 88    results.push(await classifyChangeWithLLM(diff, canonBefore, canonAfter, wb, wa, options));
 89  }
 90
 91  return results;
 92}
 93
 94async function resolveWithLLM(diff: ClauseDiff, llm: LLMProvider): Promise<ChangeClass> {
 95  const prompt = buildClassifyPrompt(diff);
 96
 97  const response = await llm.generate(prompt, {
 98    system: CLASSIFY_SYSTEM_PROMPT,
 99    temperature: 0,
100    maxTokens: 8,
101  });
102
103  const letter = response.trim().toUpperCase().charAt(0);
104
105  switch (letter) {
106    case 'A': return ChangeClass.A;
107    case 'B': return ChangeClass.B;
108    case 'C': return ChangeClass.C;
109    case 'D': return ChangeClass.D;
110    default: return ChangeClass.D; // Unrecognized → stay uncertain
111  }
112}
113
/**
 * Render a clause diff as the user prompt for the classification LLM.
 *
 * The prompt lists the "before" and/or "after" clause (section path joined
 * with " > ", or "(root)" when the path is missing or empty, followed by the
 * trimmed raw text), notes when the clause was added or removed, and ends
 * with the answer instruction.
 *
 * @param diff - The clause diff to describe.
 * @returns The prompt text, lines joined with "\n".
 */
function buildClassifyPrompt(diff: ClauseDiff): string {
  const { clause_before: before, clause_after: after } = diff;

  // An empty joined path is falsy, so it also falls back to "(root)".
  const sectionLine = (joinedPath: string | undefined): string =>
    `Section: ${joinedPath || '(root)'}`;

  const parts: string[] = ['Classify the following spec change:', ''];

  if (before) {
    parts.push(
      '## Before:',
      sectionLine(diff.section_path_before?.join(' > ')),
      before.raw_text.trim(),
      '',
    );
  }

  if (after) {
    parts.push(
      '## After:',
      sectionLine(diff.section_path_after?.join(' > ')),
      after.raw_text.trim(),
      '',
    );
  }

  // A missing "before" side takes precedence when neither side is present.
  if (!before) {
    parts.push('This is a NEW clause (added).');
  } else if (!after) {
    parts.push('This clause was REMOVED.');
  }

  parts.push('', 'Respond with A, B, C, or D.');

  return parts.join('\n');
}