Reference implementation for the Phoenix Architecture. Work in progress.
aicoding.leaflet.pub/
ai
coding
crazy
1/**
2 * Change Classification Eval — tests classifier accuracy on known change pairs.
3 */
4import { parseSpec } from '../src/spec-parser.js';
5import { classifyChange } from '../src/classifier.js';
6import { extractCanonicalNodes } from '../src/canonicalizer.js';
7import { diffClauses } from '../src/diff.js';
8import { DiffType } from '../src/models/clause.js';
9
/**
 * Labels a change can be expected to receive:
 * A = trivial/formatting, B = local semantic change,
 * C = contextual/structural, D = uncertain/major.
 */
type ChangeClass = 'A' | 'B' | 'C' | 'D';

/** One labeled before/after pair used to score the change classifier. */
interface ChangeTestCase {
  /** Human-readable label printed in the eval report. */
  name: string;
  /** Spec snippet before the change (markdown body placed under a `# Test` heading). */
  before: string;
  /** Spec snippet after the change (markdown body placed under a `# Test` heading). */
  after: string;
  /** Class the classifier is expected to assign to this change. */
  expectedClass: ChangeClass;
}
16
/**
 * Labeled change pairs for the eval, grouped by the class the classifier
 * is expected to assign. Each `before`/`after` is a markdown fragment.
 */
const CASES: ChangeTestCase[] = [
  // Class A: trivial/formatting
  {
    name: 'whitespace only',
    before: '- Users must log in',
    // Must differ from `before` ONLY in whitespace; identical strings would
    // produce no change and the case would not test whitespace handling at all.
    after: '- Users  must   log  in',
    expectedClass: 'A',
  },
  {
    name: 'capitalization change',
    before: '- The system must validate input',
    after: '- The System Must Validate Input',
    expectedClass: 'A',
  },
  {
    name: 'punctuation change',
    before: '- Users must authenticate.',
    after: '- Users must authenticate',
    expectedClass: 'A',
  },

  // Class B: local semantic change
  {
    name: 'word substitution (synonym)',
    before: '- The system must validate user email',
    after: '- The system must verify user email',
    expectedClass: 'B',
  },
  {
    name: 'added detail',
    before: '- Users must authenticate',
    after: '- Users must authenticate with email and password',
    expectedClass: 'B',
  },
  {
    name: 'numeric value change',
    before: '- Passwords must be at least 8 characters',
    after: '- Passwords must be at least 12 characters',
    expectedClass: 'B',
  },

  // Class C: contextual/structural
  {
    name: 'section reorganization',
    // Only the section heading changes; the bullet text is identical.
    before: '## Authentication\n\n- Users must log in\n- Sessions expire after 30 minutes',
    after: '## Security\n\n- Users must log in\n- Sessions expire after 30 minutes',
    expectedClass: 'C',
  },

  // Class D: uncertain/major
  {
    name: 'complete rewrite',
    before: '- The system authenticates users via email and password',
    after: '- OAuth2 providers handle all authentication flows',
    expectedClass: 'D',
  },
  {
    name: 'semantic reversal',
    before: '- Users must provide a password',
    after: '- Users must use passwordless authentication',
    expectedClass: 'D',
  },
];
80
/**
 * Eval driver: runs every entry of CASES through parse -> canonicalize ->
 * diff -> classify, prints one ✓/✗ line per case, then prints the aggregate
 * score. The final `val_score=` line is the fraction of cases passed.
 */
let passed = 0;
let total = 0;

console.log('Change Classification Eval\n');

for (const tc of CASES) {
  total++;
  // Each fragment is placed under a shared top-level heading before parsing.
  const beforeClauses = parseSpec(`# Test\n\n${tc.before}`, 'test.md');
  const afterClauses = parseSpec(`# Test\n\n${tc.after}`, 'test.md');
  const beforeNodes = extractCanonicalNodes(beforeClauses);
  const afterNodes = extractCanonicalNodes(afterClauses);

  const diffs = diffClauses(beforeClauses, afterClauses);
  // Prefer the first diff that is not UNCHANGED; if every diff is UNCHANGED,
  // fall back to the last one.
  const diff = diffs.find(d => d.diff_type !== DiffType.UNCHANGED) ?? diffs[diffs.length - 1];
  if (diff === undefined) {
    // diffClauses returned nothing, so there is no diff to classify. Count the
    // case as failed instead of handing `undefined` to classifyChange.
    console.log(` ✗ ${tc.name}: expected=${tc.expectedClass} got=(no diff produced)`);
    continue;
  }
  const result = classifyChange(diff, beforeNodes, afterNodes);

  const ok = result.change_class === tc.expectedClass;
  if (ok) passed++;
  console.log(` ${ok ? '✓' : '✗'} ${tc.name}: expected=${tc.expectedClass} got=${result.change_class} conf=${result.confidence.toFixed(2)}`);
}

// Guard the division so an empty case list reports 0 rather than NaN.
const ratio = total > 0 ? passed / total : 0;
console.log(`\nScore: ${passed}/${total} (${(ratio * 100).toFixed(0)}%)`);
console.log(`val_score=${ratio.toFixed(4)}`);