Reference implementation for the Phoenix Architecture. Work in progress. aicoding.leaflet.pub/
ai coding crazy
at main 104 lines 3.2 kB view raw
/**
 * Change Classification Eval — tests classifier accuracy on known change pairs.
 *
 * Every case is wrapped in a minimal spec document, parsed, diffed and
 * classified; the predicted class is then compared with the expected one.
 * The script prints one line per case, a summary score, and a machine-readable
 * `val_score=` line on stdout.
 */
import { parseSpec } from '../src/spec-parser.js';
import { classifyChange } from '../src/classifier.js';
import { extractCanonicalNodes } from '../src/canonicalizer.js';
import { diffClauses } from '../src/diff.js';
import { DiffType } from '../src/models/clause.js';

/** A before/after pair of spec fragments and the change class expected for it. */
interface ChangeTestCase {
  /** Human-readable label printed in the per-case result line. */
  name: string;
  /** Spec fragment before the change (markdown body, no top-level heading). */
  before: string;
  /** Spec fragment after the change. */
  after: string;
  /** Class the classifier is expected to return for this pair. */
  expectedClass: 'A' | 'B' | 'C' | 'D';
}

const CASES: ChangeTestCase[] = [
  // Class A: trivial/formatting
  {
    name: 'whitespace only',
    before: '- Users must log in',
    // Differs from `before` only in inter-word spacing; identical strings
    // would exercise no change at all.
    after: '- Users  must   log in',
    expectedClass: 'A',
  },
  {
    name: 'capitalization change',
    before: '- The system must validate input',
    after: '- The System Must Validate Input',
    expectedClass: 'A',
  },
  {
    name: 'punctuation change',
    before: '- Users must authenticate.',
    after: '- Users must authenticate',
    expectedClass: 'A',
  },

  // Class B: local semantic change
  {
    name: 'word substitution (synonym)',
    before: '- The system must validate user email',
    after: '- The system must verify user email',
    expectedClass: 'B',
  },
  {
    name: 'added detail',
    before: '- Users must authenticate',
    after: '- Users must authenticate with email and password',
    expectedClass: 'B',
  },
  {
    name: 'numeric value change',
    before: '- Passwords must be at least 8 characters',
    after: '- Passwords must be at least 12 characters',
    expectedClass: 'B',
  },

  // Class C: contextual/structural
  {
    name: 'section reorganization',
    before: '## Authentication\n\n- Users must log in\n- Sessions expire after 30 minutes',
    after: '## Security\n\n- Users must log in\n- Sessions expire after 30 minutes',
    expectedClass: 'C',
  },

  // Class D: uncertain/major
  {
    name: 'complete rewrite',
    before: '- The system authenticates users via email and password',
    after: '- OAuth2 providers handle all authentication flows',
    expectedClass: 'D',
  },
  {
    name: 'semantic reversal',
    before: '- Users must provide a password',
    after: '- Users must use passwordless authentication',
    expectedClass: 'D',
  },
];

let passed = 0;
let total = 0;

console.log('Change Classification Eval\n');

for (const tc of CASES) {
  total++;
  // Both fragments get the same synthetic top-level heading and file name.
  const beforeClauses = parseSpec(`# Test\n\n${tc.before}`, 'test.md');
  const afterClauses = parseSpec(`# Test\n\n${tc.after}`, 'test.md');
  const beforeNodes = extractCanonicalNodes(beforeClauses);
  const afterNodes = extractCanonicalNodes(afterClauses);

  const diffs = diffClauses(beforeClauses, afterClauses);
  // Prefer the first diff that is not UNCHANGED; if every diff is UNCHANGED,
  // fall back to the last one.
  const diff = diffs.find(d => d.diff_type !== DiffType.UNCHANGED) ?? diffs[diffs.length - 1];
  if (diff === undefined) {
    // diffClauses returned nothing: there is no diff to classify, so count the
    // case as failed instead of handing `undefined` to the classifier.
    console.log(` ✗ ${tc.name}: expected=${tc.expectedClass} got=none (no diff produced)`);
    continue;
  }
  const result = classifyChange(diff, beforeNodes, afterNodes);

  const ok = result.change_class === tc.expectedClass;
  if (ok) passed++;
  console.log(` ${ok ? '✓' : '✗'} ${tc.name}: expected=${tc.expectedClass} got=${result.change_class} conf=${result.confidence.toFixed(2)}`);
}

// Guard the division so an empty case table reports 0 rather than NaN.
const ratio = total > 0 ? passed / total : 0;
console.log(`\nScore: ${passed}/${total} (${(ratio * 100).toFixed(0)}%)`);
console.log(`val_score=${ratio.toFixed(4)}`);