Reference implementation for the Phoenix Architecture. Work in progress. aicoding.leaflet.pub/
ai coding crazy
at main 132 lines 4.0 kB view raw
/**
 * Clause Diff Engine
 *
 * Compares two sets of clauses (before/after) and classifies each change.
 */

import type { Clause, ClauseDiff } from './models/clause.js';
import { DiffType } from './models/clause.js';

/**
 * Builds a lookup key for a clause's section path.
 *
 * The path is JSON-encoded rather than joined on '/' so that a single segment
 * containing '/' (e.g. ['a/b']) never produces the same key as a nested path
 * (['a', 'b']).
 */
function sectionPathKey(clause: Clause): string {
  return JSON.stringify(clause.section_path);
}

/** Appends `clause` to the bucket stored under `key`, creating the bucket on first use. */
function addToBucket(buckets: Map<string, Clause[]>, key: string, clause: Clause): void {
  const bucket = buckets.get(key);
  if (bucket === undefined) {
    buckets.set(key, [clause]);
  } else {
    bucket.push(clause);
  }
}

/** Builds the diff entry for a before-clause that has been paired with an after-clause. */
function pairedDiff(diffType: ClauseDiff['diff_type'], bc: Clause, ac: Clause): ClauseDiff {
  return {
    diff_type: diffType,
    clause_id_before: bc.clause_id,
    clause_id_after: ac.clause_id,
    clause_before: bc,
    clause_after: ac,
    section_path_before: bc.section_path,
    section_path_after: ac.section_path,
  };
}

/**
 * Diff two clause arrays from the same document.
 *
 * Strategy:
 * 1. Group clauses by `clause_semhash`. Within a group, clauses whose section
 *    path is also identical are paired first (UNCHANGED); whatever is left in
 *    the group is then paired in input order (MOVED). Pairing same-section
 *    duplicates first keeps a reordering of duplicated text from being
 *    reported as a move.
 * 2. Clauses still unpaired are matched by section path alone (MODIFIED).
 * 3. Anything left over is REMOVED (before side) or ADDED (after side).
 *
 * Neither input array is mutated. Each clause object in `before` and in
 * `after` ends up in exactly one returned entry, assuming no object instance
 * is listed twice within the same array.
 *
 * @param before - Clauses from the earlier version of the document.
 * @param after - Clauses from the later version of the document.
 * @returns Diff entries in this order: UNCHANGED/MOVED entries (grouped by
 *   semhash, in order of first appearance in `before`), then MODIFIED entries,
 *   then REMOVED entries, then ADDED entries.
 */
export function diffClauses(before: Clause[], after: Clause[]): ClauseDiff[] {
  const diffs: ClauseDiff[] = [];

  // Matches are tracked by object identity rather than by clause_id, so two
  // clauses that happen to share an id cannot mark each other as matched and
  // silently drop out of the result.
  const matchedBefore = new Set<Clause>();
  const matchedAfter = new Set<Clause>();

  const recordPair = (diffType: ClauseDiff['diff_type'], bc: Clause, ac: Clause): void => {
    matchedBefore.add(bc);
    matchedAfter.add(ac);
    diffs.push(pairedDiff(diffType, bc, ac));
  };

  // Build lookup maps keyed by content hash.
  const beforeBySemhash = new Map<string, Clause[]>();
  const afterBySemhash = new Map<string, Clause[]>();
  for (const c of before) addToBucket(beforeBySemhash, c.clause_semhash, c);
  for (const c of after) addToBucket(afterBySemhash, c.clause_semhash, c);

  // Pass 1: exact content matches (same semhash).
  for (const [semhash, beforeGroup] of beforeBySemhash) {
    const afterGroup = afterBySemhash.get(semhash);
    if (afterGroup === undefined) continue;

    // 1a: same content in the same section -> UNCHANGED.
    const afterGroupByPath = new Map<string, Clause[]>();
    for (const ac of afterGroup) addToBucket(afterGroupByPath, sectionPathKey(ac), ac);

    const displaced: Clause[] = [];
    for (const bc of beforeGroup) {
      const ac = afterGroupByPath.get(sectionPathKey(bc))?.shift();
      if (ac !== undefined) {
        recordPair(DiffType.UNCHANGED, bc, ac);
      } else {
        displaced.push(bc);
      }
    }

    // 1b: same content in a different section -> MOVED. Every clause still in
    // `displaced` found no same-path partner above, so each pairing made here
    // necessarily crosses sections.
    const relocated = afterGroup.filter((ac) => !matchedAfter.has(ac));
    for (const bc of displaced) {
      const ac = relocated.shift();
      if (ac === undefined) break;
      recordPair(DiffType.MOVED, bc, ac);
    }
  }

  // Pass 2: try to match what remains by section_path alone (MODIFIED).
  const unmatchedBefore = before.filter((c) => !matchedBefore.has(c));
  const unmatchedAfter = after.filter((c) => !matchedAfter.has(c));

  const afterByPath = new Map<string, Clause[]>();
  for (const ac of unmatchedAfter) addToBucket(afterByPath, sectionPathKey(ac), ac);

  const stillUnmatchedBefore: Clause[] = [];
  for (const bc of unmatchedBefore) {
    // Candidates are consumed front-to-back so each after-clause is used once.
    const ac = afterByPath.get(sectionPathKey(bc))?.shift();
    if (ac !== undefined) {
      recordPair(DiffType.MODIFIED, bc, ac);
    } else {
      stillUnmatchedBefore.push(bc);
    }
  }

  // Pass 3: everything left over is REMOVED / ADDED.
  for (const bc of stillUnmatchedBefore) {
    diffs.push({
      diff_type: DiffType.REMOVED,
      clause_id_before: bc.clause_id,
      clause_before: bc,
      section_path_before: bc.section_path,
    });
  }

  for (const ac of unmatchedAfter) {
    if (matchedAfter.has(ac)) continue;
    diffs.push({
      diff_type: DiffType.ADDED,
      clause_id_after: ac.clause_id,
      clause_after: ac,
      section_path_after: ac.section_path,
    });
  }

  return diffs;
}