Reference implementation for the Phoenix Architecture. Work in progress.
aicoding.leaflet.pub/
ai
coding
crazy
1/**
2 * Clause Diff Engine
3 *
4 * Compares two sets of clauses (before/after) and classifies each change.
5 */
6
7import type { Clause, ClauseDiff } from './models/clause.js';
8import { DiffType } from './models/clause.js';
9
/**
 * Diff two clause arrays from the same document.
 *
 * Strategy:
 * 1. Group clauses by `clause_semhash` (hash of the normalized text).
 * 2. Within each semhash group, pair clauses that also share a section path
 *    (UNCHANGED) before pairing the leftovers (MOVED). Pairing purely by
 *    position would report spurious moves whenever the same text occurs in
 *    more than one section.
 * 3. Pair the remaining clauses by section path (MODIFIED).
 * 4. Whatever is still unpaired is REMOVED (before side) or ADDED (after side).
 *
 * Matched clauses are tracked by object identity rather than by `clause_id`,
 * so two clauses whose ids happen to coincide are never silently dropped
 * from the result.
 *
 * Neither input array is mutated.
 *
 * @param before - Clauses of the earlier version of the document.
 * @param after - Clauses of the later version of the document.
 * @returns One diff entry per pairing or unpaired clause, ordered as:
 *   UNCHANGED/MOVED entries (by first appearance of each semhash in
 *   `before`), then MODIFIED, then REMOVED, then ADDED.
 */
export function diffClauses(before: Clause[], after: Clause[]): ClauseDiff[] {
  const diffs: ClauseDiff[] = [];

  // Join with NUL instead of '/' so that ['a/b'] and ['a', 'b'] stay distinct.
  const pathKey = (c: Clause): string => c.section_path.join('\u0000');

  // Bucket clauses by a string key, preserving input order inside each bucket.
  const groupBy = (
    clauses: Clause[],
    keyOf: (c: Clause) => string,
  ): Map<string, Clause[]> => {
    const groups = new Map<string, Clause[]>();
    for (const c of clauses) {
      const key = keyOf(c);
      const bucket = groups.get(key);
      if (bucket) {
        bucket.push(c);
      } else {
        groups.set(key, [c]);
      }
    }
    return groups;
  };

  // Fields shared by every diff entry that has both a before and an after side.
  const pairFields = (bc: Clause, ac: Clause) => ({
    clause_id_before: bc.clause_id,
    clause_id_after: ac.clause_id,
    clause_before: bc,
    clause_after: ac,
    section_path_before: bc.section_path,
    section_path_after: ac.section_path,
  });

  // Identity-based bookkeeping: robust even when clause ids are not unique.
  const matchedBefore = new Set<Clause>();
  const matchedAfter = new Set<Clause>();

  // Pass 1: exact content matches (same semhash).
  const afterBySemhash = groupBy(after, (c) => c.clause_semhash);
  for (const [semhash, beforeGroup] of groupBy(before, (c) => c.clause_semhash)) {
    const afterGroup = afterBySemhash.get(semhash);
    if (!afterGroup) continue;

    // partners[i] is the after-clause paired with beforeGroup[i], if any.
    const partners: (Clause | undefined)[] = beforeGroup.map(() => undefined);

    // 1a: prefer a counterpart in the same section.
    const afterByPathInGroup = groupBy(afterGroup, pathKey);
    beforeGroup.forEach((bc, i) => {
      const ac = afterByPathInGroup.get(pathKey(bc))?.shift();
      if (ac) {
        partners[i] = ac;
        matchedAfter.add(ac);
      }
    });

    // 1b: pair whatever is left positionally; these necessarily changed section.
    const leftoverAfter = afterGroup.filter((c) => !matchedAfter.has(c));
    for (let i = 0; i < beforeGroup.length; i++) {
      if (partners[i]) continue;
      const ac = leftoverAfter.shift();
      if (!ac) break;
      partners[i] = ac;
      matchedAfter.add(ac);
    }

    // Emit in before-order so output order matches the positional algorithm
    // whenever the group has no duplicates.
    beforeGroup.forEach((bc, i) => {
      const ac = partners[i];
      if (!ac) return;
      matchedBefore.add(bc);
      const samePath = pathKey(bc) === pathKey(ac);
      diffs.push({
        diff_type: samePath ? DiffType.UNCHANGED : DiffType.MOVED,
        ...pairFields(bc, ac),
      });
    });
  }

  // Pass 2: pair the remaining clauses by section path (MODIFIED).
  const unmatchedBefore = before.filter((c) => !matchedBefore.has(c));
  const unmatchedAfter = after.filter((c) => !matchedAfter.has(c));
  const afterByPath = groupBy(unmatchedAfter, pathKey);

  const removed: Clause[] = [];
  for (const bc of unmatchedBefore) {
    const ac = afterByPath.get(pathKey(bc))?.shift();
    if (ac) {
      matchedAfter.add(ac);
      diffs.push({
        diff_type: DiffType.MODIFIED,
        ...pairFields(bc, ac),
      });
    } else {
      removed.push(bc);
    }
  }

  // Pass 3: everything still unpaired is REMOVED / ADDED.
  for (const bc of removed) {
    diffs.push({
      diff_type: DiffType.REMOVED,
      clause_id_before: bc.clause_id,
      clause_before: bc,
      section_path_before: bc.section_path,
    });
  }

  for (const ac of unmatchedAfter) {
    if (matchedAfter.has(ac)) continue;
    diffs.push({
      diff_type: DiffType.ADDED,
      clause_id_after: ac.clause_id,
      clause_after: ac,
      section_path_after: ac.section_path,
    });
  }

  return diffs;
}