···11+package diff
// EditKind identifies the operation a single diff edit performs.
type EditKind int

// The edit kinds, numbered from zero in declaration order.
const (
	// Equal marks a context line: it is unchanged between both sequences.
	Equal EditKind = iota
	// Insert marks an added line ('+').
	Insert
	// Delete marks a removed line ('-').
	Delete
	// Replace marks a changed line (shown as Delete + Insert in unified view).
	Replace
)
1212+1313+func (e EditKind) String() string {
1414+ switch e {
1515+ case Equal:
1616+ return "Equal"
1717+ case Insert:
1818+ return "Insert"
1919+ case Delete:
2020+ return "Delete"
2121+ case Replace:
2222+ return "Replace"
2323+ default:
2424+ return "Unknown"
2525+ }
2626+}
2727+2828+// Edit represents a single edit operation in a diff.
2929+type Edit struct {
3030+ Kind EditKind // Equal, Insert, or Delete
3131+ AIndex int // index in original sequence
3232+ BIndex int // index in new sequence
3333+ Content string // the line or token
3434+}
3535+3636+// Diff represents a generic diffing algorithm.
3737+type Diff interface {
3838+ // Compute computes the edit operations needed to transform a into b.
3939+ Compute(a, b []string) ([]Edit, error)
4040+4141+ // Name returns the human-readable algorithm name (e.g., "LCS", "Hunt–McIlroy").
4242+ Name() string
4343+}
// LCS is a diff algorithm built on the longest common subsequence.
//
// It uses the classic dynamic-programming formulation, which fills an O(NM)
// table. The original Myers paper achieves O(ND) time, but this variant is
// simpler to maintain and still practical for the small inputs handled by
// this package.
type LCS struct{}

// Name returns the algorithm name, "LCS".
func (l *LCS) Name() string {
	return "LCS"
}
5252+5353+// Compute computes the shortest edit script using the LCS diff algorithm.
5454+//
5555+// It builds an LCS matrix and walks it to emit the sequence of Equal, Insert, and Delete operations required to transform a into b.
5656+func (l *LCS) Compute(a, b []string) ([]Edit, error) {
5757+ n := len(a)
5858+ lenB := len(b)
5959+6060+ if n == 0 && lenB == 0 {
6161+ return []Edit{}, nil
6262+ }
6363+6464+ if n == 0 {
6565+ edits := make([]Edit, lenB)
6666+ for i := range lenB {
6767+ edits[i] = Edit{Kind: Insert, AIndex: -1, BIndex: i, Content: b[i]}
6868+ }
6969+ return edits, nil
7070+ }
7171+7272+ if lenB == 0 {
7373+ edits := make([]Edit, n)
7474+ for i := range n {
7575+ edits[i] = Edit{Kind: Delete, AIndex: i, BIndex: -1, Content: a[i]}
7676+ }
7777+ return edits, nil
7878+ }
7979+8080+ lcs := make([][]int, n+1)
8181+ for i := range lcs {
8282+ lcs[i] = make([]int, lenB+1)
8383+ }
8484+8585+ for i := n - 1; i >= 0; i-- {
8686+ for j := lenB - 1; j >= 0; j-- {
8787+ if a[i] == b[j] {
8888+ lcs[i][j] = lcs[i+1][j+1] + 1
8989+ } else if lcs[i+1][j] >= lcs[i][j+1] {
9090+ lcs[i][j] = lcs[i+1][j]
9191+ } else {
9292+ lcs[i][j] = lcs[i][j+1]
9393+ }
9494+ }
9595+ }
9696+9797+ edits := make([]Edit, 0, n+lenB)
9898+9999+ i, j := 0, 0
100100+ for i < n && j < lenB {
101101+ switch {
102102+ case a[i] == b[j]:
103103+ edits = append(edits, Edit{
104104+ Kind: Equal,
105105+ AIndex: i,
106106+ BIndex: j,
107107+ Content: a[i],
108108+ })
109109+ i++
110110+ j++
111111+ case lcs[i+1][j] >= lcs[i][j+1]:
112112+ edits = append(edits, Edit{
113113+ Kind: Delete,
114114+ AIndex: i,
115115+ BIndex: -1,
116116+ Content: a[i],
117117+ })
118118+ i++
119119+ default:
120120+ edits = append(edits, Edit{
121121+ Kind: Insert,
122122+ AIndex: -1,
123123+ BIndex: j,
124124+ Content: b[j],
125125+ })
126126+ j++
127127+ }
128128+ }
129129+130130+ for i < n {
131131+ edits = append(edits, Edit{
132132+ Kind: Delete,
133133+ AIndex: i,
134134+ BIndex: -1,
135135+ Content: a[i],
136136+ })
137137+ i++
138138+ }
139139+140140+ for j < lenB {
141141+ edits = append(edits, Edit{
142142+ Kind: Insert,
143143+ AIndex: -1,
144144+ BIndex: j,
145145+ Content: b[j],
146146+ })
147147+ j++
148148+ }
149149+150150+ return edits, nil
151151+}
// Myers implements the Myers diff algorithm.
type Myers struct{}

// Name returns the algorithm name, "Myers".
func (m *Myers) Name() string { return "Myers" }
160160+161161+// Compute computes the diff edits needed to transform a into b.
162162+func (m *Myers) Compute(a, b []string) ([]Edit, error) {
163163+ n := len(a)
164164+ mLen := len(b)
165165+ max := n + mLen
166166+167167+ if n == 0 && mLen == 0 {
168168+ return []Edit{}, nil
169169+ }
170170+171171+ if n == 0 {
172172+ edits := make([]Edit, mLen)
173173+ for i := range mLen {
174174+ edits[i] = Edit{Kind: Insert, AIndex: -1, BIndex: i, Content: b[i]}
175175+ }
176176+ return edits, nil
177177+ }
178178+179179+ if mLen == 0 {
180180+ edits := make([]Edit, n)
181181+ for i := range n {
182182+ edits[i] = Edit{Kind: Delete, AIndex: i, BIndex: -1, Content: a[i]}
183183+ }
184184+ return edits, nil
185185+ }
186186+187187+ offset := max
188188+ size := 2*max + 1
189189+ V := make([]int, size)
190190+ trace := make([][]int, max+1)
191191+192192+ if offset+1 < size {
193193+ V[offset+1] = 0
194194+ }
195195+ for D := 0; D <= max; D++ {
196196+ currentV := make([]int, size)
197197+ copy(currentV, V)
198198+ trace[D] = currentV
199199+200200+ for k := -D; k <= D; k += 2 {
201201+ idx := offset + k
202202+203203+ var x int
204204+ if k == -D || (k != D && V[idx-1] < V[idx+1]) {
205205+ x = V[idx+1]
206206+ } else {
207207+ x = V[idx-1] + 1
208208+ }
209209+ y := x - k
210210+211211+ for x < n && y < mLen && a[x] == b[y] {
212212+ x++
213213+ y++
214214+ }
215215+216216+ V[idx] = x
217217+218218+ if x >= n && y >= mLen {
219219+ return m.buildEdits(a, b, trace, D, offset), nil
220220+ }
221221+ }
222222+ }
223223+224224+ return nil, nil
225225+}
226226+227227+// buildEdits reconstructs the edit script from the trace of V arrays.
228228+func (m *Myers) buildEdits(a, b []string, trace [][]int, D, offset int) []Edit {
229229+ var edits []Edit
230230+ x := len(a)
231231+ y := len(b)
232232+233233+ for d := D; d > 0; d-- {
234234+ V := trace[d]
235235+ k := x - y
236236+ idx := offset + k
237237+238238+ var prevK int
239239+ if k == -d || (k != d && V[idx-1] < V[idx+1]) {
240240+ prevK = k + 1
241241+ } else {
242242+ prevK = k - 1
243243+ }
244244+245245+ prevX := V[offset+prevK]
246246+ prevY := prevX - prevK
247247+248248+ var xStart, yStart int
249249+ if prevK == k-1 {
250250+ xStart = prevX + 1
251251+ yStart = prevY
252252+ } else {
253253+ xStart = prevX
254254+ yStart = prevY + 1
255255+ }
256256+257257+ for x > xStart && y > yStart {
258258+ x--
259259+ y--
260260+ edits = append(edits, Edit{
261261+ Kind: Equal,
262262+ AIndex: x,
263263+ BIndex: y,
264264+ Content: a[x],
265265+ })
266266+ }
267267+268268+ if xStart == prevX+1 {
269269+ x--
270270+ edits = append(edits, Edit{
271271+ Kind: Delete,
272272+ AIndex: x,
273273+ BIndex: -1,
274274+ Content: a[x],
275275+ })
276276+ } else {
277277+ y--
278278+ edits = append(edits, Edit{
279279+ Kind: Insert,
280280+ AIndex: -1,
281281+ BIndex: y,
282282+ Content: b[y],
283283+ })
284284+ }
285285+286286+ x = prevX
287287+ y = prevY
288288+ }
289289+290290+ for x > 0 && y > 0 {
291291+ if a[x-1] == b[y-1] {
292292+ x--
293293+ y--
294294+ edits = append(edits, Edit{
295295+ Kind: Equal,
296296+ AIndex: x,
297297+ BIndex: y,
298298+ Content: a[x],
299299+ })
300300+ } else {
301301+ break
302302+ }
303303+ }
304304+305305+ for x > 0 {
306306+ x--
307307+ edits = append(edits, Edit{
308308+ Kind: Delete,
309309+ AIndex: x,
310310+ BIndex: -1,
311311+ Content: a[x],
312312+ })
313313+ }
314314+ for y > 0 {
315315+ y--
316316+ edits = append(edits, Edit{
317317+ Kind: Insert,
318318+ AIndex: -1,
319319+ BIndex: y,
320320+ Content: b[y],
321321+ })
322322+ }
323323+324324+ for i, j := 0, len(edits)-1; i < j; i, j = i+1, j-1 {
325325+ edits[i], edits[j] = edits[j], edits[i]
326326+ }
327327+ return edits
328328+}
329329+330330+// ApplyEdits applies a sequence of edits to reconstruct the target sequence to verify that the diff is correct.
331331+func ApplyEdits(_ []string, edits []Edit) []string {
332332+ result := make([]string, 0)
333333+ for _, edit := range edits {
334334+ switch edit.Kind {
335335+ case Equal, Insert:
336336+ result = append(result, edit.Content)
337337+ case Delete:
338338+ // Skip deleted lines
339339+ }
340340+ }
341341+ return result
342342+}
343343+344344+// CountEditKinds returns a map counting occurrences of each [EditKind].
345345+func CountEditKinds(edits []Edit) map[EditKind]int {
346346+ counts := make(map[EditKind]int)
347347+ for _, edit := range edits {
348348+ counts[edit.Kind]++
349349+ }
350350+ return counts
351351+}
+264
internal/diff/diff_test.go
···11+package diff
22+33+import (
44+ _ "embed"
55+ "strings"
66+ "testing"
77+)
88+99+type algorithmFactory struct {
1010+ name string
1111+ new func() Diff
1212+}
1313+1414+var diffAlgorithms = []algorithmFactory{
1515+ {name: "LCS", new: func() Diff { return &LCS{} }},
1616+ {name: "Myers", new: func() Diff { return &Myers{} }},
1717+}
1818+1919+//go:embed fixtures/diffs_original.md
2020+var fixtureOriginal string
2121+2222+//go:embed fixtures/diffs_updated.md
2323+var fixtureUpdated string
2424+2525+func TestDiff_Compute_EmptySequences(t *testing.T) {
2626+ for _, alg := range diffAlgorithms {
2727+ alg := alg
2828+ t.Run(alg.name, func(t *testing.T) {
2929+ m := alg.new()
3030+3131+ t.Run("both empty", func(t *testing.T) {
3232+ edits, err := m.Compute([]string{}, []string{})
3333+ if err != nil {
3434+ t.Fatalf("unexpected error: %v", err)
3535+ }
3636+ if len(edits) != 0 {
3737+ t.Errorf("expected 0 edits, got %d", len(edits))
3838+ }
3939+ })
4040+4141+ t.Run("a empty, b has content", func(t *testing.T) {
4242+ b := []string{"line1", "line2"}
4343+ edits, err := m.Compute([]string{}, b)
4444+ if err != nil {
4545+ t.Fatalf("unexpected error: %v", err)
4646+ }
4747+ if len(edits) != 2 {
4848+ t.Fatalf("expected 2 edits, got %d", len(edits))
4949+ }
5050+ for i, edit := range edits {
5151+ if edit.Kind != Insert {
5252+ t.Errorf("edit %d: expected Insert, got %v", i, edit.Kind)
5353+ }
5454+ if edit.Content != b[i] {
5555+ t.Errorf("edit %d: expected content %q, got %q", i, b[i], edit.Content)
5656+ }
5757+ }
5858+ })
5959+6060+ t.Run("b empty, a has content", func(t *testing.T) {
6161+ a := []string{"line1", "line2"}
6262+ edits, err := m.Compute(a, []string{})
6363+ if err != nil {
6464+ t.Fatalf("unexpected error: %v", err)
6565+ }
6666+ if len(edits) != 2 {
6767+ t.Fatalf("expected 2 edits, got %d", len(edits))
6868+ }
6969+ for i, edit := range edits {
7070+ if edit.Kind != Delete {
7171+ t.Errorf("edit %d: expected Delete, got %v", i, edit.Kind)
7272+ }
7373+ if edit.Content != a[i] {
7474+ t.Errorf("edit %d: expected content %q, got %q", i, a[i], edit.Content)
7575+ }
7676+ }
7777+ })
7878+ })
7979+ }
8080+}
8181+8282+func TestDiff_Compute_IdenticalSequences(t *testing.T) {
8383+ a := []string{"line1", "line2", "line3"}
8484+ b := []string{"line1", "line2", "line3"}
8585+8686+ for _, alg := range diffAlgorithms {
8787+ alg := alg
8888+ t.Run(alg.name, func(t *testing.T) {
8989+ m := alg.new()
9090+ edits, err := m.Compute(a, b)
9191+ if err != nil {
9292+ t.Fatalf("unexpected error: %v", err)
9393+ }
9494+9595+ if len(edits) != 3 {
9696+ t.Fatalf("expected 3 edits, got %d", len(edits))
9797+ }
9898+9999+ for i, edit := range edits {
100100+ if edit.Kind != Equal {
101101+ t.Errorf("edit %d: expected Equal, got %v", i, edit.Kind)
102102+ }
103103+ if edit.AIndex != i || edit.BIndex != i {
104104+ t.Errorf("edit %d: expected indices (%d,%d), got (%d,%d)", i, i, i, edit.AIndex, edit.BIndex)
105105+ }
106106+ }
107107+ })
108108+ }
109109+}
110110+111111+func TestDiff_Compute_SimpleInsert(t *testing.T) {
112112+ a := []string{"line1", "line3"}
113113+ b := []string{"line1", "line2", "line3"}
114114+115115+ for _, alg := range diffAlgorithms {
116116+ alg := alg
117117+ t.Run(alg.name, func(t *testing.T) {
118118+ m := alg.new()
119119+ edits, err := m.Compute(a, b)
120120+ if err != nil {
121121+ t.Fatalf("unexpected error: %v", err)
122122+ }
123123+124124+ // Verify structure: Equal(line1), Insert(line2), Equal(line3)
125125+ if len(edits) != 3 {
126126+ t.Fatalf("expected 3 edits, got %d", len(edits))
127127+ }
128128+129129+ if edits[0].Kind != Equal || edits[0].Content != "line1" {
130130+ t.Errorf("edit 0: expected Equal(line1), got %v(%s)", edits[0].Kind, edits[0].Content)
131131+ }
132132+ if edits[1].Kind != Insert || edits[1].Content != "line2" {
133133+ t.Errorf("edit 1: expected Insert(line2), got %v(%s)", edits[1].Kind, edits[1].Content)
134134+ }
135135+ if edits[2].Kind != Equal || edits[2].Content != "line3" {
136136+ t.Errorf("edit 2: expected Equal(line3), got %v(%s)", edits[2].Kind, edits[2].Content)
137137+ }
138138+ })
139139+ }
140140+}
141141+142142+func TestDiff_Compute_SimpleDelete(t *testing.T) {
143143+ a := []string{"line1", "line2", "line3"}
144144+ b := []string{"line1", "line3"}
145145+146146+ for _, alg := range diffAlgorithms {
147147+ alg := alg
148148+ t.Run(alg.name, func(t *testing.T) {
149149+ m := alg.new()
150150+ edits, err := m.Compute(a, b)
151151+ if err != nil {
152152+ t.Fatalf("unexpected error: %v", err)
153153+ }
154154+155155+ // Verify structure: Equal(line1), Delete(line2), Equal(line3)
156156+ if len(edits) != 3 {
157157+ t.Fatalf("expected 3 edits, got %d", len(edits))
158158+ }
159159+160160+ if edits[0].Kind != Equal || edits[0].Content != "line1" {
161161+ t.Errorf("edit 0: expected Equal(line1), got %v(%s)", edits[0].Kind, edits[0].Content)
162162+ }
163163+ if edits[1].Kind != Delete || edits[1].Content != "line2" {
164164+ t.Errorf("edit 1: expected Delete(line2), got %v(%s)", edits[1].Kind, edits[1].Content)
165165+ }
166166+ if edits[2].Kind != Equal || edits[2].Content != "line3" {
167167+ t.Errorf("edit 2: expected Equal(line3), got %v(%s)", edits[2].Kind, edits[2].Content)
168168+ }
169169+ })
170170+ }
171171+}
172172+173173+func TestDiff_Compute_CompleteReplacement(t *testing.T) {
174174+ a := []string{"old1", "old2"}
175175+ b := []string{"new1", "new2"}
176176+177177+ for _, alg := range diffAlgorithms {
178178+ alg := alg
179179+ t.Run(alg.name, func(t *testing.T) {
180180+ m := alg.new()
181181+ edits, err := m.Compute(a, b)
182182+ if err != nil {
183183+ t.Fatalf("unexpected error: %v", err)
184184+ }
185185+186186+ // Should be all deletes followed by all inserts (or interleaved)
187187+ deleteCount := 0
188188+ insertCount := 0
189189+ for _, edit := range edits {
190190+ switch edit.Kind {
191191+ case Delete:
192192+ deleteCount++
193193+ case Insert:
194194+ insertCount++
195195+ case Equal:
196196+ t.Errorf("unexpected Equal edit when sequences are completely different")
197197+ }
198198+ }
199199+200200+ if deleteCount != 2 {
201201+ t.Errorf("expected 2 deletes, got %d", deleteCount)
202202+ }
203203+ if insertCount != 2 {
204204+ t.Errorf("expected 2 inserts, got %d", insertCount)
205205+ }
206206+ })
207207+ }
208208+}
209209+210210+func TestDiff_Compute_Fixtures(t *testing.T) {
211211+ original := strings.Split(strings.TrimSpace(fixtureOriginal), "\n")
212212+ updated := strings.Split(strings.TrimSpace(fixtureUpdated), "\n")
213213+214214+ for _, alg := range diffAlgorithms {
215215+ alg := alg
216216+ t.Run(alg.name, func(t *testing.T) {
217217+ m := alg.new()
218218+ edits, err := m.Compute(original, updated)
219219+ if err != nil {
220220+ t.Fatalf("unexpected error: %v", err)
221221+ }
222222+223223+ if len(edits) == 0 {
224224+ t.Fatal("expected non-empty edit list")
225225+ }
226226+227227+ reconstructed := ApplyEdits(original, edits)
228228+ if len(reconstructed) != len(updated) {
229229+ t.Fatalf("reconstructed length %d != updated length %d", len(reconstructed), len(updated))
230230+ }
231231+ for i := range reconstructed {
232232+ if reconstructed[i] != updated[i] {
233233+ t.Errorf("line %d: reconstructed %q != updated %q", i, reconstructed[i], updated[i])
234234+ }
235235+ }
236236+237237+ counts := CountEditKinds(edits)
238238+ if counts[Equal] == 0 {
239239+ t.Error("expected some Equal edits (files share common lines like blank lines)")
240240+ }
241241+ if counts[Insert] == 0 {
242242+ t.Error("expected some Insert edits")
243243+ }
244244+ if counts[Delete] == 0 {
245245+ t.Error("expected some Delete edits")
246246+ }
247247+248248+ t.Logf("Edit statistics: Equal=%d, Insert=%d, Delete=%d, Total=%d",
249249+ counts[Equal], counts[Insert], counts[Delete], len(edits))
250250+ })
251251+ }
252252+}
253253+254254+func TestDiff_Name(t *testing.T) {
255255+ for _, alg := range diffAlgorithms {
256256+ alg := alg
257257+ t.Run(alg.name, func(t *testing.T) {
258258+ m := alg.new()
259259+ if m.Name() != alg.name {
260260+ t.Errorf("expected name %q, got %q", alg.name, m.Name())
261261+ }
262262+ })
263263+ }
264264+}
+41
internal/diff/fixtures/diffs_original.md
···11+# Text Differencing Algorithms
22+33+Text differencing algorithms compute the minimal set of edits required to transform one sequence into another.
44+They are widely used in version control systems, compilers, and data synchronization tools.
55+66+## The Myers Algorithm
77+88+Eugene Myers proposed a diff algorithm in 1986 that computes the shortest edit script (SES) between two sequences.
99+It models the problem as a traversal over a grid, where diagonal moves represent matches and horizontal or vertical moves represent insertions and deletions.
1010+1111+### Key Ideas
1212+1313+- Based on the concept of *edit graph traversal*.
1414+- Uses a dynamic programming approach optimized with linear space.
1515+- Achieves **O(ND)** time complexity where `N` is sequence length and `D` is the edit distance.
1616+1717+### Pseudocode
1818+1919+```text
2020+for D from 0 to MAX:
2121+ for k in range(-D, D+1, 2):
2222+ choose move (insert or delete)
2323+ extend along diagonal as far as possible
2424+ if end reached: return path
2525+```
2626+2727+### Strengths
2828+2929+- Produces minimal diffs.
3030+- Works efficiently for typical text files.
3131+- Used by `git diff`, `diffutils`, and many modern tools.
3232+3333+### Weaknesses
3434+3535+- Complexity increases with extremely long or highly divergent sequences.
3636+- Implementation details are tricky due to path tracing.
3737+3838+## References
3939+4040+- Myers, E. W. (1986). *An O(ND) Difference Algorithm and Its Variations.*
4141+- GNU diffutils documentation.
+48
internal/diff/fixtures/diffs_updated.md
···11+# Text Differencing Algorithms
22+33+Diff algorithms determine the smallest set of operations to make two sequences identical.
44+They are essential to tools like `git`, `rsync`, and file synchronization systems.
55+66+## The Hunt–McIlroy Algorithm
77+88+Developed by James W. Hunt and M. Douglas McIlroy in 1976, this algorithm underlies the original Unix `diff` utility.
99+Unlike Myers, it relies on finding **longest common subsequences (LCS)** to compute differences.
1010+1111+### Core Principles
1212+1313+- Operates on the *longest common subsequence* problem.
1414+- Identifies matching lines using hash-based comparison.
1515+- Produces intuitive, human-readable diffs.
1616+1717+### Simplified Outline
1818+1919+```text
2020+match = longest_common_subsequence(A, B)
2121+for each segment not in match:
2222+ emit insertion or deletion
2323+```
2424+2525+### Advantages
2626+2727+- Generates results similar to human intuition.
2828+- Performs well on structured text like source code.
2929+- Simple to implement and debug.
3030+3131+### Limitations
3232+3333+- May not always yield the shortest possible edit script.
3434+- Space complexity can grow for large inputs.
3535+3636+## Comparison to Myers
3737+3838+| Feature | Myers | Hunt–McIlroy |
3939+| ---------- | ----------------- | ------------------ |
4040+| Complexity | O(ND) | O(N log N) typical |
4141+| Output | Minimal | Readable |
4242+| Origin | 1986 | 1976 |
4343+| Use Cases | Modern diff tools | Unix `diff` |
4444+4545+## References
4646+4747+- Hunt, J. W. & McIlroy, M. D. (1976). *An Algorithm for Differential File Comparison.*
4848+- Research on Longest Common Subsequence algorithms.