Approval-based snapshot testing library for Go (mirror)
1package diff
2
3/*
4This file was sourced from github.com/gkampitakis/go-snaps, available with the following License:
5
6MIT License
7
8Copyright (c) 2021 Georgios Kampitakis
9
10Permission is hereby granted, free of charge, to any person obtaining a copy
11of this software and associated documentation files (the "Software"), to deal
12in the Software without restriction, including without limitation the rights
13to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14copies of the Software, and to permit persons to whom the Software is
15furnished to do so, subject to the following conditions:
16
17The above copyright notice and this permission notice shall be included in all
18copies or substantial portions of the Software.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26SOFTWARE.
27
28=======================
29
30This package is a partial port of Python difflib.
31
32This library is vendored and modified to address the `go-snaps` needs for a more
33readable difference report.
34
35
36Original source: https://github.com/pmezard/go-difflib
37
38Copyright (c) 2013, Patrick Mezard
39All rights reserved.
40
41Redistribution and use in source and binary forms, with or without
42modification, are permitted provided that the following conditions are
43met:
44
45 Redistributions of source code must retain the above copyright
46notice, this list of conditions and the following disclaimer.
47 Redistributions in binary form must reproduce the above copyright
48notice, this list of conditions and the following disclaimer in the
49documentation and/or other materials provided with the distribution.
50 The names of its contributors may not be used to endorse or promote
51products derived from this software without specific prior written
52permission.
53
54THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
55IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
56TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
57PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
58HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
60TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
61PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
62LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
63NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
64SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65*/
66
67import "strconv"
68
69type DiffKind int
70
71const (
72 DiffShared DiffKind = iota
73 DiffOld
74 DiffNew
75)
76
77type DiffLine struct {
78 OldNumber int
79 NewNumber int
80 Line string
81 Kind DiffKind
82}
83
84const (
85 // Tag Codes for internal opcodes
86 opEqual int8 = iota
87 opInsert
88 opDelete
89 opReplace
90)
91
92type match struct {
93 A int
94 B int
95 Size int
96}
97
98type opCode struct {
99 Tag int8
100 I1 int
101 I2 int
102 J1 int
103 J2 int
104}
105
106// sequenceMatcher compares sequence of strings. The basic
107// algorithm predates, and is a little fancier than, an algorithm
108// published in the late 1980's by Ratcliff and Obershelp under the
109// hyperbolic name "gestalt pattern matching". The basic idea is to find
110// the longest contiguous matching subsequence that contains no "junk"
111// elements (R-O doesn't address junk). The same idea is then applied
112// recursively to the pieces of the sequences to the left and to the right
113// of the matching subsequence. This does not yield minimal edit
114// sequences, but does tend to yield matches that "look right" to people.
115//
116// sequenceMatcher tries to compute a "human-friendly diff" between two
117// sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the
118// longest *contiguous* & junk-free matching subsequence. That's what
119// catches peoples' eyes. The Windows(tm) windiff has another interesting
120// notion, pairing up elements that appear uniquely in each sequence.
121// That, and the method here, appear to yield more intuitive difference
122// reports than does diff. This method appears to be the least vulnerable
123// to synching up on blocks of "junk lines", though (like blank lines in
124// ordinary text files, or maybe "<P>" lines in HTML files). That may be
125// because this is the only method of the 3 that has a *concept* of
126// "junk" <wink>.
127//
128// Timing: Basic R-O is cubic time worst case and quadratic time expected
129// case. sequenceMatcher is quadratic time for the worst case and has
130// expected-case behavior dependent in a complicated way on how many
131// elements the sequences have in common; best case time is linear.
132type sequenceMatcher struct {
133 a []string
134 b []string
135 b2j map[string][]int
136 IsJunk func(string) bool
137 autoJunk bool
138 bJunk map[string]struct{}
139 matchingBlocks []match
140 fullBCount map[string]int
141 bPopular map[string]struct{}
142 opCodes []opCode
143}
144
145func newMatcher(a, b []string) *sequenceMatcher {
146 m := sequenceMatcher{autoJunk: true}
147 m.setSeqs(a, b)
148 return &m
149}
150
151// Set two sequences to be compared.
152func (m *sequenceMatcher) setSeqs(a, b []string) {
153 m.setSeq1(a)
154 m.setSeq2(b)
155}
156
157// Set the first sequence to be compared.
158func (m *sequenceMatcher) setSeq1(a []string) {
159 m.a = a
160 m.matchingBlocks, m.opCodes = nil, nil
161}
162
163// Set the second sequence to be compared.
164func (m *sequenceMatcher) setSeq2(b []string) {
165 m.b = b
166 m.matchingBlocks, m.opCodes, m.fullBCount = nil, nil, nil
167 m.chainB()
168}
169
170func (m *sequenceMatcher) chainB() {
171 // Populate line -> index mapping
172 b2j := map[string][]int{}
173 for i, elt := range m.b {
174 indices := b2j[elt]
175 indices = append(indices, i)
176 b2j[elt] = indices
177 }
178
179 // Purge junk elements
180 m.bJunk = map[string]struct{}{}
181 if m.IsJunk != nil {
182 junk := m.bJunk
183 for elt := range b2j {
184 if m.IsJunk(elt) {
185 junk[elt] = struct{}{}
186 }
187 }
188 for elt := range junk { // separate loop avoids separate list of keys
189 delete(b2j, elt)
190 }
191 }
192
193 // Purge popular elements that are not junk
194 popular := map[string]struct{}{}
195 n := len(m.b)
196 if m.autoJunk && n >= 200 {
197 ntest := n/100 + 1
198 for s, indices := range b2j {
199 if len(indices) > ntest {
200 popular[s] = struct{}{}
201 }
202 }
203 for s := range popular {
204 delete(b2j, s)
205 }
206 }
207 m.bPopular = popular
208 m.b2j = b2j
209}
210
211func (m *sequenceMatcher) isBJunk(s string) bool {
212 _, ok := m.bJunk[s]
213 return ok
214}
215
216// Find longest matching block in a[alo:ahi] and b[blo:bhi].
217func (m *sequenceMatcher) findLongestMatch(alo, ahi, blo, bhi int) match {
218 besti, bestj, bestsize := alo, blo, 0
219
220 // find longest junk-free match
221 j2len := map[int]int{}
222 for i := alo; i != ahi; i++ {
223 newj2len := map[int]int{}
224 for _, j := range m.b2j[m.a[i]] {
225 if j < blo {
226 continue
227 }
228 if j >= bhi {
229 break
230 }
231 k := j2len[j-1] + 1
232 newj2len[j] = k
233 if k > bestsize {
234 besti, bestj, bestsize = i-k+1, j-k+1, k
235 }
236 }
237 j2len = newj2len
238 }
239
240 // Extend the best by non-junk elements on each end
241 for besti > alo && bestj > blo && !m.isBJunk(m.b[bestj-1]) &&
242 m.a[besti-1] == m.b[bestj-1] {
243 besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
244 }
245 for besti+bestsize < ahi && bestj+bestsize < bhi &&
246 !m.isBJunk(m.b[bestj+bestsize]) &&
247 m.a[besti+bestsize] == m.b[bestj+bestsize] {
248 bestsize++
249 }
250
251 // Suck up junk on each side
252 for besti > alo && bestj > blo && m.isBJunk(m.b[bestj-1]) &&
253 m.a[besti-1] == m.b[bestj-1] {
254 besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
255 }
256 for besti+bestsize < ahi && bestj+bestsize < bhi &&
257 m.isBJunk(m.b[bestj+bestsize]) &&
258 m.a[besti+bestsize] == m.b[bestj+bestsize] {
259 bestsize++
260 }
261
262 return match{A: besti, B: bestj, Size: bestsize}
263}
264
265// Return list of triples describing matching subsequences.
266func (m *sequenceMatcher) getMatchingBlocks() []match {
267 if m.matchingBlocks != nil {
268 return m.matchingBlocks
269 }
270
271 var matchBlocks func(alo, ahi, blo, bhi int, matched []match) []match
272 matchBlocks = func(alo, ahi, blo, bhi int, matched []match) []match {
273 match := m.findLongestMatch(alo, ahi, blo, bhi)
274 i, j, k := match.A, match.B, match.Size
275 if match.Size > 0 {
276 if alo < i && blo < j {
277 matched = matchBlocks(alo, i, blo, j, matched)
278 }
279 matched = append(matched, match)
280 if i+k < ahi && j+k < bhi {
281 matched = matchBlocks(i+k, ahi, j+k, bhi, matched)
282 }
283 }
284 return matched
285 }
286 matched := matchBlocks(0, len(m.a), 0, len(m.b), nil)
287
288 // Collapse adjacent equal blocks
289 nonAdjacent := []match{}
290 i1, j1, k1 := 0, 0, 0
291 for _, b := range matched {
292 i2, j2, k2 := b.A, b.B, b.Size
293 if i1+k1 == i2 && j1+k1 == j2 {
294 k1 += k2
295 } else {
296 if k1 > 0 {
297 nonAdjacent = append(nonAdjacent, match{i1, j1, k1})
298 }
299 i1, j1, k1 = i2, j2, k2
300 }
301 }
302 if k1 > 0 {
303 nonAdjacent = append(nonAdjacent, match{i1, j1, k1})
304 }
305
306 nonAdjacent = append(nonAdjacent, match{len(m.a), len(m.b), 0})
307 m.matchingBlocks = nonAdjacent
308 return m.matchingBlocks
309}
310
311// Return list of opcodes describing how to turn a into b.
312func (m *sequenceMatcher) getOpCodes() []opCode {
313 if m.opCodes != nil {
314 return m.opCodes
315 }
316 i, j := 0, 0
317 matching := m.getMatchingBlocks()
318 opCodes := make([]opCode, 0, len(matching))
319 for _, m := range matching {
320 ai, bj, size := m.A, m.B, m.Size
321 var tag int8 = 0
322 if i < ai && j < bj {
323 tag = opReplace
324 } else if i < ai {
325 tag = opDelete
326 } else if j < bj {
327 tag = opInsert
328 }
329 if tag > 0 {
330 opCodes = append(opCodes, opCode{tag, i, ai, j, bj})
331 }
332 i, j = ai+size, bj+size
333 if size > 0 {
334 opCodes = append(opCodes, opCode{opEqual, ai, i, bj, j})
335 }
336 }
337 m.opCodes = opCodes
338 return m.opCodes
339}
340
341// Histogram computes a diff between two strings using the Ratcliff-Obershelp algorithm
342func Histogram(old, new string) []DiffLine {
343 oldLines := splitLines(old)
344 newLines := splitLines(new)
345
346 matcher := newMatcher(oldLines, newLines)
347 opcodes := matcher.getOpCodes()
348
349 var result []DiffLine
350
351 for _, op := range opcodes {
352 switch op.Tag {
353 case opEqual:
354 for i := op.I1; i < op.I2; i++ {
355 newIdx := i + (op.J1 - op.I1)
356 result = append(result, DiffLine{
357 Line: oldLines[i],
358 Kind: DiffShared,
359 OldNumber: i + 1,
360 NewNumber: newIdx + 1,
361 })
362 }
363 case opDelete:
364 for i := op.I1; i < op.I2; i++ {
365 result = append(result, DiffLine{
366 Line: oldLines[i],
367 Kind: DiffOld,
368 OldNumber: i + 1,
369 })
370 }
371 case opInsert:
372 for j := op.J1; j < op.J2; j++ {
373 result = append(result, DiffLine{
374 Line: newLines[j],
375 Kind: DiffNew,
376 NewNumber: j + 1,
377 })
378 }
379 case opReplace:
380 for i := op.I1; i < op.I2; i++ {
381 result = append(result, DiffLine{
382 Line: oldLines[i],
383 Kind: DiffOld,
384 OldNumber: i + 1,
385 })
386 }
387 for j := op.J1; j < op.J2; j++ {
388 result = append(result, DiffLine{
389 Line: newLines[j],
390 Kind: DiffNew,
391 NewNumber: j + 1,
392 })
393 }
394 }
395 }
396
397 return result
398}
399
400// splitLines splits a string by newlines, handling empty strings
401func splitLines(s string) []string {
402 if s == "" {
403 return []string{}
404 }
405 lines := splitLinesKeepNewline(s)
406 return lines
407}
408
409// splitLinesKeepNewline splits on newlines but removes the newlines themselves
410func splitLinesKeepNewline(s string) []string {
411 var lines []string
412 var current string
413
414 for _, char := range s {
415 if char == '\n' {
416 lines = append(lines, current)
417 current = ""
418 } else {
419 current += string(char)
420 }
421 }
422
423 if current != "" {
424 lines = append(lines, current)
425 }
426
427 return lines
428}
429
430// FormatRangeUnified converts range to the "ed" format for unified diffs
431func FormatRangeUnified(start, stop int) string {
432 beginning := start + 1 // lines start numbering with one
433 length := stop - start
434
435 if length == 1 {
436 return strconv.Itoa(beginning)
437 }
438 if length == 0 {
439 beginning--
440 }
441
442 return strconv.Itoa(beginning) + "," + strconv.Itoa(length)
443}