Approval-based snapshot testing library for Go (mirror)
at main 443 lines 12 kB view raw
1package diff 2 3/* 4This file was sourced from github.com/gkampitakis/go-snaps, available with the following License: 5 6MIT License 7 8Copyright (c) 2021 Georgios Kampitakis 9 10Permission is hereby granted, free of charge, to any person obtaining a copy 11of this software and associated documentation files (the "Software"), to deal 12in the Software without restriction, including without limitation the rights 13to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14copies of the Software, and to permit persons to whom the Software is 15furnished to do so, subject to the following conditions: 16 17The above copyright notice and this permission notice shall be included in all 18copies or substantial portions of the Software. 19 20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26SOFTWARE. 27 28======================= 29 30This package is a partial port of Python difflib. 31 32This library is vendored and modified to address the `go-snaps` needs for a more 33readable difference report. 34 35 36Original source: https://github.com/pmezard/go-difflib 37 38Copyright (c) 2013, Patrick Mezard 39All rights reserved. 40 41Redistribution and use in source and binary forms, with or without 42modification, are permitted provided that the following conditions are 43met: 44 45 Redistributions of source code must retain the above copyright 46notice, this list of conditions and the following disclaimer. 47 Redistributions in binary form must reproduce the above copyright 48notice, this list of conditions and the following disclaimer in the 49documentation and/or other materials provided with the distribution. 50 The names of its contributors may not be used to endorse or promote 51products derived from this software without specific prior written 52permission. 53 54THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 55IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 56TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 57PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 58HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 59SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 60TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 61PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 62LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 63NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 64SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65*/ 66 67import "strconv" 68 69type DiffKind int 70 71const ( 72 DiffShared DiffKind = iota 73 DiffOld 74 DiffNew 75) 76 77type DiffLine struct { 78 OldNumber int 79 NewNumber int 80 Line string 81 Kind DiffKind 82} 83 84const ( 85 // Tag Codes for internal opcodes 86 opEqual int8 = iota 87 opInsert 88 opDelete 89 opReplace 90) 91 92type match struct { 93 A int 94 B int 95 Size int 96} 97 98type opCode struct { 99 Tag int8 100 I1 int 101 I2 int 102 J1 int 103 J2 int 104} 105 106// sequenceMatcher compares sequence of strings. The basic 107// algorithm predates, and is a little fancier than, an algorithm 108// published in the late 1980's by Ratcliff and Obershelp under the 109// hyperbolic name "gestalt pattern matching". The basic idea is to find 110// the longest contiguous matching subsequence that contains no "junk" 111// elements (R-O doesn't address junk). The same idea is then applied 112// recursively to the pieces of the sequences to the left and to the right 113// of the matching subsequence. This does not yield minimal edit 114// sequences, but does tend to yield matches that "look right" to people. 115// 116// sequenceMatcher tries to compute a "human-friendly diff" between two 117// sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the 118// longest *contiguous* & junk-free matching subsequence. That's what 119// catches peoples' eyes. The Windows(tm) windiff has another interesting 120// notion, pairing up elements that appear uniquely in each sequence. 121// That, and the method here, appear to yield more intuitive difference 122// reports than does diff. This method appears to be the least vulnerable 123// to synching up on blocks of "junk lines", though (like blank lines in 124// ordinary text files, or maybe "<P>" lines in HTML files). That may be 125// because this is the only method of the 3 that has a *concept* of 126// "junk" <wink>. 127// 128// Timing: Basic R-O is cubic time worst case and quadratic time expected 129// case. sequenceMatcher is quadratic time for the worst case and has 130// expected-case behavior dependent in a complicated way on how many 131// elements the sequences have in common; best case time is linear. 132type sequenceMatcher struct { 133 a []string 134 b []string 135 b2j map[string][]int 136 IsJunk func(string) bool 137 autoJunk bool 138 bJunk map[string]struct{} 139 matchingBlocks []match 140 fullBCount map[string]int 141 bPopular map[string]struct{} 142 opCodes []opCode 143} 144 145func newMatcher(a, b []string) *sequenceMatcher { 146 m := sequenceMatcher{autoJunk: true} 147 m.setSeqs(a, b) 148 return &m 149} 150 151// Set two sequences to be compared. 152func (m *sequenceMatcher) setSeqs(a, b []string) { 153 m.setSeq1(a) 154 m.setSeq2(b) 155} 156 157// Set the first sequence to be compared. 158func (m *sequenceMatcher) setSeq1(a []string) { 159 m.a = a 160 m.matchingBlocks, m.opCodes = nil, nil 161} 162 163// Set the second sequence to be compared. 164func (m *sequenceMatcher) setSeq2(b []string) { 165 m.b = b 166 m.matchingBlocks, m.opCodes, m.fullBCount = nil, nil, nil 167 m.chainB() 168} 169 170func (m *sequenceMatcher) chainB() { 171 // Populate line -> index mapping 172 b2j := map[string][]int{} 173 for i, elt := range m.b { 174 indices := b2j[elt] 175 indices = append(indices, i) 176 b2j[elt] = indices 177 } 178 179 // Purge junk elements 180 m.bJunk = map[string]struct{}{} 181 if m.IsJunk != nil { 182 junk := m.bJunk 183 for elt := range b2j { 184 if m.IsJunk(elt) { 185 junk[elt] = struct{}{} 186 } 187 } 188 for elt := range junk { // separate loop avoids separate list of keys 189 delete(b2j, elt) 190 } 191 } 192 193 // Purge popular elements that are not junk 194 popular := map[string]struct{}{} 195 n := len(m.b) 196 if m.autoJunk && n >= 200 { 197 ntest := n/100 + 1 198 for s, indices := range b2j { 199 if len(indices) > ntest { 200 popular[s] = struct{}{} 201 } 202 } 203 for s := range popular { 204 delete(b2j, s) 205 } 206 } 207 m.bPopular = popular 208 m.b2j = b2j 209} 210 211func (m *sequenceMatcher) isBJunk(s string) bool { 212 _, ok := m.bJunk[s] 213 return ok 214} 215 216// Find longest matching block in a[alo:ahi] and b[blo:bhi]. 217func (m *sequenceMatcher) findLongestMatch(alo, ahi, blo, bhi int) match { 218 besti, bestj, bestsize := alo, blo, 0 219 220 // find longest junk-free match 221 j2len := map[int]int{} 222 for i := alo; i != ahi; i++ { 223 newj2len := map[int]int{} 224 for _, j := range m.b2j[m.a[i]] { 225 if j < blo { 226 continue 227 } 228 if j >= bhi { 229 break 230 } 231 k := j2len[j-1] + 1 232 newj2len[j] = k 233 if k > bestsize { 234 besti, bestj, bestsize = i-k+1, j-k+1, k 235 } 236 } 237 j2len = newj2len 238 } 239 240 // Extend the best by non-junk elements on each end 241 for besti > alo && bestj > blo && !m.isBJunk(m.b[bestj-1]) && 242 m.a[besti-1] == m.b[bestj-1] { 243 besti, bestj, bestsize = besti-1, bestj-1, bestsize+1 244 } 245 for besti+bestsize < ahi && bestj+bestsize < bhi && 246 !m.isBJunk(m.b[bestj+bestsize]) && 247 m.a[besti+bestsize] == m.b[bestj+bestsize] { 248 bestsize++ 249 } 250 251 // Suck up junk on each side 252 for besti > alo && bestj > blo && m.isBJunk(m.b[bestj-1]) && 253 m.a[besti-1] == m.b[bestj-1] { 254 besti, bestj, bestsize = besti-1, bestj-1, bestsize+1 255 } 256 for besti+bestsize < ahi && bestj+bestsize < bhi && 257 m.isBJunk(m.b[bestj+bestsize]) && 258 m.a[besti+bestsize] == m.b[bestj+bestsize] { 259 bestsize++ 260 } 261 262 return match{A: besti, B: bestj, Size: bestsize} 263} 264 265// Return list of triples describing matching subsequences. 266func (m *sequenceMatcher) getMatchingBlocks() []match { 267 if m.matchingBlocks != nil { 268 return m.matchingBlocks 269 } 270 271 var matchBlocks func(alo, ahi, blo, bhi int, matched []match) []match 272 matchBlocks = func(alo, ahi, blo, bhi int, matched []match) []match { 273 match := m.findLongestMatch(alo, ahi, blo, bhi) 274 i, j, k := match.A, match.B, match.Size 275 if match.Size > 0 { 276 if alo < i && blo < j { 277 matched = matchBlocks(alo, i, blo, j, matched) 278 } 279 matched = append(matched, match) 280 if i+k < ahi && j+k < bhi { 281 matched = matchBlocks(i+k, ahi, j+k, bhi, matched) 282 } 283 } 284 return matched 285 } 286 matched := matchBlocks(0, len(m.a), 0, len(m.b), nil) 287 288 // Collapse adjacent equal blocks 289 nonAdjacent := []match{} 290 i1, j1, k1 := 0, 0, 0 291 for _, b := range matched { 292 i2, j2, k2 := b.A, b.B, b.Size 293 if i1+k1 == i2 && j1+k1 == j2 { 294 k1 += k2 295 } else { 296 if k1 > 0 { 297 nonAdjacent = append(nonAdjacent, match{i1, j1, k1}) 298 } 299 i1, j1, k1 = i2, j2, k2 300 } 301 } 302 if k1 > 0 { 303 nonAdjacent = append(nonAdjacent, match{i1, j1, k1}) 304 } 305 306 nonAdjacent = append(nonAdjacent, match{len(m.a), len(m.b), 0}) 307 m.matchingBlocks = nonAdjacent 308 return m.matchingBlocks 309} 310 311// Return list of opcodes describing how to turn a into b. 312func (m *sequenceMatcher) getOpCodes() []opCode { 313 if m.opCodes != nil { 314 return m.opCodes 315 } 316 i, j := 0, 0 317 matching := m.getMatchingBlocks() 318 opCodes := make([]opCode, 0, len(matching)) 319 for _, m := range matching { 320 ai, bj, size := m.A, m.B, m.Size 321 var tag int8 = 0 322 if i < ai && j < bj { 323 tag = opReplace 324 } else if i < ai { 325 tag = opDelete 326 } else if j < bj { 327 tag = opInsert 328 } 329 if tag > 0 { 330 opCodes = append(opCodes, opCode{tag, i, ai, j, bj}) 331 } 332 i, j = ai+size, bj+size 333 if size > 0 { 334 opCodes = append(opCodes, opCode{opEqual, ai, i, bj, j}) 335 } 336 } 337 m.opCodes = opCodes 338 return m.opCodes 339} 340 341// Histogram computes a diff between two strings using the Ratcliff-Obershelp algorithm 342func Histogram(old, new string) []DiffLine { 343 oldLines := splitLines(old) 344 newLines := splitLines(new) 345 346 matcher := newMatcher(oldLines, newLines) 347 opcodes := matcher.getOpCodes() 348 349 var result []DiffLine 350 351 for _, op := range opcodes { 352 switch op.Tag { 353 case opEqual: 354 for i := op.I1; i < op.I2; i++ { 355 newIdx := i + (op.J1 - op.I1) 356 result = append(result, DiffLine{ 357 Line: oldLines[i], 358 Kind: DiffShared, 359 OldNumber: i + 1, 360 NewNumber: newIdx + 1, 361 }) 362 } 363 case opDelete: 364 for i := op.I1; i < op.I2; i++ { 365 result = append(result, DiffLine{ 366 Line: oldLines[i], 367 Kind: DiffOld, 368 OldNumber: i + 1, 369 }) 370 } 371 case opInsert: 372 for j := op.J1; j < op.J2; j++ { 373 result = append(result, DiffLine{ 374 Line: newLines[j], 375 Kind: DiffNew, 376 NewNumber: j + 1, 377 }) 378 } 379 case opReplace: 380 for i := op.I1; i < op.I2; i++ { 381 result = append(result, DiffLine{ 382 Line: oldLines[i], 383 Kind: DiffOld, 384 OldNumber: i + 1, 385 }) 386 } 387 for j := op.J1; j < op.J2; j++ { 388 result = append(result, DiffLine{ 389 Line: newLines[j], 390 Kind: DiffNew, 391 NewNumber: j + 1, 392 }) 393 } 394 } 395 } 396 397 return result 398} 399 400// splitLines splits a string by newlines, handling empty strings 401func splitLines(s string) []string { 402 if s == "" { 403 return []string{} 404 } 405 lines := splitLinesKeepNewline(s) 406 return lines 407} 408 409// splitLinesKeepNewline splits on newlines but removes the newlines themselves 410func splitLinesKeepNewline(s string) []string { 411 var lines []string 412 var current string 413 414 for _, char := range s { 415 if char == '\n' { 416 lines = append(lines, current) 417 current = "" 418 } else { 419 current += string(char) 420 } 421 } 422 423 if current != "" { 424 lines = append(lines, current) 425 } 426 427 return lines 428} 429 430// FormatRangeUnified converts range to the "ed" format for unified diffs 431func FormatRangeUnified(start, stop int) string { 432 beginning := start + 1 // lines start numbering with one 433 length := stop - start 434 435 if length == 1 { 436 return strconv.Itoa(beginning) 437 } 438 if length == 0 { 439 beginning-- 440 } 441 442 return strconv.Itoa(beginning) + "," + strconv.Itoa(length) 443}