comparing 53bcdf7e5d94b09bf31b6e2915a7aa51d1f50b36 and main on oppi.li/go-gitdiff

+6 -6

.github/workflows/go.yml

··· 9 9 name: Verify 10 10 runs-on: ubuntu-latest 11 11 steps: 12 - - name: Set up Go 1.16 13 - uses: actions/setup-go@v2 12 + - name: Set up Go 1.21 13 + uses: actions/setup-go@v5 14 14 with: 15 - go-version: 1.16 15 + go-version: 1.21 16 16 17 17 - name: Check out code into the Go module directory 18 - uses: actions/checkout@v2 18 + uses: actions/checkout@v4 19 19 20 20 - name: Lint 21 - uses: golangci/golangci-lint-action@v2 21 + uses: golangci/golangci-lint-action@v7 22 22 with: 23 - version: v1.28 23 + version: v2.0 24 24 25 25 - name: Test 26 26 run: go test -v ./...

+41 -13

.golangci.yml

··· 1 + version: "2" 2 + 1 3 run: 2 4 tests: false 3 5 4 6 linters: 5 - disable-all: true 7 + default: none 6 8 enable: 7 - - deadcode 8 9 - errcheck 9 - - gofmt 10 - - goimports 11 - - golint 12 10 - govet 13 11 - ineffassign 14 - - typecheck 12 + - misspell 13 + - revive 15 14 - unconvert 16 - - varcheck 17 - 18 - issues: 19 - exclude-use-default: false 15 + - unused 16 + settings: 17 + errcheck: 18 + exclude-functions: 19 + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write 20 + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString 21 + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte 22 + - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) 23 + revive: 24 + rules: 25 + - name: context-keys-type 26 + - name: time-naming 27 + - name: var-declaration 28 + - name: unexported-return 29 + - name: errorf 30 + - name: blank-imports 31 + - name: context-as-argument 32 + - name: dot-imports 33 + - name: error-return 34 + - name: error-strings 35 + - name: error-naming 36 + - name: exported 37 + - name: increment-decrement 38 + - name: var-naming 39 + - name: package-comments 40 + - name: range 41 + - name: receiver-naming 42 + - name: indent-error-flow 20 43 21 - linter-settings: 22 - goimports: 23 - local-prefixes: github.com/bluekeyes/go-gitdiff 44 + formatters: 45 + enable: 46 + - gofmt 47 + - goimports 48 + settings: 49 + goimports: 50 + local-prefixes: 51 + - github.com/bluekeyes/go-gitdiff

+15 -8

README.md

··· 4 4 5 5 A Go library for parsing and applying patches generated by `git diff`, `git 6 6 show`, and `git format-patch`. It can also parse and apply unified diffs 7 - generated by the standard `diff` tool. 7 + generated by the standard GNU `diff` tool. 8 8 9 9 It supports standard line-oriented text patches and Git binary patches, and 10 10 aims to parse anything accepted by the `git apply` command. ··· 29 29 30 30 // apply the changes in the patch to a source file 31 31 var output bytes.Buffer 32 - if err := gitdiff.NewApplier(code).ApplyFile(&output, files[0]); err != nil { 32 + if err := gitdiff.Apply(&output, code, files[0]); err != nil { 33 33 log.Fatal(err) 34 34 } 35 35 ``` 36 36 37 37 ## Development Status 38 38 39 - Mostly complete. API changes are possible, particularly for patch application, 40 - but I expect the parsing interface and types to remain stable. 39 + The parsing API and types are complete and I expect will remain stable. Version 40 + 0.7.0 introduced a new apply API that may change more in the future to support 41 + non-strict patch application. 42 + 43 + Parsing and strict application are well-covered by unit tests and the library 44 + is used in a production application that parses and applies thousands of 45 + patches every day. However, the space of all possible patches is large, so 46 + there are likely undiscovered bugs. 41 47 42 - Patch parsing and strict application are well-covered by unit tests and the 43 - library is used in a production application that parses and applies thousands of 44 - patches every day, but the space of all possible patches is large, so there are 45 - likely undiscovered bugs. 48 + The parsing code has also had a modest amount of fuzz testing. 46 49 47 50 ## Why another git/unified diff parser? 
48 51 ··· 96 99 context of each fragment must exactly match the source file; `git apply` 97 100 implements a search algorithm that tries different lines and amounts of 98 101 context, with further options to normalize or ignore whitespace changes. 102 + 103 + 7. When parsing mail-formatted patch headers, leading and trailing whitespace 104 + is always removed from `Subject` lines. There is no exact equivalent to `git 105 + mailinfo -k`.

+30 -334

gitdiff/apply.go

··· 13 13 // Users can test if an error was caused by a conflict by using errors.Is with 14 14 // an empty Conflict: 15 15 // 16 - // if errors.Is(err, &Conflict{}) { 17 - // // handle conflict 18 - // } 19 - // 16 + // if errors.Is(err, &Conflict{}) { 17 + // // handle conflict 18 + // } 20 19 type Conflict struct { 21 20 msg string 22 21 } ··· 89 88 90 89 var ( 91 90 errApplyInProgress = errors.New("gitdiff: incompatible apply in progress") 92 - ) 93 - 94 - const ( 95 - applyInitial = iota 96 - applyText 97 - applyBinary 98 - applyFile 91 + errApplierClosed = errors.New("gitdiff: applier is closed") 99 92 ) 100 93 101 - // Apply is a convenience function that creates an Applier for src with default 102 - // settings and applies the changes in f, writing the result to dst. 94 + // Apply applies the changes in f to src, writing the result to dst. It can 95 + // apply both text and binary changes. 96 + // 97 + // If an error occurs while applying, Apply returns an *ApplyError that 98 + // annotates the error with additional information. If the error is because of 99 + // a conflict with the source, the wrapped error will be a *Conflict. 103 100 func Apply(dst io.Writer, src io.ReaderAt, f *File) error { 104 - return NewApplier(src).ApplyFile(dst, f) 105 - } 106 - 107 - // Applier applies changes described in fragments to source data. If changes 108 - // are described in multiple fragments, those fragments must be applied in 109 - // order, usually by calling ApplyFile. 110 - // 111 - // By default, Applier operates in "strict" mode, where fragment content and 112 - // positions must exactly match those of the source. 113 - // 114 - // If an error occurs while applying, methods on Applier return instances of 115 - // *ApplyError that annotate the wrapped error with additional information 116 - // when available. If the error is because of a conflict between a fragment and 117 - // the source, the wrapped error will be a *Conflict. 
118 - // 119 - // While an Applier can apply both text and binary fragments, only one fragment 120 - // type can be used without resetting the Applier. The first fragment applied 121 - // sets the type for the Applier. Mixing fragment types or mixing 122 - // fragment-level and file-level applies results in an error. 123 - type Applier struct { 124 - src io.ReaderAt 125 - lineSrc LineReaderAt 126 - nextLine int64 127 - applyType int 128 - } 129 - 130 - // NewApplier creates an Applier that reads data from src. If src is a 131 - // LineReaderAt, it is used directly to apply text fragments. 132 - func NewApplier(src io.ReaderAt) *Applier { 133 - a := new(Applier) 134 - a.Reset(src) 135 - return a 136 - } 137 - 138 - // Reset resets the input and internal state of the Applier. If src is nil, the 139 - // existing source is reused. 140 - func (a *Applier) Reset(src io.ReaderAt) { 141 - if src != nil { 142 - a.src = src 143 - if lineSrc, ok := src.(LineReaderAt); ok { 144 - a.lineSrc = lineSrc 145 - } else { 146 - a.lineSrc = &lineReaderAt{r: src} 101 + if f.IsBinary { 102 + if len(f.TextFragments) > 0 { 103 + return applyError(errors.New("binary file contains text fragments")) 104 + } 105 + if f.BinaryFragment == nil { 106 + return applyError(errors.New("binary file does not contain a binary fragment")) 147 107 } 148 - } 149 - a.nextLine = 0 150 - a.applyType = applyInitial 151 - } 152 - 153 - // ApplyFile applies the changes in all of the fragments of f and writes the 154 - // result to dst. 
155 - func (a *Applier) ApplyFile(dst io.Writer, f *File) error { 156 - if a.applyType != applyInitial { 157 - return applyError(errApplyInProgress) 158 - } 159 - defer func() { a.applyType = applyFile }() 160 - 161 - if f.IsBinary && len(f.TextFragments) > 0 { 162 - return applyError(errors.New("binary file contains text fragments")) 163 - } 164 - if !f.IsBinary && f.BinaryFragment != nil { 165 - return applyError(errors.New("text file contains binary fragment")) 108 + } else { 109 + if f.BinaryFragment != nil { 110 + return applyError(errors.New("text file contains a binary fragment")) 111 + } 166 112 } 167 113 168 114 switch { 169 115 case f.BinaryFragment != nil: 170 - return a.ApplyBinaryFragment(dst, f.BinaryFragment) 116 + applier := NewBinaryApplier(dst, src) 117 + if err := applier.ApplyFragment(f.BinaryFragment); err != nil { 118 + return err 119 + } 120 + return applier.Close() 171 121 172 122 case len(f.TextFragments) > 0: 173 123 frags := make([]*TextFragment, len(f.TextFragments)) ··· 181 131 // right now, the application fails if fragments overlap, but it should be 182 132 // possible to precompute the result of applying them in order 183 133 134 + applier := NewTextApplier(dst, src) 184 135 for i, frag := range frags { 185 - if err := a.ApplyTextFragment(dst, frag); err != nil { 136 + if err := applier.ApplyFragment(frag); err != nil { 186 137 return applyError(err, fragNum(i)) 187 138 } 188 139 } 189 - } 190 - 191 - return applyError(a.Flush(dst)) 192 - } 193 - 194 - // ApplyTextFragment applies the changes in the fragment f and writes unwritten 195 - // data before the start of the fragment and the result to dst. If multiple 196 - // text fragments apply to the same source, ApplyTextFragment must be called in 197 - // order of increasing start position. As a result, each fragment can be 198 - // applied at most once before a call to Reset. 
199 - func (a *Applier) ApplyTextFragment(dst io.Writer, f *TextFragment) error { 200 - if a.applyType != applyInitial && a.applyType != applyText { 201 - return applyError(errApplyInProgress) 202 - } 203 - defer func() { a.applyType = applyText }() 204 - 205 - // application code assumes fragment fields are consistent 206 - if err := f.Validate(); err != nil { 207 - return applyError(err) 208 - } 209 - 210 - // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) 211 - fragStart := f.OldPosition - 1 212 - if fragStart < 0 { 213 - fragStart = 0 214 - } 215 - fragEnd := fragStart + f.OldLines 216 - 217 - start := a.nextLine 218 - if fragStart < start { 219 - return applyError(&Conflict{"fragment overlaps with an applied fragment"}) 220 - } 221 - 222 - if f.OldPosition == 0 { 223 - ok, err := isLen(a.src, 0) 224 - if err != nil { 225 - return applyError(err) 226 - } 227 - if !ok { 228 - return applyError(&Conflict{"cannot create new file from non-empty src"}) 229 - } 230 - } 231 - 232 - preimage := make([][]byte, fragEnd-start) 233 - n, err := a.lineSrc.ReadLinesAt(preimage, start) 234 - if err != nil { 235 - return applyError(err, lineNum(start+int64(n))) 236 - } 237 - 238 - // copy leading data before the fragment starts 239 - for i, line := range preimage[:fragStart-start] { 240 - if _, err := dst.Write(line); err != nil { 241 - a.nextLine = start + int64(i) 242 - return applyError(err, lineNum(a.nextLine)) 243 - } 244 - } 245 - preimage = preimage[fragStart-start:] 246 - 247 - // apply the changes in the fragment 248 - used := int64(0) 249 - for i, line := range f.Lines { 250 - if err := applyTextLine(dst, line, preimage, used); err != nil { 251 - a.nextLine = fragStart + used 252 - return applyError(err, lineNum(a.nextLine), fragLineNum(i)) 253 - } 254 - if line.Old() { 255 - used++ 256 - } 257 - } 258 - a.nextLine = fragStart + used 259 - 260 - // new position of +0,0 mean a full delete, so check for leftovers 261 - if f.NewPosition == 
0 && f.NewLines == 0 { 262 - var b [1][]byte 263 - n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine) 264 - if err != nil && err != io.EOF { 265 - return applyError(err, lineNum(a.nextLine)) 266 - } 267 - if n > 0 { 268 - return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine)) 269 - } 270 - } 271 - 272 - return nil 273 - } 274 - 275 - func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { 276 - if line.Old() && string(preimage[i]) != line.Line { 277 - return &Conflict{"fragment line does not match src line"} 278 - } 279 - if line.New() { 280 - _, err = io.WriteString(dst, line.Line) 281 - } 282 - return err 283 - } 284 - 285 - // Flush writes any data following the last applied fragment to dst. 286 - func (a *Applier) Flush(dst io.Writer) (err error) { 287 - switch a.applyType { 288 - case applyInitial: 289 - _, err = copyFrom(dst, a.src, 0) 290 - case applyText: 291 - _, err = copyLinesFrom(dst, a.lineSrc, a.nextLine) 292 - case applyBinary: 293 - // nothing to flush, binary apply "consumes" full source 294 - } 295 - return err 296 - } 140 + return applier.Close() 297 141 298 - // ApplyBinaryFragment applies the changes in the fragment f and writes the 299 - // result to dst. At most one binary fragment can be applied before a call to 300 - // Reset. 
301 - func (a *Applier) ApplyBinaryFragment(dst io.Writer, f *BinaryFragment) error { 302 - if a.applyType != applyInitial { 303 - return applyError(errApplyInProgress) 304 - } 305 - defer func() { a.applyType = applyBinary }() 306 - 307 - if f == nil { 308 - return applyError(errors.New("nil fragment")) 309 - } 310 - 311 - switch f.Method { 312 - case BinaryPatchLiteral: 313 - if _, err := dst.Write(f.Data); err != nil { 314 - return applyError(err) 315 - } 316 - case BinaryPatchDelta: 317 - if err := applyBinaryDeltaFragment(dst, a.src, f.Data); err != nil { 318 - return applyError(err) 319 - } 320 142 default: 321 - return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) 322 - } 323 - return nil 324 - } 325 - 326 - func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error { 327 - srcSize, delta := readBinaryDeltaSize(frag) 328 - if err := checkBinarySrcSize(src, srcSize); err != nil { 329 - return err 330 - } 331 - 332 - dstSize, delta := readBinaryDeltaSize(delta) 333 - 334 - for len(delta) > 0 { 335 - op := delta[0] 336 - if op == 0 { 337 - return errors.New("invalid delta opcode 0") 338 - } 339 - 340 - var n int64 341 - var err error 342 - switch op & 0x80 { 343 - case 0x80: 344 - n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src) 345 - case 0x00: 346 - n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:]) 347 - } 348 - if err != nil { 349 - return err 350 - } 351 - dstSize -= n 352 - } 353 - 354 - if dstSize != 0 { 355 - return errors.New("corrupt binary delta: insufficient or extra data") 356 - } 357 - return nil 358 - } 359 - 360 - // readBinaryDeltaSize reads a variable length size from a delta-encoded binary 361 - // fragment, returing the size and the unused data. Data is encoded as: 362 - // 363 - // [[1xxxxxxx]...] [0xxxxxxx] 364 - // 365 - // in little-endian order, with 7 bits of the value per byte. 
366 - func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { 367 - shift := uint(0) 368 - for i, b := range d { 369 - size |= int64(b&0x7F) << shift 370 - shift += 7 371 - if b <= 0x7F { 372 - return size, d[i+1:] 373 - } 374 - } 375 - return size, nil 376 - } 377 - 378 - // applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary 379 - // fragment, returning the amount of data written and the usused part of the 380 - // fragment. An add operation takes the form: 381 - // 382 - // [0xxxxxx][[data1]...] 383 - // 384 - // where the lower seven bits of the opcode is the number of data bytes 385 - // following the opcode. See also pack-format.txt in the Git source. 386 - func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) { 387 - size := int(op) 388 - if len(delta) < size { 389 - return 0, delta, errors.New("corrupt binary delta: incomplete add") 390 - } 391 - _, err = w.Write(delta[:size]) 392 - return int64(size), delta[size:], err 393 - } 394 - 395 - // applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary 396 - // fragment, returing the amount of data written and the unused part of the 397 - // fragment. A copy operation takes the form: 398 - // 399 - // [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] 400 - // 401 - // where the lower seven bits of the opcode determine which non-zero offset and 402 - // size bytes are present in little-endian order: if bit 0 is set, offset1 is 403 - // present, etc. If no offset or size bytes are present, offset is 0 and size 404 - // is 0x10000. See also pack-format.txt in the Git source. 
405 - func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) { 406 - const defaultSize = 0x10000 407 - 408 - unpack := func(start, bits uint) (v int64) { 409 - for i := uint(0); i < bits; i++ { 410 - mask := byte(1 << (i + start)) 411 - if op&mask > 0 { 412 - if len(delta) == 0 { 413 - err = errors.New("corrupt binary delta: incomplete copy") 414 - return 415 - } 416 - v |= int64(delta[0]) << (8 * i) 417 - delta = delta[1:] 418 - } 419 - } 420 - return 421 - } 422 - 423 - offset := unpack(0, 4) 424 - size := unpack(4, 3) 425 - if err != nil { 426 - return 0, delta, err 427 - } 428 - if size == 0 { 429 - size = defaultSize 430 - } 431 - 432 - // TODO(bkeyes): consider pooling these buffers 433 - b := make([]byte, size) 434 - if _, err := src.ReadAt(b, offset); err != nil { 435 - return 0, delta, err 436 - } 437 - 438 - _, err = w.Write(b) 439 - return size, delta, err 440 - } 441 - 442 - func checkBinarySrcSize(r io.ReaderAt, size int64) error { 443 - ok, err := isLen(r, size) 444 - if err != nil { 143 + // nothing to apply, just copy all the data 144 + _, err := copyFrom(dst, src, 0) 445 145 return err 446 146 } 447 - if !ok { 448 - return &Conflict{"fragment src size does not match actual src size"} 449 - } 450 - return nil 451 147 }

+206

gitdiff/apply_binary.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "errors" 5 + "fmt" 6 + "io" 7 + ) 8 + 9 + // BinaryApplier applies binary changes described in a fragment to source data. 10 + // The applier must be closed after use. 11 + type BinaryApplier struct { 12 + dst io.Writer 13 + src io.ReaderAt 14 + 15 + closed bool 16 + dirty bool 17 + } 18 + 19 + // NewBinaryApplier creates a BinaryApplier that reads data from src and 20 + // writes modified data to dst. 21 + func NewBinaryApplier(dst io.Writer, src io.ReaderAt) *BinaryApplier { 22 + a := BinaryApplier{ 23 + dst: dst, 24 + src: src, 25 + } 26 + return &a 27 + } 28 + 29 + // ApplyFragment applies the changes in the fragment f and writes the result to 30 + // dst. ApplyFragment can be called at most once. 31 + // 32 + // If an error occurs while applying, ApplyFragment returns an *ApplyError that 33 + // annotates the error with additional information. If the error is because of 34 + // a conflict between a fragment and the source, the wrapped error will be a 35 + // *Conflict. 36 + func (a *BinaryApplier) ApplyFragment(f *BinaryFragment) error { 37 + if f == nil { 38 + return applyError(errors.New("nil fragment")) 39 + } 40 + if a.closed { 41 + return applyError(errApplierClosed) 42 + } 43 + if a.dirty { 44 + return applyError(errApplyInProgress) 45 + } 46 + 47 + // mark an apply as in progress, even if it fails before making changes 48 + a.dirty = true 49 + 50 + switch f.Method { 51 + case BinaryPatchLiteral: 52 + if _, err := a.dst.Write(f.Data); err != nil { 53 + return applyError(err) 54 + } 55 + case BinaryPatchDelta: 56 + if err := applyBinaryDeltaFragment(a.dst, a.src, f.Data); err != nil { 57 + return applyError(err) 58 + } 59 + default: 60 + return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) 61 + } 62 + return nil 63 + } 64 + 65 + // Close writes any data following the last applied fragment and prevents 66 + // future calls to ApplyFragment. 
67 + func (a *BinaryApplier) Close() (err error) { 68 + if a.closed { 69 + return nil 70 + } 71 + 72 + a.closed = true 73 + if !a.dirty { 74 + _, err = copyFrom(a.dst, a.src, 0) 75 + } else { 76 + // do nothing, applying a binary fragment copies all data 77 + } 78 + return err 79 + } 80 + 81 + func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error { 82 + srcSize, delta := readBinaryDeltaSize(frag) 83 + if err := checkBinarySrcSize(src, srcSize); err != nil { 84 + return err 85 + } 86 + 87 + dstSize, delta := readBinaryDeltaSize(delta) 88 + 89 + for len(delta) > 0 { 90 + op := delta[0] 91 + if op == 0 { 92 + return errors.New("invalid delta opcode 0") 93 + } 94 + 95 + var n int64 96 + var err error 97 + switch op & 0x80 { 98 + case 0x80: 99 + n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src) 100 + case 0x00: 101 + n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:]) 102 + } 103 + if err != nil { 104 + return err 105 + } 106 + dstSize -= n 107 + } 108 + 109 + if dstSize != 0 { 110 + return errors.New("corrupt binary delta: insufficient or extra data") 111 + } 112 + return nil 113 + } 114 + 115 + // readBinaryDeltaSize reads a variable length size from a delta-encoded binary 116 + // fragment, returning the size and the unused data. Data is encoded as: 117 + // 118 + // [[1xxxxxxx]...] [0xxxxxxx] 119 + // 120 + // in little-endian order, with 7 bits of the value per byte. 121 + func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { 122 + shift := uint(0) 123 + for i, b := range d { 124 + size |= int64(b&0x7F) << shift 125 + shift += 7 126 + if b <= 0x7F { 127 + return size, d[i+1:] 128 + } 129 + } 130 + return size, nil 131 + } 132 + 133 + // applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary 134 + // fragment, returning the amount of data written and the unused part of the 135 + // fragment. An add operation takes the form: 136 + // 137 + // [0xxxxxxx][[data1]...] 
138 + // 139 + // where the lower seven bits of the opcode are the number of data bytes 140 + // following the opcode. See also pack-format.txt in the Git source. 141 + func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) { 142 + size := int(op) 143 + if len(delta) < size { 144 + return 0, delta, errors.New("corrupt binary delta: incomplete add") 145 + } 146 + _, err = w.Write(delta[:size]) 147 + return int64(size), delta[size:], err 148 + } 149 + 150 + // applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary 151 + // fragment, returning the amount of data written and the unused part of the 152 + // fragment. A copy operation takes the form: 153 + // 154 + // [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] 155 + // 156 + // where the lower seven bits of the opcode determine which non-zero offset and 157 + // size bytes are present in little-endian order: if bit 0 is set, offset1 is 158 + // present, etc. If no offset or size bytes are present, offset is 0 and size 159 + // is 0x10000. See also pack-format.txt in the Git source. 
160 + func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) { 161 + const defaultSize = 0x10000 162 + 163 + unpack := func(start, bits uint) (v int64) { 164 + for i := uint(0); i < bits; i++ { 165 + mask := byte(1 << (i + start)) 166 + if op&mask > 0 { 167 + if len(delta) == 0 { 168 + err = errors.New("corrupt binary delta: incomplete copy") 169 + return 170 + } 171 + v |= int64(delta[0]) << (8 * i) 172 + delta = delta[1:] 173 + } 174 + } 175 + return 176 + } 177 + 178 + offset := unpack(0, 4) 179 + size := unpack(4, 3) 180 + if err != nil { 181 + return 0, delta, err 182 + } 183 + if size == 0 { 184 + size = defaultSize 185 + } 186 + 187 + // TODO(bkeyes): consider pooling these buffers 188 + b := make([]byte, size) 189 + if _, err := src.ReadAt(b, offset); err != nil { 190 + return 0, delta, err 191 + } 192 + 193 + _, err = w.Write(b) 194 + return size, delta, err 195 + } 196 + 197 + func checkBinarySrcSize(r io.ReaderAt, size int64) error { 198 + ok, err := isLen(r, size) 199 + if err != nil { 200 + return err 201 + } 202 + if !ok { 203 + return &Conflict{"fragment src size does not match actual src size"} 204 + } 205 + return nil 206 + }

+11 -73

gitdiff/apply_test.go

··· 9 9 "testing" 10 10 ) 11 11 12 - func TestApplierInvariants(t *testing.T) { 13 - binary := &BinaryFragment{ 14 - Method: BinaryPatchLiteral, 15 - Size: 2, 16 - Data: []byte("\xbe\xef"), 17 - } 18 - 19 - text := &TextFragment{ 20 - NewPosition: 1, 21 - NewLines: 1, 22 - LinesAdded: 1, 23 - Lines: []Line{ 24 - {Op: OpAdd, Line: "new line\n"}, 25 - }, 26 - } 27 - 28 - file := &File{ 29 - TextFragments: []*TextFragment{text}, 30 - } 31 - 32 - src := bytes.NewReader(nil) 33 - dst := ioutil.Discard 34 - 35 - assertInProgress := func(t *testing.T, kind string, err error) { 36 - if !errors.Is(err, errApplyInProgress) { 37 - t.Fatalf("expected in-progress error for %s apply, but got: %v", kind, err) 38 - } 39 - } 40 - 41 - t.Run("binaryFirst", func(t *testing.T) { 42 - a := NewApplier(src) 43 - if err := a.ApplyBinaryFragment(dst, binary); err != nil { 44 - t.Fatalf("unexpected error applying fragment: %v", err) 45 - } 46 - assertInProgress(t, "text", a.ApplyTextFragment(dst, text)) 47 - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) 48 - assertInProgress(t, "file", a.ApplyFile(dst, file)) 49 - }) 50 - 51 - t.Run("textFirst", func(t *testing.T) { 52 - a := NewApplier(src) 53 - if err := a.ApplyTextFragment(dst, text); err != nil { 54 - t.Fatalf("unexpected error applying fragment: %v", err) 55 - } 56 - // additional text fragments are allowed 57 - if err := a.ApplyTextFragment(dst, text); err != nil { 58 - t.Fatalf("unexpected error applying second fragment: %v", err) 59 - } 60 - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) 61 - assertInProgress(t, "file", a.ApplyFile(dst, file)) 62 - }) 63 - 64 - t.Run("fileFirst", func(t *testing.T) { 65 - a := NewApplier(src) 66 - if err := a.ApplyFile(dst, file); err != nil { 67 - t.Fatalf("unexpected error applying file: %v", err) 68 - } 69 - assertInProgress(t, "text", a.ApplyTextFragment(dst, text)) 70 - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) 71 - 
assertInProgress(t, "file", a.ApplyFile(dst, file)) 72 - }) 73 - } 74 - 75 12 func TestApplyTextFragment(t *testing.T) { 76 13 tests := map[string]applyTest{ 77 14 "createFile": {Files: getApplyFiles("text_fragment_new")}, ··· 85 22 "changeStart": {Files: getApplyFiles("text_fragment_change_start")}, 86 23 "changeMiddle": {Files: getApplyFiles("text_fragment_change_middle")}, 87 24 "changeEnd": {Files: getApplyFiles("text_fragment_change_end")}, 25 + "changeEndEOL": {Files: getApplyFiles("text_fragment_change_end_eol")}, 88 26 "changeExact": {Files: getApplyFiles("text_fragment_change_exact")}, 89 27 "changeSingleNoEOL": {Files: getApplyFiles("text_fragment_change_single_noeol")}, 90 28 ··· 127 65 128 66 for name, test := range tests { 129 67 t.Run(name, func(t *testing.T) { 130 - test.run(t, func(w io.Writer, applier *Applier, file *File) error { 68 + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { 131 69 if len(file.TextFragments) != 1 { 132 70 t.Fatalf("patch should contain exactly one fragment, but it has %d", len(file.TextFragments)) 133 71 } 134 - return applier.ApplyTextFragment(w, file.TextFragments[0]) 72 + applier := NewTextApplier(dst, src) 73 + return applier.ApplyFragment(file.TextFragments[0]) 135 74 }) 136 75 }) 137 76 } ··· 176 115 177 116 for name, test := range tests { 178 117 t.Run(name, func(t *testing.T) { 179 - test.run(t, func(w io.Writer, applier *Applier, file *File) error { 180 - return applier.ApplyBinaryFragment(w, file.BinaryFragment) 118 + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { 119 + applier := NewBinaryApplier(dst, src) 120 + return applier.ApplyFragment(file.BinaryFragment) 181 121 }) 182 122 }) 183 123 } ··· 216 156 217 157 for name, test := range tests { 218 158 t.Run(name, func(t *testing.T) { 219 - test.run(t, func(w io.Writer, applier *Applier, file *File) error { 220 - return applier.ApplyFile(w, file) 159 + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { 
160 + return Apply(dst, src, file) 221 161 }) 222 162 }) 223 163 } ··· 228 168 Err interface{} 229 169 } 230 170 231 - func (at applyTest) run(t *testing.T, apply func(io.Writer, *Applier, *File) error) { 171 + func (at applyTest) run(t *testing.T, apply func(io.Writer, io.ReaderAt, *File) error) { 232 172 src, patch, out := at.Files.Load(t) 233 173 234 174 files, _, err := Parse(bytes.NewReader(patch)) ··· 239 179 t.Fatalf("patch should contain exactly one file, but it has %d", len(files)) 240 180 } 241 181 242 - applier := NewApplier(bytes.NewReader(src)) 243 - 244 182 var dst bytes.Buffer 245 - err = apply(&dst, applier, files[0]) 183 + err = apply(&dst, bytes.NewReader(src), files[0]) 246 184 if at.Err != nil { 247 185 assertError(t, at.Err, err, "applying fragment") 248 186 return

+152

gitdiff/apply_text.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "io" 5 + ) 6 + 7 + // TextApplier applies changes described in text fragments to source data. If 8 + // changes are described in multiple fragments, those fragments must be applied 9 + // in order. The applier must be closed after use. 10 + // 11 + // By default, TextApplier operates in "strict" mode, where fragment content 12 + // and positions must exactly match those of the source. 13 + type TextApplier struct { 14 + dst io.Writer 15 + src io.ReaderAt 16 + lineSrc LineReaderAt 17 + nextLine int64 18 + 19 + closed bool 20 + dirty bool 21 + } 22 + 23 + // NewTextApplier creates a TextApplier that reads data from src and writes 24 + // modified data to dst. If src implements LineReaderAt, it is used directly. 25 + func NewTextApplier(dst io.Writer, src io.ReaderAt) *TextApplier { 26 + a := TextApplier{ 27 + dst: dst, 28 + src: src, 29 + } 30 + 31 + if lineSrc, ok := src.(LineReaderAt); ok { 32 + a.lineSrc = lineSrc 33 + } else { 34 + a.lineSrc = &lineReaderAt{r: src} 35 + } 36 + 37 + return &a 38 + } 39 + 40 + // ApplyFragment applies the changes in the fragment f, writing unwritten data 41 + // before the start of the fragment and any changes from the fragment. If 42 + // multiple text fragments apply to the same content, ApplyFragment must be 43 + // called in order of increasing start position. As a result, each fragment can 44 + // be applied at most once. 45 + // 46 + // If an error occurs while applying, ApplyFragment returns an *ApplyError that 47 + // annotates the error with additional information. If the error is because of 48 + // a conflict between the fragment and the source, the wrapped error will be a 49 + // *Conflict. 
50 + func (a *TextApplier) ApplyFragment(f *TextFragment) error { 51 + if a.closed { 52 + return applyError(errApplierClosed) 53 + } 54 + 55 + // mark an apply as in progress, even if it fails before making changes 56 + a.dirty = true 57 + 58 + // application code assumes fragment fields are consistent 59 + if err := f.Validate(); err != nil { 60 + return applyError(err) 61 + } 62 + 63 + // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) 64 + fragStart := f.OldPosition - 1 65 + if fragStart < 0 { 66 + fragStart = 0 67 + } 68 + fragEnd := fragStart + f.OldLines 69 + 70 + start := a.nextLine 71 + if fragStart < start { 72 + return applyError(&Conflict{"fragment overlaps with an applied fragment"}) 73 + } 74 + 75 + if f.OldPosition == 0 { 76 + ok, err := isLen(a.src, 0) 77 + if err != nil { 78 + return applyError(err) 79 + } 80 + if !ok { 81 + return applyError(&Conflict{"cannot create new file from non-empty src"}) 82 + } 83 + } 84 + 85 + preimage := make([][]byte, fragEnd-start) 86 + n, err := a.lineSrc.ReadLinesAt(preimage, start) 87 + if err != nil { 88 + return applyError(err, lineNum(start+int64(n))) 89 + } 90 + 91 + // copy leading data before the fragment starts 92 + for i, line := range preimage[:fragStart-start] { 93 + if _, err := a.dst.Write(line); err != nil { 94 + a.nextLine = start + int64(i) 95 + return applyError(err, lineNum(a.nextLine)) 96 + } 97 + } 98 + preimage = preimage[fragStart-start:] 99 + 100 + // apply the changes in the fragment 101 + used := int64(0) 102 + for i, line := range f.Lines { 103 + if err := applyTextLine(a.dst, line, preimage, used); err != nil { 104 + a.nextLine = fragStart + used 105 + return applyError(err, lineNum(a.nextLine), fragLineNum(i)) 106 + } 107 + if line.Old() { 108 + used++ 109 + } 110 + } 111 + a.nextLine = fragStart + used 112 + 113 + // new position of +0,0 means a full delete, so check for leftovers 114 + if f.NewPosition == 0 && f.NewLines == 0 { 115 + var b [1][]byte 116 + n, 
err := a.lineSrc.ReadLinesAt(b[:], a.nextLine) 117 + if err != nil && err != io.EOF { 118 + return applyError(err, lineNum(a.nextLine)) 119 + } 120 + if n > 0 { 121 + return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine)) 122 + } 123 + } 124 + 125 + return nil 126 + } 127 + 128 + func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { 129 + if line.Old() && string(preimage[i]) != line.Line { 130 + return &Conflict{"fragment line does not match src line"} 131 + } 132 + if line.New() { 133 + _, err = io.WriteString(dst, line.Line) 134 + } 135 + return err 136 + } 137 + 138 + // Close writes any data following the last applied fragment and prevents 139 + // future calls to ApplyFragment. 140 + func (a *TextApplier) Close() (err error) { 141 + if a.closed { 142 + return nil 143 + } 144 + 145 + a.closed = true 146 + if !a.dirty { 147 + _, err = copyFrom(a.dst, a.src, 0) 148 + } else { 149 + _, err = copyLinesFrom(a.dst, a.lineSrc, a.nextLine) 150 + } 151 + return err 152 + }

+41 -2

gitdiff/base85.go

··· 19 19 } 20 20 21 21 // base85Decode decodes Base85-encoded data from src into dst. It uses the 22 - // alphabet defined by base85.c in the Git source tree, which appears to be 23 - // unique. src must contain at least len(dst) bytes of encoded data. 22 + // alphabet defined by base85.c in the Git source tree. src must contain at 23 + // least len(dst) bytes of encoded data. 24 24 func base85Decode(dst, src []byte) error { 25 25 var v uint32 26 26 var n, ndst int ··· 50 50 } 51 51 return nil 52 52 } 53 + 54 + // base85Encode encodes src in Base85, writing the result to dst. It uses the 55 + // alphabet defined by base85.c in the Git source tree. 56 + func base85Encode(dst, src []byte) { 57 + var di, si int 58 + 59 + encode := func(v uint32) { 60 + dst[di+0] = b85Alpha[(v/(85*85*85*85))%85] 61 + dst[di+1] = b85Alpha[(v/(85*85*85))%85] 62 + dst[di+2] = b85Alpha[(v/(85*85))%85] 63 + dst[di+3] = b85Alpha[(v/85)%85] 64 + dst[di+4] = b85Alpha[v%85] 65 + } 66 + 67 + n := (len(src) / 4) * 4 68 + for si < n { 69 + encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])) 70 + si += 4 71 + di += 5 72 + } 73 + 74 + var v uint32 75 + switch len(src) - si { 76 + case 3: 77 + v |= uint32(src[si+2]) << 8 78 + fallthrough 79 + case 2: 80 + v |= uint32(src[si+1]) << 16 81 + fallthrough 82 + case 1: 83 + v |= uint32(src[si+0]) << 24 84 + encode(v) 85 + } 86 + } 87 + 88 + // base85Len returns the length of n bytes of Base85 encoded data. 89 + func base85Len(n int) int { 90 + return (n + 3) / 4 * 5 91 + }

+58

gitdiff/base85_test.go

··· 1 1 package gitdiff 2 2 3 3 import ( 4 + "bytes" 4 5 "testing" 5 6 ) 6 7 ··· 58 59 }) 59 60 } 60 61 } 62 + 63 + func TestBase85Encode(t *testing.T) { 64 + tests := map[string]struct { 65 + Input []byte 66 + Output string 67 + }{ 68 + "zeroBytes": { 69 + Input: []byte{}, 70 + Output: "", 71 + }, 72 + "twoBytes": { 73 + Input: []byte{0xCA, 0xFE}, 74 + Output: "%KiWV", 75 + }, 76 + "fourBytes": { 77 + Input: []byte{0x0, 0x0, 0xCA, 0xFE}, 78 + Output: "007GV", 79 + }, 80 + "sixBytes": { 81 + Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE}, 82 + Output: "007GV%KiWV", 83 + }, 84 + } 85 + 86 + for name, test := range tests { 87 + t.Run(name, func(t *testing.T) { 88 + dst := make([]byte, len(test.Output)) 89 + base85Encode(dst, test.Input) 90 + for i, b := range test.Output { 91 + if dst[i] != byte(b) { 92 + t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i]) 93 + } 94 + } 95 + }) 96 + } 97 + } 98 + 99 + func FuzzBase85Roundtrip(f *testing.F) { 100 + f.Add([]byte{0x2b, 0x0d}) 101 + f.Add([]byte{0xbc, 0xb4, 0x3f}) 102 + f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25}) 103 + f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8}) 104 + 105 + f.Fuzz(func(t *testing.T, in []byte) { 106 + n := len(in) 107 + dst := make([]byte, base85Len(n)) 108 + out := make([]byte, n) 109 + 110 + base85Encode(dst, in) 111 + if err := base85Decode(out, dst); err != nil { 112 + t.Fatalf("unexpected error decoding base85 data: %v", err) 113 + } 114 + if !bytes.Equal(in, out) { 115 + t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) 116 + } 117 + }) 118 + }

+11 -4

gitdiff/binary.go

··· 50 50 } 51 51 52 52 func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { 53 - switch p.Line(0) { 54 - case "GIT binary patch\n": 53 + line := p.Line(0) 54 + switch { 55 + case line == "GIT binary patch\n": 55 56 hasData = true 56 - case "Binary files differ\n": 57 - case "Files differ\n": 57 + case isBinaryNoDataMarker(line): 58 58 default: 59 59 return false, false, nil 60 60 } ··· 63 63 return false, false, err 64 64 } 65 65 return true, hasData, nil 66 + } 67 + 68 + func isBinaryNoDataMarker(line string) bool { 69 + if strings.HasSuffix(line, " differ\n") { 70 + return strings.HasPrefix(line, "Binary files ") || strings.HasPrefix(line, "Files ") 71 + } 72 + return false 66 73 } 67 74 68 75 func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) {

+10

gitdiff/binary_test.go

··· 25 25 IsBinary: true, 26 26 HasData: false, 27 27 }, 28 + "binaryFileNoPatchPaths": { 29 + Input: "Binary files a/foo.bin and b/foo.bin differ\n", 30 + IsBinary: true, 31 + HasData: false, 32 + }, 33 + "fileNoPatch": { 34 + Input: "Files differ\n", 35 + IsBinary: true, 36 + HasData: false, 37 + }, 28 38 "textFile": { 29 39 Input: "@@ -10,14 +22,31 @@\n", 30 40 IsBinary: false,

+4 -4

gitdiff/file_header.go

··· 57 57 return nil, "", err 58 58 } 59 59 } 60 - return nil, "", nil 60 + return nil, preamble.String(), nil 61 61 } 62 62 63 63 func (p *parser) ParseGitFileHeader() (*File, error) { ··· 324 324 } 325 325 326 326 func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) { 327 - f.OldMode, err = parseMode(line) 327 + f.OldMode, err = parseMode(strings.TrimSpace(line)) 328 328 return 329 329 } 330 330 331 331 func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) { 332 - f.NewMode, err = parseMode(line) 332 + f.NewMode, err = parseMode(strings.TrimSpace(line)) 333 333 return 334 334 } 335 335 ··· 527 527 528 528 // a valid timestamp can have optional ':' in zone specifier 529 529 // remove that if it exists so we have a single format 530 - if ts[len(ts)-3] == ':' { 530 + if len(ts) >= 3 && ts[len(ts)-3] == ':' { 531 531 ts = ts[:len(ts)-3] + ts[len(ts)-2:] 532 532 } 533 533

+29

gitdiff/file_header_test.go

··· 486 486 OldMode: os.FileMode(0100644), 487 487 }, 488 488 }, 489 + "oldModeWithTrailingSpace": { 490 + Line: "old mode 100644\r\n", 491 + OutputFile: &File{ 492 + OldMode: os.FileMode(0100644), 493 + }, 494 + }, 489 495 "invalidOldMode": { 490 496 Line: "old mode rw\n", 491 497 Err: true, ··· 496 502 NewMode: os.FileMode(0100755), 497 503 }, 498 504 }, 505 + "newModeWithTrailingSpace": { 506 + Line: "new mode 100755\r\n", 507 + OutputFile: &File{ 508 + NewMode: os.FileMode(0100755), 509 + }, 510 + }, 499 511 "invalidNewMode": { 500 512 Line: "new mode rwx\n", 501 513 Err: true, ··· 511 523 }, 512 524 "newFileMode": { 513 525 Line: "new file mode 100755\n", 526 + DefaultName: "dir/file.txt", 527 + OutputFile: &File{ 528 + NewName: "dir/file.txt", 529 + NewMode: os.FileMode(0100755), 530 + IsNew: true, 531 + }, 532 + }, 533 + "newFileModeWithTrailingSpace": { 534 + Line: "new file mode 100755\r\n", 514 535 DefaultName: "dir/file.txt", 515 536 OutputFile: &File{ 516 537 NewName: "dir/file.txt", ··· 722 743 }, 723 744 "notEpoch": { 724 745 Input: "+++ file.txt\t2019-03-21 12:34:56.789 -0700\n", 746 + Output: false, 747 + }, 748 + "notTimestamp": { 749 + Input: "+++ file.txt\trandom text\n", 750 + Output: false, 751 + }, 752 + "notTimestampShort": { 753 + Input: "+++ file.txt\t0\n", 725 754 Output: false, 726 755 }, 727 756 }

+281

gitdiff/format.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "bytes" 5 + "compress/zlib" 6 + "fmt" 7 + "io" 8 + "strconv" 9 + ) 10 + 11 + type formatter struct { 12 + w io.Writer 13 + err error 14 + } 15 + 16 + func newFormatter(w io.Writer) *formatter { 17 + return &formatter{w: w} 18 + } 19 + 20 + func (fm *formatter) Write(p []byte) (int, error) { 21 + if fm.err != nil { 22 + return len(p), nil 23 + } 24 + if _, err := fm.w.Write(p); err != nil { 25 + fm.err = err 26 + } 27 + return len(p), nil 28 + } 29 + 30 + func (fm *formatter) WriteString(s string) (int, error) { 31 + fm.Write([]byte(s)) 32 + return len(s), nil 33 + } 34 + 35 + func (fm *formatter) WriteByte(c byte) error { 36 + fm.Write([]byte{c}) 37 + return nil 38 + } 39 + 40 + func (fm *formatter) WriteQuotedName(s string) { 41 + qpos := 0 42 + for i := 0; i < len(s); i++ { 43 + ch := s[i] 44 + if q, quoted := quoteByte(ch); quoted { 45 + if qpos == 0 { 46 + fm.WriteByte('"') 47 + } 48 + fm.WriteString(s[qpos:i]) 49 + fm.Write(q) 50 + qpos = i + 1 51 + } 52 + } 53 + fm.WriteString(s[qpos:]) 54 + if qpos > 0 { 55 + fm.WriteByte('"') 56 + } 57 + } 58 + 59 + var quoteEscapeTable = map[byte]byte{ 60 + '\a': 'a', 61 + '\b': 'b', 62 + '\t': 't', 63 + '\n': 'n', 64 + '\v': 'v', 65 + '\f': 'f', 66 + '\r': 'r', 67 + '"': '"', 68 + '\\': '\\', 69 + } 70 + 71 + func quoteByte(b byte) ([]byte, bool) { 72 + if q, ok := quoteEscapeTable[b]; ok { 73 + return []byte{'\\', q}, true 74 + } 75 + if b < 0x20 || b >= 0x7F { 76 + return []byte{ 77 + '\\', 78 + '0' + (b>>6)&0o3, 79 + '0' + (b>>3)&0o7, 80 + '0' + (b>>0)&0o7, 81 + }, true 82 + } 83 + return nil, false 84 + } 85 + 86 + func (fm *formatter) FormatFile(f *File) { 87 + fm.WriteString("diff --git ") 88 + 89 + var aName, bName string 90 + switch { 91 + case f.OldName == "": 92 + aName = f.NewName 93 + bName = f.NewName 94 + 95 + case f.NewName == "": 96 + aName = f.OldName 97 + bName = f.OldName 98 + 99 + default: 100 + aName = f.OldName 101 + bName = f.NewName 102 + } 103 + 104 + 
fm.WriteQuotedName("a/" + aName) 105 + fm.WriteByte(' ') 106 + fm.WriteQuotedName("b/" + bName) 107 + fm.WriteByte('\n') 108 + 109 + if f.OldMode != 0 { 110 + if f.IsDelete { 111 + fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode) 112 + } else if f.NewMode != 0 { 113 + fmt.Fprintf(fm, "old mode %o\n", f.OldMode) 114 + } 115 + } 116 + 117 + if f.NewMode != 0 { 118 + if f.IsNew { 119 + fmt.Fprintf(fm, "new file mode %o\n", f.NewMode) 120 + } else if f.OldMode != 0 { 121 + fmt.Fprintf(fm, "new mode %o\n", f.NewMode) 122 + } 123 + } 124 + 125 + if f.Score > 0 { 126 + if f.IsCopy || f.IsRename { 127 + fmt.Fprintf(fm, "similarity index %d%%\n", f.Score) 128 + } else { 129 + fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score) 130 + } 131 + } 132 + 133 + if f.IsCopy { 134 + if f.OldName != "" { 135 + fm.WriteString("copy from ") 136 + fm.WriteQuotedName(f.OldName) 137 + fm.WriteByte('\n') 138 + } 139 + if f.NewName != "" { 140 + fm.WriteString("copy to ") 141 + fm.WriteQuotedName(f.NewName) 142 + fm.WriteByte('\n') 143 + } 144 + } 145 + 146 + if f.IsRename { 147 + if f.OldName != "" { 148 + fm.WriteString("rename from ") 149 + fm.WriteQuotedName(f.OldName) 150 + fm.WriteByte('\n') 151 + } 152 + if f.NewName != "" { 153 + fm.WriteString("rename to ") 154 + fm.WriteQuotedName(f.NewName) 155 + fm.WriteByte('\n') 156 + } 157 + } 158 + 159 + if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { 160 + fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) 161 + 162 + // Mode is only included on the index line when it is not changing 163 + if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { 164 + fmt.Fprintf(fm, " %o", f.OldMode) 165 + } 166 + 167 + fm.WriteByte('\n') 168 + } 169 + 170 + if f.IsBinary { 171 + if f.BinaryFragment == nil { 172 + fm.WriteString("Binary files ") 173 + fm.WriteQuotedName("a/" + aName) 174 + fm.WriteString(" and ") 175 + fm.WriteQuotedName("b/" + bName) 176 + fm.WriteString(" differ\n") 177 + } else { 178 + 
fm.WriteString("GIT binary patch\n") 179 + fm.FormatBinaryFragment(f.BinaryFragment) 180 + if f.ReverseBinaryFragment != nil { 181 + fm.FormatBinaryFragment(f.ReverseBinaryFragment) 182 + } 183 + } 184 + } 185 + 186 + // The "---" and "+++" lines only appear for text patches with fragments 187 + if len(f.TextFragments) > 0 { 188 + fm.WriteString("--- ") 189 + if f.OldName == "" { 190 + fm.WriteString("/dev/null") 191 + } else { 192 + fm.WriteQuotedName("a/" + f.OldName) 193 + } 194 + fm.WriteByte('\n') 195 + 196 + fm.WriteString("+++ ") 197 + if f.NewName == "" { 198 + fm.WriteString("/dev/null") 199 + } else { 200 + fm.WriteQuotedName("b/" + f.NewName) 201 + } 202 + fm.WriteByte('\n') 203 + 204 + for _, frag := range f.TextFragments { 205 + fm.FormatTextFragment(frag) 206 + } 207 + } 208 + } 209 + 210 + func (fm *formatter) FormatTextFragment(f *TextFragment) { 211 + fm.FormatTextFragmentHeader(f) 212 + fm.WriteByte('\n') 213 + 214 + for _, line := range f.Lines { 215 + fm.WriteString(line.Op.String()) 216 + fm.WriteString(line.Line) 217 + if line.NoEOL() { 218 + fm.WriteString("\n\\ No newline at end of file\n") 219 + } 220 + } 221 + } 222 + 223 + func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) { 224 + fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) 225 + if f.Comment != "" { 226 + fm.WriteByte(' ') 227 + fm.WriteString(f.Comment) 228 + } 229 + } 230 + 231 + func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) { 232 + const ( 233 + maxBytesPerLine = 52 234 + ) 235 + 236 + switch f.Method { 237 + case BinaryPatchDelta: 238 + fm.WriteString("delta ") 239 + case BinaryPatchLiteral: 240 + fm.WriteString("literal ") 241 + } 242 + fm.Write(strconv.AppendInt(nil, f.Size, 10)) 243 + fm.WriteByte('\n') 244 + 245 + data := deflateBinaryChunk(f.Data) 246 + n := (len(data) / maxBytesPerLine) * maxBytesPerLine 247 + 248 + buf := make([]byte, base85Len(maxBytesPerLine)) 249 + for i := 0; i < n; i += 
maxBytesPerLine { 250 + base85Encode(buf, data[i:i+maxBytesPerLine]) 251 + fm.WriteByte('z') 252 + fm.Write(buf) 253 + fm.WriteByte('\n') 254 + } 255 + if remainder := len(data) - n; remainder > 0 { 256 + buf = buf[0:base85Len(remainder)] 257 + 258 + sizeChar := byte(remainder) 259 + if remainder <= 26 { 260 + sizeChar = 'A' + sizeChar - 1 261 + } else { 262 + sizeChar = 'a' + sizeChar - 27 263 + } 264 + 265 + base85Encode(buf, data[n:]) 266 + fm.WriteByte(sizeChar) 267 + fm.Write(buf) 268 + fm.WriteByte('\n') 269 + } 270 + fm.WriteByte('\n') 271 + } 272 + 273 + func deflateBinaryChunk(data []byte) []byte { 274 + var b bytes.Buffer 275 + 276 + zw := zlib.NewWriter(&b) 277 + _, _ = zw.Write(data) 278 + _ = zw.Close() 279 + 280 + return b.Bytes() 281 + }

+157

gitdiff/format_roundtrip_test.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "bytes" 5 + "fmt" 6 + "os" 7 + "path/filepath" 8 + "slices" 9 + "testing" 10 + ) 11 + 12 + func TestFormatRoundtrip(t *testing.T) { 13 + patches := []struct { 14 + File string 15 + SkipTextCompare bool 16 + }{ 17 + {File: "copy.patch"}, 18 + {File: "copy_modify.patch"}, 19 + {File: "delete.patch"}, 20 + {File: "mode.patch"}, 21 + {File: "mode_modify.patch"}, 22 + {File: "modify.patch"}, 23 + {File: "new.patch"}, 24 + {File: "new_empty.patch"}, 25 + {File: "new_mode.patch"}, 26 + {File: "rename.patch"}, 27 + {File: "rename_modify.patch"}, 28 + 29 + // Due to differences between Go's 'encoding/zlib' package and the zlib 30 + // C library, binary patches cannot be compared directly as the patch 31 + // data is slightly different when re-encoded by Go. 32 + {File: "binary_modify.patch", SkipTextCompare: true}, 33 + {File: "binary_new.patch", SkipTextCompare: true}, 34 + {File: "binary_modify_nodata.patch"}, 35 + } 36 + 37 + for _, patch := range patches { 38 + t.Run(patch.File, func(t *testing.T) { 39 + b, err := os.ReadFile(filepath.Join("testdata", "string", patch.File)) 40 + if err != nil { 41 + t.Fatalf("failed to read patch: %v", err) 42 + } 43 + 44 + original := assertParseSingleFile(t, b, "patch") 45 + str := original.String() 46 + 47 + if !patch.SkipTextCompare { 48 + if string(b) != str { 49 + t.Errorf("incorrect patch text\nexpected: %q\n actual: %q\n", string(b), str) 50 + } 51 + } 52 + 53 + reparsed := assertParseSingleFile(t, []byte(str), "formatted patch") 54 + assertFilesEqual(t, original, reparsed) 55 + }) 56 + } 57 + } 58 + 59 + func assertParseSingleFile(t *testing.T, b []byte, kind string) *File { 60 + files, _, err := Parse(bytes.NewReader(b)) 61 + if err != nil { 62 + t.Fatalf("failed to parse %s: %v", kind, err) 63 + } 64 + if len(files) != 1 { 65 + t.Fatalf("expected %s to contain a single files, but found %d", kind, len(files)) 66 + } 67 + return files[0] 68 + } 69 + 70 + func assertFilesEqual(t 
*testing.T, expected, actual *File) { 71 + assertEqual(t, expected.OldName, actual.OldName, "OldName") 72 + assertEqual(t, expected.NewName, actual.NewName, "NewName") 73 + 74 + assertEqual(t, expected.IsNew, actual.IsNew, "IsNew") 75 + assertEqual(t, expected.IsDelete, actual.IsDelete, "IsDelete") 76 + assertEqual(t, expected.IsCopy, actual.IsCopy, "IsCopy") 77 + assertEqual(t, expected.IsRename, actual.IsRename, "IsRename") 78 + 79 + assertEqual(t, expected.OldMode, actual.OldMode, "OldMode") 80 + assertEqual(t, expected.NewMode, actual.NewMode, "NewMode") 81 + 82 + assertEqual(t, expected.OldOIDPrefix, actual.OldOIDPrefix, "OldOIDPrefix") 83 + assertEqual(t, expected.NewOIDPrefix, actual.NewOIDPrefix, "NewOIDPrefix") 84 + assertEqual(t, expected.Score, actual.Score, "Score") 85 + 86 + if len(expected.TextFragments) == len(actual.TextFragments) { 87 + for i := range expected.TextFragments { 88 + prefix := fmt.Sprintf("TextFragments[%d].", i) 89 + ef := expected.TextFragments[i] 90 + af := actual.TextFragments[i] 91 + 92 + assertEqual(t, ef.Comment, af.Comment, prefix+"Comment") 93 + 94 + assertEqual(t, ef.OldPosition, af.OldPosition, prefix+"OldPosition") 95 + assertEqual(t, ef.OldLines, af.OldLines, prefix+"OldLines") 96 + 97 + assertEqual(t, ef.NewPosition, af.NewPosition, prefix+"NewPosition") 98 + assertEqual(t, ef.NewLines, af.NewLines, prefix+"NewLines") 99 + 100 + assertEqual(t, ef.LinesAdded, af.LinesAdded, prefix+"LinesAdded") 101 + assertEqual(t, ef.LinesDeleted, af.LinesDeleted, prefix+"LinesDeleted") 102 + 103 + assertEqual(t, ef.LeadingContext, af.LeadingContext, prefix+"LeadingContext") 104 + assertEqual(t, ef.TrailingContext, af.TrailingContext, prefix+"TrailingContext") 105 + 106 + if !slices.Equal(ef.Lines, af.Lines) { 107 + t.Errorf("%sLines: expected %#v, actual %#v", prefix, ef.Lines, af.Lines) 108 + } 109 + } 110 + } else { 111 + t.Errorf("TextFragments: expected length %d, actual length %d", len(expected.TextFragments), 
len(actual.TextFragments)) 112 + } 113 + 114 + assertEqual(t, expected.IsBinary, actual.IsBinary, "IsBinary") 115 + 116 + if expected.BinaryFragment != nil { 117 + if actual.BinaryFragment == nil { 118 + t.Errorf("BinaryFragment: expected non-nil, actual is nil") 119 + } else { 120 + ef := expected.BinaryFragment 121 + af := expected.BinaryFragment 122 + 123 + assertEqual(t, ef.Method, af.Method, "BinaryFragment.Method") 124 + assertEqual(t, ef.Size, af.Size, "BinaryFragment.Size") 125 + 126 + if !slices.Equal(ef.Data, af.Data) { 127 + t.Errorf("BinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) 128 + } 129 + } 130 + } else if actual.BinaryFragment != nil { 131 + t.Errorf("BinaryFragment: expected nil, actual is non-nil") 132 + } 133 + 134 + if expected.ReverseBinaryFragment != nil { 135 + if actual.ReverseBinaryFragment == nil { 136 + t.Errorf("ReverseBinaryFragment: expected non-nil, actual is nil") 137 + } else { 138 + ef := expected.ReverseBinaryFragment 139 + af := expected.ReverseBinaryFragment 140 + 141 + assertEqual(t, ef.Method, af.Method, "ReverseBinaryFragment.Method") 142 + assertEqual(t, ef.Size, af.Size, "ReverseBinaryFragment.Size") 143 + 144 + if !slices.Equal(ef.Data, af.Data) { 145 + t.Errorf("ReverseBinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) 146 + } 147 + } 148 + } else if actual.ReverseBinaryFragment != nil { 149 + t.Errorf("ReverseBinaryFragment: expected nil, actual is non-nil") 150 + } 151 + } 152 + 153 + func assertEqual[T comparable](t *testing.T, expected, actual T, name string) { 154 + if expected != actual { 155 + t.Errorf("%s: expected %#v, actual %#v", name, expected, actual) 156 + } 157 + }

+28

gitdiff/format_test.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "strings" 5 + "testing" 6 + ) 7 + 8 + func TestFormatter_WriteQuotedName(t *testing.T) { 9 + tests := []struct { 10 + Input string 11 + Expected string 12 + }{ 13 + {"noquotes.txt", `noquotes.txt`}, 14 + {"no quotes.txt", `no quotes.txt`}, 15 + {"new\nline", `"new\nline"`}, 16 + {"escape\x1B null\x00", `"escape\033 null\000"`}, 17 + {"snowman \u2603 snowman", `"snowman \342\230\203 snowman"`}, 18 + {"\"already quoted\"", `"\"already quoted\""`}, 19 + } 20 + 21 + for _, test := range tests { 22 + var b strings.Builder 23 + newFormatter(&b).WriteQuotedName(test.Input) 24 + if b.String() != test.Expected { 25 + t.Errorf("expected %q, got %q", test.Expected, b.String()) 26 + } 27 + } 28 + }

+33 -2

gitdiff/gitdiff.go

··· 4 4 "errors" 5 5 "fmt" 6 6 "os" 7 + "strings" 7 8 ) 8 9 9 10 // File describes changes to a single file. It can be either a text file or a ··· 38 39 ReverseBinaryFragment *BinaryFragment 39 40 } 40 41 42 + // String returns a git diff representation of this file. The value can be 43 + // parsed by this library to obtain the same File, but may not be the same as 44 + // the original input. 45 + func (f *File) String() string { 46 + var diff strings.Builder 47 + newFormatter(&diff).FormatFile(f) 48 + return diff.String() 49 + } 50 + 41 51 // TextFragment describes changed lines starting at a specific line in a text file. 42 52 type TextFragment struct { 43 53 Comment string ··· 57 67 Lines []Line 58 68 } 59 69 60 - // Header returns the canonical header of this fragment. 70 + // String returns a git diff format of this fragment. See [File.String] for 71 + // more details on this format. 72 + func (f *TextFragment) String() string { 73 + var diff strings.Builder 74 + newFormatter(&diff).FormatTextFragment(f) 75 + return diff.String() 76 + } 77 + 78 + // Header returns a git diff header of this fragment. See [File.String] for 79 + // more details on this format. 61 80 func (f *TextFragment) Header() string { 62 - return fmt.Sprintf("@@ -%d,%d +%d,%d @@ %s", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines, f.Comment) 81 + var hdr strings.Builder 82 + newFormatter(&hdr).FormatTextFragmentHeader(f) 83 + return hdr.String() 63 84 } 64 85 65 86 // Validate checks that the fragment is self-consistent and appliable. Validate ··· 197 218 // BinaryPatchLiteral indicates the data is the exact file content 198 219 BinaryPatchLiteral 199 220 ) 221 + 222 + // String returns a git diff format of this fragment. Due to differences in 223 + // zlib implementation between Go and Git, encoded binary data in the result 224 + // will likely differ from what Git produces for the same input. See 225 + // [File.String] for more details on this format. 
226 + func (f *BinaryFragment) String() string { 227 + var diff strings.Builder 228 + newFormatter(&diff).FormatBinaryFragment(f) 229 + return diff.String() 230 + }

+7 -3

gitdiff/parser.go

··· 12 12 // Parse parses a patch with changes to one or more files. Any content before 13 13 // the first file is returned as the second value. If an error occurs while 14 14 // parsing, it returns all files parsed before the error. 15 + // 16 + // Parse expects to receive a single patch. If the input may contain multiple 17 + // patches (for example, if it is an mbox file), callers should split it into 18 + // individual patches and call Parse on each one. 15 19 func Parse(r io.Reader) ([]*File, string, error) { 16 20 p := newParser(r) 17 21 ··· 29 33 if err != nil { 30 34 return files, preamble, err 31 35 } 36 + if len(files) == 0 { 37 + preamble = pre 38 + } 32 39 if file == nil { 33 40 break 34 41 } ··· 46 53 } 47 54 } 48 55 49 - if len(files) == 0 { 50 - preamble = pre 51 - } 52 56 files = append(files, file) 53 57 } 54 58

+16 -2

gitdiff/parser_test.go

··· 281 281 --- could this be a header? 282 282 nope, it's just some dashes 283 283 `, 284 - Output: nil, 285 - Preamble: "", 284 + Output: nil, 285 + Preamble: ` 286 + this is a line 287 + this is another line 288 + --- could this be a header? 289 + nope, it's just some dashes 290 + `, 286 291 }, 287 292 "detatchedFragmentLike": { 288 293 Input: ` ··· 290 295 @@ -1,3 +1,4 ~1,5 @@ 291 296 `, 292 297 Output: nil, 298 + Preamble: ` 299 + a wild fragment appears? 300 + @@ -1,3 +1,4 ~1,5 @@ 301 + `, 293 302 }, 294 303 "detatchedFragment": { 295 304 Input: ` ··· 425 434 }, 426 435 }, 427 436 Preamble: textPreamble, 437 + }, 438 + "noFiles": { 439 + InputFile: "testdata/no_files.patch", 440 + Output: nil, 441 + Preamble: textPreamble, 428 442 }, 429 443 "newBinaryFile": { 430 444 InputFile: "testdata/new_binary_file.patch",

+125 -129

gitdiff/patch_header.go

··· 5 5 "errors" 6 6 "fmt" 7 7 "io" 8 + "io/ioutil" 8 9 "mime/quotedprintable" 9 10 "net/mail" 10 11 "strconv" ··· 51 52 // line, that line will be removed and everything after it will be 52 53 // placed in BodyAppendix. 53 54 BodyAppendix string 55 + 56 + // All headers completely unparsed 57 + RawHeaders map[string][]string 54 58 } 55 59 56 60 // Message returns the commit message for the header. The message consists of ··· 67 71 return msg.String() 68 72 } 69 73 70 - // PatchIdentity identifies a person who authored or committed a patch. 71 - type PatchIdentity struct { 72 - Name string 73 - Email string 74 - } 75 - 76 - func (i PatchIdentity) String() string { 77 - name := i.Name 78 - if name == "" { 79 - name = `""` 80 - } 81 - return fmt.Sprintf("%s <%s>", name, i.Email) 82 - } 83 - 84 - // ParsePatchIdentity parses a patch identity string. A valid string contains a 85 - // non-empty name followed by an email address in angle brackets. Like Git, 86 - // ParsePatchIdentity does not require that the email address is valid or 87 - // properly formatted, only that it is non-empty. The name must not contain a 88 - // left angle bracket, '<', and the email address must not contain a right 89 - // angle bracket, '>'. 
90 - func ParsePatchIdentity(s string) (PatchIdentity, error) { 91 - var emailStart, emailEnd int 92 - for i, c := range s { 93 - if c == '<' && emailStart == 0 { 94 - emailStart = i + 1 95 - } 96 - if c == '>' && emailStart > 0 { 97 - emailEnd = i 98 - break 99 - } 100 - } 101 - if emailStart > 0 && emailEnd == 0 { 102 - return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s) 103 - } 104 - 105 - var name, email string 106 - if emailStart > 0 { 107 - name = strings.TrimSpace(s[:emailStart-1]) 108 - } 109 - if emailStart > 0 && emailEnd > 0 { 110 - email = strings.TrimSpace(s[emailStart:emailEnd]) 111 - } 112 - if name == "" || email == "" { 113 - return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s) 114 - } 115 - 116 - return PatchIdentity{Name: name, Email: email}, nil 117 - } 118 - 119 74 // ParsePatchDate parses a patch date string. It returns the parsed time or an 120 75 // error if s has an unknown format. ParsePatchDate supports the iso, rfc, 121 76 // short, raw, unix, and default formats (with local variants) used by the ··· 164 119 return time.Time{}, fmt.Errorf("unknown date format: %s", s) 165 120 } 166 121 167 - // ParsePatchHeader parses a preamble string as returned by Parse into a 122 + // A PatchHeaderOption modifies the behavior of ParsePatchHeader. 123 + type PatchHeaderOption func(*patchHeaderOptions) 124 + 125 + // SubjectCleanMode controls how ParsePatchHeader cleans subject lines when 126 + // parsing mail-formatted patches. 127 + type SubjectCleanMode int 128 + 129 + const ( 130 + // SubjectCleanWhitespace removes leading and trailing whitespace. 131 + SubjectCleanWhitespace SubjectCleanMode = iota 132 + 133 + // SubjectCleanAll removes leading and trailing whitespace, leading "Re:", 134 + // "re:", and ":" strings, and leading strings enclosed by '[' and ']'. 135 + // This is the default behavior of git (see `git mailinfo`) and this 136 + // package. 
137 + SubjectCleanAll 138 + 139 + // SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes 140 + // leading strings enclosed by '[' and ']' if they start with "PATCH". 141 + SubjectCleanPatchOnly 142 + ) 143 + 144 + // WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By 145 + // default, uses SubjectCleanAll. 146 + func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption { 147 + return func(opts *patchHeaderOptions) { 148 + opts.subjectCleanMode = m 149 + } 150 + } 151 + 152 + type patchHeaderOptions struct { 153 + subjectCleanMode SubjectCleanMode 154 + } 155 + 156 + // ParsePatchHeader parses the preamble string returned by [Parse] into a 168 157 // PatchHeader. Due to the variety of header formats, some fields of the parsed 169 158 // PatchHeader may be unset after parsing. 170 159 // 171 160 // Supported formats are the short, medium, full, fuller, and email pretty 172 - // formats used by git diff, git log, and git show and the UNIX mailbox format 173 - // used by git format-patch. 161 + // formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox 162 + // format used by `git format-patch`. 163 + // 164 + // When parsing mail-formatted headers, ParsePatchHeader tries to remove 165 + // email-specific content from the title and body: 174 166 // 175 - // If ParsePatchHeader detects that it is handling an email, it will 176 - // remove extra content at the beginning of the title line, such as 177 - // `[PATCH]` or `Re:` in the same way that `git mailinfo` does. 178 - // SubjectPrefix will be set to the value of this removed string. 179 - // (`git mailinfo` is the core part of `git am` that pulls information 180 - // out of an individual mail.) 167 + // - Based on the SubjectCleanMode, remove prefixes like reply markers and 168 + // "[PATCH]" strings from the subject, saving any removed content in the 169 + // SubjectPrefix field. 
Parsing always discards leading and trailing 170 + // whitespace from the subject line. The default mode is SubjectCleanAll. 181 171 // 182 - // Additionally, if ParsePatchHeader detects that it's handling an 183 - // email, it will remove a `---` line and put anything after it into 184 - // BodyAppendix. 172 + // - If the body contains a "---" line (3 hyphens), remove that line and any 173 + // content after it from the body and save it in the BodyAppendix field. 185 174 // 186 - // Those wishing the effect of a plain `git am` should use 187 - // `PatchHeader.Title + "\n" + PatchHeader.Body` (or 188 - // `PatchHeader.Message()`). Those wishing to retain the subject 189 - // prefix and appendix material should use `PatchHeader.SubjectPrefix 190 - // + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" + 191 - // PatchHeader.BodyAppendix`. 192 - func ParsePatchHeader(s string) (*PatchHeader, error) { 193 - r := bufio.NewReader(strings.NewReader(s)) 175 + // ParsePatchHeader tries to process content it does not understand without 176 + // returning errors, but will return errors if well-identified content like 177 + // dates or identities uses unknown or invalid formats. 
178 + func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) { 179 + opts := patchHeaderOptions{ 180 + subjectCleanMode: SubjectCleanAll, // match git defaults 181 + } 182 + for _, optFn := range options { 183 + optFn(&opts) 184 + } 194 185 195 - var line string 196 - for { 197 - var err error 198 - line, err = r.ReadString('\n') 199 - if err == io.EOF { 200 - break 201 - } 202 - if err != nil { 203 - return nil, err 204 - } 186 + header = strings.TrimSpace(header) 187 + if header == "" { 188 + return &PatchHeader{}, nil 189 + } 205 190 206 - line = strings.TrimSpace(line) 207 - if len(line) > 0 { 208 - break 209 - } 191 + var firstLine, rest string 192 + if idx := strings.IndexByte(header, '\n'); idx >= 0 { 193 + firstLine = header[:idx] 194 + rest = header[idx+1:] 195 + } else { 196 + firstLine = header 197 + rest = "" 210 198 } 211 199 212 200 switch { 213 - case strings.HasPrefix(line, mailHeaderPrefix): 214 - return parseHeaderMail(line, r) 215 - case strings.HasPrefix(line, mailMinimumHeaderPrefix): 216 - r = bufio.NewReader(strings.NewReader(s)) 217 - return parseHeaderMail("", r) 218 - case strings.HasPrefix(line, prettyHeaderPrefix): 219 - return parseHeaderPretty(line, r) 201 + case strings.HasPrefix(firstLine, mailHeaderPrefix): 202 + return parseHeaderMail(firstLine, strings.NewReader(rest), opts) 203 + 204 + case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix): 205 + // With a minimum header, the first line is part of the actual mail 206 + // content and needs to be parsed as part of the "rest" 207 + return parseHeaderMail("", strings.NewReader(header), opts) 208 + 209 + case strings.HasPrefix(firstLine, prettyHeaderPrefix): 210 + return parseHeaderPretty(firstLine, strings.NewReader(rest)) 220 211 } 212 + 221 213 return nil, errors.New("unrecognized patch header format") 222 214 } 223 215 ··· 232 224 233 225 h := &PatchHeader{} 234 226 235 - prettyLine = prettyLine[len(prettyHeaderPrefix):] 227 + prettyLine = 
strings.TrimPrefix(prettyLine, prettyHeaderPrefix) 236 228 if i := strings.IndexByte(prettyLine, ' '); i > 0 { 237 229 h.SHA = prettyLine[:i] 238 230 } else { ··· 248 240 break 249 241 } 250 242 243 + items := strings.SplitN(line, ":", 2) 244 + 245 + // we have "key: value" 246 + if len(items) == 2 { 247 + key := items[0] 248 + val := items[1] 249 + h.RawHeaders[key] = append(h.RawHeaders[key], val) 250 + } 251 + 251 252 switch { 252 253 case strings.HasPrefix(line, authorPrefix): 253 254 u, err := ParsePatchIdentity(line[len(authorPrefix):]) ··· 296 297 h.Title = title 297 298 298 299 if title != "" { 299 - // Don't check for an appendix 300 + // Don't check for an appendix, pretty headers do not contain them 300 301 body, _ := scanMessageBody(s, indent, false) 301 302 if s.Err() != nil { 302 303 return nil, s.Err() ··· 365 366 return body.String(), appendix.String() 366 367 } 367 368 368 - func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { 369 + func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) { 369 370 msg, err := mail.ReadMessage(r) 370 371 if err != nil { 371 372 return nil, err 372 373 } 373 374 374 375 h := &PatchHeader{} 376 + h.RawHeaders = msg.Header 375 377 376 - if len(mailLine) > len(mailHeaderPrefix) { 377 - mailLine = mailLine[len(mailHeaderPrefix):] 378 + if strings.HasPrefix(mailLine, mailHeaderPrefix) { 379 + mailLine = strings.TrimPrefix(mailLine, mailHeaderPrefix) 378 380 if i := strings.IndexByte(mailLine, ' '); i > 0 { 379 381 h.SHA = mailLine[:i] 380 382 } 381 383 } 382 384 383 - addrs, err := msg.Header.AddressList("From") 384 - if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) { 385 - return nil, err 386 - } 387 - if len(addrs) > 0 { 388 - addr := addrs[0] 389 - if addr.Name == "" { 390 - addr.Name = addr.Address 385 + from := msg.Header.Get("From") 386 + if from != "" { 387 + u, err := ParsePatchIdentity(from) 388 + if err != nil { 389 + return nil, err 391 
390 } 392 - h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address} 391 + h.Author = &u 393 392 } 394 393 395 394 date := msg.Header.Get("Date") ··· 402 401 } 403 402 404 403 subject := msg.Header.Get("Subject") 405 - h.SubjectPrefix, h.Title = parseSubject(subject) 404 + h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode) 406 405 407 406 s := bufio.NewScanner(msg.Body) 408 407 h.Body, h.BodyAppendix = scanMessageBody(s, "", true) ··· 413 412 return h, nil 414 413 } 415 414 416 - // Takes an email subject and returns the patch prefix and commit 417 - // title. i.e., `[PATCH v3 3/5] Implement foo` would return `[PATCH 418 - // v3 3/5] ` and `Implement foo` 419 - func parseSubject(s string) (string, string) { 420 - // This is meant to be compatible with 421 - // https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject(). 422 - // If compatibility with `git am` drifts, go there to see if there 423 - // are any updates. 415 + func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) { 416 + switch mode { 417 + case SubjectCleanAll, SubjectCleanPatchOnly: 418 + case SubjectCleanWhitespace: 419 + return "", strings.TrimSpace(decodeSubject(s)) 420 + default: 421 + panic(fmt.Sprintf("unknown clean mode: %d", mode)) 422 + } 423 + 424 + // Based on the algorithm from Git in mailinfo.c:cleanup_subject() 425 + // If compatibility with `git am` drifts, go there to see if there are any updates. 
424 426 425 427 at := 0 426 428 for at < len(s) { 427 429 switch s[at] { 428 430 case 'r', 'R': 429 431 // Detect re:, Re:, rE: and RE: 430 - if at+2 < len(s) && 431 - (s[at+1] == 'e' || s[at+1] == 'E') && 432 - s[at+2] == ':' { 432 + if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' { 433 433 at += 3 434 434 continue 435 435 } ··· 440 440 continue 441 441 442 442 case '[': 443 - // Look for closing parenthesis 444 - j := at + 1 445 - for ; j < len(s); j++ { 446 - if s[j] == ']' { 447 - break 443 + if i := strings.IndexByte(s[at:], ']'); i > 0 { 444 + if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") { 445 + at += i + 1 446 + continue 448 447 } 449 - } 450 - 451 - if j < len(s) { 452 - at = j + 1 453 - continue 454 448 } 455 449 } 456 450 457 - // Only loop if we actually removed something 451 + // Nothing was removed, end processing 458 452 break 459 453 } 460 454 461 - return s[:at], decodeSubject(s[at:]) 455 + prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace) 456 + subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace) 457 + return 462 458 } 463 459 464 460 // Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result ··· 477 473 payload = strings.ReplaceAll(payload, " =?UTF-8?q?", "") 478 474 payload = strings.ReplaceAll(payload, "?=", "") 479 475 480 - decoded, err := io.ReadAll(quotedprintable.NewReader(strings.NewReader(payload))) 476 + decoded, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(payload))) 481 477 if err != nil { 482 478 // if err, abort decoding and return original subject 483 479 return encoded

+172 -104

gitdiff/patch_header_test.go

··· 5 5 "time" 6 6 ) 7 7 8 - func TestParsePatchIdentity(t *testing.T) { 9 - tests := map[string]struct { 10 - Input string 11 - Output PatchIdentity 12 - Err interface{} 13 - }{ 14 - "simple": { 15 - Input: "Morton Haypenny <mhaypenny@example.com>", 16 - Output: PatchIdentity{ 17 - Name: "Morton Haypenny", 18 - Email: "mhaypenny@example.com", 19 - }, 20 - }, 21 - "extraWhitespace": { 22 - Input: " Morton Haypenny <mhaypenny@example.com > ", 23 - Output: PatchIdentity{ 24 - Name: "Morton Haypenny", 25 - Email: "mhaypenny@example.com", 26 - }, 27 - }, 28 - "trailingCharacters": { 29 - Input: "Morton Haypenny <mhaypenny@example.com> unrelated garbage", 30 - Output: PatchIdentity{ 31 - Name: "Morton Haypenny", 32 - Email: "mhaypenny@example.com", 33 - }, 34 - }, 35 - "missingName": { 36 - Input: "<mhaypenny@example.com>", 37 - Err: "invalid identity", 38 - }, 39 - "missingEmail": { 40 - Input: "Morton Haypenny", 41 - Err: "invalid identity", 42 - }, 43 - "unclosedEmail": { 44 - Input: "Morton Haypenny <mhaypenny@example.com", 45 - Err: "unclosed email", 46 - }, 47 - } 48 - 49 - for name, test := range tests { 50 - t.Run(name, func(t *testing.T) { 51 - id, err := ParsePatchIdentity(test.Input) 52 - if test.Err != nil { 53 - assertError(t, test.Err, err, "parsing identity") 54 - return 55 - } 56 - if err != nil { 57 - t.Fatalf("unexpected error parsing identity: %v", err) 58 - } 59 - 60 - if test.Output != id { 61 - t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) 62 - } 63 - }) 64 - } 65 - } 66 - 67 8 func TestParsePatchDate(t *testing.T) { 68 9 expected := time.Date(2020, 4, 9, 8, 7, 6, 0, time.UTC) 69 10 ··· 144 85 expectedBodyAppendix := "CC: Joe Smith <joe.smith@company.com>" 145 86 146 87 tests := map[string]struct { 147 - Input string 148 - Header PatchHeader 149 - Err interface{} 88 + Input string 89 + Options []PatchHeaderOption 90 + Header PatchHeader 91 + Err interface{} 150 92 }{ 151 93 "prettyShort": { 152 94 Input: `commit 
61f5cd90bed4d204ee3feb3aa41ee91d4734855b ··· 269 211 Body: expectedBody, 270 212 }, 271 213 }, 214 + "mailboxPatchOnly": { 215 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 216 + From: Morton Haypenny <mhaypenny@example.com> 217 + Date: Sat, 11 Apr 2020 15:21:23 -0700 218 + Subject: [PATCH] [BUG-123] A sample commit to test header parsing 219 + 220 + The medium format shows the body, which 221 + may wrap on to multiple lines. 222 + 223 + Another body line. 224 + `, 225 + Options: []PatchHeaderOption{ 226 + WithSubjectCleanMode(SubjectCleanPatchOnly), 227 + }, 228 + Header: PatchHeader{ 229 + SHA: expectedSHA, 230 + Author: expectedIdentity, 231 + AuthorDate: expectedDate, 232 + Title: "[BUG-123] " + expectedTitle, 233 + Body: expectedBody, 234 + }, 235 + }, 272 236 "mailboxEmojiOneLine": { 273 237 Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 274 238 From: Morton Haypenny <mhaypenny@example.com> ··· 308 272 Body: expectedBody, 309 273 }, 310 274 }, 275 + "mailboxRFC5322SpecialCharacters": { 276 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 277 + From: "dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com> 278 + Date: Sat, 11 Apr 2020 15:21:23 -0700 279 + Subject: [PATCH] A sample commit to test header parsing 280 + 281 + The medium format shows the body, which 282 + may wrap on to multiple lines. 283 + 284 + Another body line. 
285 + `, 286 + Header: PatchHeader{ 287 + SHA: expectedSHA, 288 + Author: &PatchIdentity{ 289 + Name: "dependabot[bot]", 290 + Email: "12345+dependabot[bot]@users.noreply.github.com", 291 + }, 292 + AuthorDate: expectedDate, 293 + Title: expectedTitle, 294 + Body: expectedBody, 295 + }, 296 + }, 311 297 "mailboxAppendix": { 312 298 Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 313 299 From: Morton Haypenny <mhaypenny@example.com> ··· 414 400 Title: expectedTitle, 415 401 }, 416 402 }, 403 + "emptyHeader": { 404 + Input: "", 405 + Header: PatchHeader{}, 406 + }, 417 407 } 418 408 419 409 for name, test := range tests { 420 410 t.Run(name, func(t *testing.T) { 421 - h, err := ParsePatchHeader(test.Input) 411 + h, err := ParsePatchHeader(test.Input, test.Options...) 422 412 if test.Err != nil { 423 413 assertError(t, test.Err, err, "parsing patch header") 424 414 return ··· 473 463 } 474 464 } 475 465 476 - func TestCleanupSubject(t *testing.T) { 477 - exp := "A sample commit to test header parsing" 478 - tests := map[string]string{ 479 - "plain": "", 480 - "patch": "[PATCH] ", 481 - "patchv5": "[PATCH v5] ", 482 - "patchrfc": "[PATCH RFC] ", 483 - "patchnospace": "[PATCH]", 484 - "space": " ", 485 - "re": "re: ", 486 - "Re": "Re: ", 487 - "RE": "rE: ", 488 - "rere": "re: re: ", 489 - } 490 - 491 - for name, prefix := range tests { 492 - gotprefix, gottitle := parseSubject(prefix + exp) 493 - if gottitle != exp { 494 - t.Errorf("%s: Incorrect parsing of prefix %s: got title %s, wanted %s", 495 - name, prefix, gottitle, exp) 496 - } 497 - if gotprefix != prefix { 498 - t.Errorf("%s: Incorrect parsing of prefix %s: got prefix %s", 499 - name, prefix, gotprefix) 500 - } 501 - } 466 + func TestCleanSubject(t *testing.T) { 467 + expectedSubject := "A sample commit to test header parsing" 502 468 503 - moretests := map[string]struct { 504 - in, eprefix, etitle string 469 + tests := map[string]struct { 470 + Input string 471 + Mode 
SubjectCleanMode 472 + Prefix string 473 + Subject string 505 474 }{ 506 - "Reimplement": {"Reimplement something", "", "Reimplement something"}, 507 - "patch-reimplement": {"[PATCH v5] Reimplement something", "[PATCH v5] ", "Reimplement something"}, 508 - "Openbracket": {"[Just to annoy people", "", "[Just to annoy people"}, 475 + "CleanAll/noPrefix": { 476 + Input: expectedSubject, 477 + Mode: SubjectCleanAll, 478 + Subject: expectedSubject, 479 + }, 480 + "CleanAll/patchPrefix": { 481 + Input: "[PATCH] " + expectedSubject, 482 + Mode: SubjectCleanAll, 483 + Prefix: "[PATCH] ", 484 + Subject: expectedSubject, 485 + }, 486 + "CleanAll/patchPrefixNoSpace": { 487 + Input: "[PATCH]" + expectedSubject, 488 + Mode: SubjectCleanAll, 489 + Prefix: "[PATCH]", 490 + Subject: expectedSubject, 491 + }, 492 + "CleanAll/patchPrefixContent": { 493 + Input: "[PATCH 3/7] " + expectedSubject, 494 + Mode: SubjectCleanAll, 495 + Prefix: "[PATCH 3/7] ", 496 + Subject: expectedSubject, 497 + }, 498 + "CleanAll/spacePrefix": { 499 + Input: " " + expectedSubject, 500 + Mode: SubjectCleanAll, 501 + Subject: expectedSubject, 502 + }, 503 + "CleanAll/replyLowerPrefix": { 504 + Input: "re: " + expectedSubject, 505 + Mode: SubjectCleanAll, 506 + Prefix: "re: ", 507 + Subject: expectedSubject, 508 + }, 509 + "CleanAll/replyMixedPrefix": { 510 + Input: "Re: " + expectedSubject, 511 + Mode: SubjectCleanAll, 512 + Prefix: "Re: ", 513 + Subject: expectedSubject, 514 + }, 515 + "CleanAll/replyCapsPrefix": { 516 + Input: "RE: " + expectedSubject, 517 + Mode: SubjectCleanAll, 518 + Prefix: "RE: ", 519 + Subject: expectedSubject, 520 + }, 521 + "CleanAll/replyDoublePrefix": { 522 + Input: "Re: re: " + expectedSubject, 523 + Mode: SubjectCleanAll, 524 + Prefix: "Re: re: ", 525 + Subject: expectedSubject, 526 + }, 527 + "CleanAll/noPrefixSubjectHasRe": { 528 + Input: "Reimplement parsing", 529 + Mode: SubjectCleanAll, 530 + Subject: "Reimplement parsing", 531 + }, 532 + 
"CleanAll/patchPrefixSubjectHasRe": { 533 + Input: "[PATCH 1/2] Reimplement parsing", 534 + Mode: SubjectCleanAll, 535 + Prefix: "[PATCH 1/2] ", 536 + Subject: "Reimplement parsing", 537 + }, 538 + "CleanAll/unclosedPrefix": { 539 + Input: "[Just to annoy people", 540 + Mode: SubjectCleanAll, 541 + Subject: "[Just to annoy people", 542 + }, 543 + "CleanAll/multiplePrefix": { 544 + Input: " Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject + " ", 545 + Mode: SubjectCleanAll, 546 + Prefix: "Re:Re: [PATCH 1/2][DRAFT] ", 547 + Subject: expectedSubject, 548 + }, 549 + "CleanPatchOnly/patchPrefix": { 550 + Input: "[PATCH] " + expectedSubject, 551 + Mode: SubjectCleanPatchOnly, 552 + Prefix: "[PATCH] ", 553 + Subject: expectedSubject, 554 + }, 555 + "CleanPatchOnly/mixedPrefix": { 556 + Input: "[PATCH] [TICKET-123] " + expectedSubject, 557 + Mode: SubjectCleanPatchOnly, 558 + Prefix: "[PATCH] ", 559 + Subject: "[TICKET-123] " + expectedSubject, 560 + }, 561 + "CleanPatchOnly/multiplePrefix": { 562 + Input: "Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject, 563 + Mode: SubjectCleanPatchOnly, 564 + Prefix: "Re:Re: [PATCH 1/2]", 565 + Subject: "[DRAFT] " + expectedSubject, 566 + }, 567 + "CleanWhitespace/leadingSpace": { 568 + Input: " [PATCH] " + expectedSubject, 569 + Mode: SubjectCleanWhitespace, 570 + Subject: "[PATCH] " + expectedSubject, 571 + }, 572 + "CleanWhitespace/trailingSpace": { 573 + Input: "[PATCH] " + expectedSubject + " ", 574 + Mode: SubjectCleanWhitespace, 575 + Subject: "[PATCH] " + expectedSubject, 576 + }, 509 577 } 510 578 511 - for name, test := range moretests { 512 - prefix, title := parseSubject(test.in) 513 - if title != test.etitle { 514 - t.Errorf("%s: Incorrect parsing of %s: got title %s, wanted %s", 515 - name, test.in, title, test.etitle) 516 - } 517 - if prefix != test.eprefix { 518 - t.Errorf("%s: Incorrect parsing of %s: got prefix %s, wanted %s", 519 - name, test.in, title, test.etitle) 520 - } 579 + for name, test := range tests { 580 + 
t.Run(name, func(t *testing.T) { 581 + prefix, subject := cleanSubject(test.Input, test.Mode) 582 + if prefix != test.Prefix { 583 + t.Errorf("incorrect prefix: expected %q, actual %q", test.Prefix, prefix) 584 + } 585 + if subject != test.Subject { 586 + t.Errorf("incorrect subject: expected %q, actual %q", test.Subject, subject) 587 + } 588 + }) 521 589 } 522 590 }

+166

gitdiff/patch_identity.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "fmt" 5 + "strings" 6 + ) 7 + 8 + // PatchIdentity identifies a person who authored or committed a patch. 9 + type PatchIdentity struct { 10 + Name string 11 + Email string 12 + } 13 + 14 + func (i PatchIdentity) String() string { 15 + name := i.Name 16 + if name == "" { 17 + name = `""` 18 + } 19 + return fmt.Sprintf("%s <%s>", name, i.Email) 20 + } 21 + 22 + // ParsePatchIdentity parses a patch identity string. A patch identity contains 23 + // an email address and an optional name in [RFC 5322] format. This is either a 24 + // plain email address or a name followed by an address in angle brackets: 25 + // 26 + // author@example.com 27 + // Author Name <author@example.com> 28 + // 29 + // If the input is not one of these formats, ParsePatchIdentity applies a 30 + // heuristic to separate the name and email portions. If both the name and 31 + // email are missing or empty, ParsePatchIdentity returns an error. It 32 + // otherwise does not validate the result. 33 + // 34 + // [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322 35 + func ParsePatchIdentity(s string) (PatchIdentity, error) { 36 + s = normalizeSpace(s) 37 + s = unquotePairs(s) 38 + 39 + var name, email string 40 + if at := strings.IndexByte(s, '@'); at >= 0 { 41 + start, end := at, at 42 + for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' { 43 + start-- 44 + } 45 + for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' { 46 + end++ 47 + } 48 + email = s[start+1 : end] 49 + 50 + // Adjust the boundaries so that we drop angle brackets, but keep 51 + // spaces when removing the email to form the name. 
52 + if start < 0 || s[start] != '<' { 53 + start++ 54 + } 55 + if end >= len(s) || s[end] != '>' { 56 + end-- 57 + } 58 + name = s[:start] + s[end+1:] 59 + } else { 60 + start, end := 0, 0 61 + for i := 0; i < len(s); i++ { 62 + if s[i] == '<' && start == 0 { 63 + start = i + 1 64 + } 65 + if s[i] == '>' && start > 0 { 66 + end = i 67 + break 68 + } 69 + } 70 + if start > 0 && end >= start { 71 + email = strings.TrimSpace(s[start:end]) 72 + name = s[:start-1] 73 + } 74 + } 75 + 76 + // After extracting the email, the name might contain extra whitespace 77 + // again and may be surrounded by comment characters. The git source gives 78 + // these examples of when this can happen: 79 + // 80 + // "Name <email@domain>" 81 + // "email@domain (Name)" 82 + // "Name <email@domain> (Comment)" 83 + // 84 + name = normalizeSpace(name) 85 + if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") { 86 + name = name[1 : len(name)-1] 87 + } 88 + name = strings.TrimSpace(name) 89 + 90 + // If the name is empty or contains email-like characters, use the email 91 + // instead (assuming one exists) 92 + if name == "" || strings.ContainsAny(name, "@<>") { 93 + name = email 94 + } 95 + 96 + if name == "" && email == "" { 97 + return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s) 98 + } 99 + return PatchIdentity{Name: name, Email: email}, nil 100 + } 101 + 102 + // unquotePairs processes the RFC5322 tokens "quoted-string" and "comment" to 103 + // remove any "quoted-pairs" (backslash-escaped characters). It also removes 104 + // the quotes from any quoted strings, but leaves the comment delimiters. 
105 + func unquotePairs(s string) string { 106 + quote := false 107 + comments := 0 108 + escaped := false 109 + 110 + var out strings.Builder 111 + for i := 0; i < len(s); i++ { 112 + if escaped { 113 + escaped = false 114 + } else { 115 + switch s[i] { 116 + case '\\': 117 + // quoted-pair is only allowed in quoted-string/comment 118 + if quote || comments > 0 { 119 + escaped = true 120 + continue // drop '\' character 121 + } 122 + 123 + case '"': 124 + if comments == 0 { 125 + quote = !quote 126 + continue // drop '"' character 127 + } 128 + 129 + case '(': 130 + if !quote { 131 + comments++ 132 + } 133 + case ')': 134 + if comments > 0 { 135 + comments-- 136 + } 137 + } 138 + } 139 + out.WriteByte(s[i]) 140 + } 141 + return out.String() 142 + } 143 + 144 + // normalizeSpace trims leading and trailing whitespace from s and converts 145 + // inner sequences of one or more whitespace characters to single spaces. 146 + func normalizeSpace(s string) string { 147 + var sb strings.Builder 148 + for i := 0; i < len(s); i++ { 149 + c := s[i] 150 + if !isRFC5332Space(c) { 151 + if sb.Len() > 0 && isRFC5332Space(s[i-1]) { 152 + sb.WriteByte(' ') 153 + } 154 + sb.WriteByte(c) 155 + } 156 + } 157 + return sb.String() 158 + } 159 + 160 + func isRFC5332Space(c byte) bool { 161 + switch c { 162 + case '\t', '\n', '\r', ' ': 163 + return true 164 + } 165 + return false 166 + }

+127

gitdiff/patch_identity_test.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "testing" 5 + ) 6 + 7 + func TestParsePatchIdentity(t *testing.T) { 8 + tests := map[string]struct { 9 + Input string 10 + Output PatchIdentity 11 + Err interface{} 12 + }{ 13 + "simple": { 14 + Input: "Morton Haypenny <mhaypenny@example.com>", 15 + Output: PatchIdentity{ 16 + Name: "Morton Haypenny", 17 + Email: "mhaypenny@example.com", 18 + }, 19 + }, 20 + "extraWhitespace": { 21 + Input: "\t Morton Haypenny \r\n<mhaypenny@example.com> ", 22 + Output: PatchIdentity{ 23 + Name: "Morton Haypenny", 24 + Email: "mhaypenny@example.com", 25 + }, 26 + }, 27 + "trailingCharacters": { 28 + Input: "Morton Haypenny <mhaypenny@example.com> II", 29 + Output: PatchIdentity{ 30 + Name: "Morton Haypenny II", 31 + Email: "mhaypenny@example.com", 32 + }, 33 + }, 34 + "onlyEmail": { 35 + Input: "mhaypenny@example.com", 36 + Output: PatchIdentity{ 37 + Name: "mhaypenny@example.com", 38 + Email: "mhaypenny@example.com", 39 + }, 40 + }, 41 + "onlyEmailInBrackets": { 42 + Input: "<mhaypenny@example.com>", 43 + Output: PatchIdentity{ 44 + Name: "mhaypenny@example.com", 45 + Email: "mhaypenny@example.com", 46 + }, 47 + }, 48 + "rfc5322SpecialCharacters": { 49 + Input: `"dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>`, 50 + Output: PatchIdentity{ 51 + Name: "dependabot[bot]", 52 + Email: "12345+dependabot[bot]@users.noreply.github.com", 53 + }, 54 + }, 55 + "rfc5322QuotedPairs": { 56 + Input: `"Morton \"Old-Timer\" Haypenny" <"mhaypenny\+[1900]"@example.com> (III $PhD$)`, 57 + Output: PatchIdentity{ 58 + Name: `Morton "Old-Timer" Haypenny (III (PhD))`, 59 + Email: "mhaypenny+[1900]@example.com", 60 + }, 61 + }, 62 + "rfc5322QuotedPairsOutOfContext": { 63 + Input: `Morton \\Backslash Haypenny <mhaypenny@example.com>`, 64 + Output: PatchIdentity{ 65 + Name: `Morton \\Backslash Haypenny`, 66 + Email: "mhaypenny@example.com", 67 + }, 68 + }, 69 + "emptyEmail": { 70 + Input: "Morton Haypenny <>", 71 + Output: PatchIdentity{ 72 + 
Name: "Morton Haypenny", 73 + Email: "", 74 + }, 75 + }, 76 + "unclosedEmail": { 77 + Input: "Morton Haypenny <mhaypenny@example.com", 78 + Output: PatchIdentity{ 79 + Name: "Morton Haypenny", 80 + Email: "mhaypenny@example.com", 81 + }, 82 + }, 83 + "bogusEmail": { 84 + Input: "Morton Haypenny <mhaypenny>", 85 + Output: PatchIdentity{ 86 + Name: "Morton Haypenny", 87 + Email: "mhaypenny", 88 + }, 89 + }, 90 + "bogusEmailWithWhitespace": { 91 + Input: "Morton Haypenny < mhaypenny >", 92 + Output: PatchIdentity{ 93 + Name: "Morton Haypenny", 94 + Email: "mhaypenny", 95 + }, 96 + }, 97 + "missingEmail": { 98 + Input: "Morton Haypenny", 99 + Err: "invalid identity", 100 + }, 101 + "missingNameAndEmptyEmail": { 102 + Input: "<>", 103 + Err: "invalid identity", 104 + }, 105 + "empty": { 106 + Input: "", 107 + Err: "invalid identity", 108 + }, 109 + } 110 + 111 + for name, test := range tests { 112 + t.Run(name, func(t *testing.T) { 113 + id, err := ParsePatchIdentity(test.Input) 114 + if test.Err != nil { 115 + assertError(t, test.Err, err, "parsing identity") 116 + return 117 + } 118 + if err != nil { 119 + t.Fatalf("unexpected error parsing identity: %v", err) 120 + } 121 + 122 + if test.Output != id { 123 + t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) 124 + } 125 + }) 126 + } 127 + }

+1 -1

gitdiff/testdata/apply/bin.go

··· 1 - //+build ignore 1 + //go:build ignore 2 2 3 3 // bin.go is a helper CLI to manipulate binary diff data for testing purposes. 4 4 // It can decode patches generated by git using the standard parsing functions

+3

gitdiff/testdata/apply/text_fragment_change_end_eol.out

··· 1 + line 1 2 + line 2 3 + line 3

+10

gitdiff/testdata/apply/text_fragment_change_end_eol.patch

··· 1 + diff --git a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src 2 + index a92d664..8cf2f17 100644 3 + --- a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src 4 + +++ b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src 5 + @@ -1,3 +1,3 @@ 6 + line 1 7 + line 2 8 + -line 3 9 + +line 3 10 + \ No newline at end of file

+3

gitdiff/testdata/apply/text_fragment_change_end_eol.src

··· 1 + line 1 2 + line 2 3 + line 3

+8

gitdiff/testdata/no_files.patch

··· 1 + commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe 2 + Author: Morton Haypenny <mhaypenny@example.com> 3 + Date: Tue Apr 2 22:55:40 2019 -0700 4 + 5 + A file with multiple fragments. 6 + 7 + The content is arbitrary. 8 +

+9

gitdiff/testdata/string/binary_modify.patch

··· 1 + diff --git a/file.bin b/file.bin 2 + index a7f4d5d6975ec021016c02b6d58345ebf434f38c..bdc9a70f055892146612dcdb413f0e339faaa0df 100644 3 + GIT binary patch 4 + delta 66 5 + QcmeZhVVvM$!$1K50C&Ox;s5{u 6 + 7 + delta 5 8 + McmZo+^qAlQ00i9urT_o{ 9 +

+3

gitdiff/testdata/string/binary_modify_nodata.patch

··· 1 + diff --git a/file.bin b/file.bin 2 + index a7f4d5d..bdc9a70 100644 3 + Binary files a/file.bin and b/file.bin differ

+11

gitdiff/testdata/string/binary_new.patch

··· 1 + diff --git a/file.bin b/file.bin 2 + new file mode 100644 3 + index 0000000000000000000000000000000000000000..a7f4d5d6975ec021016c02b6d58345ebf434f38c 4 + GIT binary patch 5 + literal 72 6 + zcmV-O0Jr~td-`u6JcK&{KDK=<a#;v1^LR5&K)zQ0=Goz82(?nJ6_nD`f#8O9p}}{P 7 + eiXim+rDI+BDadMQmMsO5Sw@;DbrCA+PamP;Ng_@F 8 + 9 + literal 0 10 + HcmV?d00001 11 +

+4

gitdiff/testdata/string/copy.patch

··· 1 + diff --git a/file.txt b/numbers.txt 2 + similarity index 100% 3 + copy from file.txt 4 + copy to numbers.txt

+21

gitdiff/testdata/string/copy_modify.patch

··· 1 + diff --git a/file.txt b/numbers.txt 2 + similarity index 57% 3 + copy from file.txt 4 + copy to numbers.txt 5 + index c9e9e05..6c4a3e0 100644 6 + --- a/file.txt 7 + +++ b/numbers.txt 8 + @@ -1,6 +1,6 @@ 9 + one 10 + two 11 + -three 12 + +three three three 13 + four 14 + five 15 + six 16 + @@ -8,3 +8,5 @@ seven 17 + eight 18 + nine 19 + ten 20 + +eleven 21 + +twelve

+16

gitdiff/testdata/string/delete.patch

··· 1 + diff --git a/file.txt b/file.txt 2 + deleted file mode 100644 3 + index c9e9e05..0000000 4 + --- a/file.txt 5 + +++ /dev/null 6 + @@ -1,10 +0,0 @@ 7 + -one 8 + -two 9 + -three 10 + -four 11 + -five 12 + -six 13 + -seven 14 + -eight 15 + -nine 16 + -ten

+3

gitdiff/testdata/string/mode.patch

··· 1 + diff --git a/file.txt b/file.txt 2 + old mode 100644 3 + new mode 100755

+10

gitdiff/testdata/string/mode_modify.patch

··· 1 + diff --git a/script.sh b/script.sh 2 + old mode 100644 3 + new mode 100755 4 + index 7a870bd..68d501e 5 + --- a/script.sh 6 + +++ b/script.sh 7 + @@ -1,2 +1,2 @@ 8 + #!/bin/bash 9 + -echo "Hello World" 10 + +echo "Hello, World!"

+16

gitdiff/testdata/string/modify.patch

··· 1 + diff --git a/file.txt b/file.txt 2 + index c9e9e05..7d5fdc6 100644 3 + --- a/file.txt 4 + +++ b/file.txt 5 + @@ -3,8 +3,10 @@ two 6 + three 7 + four 8 + five 9 + -six 10 + +six six six six six six 11 + seven 12 + eight 13 + nine 14 + ten 15 + +eleven 16 + +twelve

+16

gitdiff/testdata/string/new.patch

··· 1 + diff --git a/file.txt b/file.txt 2 + new file mode 100644 3 + index 0000000..c9e9e05 4 + --- /dev/null 5 + +++ b/file.txt 6 + @@ -0,0 +1,10 @@ 7 + +one 8 + +two 9 + +three 10 + +four 11 + +five 12 + +six 13 + +seven 14 + +eight 15 + +nine 16 + +ten

+3

gitdiff/testdata/string/new_empty.patch

··· 1 + diff --git a/file.txt b/file.txt 2 + new file mode 100644 3 + index 0000000..e69de29

+16

gitdiff/testdata/string/new_mode.patch

··· 1 + diff --git a/file.sh b/file.sh 2 + new file mode 100755 3 + index 0000000..c9e9e05 4 + --- /dev/null 5 + +++ b/file.sh 6 + @@ -0,0 +1,10 @@ 7 + +one 8 + +two 9 + +three 10 + +four 11 + +five 12 + +six 13 + +seven 14 + +eight 15 + +nine 16 + +ten

+4

gitdiff/testdata/string/rename.patch

··· 1 + diff --git a/file.txt b/numbers.txt 2 + similarity index 100% 3 + rename from file.txt 4 + rename to numbers.txt

+18

gitdiff/testdata/string/rename_modify.patch

··· 1 + diff --git a/file.txt b/numbers.txt 2 + similarity index 77% 3 + rename from file.txt 4 + rename to numbers.txt 5 + index c9e9e05..a6b31d6 100644 6 + --- a/file.txt 7 + +++ b/numbers.txt 8 + @@ -3,8 +3,9 @@ two 9 + three 10 + four 11 + five 12 + -six 13 + + six 14 + seven 15 + eight 16 + nine 17 + ten 18 + +eleven

+30 -18

gitdiff/text.go

··· 79 79 return p.Errorf(0, "no content following fragment header") 80 80 } 81 81 82 - isNoNewlineLine := func(s string) bool { 83 - // test for "\ No newline at end of file" by prefix because the text 84 - // changes by locale (git claims all versions are at least 12 chars) 85 - return len(s) >= 12 && s[:2] == "\\ " 86 - } 87 - 88 82 oldLines, newLines := frag.OldLines, frag.NewLines 89 - for { 83 + for oldLines > 0 || newLines > 0 { 90 84 line := p.Line(0) 91 85 op, data := line[0], line[1:] 92 86 ··· 113 107 frag.LinesAdded++ 114 108 frag.TrailingContext = 0 115 109 frag.Lines = append(frag.Lines, Line{OpAdd, data}) 116 - default: 110 + case '\\': 117 111 // this may appear in middle of fragment if it's for a deleted line 118 - if isNoNewlineLine(line) { 119 - last := &frag.Lines[len(frag.Lines)-1] 120 - last.Line = strings.TrimSuffix(last.Line, "\n") 112 + if isNoNewlineMarker(line) { 113 + removeLastNewline(frag) 121 114 break 122 115 } 116 + fallthrough 117 + default: 123 118 // TODO(bkeyes): if this is because we hit the next header, it 124 119 // would be helpful to return the miscounts line error. 
We could 125 120 // either test for the common headers ("@@ -", "diff --git") or 126 121 // assume any invalid op ends the fragment; git returns the same 127 122 // generic error in all cases so either is compatible 128 123 return p.Errorf(0, "invalid line operation: %q", op) 129 - } 130 - 131 - next := p.Line(1) 132 - if oldLines <= 0 && newLines <= 0 && !isNoNewlineLine(next) { 133 - break 134 124 } 135 125 136 126 if err := p.Next(); err != nil { ··· 145 135 hdr := max(frag.OldLines-oldLines, frag.NewLines-newLines) + 1 146 136 return p.Errorf(-hdr, "fragment header miscounts lines: %+d old, %+d new", -oldLines, -newLines) 147 137 } 138 + if frag.LinesAdded == 0 && frag.LinesDeleted == 0 { 139 + return p.Errorf(0, "fragment contains no changes") 140 + } 148 141 149 - if err := p.Next(); err != nil && err != io.EOF { 150 - return err 142 + // check for a final "no newline" marker since it is not included in the 143 + // counters used to stop the loop above 144 + if isNoNewlineMarker(p.Line(0)) { 145 + removeLastNewline(frag) 146 + if err := p.Next(); err != nil && err != io.EOF { 147 + return err 148 + } 151 149 } 150 + 152 151 return nil 152 + } 153 + 154 + func isNoNewlineMarker(s string) bool { 155 + // test for "\ No newline at end of file" by prefix because the text 156 + // changes by locale (git claims all versions are at least 12 chars) 157 + return len(s) >= 12 && s[:2] == "\\ " 158 + } 159 + 160 + func removeLastNewline(frag *TextFragment) { 161 + if len(frag.Lines) > 0 { 162 + last := &frag.Lines[len(frag.Lines)-1] 163 + last.Line = strings.TrimSuffix(last.Line, "\n") 164 + } 153 165 } 154 166 155 167 func parseRange(s string) (start int64, end int64, err error) {

+18

gitdiff/text_test.go

··· 317 317 }, 318 318 Err: true, 319 319 }, 320 + "onlyContext": { 321 + Input: ` context line 322 + context line 323 + `, 324 + Fragment: TextFragment{ 325 + OldLines: 2, 326 + NewLines: 2, 327 + }, 328 + Err: true, 329 + }, 330 + "unexpectedNoNewlineMarker": { 331 + Input: `\ No newline at end of file`, 332 + Fragment: TextFragment{ 333 + OldLines: 1, 334 + NewLines: 1, 335 + }, 336 + Err: true, 337 + }, 320 338 } 321 339 322 340 for name, test := range tests {

+1 -1

go.mod

··· 1 1 module github.com/bluekeyes/go-gitdiff 2 2 3 - go 1.13 3 + go 1.21

Compare changes