comparing 1bd59c4f1123687f522447da120db26f61e787f2 and master on oppi.li/go-gitdiff

+8 -17

.github/workflows/go.yml

··· 9 9 name: Verify 10 10 runs-on: ubuntu-latest 11 11 steps: 12 - - name: Set up Go 1.13 13 - uses: actions/setup-go@v1 12 + - name: Set up Go 1.21 13 + uses: actions/setup-go@v5 14 14 with: 15 - go-version: 1.13 16 - id: go 17 - 18 - - name: Set up environment 19 - run: | 20 - # https://github.com/actions/setup-go/issues/14 21 - echo "::set-env name=GOPATH::$(go env GOPATH)" 22 - echo "::add-path::$(go env GOPATH)/bin" 23 - 24 - - name: Install golangci-lint 25 - run: curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.23.1 26 - 15 + go-version: 1.21 16 + 27 17 - name: Check out code into the Go module directory 28 - uses: actions/checkout@v1 18 + uses: actions/checkout@v4 29 19 30 20 - name: Lint 31 - run: golangci-lint run 21 + uses: golangci/golangci-lint-action@v7 22 + with: 23 + version: v2.0 32 24 33 25 - name: Test 34 26 run: go test -v ./... 35 -

+41 -13

.golangci.yml

··· 1 + version: "2" 2 + 1 3 run: 2 4 tests: false 3 5 4 6 linters: 5 - disable-all: true 7 + default: none 6 8 enable: 7 - - deadcode 8 9 - errcheck 9 - - gofmt 10 - - goimports 11 - - golint 12 10 - govet 13 11 - ineffassign 14 - - typecheck 12 + - misspell 13 + - revive 15 14 - unconvert 16 - - varcheck 17 - 18 - issues: 19 - exclude-use-default: false 15 + - unused 16 + settings: 17 + errcheck: 18 + exclude-functions: 19 + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write 20 + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString 21 + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte 22 + - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) 23 + revive: 24 + rules: 25 + - name: context-keys-type 26 + - name: time-naming 27 + - name: var-declaration 28 + - name: unexported-return 29 + - name: errorf 30 + - name: blank-imports 31 + - name: context-as-argument 32 + - name: dot-imports 33 + - name: error-return 34 + - name: error-strings 35 + - name: error-naming 36 + - name: exported 37 + - name: increment-decrement 38 + - name: var-naming 39 + - name: package-comments 40 + - name: range 41 + - name: receiver-naming 42 + - name: indent-error-flow 20 43 21 - linter-settings: 22 - goimports: 23 - local-prefixes: github.com/bluekeyes/go-gitdiff 44 + formatters: 45 + enable: 46 + - gofmt 47 + - goimports 48 + settings: 49 + goimports: 50 + local-prefixes: 51 + - github.com/bluekeyes/go-gitdiff

+17 -5

README.md

··· 4 4 5 5 A Go library for parsing and applying patches generated by `git diff`, `git 6 6 show`, and `git format-patch`. It can also parse and apply unified diffs 7 - generated by the standard `diff` tool. 7 + generated by the standard GNU `diff` tool. 8 8 9 9 It supports standard line-oriented text patches and Git binary patches, and 10 10 aims to parse anything accepted by the `git apply` command. ··· 29 29 30 30 // apply the changes in the patch to a source file 31 31 var output bytes.Buffer 32 - if err := gitdiff.NewApplier(code).ApplyFile(&output, files[0]); err != nil { 32 + if err := gitdiff.Apply(&output, code, files[0]); err != nil { 33 33 log.Fatal(err) 34 34 } 35 35 ``` 36 36 37 37 ## Development Status 38 38 39 - Mostly complete, but API changes are possible. Patch parsing and strict 40 - application works and is well-covered by unit tests, but has not been validated 41 - extensively against real-world patches. 39 + The parsing API and types are complete and I expect will remain stable. Version 40 + 0.7.0 introduced a new apply API that may change more in the future to support 41 + non-strict patch application. 42 + 43 + Parsing and strict application are well-covered by unit tests and the library 44 + is used in a production application that parses and applies thousands of 45 + patches every day. However, the space of all possible patches is large, so 46 + there are likely undiscovered bugs. 47 + 48 + The parsing code has also had a modest amount of fuzz testing. 42 49 43 50 ## Why another git/unified diff parser? 44 51 ··· 71 78 72 79 - Numbers immediately followed by non-numeric characters 73 80 - Trailing characters on a line after valid or expected content 81 + - Malformed file header lines (lines that start with `diff --git`) 74 82 75 83 2. Errors for invalid input are generally more verbose and specific than those 76 84 from `git apply`. 
··· 91 99 context of each fragment must exactly match the source file; `git apply` 92 100 implements a search algorithm that tries different lines and amounts of 93 101 context, with further options to normalize or ignore whitespace changes. 102 + 103 + 7. When parsing mail-formatted patch headers, leading and trailing whitespace 104 + is always removed from `Subject` lines. There is no exact equivalent to `git 105 + mailinfo -k`.

+30 -337

gitdiff/apply.go

··· 13 13 // Users can test if an error was caused by a conflict by using errors.Is with 14 14 // an empty Conflict: 15 15 // 16 - // if errors.Is(err, &Conflict{}) { 17 - // // handle conflict 18 - // } 19 - // 16 + // if errors.Is(err, &Conflict{}) { 17 + // // handle conflict 18 + // } 20 19 type Conflict struct { 21 20 msg string 22 21 } ··· 89 88 90 89 var ( 91 90 errApplyInProgress = errors.New("gitdiff: incompatible apply in progress") 92 - ) 93 - 94 - const ( 95 - applyInitial = iota 96 - applyText 97 - applyBinary 98 - applyFile 91 + errApplierClosed = errors.New("gitdiff: applier is closed") 99 92 ) 100 93 101 - // Apply is a convenience function that creates an Applier for src with default 102 - // settings and applies the changes in f, writing the result to dst. 103 - func Apply(dst io.Writer, src io.ReaderAt, f *File) error { 104 - return NewApplier(src).ApplyFile(dst, f) 105 - } 106 - 107 - // Applier applies changes described in fragments to source data. If changes 108 - // are described in multiple fragments, those fragments must be applied in 109 - // order, usually by calling ApplyFile. 94 + // Apply applies the changes in f to src, writing the result to dst. It can 95 + // apply both text and binary changes. 110 96 // 111 - // By default, Applier operates in "strict" mode, where fragment content and 112 - // positions must exactly match those of the source. 113 - // 114 - // If an error occurs while applying, methods on Applier return instances of 115 - // *ApplyError that annotate the wrapped error with additional information 116 - // when available. If the error is because of a conflict between a fragment and 117 - // the source, the wrapped error will be a *Conflict. 118 - // 119 - // While an Applier can apply both text and binary fragments, only one fragment 120 - // type can be used without resetting the Applier. The first fragment applied 121 - // sets the type for the Applier. 
Mixing fragment types or mixing 122 - // fragment-level and file-level applies results in an error. 123 - type Applier struct { 124 - src io.ReaderAt 125 - lineSrc LineReaderAt 126 - nextLine int64 127 - applyType int 128 - } 129 - 130 - // NewApplier creates an Applier that reads data from src. If src is a 131 - // LineReaderAt, it is used directly to apply text fragments. 132 - func NewApplier(src io.ReaderAt) *Applier { 133 - a := new(Applier) 134 - a.Reset(src) 135 - return a 136 - } 137 - 138 - // Reset resets the input and internal state of the Applier. If src is nil, the 139 - // existing source is reused. 140 - func (a *Applier) Reset(src io.ReaderAt) { 141 - if src != nil { 142 - a.src = src 143 - if lineSrc, ok := src.(LineReaderAt); ok { 144 - a.lineSrc = lineSrc 145 - } else { 146 - a.lineSrc = &lineReaderAt{r: src} 97 + // If an error occurs while applying, Apply returns an *ApplyError that 98 + // annotates the error with additional information. If the error is because of 99 + // a conflict with the source, the wrapped error will be a *Conflict. 100 + func Apply(dst io.Writer, src io.ReaderAt, f *File) error { 101 + if f.IsBinary { 102 + if len(f.TextFragments) > 0 { 103 + return applyError(errors.New("binary file contains text fragments")) 147 104 } 148 - } 149 - a.nextLine = 0 150 - a.applyType = applyInitial 151 - } 152 - 153 - // ApplyFile applies the changes in all of the fragments of f and writes the 154 - // result to dst. 
155 - func (a *Applier) ApplyFile(dst io.Writer, f *File) error { 156 - if a.applyType != applyInitial { 157 - return applyError(errApplyInProgress) 158 - } 159 - defer func() { a.applyType = applyFile }() 160 - 161 - if f.IsBinary && len(f.TextFragments) > 0 { 162 - return applyError(errors.New("binary file contains text fragments")) 163 - } 164 - if !f.IsBinary && f.BinaryFragment != nil { 165 - return applyError(errors.New("text file contains binary fragment")) 105 + if f.BinaryFragment == nil { 106 + return applyError(errors.New("binary file does not contain a binary fragment")) 107 + } 108 + } else { 109 + if f.BinaryFragment != nil { 110 + return applyError(errors.New("text file contains a binary fragment")) 111 + } 166 112 } 167 113 168 114 switch { 169 115 case f.BinaryFragment != nil: 170 - return a.ApplyBinaryFragment(dst, f.BinaryFragment) 116 + applier := NewBinaryApplier(dst, src) 117 + if err := applier.ApplyFragment(f.BinaryFragment); err != nil { 118 + return err 119 + } 120 + return applier.Close() 171 121 172 122 case len(f.TextFragments) > 0: 173 123 frags := make([]*TextFragment, len(f.TextFragments)) ··· 181 131 // right now, the application fails if fragments overlap, but it should be 182 132 // possible to precompute the result of applying them in order 183 133 134 + applier := NewTextApplier(dst, src) 184 135 for i, frag := range frags { 185 - if err := a.ApplyTextFragment(dst, frag); err != nil { 136 + if err := applier.ApplyFragment(frag); err != nil { 186 137 return applyError(err, fragNum(i)) 187 138 } 188 139 } 189 - } 190 - 191 - return applyError(a.Flush(dst)) 192 - } 193 - 194 - // ApplyTextFragment applies the changes in the fragment f and writes unwritten 195 - // data before the start of the fragment and the result to dst. If multiple 196 - // text fragments apply to the same source, ApplyTextFragment must be called in 197 - // order of increasing start position. 
As a result, each fragment can be 198 - // applied at most once before a call to Reset. 199 - func (a *Applier) ApplyTextFragment(dst io.Writer, f *TextFragment) error { 200 - if a.applyType != applyInitial && a.applyType != applyText { 201 - return applyError(errApplyInProgress) 202 - } 203 - defer func() { a.applyType = applyText }() 204 - 205 - // application code assumes fragment fields are consistent 206 - if err := f.Validate(); err != nil { 207 - return applyError(err) 208 - } 209 - 210 - // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) 211 - fragStart := f.OldPosition - 1 212 - if fragStart < 0 { 213 - fragStart = 0 214 - } 215 - fragEnd := fragStart + f.OldLines 216 - 217 - start := a.nextLine 218 - if fragStart < start { 219 - return applyError(&Conflict{"fragment overlaps with an applied fragment"}) 220 - } 221 - 222 - if f.OldPosition == 0 { 223 - ok, err := isLen(a.src, 0) 224 - if err != nil { 225 - return applyError(err) 226 - } 227 - if !ok { 228 - return applyError(&Conflict{"cannot create new file from non-empty src"}) 229 - } 230 - } 140 + return applier.Close() 231 141 232 - preimage := make([][]byte, fragEnd-start) 233 - n, err := a.lineSrc.ReadLinesAt(preimage, start) 234 - switch { 235 - case err == nil: 236 - case err == io.EOF && n == len(preimage): // last line of frag has no newline character 237 142 default: 238 - return applyError(err, lineNum(start+int64(n))) 239 - } 240 - 241 - // copy leading data before the fragment starts 242 - for i, line := range preimage[:fragStart-start] { 243 - if _, err := dst.Write(line); err != nil { 244 - a.nextLine = start + int64(i) 245 - return applyError(err, lineNum(a.nextLine)) 246 - } 247 - } 248 - preimage = preimage[fragStart-start:] 249 - 250 - // apply the changes in the fragment 251 - used := int64(0) 252 - for i, line := range f.Lines { 253 - if err := applyTextLine(dst, line, preimage, used); err != nil { 254 - a.nextLine = fragStart + used 255 - return 
applyError(err, lineNum(a.nextLine), fragLineNum(i)) 256 - } 257 - if line.Old() { 258 - used++ 259 - } 260 - } 261 - a.nextLine = fragStart + used 262 - 263 - // new position of +0,0 mean a full delete, so check for leftovers 264 - if f.NewPosition == 0 && f.NewLines == 0 { 265 - var b [1][]byte 266 - n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine) 267 - if err != nil && err != io.EOF { 268 - return applyError(err, lineNum(a.nextLine)) 269 - } 270 - if n > 0 { 271 - return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine)) 272 - } 273 - } 274 - 275 - return nil 276 - } 277 - 278 - func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { 279 - if line.Old() && string(preimage[i]) != line.Line { 280 - return &Conflict{"fragment line does not match src line"} 281 - } 282 - if line.New() { 283 - _, err = io.WriteString(dst, line.Line) 284 - } 285 - return err 286 - } 287 - 288 - // Flush writes any data following the last applied fragment to dst. 289 - func (a *Applier) Flush(dst io.Writer) (err error) { 290 - switch a.applyType { 291 - case applyInitial: 292 - _, err = copyFrom(dst, a.src, 0) 293 - case applyText: 294 - _, err = copyLinesFrom(dst, a.lineSrc, a.nextLine) 295 - case applyBinary: 296 - // nothing to flush, binary apply "consumes" full source 297 - } 298 - return err 299 - } 300 - 301 - // ApplyBinaryFragment applies the changes in the fragment f and writes the 302 - // result to dst. At most one binary fragment can be applied before a call to 303 - // Reset. 
304 - func (a *Applier) ApplyBinaryFragment(dst io.Writer, f *BinaryFragment) error { 305 - if a.applyType != applyInitial { 306 - return applyError(errApplyInProgress) 307 - } 308 - defer func() { a.applyType = applyBinary }() 309 - 310 - if f == nil { 311 - return applyError(errors.New("nil fragment")) 312 - } 313 - 314 - switch f.Method { 315 - case BinaryPatchLiteral: 316 - if _, err := dst.Write(f.Data); err != nil { 317 - return applyError(err) 318 - } 319 - case BinaryPatchDelta: 320 - if err := applyBinaryDeltaFragment(dst, a.src, f.Data); err != nil { 321 - return applyError(err) 322 - } 323 - default: 324 - return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) 325 - } 326 - return nil 327 - } 328 - 329 - func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error { 330 - srcSize, delta := readBinaryDeltaSize(frag) 331 - if err := checkBinarySrcSize(src, srcSize); err != nil { 143 + // nothing to apply, just copy all the data 144 + _, err := copyFrom(dst, src, 0) 332 145 return err 333 146 } 334 - 335 - dstSize, delta := readBinaryDeltaSize(delta) 336 - 337 - for len(delta) > 0 { 338 - op := delta[0] 339 - if op == 0 { 340 - return errors.New("invalid delta opcode 0") 341 - } 342 - 343 - var n int64 344 - var err error 345 - switch op & 0x80 { 346 - case 0x80: 347 - n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src) 348 - case 0x00: 349 - n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:]) 350 - } 351 - if err != nil { 352 - return err 353 - } 354 - dstSize -= n 355 - } 356 - 357 - if dstSize != 0 { 358 - return errors.New("corrupt binary delta: insufficient or extra data") 359 - } 360 - return nil 361 - } 362 - 363 - // readBinaryDeltaSize reads a variable length size from a delta-encoded binary 364 - // fragment, returing the size and the unused data. Data is encoded as: 365 - // 366 - // [[1xxxxxxx]...] [0xxxxxxx] 367 - // 368 - // in little-endian order, with 7 bits of the value per byte. 
369 - func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { 370 - shift := uint(0) 371 - for i, b := range d { 372 - size |= int64(b&0x7F) << shift 373 - shift += 7 374 - if b <= 0x7F { 375 - return size, d[i+1:] 376 - } 377 - } 378 - return size, nil 379 - } 380 - 381 - // applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary 382 - // fragment, returning the amount of data written and the usused part of the 383 - // fragment. An add operation takes the form: 384 - // 385 - // [0xxxxxx][[data1]...] 386 - // 387 - // where the lower seven bits of the opcode is the number of data bytes 388 - // following the opcode. See also pack-format.txt in the Git source. 389 - func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) { 390 - size := int(op) 391 - if len(delta) < size { 392 - return 0, delta, errors.New("corrupt binary delta: incomplete add") 393 - } 394 - _, err = w.Write(delta[:size]) 395 - return int64(size), delta[size:], err 396 - } 397 - 398 - // applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary 399 - // fragment, returing the amount of data written and the unused part of the 400 - // fragment. A copy operation takes the form: 401 - // 402 - // [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] 403 - // 404 - // where the lower seven bits of the opcode determine which non-zero offset and 405 - // size bytes are present in little-endian order: if bit 0 is set, offset1 is 406 - // present, etc. If no offset or size bytes are present, offset is 0 and size 407 - // is 0x10000. See also pack-format.txt in the Git source. 
408 - func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) { 409 - const defaultSize = 0x10000 410 - 411 - unpack := func(start, bits uint) (v int64) { 412 - for i := uint(0); i < bits; i++ { 413 - mask := byte(1 << (i + start)) 414 - if op&mask > 0 { 415 - if len(delta) == 0 { 416 - err = errors.New("corrupt binary delta: incomplete copy") 417 - return 418 - } 419 - v |= int64(delta[0]) << (8 * i) 420 - delta = delta[1:] 421 - } 422 - } 423 - return 424 - } 425 - 426 - offset := unpack(0, 4) 427 - size := unpack(4, 3) 428 - if err != nil { 429 - return 0, delta, err 430 - } 431 - if size == 0 { 432 - size = defaultSize 433 - } 434 - 435 - // TODO(bkeyes): consider pooling these buffers 436 - b := make([]byte, size) 437 - if _, err := src.ReadAt(b, offset); err != nil { 438 - return 0, delta, err 439 - } 440 - 441 - _, err = w.Write(b) 442 - return size, delta, err 443 - } 444 - 445 - func checkBinarySrcSize(r io.ReaderAt, size int64) error { 446 - ok, err := isLen(r, size) 447 - if err != nil { 448 - return err 449 - } 450 - if !ok { 451 - return &Conflict{"fragment src size does not match actual src size"} 452 - } 453 - return nil 454 147 }

+206

gitdiff/apply_binary.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "errors" 5 + "fmt" 6 + "io" 7 + ) 8 + 9 + // BinaryApplier applies binary changes described in a fragment to source data. 10 + // The applier must be closed after use. 11 + type BinaryApplier struct { 12 + dst io.Writer 13 + src io.ReaderAt 14 + 15 + closed bool 16 + dirty bool 17 + } 18 + 19 + // NewBinaryApplier creates a BinaryApplier that reads data from src and 20 + // writes modified data to dst. 21 + func NewBinaryApplier(dst io.Writer, src io.ReaderAt) *BinaryApplier { 22 + a := BinaryApplier{ 23 + dst: dst, 24 + src: src, 25 + } 26 + return &a 27 + } 28 + 29 + // ApplyFragment applies the changes in the fragment f and writes the result to 30 + // dst. ApplyFragment can be called at most once. 31 + // 32 + // If an error occurs while applying, ApplyFragment returns an *ApplyError that 33 + // annotates the error with additional information. If the error is because of 34 + // a conflict between a fragment and the source, the wrapped error will be a 35 + // *Conflict. 36 + func (a *BinaryApplier) ApplyFragment(f *BinaryFragment) error { 37 + if f == nil { 38 + return applyError(errors.New("nil fragment")) 39 + } 40 + if a.closed { 41 + return applyError(errApplierClosed) 42 + } 43 + if a.dirty { 44 + return applyError(errApplyInProgress) 45 + } 46 + 47 + // mark an apply as in progress, even if it fails before making changes 48 + a.dirty = true 49 + 50 + switch f.Method { 51 + case BinaryPatchLiteral: 52 + if _, err := a.dst.Write(f.Data); err != nil { 53 + return applyError(err) 54 + } 55 + case BinaryPatchDelta: 56 + if err := applyBinaryDeltaFragment(a.dst, a.src, f.Data); err != nil { 57 + return applyError(err) 58 + } 59 + default: 60 + return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) 61 + } 62 + return nil 63 + } 64 + 65 + // Close writes any data following the last applied fragment and prevents 66 + // future calls to ApplyFragment. 
67 + func (a *BinaryApplier) Close() (err error) { 68 + if a.closed { 69 + return nil 70 + } 71 + 72 + a.closed = true 73 + if !a.dirty { 74 + _, err = copyFrom(a.dst, a.src, 0) 75 + } else { 76 + // do nothing, applying a binary fragment copies all data 77 + } 78 + return err 79 + } 80 + 81 + func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error { 82 + srcSize, delta := readBinaryDeltaSize(frag) 83 + if err := checkBinarySrcSize(src, srcSize); err != nil { 84 + return err 85 + } 86 + 87 + dstSize, delta := readBinaryDeltaSize(delta) 88 + 89 + for len(delta) > 0 { 90 + op := delta[0] 91 + if op == 0 { 92 + return errors.New("invalid delta opcode 0") 93 + } 94 + 95 + var n int64 96 + var err error 97 + switch op & 0x80 { 98 + case 0x80: 99 + n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src) 100 + case 0x00: 101 + n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:]) 102 + } 103 + if err != nil { 104 + return err 105 + } 106 + dstSize -= n 107 + } 108 + 109 + if dstSize != 0 { 110 + return errors.New("corrupt binary delta: insufficient or extra data") 111 + } 112 + return nil 113 + } 114 + 115 + // readBinaryDeltaSize reads a variable length size from a delta-encoded binary 116 + // fragment, returning the size and the unused data. Data is encoded as: 117 + // 118 + // [[1xxxxxxx]...] [0xxxxxxx] 119 + // 120 + // in little-endian order, with 7 bits of the value per byte. 121 + func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { 122 + shift := uint(0) 123 + for i, b := range d { 124 + size |= int64(b&0x7F) << shift 125 + shift += 7 126 + if b <= 0x7F { 127 + return size, d[i+1:] 128 + } 129 + } 130 + return size, nil 131 + } 132 + 133 + // applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary 134 + // fragment, returning the amount of data written and the unused part of the 135 + // fragment. An add operation takes the form: 136 + // 137 + // [0xxxxxxx][[data1]...] 
138 + // 139 + // where the lower seven bits of the opcode encode the number of data bytes 140 + // following the opcode. See also pack-format.txt in the Git source. 141 + func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) { 142 + size := int(op) 143 + if len(delta) < size { 144 + return 0, delta, errors.New("corrupt binary delta: incomplete add") 145 + } 146 + _, err = w.Write(delta[:size]) 147 + return int64(size), delta[size:], err 148 + } 149 + 150 + // applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary 151 + // fragment, returning the amount of data written and the unused part of the 152 + // fragment. A copy operation takes the form: 153 + // 154 + // [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] 155 + // 156 + // where the lower seven bits of the opcode determine which non-zero offset and 157 + // size bytes are present in little-endian order: if bit 0 is set, offset1 is 158 + // present, etc. If no offset or size bytes are present, offset is 0 and size 159 + // is 0x10000. See also pack-format.txt in the Git source. 
160 + func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) { 161 + const defaultSize = 0x10000 162 + 163 + unpack := func(start, bits uint) (v int64) { 164 + for i := uint(0); i < bits; i++ { 165 + mask := byte(1 << (i + start)) 166 + if op&mask > 0 { 167 + if len(delta) == 0 { 168 + err = errors.New("corrupt binary delta: incomplete copy") 169 + return 170 + } 171 + v |= int64(delta[0]) << (8 * i) 172 + delta = delta[1:] 173 + } 174 + } 175 + return 176 + } 177 + 178 + offset := unpack(0, 4) 179 + size := unpack(4, 3) 180 + if err != nil { 181 + return 0, delta, err 182 + } 183 + if size == 0 { 184 + size = defaultSize 185 + } 186 + 187 + // TODO(bkeyes): consider pooling these buffers 188 + b := make([]byte, size) 189 + if _, err := src.ReadAt(b, offset); err != nil { 190 + return 0, delta, err 191 + } 192 + 193 + _, err = w.Write(b) 194 + return size, delta, err 195 + } 196 + 197 + func checkBinarySrcSize(r io.ReaderAt, size int64) error { 198 + ok, err := isLen(r, size) 199 + if err != nil { 200 + return err 201 + } 202 + if !ok { 203 + return &Conflict{"fragment src size does not match actual src size"} 204 + } 205 + return nil 206 + }

+11 -73

gitdiff/apply_test.go

··· 9 9 "testing" 10 10 ) 11 11 12 - func TestApplierInvariants(t *testing.T) { 13 - binary := &BinaryFragment{ 14 - Method: BinaryPatchLiteral, 15 - Size: 2, 16 - Data: []byte("\xbe\xef"), 17 - } 18 - 19 - text := &TextFragment{ 20 - NewPosition: 1, 21 - NewLines: 1, 22 - LinesAdded: 1, 23 - Lines: []Line{ 24 - {Op: OpAdd, Line: "new line\n"}, 25 - }, 26 - } 27 - 28 - file := &File{ 29 - TextFragments: []*TextFragment{text}, 30 - } 31 - 32 - src := bytes.NewReader(nil) 33 - dst := ioutil.Discard 34 - 35 - assertInProgress := func(t *testing.T, kind string, err error) { 36 - if !errors.Is(err, errApplyInProgress) { 37 - t.Fatalf("expected in-progress error for %s apply, but got: %v", kind, err) 38 - } 39 - } 40 - 41 - t.Run("binaryFirst", func(t *testing.T) { 42 - a := NewApplier(src) 43 - if err := a.ApplyBinaryFragment(dst, binary); err != nil { 44 - t.Fatalf("unexpected error applying fragment: %v", err) 45 - } 46 - assertInProgress(t, "text", a.ApplyTextFragment(dst, text)) 47 - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) 48 - assertInProgress(t, "file", a.ApplyFile(dst, file)) 49 - }) 50 - 51 - t.Run("textFirst", func(t *testing.T) { 52 - a := NewApplier(src) 53 - if err := a.ApplyTextFragment(dst, text); err != nil { 54 - t.Fatalf("unexpected error applying fragment: %v", err) 55 - } 56 - // additional text fragments are allowed 57 - if err := a.ApplyTextFragment(dst, text); err != nil { 58 - t.Fatalf("unexpected error applying second fragment: %v", err) 59 - } 60 - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) 61 - assertInProgress(t, "file", a.ApplyFile(dst, file)) 62 - }) 63 - 64 - t.Run("fileFirst", func(t *testing.T) { 65 - a := NewApplier(src) 66 - if err := a.ApplyFile(dst, file); err != nil { 67 - t.Fatalf("unexpected error applying file: %v", err) 68 - } 69 - assertInProgress(t, "text", a.ApplyTextFragment(dst, text)) 70 - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) 71 - 
assertInProgress(t, "file", a.ApplyFile(dst, file)) 72 - }) 73 - } 74 - 75 12 func TestApplyTextFragment(t *testing.T) { 76 13 tests := map[string]applyTest{ 77 14 "createFile": {Files: getApplyFiles("text_fragment_new")}, ··· 85 22 "changeStart": {Files: getApplyFiles("text_fragment_change_start")}, 86 23 "changeMiddle": {Files: getApplyFiles("text_fragment_change_middle")}, 87 24 "changeEnd": {Files: getApplyFiles("text_fragment_change_end")}, 25 + "changeEndEOL": {Files: getApplyFiles("text_fragment_change_end_eol")}, 88 26 "changeExact": {Files: getApplyFiles("text_fragment_change_exact")}, 89 27 "changeSingleNoEOL": {Files: getApplyFiles("text_fragment_change_single_noeol")}, 90 28 ··· 127 65 128 66 for name, test := range tests { 129 67 t.Run(name, func(t *testing.T) { 130 - test.run(t, func(w io.Writer, applier *Applier, file *File) error { 68 + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { 131 69 if len(file.TextFragments) != 1 { 132 70 t.Fatalf("patch should contain exactly one fragment, but it has %d", len(file.TextFragments)) 133 71 } 134 - return applier.ApplyTextFragment(w, file.TextFragments[0]) 72 + applier := NewTextApplier(dst, src) 73 + return applier.ApplyFragment(file.TextFragments[0]) 135 74 }) 136 75 }) 137 76 } ··· 176 115 177 116 for name, test := range tests { 178 117 t.Run(name, func(t *testing.T) { 179 - test.run(t, func(w io.Writer, applier *Applier, file *File) error { 180 - return applier.ApplyBinaryFragment(w, file.BinaryFragment) 118 + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { 119 + applier := NewBinaryApplier(dst, src) 120 + return applier.ApplyFragment(file.BinaryFragment) 181 121 }) 182 122 }) 183 123 } ··· 216 156 217 157 for name, test := range tests { 218 158 t.Run(name, func(t *testing.T) { 219 - test.run(t, func(w io.Writer, applier *Applier, file *File) error { 220 - return applier.ApplyFile(w, file) 159 + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { 
160 + return Apply(dst, src, file) 221 161 }) 222 162 }) 223 163 } ··· 228 168 Err interface{} 229 169 } 230 170 231 - func (at applyTest) run(t *testing.T, apply func(io.Writer, *Applier, *File) error) { 171 + func (at applyTest) run(t *testing.T, apply func(io.Writer, io.ReaderAt, *File) error) { 232 172 src, patch, out := at.Files.Load(t) 233 173 234 174 files, _, err := Parse(bytes.NewReader(patch)) ··· 239 179 t.Fatalf("patch should contain exactly one file, but it has %d", len(files)) 240 180 } 241 181 242 - applier := NewApplier(bytes.NewReader(src)) 243 - 244 182 var dst bytes.Buffer 245 - err = apply(&dst, applier, files[0]) 183 + err = apply(&dst, bytes.NewReader(src), files[0]) 246 184 if at.Err != nil { 247 185 assertError(t, at.Err, err, "applying fragment") 248 186 return

+152

gitdiff/apply_text.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "io" 5 + ) 6 + 7 + // TextApplier applies changes described in text fragments to source data. If 8 + // changes are described in multiple fragments, those fragments must be applied 9 + // in order. The applier must be closed after use. 10 + // 11 + // By default, TextApplier operates in "strict" mode, where fragment content 12 + // and positions must exactly match those of the source. 13 + type TextApplier struct { 14 + dst io.Writer 15 + src io.ReaderAt 16 + lineSrc LineReaderAt 17 + nextLine int64 18 + 19 + closed bool 20 + dirty bool 21 + } 22 + 23 + // NewTextApplier creates a TextApplier that reads data from src and writes 24 + // modified data to dst. If src implements LineReaderAt, it is used directly. 25 + func NewTextApplier(dst io.Writer, src io.ReaderAt) *TextApplier { 26 + a := TextApplier{ 27 + dst: dst, 28 + src: src, 29 + } 30 + 31 + if lineSrc, ok := src.(LineReaderAt); ok { 32 + a.lineSrc = lineSrc 33 + } else { 34 + a.lineSrc = &lineReaderAt{r: src} 35 + } 36 + 37 + return &a 38 + } 39 + 40 + // ApplyFragment applies the changes in the fragment f, writing unwritten data 41 + // before the start of the fragment and any changes from the fragment. If 42 + // multiple text fragments apply to the same content, ApplyFragment must be 43 + // called in order of increasing start position. As a result, each fragment can 44 + // be applied at most once. 45 + // 46 + // If an error occurs while applying, ApplyFragment returns an *ApplyError that 47 + // annotates the error with additional information. If the error is because of 48 + // a conflict between the fragment and the source, the wrapped error will be a 49 + // *Conflict. 
50 + func (a *TextApplier) ApplyFragment(f *TextFragment) error { 51 + if a.closed { 52 + return applyError(errApplierClosed) 53 + } 54 + 55 + // mark an apply as in progress, even if it fails before making changes 56 + a.dirty = true 57 + 58 + // application code assumes fragment fields are consistent 59 + if err := f.Validate(); err != nil { 60 + return applyError(err) 61 + } 62 + 63 + // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) 64 + fragStart := f.OldPosition - 1 65 + if fragStart < 0 { 66 + fragStart = 0 67 + } 68 + fragEnd := fragStart + f.OldLines 69 + 70 + start := a.nextLine 71 + if fragStart < start { 72 + return applyError(&Conflict{"fragment overlaps with an applied fragment"}) 73 + } 74 + 75 + if f.OldPosition == 0 { 76 + ok, err := isLen(a.src, 0) 77 + if err != nil { 78 + return applyError(err) 79 + } 80 + if !ok { 81 + return applyError(&Conflict{"cannot create new file from non-empty src"}) 82 + } 83 + } 84 + 85 + preimage := make([][]byte, fragEnd-start) 86 + n, err := a.lineSrc.ReadLinesAt(preimage, start) 87 + if err != nil { 88 + return applyError(err, lineNum(start+int64(n))) 89 + } 90 + 91 + // copy leading data before the fragment starts 92 + for i, line := range preimage[:fragStart-start] { 93 + if _, err := a.dst.Write(line); err != nil { 94 + a.nextLine = start + int64(i) 95 + return applyError(err, lineNum(a.nextLine)) 96 + } 97 + } 98 + preimage = preimage[fragStart-start:] 99 + 100 + // apply the changes in the fragment 101 + used := int64(0) 102 + for i, line := range f.Lines { 103 + if err := applyTextLine(a.dst, line, preimage, used); err != nil { 104 + a.nextLine = fragStart + used 105 + return applyError(err, lineNum(a.nextLine), fragLineNum(i)) 106 + } 107 + if line.Old() { 108 + used++ 109 + } 110 + } 111 + a.nextLine = fragStart + used 112 + 113 + // new position of +0,0 means a full delete, so check for leftovers 114 + if f.NewPosition == 0 && f.NewLines == 0 { 115 + var b [1][]byte 116 + n, 
err := a.lineSrc.ReadLinesAt(b[:], a.nextLine) 117 + if err != nil && err != io.EOF { 118 + return applyError(err, lineNum(a.nextLine)) 119 + } 120 + if n > 0 { 121 + return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine)) 122 + } 123 + } 124 + 125 + return nil 126 + } 127 + 128 + func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { 129 + if line.Old() && string(preimage[i]) != line.Line { 130 + return &Conflict{"fragment line does not match src line"} 131 + } 132 + if line.New() { 133 + _, err = io.WriteString(dst, line.Line) 134 + } 135 + return err 136 + } 137 + 138 + // Close writes any data following the last applied fragment and prevents 139 + // future calls to ApplyFragment. 140 + func (a *TextApplier) Close() (err error) { 141 + if a.closed { 142 + return nil 143 + } 144 + 145 + a.closed = true 146 + if !a.dirty { 147 + _, err = copyFrom(a.dst, a.src, 0) 148 + } else { 149 + _, err = copyLinesFrom(a.dst, a.lineSrc, a.nextLine) 150 + } 151 + return err 152 + }

+41 -2

gitdiff/base85.go

··· 19 19 } 20 20 21 21 // base85Decode decodes Base85-encoded data from src into dst. It uses the 22 - // alphabet defined by base85.c in the Git source tree, which appears to be 23 - // unique. src must contain at least len(dst) bytes of encoded data. 22 + // alphabet defined by base85.c in the Git source tree. src must contain at 23 + // least len(dst) bytes of encoded data. 24 24 func base85Decode(dst, src []byte) error { 25 25 var v uint32 26 26 var n, ndst int ··· 50 50 } 51 51 return nil 52 52 } 53 + 54 + // base85Encode encodes src in Base85, writing the result to dst. It uses the 55 + // alphabet defined by base85.c in the Git source tree. 56 + func base85Encode(dst, src []byte) { 57 + var di, si int 58 + 59 + encode := func(v uint32) { 60 + dst[di+0] = b85Alpha[(v/(85*85*85*85))%85] 61 + dst[di+1] = b85Alpha[(v/(85*85*85))%85] 62 + dst[di+2] = b85Alpha[(v/(85*85))%85] 63 + dst[di+3] = b85Alpha[(v/85)%85] 64 + dst[di+4] = b85Alpha[v%85] 65 + } 66 + 67 + n := (len(src) / 4) * 4 68 + for si < n { 69 + encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])) 70 + si += 4 71 + di += 5 72 + } 73 + 74 + var v uint32 75 + switch len(src) - si { 76 + case 3: 77 + v |= uint32(src[si+2]) << 8 78 + fallthrough 79 + case 2: 80 + v |= uint32(src[si+1]) << 16 81 + fallthrough 82 + case 1: 83 + v |= uint32(src[si+0]) << 24 84 + encode(v) 85 + } 86 + } 87 + 88 + // base85Len returns the length of n bytes of Base85 encoded data. 89 + func base85Len(n int) int { 90 + return (n + 3) / 4 * 5 91 + }

+58

gitdiff/base85_test.go

··· 1 1 package gitdiff 2 2 3 3 import ( 4 + "bytes" 4 5 "testing" 5 6 ) 6 7 ··· 58 59 }) 59 60 } 60 61 } 62 + 63 + func TestBase85Encode(t *testing.T) { 64 + tests := map[string]struct { 65 + Input []byte 66 + Output string 67 + }{ 68 + "zeroBytes": { 69 + Input: []byte{}, 70 + Output: "", 71 + }, 72 + "twoBytes": { 73 + Input: []byte{0xCA, 0xFE}, 74 + Output: "%KiWV", 75 + }, 76 + "fourBytes": { 77 + Input: []byte{0x0, 0x0, 0xCA, 0xFE}, 78 + Output: "007GV", 79 + }, 80 + "sixBytes": { 81 + Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE}, 82 + Output: "007GV%KiWV", 83 + }, 84 + } 85 + 86 + for name, test := range tests { 87 + t.Run(name, func(t *testing.T) { 88 + dst := make([]byte, len(test.Output)) 89 + base85Encode(dst, test.Input) 90 + for i, b := range test.Output { 91 + if dst[i] != byte(b) { 92 + t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i]) 93 + } 94 + } 95 + }) 96 + } 97 + } 98 + 99 + func FuzzBase85Roundtrip(f *testing.F) { 100 + f.Add([]byte{0x2b, 0x0d}) 101 + f.Add([]byte{0xbc, 0xb4, 0x3f}) 102 + f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25}) 103 + f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8}) 104 + 105 + f.Fuzz(func(t *testing.T, in []byte) { 106 + n := len(in) 107 + dst := make([]byte, base85Len(n)) 108 + out := make([]byte, n) 109 + 110 + base85Encode(dst, in) 111 + if err := base85Decode(out, dst); err != nil { 112 + t.Fatalf("unexpected error decoding base85 data: %v", err) 113 + } 114 + if !bytes.Equal(in, out) { 115 + t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) 116 + } 117 + }) 118 + }

+11 -4

gitdiff/binary.go

··· 50 50 } 51 51 52 52 func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { 53 - switch p.Line(0) { 54 - case "GIT binary patch\n": 53 + line := p.Line(0) 54 + switch { 55 + case line == "GIT binary patch\n": 55 56 hasData = true 56 - case "Binary files differ\n": 57 - case "Files differ\n": 57 + case isBinaryNoDataMarker(line): 58 58 default: 59 59 return false, false, nil 60 60 } ··· 63 63 return false, false, err 64 64 } 65 65 return true, hasData, nil 66 + } 67 + 68 + func isBinaryNoDataMarker(line string) bool { 69 + if strings.HasSuffix(line, " differ\n") { 70 + return strings.HasPrefix(line, "Binary files ") || strings.HasPrefix(line, "Files ") 71 + } 72 + return false 66 73 } 67 74 68 75 func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) {

+10

gitdiff/binary_test.go

··· 25 25 IsBinary: true, 26 26 HasData: false, 27 27 }, 28 + "binaryFileNoPatchPaths": { 29 + Input: "Binary files a/foo.bin and b/foo.bin differ\n", 30 + IsBinary: true, 31 + HasData: false, 32 + }, 33 + "fileNoPatch": { 34 + Input: "Files differ\n", 35 + IsBinary: true, 36 + HasData: false, 37 + }, 28 38 "textFile": { 29 39 Input: "@@ -10,14 +22,31 @@\n", 30 40 IsBinary: false,

+103 -27

gitdiff/file_header.go

··· 57 57 return nil, "", err 58 58 } 59 59 } 60 - return nil, "", nil 60 + return nil, preamble.String(), nil 61 61 } 62 62 63 63 func (p *parser) ParseGitFileHeader() (*File, error) { ··· 172 172 // If the names in the header do not match because the patch is a rename, 173 173 // return an empty default name. 174 174 func parseGitHeaderName(header string) (string, error) { 175 - firstName, n, err := parseName(header, -1, 1) 176 - if err != nil { 177 - return "", err 175 + header = strings.TrimSuffix(header, "\n") 176 + if len(header) == 0 { 177 + return "", nil 178 178 } 179 179 180 - if n < len(header) && (header[n] == ' ' || header[n] == '\t') { 181 - n++ 182 - } 180 + var err error 181 + var first, second string 182 + 183 + // there are 4 cases to account for: 184 + // 185 + // 1) unquoted unquoted 186 + // 2) unquoted "quoted" 187 + // 3) "quoted" unquoted 188 + // 4) "quoted" "quoted" 189 + // 190 + quote := strings.IndexByte(header, '"') 191 + switch { 192 + case quote < 0: 193 + // case 1 194 + first = header 195 + 196 + case quote > 0: 197 + // case 2 198 + first = header[:quote-1] 199 + if !isSpace(header[quote-1]) { 200 + return "", fmt.Errorf("missing separator") 201 + } 202 + 203 + second, _, err = parseQuotedName(header[quote:]) 204 + if err != nil { 205 + return "", err 206 + } 207 + 208 + case quote == 0: 209 + // case 3 or case 4 210 + var n int 211 + first, n, err = parseQuotedName(header) 212 + if err != nil { 213 + return "", err 214 + } 215 + 216 + // git accepts multiple spaces after a quoted name, but not after an 217 + // unquoted name, since the name might end with one or more spaces 218 + for n < len(header) && isSpace(header[n]) { 219 + n++ 220 + } 221 + if n == len(header) { 222 + return "", nil 223 + } 183 224 184 - secondName, _, err := parseName(header[n:], -1, 1) 185 - if err != nil { 186 - return "", err 225 + if header[n] == '"' { 226 + second, _, err = parseQuotedName(header[n:]) 227 + if err != nil { 228 + return "", err 229 + } 
230 + } else { 231 + second = header[n:] 232 + } 187 233 } 188 234 189 - if firstName != secondName { 235 + first = trimTreePrefix(first, 1) 236 + if second != "" { 237 + if first == trimTreePrefix(second, 1) { 238 + return first, nil 239 + } 190 240 return "", nil 191 241 } 192 - return firstName, nil 242 + 243 + // at this point, both names are unquoted (case 1) 244 + // since names may contain spaces, we can't use a known separator 245 + // instead, look for a split that produces two equal names 246 + 247 + for i := 0; i < len(first)-1; i++ { 248 + if !isSpace(first[i]) { 249 + continue 250 + } 251 + second = trimTreePrefix(first[i+1:], 1) 252 + if name := first[:i]; name == second { 253 + return name, nil 254 + } 255 + } 256 + return "", nil 193 257 } 194 258 195 259 // parseGitHeaderData parses a single line of metadata from a Git file header. ··· 260 324 } 261 325 262 326 func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) { 263 - f.OldMode, err = parseMode(line) 327 + f.OldMode, err = parseMode(strings.TrimSpace(line)) 264 328 return 265 329 } 266 330 267 331 func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) { 268 - f.NewMode, err = parseMode(line) 332 + f.NewMode, err = parseMode(strings.TrimSpace(line)) 269 333 return 270 334 } 271 335 ··· 283 347 284 348 func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) { 285 349 f.IsCopy = true 286 - f.OldName, _, err = parseName(line, -1, 0) 350 + f.OldName, _, err = parseName(line, 0, 0) 287 351 return 288 352 } 289 353 290 354 func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) { 291 355 f.IsCopy = true 292 - f.NewName, _, err = parseName(line, -1, 0) 356 + f.NewName, _, err = parseName(line, 0, 0) 293 357 return 294 358 } 295 359 296 360 func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) { 297 361 f.IsRename = true 298 - f.OldName, _, err = parseName(line, -1, 0) 362 + f.OldName, _, err = 
parseName(line, 0, 0) 299 363 return 300 364 } 301 365 302 366 func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) { 303 367 f.IsRename = true 304 - f.NewName, _, err = parseName(line, -1, 0) 368 + f.NewName, _, err = parseName(line, 0, 0) 305 369 return 306 370 } 307 371 ··· 349 413 350 414 // parseName extracts a file name from the start of a string and returns the 351 415 // name and the index of the first character after the name. If the name is 352 - // unquoted and term is non-negative, parsing stops at the first occurrence of 353 - // term. Otherwise parsing of unquoted names stops at the first space or tab. 416 + // unquoted and term is non-zero, parsing stops at the first occurrence of 417 + // term. 354 418 // 355 419 // If the name is exactly "/dev/null", no further processing occurs. Otherwise, 356 420 // if dropPrefix is greater than zero, that number of prefix components 357 421 // separated by forward slashes are dropped from the name and any duplicate 358 422 // slashes are collapsed. 359 - func parseName(s string, term rune, dropPrefix int) (name string, n int, err error) { 423 + func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) { 360 424 if len(s) > 0 && s[0] == '"' { 361 425 name, n, err = parseQuotedName(s) 362 426 } else { ··· 387 451 return name, n, err 388 452 } 389 453 390 - func parseUnquotedName(s string, term rune) (name string, n int, err error) { 454 + func parseUnquotedName(s string, term byte) (name string, n int, err error) { 391 455 for n = 0; n < len(s); n++ { 392 456 if s[n] == '\n' { 393 457 break 394 458 } 395 - if term >= 0 && rune(s[n]) == term { 396 - break 397 - } 398 - if term < 0 && (s[n] == ' ' || s[n] == '\t') { 459 + if term > 0 && s[n] == term { 399 460 break 400 461 } 401 462 } ··· 440 501 return b.String() 441 502 } 442 503 504 + // trimTreePrefix removes up to n leading directory components from name. 
505 + func trimTreePrefix(name string, n int) string { 506 + i := 0 507 + for ; i < len(name) && n > 0; i++ { 508 + if name[i] == '/' { 509 + n-- 510 + } 511 + } 512 + return name[i:] 513 + } 514 + 443 515 // hasEpochTimestamp returns true if the string ends with a POSIX-formatted 444 516 // timestamp for the UNIX epoch after a tab character. According to git, this 445 517 // is used by GNU diff to mark creations and deletions. ··· 455 527 456 528 // a valid timestamp can have optional ':' in zone specifier 457 529 // remove that if it exists so we have a single format 458 - if ts[len(ts)-3] == ':' { 530 + if len(ts) >= 3 && ts[len(ts)-3] == ':' { 459 531 ts = ts[:len(ts)-3] + ts[len(ts)-2:] 460 532 } 461 533 ··· 468 540 } 469 541 return true 470 542 } 543 + 544 + func isSpace(c byte) bool { 545 + return c == ' ' || c == '\t' || c == '\n' 546 + }

+64 -8

gitdiff/file_header_test.go

··· 310 310 func TestParseName(t *testing.T) { 311 311 tests := map[string]struct { 312 312 Input string 313 - Term rune 313 + Term byte 314 314 Drop int 315 315 Output string 316 316 N int ··· 334 334 "dropPrefix": { 335 335 Input: "a/dir/file.txt", Drop: 1, Output: "dir/file.txt", N: 14, 336 336 }, 337 - "multipleNames": { 338 - Input: "dir/a.txt dir/b.txt", Term: -1, Output: "dir/a.txt", N: 9, 337 + "unquotedWithSpaces": { 338 + Input: "dir/with spaces.txt", Output: "dir/with spaces.txt", N: 19, 339 + }, 340 + "unquotedWithTrailingSpaces": { 341 + Input: "dir/with spaces.space ", Output: "dir/with spaces.space ", N: 23, 339 342 }, 340 343 "devNull": { 341 344 Input: "/dev/null", Term: '\t', Drop: 1, Output: "/dev/null", N: 9, 342 345 }, 343 - "newlineAlwaysSeparates": { 344 - Input: "dir/file.txt\n", Term: 0, Output: "dir/file.txt", N: 12, 346 + "newlineSeparates": { 347 + Input: "dir/file.txt\n", Output: "dir/file.txt", N: 12, 345 348 }, 346 349 "emptyString": { 347 350 Input: "", Err: true, ··· 483 486 OldMode: os.FileMode(0100644), 484 487 }, 485 488 }, 489 + "oldModeWithTrailingSpace": { 490 + Line: "old mode 100644\r\n", 491 + OutputFile: &File{ 492 + OldMode: os.FileMode(0100644), 493 + }, 494 + }, 486 495 "invalidOldMode": { 487 496 Line: "old mode rw\n", 488 497 Err: true, 489 498 }, 490 499 "newMode": { 491 500 Line: "new mode 100755\n", 501 + OutputFile: &File{ 502 + NewMode: os.FileMode(0100755), 503 + }, 504 + }, 505 + "newModeWithTrailingSpace": { 506 + Line: "new mode 100755\r\n", 492 507 OutputFile: &File{ 493 508 NewMode: os.FileMode(0100755), 494 509 }, ··· 508 523 }, 509 524 "newFileMode": { 510 525 Line: "new file mode 100755\n", 526 + DefaultName: "dir/file.txt", 527 + OutputFile: &File{ 528 + NewName: "dir/file.txt", 529 + NewMode: os.FileMode(0100755), 530 + IsNew: true, 531 + }, 532 + }, 533 + "newFileModeWithTrailingSpace": { 534 + Line: "new file mode 100755\r\n", 511 535 DefaultName: "dir/file.txt", 512 536 OutputFile: &File{ 513 537 
NewName: "dir/file.txt", ··· 630 654 Input: "a/dir/foo.txt b/dir/bar.txt", 631 655 Output: "", 632 656 }, 633 - "missingSecondName": { 634 - Input: "a/dir/foo.txt", 635 - Err: true, 657 + "matchingNamesWithSpaces": { 658 + Input: "a/dir/file with spaces.txt b/dir/file with spaces.txt", 659 + Output: "dir/file with spaces.txt", 660 + }, 661 + "matchingNamesWithTrailingSpaces": { 662 + Input: "a/dir/spaces b/dir/spaces ", 663 + Output: "dir/spaces ", 664 + }, 665 + "matchingNamesQuoted": { 666 + Input: `"a/dir/\"quotes\".txt" "b/dir/\"quotes\".txt"`, 667 + Output: `dir/"quotes".txt`, 668 + }, 669 + "matchingNamesFirstQuoted": { 670 + Input: `"a/dir/file.txt" b/dir/file.txt`, 671 + Output: "dir/file.txt", 672 + }, 673 + "matchingNamesSecondQuoted": { 674 + Input: `a/dir/file.txt "b/dir/file.txt"`, 675 + Output: "dir/file.txt", 676 + }, 677 + "noSecondName": { 678 + Input: "a/dir/foo.txt", 679 + Output: "", 680 + }, 681 + "noSecondNameQuoted": { 682 + Input: `"a/dir/foo.txt"`, 683 + Output: "", 636 684 }, 637 685 "invalidName": { 638 686 Input: `"a/dir/file.txt b/dir/file.txt`, ··· 695 743 }, 696 744 "notEpoch": { 697 745 Input: "+++ file.txt\t2019-03-21 12:34:56.789 -0700\n", 746 + Output: false, 747 + }, 748 + "notTimestamp": { 749 + Input: "+++ file.txt\trandom text\n", 750 + Output: false, 751 + }, 752 + "notTimestampShort": { 753 + Input: "+++ file.txt\t0\n", 698 754 Output: false, 699 755 }, 700 756 }

+281

gitdiff/format.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "bytes" 5 + "compress/zlib" 6 + "fmt" 7 + "io" 8 + "strconv" 9 + ) 10 + 11 + type formatter struct { 12 + w io.Writer 13 + err error 14 + } 15 + 16 + func newFormatter(w io.Writer) *formatter { 17 + return &formatter{w: w} 18 + } 19 + 20 + func (fm *formatter) Write(p []byte) (int, error) { 21 + if fm.err != nil { 22 + return len(p), nil 23 + } 24 + if _, err := fm.w.Write(p); err != nil { 25 + fm.err = err 26 + } 27 + return len(p), nil 28 + } 29 + 30 + func (fm *formatter) WriteString(s string) (int, error) { 31 + fm.Write([]byte(s)) 32 + return len(s), nil 33 + } 34 + 35 + func (fm *formatter) WriteByte(c byte) error { 36 + fm.Write([]byte{c}) 37 + return nil 38 + } 39 + 40 + func (fm *formatter) WriteQuotedName(s string) { 41 + qpos := 0 42 + for i := 0; i < len(s); i++ { 43 + ch := s[i] 44 + if q, quoted := quoteByte(ch); quoted { 45 + if qpos == 0 { 46 + fm.WriteByte('"') 47 + } 48 + fm.WriteString(s[qpos:i]) 49 + fm.Write(q) 50 + qpos = i + 1 51 + } 52 + } 53 + fm.WriteString(s[qpos:]) 54 + if qpos > 0 { 55 + fm.WriteByte('"') 56 + } 57 + } 58 + 59 + var quoteEscapeTable = map[byte]byte{ 60 + '\a': 'a', 61 + '\b': 'b', 62 + '\t': 't', 63 + '\n': 'n', 64 + '\v': 'v', 65 + '\f': 'f', 66 + '\r': 'r', 67 + '"': '"', 68 + '\\': '\\', 69 + } 70 + 71 + func quoteByte(b byte) ([]byte, bool) { 72 + if q, ok := quoteEscapeTable[b]; ok { 73 + return []byte{'\\', q}, true 74 + } 75 + if b < 0x20 || b >= 0x7F { 76 + return []byte{ 77 + '\\', 78 + '0' + (b>>6)&0o3, 79 + '0' + (b>>3)&0o7, 80 + '0' + (b>>0)&0o7, 81 + }, true 82 + } 83 + return nil, false 84 + } 85 + 86 + func (fm *formatter) FormatFile(f *File) { 87 + fm.WriteString("diff --git ") 88 + 89 + var aName, bName string 90 + switch { 91 + case f.OldName == "": 92 + aName = f.NewName 93 + bName = f.NewName 94 + 95 + case f.NewName == "": 96 + aName = f.OldName 97 + bName = f.OldName 98 + 99 + default: 100 + aName = f.OldName 101 + bName = f.NewName 102 + } 103 + 104 + 
fm.WriteQuotedName("a/" + aName) 105 + fm.WriteByte(' ') 106 + fm.WriteQuotedName("b/" + bName) 107 + fm.WriteByte('\n') 108 + 109 + if f.OldMode != 0 { 110 + if f.IsDelete { 111 + fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode) 112 + } else if f.NewMode != 0 { 113 + fmt.Fprintf(fm, "old mode %o\n", f.OldMode) 114 + } 115 + } 116 + 117 + if f.NewMode != 0 { 118 + if f.IsNew { 119 + fmt.Fprintf(fm, "new file mode %o\n", f.NewMode) 120 + } else if f.OldMode != 0 { 121 + fmt.Fprintf(fm, "new mode %o\n", f.NewMode) 122 + } 123 + } 124 + 125 + if f.Score > 0 { 126 + if f.IsCopy || f.IsRename { 127 + fmt.Fprintf(fm, "similarity index %d%%\n", f.Score) 128 + } else { 129 + fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score) 130 + } 131 + } 132 + 133 + if f.IsCopy { 134 + if f.OldName != "" { 135 + fm.WriteString("copy from ") 136 + fm.WriteQuotedName(f.OldName) 137 + fm.WriteByte('\n') 138 + } 139 + if f.NewName != "" { 140 + fm.WriteString("copy to ") 141 + fm.WriteQuotedName(f.NewName) 142 + fm.WriteByte('\n') 143 + } 144 + } 145 + 146 + if f.IsRename { 147 + if f.OldName != "" { 148 + fm.WriteString("rename from ") 149 + fm.WriteQuotedName(f.OldName) 150 + fm.WriteByte('\n') 151 + } 152 + if f.NewName != "" { 153 + fm.WriteString("rename to ") 154 + fm.WriteQuotedName(f.NewName) 155 + fm.WriteByte('\n') 156 + } 157 + } 158 + 159 + if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { 160 + fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) 161 + 162 + // Mode is only included on the index line when it is not changing 163 + if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { 164 + fmt.Fprintf(fm, " %o", f.OldMode) 165 + } 166 + 167 + fm.WriteByte('\n') 168 + } 169 + 170 + if f.IsBinary { 171 + if f.BinaryFragment == nil { 172 + fm.WriteString("Binary files ") 173 + fm.WriteQuotedName("a/" + aName) 174 + fm.WriteString(" and ") 175 + fm.WriteQuotedName("b/" + bName) 176 + fm.WriteString(" differ\n") 177 + } else { 178 + 
fm.WriteString("GIT binary patch\n") 179 + fm.FormatBinaryFragment(f.BinaryFragment) 180 + if f.ReverseBinaryFragment != nil { 181 + fm.FormatBinaryFragment(f.ReverseBinaryFragment) 182 + } 183 + } 184 + } 185 + 186 + // The "---" and "+++" lines only appear for text patches with fragments 187 + if len(f.TextFragments) > 0 { 188 + fm.WriteString("--- ") 189 + if f.OldName == "" { 190 + fm.WriteString("/dev/null") 191 + } else { 192 + fm.WriteQuotedName("a/" + f.OldName) 193 + } 194 + fm.WriteByte('\n') 195 + 196 + fm.WriteString("+++ ") 197 + if f.NewName == "" { 198 + fm.WriteString("/dev/null") 199 + } else { 200 + fm.WriteQuotedName("b/" + f.NewName) 201 + } 202 + fm.WriteByte('\n') 203 + 204 + for _, frag := range f.TextFragments { 205 + fm.FormatTextFragment(frag) 206 + } 207 + } 208 + } 209 + 210 + func (fm *formatter) FormatTextFragment(f *TextFragment) { 211 + fm.FormatTextFragmentHeader(f) 212 + fm.WriteByte('\n') 213 + 214 + for _, line := range f.Lines { 215 + fm.WriteString(line.Op.String()) 216 + fm.WriteString(line.Line) 217 + if line.NoEOL() { 218 + fm.WriteString("\n\\ No newline at end of file\n") 219 + } 220 + } 221 + } 222 + 223 + func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) { 224 + fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) 225 + if f.Comment != "" { 226 + fm.WriteByte(' ') 227 + fm.WriteString(f.Comment) 228 + } 229 + } 230 + 231 + func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) { 232 + const ( 233 + maxBytesPerLine = 52 234 + ) 235 + 236 + switch f.Method { 237 + case BinaryPatchDelta: 238 + fm.WriteString("delta ") 239 + case BinaryPatchLiteral: 240 + fm.WriteString("literal ") 241 + } 242 + fm.Write(strconv.AppendInt(nil, f.Size, 10)) 243 + fm.WriteByte('\n') 244 + 245 + data := deflateBinaryChunk(f.Data) 246 + n := (len(data) / maxBytesPerLine) * maxBytesPerLine 247 + 248 + buf := make([]byte, base85Len(maxBytesPerLine)) 249 + for i := 0; i < n; i += 
maxBytesPerLine { 250 + base85Encode(buf, data[i:i+maxBytesPerLine]) 251 + fm.WriteByte('z') 252 + fm.Write(buf) 253 + fm.WriteByte('\n') 254 + } 255 + if remainder := len(data) - n; remainder > 0 { 256 + buf = buf[0:base85Len(remainder)] 257 + 258 + sizeChar := byte(remainder) 259 + if remainder <= 26 { 260 + sizeChar = 'A' + sizeChar - 1 261 + } else { 262 + sizeChar = 'a' + sizeChar - 27 263 + } 264 + 265 + base85Encode(buf, data[n:]) 266 + fm.WriteByte(sizeChar) 267 + fm.Write(buf) 268 + fm.WriteByte('\n') 269 + } 270 + fm.WriteByte('\n') 271 + } 272 + 273 + func deflateBinaryChunk(data []byte) []byte { 274 + var b bytes.Buffer 275 + 276 + zw := zlib.NewWriter(&b) 277 + _, _ = zw.Write(data) 278 + _ = zw.Close() 279 + 280 + return b.Bytes() 281 + }

+157

gitdiff/format_roundtrip_test.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "bytes" 5 + "fmt" 6 + "os" 7 + "path/filepath" 8 + "slices" 9 + "testing" 10 + ) 11 + 12 + func TestFormatRoundtrip(t *testing.T) { 13 + patches := []struct { 14 + File string 15 + SkipTextCompare bool 16 + }{ 17 + {File: "copy.patch"}, 18 + {File: "copy_modify.patch"}, 19 + {File: "delete.patch"}, 20 + {File: "mode.patch"}, 21 + {File: "mode_modify.patch"}, 22 + {File: "modify.patch"}, 23 + {File: "new.patch"}, 24 + {File: "new_empty.patch"}, 25 + {File: "new_mode.patch"}, 26 + {File: "rename.patch"}, 27 + {File: "rename_modify.patch"}, 28 + 29 + // Due to differences between Go's 'encoding/zlib' package and the zlib 30 + // C library, binary patches cannot be compared directly as the patch 31 + // data is slightly different when re-encoded by Go. 32 + {File: "binary_modify.patch", SkipTextCompare: true}, 33 + {File: "binary_new.patch", SkipTextCompare: true}, 34 + {File: "binary_modify_nodata.patch"}, 35 + } 36 + 37 + for _, patch := range patches { 38 + t.Run(patch.File, func(t *testing.T) { 39 + b, err := os.ReadFile(filepath.Join("testdata", "string", patch.File)) 40 + if err != nil { 41 + t.Fatalf("failed to read patch: %v", err) 42 + } 43 + 44 + original := assertParseSingleFile(t, b, "patch") 45 + str := original.String() 46 + 47 + if !patch.SkipTextCompare { 48 + if string(b) != str { 49 + t.Errorf("incorrect patch text\nexpected: %q\n actual: %q\n", string(b), str) 50 + } 51 + } 52 + 53 + reparsed := assertParseSingleFile(t, []byte(str), "formatted patch") 54 + assertFilesEqual(t, original, reparsed) 55 + }) 56 + } 57 + } 58 + 59 + func assertParseSingleFile(t *testing.T, b []byte, kind string) *File { 60 + files, _, err := Parse(bytes.NewReader(b)) 61 + if err != nil { 62 + t.Fatalf("failed to parse %s: %v", kind, err) 63 + } 64 + if len(files) != 1 { 65 + t.Fatalf("expected %s to contain a single files, but found %d", kind, len(files)) 66 + } 67 + return files[0] 68 + } 69 + 70 + func assertFilesEqual(t 
*testing.T, expected, actual *File) { 71 + assertEqual(t, expected.OldName, actual.OldName, "OldName") 72 + assertEqual(t, expected.NewName, actual.NewName, "NewName") 73 + 74 + assertEqual(t, expected.IsNew, actual.IsNew, "IsNew") 75 + assertEqual(t, expected.IsDelete, actual.IsDelete, "IsDelete") 76 + assertEqual(t, expected.IsCopy, actual.IsCopy, "IsCopy") 77 + assertEqual(t, expected.IsRename, actual.IsRename, "IsRename") 78 + 79 + assertEqual(t, expected.OldMode, actual.OldMode, "OldMode") 80 + assertEqual(t, expected.NewMode, actual.NewMode, "NewMode") 81 + 82 + assertEqual(t, expected.OldOIDPrefix, actual.OldOIDPrefix, "OldOIDPrefix") 83 + assertEqual(t, expected.NewOIDPrefix, actual.NewOIDPrefix, "NewOIDPrefix") 84 + assertEqual(t, expected.Score, actual.Score, "Score") 85 + 86 + if len(expected.TextFragments) == len(actual.TextFragments) { 87 + for i := range expected.TextFragments { 88 + prefix := fmt.Sprintf("TextFragments[%d].", i) 89 + ef := expected.TextFragments[i] 90 + af := actual.TextFragments[i] 91 + 92 + assertEqual(t, ef.Comment, af.Comment, prefix+"Comment") 93 + 94 + assertEqual(t, ef.OldPosition, af.OldPosition, prefix+"OldPosition") 95 + assertEqual(t, ef.OldLines, af.OldLines, prefix+"OldLines") 96 + 97 + assertEqual(t, ef.NewPosition, af.NewPosition, prefix+"NewPosition") 98 + assertEqual(t, ef.NewLines, af.NewLines, prefix+"NewLines") 99 + 100 + assertEqual(t, ef.LinesAdded, af.LinesAdded, prefix+"LinesAdded") 101 + assertEqual(t, ef.LinesDeleted, af.LinesDeleted, prefix+"LinesDeleted") 102 + 103 + assertEqual(t, ef.LeadingContext, af.LeadingContext, prefix+"LeadingContext") 104 + assertEqual(t, ef.TrailingContext, af.TrailingContext, prefix+"TrailingContext") 105 + 106 + if !slices.Equal(ef.Lines, af.Lines) { 107 + t.Errorf("%sLines: expected %#v, actual %#v", prefix, ef.Lines, af.Lines) 108 + } 109 + } 110 + } else { 111 + t.Errorf("TextFragments: expected length %d, actual length %d", len(expected.TextFragments), 
len(actual.TextFragments)) 112 + } 113 + 114 + assertEqual(t, expected.IsBinary, actual.IsBinary, "IsBinary") 115 + 116 + if expected.BinaryFragment != nil { 117 + if actual.BinaryFragment == nil { 118 + t.Errorf("BinaryFragment: expected non-nil, actual is nil") 119 + } else { 120 + ef := expected.BinaryFragment 121 + af := expected.BinaryFragment 122 + 123 + assertEqual(t, ef.Method, af.Method, "BinaryFragment.Method") 124 + assertEqual(t, ef.Size, af.Size, "BinaryFragment.Size") 125 + 126 + if !slices.Equal(ef.Data, af.Data) { 127 + t.Errorf("BinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) 128 + } 129 + } 130 + } else if actual.BinaryFragment != nil { 131 + t.Errorf("BinaryFragment: expected nil, actual is non-nil") 132 + } 133 + 134 + if expected.ReverseBinaryFragment != nil { 135 + if actual.ReverseBinaryFragment == nil { 136 + t.Errorf("ReverseBinaryFragment: expected non-nil, actual is nil") 137 + } else { 138 + ef := expected.ReverseBinaryFragment 139 + af := expected.ReverseBinaryFragment 140 + 141 + assertEqual(t, ef.Method, af.Method, "ReverseBinaryFragment.Method") 142 + assertEqual(t, ef.Size, af.Size, "ReverseBinaryFragment.Size") 143 + 144 + if !slices.Equal(ef.Data, af.Data) { 145 + t.Errorf("ReverseBinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) 146 + } 147 + } 148 + } else if actual.ReverseBinaryFragment != nil { 149 + t.Errorf("ReverseBinaryFragment: expected nil, actual is non-nil") 150 + } 151 + } 152 + 153 + func assertEqual[T comparable](t *testing.T, expected, actual T, name string) { 154 + if expected != actual { 155 + t.Errorf("%s: expected %#v, actual %#v", name, expected, actual) 156 + } 157 + }

+28

gitdiff/format_test.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "strings" 5 + "testing" 6 + ) 7 + 8 + func TestFormatter_WriteQuotedName(t *testing.T) { 9 + tests := []struct { 10 + Input string 11 + Expected string 12 + }{ 13 + {"noquotes.txt", `noquotes.txt`}, 14 + {"no quotes.txt", `no quotes.txt`}, 15 + {"new\nline", `"new\nline"`}, 16 + {"escape\x1B null\x00", `"escape\033 null\000"`}, 17 + {"snowman \u2603 snowman", `"snowman \342\230\203 snowman"`}, 18 + {"\"already quoted\"", `"\"already quoted\""`}, 19 + } 20 + 21 + for _, test := range tests { 22 + var b strings.Builder 23 + newFormatter(&b).WriteQuotedName(test.Input) 24 + if b.String() != test.Expected { 25 + t.Errorf("expected %q, got %q", test.Expected, b.String()) 26 + } 27 + } 28 + }

+33 -2

gitdiff/gitdiff.go

··· 4 4 "errors" 5 5 "fmt" 6 6 "os" 7 + "strings" 7 8 ) 8 9 9 10 // File describes changes to a single file. It can be either a text file or a ··· 38 39 ReverseBinaryFragment *BinaryFragment 39 40 } 40 41 42 + // String returns a git diff representation of this file. The value can be 43 + // parsed by this library to obtain the same File, but may not be the same as 44 + // the original input. 45 + func (f *File) String() string { 46 + var diff strings.Builder 47 + newFormatter(&diff).FormatFile(f) 48 + return diff.String() 49 + } 50 + 41 51 // TextFragment describes changed lines starting at a specific line in a text file. 42 52 type TextFragment struct { 43 53 Comment string ··· 57 67 Lines []Line 58 68 } 59 69 60 - // Header returns the canonical header of this fragment. 70 + // String returns a git diff format of this fragment. See [File.String] for 71 + // more details on this format. 72 + func (f *TextFragment) String() string { 73 + var diff strings.Builder 74 + newFormatter(&diff).FormatTextFragment(f) 75 + return diff.String() 76 + } 77 + 78 + // Header returns a git diff header of this fragment. See [File.String] for 79 + // more details on this format. 61 80 func (f *TextFragment) Header() string { 62 - return fmt.Sprintf("@@ -%d,%d +%d,%d @@ %s", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines, f.Comment) 81 + var hdr strings.Builder 82 + newFormatter(&hdr).FormatTextFragmentHeader(f) 83 + return hdr.String() 63 84 } 64 85 65 86 // Validate checks that the fragment is self-consistent and appliable. Validate ··· 197 218 // BinaryPatchLiteral indicates the data is the exact file content 198 219 BinaryPatchLiteral 199 220 ) 221 + 222 + // String returns a git diff format of this fragment. Due to differences in 223 + // zlib implementation between Go and Git, encoded binary data in the result 224 + // will likely differ from what Git produces for the same input. See 225 + // [File.String] for more details on this format. 
226 + func (f *BinaryFragment) String() string { 227 + var diff strings.Builder 228 + newFormatter(&diff).FormatBinaryFragment(f) 229 + return diff.String() 230 + }

+22 -22

gitdiff/io.go

··· 5 5 "io" 6 6 ) 7 7 8 + const ( 9 + byteBufferSize = 32 * 1024 // from io.Copy 10 + lineBufferSize = 32 11 + indexBufferSize = 1024 12 + ) 13 + 8 14 // LineReaderAt is the interface that wraps the ReadLinesAt method. 9 15 // 10 - // ReadLinesAt reads len(lines) into lines starting at line offset in the 11 - // input source. It returns number of full lines read (0 <= n <= len(lines)) 12 - // and any error encountered. Line numbers are zero-indexed. 16 + // ReadLinesAt reads len(lines) into lines starting at line offset. It returns 17 + // the number of lines read (0 <= n <= len(lines)) and any error encountered. 18 + // Line numbers are zero-indexed. 13 19 // 14 20 // If n < len(lines), ReadLinesAt returns a non-nil error explaining why more 15 21 // lines were not returned. 16 22 // 17 - // Each full line includes the line ending character(s). If the last line of 18 - // the input does not have a line ending character, ReadLinesAt returns the 19 - // content of the line and io.EOF. 20 - // 21 - // If the content of the input source changes after the first call to 22 - // ReadLinesAt, the behavior of future calls is undefined. 23 + // Lines read by ReadLinesAt include the newline character. The last line does 24 + // not have a final newline character if the input ends without one. 23 25 type LineReaderAt interface { 24 26 ReadLinesAt(lines [][]byte, offset int64) (n int, err error) 25 27 } ··· 65 67 lines[n] = buf[start:end] 66 68 } 67 69 68 - if n < count || buf[len(buf)-1] != '\n' { 70 + if n < count { 69 71 return n, io.EOF 70 72 } 71 73 return n, nil ··· 75 77 // for line or a read returns io.EOF. It returns an error if and only if there 76 78 // is an error reading data. 
77 79 func (r *lineReaderAt) indexTo(line int64) error { 78 - var buf [1024]byte 79 - 80 - var offset int64 81 - if len(r.index) > 0 { 82 - offset = r.index[len(r.index)-1] 83 - } 80 + var buf [indexBufferSize]byte 84 81 82 + offset := r.lastOffset() 85 83 for int64(len(r.index)) < line { 86 84 n, err := r.r.ReadAt(buf[:], offset) 87 85 if err != nil && err != io.EOF { ··· 94 92 } 95 93 } 96 94 if err == io.EOF { 97 - if n > 0 && buf[n-1] != '\n' { 95 + if offset > r.lastOffset() { 98 96 r.index = append(r.index, offset) 99 97 } 100 98 r.eof = true ··· 102 100 } 103 101 } 104 102 return nil 103 + } 104 + 105 + func (r *lineReaderAt) lastOffset() int64 { 106 + if n := len(r.index); n > 0 { 107 + return r.index[n-1] 108 + } 109 + return 0 105 110 } 106 111 107 112 // readBytes reads the bytes of the n lines starting at line and returns the ··· 146 151 } 147 152 return false, err 148 153 } 149 - 150 - const ( 151 - byteBufferSize = 32 * 1024 // from io.Copy 152 - lineBufferSize = 32 153 - ) 154 154 155 155 // copyFrom writes bytes starting from offset off in src to dst stopping at the 156 156 // end of src or at the first error. copyFrom returns the number of bytes

+54 -2

gitdiff/io_test.go

··· 9 9 ) 10 10 11 11 func TestLineReaderAt(t *testing.T) { 12 + const lineTemplate = "generated test line %d\n" 13 + 12 14 tests := map[string]struct { 13 15 InputLines int 14 16 Offset int64 ··· 41 43 InputLines: 4, 42 44 Offset: 2, 43 45 Count: 0, 46 + }, 47 + "readAllLines": { 48 + InputLines: 64, 49 + Offset: 0, 50 + Count: 64, 44 51 }, 45 52 "readThroughEOF": { 46 53 InputLines: 16, ··· 71 78 }, 72 79 } 73 80 74 - const lineTemplate = "generated test line %d\n" 75 - 76 81 for name, test := range tests { 77 82 t.Run(name, func(t *testing.T) { 78 83 var input bytes.Buffer ··· 110 115 for i := 0; i < n; i++ { 111 116 if !bytes.Equal(output[i], lines[i]) { 112 117 t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], lines[i]) 118 + } 119 + } 120 + }) 121 + } 122 + 123 + newlineTests := map[string]struct { 124 + InputSize int 125 + }{ 126 + "readLinesNoFinalNewline": { 127 + InputSize: indexBufferSize + indexBufferSize/2, 128 + }, 129 + "readLinesNoFinalNewlineBufferMultiple": { 130 + InputSize: 4 * indexBufferSize, 131 + }, 132 + } 133 + 134 + for name, test := range newlineTests { 135 + t.Run(name, func(t *testing.T) { 136 + input := bytes.Repeat([]byte("0"), test.InputSize) 137 + 138 + var output [][]byte 139 + for i := 0; i < len(input); i++ { 140 + last := i 141 + i += rand.Intn(80) 142 + if i < len(input)-1 { // last character of input must not be a newline 143 + input[i] = '\n' 144 + output = append(output, input[last:i+1]) 145 + } else { 146 + output = append(output, input[last:]) 147 + } 148 + } 149 + 150 + r := &lineReaderAt{r: bytes.NewReader(input)} 151 + lines := make([][]byte, len(output)) 152 + 153 + n, err := r.ReadLinesAt(lines, 0) 154 + if err != nil { 155 + t.Fatalf("unexpected error reading reading lines: %v", err) 156 + } 157 + 158 + if n != len(output) { 159 + t.Fatalf("incorrect number of lines read: expected %d, actual %d", len(output), n) 160 + } 161 + 162 + for i, line := range lines { 163 + if 
!bytes.Equal(output[i], line) { 164 + t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], line) 113 165 } 114 166 } 115 167 })

+7 -3

gitdiff/parser.go

··· 12 12 // Parse parses a patch with changes to one or more files. Any content before 13 13 // the first file is returned as the second value. If an error occurs while 14 14 // parsing, it returns all files parsed before the error. 15 + // 16 + // Parse expects to receive a single patch. If the input may contain multiple 17 + // patches (for example, if it is an mbox file), callers should split it into 18 + // individual patches and call Parse on each one. 15 19 func Parse(r io.Reader) ([]*File, string, error) { 16 20 p := newParser(r) 17 21 ··· 29 33 if err != nil { 30 34 return files, preamble, err 31 35 } 36 + if len(files) == 0 { 37 + preamble = pre 38 + } 32 39 if file == nil { 33 40 break 34 41 } ··· 46 53 } 47 54 } 48 55 49 - if len(files) == 0 { 50 - preamble = pre 51 - } 52 56 files = append(files, file) 53 57 } 54 58

+16 -2

gitdiff/parser_test.go

··· 281 281 --- could this be a header? 282 282 nope, it's just some dashes 283 283 `, 284 - Output: nil, 285 - Preamble: "", 284 + Output: nil, 285 + Preamble: ` 286 + this is a line 287 + this is another line 288 + --- could this be a header? 289 + nope, it's just some dashes 290 + `, 286 291 }, 287 292 "detatchedFragmentLike": { 288 293 Input: ` ··· 290 295 @@ -1,3 +1,4 ~1,5 @@ 291 296 `, 292 297 Output: nil, 298 + Preamble: ` 299 + a wild fragment appears? 300 + @@ -1,3 +1,4 ~1,5 @@ 301 + `, 293 302 }, 294 303 "detatchedFragment": { 295 304 Input: ` ··· 425 434 }, 426 435 }, 427 436 Preamble: textPreamble, 437 + }, 438 + "noFiles": { 439 + InputFile: "testdata/no_files.patch", 440 + Output: nil, 441 + Preamble: textPreamble, 428 442 }, 429 443 "newBinaryFile": { 430 444 InputFile: "testdata/new_binary_file.patch",

+156 -128

gitdiff/patch_header.go

··· 5 5 "errors" 6 6 "fmt" 7 7 "io" 8 + "io/ioutil" 9 + "mime/quotedprintable" 8 10 "net/mail" 9 11 "strconv" 10 12 "strings" ··· 13 15 ) 14 16 15 17 const ( 16 - mailHeaderPrefix = "From " 17 - prettyHeaderPrefix = "commit " 18 + mailHeaderPrefix = "From " 19 + prettyHeaderPrefix = "commit " 20 + mailMinimumHeaderPrefix = "From:" 18 21 ) 19 22 20 23 // PatchHeader is a parsed version of the preamble content that appears before ··· 49 52 // line, that line will be removed and everything after it will be 50 53 // placed in BodyAppendix. 51 54 BodyAppendix string 55 + 56 + // All headers completely unparsed 57 + RawHeaders map[string][]string 52 58 } 53 59 54 60 // Message returns the commit message for the header. The message consists of ··· 65 71 return msg.String() 66 72 } 67 73 68 - // PatchIdentity identifies a person who authored or committed a patch. 69 - type PatchIdentity struct { 70 - Name string 71 - Email string 72 - } 73 - 74 - func (i PatchIdentity) String() string { 75 - name := i.Name 76 - if name == "" { 77 - name = `""` 78 - } 79 - return fmt.Sprintf("%s <%s>", name, i.Email) 80 - } 81 - 82 - // ParsePatchIdentity parses a patch identity string. A valid string contains a 83 - // non-empty name followed by an email address in angle brackets. Like Git, 84 - // ParsePatchIdentity does not require that the email address is valid or 85 - // properly formatted, only that it is non-empty. The name must not contain a 86 - // left angle bracket, '<', and the email address must not contain a right 87 - // angle bracket, '>'. 
88 - func ParsePatchIdentity(s string) (PatchIdentity, error) { 89 - var emailStart, emailEnd int 90 - for i, c := range s { 91 - if c == '<' && emailStart == 0 { 92 - emailStart = i + 1 93 - } 94 - if c == '>' && emailStart > 0 { 95 - emailEnd = i 96 - break 97 - } 98 - } 99 - if emailStart > 0 && emailEnd == 0 { 100 - return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s) 101 - } 102 - 103 - var name, email string 104 - if emailStart > 0 { 105 - name = strings.TrimSpace(s[:emailStart-1]) 106 - } 107 - if emailStart > 0 && emailEnd > 0 { 108 - email = strings.TrimSpace(s[emailStart:emailEnd]) 109 - } 110 - if name == "" || email == "" { 111 - return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s) 112 - } 113 - 114 - return PatchIdentity{Name: name, Email: email}, nil 115 - } 116 - 117 74 // ParsePatchDate parses a patch date string. It returns the parsed time or an 118 75 // error if s has an unknown format. ParsePatchDate supports the iso, rfc, 119 76 // short, raw, unix, and default formats (with local variants) used by the ··· 162 119 return time.Time{}, fmt.Errorf("unknown date format: %s", s) 163 120 } 164 121 165 - // ParsePatchHeader parses a preamble string as returned by Parse into a 122 + // A PatchHeaderOption modifies the behavior of ParsePatchHeader. 123 + type PatchHeaderOption func(*patchHeaderOptions) 124 + 125 + // SubjectCleanMode controls how ParsePatchHeader cleans subject lines when 126 + // parsing mail-formatted patches. 127 + type SubjectCleanMode int 128 + 129 + const ( 130 + // SubjectCleanWhitespace removes leading and trailing whitespace. 131 + SubjectCleanWhitespace SubjectCleanMode = iota 132 + 133 + // SubjectCleanAll removes leading and trailing whitespace, leading "Re:", 134 + // "re:", and ":" strings, and leading strings enclosed by '[' and ']'. 135 + // This is the default behavior of git (see `git mailinfo`) and this 136 + // package. 
137 + SubjectCleanAll 138 + 139 + // SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes 140 + // leading strings enclosed by '[' and ']' if they start with "PATCH". 141 + SubjectCleanPatchOnly 142 + ) 143 + 144 + // WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By 145 + // default, uses SubjectCleanAll. 146 + func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption { 147 + return func(opts *patchHeaderOptions) { 148 + opts.subjectCleanMode = m 149 + } 150 + } 151 + 152 + type patchHeaderOptions struct { 153 + subjectCleanMode SubjectCleanMode 154 + } 155 + 156 + // ParsePatchHeader parses the preamble string returned by [Parse] into a 166 157 // PatchHeader. Due to the variety of header formats, some fields of the parsed 167 158 // PatchHeader may be unset after parsing. 168 159 // 169 160 // Supported formats are the short, medium, full, fuller, and email pretty 170 - // formats used by git diff, git log, and git show and the UNIX mailbox format 171 - // used by git format-patch. 161 + // formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox 162 + // format used by `git format-patch`. 172 163 // 173 - // If ParsePatchHeader detects that it is handling an email, it will 174 - // remove extra content at the beginning of the title line, such as 175 - // `[PATCH]` or `Re:` in the same way that `git mailinfo` does. 176 - // SubjectPrefix will be set to the value of this removed string. 177 - // (`git mailinfo` is the core part of `git am` that pulls information 178 - // out of an individual mail.) 164 + // When parsing mail-formatted headers, ParsePatchHeader tries to remove 165 + // email-specific content from the title and body: 166 + // 167 + // - Based on the SubjectCleanMode, remove prefixes like reply markers and 168 + // "[PATCH]" strings from the subject, saving any removed content in the 169 + // SubjectPrefix field. 
Parsing always discards leading and trailing 170 + // whitespace from the subject line. The default mode is SubjectCleanAll. 179 171 // 180 - // Additionally, if ParsePatchHeader detects that it's handling an 181 - // email, it will remove a `---` line and put anything after it into 182 - // BodyAppendix. 172 + // - If the body contains a "---" line (3 hyphens), remove that line and any 173 + // content after it from the body and save it in the BodyAppendix field. 183 174 // 184 - // Those wishing the effect of a plain `git am` should use 185 - // `PatchHeader.Title + "\n" + PatchHeader.Body` (or 186 - // `PatchHeader.Message()`). Those wishing to retain the subject 187 - // prefix and appendix material should use `PatchHeader.SubjectPrefix 188 - // + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" + 189 - // PatchHeader.BodyAppendix`. 190 - func ParsePatchHeader(s string) (*PatchHeader, error) { 191 - r := bufio.NewReader(strings.NewReader(s)) 175 + // ParsePatchHeader tries to process content it does not understand without 176 + // returning errors, but will return errors if well-identified content like 177 + // dates or identities uses unknown or invalid formats. 
178 + func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) { 179 + opts := patchHeaderOptions{ 180 + subjectCleanMode: SubjectCleanAll, // match git defaults 181 + } 182 + for _, optFn := range options { 183 + optFn(&opts) 184 + } 192 185 193 - var line string 194 - for { 195 - var err error 196 - line, err = r.ReadString('\n') 197 - if err == io.EOF { 198 - break 199 - } 200 - if err != nil { 201 - return nil, err 202 - } 186 + header = strings.TrimSpace(header) 187 + if header == "" { 188 + return &PatchHeader{}, nil 189 + } 203 190 204 - line = strings.TrimSpace(line) 205 - if len(line) > 0 { 206 - break 207 - } 191 + var firstLine, rest string 192 + if idx := strings.IndexByte(header, '\n'); idx >= 0 { 193 + firstLine = header[:idx] 194 + rest = header[idx+1:] 195 + } else { 196 + firstLine = header 197 + rest = "" 208 198 } 209 199 210 200 switch { 211 - case strings.HasPrefix(line, mailHeaderPrefix): 212 - return parseHeaderMail(line, r) 213 - case strings.HasPrefix(line, prettyHeaderPrefix): 214 - return parseHeaderPretty(line, r) 201 + case strings.HasPrefix(firstLine, mailHeaderPrefix): 202 + return parseHeaderMail(firstLine, strings.NewReader(rest), opts) 203 + 204 + case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix): 205 + // With a minimum header, the first line is part of the actual mail 206 + // content and needs to be parsed as part of the "rest" 207 + return parseHeaderMail("", strings.NewReader(header), opts) 208 + 209 + case strings.HasPrefix(firstLine, prettyHeaderPrefix): 210 + return parseHeaderPretty(firstLine, strings.NewReader(rest)) 215 211 } 212 + 216 213 return nil, errors.New("unrecognized patch header format") 217 214 } 218 215 ··· 227 224 228 225 h := &PatchHeader{} 229 226 230 - prettyLine = prettyLine[len(prettyHeaderPrefix):] 227 + prettyLine = strings.TrimPrefix(prettyLine, prettyHeaderPrefix) 231 228 if i := strings.IndexByte(prettyLine, ' '); i > 0 { 232 229 h.SHA = prettyLine[:i] 233 
230 } else { ··· 243 240 break 244 241 } 245 242 243 + items := strings.SplitN(line, ":", 2) 244 + 245 + // we have "key: value" 246 + if len(items) == 2 { 247 + key := items[0] 248 + val := items[1] 249 + h.RawHeaders[key] = append(h.RawHeaders[key], val) 250 + } 251 + 246 252 switch { 247 253 case strings.HasPrefix(line, authorPrefix): 248 254 u, err := ParsePatchIdentity(line[len(authorPrefix):]) ··· 291 297 h.Title = title 292 298 293 299 if title != "" { 294 - // Don't check for an appendix 300 + // Don't check for an appendix, pretty headers do not contain them 295 301 body, _ := scanMessageBody(s, indent, false) 296 302 if s.Err() != nil { 297 303 return nil, s.Err() ··· 360 366 return body.String(), appendix.String() 361 367 } 362 368 363 - func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { 369 + func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) { 364 370 msg, err := mail.ReadMessage(r) 365 371 if err != nil { 366 372 return nil, err 367 373 } 368 374 369 375 h := &PatchHeader{} 376 + h.RawHeaders = msg.Header 370 377 371 - mailLine = mailLine[len(mailHeaderPrefix):] 372 - if i := strings.IndexByte(mailLine, ' '); i > 0 { 373 - h.SHA = mailLine[:i] 378 + if strings.HasPrefix(mailLine, mailHeaderPrefix) { 379 + mailLine = strings.TrimPrefix(mailLine, mailHeaderPrefix) 380 + if i := strings.IndexByte(mailLine, ' '); i > 0 { 381 + h.SHA = mailLine[:i] 382 + } 374 383 } 375 384 376 - addrs, err := msg.Header.AddressList("From") 377 - if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) { 378 - return nil, err 379 - } 380 - if len(addrs) > 0 { 381 - addr := addrs[0] 382 - if addr.Name == "" { 383 - return nil, fmt.Errorf("invalid user string: %s", addr) 385 + from := msg.Header.Get("From") 386 + if from != "" { 387 + u, err := ParsePatchIdentity(from) 388 + if err != nil { 389 + return nil, err 384 390 } 385 - h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address} 391 + 
h.Author = &u 386 392 } 387 393 388 394 date := msg.Header.Get("Date") ··· 395 401 } 396 402 397 403 subject := msg.Header.Get("Subject") 398 - h.SubjectPrefix, h.Title = parseSubject(subject) 404 + h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode) 399 405 400 406 s := bufio.NewScanner(msg.Body) 401 407 h.Body, h.BodyAppendix = scanMessageBody(s, "", true) ··· 406 412 return h, nil 407 413 } 408 414 409 - // Takes an email subject and returns the patch prefix and commit 410 - // title. i.e., `[PATCH v3 3/5] Implement foo` would return `[PATCH 411 - // v3 3/5] ` and `Implement foo` 412 - func parseSubject(s string) (string, string) { 413 - // This is meant to be compatible with 414 - // https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject(). 415 - // If compatibility with `git am` drifts, go there to see if there 416 - // are any updates. 415 + func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) { 416 + switch mode { 417 + case SubjectCleanAll, SubjectCleanPatchOnly: 418 + case SubjectCleanWhitespace: 419 + return "", strings.TrimSpace(decodeSubject(s)) 420 + default: 421 + panic(fmt.Sprintf("unknown clean mode: %d", mode)) 422 + } 423 + 424 + // Based on the algorithm from Git in mailinfo.c:cleanup_subject() 425 + // If compatibility with `git am` drifts, go there to see if there are any updates. 
417 426 418 427 at := 0 419 428 for at < len(s) { 420 429 switch s[at] { 421 430 case 'r', 'R': 422 431 // Detect re:, Re:, rE: and RE: 423 - if at+2 < len(s) && 424 - (s[at+1] == 'e' || s[at+1] == 'E') && 425 - s[at+2] == ':' { 432 + if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' { 426 433 at += 3 427 434 continue 428 435 } ··· 433 440 continue 434 441 435 442 case '[': 436 - // Look for closing parenthesis 437 - j := at + 1 438 - for ; j < len(s); j++ { 439 - if s[j] == ']' { 440 - break 443 + if i := strings.IndexByte(s[at:], ']'); i > 0 { 444 + if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") { 445 + at += i + 1 446 + continue 441 447 } 442 448 } 443 - 444 - if j < len(s) { 445 - at = j + 1 446 - continue 447 - } 448 449 } 449 450 450 - // Only loop if we actually removed something 451 + // Nothing was removed, end processing 451 452 break 452 453 } 453 454 454 - return s[:at], s[at:] 455 + prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace) 456 + subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace) 457 + return 458 + } 459 + 460 + // Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result 461 + // of a `git format-patch` when the commit title has a non-ASCII character (i.e. an emoji). 462 + // See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject 463 + func decodeSubject(encoded string) string { 464 + if !strings.HasPrefix(encoded, "=?UTF-8?q?") { 465 + // not UTF-8 encoded 466 + return encoded 467 + } 468 + 469 + // If the subject is too long, `git format-patch` may produce a subject line across 470 + // multiple lines. 
When parsed, this can look like the following: 471 + // <UTF8-prefix><first-line> <UTF8-prefix><second-line> 472 + payload := " " + encoded 473 + payload = strings.ReplaceAll(payload, " =?UTF-8?q?", "") 474 + payload = strings.ReplaceAll(payload, "?=", "") 475 + 476 + decoded, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(payload))) 477 + if err != nil { 478 + // if err, abort decoding and return original subject 479 + return encoded 480 + } 481 + 482 + return string(decoded) 455 483 }

+243 -104

gitdiff/patch_header_test.go

··· 5 5 "time" 6 6 ) 7 7 8 - func TestParsePatchIdentity(t *testing.T) { 9 - tests := map[string]struct { 10 - Input string 11 - Output PatchIdentity 12 - Err interface{} 13 - }{ 14 - "simple": { 15 - Input: "Morton Haypenny <mhaypenny@example.com>", 16 - Output: PatchIdentity{ 17 - Name: "Morton Haypenny", 18 - Email: "mhaypenny@example.com", 19 - }, 20 - }, 21 - "extraWhitespace": { 22 - Input: " Morton Haypenny <mhaypenny@example.com > ", 23 - Output: PatchIdentity{ 24 - Name: "Morton Haypenny", 25 - Email: "mhaypenny@example.com", 26 - }, 27 - }, 28 - "trailingCharacters": { 29 - Input: "Morton Haypenny <mhaypenny@example.com> unrelated garbage", 30 - Output: PatchIdentity{ 31 - Name: "Morton Haypenny", 32 - Email: "mhaypenny@example.com", 33 - }, 34 - }, 35 - "missingName": { 36 - Input: "<mhaypenny@example.com>", 37 - Err: "invalid identity", 38 - }, 39 - "missingEmail": { 40 - Input: "Morton Haypenny", 41 - Err: "invalid identity", 42 - }, 43 - "unclosedEmail": { 44 - Input: "Morton Haypenny <mhaypenny@example.com", 45 - Err: "unclosed email", 46 - }, 47 - } 48 - 49 - for name, test := range tests { 50 - t.Run(name, func(t *testing.T) { 51 - id, err := ParsePatchIdentity(test.Input) 52 - if test.Err != nil { 53 - assertError(t, test.Err, err, "parsing identity") 54 - return 55 - } 56 - if err != nil { 57 - t.Fatalf("unexpected error parsing identity: %v", err) 58 - } 59 - 60 - if test.Output != id { 61 - t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) 62 - } 63 - }) 64 - } 65 - } 66 - 67 8 func TestParsePatchDate(t *testing.T) { 68 9 expected := time.Date(2020, 4, 9, 8, 7, 6, 0, time.UTC) 69 10 ··· 138 79 } 139 80 expectedDate := time.Date(2020, 04, 11, 15, 21, 23, 0, time.FixedZone("PDT", -7*60*60)) 140 81 expectedTitle := "A sample commit to test header parsing" 82 + expectedEmojiOneLineTitle := "🤖 Enabling auto-merging" 83 + expectedEmojiMultiLineTitle := "[IA64] Put ia64 config files on the Uwe Kleine-König diet" 141 84 
expectedBody := "The medium format shows the body, which\nmay wrap on to multiple lines.\n\nAnother body line." 142 85 expectedBodyAppendix := "CC: Joe Smith <joe.smith@company.com>" 143 86 144 87 tests := map[string]struct { 145 - Input string 146 - Header PatchHeader 147 - Err interface{} 88 + Input string 89 + Options []PatchHeaderOption 90 + Header PatchHeader 91 + Err interface{} 148 92 }{ 149 93 "prettyShort": { 150 94 Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b ··· 267 211 Body: expectedBody, 268 212 }, 269 213 }, 214 + "mailboxPatchOnly": { 215 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 216 + From: Morton Haypenny <mhaypenny@example.com> 217 + Date: Sat, 11 Apr 2020 15:21:23 -0700 218 + Subject: [PATCH] [BUG-123] A sample commit to test header parsing 219 + 220 + The medium format shows the body, which 221 + may wrap on to multiple lines. 222 + 223 + Another body line. 224 + `, 225 + Options: []PatchHeaderOption{ 226 + WithSubjectCleanMode(SubjectCleanPatchOnly), 227 + }, 228 + Header: PatchHeader{ 229 + SHA: expectedSHA, 230 + Author: expectedIdentity, 231 + AuthorDate: expectedDate, 232 + Title: "[BUG-123] " + expectedTitle, 233 + Body: expectedBody, 234 + }, 235 + }, 236 + "mailboxEmojiOneLine": { 237 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 238 + From: Morton Haypenny <mhaypenny@example.com> 239 + Date: Sat, 11 Apr 2020 15:21:23 -0700 240 + Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Enabling=20auto-merging?= 241 + 242 + The medium format shows the body, which 243 + may wrap on to multiple lines. 244 + 245 + Another body line. 
246 + `, 247 + Header: PatchHeader{ 248 + SHA: expectedSHA, 249 + Author: expectedIdentity, 250 + AuthorDate: expectedDate, 251 + Title: expectedEmojiOneLineTitle, 252 + Body: expectedBody, 253 + }, 254 + }, 255 + "mailboxEmojiMultiLine": { 256 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 257 + From: Morton Haypenny <mhaypenny@example.com> 258 + Date: Sat, 11 Apr 2020 15:21:23 -0700 259 + Subject: [PATCH] =?UTF-8?q?[IA64]=20Put=20ia64=20config=20files=20on=20the=20?= 260 + =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig=20diet?= 261 + 262 + The medium format shows the body, which 263 + may wrap on to multiple lines. 264 + 265 + Another body line. 266 + `, 267 + Header: PatchHeader{ 268 + SHA: expectedSHA, 269 + Author: expectedIdentity, 270 + AuthorDate: expectedDate, 271 + Title: expectedEmojiMultiLineTitle, 272 + Body: expectedBody, 273 + }, 274 + }, 275 + "mailboxRFC5322SpecialCharacters": { 276 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 277 + From: "dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com> 278 + Date: Sat, 11 Apr 2020 15:21:23 -0700 279 + Subject: [PATCH] A sample commit to test header parsing 280 + 281 + The medium format shows the body, which 282 + may wrap on to multiple lines. 283 + 284 + Another body line. 
285 + `, 286 + Header: PatchHeader{ 287 + SHA: expectedSHA, 288 + Author: &PatchIdentity{ 289 + Name: "dependabot[bot]", 290 + Email: "12345+dependabot[bot]@users.noreply.github.com", 291 + }, 292 + AuthorDate: expectedDate, 293 + Title: expectedTitle, 294 + Body: expectedBody, 295 + }, 296 + }, 270 297 "mailboxAppendix": { 271 298 Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 272 299 From: Morton Haypenny <mhaypenny@example.com> ··· 289 316 BodyAppendix: expectedBodyAppendix, 290 317 }, 291 318 }, 319 + "mailboxMinimalNoName": { 320 + Input: `From: <mhaypenny@example.com> 321 + Subject: [PATCH] A sample commit to test header parsing 322 + 323 + The medium format shows the body, which 324 + may wrap on to multiple lines. 325 + 326 + Another body line. 327 + `, 328 + Header: PatchHeader{ 329 + Author: &PatchIdentity{expectedIdentity.Email, expectedIdentity.Email}, 330 + Title: expectedTitle, 331 + Body: expectedBody, 332 + }, 333 + }, 334 + "mailboxMinimal": { 335 + Input: `From: Morton Haypenny <mhaypenny@example.com> 336 + Subject: [PATCH] A sample commit to test header parsing 337 + 338 + The medium format shows the body, which 339 + may wrap on to multiple lines. 340 + 341 + Another body line. 342 + `, 343 + Header: PatchHeader{ 344 + Author: expectedIdentity, 345 + Title: expectedTitle, 346 + Body: expectedBody, 347 + }, 348 + }, 292 349 "unwrapTitle": { 293 350 Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b 294 351 Author: Morton Haypenny <mhaypenny@example.com> ··· 342 399 Author: expectedIdentity, 343 400 Title: expectedTitle, 344 401 }, 402 + }, 403 + "emptyHeader": { 404 + Input: "", 405 + Header: PatchHeader{}, 345 406 }, 346 407 } 347 408 348 409 for name, test := range tests { 349 410 t.Run(name, func(t *testing.T) { 350 - h, err := ParsePatchHeader(test.Input) 411 + h, err := ParsePatchHeader(test.Input, test.Options...) 
351 412 if test.Err != nil { 352 413 assertError(t, test.Err, err, "parsing patch header") 353 414 return ··· 402 463 } 403 464 } 404 465 405 - func TestCleanupSubject(t *testing.T) { 406 - exp := "A sample commit to test header parsing" 407 - tests := map[string]string{ 408 - "plain": "", 409 - "patch": "[PATCH] ", 410 - "patchv5": "[PATCH v5] ", 411 - "patchrfc": "[PATCH RFC] ", 412 - "patchnospace": "[PATCH]", 413 - "space": " ", 414 - "re": "re: ", 415 - "Re": "Re: ", 416 - "RE": "rE: ", 417 - "rere": "re: re: ", 418 - } 466 + func TestCleanSubject(t *testing.T) { 467 + expectedSubject := "A sample commit to test header parsing" 419 468 420 - for name, prefix := range tests { 421 - gotprefix, gottitle := parseSubject(prefix + exp) 422 - if gottitle != exp { 423 - t.Errorf("%s: Incorrect parsing of prefix %s: got title %s, wanted %s", 424 - name, prefix, gottitle, exp) 425 - } 426 - if gotprefix != prefix { 427 - t.Errorf("%s: Incorrect parsing of prefix %s: got prefix %s", 428 - name, prefix, gotprefix) 429 - } 430 - } 431 - 432 - moretests := map[string]struct { 433 - in, eprefix, etitle string 469 + tests := map[string]struct { 470 + Input string 471 + Mode SubjectCleanMode 472 + Prefix string 473 + Subject string 434 474 }{ 435 - "Reimplement": {"Reimplement something", "", "Reimplement something"}, 436 - "patch-reimplement": {"[PATCH v5] Reimplement something", "[PATCH v5] ", "Reimplement something"}, 437 - "Openbracket": {"[Just to annoy people", "", "[Just to annoy people"}, 475 + "CleanAll/noPrefix": { 476 + Input: expectedSubject, 477 + Mode: SubjectCleanAll, 478 + Subject: expectedSubject, 479 + }, 480 + "CleanAll/patchPrefix": { 481 + Input: "[PATCH] " + expectedSubject, 482 + Mode: SubjectCleanAll, 483 + Prefix: "[PATCH] ", 484 + Subject: expectedSubject, 485 + }, 486 + "CleanAll/patchPrefixNoSpace": { 487 + Input: "[PATCH]" + expectedSubject, 488 + Mode: SubjectCleanAll, 489 + Prefix: "[PATCH]", 490 + Subject: expectedSubject, 491 + }, 492 + 
"CleanAll/patchPrefixContent": { 493 + Input: "[PATCH 3/7] " + expectedSubject, 494 + Mode: SubjectCleanAll, 495 + Prefix: "[PATCH 3/7] ", 496 + Subject: expectedSubject, 497 + }, 498 + "CleanAll/spacePrefix": { 499 + Input: " " + expectedSubject, 500 + Mode: SubjectCleanAll, 501 + Subject: expectedSubject, 502 + }, 503 + "CleanAll/replyLowerPrefix": { 504 + Input: "re: " + expectedSubject, 505 + Mode: SubjectCleanAll, 506 + Prefix: "re: ", 507 + Subject: expectedSubject, 508 + }, 509 + "CleanAll/replyMixedPrefix": { 510 + Input: "Re: " + expectedSubject, 511 + Mode: SubjectCleanAll, 512 + Prefix: "Re: ", 513 + Subject: expectedSubject, 514 + }, 515 + "CleanAll/replyCapsPrefix": { 516 + Input: "RE: " + expectedSubject, 517 + Mode: SubjectCleanAll, 518 + Prefix: "RE: ", 519 + Subject: expectedSubject, 520 + }, 521 + "CleanAll/replyDoublePrefix": { 522 + Input: "Re: re: " + expectedSubject, 523 + Mode: SubjectCleanAll, 524 + Prefix: "Re: re: ", 525 + Subject: expectedSubject, 526 + }, 527 + "CleanAll/noPrefixSubjectHasRe": { 528 + Input: "Reimplement parsing", 529 + Mode: SubjectCleanAll, 530 + Subject: "Reimplement parsing", 531 + }, 532 + "CleanAll/patchPrefixSubjectHasRe": { 533 + Input: "[PATCH 1/2] Reimplement parsing", 534 + Mode: SubjectCleanAll, 535 + Prefix: "[PATCH 1/2] ", 536 + Subject: "Reimplement parsing", 537 + }, 538 + "CleanAll/unclosedPrefix": { 539 + Input: "[Just to annoy people", 540 + Mode: SubjectCleanAll, 541 + Subject: "[Just to annoy people", 542 + }, 543 + "CleanAll/multiplePrefix": { 544 + Input: " Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject + " ", 545 + Mode: SubjectCleanAll, 546 + Prefix: "Re:Re: [PATCH 1/2][DRAFT] ", 547 + Subject: expectedSubject, 548 + }, 549 + "CleanPatchOnly/patchPrefix": { 550 + Input: "[PATCH] " + expectedSubject, 551 + Mode: SubjectCleanPatchOnly, 552 + Prefix: "[PATCH] ", 553 + Subject: expectedSubject, 554 + }, 555 + "CleanPatchOnly/mixedPrefix": { 556 + Input: "[PATCH] [TICKET-123] " + expectedSubject, 557 + 
Mode: SubjectCleanPatchOnly, 558 + Prefix: "[PATCH] ", 559 + Subject: "[TICKET-123] " + expectedSubject, 560 + }, 561 + "CleanPatchOnly/multiplePrefix": { 562 + Input: "Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject, 563 + Mode: SubjectCleanPatchOnly, 564 + Prefix: "Re:Re: [PATCH 1/2]", 565 + Subject: "[DRAFT] " + expectedSubject, 566 + }, 567 + "CleanWhitespace/leadingSpace": { 568 + Input: " [PATCH] " + expectedSubject, 569 + Mode: SubjectCleanWhitespace, 570 + Subject: "[PATCH] " + expectedSubject, 571 + }, 572 + "CleanWhitespace/trailingSpace": { 573 + Input: "[PATCH] " + expectedSubject + " ", 574 + Mode: SubjectCleanWhitespace, 575 + Subject: "[PATCH] " + expectedSubject, 576 + }, 438 577 } 439 578 440 - for name, test := range moretests { 441 - prefix, title := parseSubject(test.in) 442 - if title != test.etitle { 443 - t.Errorf("%s: Incorrect parsing of %s: got title %s, wanted %s", 444 - name, test.in, title, test.etitle) 445 - } 446 - if prefix != test.eprefix { 447 - t.Errorf("%s: Incorrect parsing of %s: got prefix %s, wanted %s", 448 - name, test.in, title, test.etitle) 449 - } 579 + for name, test := range tests { 580 + t.Run(name, func(t *testing.T) { 581 + prefix, subject := cleanSubject(test.Input, test.Mode) 582 + if prefix != test.Prefix { 583 + t.Errorf("incorrect prefix: expected %q, actual %q", test.Prefix, prefix) 584 + } 585 + if subject != test.Subject { 586 + t.Errorf("incorrect subject: expected %q, actual %q", test.Subject, subject) 587 + } 588 + }) 450 589 } 451 590 }

+166

gitdiff/patch_identity.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "fmt" 5 + "strings" 6 + ) 7 + 8 + // PatchIdentity identifies a person who authored or committed a patch. 9 + type PatchIdentity struct { 10 + Name string 11 + Email string 12 + } 13 + 14 + func (i PatchIdentity) String() string { 15 + name := i.Name 16 + if name == "" { 17 + name = `""` 18 + } 19 + return fmt.Sprintf("%s <%s>", name, i.Email) 20 + } 21 + 22 + // ParsePatchIdentity parses a patch identity string. A patch identity contains 23 + // an email address and an optional name in [RFC 5322] format. This is either a 24 + // plain email adddress or a name followed by an address in angle brackets: 25 + // 26 + // author@example.com 27 + // Author Name <author@example.com> 28 + // 29 + // If the input is not one of these formats, ParsePatchIdentity applies a 30 + // heuristic to separate the name and email portions. If both the name and 31 + // email are missing or empty, ParsePatchIdentity returns an error. It 32 + // otherwise does not validate the result. 33 + // 34 + // [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322 35 + func ParsePatchIdentity(s string) (PatchIdentity, error) { 36 + s = normalizeSpace(s) 37 + s = unquotePairs(s) 38 + 39 + var name, email string 40 + if at := strings.IndexByte(s, '@'); at >= 0 { 41 + start, end := at, at 42 + for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' { 43 + start-- 44 + } 45 + for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' { 46 + end++ 47 + } 48 + email = s[start+1 : end] 49 + 50 + // Adjust the boundaries so that we drop angle brackets, but keep 51 + // spaces when removing the email to form the name. 
52 + if start < 0 || s[start] != '<' { 53 + start++ 54 + } 55 + if end >= len(s) || s[end] != '>' { 56 + end-- 57 + } 58 + name = s[:start] + s[end+1:] 59 + } else { 60 + start, end := 0, 0 61 + for i := 0; i < len(s); i++ { 62 + if s[i] == '<' && start == 0 { 63 + start = i + 1 64 + } 65 + if s[i] == '>' && start > 0 { 66 + end = i 67 + break 68 + } 69 + } 70 + if start > 0 && end >= start { 71 + email = strings.TrimSpace(s[start:end]) 72 + name = s[:start-1] 73 + } 74 + } 75 + 76 + // After extracting the email, the name might contain extra whitespace 77 + // again and may be surrounded by comment characters. The git source gives 78 + // these examples of when this can happen: 79 + // 80 + // "Name <email@domain>" 81 + // "email@domain (Name)" 82 + // "Name <email@domain> (Comment)" 83 + // 84 + name = normalizeSpace(name) 85 + if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") { 86 + name = name[1 : len(name)-1] 87 + } 88 + name = strings.TrimSpace(name) 89 + 90 + // If the name is empty or contains email-like characters, use the email 91 + // instead (assuming one exists) 92 + if name == "" || strings.ContainsAny(name, "@<>") { 93 + name = email 94 + } 95 + 96 + if name == "" && email == "" { 97 + return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s) 98 + } 99 + return PatchIdentity{Name: name, Email: email}, nil 100 + } 101 + 102 + // unquotePairs processes the RFC5322 tokens "quoted-string" and "comment" to 103 + // remove any "quoted-pairs" (backslash-escaped characters). It also removes 104 + // the quotes from any quoted strings, but leaves the comment delimiters. 
105 + func unquotePairs(s string) string { 106 + quote := false 107 + comments := 0 108 + escaped := false 109 + 110 + var out strings.Builder 111 + for i := 0; i < len(s); i++ { 112 + if escaped { 113 + escaped = false 114 + } else { 115 + switch s[i] { 116 + case '\\': 117 + // quoted-pair is only allowed in quoted-string/comment 118 + if quote || comments > 0 { 119 + escaped = true 120 + continue // drop '\' character 121 + } 122 + 123 + case '"': 124 + if comments == 0 { 125 + quote = !quote 126 + continue // drop '"' character 127 + } 128 + 129 + case '(': 130 + if !quote { 131 + comments++ 132 + } 133 + case ')': 134 + if comments > 0 { 135 + comments-- 136 + } 137 + } 138 + } 139 + out.WriteByte(s[i]) 140 + } 141 + return out.String() 142 + } 143 + 144 + // normalizeSpace trims leading and trailing whitespace from s and converts 145 + // inner sequences of one or more whitespace characters to single spaces. 146 + func normalizeSpace(s string) string { 147 + var sb strings.Builder 148 + for i := 0; i < len(s); i++ { 149 + c := s[i] 150 + if !isRFC5332Space(c) { 151 + if sb.Len() > 0 && isRFC5332Space(s[i-1]) { 152 + sb.WriteByte(' ') 153 + } 154 + sb.WriteByte(c) 155 + } 156 + } 157 + return sb.String() 158 + } 159 + 160 + func isRFC5332Space(c byte) bool { 161 + switch c { 162 + case '\t', '\n', '\r', ' ': 163 + return true 164 + } 165 + return false 166 + }

+127

gitdiff/patch_identity_test.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "testing" 5 + ) 6 + 7 + func TestParsePatchIdentity(t *testing.T) { 8 + tests := map[string]struct { 9 + Input string 10 + Output PatchIdentity 11 + Err interface{} 12 + }{ 13 + "simple": { 14 + Input: "Morton Haypenny <mhaypenny@example.com>", 15 + Output: PatchIdentity{ 16 + Name: "Morton Haypenny", 17 + Email: "mhaypenny@example.com", 18 + }, 19 + }, 20 + "extraWhitespace": { 21 + Input: "\t Morton Haypenny \r\n<mhaypenny@example.com> ", 22 + Output: PatchIdentity{ 23 + Name: "Morton Haypenny", 24 + Email: "mhaypenny@example.com", 25 + }, 26 + }, 27 + "trailingCharacters": { 28 + Input: "Morton Haypenny <mhaypenny@example.com> II", 29 + Output: PatchIdentity{ 30 + Name: "Morton Haypenny II", 31 + Email: "mhaypenny@example.com", 32 + }, 33 + }, 34 + "onlyEmail": { 35 + Input: "mhaypenny@example.com", 36 + Output: PatchIdentity{ 37 + Name: "mhaypenny@example.com", 38 + Email: "mhaypenny@example.com", 39 + }, 40 + }, 41 + "onlyEmailInBrackets": { 42 + Input: "<mhaypenny@example.com>", 43 + Output: PatchIdentity{ 44 + Name: "mhaypenny@example.com", 45 + Email: "mhaypenny@example.com", 46 + }, 47 + }, 48 + "rfc5322SpecialCharacters": { 49 + Input: `"dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>`, 50 + Output: PatchIdentity{ 51 + Name: "dependabot[bot]", 52 + Email: "12345+dependabot[bot]@users.noreply.github.com", 53 + }, 54 + }, 55 + "rfc5322QuotedPairs": { 56 + Input: `"Morton \"Old-Timer\" Haypenny" <"mhaypenny\+[1900]"@example.com> (III \(PhD\))`, 57 + Output: PatchIdentity{ 58 + Name: `Morton "Old-Timer" Haypenny (III (PhD))`, 59 + Email: "mhaypenny+[1900]@example.com", 60 + }, 61 + }, 62 + "rfc5322QuotedPairsOutOfContext": { 63 + Input: `Morton \\Backslash Haypenny <mhaypenny@example.com>`, 64 + Output: PatchIdentity{ 65 + Name: `Morton \\Backslash Haypenny`, 66 + Email: "mhaypenny@example.com", 67 + }, 68 + }, 69 + "emptyEmail": { 70 + Input: "Morton Haypenny <>", 71 + Output: PatchIdentity{ 72 + 
Name: "Morton Haypenny", 73 + Email: "", 74 + }, 75 + }, 76 + "unclosedEmail": { 77 + Input: "Morton Haypenny <mhaypenny@example.com", 78 + Output: PatchIdentity{ 79 + Name: "Morton Haypenny", 80 + Email: "mhaypenny@example.com", 81 + }, 82 + }, 83 + "bogusEmail": { 84 + Input: "Morton Haypenny <mhaypenny>", 85 + Output: PatchIdentity{ 86 + Name: "Morton Haypenny", 87 + Email: "mhaypenny", 88 + }, 89 + }, 90 + "bogusEmailWithWhitespace": { 91 + Input: "Morton Haypenny < mhaypenny >", 92 + Output: PatchIdentity{ 93 + Name: "Morton Haypenny", 94 + Email: "mhaypenny", 95 + }, 96 + }, 97 + "missingEmail": { 98 + Input: "Morton Haypenny", 99 + Err: "invalid identity", 100 + }, 101 + "missingNameAndEmptyEmail": { 102 + Input: "<>", 103 + Err: "invalid identity", 104 + }, 105 + "empty": { 106 + Input: "", 107 + Err: "invalid identity", 108 + }, 109 + } 110 + 111 + for name, test := range tests { 112 + t.Run(name, func(t *testing.T) { 113 + id, err := ParsePatchIdentity(test.Input) 114 + if test.Err != nil { 115 + assertError(t, test.Err, err, "parsing identity") 116 + return 117 + } 118 + if err != nil { 119 + t.Fatalf("unexpected error parsing identity: %v", err) 120 + } 121 + 122 + if test.Output != id { 123 + t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) 124 + } 125 + }) 126 + } 127 + }

+1 -1

gitdiff/testdata/apply/bin.go

··· 1 - //+build ignore 1 + //go:build ignore 2 2 3 3 // bin.go is a helper CLI to manipulate binary diff data for testing purposes. 4 4 // It can decode patches generated by git using the standard parsing functions

+3

gitdiff/testdata/apply/text_fragment_change_end_eol.out

··· 1 + line 1 2 + line 2 3 + line 3

+10

gitdiff/testdata/apply/text_fragment_change_end_eol.patch

··· 1 + diff --git a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src 2 + index a92d664..8cf2f17 100644 3 + --- a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src 4 + +++ b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src 5 + @@ -1,3 +1,3 @@ 6 + line 1 7 + line 2 8 + -line 3 9 + +line 3 10 + \ No newline at end of file

+3

gitdiff/testdata/apply/text_fragment_change_end_eol.src

··· 1 + line 1 2 + line 2 3 + line 3

+8

gitdiff/testdata/no_files.patch

··· 1 + commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe 2 + Author: Morton Haypenny <mhaypenny@example.com> 3 + Date: Tue Apr 2 22:55:40 2019 -0700 4 + 5 + A file with multiple fragments. 6 + 7 + The content is arbitrary. 8 +

+9

gitdiff/testdata/string/binary_modify.patch

··· 1 + diff --git a/file.bin b/file.bin 2 + index a7f4d5d6975ec021016c02b6d58345ebf434f38c..bdc9a70f055892146612dcdb413f0e339faaa0df 100644 3 + GIT binary patch 4 + delta 66 5 + QcmeZhVVvM$!$1K50C&Ox;s5{u 6 + 7 + delta 5 8 + McmZo+^qAlQ00i9urT_o{ 9 +

+3

gitdiff/testdata/string/binary_modify_nodata.patch

··· 1 + diff --git a/file.bin b/file.bin 2 + index a7f4d5d..bdc9a70 100644 3 + Binary files a/file.bin and b/file.bin differ

+11

gitdiff/testdata/string/binary_new.patch

··· 1 + diff --git a/file.bin b/file.bin 2 + new file mode 100644 3 + index 0000000000000000000000000000000000000000..a7f4d5d6975ec021016c02b6d58345ebf434f38c 4 + GIT binary patch 5 + literal 72 6 + zcmV-O0Jr~td-`u6JcK&{KDK=<a#;v1^LR5&K)zQ0=Goz82(?nJ6_nD`f#8O9p}}{P 7 + eiXim+rDI+BDadMQmMsO5Sw@;DbrCA+PamP;Ng_@F 8 + 9 + literal 0 10 + HcmV?d00001 11 +

+4

gitdiff/testdata/string/copy.patch

··· 1 + diff --git a/file.txt b/numbers.txt 2 + similarity index 100% 3 + copy from file.txt 4 + copy to numbers.txt

+21

gitdiff/testdata/string/copy_modify.patch

··· 1 + diff --git a/file.txt b/numbers.txt 2 + similarity index 57% 3 + copy from file.txt 4 + copy to numbers.txt 5 + index c9e9e05..6c4a3e0 100644 6 + --- a/file.txt 7 + +++ b/numbers.txt 8 + @@ -1,6 +1,6 @@ 9 + one 10 + two 11 + -three 12 + +three three three 13 + four 14 + five 15 + six 16 + @@ -8,3 +8,5 @@ seven 17 + eight 18 + nine 19 + ten 20 + +eleven 21 + +twelve

+16

gitdiff/testdata/string/delete.patch

··· 1 + diff --git a/file.txt b/file.txt 2 + deleted file mode 100644 3 + index c9e9e05..0000000 4 + --- a/file.txt 5 + +++ /dev/null 6 + @@ -1,10 +0,0 @@ 7 + -one 8 + -two 9 + -three 10 + -four 11 + -five 12 + -six 13 + -seven 14 + -eight 15 + -nine 16 + -ten

+3

gitdiff/testdata/string/mode.patch

··· 1 + diff --git a/file.txt b/file.txt 2 + old mode 100644 3 + new mode 100755

+10

gitdiff/testdata/string/mode_modify.patch

··· 1 + diff --git a/script.sh b/script.sh 2 + old mode 100644 3 + new mode 100755 4 + index 7a870bd..68d501e 5 + --- a/script.sh 6 + +++ b/script.sh 7 + @@ -1,2 +1,2 @@ 8 + #!/bin/bash 9 + -echo "Hello World" 10 + +echo "Hello, World!"

+16

gitdiff/testdata/string/modify.patch

··· 1 + diff --git a/file.txt b/file.txt 2 + index c9e9e05..7d5fdc6 100644 3 + --- a/file.txt 4 + +++ b/file.txt 5 + @@ -3,8 +3,10 @@ two 6 + three 7 + four 8 + five 9 + -six 10 + +six six six six six six 11 + seven 12 + eight 13 + nine 14 + ten 15 + +eleven 16 + +twelve

+16

gitdiff/testdata/string/new.patch

··· 1 + diff --git a/file.txt b/file.txt 2 + new file mode 100644 3 + index 0000000..c9e9e05 4 + --- /dev/null 5 + +++ b/file.txt 6 + @@ -0,0 +1,10 @@ 7 + +one 8 + +two 9 + +three 10 + +four 11 + +five 12 + +six 13 + +seven 14 + +eight 15 + +nine 16 + +ten

+3

gitdiff/testdata/string/new_empty.patch

··· 1 + diff --git a/file.txt b/file.txt 2 + new file mode 100644 3 + index 0000000..e69de29

+16

gitdiff/testdata/string/new_mode.patch

··· 1 + diff --git a/file.sh b/file.sh 2 + new file mode 100755 3 + index 0000000..c9e9e05 4 + --- /dev/null 5 + +++ b/file.sh 6 + @@ -0,0 +1,10 @@ 7 + +one 8 + +two 9 + +three 10 + +four 11 + +five 12 + +six 13 + +seven 14 + +eight 15 + +nine 16 + +ten

+4

gitdiff/testdata/string/rename.patch

··· 1 + diff --git a/file.txt b/numbers.txt 2 + similarity index 100% 3 + rename from file.txt 4 + rename to numbers.txt

+18

gitdiff/testdata/string/rename_modify.patch

··· 1 + diff --git a/file.txt b/numbers.txt 2 + similarity index 77% 3 + rename from file.txt 4 + rename to numbers.txt 5 + index c9e9e05..a6b31d6 100644 6 + --- a/file.txt 7 + +++ b/numbers.txt 8 + @@ -3,8 +3,9 @@ two 9 + three 10 + four 11 + five 12 + -six 13 + + six 14 + seven 15 + eight 16 + nine 17 + ten 18 + +eleven

+30 -18

gitdiff/text.go

··· 79 79 return p.Errorf(0, "no content following fragment header") 80 80 } 81 81 82 - isNoNewlineLine := func(s string) bool { 83 - // test for "\ No newline at end of file" by prefix because the text 84 - // changes by locale (git claims all versions are at least 12 chars) 85 - return len(s) >= 12 && s[:2] == "\\ " 86 - } 87 - 88 82 oldLines, newLines := frag.OldLines, frag.NewLines 89 - for { 83 + for oldLines > 0 || newLines > 0 { 90 84 line := p.Line(0) 91 85 op, data := line[0], line[1:] 92 86 ··· 113 107 frag.LinesAdded++ 114 108 frag.TrailingContext = 0 115 109 frag.Lines = append(frag.Lines, Line{OpAdd, data}) 116 - default: 110 + case '\\': 117 111 // this may appear in middle of fragment if it's for a deleted line 118 - if isNoNewlineLine(line) { 119 - last := &frag.Lines[len(frag.Lines)-1] 120 - last.Line = strings.TrimSuffix(last.Line, "\n") 112 + if isNoNewlineMarker(line) { 113 + removeLastNewline(frag) 121 114 break 122 115 } 116 + fallthrough 117 + default: 123 118 // TODO(bkeyes): if this is because we hit the next header, it 124 119 // would be helpful to return the miscounts line error. 
We could 125 120 // either test for the common headers ("@@ -", "diff --git") or 126 121 // assume any invalid op ends the fragment; git returns the same 127 122 // generic error in all cases so either is compatible 128 123 return p.Errorf(0, "invalid line operation: %q", op) 129 - } 130 - 131 - next := p.Line(1) 132 - if oldLines <= 0 && newLines <= 0 && !isNoNewlineLine(next) { 133 - break 134 124 } 135 125 136 126 if err := p.Next(); err != nil { ··· 145 135 hdr := max(frag.OldLines-oldLines, frag.NewLines-newLines) + 1 146 136 return p.Errorf(-hdr, "fragment header miscounts lines: %+d old, %+d new", -oldLines, -newLines) 147 137 } 138 + if frag.LinesAdded == 0 && frag.LinesDeleted == 0 { 139 + return p.Errorf(0, "fragment contains no changes") 140 + } 148 141 149 - if err := p.Next(); err != nil && err != io.EOF { 150 - return err 142 + // check for a final "no newline" marker since it is not included in the 143 + // counters used to stop the loop above 144 + if isNoNewlineMarker(p.Line(0)) { 145 + removeLastNewline(frag) 146 + if err := p.Next(); err != nil && err != io.EOF { 147 + return err 148 + } 151 149 } 150 + 152 151 return nil 152 + } 153 + 154 + func isNoNewlineMarker(s string) bool { 155 + // test for "\ No newline at end of file" by prefix because the text 156 + // changes by locale (git claims all versions are at least 12 chars) 157 + return len(s) >= 12 && s[:2] == "\\ " 158 + } 159 + 160 + func removeLastNewline(frag *TextFragment) { 161 + if len(frag.Lines) > 0 { 162 + last := &frag.Lines[len(frag.Lines)-1] 163 + last.Line = strings.TrimSuffix(last.Line, "\n") 164 + } 153 165 } 154 166 155 167 func parseRange(s string) (start int64, end int64, err error) {

+18

gitdiff/text_test.go

··· 317 317 }, 318 318 Err: true, 319 319 }, 320 + "onlyContext": { 321 + Input: ` context line 322 + context line 323 + `, 324 + Fragment: TextFragment{ 325 + OldLines: 2, 326 + NewLines: 2, 327 + }, 328 + Err: true, 329 + }, 330 + "unexpectedNoNewlineMarker": { 331 + Input: `\ No newline at end of file`, 332 + Fragment: TextFragment{ 333 + OldLines: 1, 334 + NewLines: 1, 335 + }, 336 + Err: true, 337 + }, 320 338 } 321 339 322 340 for name, test := range tests {

+1 -1

go.mod

··· 1 1 module github.com/bluekeyes/go-gitdiff 2 2 3 - go 1.13 3 + go 1.21

Compare changes