comparing 34ea1730662faa6b12e10b312fce1b17101d9908 and master on oppi.li/go-gitdiff

+8 -17

.github/workflows/go.yml

··· 9 9 name: Verify 10 10 runs-on: ubuntu-latest 11 11 steps: 12 - - name: Set up Go 1.13 13 - uses: actions/setup-go@v1 12 + - name: Set up Go 1.21 13 + uses: actions/setup-go@v5 14 14 with: 15 - go-version: 1.13 16 - id: go 17 - 18 - - name: Set up environment 19 - run: | 20 - # https://github.com/actions/setup-go/issues/14 21 - echo "::set-env name=GOPATH::$(go env GOPATH)" 22 - echo "::add-path::$(go env GOPATH)/bin" 23 - 24 - - name: Install golangci-lint 25 - run: curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.23.1 26 - 15 + go-version: 1.21 16 + 27 17 - name: Check out code into the Go module directory 28 - uses: actions/checkout@v1 18 + uses: actions/checkout@v4 29 19 30 20 - name: Lint 31 - run: golangci-lint run 21 + uses: golangci/golangci-lint-action@v7 22 + with: 23 + version: v2.0 32 24 33 25 - name: Test 34 26 run: go test -v ./... 35 -

+41 -13

.golangci.yml

··· 1 + version: "2" 2 + 1 3 run: 2 4 tests: false 3 5 4 6 linters: 5 - disable-all: true 7 + default: none 6 8 enable: 7 - - deadcode 8 9 - errcheck 9 - - gofmt 10 - - goimports 11 - - golint 12 10 - govet 13 11 - ineffassign 14 - - typecheck 12 + - misspell 13 + - revive 15 14 - unconvert 16 - - varcheck 17 - 18 - issues: 19 - exclude-use-default: false 15 + - unused 16 + settings: 17 + errcheck: 18 + exclude-functions: 19 + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write 20 + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString 21 + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte 22 + - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) 23 + revive: 24 + rules: 25 + - name: context-keys-type 26 + - name: time-naming 27 + - name: var-declaration 28 + - name: unexported-return 29 + - name: errorf 30 + - name: blank-imports 31 + - name: context-as-argument 32 + - name: dot-imports 33 + - name: error-return 34 + - name: error-strings 35 + - name: error-naming 36 + - name: exported 37 + - name: increment-decrement 38 + - name: var-naming 39 + - name: package-comments 40 + - name: range 41 + - name: receiver-naming 42 + - name: indent-error-flow 20 43 21 - linter-settings: 22 - goimports: 23 - local-prefixes: github.com/bluekeyes/go-gitdiff 44 + formatters: 45 + enable: 46 + - gofmt 47 + - goimports 48 + settings: 49 + goimports: 50 + local-prefixes: 51 + - github.com/bluekeyes/go-gitdiff

+18 -6

README.md

··· 1 1 # go-gitdiff 2 2 3 - [![GoDoc](https://godoc.org/github.com/bluekeyes/go-gitdiff/gitdiff?status.svg)](http://godoc.org/github.com/bluekeyes/go-gitdiff/gitdiff) [![Go Report Card](https://goreportcard.com/badge/github.com/bluekeyes/go-gitdiff)](https://goreportcard.com/report/github.com/bluekeyes/go-gitdiff) 3 + [![PkgGoDev](https://pkg.go.dev/badge/github.com/bluekeyes/go-gitdiff/gitdiff)](https://pkg.go.dev/github.com/bluekeyes/go-gitdiff/gitdiff) [![Go Report Card](https://goreportcard.com/badge/github.com/bluekeyes/go-gitdiff)](https://goreportcard.com/report/github.com/bluekeyes/go-gitdiff) 4 4 5 5 A Go library for parsing and applying patches generated by `git diff`, `git 6 6 show`, and `git format-patch`. It can also parse and apply unified diffs 7 - generated by the standard `diff` tool. 7 + generated by the standard GNU `diff` tool. 8 8 9 9 It supports standard line-oriented text patches and Git binary patches, and 10 10 aims to parse anything accepted by the `git apply` command. ··· 29 29 30 30 // apply the changes in the patch to a source file 31 31 var output bytes.Buffer 32 - if err := gitdiff.NewApplier(code).ApplyFile(&output, files[0]); err != nil { 32 + if err := gitdiff.Apply(&output, code, files[0]); err != nil { 33 33 log.Fatal(err) 34 34 } 35 35 ``` 36 36 37 37 ## Development Status 38 38 39 - Mostly complete, but API changes are possible. Patch parsing and strict 40 - application works and is well-covered by unit tests, but has not been validated 41 - extensively against real-world patches. 39 + The parsing API and types are complete and I expect will remain stable. Version 40 + 0.7.0 introduced a new apply API that may change more in the future to support 41 + non-strict patch application. 42 + 43 + Parsing and strict application are well-covered by unit tests and the library 44 + is used in a production application that parses and applies thousands of 45 + patches every day. 
However, the space of all possible patches is large, so 46 + there are likely undiscovered bugs. 47 + 48 + The parsing code has also had a modest amount of fuzz testing. 42 49 43 50 ## Why another git/unified diff parser? 44 51 ··· 71 78 72 79 - Numbers immediately followed by non-numeric characters 73 80 - Trailing characters on a line after valid or expected content 81 + - Malformed file header lines (lines that start with `diff --git`) 74 82 75 83 2. Errors for invalid input are generally more verbose and specific than those 76 84 from `git apply`. ··· 91 99 context of each fragment must exactly match the source file; `git apply` 92 100 implements a search algorithm that tries different lines and amounts of 93 101 context, with further options to normalize or ignore whitespace changes. 102 + 103 + 7. When parsing mail-formatted patch headers, leading and trailing whitespace 104 + is always removed from `Subject` lines. There is no exact equivalent to `git 105 + mailinfo -k`.

+30 -318

gitdiff/apply.go

··· 13 13 // Users can test if an error was caused by a conflict by using errors.Is with 14 14 // an empty Conflict: 15 15 // 16 - // if errors.Is(err, &Conflict{}) { 17 - // // handle conflict 18 - // } 19 - // 16 + // if errors.Is(err, &Conflict{}) { 17 + // // handle conflict 18 + // } 20 19 type Conflict struct { 21 20 msg string 22 21 } ··· 89 88 90 89 var ( 91 90 errApplyInProgress = errors.New("gitdiff: incompatible apply in progress") 92 - ) 93 - 94 - const ( 95 - applyInitial = iota 96 - applyText 97 - applyBinary 98 - applyFile 91 + errApplierClosed = errors.New("gitdiff: applier is closed") 99 92 ) 100 93 101 - // Applier applies changes described in fragments to source data. If changes 102 - // are described in multiple fragments, those fragments must be applied in 103 - // order, usually by calling ApplyFile. 104 - // 105 - // By default, Applier operates in "strict" mode, where fragment content and 106 - // positions must exactly match those of the source. 107 - // 108 - // If an error occurs while applying, methods on Applier return instances of 109 - // *ApplyError that annotate the wrapped error with additional information 110 - // when available. If the error is because of a conflict between a fragment and 111 - // the source, the wrapped error will be a *Conflict. 94 + // Apply applies the changes in f to src, writing the result to dst. It can 95 + // apply both text and binary changes. 112 96 // 113 - // While an Applier can apply both text and binary fragments, only one fragment 114 - // type can be used without resetting the Applier. The first fragment applied 115 - // sets the type for the Applier. Mixing fragment types or mixing 116 - // fragment-level and file-level applies results in an error. 117 - type Applier struct { 118 - src io.ReaderAt 119 - lineSrc LineReaderAt 120 - nextLine int64 121 - applyType int 122 - } 123 - 124 - // NewApplier creates an Applier that reads data from src. 
If src is a 125 - // LineReaderAt, it is used directly to apply text fragments. 126 - func NewApplier(src io.ReaderAt) *Applier { 127 - a := new(Applier) 128 - a.Reset(src) 129 - return a 130 - } 131 - 132 - // Reset resets the input and internal state of the Applier. If src is nil, the 133 - // existing source is reused. 134 - func (a *Applier) Reset(src io.ReaderAt) { 135 - if src != nil { 136 - a.src = src 137 - if lineSrc, ok := src.(LineReaderAt); ok { 138 - a.lineSrc = lineSrc 139 - } else { 140 - a.lineSrc = &lineReaderAt{r: src} 97 + // If an error occurs while applying, Apply returns an *ApplyError that 98 + // annotates the error with additional information. If the error is because of 99 + // a conflict with the source, the wrapped error will be a *Conflict. 100 + func Apply(dst io.Writer, src io.ReaderAt, f *File) error { 101 + if f.IsBinary { 102 + if len(f.TextFragments) > 0 { 103 + return applyError(errors.New("binary file contains text fragments")) 141 104 } 142 - } 143 - a.nextLine = 0 144 - a.applyType = applyInitial 145 - } 146 - 147 - // ApplyFile applies the changes in all of the fragments of f and writes the 148 - // result to dst. 
149 - func (a *Applier) ApplyFile(dst io.Writer, f *File) error { 150 - if a.applyType != applyInitial { 151 - return applyError(errApplyInProgress) 152 - } 153 - defer func() { a.applyType = applyFile }() 154 - 155 - if f.IsBinary && len(f.TextFragments) > 0 { 156 - return applyError(errors.New("binary file contains text fragments")) 157 - } 158 - if !f.IsBinary && f.BinaryFragment != nil { 159 - return applyError(errors.New("text file contains binary fragment")) 105 + if f.BinaryFragment == nil { 106 + return applyError(errors.New("binary file does not contain a binary fragment")) 107 + } 108 + } else { 109 + if f.BinaryFragment != nil { 110 + return applyError(errors.New("text file contains a binary fragment")) 111 + } 160 112 } 161 113 162 114 switch { 163 115 case f.BinaryFragment != nil: 164 - return a.ApplyBinaryFragment(dst, f.BinaryFragment) 116 + applier := NewBinaryApplier(dst, src) 117 + if err := applier.ApplyFragment(f.BinaryFragment); err != nil { 118 + return err 119 + } 120 + return applier.Close() 165 121 166 122 case len(f.TextFragments) > 0: 167 123 frags := make([]*TextFragment, len(f.TextFragments)) ··· 175 131 // right now, the application fails if fragments overlap, but it should be 176 132 // possible to precompute the result of applying them in order 177 133 134 + applier := NewTextApplier(dst, src) 178 135 for i, frag := range frags { 179 - if err := a.ApplyTextFragment(dst, frag); err != nil { 136 + if err := applier.ApplyFragment(frag); err != nil { 180 137 return applyError(err, fragNum(i)) 181 138 } 182 139 } 183 - } 184 - 185 - return applyError(a.Flush(dst)) 186 - } 187 - 188 - // ApplyTextFragment applies the changes in the fragment f and writes unwritten 189 - // data before the start of the fragment and the result to dst. If multiple 190 - // text fragments apply to the same source, ApplyTextFragment must be called in 191 - // order of increasing start position. 
As a result, each fragment can be 192 - // applied at most once before a call to Reset. 193 - func (a *Applier) ApplyTextFragment(dst io.Writer, f *TextFragment) error { 194 - if a.applyType != applyInitial && a.applyType != applyText { 195 - return applyError(errApplyInProgress) 196 - } 197 - defer func() { a.applyType = applyText }() 198 - 199 - // application code assumes fragment fields are consistent 200 - if err := f.Validate(); err != nil { 201 - return applyError(err) 202 - } 203 - 204 - // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) 205 - fragStart := f.OldPosition - 1 206 - if fragStart < 0 { 207 - fragStart = 0 208 - } 209 - fragEnd := fragStart + f.OldLines 210 - 211 - start := a.nextLine 212 - if fragStart < start { 213 - return applyError(&Conflict{"fragment overlaps with an applied fragment"}) 214 - } 215 - 216 - if f.OldPosition == 0 { 217 - ok, err := isLen(a.src, 0) 218 - if err != nil { 219 - return applyError(err) 220 - } 221 - if !ok { 222 - return applyError(&Conflict{"cannot create new file from non-empty src"}) 223 - } 224 - } 225 - 226 - preimage := make([][]byte, fragEnd-start) 227 - n, err := a.lineSrc.ReadLinesAt(preimage, start) 228 - switch { 229 - case err == nil: 230 - case err == io.EOF && n == len(preimage): // last line of frag has no newline character 231 - default: 232 - return applyError(err, lineNum(start+int64(n))) 233 - } 234 - 235 - // copy leading data before the fragment starts 236 - for i, line := range preimage[:fragStart-start] { 237 - if _, err := dst.Write(line); err != nil { 238 - a.nextLine = start + int64(i) 239 - return applyError(err, lineNum(a.nextLine)) 240 - } 241 - } 242 - preimage = preimage[fragStart-start:] 243 - 244 - // apply the changes in the fragment 245 - used := int64(0) 246 - for i, line := range f.Lines { 247 - if err := applyTextLine(dst, line, preimage, used); err != nil { 248 - a.nextLine = fragStart + used 249 - return applyError(err, lineNum(a.nextLine), 
fragLineNum(i)) 250 - } 251 - if line.Old() { 252 - used++ 253 - } 254 - } 255 - a.nextLine = fragStart + used 256 - return nil 257 - } 258 - 259 - func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { 260 - if line.Old() && string(preimage[i]) != line.Line { 261 - return &Conflict{"fragment line does not match src line"} 262 - } 263 - if line.New() { 264 - _, err = io.WriteString(dst, line.Line) 265 - } 266 - return err 267 - } 268 - 269 - // Flush writes any data following the last applied fragment to dst. 270 - func (a *Applier) Flush(dst io.Writer) (err error) { 271 - switch a.applyType { 272 - case applyInitial: 273 - _, err = copyFrom(dst, a.src, 0) 274 - case applyText: 275 - _, err = copyLinesFrom(dst, a.lineSrc, a.nextLine) 276 - case applyBinary: 277 - // nothing to flush, binary apply "consumes" full source 278 - } 279 - return err 280 - } 281 - 282 - // ApplyBinaryFragment applies the changes in the fragment f and writes the 283 - // result to dst. At most one binary fragment can be applied before a call to 284 - // Reset. 
285 - func (a *Applier) ApplyBinaryFragment(dst io.Writer, f *BinaryFragment) error { 286 - if a.applyType != applyInitial { 287 - return applyError(errApplyInProgress) 288 - } 289 - defer func() { a.applyType = applyBinary }() 290 - 291 - if f == nil { 292 - return applyError(errors.New("nil fragment")) 293 - } 140 + return applier.Close() 294 141 295 - switch f.Method { 296 - case BinaryPatchLiteral: 297 - if _, err := dst.Write(f.Data); err != nil { 298 - return applyError(err) 299 - } 300 - case BinaryPatchDelta: 301 - if err := applyBinaryDeltaFragment(dst, a.src, f.Data); err != nil { 302 - return applyError(err) 303 - } 304 142 default: 305 - return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) 306 - } 307 - return nil 308 - } 309 - 310 - func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error { 311 - srcSize, delta := readBinaryDeltaSize(frag) 312 - if err := checkBinarySrcSize(src, srcSize); err != nil { 313 - return err 314 - } 315 - 316 - dstSize, delta := readBinaryDeltaSize(delta) 317 - 318 - for len(delta) > 0 { 319 - op := delta[0] 320 - if op == 0 { 321 - return errors.New("invalid delta opcode 0") 322 - } 323 - 324 - var n int64 325 - var err error 326 - switch op & 0x80 { 327 - case 0x80: 328 - n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src) 329 - case 0x00: 330 - n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:]) 331 - } 332 - if err != nil { 333 - return err 334 - } 335 - dstSize -= n 336 - } 337 - 338 - if dstSize != 0 { 339 - return errors.New("corrupt binary delta: insufficient or extra data") 340 - } 341 - return nil 342 - } 343 - 344 - // readBinaryDeltaSize reads a variable length size from a delta-encoded binary 345 - // fragment, returing the size and the unused data. Data is encoded as: 346 - // 347 - // [[1xxxxxxx]...] [0xxxxxxx] 348 - // 349 - // in little-endian order, with 7 bits of the value per byte. 
350 - func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { 351 - shift := uint(0) 352 - for i, b := range d { 353 - size |= int64(b&0x7F) << shift 354 - shift += 7 355 - if b <= 0x7F { 356 - return size, d[i+1:] 357 - } 358 - } 359 - return size, nil 360 - } 361 - 362 - // applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary 363 - // fragment, returning the amount of data written and the usused part of the 364 - // fragment. An add operation takes the form: 365 - // 366 - // [0xxxxxx][[data1]...] 367 - // 368 - // where the lower seven bits of the opcode is the number of data bytes 369 - // following the opcode. See also pack-format.txt in the Git source. 370 - func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) { 371 - size := int(op) 372 - if len(delta) < size { 373 - return 0, delta, errors.New("corrupt binary delta: incomplete add") 374 - } 375 - _, err = w.Write(delta[:size]) 376 - return int64(size), delta[size:], err 377 - } 378 - 379 - // applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary 380 - // fragment, returing the amount of data written and the unused part of the 381 - // fragment. A copy operation takes the form: 382 - // 383 - // [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] 384 - // 385 - // where the lower seven bits of the opcode determine which non-zero offset and 386 - // size bytes are present in little-endian order: if bit 0 is set, offset1 is 387 - // present, etc. If no offset or size bytes are present, offset is 0 and size 388 - // is 0x10000. See also pack-format.txt in the Git source. 
389 - func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) { 390 - const defaultSize = 0x10000 391 - 392 - unpack := func(start, bits uint) (v int64) { 393 - for i := uint(0); i < bits; i++ { 394 - mask := byte(1 << (i + start)) 395 - if op&mask > 0 { 396 - if len(delta) == 0 { 397 - err = errors.New("corrupt binary delta: incomplete copy") 398 - return 399 - } 400 - v |= int64(delta[0]) << (8 * i) 401 - delta = delta[1:] 402 - } 403 - } 404 - return 405 - } 406 - 407 - offset := unpack(0, 4) 408 - size := unpack(4, 3) 409 - if err != nil { 410 - return 0, delta, err 411 - } 412 - if size == 0 { 413 - size = defaultSize 414 - } 415 - 416 - // TODO(bkeyes): consider pooling these buffers 417 - b := make([]byte, size) 418 - if _, err := src.ReadAt(b, offset); err != nil { 419 - return 0, delta, err 420 - } 421 - 422 - _, err = w.Write(b) 423 - return size, delta, err 424 - } 425 - 426 - func checkBinarySrcSize(r io.ReaderAt, size int64) error { 427 - ok, err := isLen(r, size) 428 - if err != nil { 143 + // nothing to apply, just copy all the data 144 + _, err := copyFrom(dst, src, 0) 429 145 return err 430 146 } 431 - if !ok { 432 - return &Conflict{"fragment src size does not match actual src size"} 433 - } 434 - return nil 435 147 }

+206

gitdiff/apply_binary.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "errors" 5 + "fmt" 6 + "io" 7 + ) 8 + 9 + // BinaryApplier applies binary changes described in a fragment to source data. 10 + // The applier must be closed after use. 11 + type BinaryApplier struct { 12 + dst io.Writer 13 + src io.ReaderAt 14 + 15 + closed bool 16 + dirty bool 17 + } 18 + 19 + // NewBinaryApplier creates a BinaryApplier that reads data from src and 20 + // writes modified data to dst. 21 + func NewBinaryApplier(dst io.Writer, src io.ReaderAt) *BinaryApplier { 22 + a := BinaryApplier{ 23 + dst: dst, 24 + src: src, 25 + } 26 + return &a 27 + } 28 + 29 + // ApplyFragment applies the changes in the fragment f and writes the result to 30 + // dst. ApplyFragment can be called at most once. 31 + // 32 + // If an error occurs while applying, ApplyFragment returns an *ApplyError that 33 + // annotates the error with additional information. If the error is because of 34 + // a conflict between a fragment and the source, the wrapped error will be a 35 + // *Conflict. 36 + func (a *BinaryApplier) ApplyFragment(f *BinaryFragment) error { 37 + if f == nil { 38 + return applyError(errors.New("nil fragment")) 39 + } 40 + if a.closed { 41 + return applyError(errApplierClosed) 42 + } 43 + if a.dirty { 44 + return applyError(errApplyInProgress) 45 + } 46 + 47 + // mark an apply as in progress, even if it fails before making changes 48 + a.dirty = true 49 + 50 + switch f.Method { 51 + case BinaryPatchLiteral: 52 + if _, err := a.dst.Write(f.Data); err != nil { 53 + return applyError(err) 54 + } 55 + case BinaryPatchDelta: 56 + if err := applyBinaryDeltaFragment(a.dst, a.src, f.Data); err != nil { 57 + return applyError(err) 58 + } 59 + default: 60 + return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) 61 + } 62 + return nil 63 + } 64 + 65 + // Close writes any data following the last applied fragment and prevents 66 + // future calls to ApplyFragment.
67 + func (a *BinaryApplier) Close() (err error) { 68 + if a.closed { 69 + return nil 70 + } 71 + 72 + a.closed = true 73 + if !a.dirty { 74 + _, err = copyFrom(a.dst, a.src, 0) 75 + } else { 76 + // do nothing, applying a binary fragment copies all data 77 + } 78 + return err 79 + } 80 + 81 + func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error { 82 + srcSize, delta := readBinaryDeltaSize(frag) 83 + if err := checkBinarySrcSize(src, srcSize); err != nil { 84 + return err 85 + } 86 + 87 + dstSize, delta := readBinaryDeltaSize(delta) 88 + 89 + for len(delta) > 0 { 90 + op := delta[0] 91 + if op == 0 { 92 + return errors.New("invalid delta opcode 0") 93 + } 94 + 95 + var n int64 96 + var err error 97 + switch op & 0x80 { 98 + case 0x80: 99 + n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src) 100 + case 0x00: 101 + n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:]) 102 + } 103 + if err != nil { 104 + return err 105 + } 106 + dstSize -= n 107 + } 108 + 109 + if dstSize != 0 { 110 + return errors.New("corrupt binary delta: insufficient or extra data") 111 + } 112 + return nil 113 + } 114 + 115 + // readBinaryDeltaSize reads a variable length size from a delta-encoded binary 116 + // fragment, returning the size and the unused data. Data is encoded as: 117 + // 118 + // [[1xxxxxxx]...] [0xxxxxxx] 119 + // 120 + // in little-endian order, with 7 bits of the value per byte. 121 + func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { 122 + shift := uint(0) 123 + for i, b := range d { 124 + size |= int64(b&0x7F) << shift 125 + shift += 7 126 + if b <= 0x7F { 127 + return size, d[i+1:] 128 + } 129 + } 130 + return size, nil 131 + } 132 + 133 + // applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary 134 + // fragment, returning the amount of data written and the unused part of the 135 + // fragment. An add operation takes the form: 136 + // 137 + // [0xxxxxxx][[data1]...]
138 + // 139 + // where the lower seven bits of the opcode is the number of data bytes 140 + // following the opcode. See also pack-format.txt in the Git source. 141 + func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) { 142 + size := int(op) 143 + if len(delta) < size { 144 + return 0, delta, errors.New("corrupt binary delta: incomplete add") 145 + } 146 + _, err = w.Write(delta[:size]) 147 + return int64(size), delta[size:], err 148 + } 149 + 150 + // applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary 151 + // fragment, returning the amount of data written and the unused part of the 152 + // fragment. A copy operation takes the form: 153 + // 154 + // [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] 155 + // 156 + // where the lower seven bits of the opcode determine which non-zero offset and 157 + // size bytes are present in little-endian order: if bit 0 is set, offset1 is 158 + // present, etc. If no offset or size bytes are present, offset is 0 and size 159 + // is 0x10000. See also pack-format.txt in the Git source.
160 + func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) { 161 + const defaultSize = 0x10000 162 + 163 + unpack := func(start, bits uint) (v int64) { 164 + for i := uint(0); i < bits; i++ { 165 + mask := byte(1 << (i + start)) 166 + if op&mask > 0 { 167 + if len(delta) == 0 { 168 + err = errors.New("corrupt binary delta: incomplete copy") 169 + return 170 + } 171 + v |= int64(delta[0]) << (8 * i) 172 + delta = delta[1:] 173 + } 174 + } 175 + return 176 + } 177 + 178 + offset := unpack(0, 4) 179 + size := unpack(4, 3) 180 + if err != nil { 181 + return 0, delta, err 182 + } 183 + if size == 0 { 184 + size = defaultSize 185 + } 186 + 187 + // TODO(bkeyes): consider pooling these buffers 188 + b := make([]byte, size) 189 + if _, err := src.ReadAt(b, offset); err != nil { 190 + return 0, delta, err 191 + } 192 + 193 + _, err = w.Write(b) 194 + return size, delta, err 195 + } 196 + 197 + func checkBinarySrcSize(r io.ReaderAt, size int64) error { 198 + ok, err := isLen(r, size) 199 + if err != nil { 200 + return err 201 + } 202 + if !ok { 203 + return &Conflict{"fragment src size does not match actual src size"} 204 + } 205 + return nil 206 + }

+40 -98

gitdiff/apply_test.go

··· 6 6 "io" 7 7 "io/ioutil" 8 8 "path/filepath" 9 - "strings" 10 9 "testing" 11 10 ) 12 11 13 - func TestApplierInvariants(t *testing.T) { 14 - binary := &BinaryFragment{ 15 - Method: BinaryPatchLiteral, 16 - Size: 2, 17 - Data: []byte("\xbe\xef"), 18 - } 19 - 20 - text := &TextFragment{ 21 - NewPosition: 1, 22 - NewLines: 1, 23 - LinesAdded: 1, 24 - Lines: []Line{ 25 - {Op: OpAdd, Line: "new line\n"}, 26 - }, 27 - } 28 - 29 - file := &File{ 30 - TextFragments: []*TextFragment{text}, 31 - } 32 - 33 - src := bytes.NewReader(nil) 34 - dst := ioutil.Discard 35 - 36 - assertInProgress := func(t *testing.T, kind string, err error) { 37 - if !errors.Is(err, errApplyInProgress) { 38 - t.Fatalf("expected in-progress error for %s apply, but got: %v", kind, err) 39 - } 40 - } 41 - 42 - t.Run("binaryFirst", func(t *testing.T) { 43 - a := NewApplier(src) 44 - if err := a.ApplyBinaryFragment(dst, binary); err != nil { 45 - t.Fatalf("unexpected error applying fragment: %v", err) 46 - } 47 - assertInProgress(t, "text", a.ApplyTextFragment(dst, text)) 48 - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) 49 - assertInProgress(t, "file", a.ApplyFile(dst, file)) 50 - }) 51 - 52 - t.Run("textFirst", func(t *testing.T) { 53 - a := NewApplier(src) 54 - if err := a.ApplyTextFragment(dst, text); err != nil { 55 - t.Fatalf("unexpected error applying fragment: %v", err) 56 - } 57 - // additional text fragments are allowed 58 - if err := a.ApplyTextFragment(dst, text); err != nil { 59 - t.Fatalf("unexpected error applying second fragment: %v", err) 60 - } 61 - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) 62 - assertInProgress(t, "file", a.ApplyFile(dst, file)) 63 - }) 64 - 65 - t.Run("fileFirst", func(t *testing.T) { 66 - a := NewApplier(src) 67 - if err := a.ApplyFile(dst, file); err != nil { 68 - t.Fatalf("unexpected error applying file: %v", err) 69 - } 70 - assertInProgress(t, "text", a.ApplyTextFragment(dst, text)) 71 - assertInProgress(t, 
"binary", a.ApplyBinaryFragment(dst, binary)) 72 - assertInProgress(t, "file", a.ApplyFile(dst, file)) 73 - }) 74 - } 75 - 76 12 func TestApplyTextFragment(t *testing.T) { 77 13 tests := map[string]applyTest{ 78 14 "createFile": {Files: getApplyFiles("text_fragment_new")}, ··· 86 22 "changeStart": {Files: getApplyFiles("text_fragment_change_start")}, 87 23 "changeMiddle": {Files: getApplyFiles("text_fragment_change_middle")}, 88 24 "changeEnd": {Files: getApplyFiles("text_fragment_change_end")}, 25 + "changeEndEOL": {Files: getApplyFiles("text_fragment_change_end_eol")}, 89 26 "changeExact": {Files: getApplyFiles("text_fragment_change_exact")}, 90 27 "changeSingleNoEOL": {Files: getApplyFiles("text_fragment_change_single_noeol")}, 91 28 ··· 128 65 129 66 for name, test := range tests { 130 67 t.Run(name, func(t *testing.T) { 131 - test.run(t, func(w io.Writer, applier *Applier, file *File) error { 68 + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { 132 69 if len(file.TextFragments) != 1 { 133 70 t.Fatalf("patch should contain exactly one fragment, but it has %d", len(file.TextFragments)) 134 71 } 135 - return applier.ApplyTextFragment(w, file.TextFragments[0]) 72 + applier := NewTextApplier(dst, src) 73 + return applier.ApplyFragment(file.TextFragments[0]) 136 74 }) 137 75 }) 138 76 } ··· 177 115 178 116 for name, test := range tests { 179 117 t.Run(name, func(t *testing.T) { 180 - test.run(t, func(w io.Writer, applier *Applier, file *File) error { 181 - return applier.ApplyBinaryFragment(w, file.BinaryFragment) 118 + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { 119 + applier := NewBinaryApplier(dst, src) 120 + return applier.ApplyFragment(file.BinaryFragment) 182 121 }) 183 122 }) 184 123 } ··· 186 125 187 126 func TestApplyFile(t *testing.T) { 188 127 tests := map[string]applyTest{ 189 - "textModify": {Files: getApplyFiles("text_file_modify")}, 190 - "binaryModify": {Files: getApplyFiles("bin_file_modify")}, 191 - 
"modeChange": {Files: getApplyFiles("file_mode_change")}, 128 + "textModify": { 129 + Files: applyFiles{ 130 + Src: "file_text.src", 131 + Patch: "file_text_modify.patch", 132 + Out: "file_text_modify.out", 133 + }, 134 + }, 135 + "textDelete": { 136 + Files: applyFiles{ 137 + Src: "file_text.src", 138 + Patch: "file_text_delete.patch", 139 + Out: "file_text_delete.out", 140 + }, 141 + }, 142 + "textErrorPartialDelete": { 143 + Files: applyFiles{ 144 + Src: "file_text.src", 145 + Patch: "file_text_error_partial_delete.patch", 146 + }, 147 + Err: &Conflict{}, 148 + }, 149 + "binaryModify": { 150 + Files: getApplyFiles("file_bin_modify"), 151 + }, 152 + "modeChange": { 153 + Files: getApplyFiles("file_mode_change"), 154 + }, 192 155 } 193 156 194 157 for name, test := range tests { 195 158 t.Run(name, func(t *testing.T) { 196 - test.run(t, func(w io.Writer, applier *Applier, file *File) error { 197 - return applier.ApplyFile(w, file) 159 + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { 160 + return Apply(dst, src, file) 198 161 }) 199 162 }) 200 163 } ··· 205 168 Err interface{} 206 169 } 207 170 208 - func (at applyTest) run(t *testing.T, apply func(io.Writer, *Applier, *File) error) { 171 + func (at applyTest) run(t *testing.T, apply func(io.Writer, io.ReaderAt, *File) error) { 209 172 src, patch, out := at.Files.Load(t) 210 173 211 174 files, _, err := Parse(bytes.NewReader(patch)) ··· 215 178 if len(files) != 1 { 216 179 t.Fatalf("patch should contain exactly one file, but it has %d", len(files)) 217 180 } 218 - 219 - applier := NewApplier(bytes.NewReader(src)) 220 181 221 182 var dst bytes.Buffer 222 - err = apply(&dst, applier, files[0]) 183 + err = apply(&dst, bytes.NewReader(src), files[0]) 223 184 if at.Err != nil { 224 - at.assertError(t, err) 185 + assertError(t, at.Err, err, "applying fragment") 225 186 return 226 187 } 227 188 if err != nil { ··· 234 195 } 235 196 236 197 if !bytes.Equal(out, dst.Bytes()) { 237 - t.Errorf("incorrect 
result after apply\nexpected:\n%x\nactual:\n%x", out, dst.Bytes()) 238 - } 239 - } 240 - 241 - func (at applyTest) assertError(t *testing.T, err error) { 242 - if err == nil { 243 - t.Fatalf("expected error applying fragment, but got nil") 244 - } 245 - 246 - switch terr := at.Err.(type) { 247 - case string: 248 - if !strings.Contains(err.Error(), terr) { 249 - t.Fatalf("incorrect apply error: %q does not contain %q", err.Error(), terr) 250 - } 251 - case error: 252 - if !errors.Is(err, terr) { 253 - t.Fatalf("incorrect apply error: expected: %T (%v), actual: %T (%v)", terr, terr, err, err) 254 - } 255 - default: 256 - t.Fatalf("unsupported expected error type: %T", terr) 198 + t.Errorf("incorrect result after apply\nexpected:\n%q\nactual:\n%q", out, dst.Bytes()) 257 199 } 258 200 } 259 201

+152

gitdiff/apply_text.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "io" 5 + ) 6 + 7 + // TextApplier applies changes described in text fragments to source data. If 8 + // changes are described in multiple fragments, those fragments must be applied 9 + // in order. The applier must be closed after use. 10 + // 11 + // By default, TextApplier operates in "strict" mode, where fragment content 12 + // and positions must exactly match those of the source. 13 + type TextApplier struct { 14 + dst io.Writer 15 + src io.ReaderAt 16 + lineSrc LineReaderAt 17 + nextLine int64 18 + 19 + closed bool 20 + dirty bool 21 + } 22 + 23 + // NewTextApplier creates a TextApplier that reads data from src and writes 24 + // modified data to dst. If src implements LineReaderAt, it is used directly. 25 + func NewTextApplier(dst io.Writer, src io.ReaderAt) *TextApplier { 26 + a := TextApplier{ 27 + dst: dst, 28 + src: src, 29 + } 30 + 31 + if lineSrc, ok := src.(LineReaderAt); ok { 32 + a.lineSrc = lineSrc 33 + } else { 34 + a.lineSrc = &lineReaderAt{r: src} 35 + } 36 + 37 + return &a 38 + } 39 + 40 + // ApplyFragment applies the changes in the fragment f, writing unwritten data 41 + // before the start of the fragment and any changes from the fragment. If 42 + // multiple text fragments apply to the same content, ApplyFragment must be 43 + // called in order of increasing start position. As a result, each fragment can 44 + // be applied at most once. 45 + // 46 + // If an error occurs while applying, ApplyFragment returns an *ApplyError that 47 + // annotates the error with additional information. If the error is because of 48 + // a conflict between the fragment and the source, the wrapped error will be a 49 + // *Conflict. 
50 + func (a *TextApplier) ApplyFragment(f *TextFragment) error { 51 + if a.closed { 52 + return applyError(errApplierClosed) 53 + } 54 + 55 + // mark an apply as in progress, even if it fails before making changes 56 + a.dirty = true 57 + 58 + // application code assumes fragment fields are consistent 59 + if err := f.Validate(); err != nil { 60 + return applyError(err) 61 + } 62 + 63 + // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) 64 + fragStart := f.OldPosition - 1 65 + if fragStart < 0 { 66 + fragStart = 0 67 + } 68 + fragEnd := fragStart + f.OldLines 69 + 70 + start := a.nextLine 71 + if fragStart < start { 72 + return applyError(&Conflict{"fragment overlaps with an applied fragment"}) 73 + } 74 + 75 + if f.OldPosition == 0 { 76 + ok, err := isLen(a.src, 0) 77 + if err != nil { 78 + return applyError(err) 79 + } 80 + if !ok { 81 + return applyError(&Conflict{"cannot create new file from non-empty src"}) 82 + } 83 + } 84 + 85 + preimage := make([][]byte, fragEnd-start) 86 + n, err := a.lineSrc.ReadLinesAt(preimage, start) 87 + if err != nil { 88 + return applyError(err, lineNum(start+int64(n))) 89 + } 90 + 91 + // copy leading data before the fragment starts 92 + for i, line := range preimage[:fragStart-start] { 93 + if _, err := a.dst.Write(line); err != nil { 94 + a.nextLine = start + int64(i) 95 + return applyError(err, lineNum(a.nextLine)) 96 + } 97 + } 98 + preimage = preimage[fragStart-start:] 99 + 100 + // apply the changes in the fragment 101 + used := int64(0) 102 + for i, line := range f.Lines { 103 + if err := applyTextLine(a.dst, line, preimage, used); err != nil { 104 + a.nextLine = fragStart + used 105 + return applyError(err, lineNum(a.nextLine), fragLineNum(i)) 106 + } 107 + if line.Old() { 108 + used++ 109 + } 110 + } 111 + a.nextLine = fragStart + used 112 + 113 + // new position of +0,0 mean a full delete, so check for leftovers 114 + if f.NewPosition == 0 && f.NewLines == 0 { 115 + var b [1][]byte 116 + n, 
err := a.lineSrc.ReadLinesAt(b[:], a.nextLine) 117 + if err != nil && err != io.EOF { 118 + return applyError(err, lineNum(a.nextLine)) 119 + } 120 + if n > 0 { 121 + return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine)) 122 + } 123 + } 124 + 125 + return nil 126 + } 127 + 128 + func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { 129 + if line.Old() && string(preimage[i]) != line.Line { 130 + return &Conflict{"fragment line does not match src line"} 131 + } 132 + if line.New() { 133 + _, err = io.WriteString(dst, line.Line) 134 + } 135 + return err 136 + } 137 + 138 + // Close writes any data following the last applied fragment and prevents 139 + // future calls to ApplyFragment. 140 + func (a *TextApplier) Close() (err error) { 141 + if a.closed { 142 + return nil 143 + } 144 + 145 + a.closed = true 146 + if !a.dirty { 147 + _, err = copyFrom(a.dst, a.src, 0) 148 + } else { 149 + _, err = copyLinesFrom(a.dst, a.lineSrc, a.nextLine) 150 + } 151 + return err 152 + }

+30

gitdiff/assert_test.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "errors" 5 + "strings" 6 + "testing" 7 + ) 8 + 9 + func assertError(t *testing.T, expected interface{}, actual error, action string) { 10 + if actual == nil { 11 + t.Fatalf("expected error %s, but got nil", action) 12 + } 13 + 14 + switch exp := expected.(type) { 15 + case bool: 16 + if !exp { 17 + t.Fatalf("unexpected error %s: %v", action, actual) 18 + } 19 + case string: 20 + if !strings.Contains(actual.Error(), exp) { 21 + t.Fatalf("incorrect error %s: %q does not contain %q", action, actual.Error(), exp) 22 + } 23 + case error: 24 + if !errors.Is(actual, exp) { 25 + t.Fatalf("incorrect error %s: expected %T (%v), actual: %T (%v)", action, exp, exp, actual, actual) 26 + } 27 + default: 28 + t.Fatalf("unsupported expected error type: %T", exp) 29 + } 30 + }

+41 -2

gitdiff/base85.go

··· 19 19 } 20 20 21 21 // base85Decode decodes Base85-encoded data from src into dst. It uses the 22 - // alphabet defined by base85.c in the Git source tree, which appears to be 23 - // unique. src must contain at least len(dst) bytes of encoded data. 22 + // alphabet defined by base85.c in the Git source tree. src must contain at 23 + // least len(dst) bytes of encoded data. 24 24 func base85Decode(dst, src []byte) error { 25 25 var v uint32 26 26 var n, ndst int ··· 50 50 } 51 51 return nil 52 52 } 53 + 54 + // base85Encode encodes src in Base85, writing the result to dst. It uses the 55 + // alphabet defined by base85.c in the Git source tree. 56 + func base85Encode(dst, src []byte) { 57 + var di, si int 58 + 59 + encode := func(v uint32) { 60 + dst[di+0] = b85Alpha[(v/(85*85*85*85))%85] 61 + dst[di+1] = b85Alpha[(v/(85*85*85))%85] 62 + dst[di+2] = b85Alpha[(v/(85*85))%85] 63 + dst[di+3] = b85Alpha[(v/85)%85] 64 + dst[di+4] = b85Alpha[v%85] 65 + } 66 + 67 + n := (len(src) / 4) * 4 68 + for si < n { 69 + encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])) 70 + si += 4 71 + di += 5 72 + } 73 + 74 + var v uint32 75 + switch len(src) - si { 76 + case 3: 77 + v |= uint32(src[si+2]) << 8 78 + fallthrough 79 + case 2: 80 + v |= uint32(src[si+1]) << 16 81 + fallthrough 82 + case 1: 83 + v |= uint32(src[si+0]) << 24 84 + encode(v) 85 + } 86 + } 87 + 88 + // base85Len returns the length of n bytes of Base85 encoded data. 89 + func base85Len(n int) int { 90 + return (n + 3) / 4 * 5 91 + }

+58

gitdiff/base85_test.go

··· 1 1 package gitdiff 2 2 3 3 import ( 4 + "bytes" 4 5 "testing" 5 6 ) 6 7 ··· 58 59 }) 59 60 } 60 61 } 62 + 63 + func TestBase85Encode(t *testing.T) { 64 + tests := map[string]struct { 65 + Input []byte 66 + Output string 67 + }{ 68 + "zeroBytes": { 69 + Input: []byte{}, 70 + Output: "", 71 + }, 72 + "twoBytes": { 73 + Input: []byte{0xCA, 0xFE}, 74 + Output: "%KiWV", 75 + }, 76 + "fourBytes": { 77 + Input: []byte{0x0, 0x0, 0xCA, 0xFE}, 78 + Output: "007GV", 79 + }, 80 + "sixBytes": { 81 + Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE}, 82 + Output: "007GV%KiWV", 83 + }, 84 + } 85 + 86 + for name, test := range tests { 87 + t.Run(name, func(t *testing.T) { 88 + dst := make([]byte, len(test.Output)) 89 + base85Encode(dst, test.Input) 90 + for i, b := range test.Output { 91 + if dst[i] != byte(b) { 92 + t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i]) 93 + } 94 + } 95 + }) 96 + } 97 + } 98 + 99 + func FuzzBase85Roundtrip(f *testing.F) { 100 + f.Add([]byte{0x2b, 0x0d}) 101 + f.Add([]byte{0xbc, 0xb4, 0x3f}) 102 + f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25}) 103 + f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8}) 104 + 105 + f.Fuzz(func(t *testing.T, in []byte) { 106 + n := len(in) 107 + dst := make([]byte, base85Len(n)) 108 + out := make([]byte, n) 109 + 110 + base85Encode(dst, in) 111 + if err := base85Decode(out, dst); err != nil { 112 + t.Fatalf("unexpected error decoding base85 data: %v", err) 113 + } 114 + if !bytes.Equal(in, out) { 115 + t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) 116 + } 117 + }) 118 + }

+11 -4

gitdiff/binary.go

··· 50 50 } 51 51 52 52 func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { 53 - switch p.Line(0) { 54 - case "GIT binary patch\n": 53 + line := p.Line(0) 54 + switch { 55 + case line == "GIT binary patch\n": 55 56 hasData = true 56 - case "Binary files differ\n": 57 - case "Files differ\n": 57 + case isBinaryNoDataMarker(line): 58 58 default: 59 59 return false, false, nil 60 60 } ··· 63 63 return false, false, err 64 64 } 65 65 return true, hasData, nil 66 + } 67 + 68 + func isBinaryNoDataMarker(line string) bool { 69 + if strings.HasSuffix(line, " differ\n") { 70 + return strings.HasPrefix(line, "Binary files ") || strings.HasPrefix(line, "Files ") 71 + } 72 + return false 66 73 } 67 74 68 75 func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) {

+10

gitdiff/binary_test.go

··· 25 25 IsBinary: true, 26 26 HasData: false, 27 27 }, 28 + "binaryFileNoPatchPaths": { 29 + Input: "Binary files a/foo.bin and b/foo.bin differ\n", 30 + IsBinary: true, 31 + HasData: false, 32 + }, 33 + "fileNoPatch": { 34 + Input: "Files differ\n", 35 + IsBinary: true, 36 + HasData: false, 37 + }, 28 38 "textFile": { 29 39 Input: "@@ -10,14 +22,31 @@\n", 30 40 IsBinary: false,

+103 -27

gitdiff/file_header.go

··· 57 57 return nil, "", err 58 58 } 59 59 } 60 - return nil, "", nil 60 + return nil, preamble.String(), nil 61 61 } 62 62 63 63 func (p *parser) ParseGitFileHeader() (*File, error) { ··· 172 172 // If the names in the header do not match because the patch is a rename, 173 173 // return an empty default name. 174 174 func parseGitHeaderName(header string) (string, error) { 175 - firstName, n, err := parseName(header, -1, 1) 176 - if err != nil { 177 - return "", err 175 + header = strings.TrimSuffix(header, "\n") 176 + if len(header) == 0 { 177 + return "", nil 178 178 } 179 179 180 - if n < len(header) && (header[n] == ' ' || header[n] == '\t') { 181 - n++ 182 - } 180 + var err error 181 + var first, second string 182 + 183 + // there are 4 cases to account for: 184 + // 185 + // 1) unquoted unquoted 186 + // 2) unquoted "quoted" 187 + // 3) "quoted" unquoted 188 + // 4) "quoted" "quoted" 189 + // 190 + quote := strings.IndexByte(header, '"') 191 + switch { 192 + case quote < 0: 193 + // case 1 194 + first = header 195 + 196 + case quote > 0: 197 + // case 2 198 + first = header[:quote-1] 199 + if !isSpace(header[quote-1]) { 200 + return "", fmt.Errorf("missing separator") 201 + } 202 + 203 + second, _, err = parseQuotedName(header[quote:]) 204 + if err != nil { 205 + return "", err 206 + } 207 + 208 + case quote == 0: 209 + // case 3 or case 4 210 + var n int 211 + first, n, err = parseQuotedName(header) 212 + if err != nil { 213 + return "", err 214 + } 215 + 216 + // git accepts multiple spaces after a quoted name, but not after an 217 + // unquoted name, since the name might end with one or more spaces 218 + for n < len(header) && isSpace(header[n]) { 219 + n++ 220 + } 221 + if n == len(header) { 222 + return "", nil 223 + } 183 224 184 - secondName, _, err := parseName(header[n:], -1, 1) 185 - if err != nil { 186 - return "", err 225 + if header[n] == '"' { 226 + second, _, err = parseQuotedName(header[n:]) 227 + if err != nil { 228 + return "", err 229 + } 
230 + } else { 231 + second = header[n:] 232 + } 187 233 } 188 234 189 - if firstName != secondName { 235 + first = trimTreePrefix(first, 1) 236 + if second != "" { 237 + if first == trimTreePrefix(second, 1) { 238 + return first, nil 239 + } 190 240 return "", nil 191 241 } 192 - return firstName, nil 242 + 243 + // at this point, both names are unquoted (case 1) 244 + // since names may contain spaces, we can't use a known separator 245 + // instead, look for a split that produces two equal names 246 + 247 + for i := 0; i < len(first)-1; i++ { 248 + if !isSpace(first[i]) { 249 + continue 250 + } 251 + second = trimTreePrefix(first[i+1:], 1) 252 + if name := first[:i]; name == second { 253 + return name, nil 254 + } 255 + } 256 + return "", nil 193 257 } 194 258 195 259 // parseGitHeaderData parses a single line of metadata from a Git file header. ··· 260 324 } 261 325 262 326 func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) { 263 - f.OldMode, err = parseMode(line) 327 + f.OldMode, err = parseMode(strings.TrimSpace(line)) 264 328 return 265 329 } 266 330 267 331 func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) { 268 - f.NewMode, err = parseMode(line) 332 + f.NewMode, err = parseMode(strings.TrimSpace(line)) 269 333 return 270 334 } 271 335 ··· 283 347 284 348 func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) { 285 349 f.IsCopy = true 286 - f.OldName, _, err = parseName(line, -1, 0) 350 + f.OldName, _, err = parseName(line, 0, 0) 287 351 return 288 352 } 289 353 290 354 func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) { 291 355 f.IsCopy = true 292 - f.NewName, _, err = parseName(line, -1, 0) 356 + f.NewName, _, err = parseName(line, 0, 0) 293 357 return 294 358 } 295 359 296 360 func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) { 297 361 f.IsRename = true 298 - f.OldName, _, err = parseName(line, -1, 0) 362 + f.OldName, _, err = 
parseName(line, 0, 0) 299 363 return 300 364 } 301 365 302 366 func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) { 303 367 f.IsRename = true 304 - f.NewName, _, err = parseName(line, -1, 0) 368 + f.NewName, _, err = parseName(line, 0, 0) 305 369 return 306 370 } 307 371 ··· 349 413 350 414 // parseName extracts a file name from the start of a string and returns the 351 415 // name and the index of the first character after the name. If the name is 352 - // unquoted and term is non-negative, parsing stops at the first occurrence of 353 - // term. Otherwise parsing of unquoted names stops at the first space or tab. 416 + // unquoted and term is non-zero, parsing stops at the first occurrence of 417 + // term. 354 418 // 355 419 // If the name is exactly "/dev/null", no further processing occurs. Otherwise, 356 420 // if dropPrefix is greater than zero, that number of prefix components 357 421 // separated by forward slashes are dropped from the name and any duplicate 358 422 // slashes are collapsed. 359 - func parseName(s string, term rune, dropPrefix int) (name string, n int, err error) { 423 + func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) { 360 424 if len(s) > 0 && s[0] == '"' { 361 425 name, n, err = parseQuotedName(s) 362 426 } else { ··· 387 451 return name, n, err 388 452 } 389 453 390 - func parseUnquotedName(s string, term rune) (name string, n int, err error) { 454 + func parseUnquotedName(s string, term byte) (name string, n int, err error) { 391 455 for n = 0; n < len(s); n++ { 392 456 if s[n] == '\n' { 393 457 break 394 458 } 395 - if term >= 0 && rune(s[n]) == term { 396 - break 397 - } 398 - if term < 0 && (s[n] == ' ' || s[n] == '\t') { 459 + if term > 0 && s[n] == term { 399 460 break 400 461 } 401 462 } ··· 440 501 return b.String() 441 502 } 442 503 504 + // trimTreePrefix removes up to n leading directory components from name. 
505 + func trimTreePrefix(name string, n int) string { 506 + i := 0 507 + for ; i < len(name) && n > 0; i++ { 508 + if name[i] == '/' { 509 + n-- 510 + } 511 + } 512 + return name[i:] 513 + } 514 + 443 515 // hasEpochTimestamp returns true if the string ends with a POSIX-formatted 444 516 // timestamp for the UNIX epoch after a tab character. According to git, this 445 517 // is used by GNU diff to mark creations and deletions. ··· 455 527 456 528 // a valid timestamp can have optional ':' in zone specifier 457 529 // remove that if it exists so we have a single format 458 - if ts[len(ts)-3] == ':' { 530 + if len(ts) >= 3 && ts[len(ts)-3] == ':' { 459 531 ts = ts[:len(ts)-3] + ts[len(ts)-2:] 460 532 } 461 533 ··· 468 540 } 469 541 return true 470 542 } 543 + 544 + func isSpace(c byte) bool { 545 + return c == ' ' || c == '\t' || c == '\n' 546 + }

+64 -8

gitdiff/file_header_test.go

··· 310 310 func TestParseName(t *testing.T) { 311 311 tests := map[string]struct { 312 312 Input string 313 - Term rune 313 + Term byte 314 314 Drop int 315 315 Output string 316 316 N int ··· 334 334 "dropPrefix": { 335 335 Input: "a/dir/file.txt", Drop: 1, Output: "dir/file.txt", N: 14, 336 336 }, 337 - "multipleNames": { 338 - Input: "dir/a.txt dir/b.txt", Term: -1, Output: "dir/a.txt", N: 9, 337 + "unquotedWithSpaces": { 338 + Input: "dir/with spaces.txt", Output: "dir/with spaces.txt", N: 19, 339 + }, 340 + "unquotedWithTrailingSpaces": { 341 + Input: "dir/with spaces.space ", Output: "dir/with spaces.space ", N: 23, 339 342 }, 340 343 "devNull": { 341 344 Input: "/dev/null", Term: '\t', Drop: 1, Output: "/dev/null", N: 9, 342 345 }, 343 - "newlineAlwaysSeparates": { 344 - Input: "dir/file.txt\n", Term: 0, Output: "dir/file.txt", N: 12, 346 + "newlineSeparates": { 347 + Input: "dir/file.txt\n", Output: "dir/file.txt", N: 12, 345 348 }, 346 349 "emptyString": { 347 350 Input: "", Err: true, ··· 483 486 OldMode: os.FileMode(0100644), 484 487 }, 485 488 }, 489 + "oldModeWithTrailingSpace": { 490 + Line: "old mode 100644\r\n", 491 + OutputFile: &File{ 492 + OldMode: os.FileMode(0100644), 493 + }, 494 + }, 486 495 "invalidOldMode": { 487 496 Line: "old mode rw\n", 488 497 Err: true, 489 498 }, 490 499 "newMode": { 491 500 Line: "new mode 100755\n", 501 + OutputFile: &File{ 502 + NewMode: os.FileMode(0100755), 503 + }, 504 + }, 505 + "newModeWithTrailingSpace": { 506 + Line: "new mode 100755\r\n", 492 507 OutputFile: &File{ 493 508 NewMode: os.FileMode(0100755), 494 509 }, ··· 508 523 }, 509 524 "newFileMode": { 510 525 Line: "new file mode 100755\n", 526 + DefaultName: "dir/file.txt", 527 + OutputFile: &File{ 528 + NewName: "dir/file.txt", 529 + NewMode: os.FileMode(0100755), 530 + IsNew: true, 531 + }, 532 + }, 533 + "newFileModeWithTrailingSpace": { 534 + Line: "new file mode 100755\r\n", 511 535 DefaultName: "dir/file.txt", 512 536 OutputFile: &File{ 513 537 
NewName: "dir/file.txt", ··· 630 654 Input: "a/dir/foo.txt b/dir/bar.txt", 631 655 Output: "", 632 656 }, 633 - "missingSecondName": { 634 - Input: "a/dir/foo.txt", 635 - Err: true, 657 + "matchingNamesWithSpaces": { 658 + Input: "a/dir/file with spaces.txt b/dir/file with spaces.txt", 659 + Output: "dir/file with spaces.txt", 660 + }, 661 + "matchingNamesWithTrailingSpaces": { 662 + Input: "a/dir/spaces b/dir/spaces ", 663 + Output: "dir/spaces ", 664 + }, 665 + "matchingNamesQuoted": { 666 + Input: `"a/dir/\"quotes\".txt" "b/dir/\"quotes\".txt"`, 667 + Output: `dir/"quotes".txt`, 668 + }, 669 + "matchingNamesFirstQuoted": { 670 + Input: `"a/dir/file.txt" b/dir/file.txt`, 671 + Output: "dir/file.txt", 672 + }, 673 + "matchingNamesSecondQuoted": { 674 + Input: `a/dir/file.txt "b/dir/file.txt"`, 675 + Output: "dir/file.txt", 676 + }, 677 + "noSecondName": { 678 + Input: "a/dir/foo.txt", 679 + Output: "", 680 + }, 681 + "noSecondNameQuoted": { 682 + Input: `"a/dir/foo.txt"`, 683 + Output: "", 636 684 }, 637 685 "invalidName": { 638 686 Input: `"a/dir/file.txt b/dir/file.txt`, ··· 695 743 }, 696 744 "notEpoch": { 697 745 Input: "+++ file.txt\t2019-03-21 12:34:56.789 -0700\n", 746 + Output: false, 747 + }, 748 + "notTimestamp": { 749 + Input: "+++ file.txt\trandom text\n", 750 + Output: false, 751 + }, 752 + "notTimestampShort": { 753 + Input: "+++ file.txt\t0\n", 698 754 Output: false, 699 755 }, 700 756 }

+281

gitdiff/format.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "bytes" 5 + "compress/zlib" 6 + "fmt" 7 + "io" 8 + "strconv" 9 + ) 10 + 11 + type formatter struct { 12 + w io.Writer 13 + err error 14 + } 15 + 16 + func newFormatter(w io.Writer) *formatter { 17 + return &formatter{w: w} 18 + } 19 + 20 + func (fm *formatter) Write(p []byte) (int, error) { 21 + if fm.err != nil { 22 + return len(p), nil 23 + } 24 + if _, err := fm.w.Write(p); err != nil { 25 + fm.err = err 26 + } 27 + return len(p), nil 28 + } 29 + 30 + func (fm *formatter) WriteString(s string) (int, error) { 31 + fm.Write([]byte(s)) 32 + return len(s), nil 33 + } 34 + 35 + func (fm *formatter) WriteByte(c byte) error { 36 + fm.Write([]byte{c}) 37 + return nil 38 + } 39 + 40 + func (fm *formatter) WriteQuotedName(s string) { 41 + qpos := 0 42 + for i := 0; i < len(s); i++ { 43 + ch := s[i] 44 + if q, quoted := quoteByte(ch); quoted { 45 + if qpos == 0 { 46 + fm.WriteByte('"') 47 + } 48 + fm.WriteString(s[qpos:i]) 49 + fm.Write(q) 50 + qpos = i + 1 51 + } 52 + } 53 + fm.WriteString(s[qpos:]) 54 + if qpos > 0 { 55 + fm.WriteByte('"') 56 + } 57 + } 58 + 59 + var quoteEscapeTable = map[byte]byte{ 60 + '\a': 'a', 61 + '\b': 'b', 62 + '\t': 't', 63 + '\n': 'n', 64 + '\v': 'v', 65 + '\f': 'f', 66 + '\r': 'r', 67 + '"': '"', 68 + '\\': '\\', 69 + } 70 + 71 + func quoteByte(b byte) ([]byte, bool) { 72 + if q, ok := quoteEscapeTable[b]; ok { 73 + return []byte{'\\', q}, true 74 + } 75 + if b < 0x20 || b >= 0x7F { 76 + return []byte{ 77 + '\\', 78 + '0' + (b>>6)&0o3, 79 + '0' + (b>>3)&0o7, 80 + '0' + (b>>0)&0o7, 81 + }, true 82 + } 83 + return nil, false 84 + } 85 + 86 + func (fm *formatter) FormatFile(f *File) { 87 + fm.WriteString("diff --git ") 88 + 89 + var aName, bName string 90 + switch { 91 + case f.OldName == "": 92 + aName = f.NewName 93 + bName = f.NewName 94 + 95 + case f.NewName == "": 96 + aName = f.OldName 97 + bName = f.OldName 98 + 99 + default: 100 + aName = f.OldName 101 + bName = f.NewName 102 + } 103 + 104 + 
fm.WriteQuotedName("a/" + aName) 105 + fm.WriteByte(' ') 106 + fm.WriteQuotedName("b/" + bName) 107 + fm.WriteByte('\n') 108 + 109 + if f.OldMode != 0 { 110 + if f.IsDelete { 111 + fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode) 112 + } else if f.NewMode != 0 { 113 + fmt.Fprintf(fm, "old mode %o\n", f.OldMode) 114 + } 115 + } 116 + 117 + if f.NewMode != 0 { 118 + if f.IsNew { 119 + fmt.Fprintf(fm, "new file mode %o\n", f.NewMode) 120 + } else if f.OldMode != 0 { 121 + fmt.Fprintf(fm, "new mode %o\n", f.NewMode) 122 + } 123 + } 124 + 125 + if f.Score > 0 { 126 + if f.IsCopy || f.IsRename { 127 + fmt.Fprintf(fm, "similarity index %d%%\n", f.Score) 128 + } else { 129 + fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score) 130 + } 131 + } 132 + 133 + if f.IsCopy { 134 + if f.OldName != "" { 135 + fm.WriteString("copy from ") 136 + fm.WriteQuotedName(f.OldName) 137 + fm.WriteByte('\n') 138 + } 139 + if f.NewName != "" { 140 + fm.WriteString("copy to ") 141 + fm.WriteQuotedName(f.NewName) 142 + fm.WriteByte('\n') 143 + } 144 + } 145 + 146 + if f.IsRename { 147 + if f.OldName != "" { 148 + fm.WriteString("rename from ") 149 + fm.WriteQuotedName(f.OldName) 150 + fm.WriteByte('\n') 151 + } 152 + if f.NewName != "" { 153 + fm.WriteString("rename to ") 154 + fm.WriteQuotedName(f.NewName) 155 + fm.WriteByte('\n') 156 + } 157 + } 158 + 159 + if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { 160 + fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) 161 + 162 + // Mode is only included on the index line when it is not changing 163 + if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { 164 + fmt.Fprintf(fm, " %o", f.OldMode) 165 + } 166 + 167 + fm.WriteByte('\n') 168 + } 169 + 170 + if f.IsBinary { 171 + if f.BinaryFragment == nil { 172 + fm.WriteString("Binary files ") 173 + fm.WriteQuotedName("a/" + aName) 174 + fm.WriteString(" and ") 175 + fm.WriteQuotedName("b/" + bName) 176 + fm.WriteString(" differ\n") 177 + } else { 178 + 
fm.WriteString("GIT binary patch\n") 179 + fm.FormatBinaryFragment(f.BinaryFragment) 180 + if f.ReverseBinaryFragment != nil { 181 + fm.FormatBinaryFragment(f.ReverseBinaryFragment) 182 + } 183 + } 184 + } 185 + 186 + // The "---" and "+++" lines only appear for text patches with fragments 187 + if len(f.TextFragments) > 0 { 188 + fm.WriteString("--- ") 189 + if f.OldName == "" { 190 + fm.WriteString("/dev/null") 191 + } else { 192 + fm.WriteQuotedName("a/" + f.OldName) 193 + } 194 + fm.WriteByte('\n') 195 + 196 + fm.WriteString("+++ ") 197 + if f.NewName == "" { 198 + fm.WriteString("/dev/null") 199 + } else { 200 + fm.WriteQuotedName("b/" + f.NewName) 201 + } 202 + fm.WriteByte('\n') 203 + 204 + for _, frag := range f.TextFragments { 205 + fm.FormatTextFragment(frag) 206 + } 207 + } 208 + } 209 + 210 + func (fm *formatter) FormatTextFragment(f *TextFragment) { 211 + fm.FormatTextFragmentHeader(f) 212 + fm.WriteByte('\n') 213 + 214 + for _, line := range f.Lines { 215 + fm.WriteString(line.Op.String()) 216 + fm.WriteString(line.Line) 217 + if line.NoEOL() { 218 + fm.WriteString("\n\\ No newline at end of file\n") 219 + } 220 + } 221 + } 222 + 223 + func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) { 224 + fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) 225 + if f.Comment != "" { 226 + fm.WriteByte(' ') 227 + fm.WriteString(f.Comment) 228 + } 229 + } 230 + 231 + func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) { 232 + const ( 233 + maxBytesPerLine = 52 234 + ) 235 + 236 + switch f.Method { 237 + case BinaryPatchDelta: 238 + fm.WriteString("delta ") 239 + case BinaryPatchLiteral: 240 + fm.WriteString("literal ") 241 + } 242 + fm.Write(strconv.AppendInt(nil, f.Size, 10)) 243 + fm.WriteByte('\n') 244 + 245 + data := deflateBinaryChunk(f.Data) 246 + n := (len(data) / maxBytesPerLine) * maxBytesPerLine 247 + 248 + buf := make([]byte, base85Len(maxBytesPerLine)) 249 + for i := 0; i < n; i += 
maxBytesPerLine { 250 + base85Encode(buf, data[i:i+maxBytesPerLine]) 251 + fm.WriteByte('z') 252 + fm.Write(buf) 253 + fm.WriteByte('\n') 254 + } 255 + if remainder := len(data) - n; remainder > 0 { 256 + buf = buf[0:base85Len(remainder)] 257 + 258 + sizeChar := byte(remainder) 259 + if remainder <= 26 { 260 + sizeChar = 'A' + sizeChar - 1 261 + } else { 262 + sizeChar = 'a' + sizeChar - 27 263 + } 264 + 265 + base85Encode(buf, data[n:]) 266 + fm.WriteByte(sizeChar) 267 + fm.Write(buf) 268 + fm.WriteByte('\n') 269 + } 270 + fm.WriteByte('\n') 271 + } 272 + 273 + func deflateBinaryChunk(data []byte) []byte { 274 + var b bytes.Buffer 275 + 276 + zw := zlib.NewWriter(&b) 277 + _, _ = zw.Write(data) 278 + _ = zw.Close() 279 + 280 + return b.Bytes() 281 + }

+157

gitdiff/format_roundtrip_test.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "bytes" 5 + "fmt" 6 + "os" 7 + "path/filepath" 8 + "slices" 9 + "testing" 10 + ) 11 + 12 + func TestFormatRoundtrip(t *testing.T) { 13 + patches := []struct { 14 + File string 15 + SkipTextCompare bool 16 + }{ 17 + {File: "copy.patch"}, 18 + {File: "copy_modify.patch"}, 19 + {File: "delete.patch"}, 20 + {File: "mode.patch"}, 21 + {File: "mode_modify.patch"}, 22 + {File: "modify.patch"}, 23 + {File: "new.patch"}, 24 + {File: "new_empty.patch"}, 25 + {File: "new_mode.patch"}, 26 + {File: "rename.patch"}, 27 + {File: "rename_modify.patch"}, 28 + 29 + // Due to differences between Go's 'encoding/zlib' package and the zlib 30 + // C library, binary patches cannot be compared directly as the patch 31 + // data is slightly different when re-encoded by Go. 32 + {File: "binary_modify.patch", SkipTextCompare: true}, 33 + {File: "binary_new.patch", SkipTextCompare: true}, 34 + {File: "binary_modify_nodata.patch"}, 35 + } 36 + 37 + for _, patch := range patches { 38 + t.Run(patch.File, func(t *testing.T) { 39 + b, err := os.ReadFile(filepath.Join("testdata", "string", patch.File)) 40 + if err != nil { 41 + t.Fatalf("failed to read patch: %v", err) 42 + } 43 + 44 + original := assertParseSingleFile(t, b, "patch") 45 + str := original.String() 46 + 47 + if !patch.SkipTextCompare { 48 + if string(b) != str { 49 + t.Errorf("incorrect patch text\nexpected: %q\n actual: %q\n", string(b), str) 50 + } 51 + } 52 + 53 + reparsed := assertParseSingleFile(t, []byte(str), "formatted patch") 54 + assertFilesEqual(t, original, reparsed) 55 + }) 56 + } 57 + } 58 + 59 + func assertParseSingleFile(t *testing.T, b []byte, kind string) *File { 60 + files, _, err := Parse(bytes.NewReader(b)) 61 + if err != nil { 62 + t.Fatalf("failed to parse %s: %v", kind, err) 63 + } 64 + if len(files) != 1 { 65 + t.Fatalf("expected %s to contain a single files, but found %d", kind, len(files)) 66 + } 67 + return files[0] 68 + } 69 + 70 + func assertFilesEqual(t 
*testing.T, expected, actual *File) { 71 + assertEqual(t, expected.OldName, actual.OldName, "OldName") 72 + assertEqual(t, expected.NewName, actual.NewName, "NewName") 73 + 74 + assertEqual(t, expected.IsNew, actual.IsNew, "IsNew") 75 + assertEqual(t, expected.IsDelete, actual.IsDelete, "IsDelete") 76 + assertEqual(t, expected.IsCopy, actual.IsCopy, "IsCopy") 77 + assertEqual(t, expected.IsRename, actual.IsRename, "IsRename") 78 + 79 + assertEqual(t, expected.OldMode, actual.OldMode, "OldMode") 80 + assertEqual(t, expected.NewMode, actual.NewMode, "NewMode") 81 + 82 + assertEqual(t, expected.OldOIDPrefix, actual.OldOIDPrefix, "OldOIDPrefix") 83 + assertEqual(t, expected.NewOIDPrefix, actual.NewOIDPrefix, "NewOIDPrefix") 84 + assertEqual(t, expected.Score, actual.Score, "Score") 85 + 86 + if len(expected.TextFragments) == len(actual.TextFragments) { 87 + for i := range expected.TextFragments { 88 + prefix := fmt.Sprintf("TextFragments[%d].", i) 89 + ef := expected.TextFragments[i] 90 + af := actual.TextFragments[i] 91 + 92 + assertEqual(t, ef.Comment, af.Comment, prefix+"Comment") 93 + 94 + assertEqual(t, ef.OldPosition, af.OldPosition, prefix+"OldPosition") 95 + assertEqual(t, ef.OldLines, af.OldLines, prefix+"OldLines") 96 + 97 + assertEqual(t, ef.NewPosition, af.NewPosition, prefix+"NewPosition") 98 + assertEqual(t, ef.NewLines, af.NewLines, prefix+"NewLines") 99 + 100 + assertEqual(t, ef.LinesAdded, af.LinesAdded, prefix+"LinesAdded") 101 + assertEqual(t, ef.LinesDeleted, af.LinesDeleted, prefix+"LinesDeleted") 102 + 103 + assertEqual(t, ef.LeadingContext, af.LeadingContext, prefix+"LeadingContext") 104 + assertEqual(t, ef.TrailingContext, af.TrailingContext, prefix+"TrailingContext") 105 + 106 + if !slices.Equal(ef.Lines, af.Lines) { 107 + t.Errorf("%sLines: expected %#v, actual %#v", prefix, ef.Lines, af.Lines) 108 + } 109 + } 110 + } else { 111 + t.Errorf("TextFragments: expected length %d, actual length %d", len(expected.TextFragments), 
len(actual.TextFragments)) 112 + } 113 + 114 + assertEqual(t, expected.IsBinary, actual.IsBinary, "IsBinary") 115 + 116 + if expected.BinaryFragment != nil { 117 + if actual.BinaryFragment == nil { 118 + t.Errorf("BinaryFragment: expected non-nil, actual is nil") 119 + } else { 120 + ef := expected.BinaryFragment 121 + af := expected.BinaryFragment 122 + 123 + assertEqual(t, ef.Method, af.Method, "BinaryFragment.Method") 124 + assertEqual(t, ef.Size, af.Size, "BinaryFragment.Size") 125 + 126 + if !slices.Equal(ef.Data, af.Data) { 127 + t.Errorf("BinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) 128 + } 129 + } 130 + } else if actual.BinaryFragment != nil { 131 + t.Errorf("BinaryFragment: expected nil, actual is non-nil") 132 + } 133 + 134 + if expected.ReverseBinaryFragment != nil { 135 + if actual.ReverseBinaryFragment == nil { 136 + t.Errorf("ReverseBinaryFragment: expected non-nil, actual is nil") 137 + } else { 138 + ef := expected.ReverseBinaryFragment 139 + af := expected.ReverseBinaryFragment 140 + 141 + assertEqual(t, ef.Method, af.Method, "ReverseBinaryFragment.Method") 142 + assertEqual(t, ef.Size, af.Size, "ReverseBinaryFragment.Size") 143 + 144 + if !slices.Equal(ef.Data, af.Data) { 145 + t.Errorf("ReverseBinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) 146 + } 147 + } 148 + } else if actual.ReverseBinaryFragment != nil { 149 + t.Errorf("ReverseBinaryFragment: expected nil, actual is non-nil") 150 + } 151 + } 152 + 153 + func assertEqual[T comparable](t *testing.T, expected, actual T, name string) { 154 + if expected != actual { 155 + t.Errorf("%s: expected %#v, actual %#v", name, expected, actual) 156 + } 157 + }

+28

gitdiff/format_test.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "strings" 5 + "testing" 6 + ) 7 + 8 + func TestFormatter_WriteQuotedName(t *testing.T) { 9 + tests := []struct { 10 + Input string 11 + Expected string 12 + }{ 13 + {"noquotes.txt", `noquotes.txt`}, 14 + {"no quotes.txt", `no quotes.txt`}, 15 + {"new\nline", `"new\nline"`}, 16 + {"escape\x1B null\x00", `"escape\033 null\000"`}, 17 + {"snowman \u2603 snowman", `"snowman \342\230\203 snowman"`}, 18 + {"\"already quoted\"", `"\"already quoted\""`}, 19 + } 20 + 21 + for _, test := range tests { 22 + var b strings.Builder 23 + newFormatter(&b).WriteQuotedName(test.Input) 24 + if b.String() != test.Expected { 25 + t.Errorf("expected %q, got %q", test.Expected, b.String()) 26 + } 27 + } 28 + }

+33 -2

gitdiff/gitdiff.go

··· 4 4 "errors" 5 5 "fmt" 6 6 "os" 7 + "strings" 7 8 ) 8 9 9 10 // File describes changes to a single file. It can be either a text file or a ··· 38 39 ReverseBinaryFragment *BinaryFragment 39 40 } 40 41 42 + // String returns a git diff representation of this file. The value can be 43 + // parsed by this library to obtain the same File, but may not be the same as 44 + // the original input. 45 + func (f *File) String() string { 46 + var diff strings.Builder 47 + newFormatter(&diff).FormatFile(f) 48 + return diff.String() 49 + } 50 + 41 51 // TextFragment describes changed lines starting at a specific line in a text file. 42 52 type TextFragment struct { 43 53 Comment string ··· 57 67 Lines []Line 58 68 } 59 69 60 - // Header returns the canonical header of this fragment. 70 + // String returns a git diff format of this fragment. See [File.String] for 71 + // more details on this format. 72 + func (f *TextFragment) String() string { 73 + var diff strings.Builder 74 + newFormatter(&diff).FormatTextFragment(f) 75 + return diff.String() 76 + } 77 + 78 + // Header returns a git diff header of this fragment. See [File.String] for 79 + // more details on this format. 61 80 func (f *TextFragment) Header() string { 62 - return fmt.Sprintf("@@ -%d,%d +%d,%d @@ %s", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines, f.Comment) 81 + var hdr strings.Builder 82 + newFormatter(&hdr).FormatTextFragmentHeader(f) 83 + return hdr.String() 63 84 } 64 85 65 86 // Validate checks that the fragment is self-consistent and appliable. Validate ··· 197 218 // BinaryPatchLiteral indicates the data is the exact file content 198 219 BinaryPatchLiteral 199 220 ) 221 + 222 + // String returns a git diff format of this fragment. Due to differences in 223 + // zlib implementation between Go and Git, encoded binary data in the result 224 + // will likely differ from what Git produces for the same input. See 225 + // [File.String] for more details on this format. 
226 + func (f *BinaryFragment) String() string { 227 + var diff strings.Builder 228 + newFormatter(&diff).FormatBinaryFragment(f) 229 + return diff.String() 230 + }

+22 -22

gitdiff/io.go

··· 5 5 "io" 6 6 ) 7 7 8 + const ( 9 + byteBufferSize = 32 * 1024 // from io.Copy 10 + lineBufferSize = 32 11 + indexBufferSize = 1024 12 + ) 13 + 8 14 // LineReaderAt is the interface that wraps the ReadLinesAt method. 9 15 // 10 - // ReadLinesAt reads len(lines) into lines starting at line offset in the 11 - // input source. It returns number of full lines read (0 <= n <= len(lines)) 12 - // and any error encountered. Line numbers are zero-indexed. 16 + // ReadLinesAt reads len(lines) into lines starting at line offset. It returns 17 + // the number of lines read (0 <= n <= len(lines)) and any error encountered. 18 + // Line numbers are zero-indexed. 13 19 // 14 20 // If n < len(lines), ReadLinesAt returns a non-nil error explaining why more 15 21 // lines were not returned. 16 22 // 17 - // Each full line includes the line ending character(s). If the last line of 18 - // the input does not have a line ending character, ReadLinesAt returns the 19 - // content of the line and io.EOF. 20 - // 21 - // If the content of the input source changes after the first call to 22 - // ReadLinesAt, the behavior of future calls is undefined. 23 + // Lines read by ReadLinesAt include the newline character. The last line does 24 + // not have a final newline character if the input ends without one. 23 25 type LineReaderAt interface { 24 26 ReadLinesAt(lines [][]byte, offset int64) (n int, err error) 25 27 } ··· 65 67 lines[n] = buf[start:end] 66 68 } 67 69 68 - if n < count || buf[len(buf)-1] != '\n' { 70 + if n < count { 69 71 return n, io.EOF 70 72 } 71 73 return n, nil ··· 75 77 // for line or a read returns io.EOF. It returns an error if and only if there 76 78 // is an error reading data. 
77 79 func (r *lineReaderAt) indexTo(line int64) error { 78 - var buf [1024]byte 79 - 80 - var offset int64 81 - if len(r.index) > 0 { 82 - offset = r.index[len(r.index)-1] 83 - } 80 + var buf [indexBufferSize]byte 84 81 82 + offset := r.lastOffset() 85 83 for int64(len(r.index)) < line { 86 84 n, err := r.r.ReadAt(buf[:], offset) 87 85 if err != nil && err != io.EOF { ··· 94 92 } 95 93 } 96 94 if err == io.EOF { 97 - if n > 0 && buf[n-1] != '\n' { 95 + if offset > r.lastOffset() { 98 96 r.index = append(r.index, offset) 99 97 } 100 98 r.eof = true ··· 102 100 } 103 101 } 104 102 return nil 103 + } 104 + 105 + func (r *lineReaderAt) lastOffset() int64 { 106 + if n := len(r.index); n > 0 { 107 + return r.index[n-1] 108 + } 109 + return 0 105 110 } 106 111 107 112 // readBytes reads the bytes of the n lines starting at line and returns the ··· 146 151 } 147 152 return false, err 148 153 } 149 - 150 - const ( 151 - byteBufferSize = 32 * 1024 // from io.Copy 152 - lineBufferSize = 32 153 - ) 154 154 155 155 // copyFrom writes bytes starting from offset off in src to dst stopping at the 156 156 // end of src or at the first error. copyFrom returns the number of bytes

+54 -2

gitdiff/io_test.go

··· 9 9 ) 10 10 11 11 func TestLineReaderAt(t *testing.T) { 12 + const lineTemplate = "generated test line %d\n" 13 + 12 14 tests := map[string]struct { 13 15 InputLines int 14 16 Offset int64 ··· 41 43 InputLines: 4, 42 44 Offset: 2, 43 45 Count: 0, 46 + }, 47 + "readAllLines": { 48 + InputLines: 64, 49 + Offset: 0, 50 + Count: 64, 44 51 }, 45 52 "readThroughEOF": { 46 53 InputLines: 16, ··· 71 78 }, 72 79 } 73 80 74 - const lineTemplate = "generated test line %d\n" 75 - 76 81 for name, test := range tests { 77 82 t.Run(name, func(t *testing.T) { 78 83 var input bytes.Buffer ··· 110 115 for i := 0; i < n; i++ { 111 116 if !bytes.Equal(output[i], lines[i]) { 112 117 t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], lines[i]) 118 + } 119 + } 120 + }) 121 + } 122 + 123 + newlineTests := map[string]struct { 124 + InputSize int 125 + }{ 126 + "readLinesNoFinalNewline": { 127 + InputSize: indexBufferSize + indexBufferSize/2, 128 + }, 129 + "readLinesNoFinalNewlineBufferMultiple": { 130 + InputSize: 4 * indexBufferSize, 131 + }, 132 + } 133 + 134 + for name, test := range newlineTests { 135 + t.Run(name, func(t *testing.T) { 136 + input := bytes.Repeat([]byte("0"), test.InputSize) 137 + 138 + var output [][]byte 139 + for i := 0; i < len(input); i++ { 140 + last := i 141 + i += rand.Intn(80) 142 + if i < len(input)-1 { // last character of input must not be a newline 143 + input[i] = '\n' 144 + output = append(output, input[last:i+1]) 145 + } else { 146 + output = append(output, input[last:]) 147 + } 148 + } 149 + 150 + r := &lineReaderAt{r: bytes.NewReader(input)} 151 + lines := make([][]byte, len(output)) 152 + 153 + n, err := r.ReadLinesAt(lines, 0) 154 + if err != nil { 155 + t.Fatalf("unexpected error reading reading lines: %v", err) 156 + } 157 + 158 + if n != len(output) { 159 + t.Fatalf("incorrect number of lines read: expected %d, actual %d", len(output), n) 160 + } 161 + 162 + for i, line := range lines { 163 + if 
!bytes.Equal(output[i], line) { 164 + t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], line) 113 165 } 114 166 } 115 167 })

+7 -3

gitdiff/parser.go

··· 12 12 // Parse parses a patch with changes to one or more files. Any content before 13 13 // the first file is returned as the second value. If an error occurs while 14 14 // parsing, it returns all files parsed before the error. 15 + // 16 + // Parse expects to receive a single patch. If the input may contain multiple 17 + // patches (for example, if it is an mbox file), callers should split it into 18 + // individual patches and call Parse on each one. 15 19 func Parse(r io.Reader) ([]*File, string, error) { 16 20 p := newParser(r) 17 21 ··· 29 33 if err != nil { 30 34 return files, preamble, err 31 35 } 36 + if len(files) == 0 { 37 + preamble = pre 38 + } 32 39 if file == nil { 33 40 break 34 41 } ··· 46 53 } 47 54 } 48 55 49 - if len(files) == 0 { 50 - preamble = pre 51 - } 52 56 files = append(files, file) 53 57 } 54 58

+16 -2

gitdiff/parser_test.go

··· 281 281 --- could this be a header? 282 282 nope, it's just some dashes 283 283 `, 284 - Output: nil, 285 - Preamble: "", 284 + Output: nil, 285 + Preamble: ` 286 + this is a line 287 + this is another line 288 + --- could this be a header? 289 + nope, it's just some dashes 290 + `, 286 291 }, 287 292 "detatchedFragmentLike": { 288 293 Input: ` ··· 290 295 @@ -1,3 +1,4 ~1,5 @@ 291 296 `, 292 297 Output: nil, 298 + Preamble: ` 299 + a wild fragment appears? 300 + @@ -1,3 +1,4 ~1,5 @@ 301 + `, 293 302 }, 294 303 "detatchedFragment": { 295 304 Input: ` ··· 425 434 }, 426 435 }, 427 436 Preamble: textPreamble, 437 + }, 438 + "noFiles": { 439 + InputFile: "testdata/no_files.patch", 440 + Output: nil, 441 + Preamble: textPreamble, 428 442 }, 429 443 "newBinaryFile": { 430 444 InputFile: "testdata/new_binary_file.patch",

+483

gitdiff/patch_header.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "bufio" 5 + "errors" 6 + "fmt" 7 + "io" 8 + "io/ioutil" 9 + "mime/quotedprintable" 10 + "net/mail" 11 + "strconv" 12 + "strings" 13 + "time" 14 + "unicode" 15 + ) 16 + 17 + const ( 18 + mailHeaderPrefix = "From " 19 + prettyHeaderPrefix = "commit " 20 + mailMinimumHeaderPrefix = "From:" 21 + ) 22 + 23 + // PatchHeader is a parsed version of the preamble content that appears before 24 + // the first diff in a patch. It includes metadata about the patch, such as the 25 + // author and a subject. 26 + type PatchHeader struct { 27 + // The SHA of the commit the patch was generated from. Empty if the SHA is 28 + // not included in the header. 29 + SHA string 30 + 31 + // The author details of the patch. If these details are not included in 32 + // the header, Author is nil and AuthorDate is the zero time. 33 + Author *PatchIdentity 34 + AuthorDate time.Time 35 + 36 + // The committer details of the patch. If these details are not included in 37 + // the header, Committer is nil and CommitterDate is the zero time. 38 + Committer *PatchIdentity 39 + CommitterDate time.Time 40 + 41 + // The title and body of the commit message describing the changes in the 42 + // patch. Empty if no message is included in the header. 43 + Title string 44 + Body string 45 + 46 + // If the preamble looks like an email, ParsePatchHeader will 47 + // remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the 48 + // Title and place them here. 49 + SubjectPrefix string 50 + 51 + // If the preamble looks like an email, and it contains a `---` 52 + // line, that line will be removed and everything after it will be 53 + // placed in BodyAppendix. 54 + BodyAppendix string 55 + 56 + // All headers completely unparsed 57 + RawHeaders map[string][]string 58 + } 59 + 60 + // Message returns the commit message for the header. The message consists of 61 + // the title and the body separated by an empty line. 
62 + func (h *PatchHeader) Message() string { 63 + var msg strings.Builder 64 + if h != nil { 65 + msg.WriteString(h.Title) 66 + if h.Body != "" { 67 + msg.WriteString("\n\n") 68 + msg.WriteString(h.Body) 69 + } 70 + } 71 + return msg.String() 72 + } 73 + 74 + // ParsePatchDate parses a patch date string. It returns the parsed time or an 75 + // error if s has an unknown format. ParsePatchDate supports the iso, rfc, 76 + // short, raw, unix, and default formats (with local variants) used by the 77 + // --date flag in Git. 78 + func ParsePatchDate(s string) (time.Time, error) { 79 + const ( 80 + isoFormat = "2006-01-02 15:04:05 -0700" 81 + isoStrictFormat = "2006-01-02T15:04:05-07:00" 82 + rfc2822Format = "Mon, 2 Jan 2006 15:04:05 -0700" 83 + shortFormat = "2006-01-02" 84 + defaultFormat = "Mon Jan 2 15:04:05 2006 -0700" 85 + defaultLocalFormat = "Mon Jan 2 15:04:05 2006" 86 + ) 87 + 88 + if s == "" { 89 + return time.Time{}, nil 90 + } 91 + 92 + for _, fmt := range []string{ 93 + isoFormat, 94 + isoStrictFormat, 95 + rfc2822Format, 96 + shortFormat, 97 + defaultFormat, 98 + defaultLocalFormat, 99 + } { 100 + if t, err := time.ParseInLocation(fmt, s, time.Local); err == nil { 101 + return t, nil 102 + } 103 + } 104 + 105 + // unix format 106 + if unix, err := strconv.ParseInt(s, 10, 64); err == nil { 107 + return time.Unix(unix, 0), nil 108 + } 109 + 110 + // raw format 111 + if space := strings.IndexByte(s, ' '); space > 0 { 112 + unix, uerr := strconv.ParseInt(s[:space], 10, 64) 113 + zone, zerr := time.Parse("-0700", s[space+1:]) 114 + if uerr == nil && zerr == nil { 115 + return time.Unix(unix, 0).In(zone.Location()), nil 116 + } 117 + } 118 + 119 + return time.Time{}, fmt.Errorf("unknown date format: %s", s) 120 + } 121 + 122 + // A PatchHeaderOption modifies the behavior of ParsePatchHeader. 
123 + type PatchHeaderOption func(*patchHeaderOptions) 124 + 125 + // SubjectCleanMode controls how ParsePatchHeader cleans subject lines when 126 + // parsing mail-formatted patches. 127 + type SubjectCleanMode int 128 + 129 + const ( 130 + // SubjectCleanWhitespace removes leading and trailing whitespace. 131 + SubjectCleanWhitespace SubjectCleanMode = iota 132 + 133 + // SubjectCleanAll removes leading and trailing whitespace, leading "Re:", 134 + // "re:", and ":" strings, and leading strings enclosed by '[' and ']'. 135 + // This is the default behavior of git (see `git mailinfo`) and this 136 + // package. 137 + SubjectCleanAll 138 + 139 + // SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes 140 + // leading strings enclosed by '[' and ']' if they start with "PATCH". 141 + SubjectCleanPatchOnly 142 + ) 143 + 144 + // WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By 145 + // default, uses SubjectCleanAll. 146 + func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption { 147 + return func(opts *patchHeaderOptions) { 148 + opts.subjectCleanMode = m 149 + } 150 + } 151 + 152 + type patchHeaderOptions struct { 153 + subjectCleanMode SubjectCleanMode 154 + } 155 + 156 + // ParsePatchHeader parses the preamble string returned by [Parse] into a 157 + // PatchHeader. Due to the variety of header formats, some fields of the parsed 158 + // PatchHeader may be unset after parsing. 159 + // 160 + // Supported formats are the short, medium, full, fuller, and email pretty 161 + // formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox 162 + // format used by `git format-patch`. 
163 + // 164 + // When parsing mail-formatted headers, ParsePatchHeader tries to remove 165 + // email-specific content from the title and body: 166 + // 167 + // - Based on the SubjectCleanMode, remove prefixes like reply markers and 168 + // "[PATCH]" strings from the subject, saving any removed content in the 169 + // SubjectPrefix field. Parsing always discards leading and trailing 170 + // whitespace from the subject line. The default mode is SubjectCleanAll. 171 + // 172 + // - If the body contains a "---" line (3 hyphens), remove that line and any 173 + // content after it from the body and save it in the BodyAppendix field. 174 + // 175 + // ParsePatchHeader tries to process content it does not understand wthout 176 + // returning errors, but will return errors if well-identified content like 177 + // dates or identies uses unknown or invalid formats. 178 + func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) { 179 + opts := patchHeaderOptions{ 180 + subjectCleanMode: SubjectCleanAll, // match git defaults 181 + } 182 + for _, optFn := range options { 183 + optFn(&opts) 184 + } 185 + 186 + header = strings.TrimSpace(header) 187 + if header == "" { 188 + return &PatchHeader{}, nil 189 + } 190 + 191 + var firstLine, rest string 192 + if idx := strings.IndexByte(header, '\n'); idx >= 0 { 193 + firstLine = header[:idx] 194 + rest = header[idx+1:] 195 + } else { 196 + firstLine = header 197 + rest = "" 198 + } 199 + 200 + switch { 201 + case strings.HasPrefix(firstLine, mailHeaderPrefix): 202 + return parseHeaderMail(firstLine, strings.NewReader(rest), opts) 203 + 204 + case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix): 205 + // With a minimum header, the first line is part of the actual mail 206 + // content and needs to be parsed as part of the "rest" 207 + return parseHeaderMail("", strings.NewReader(header), opts) 208 + 209 + case strings.HasPrefix(firstLine, prettyHeaderPrefix): 210 + return 
parseHeaderPretty(firstLine, strings.NewReader(rest)) 211 + } 212 + 213 + return nil, errors.New("unrecognized patch header format") 214 + } 215 + 216 + func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { 217 + const ( 218 + authorPrefix = "Author:" 219 + commitPrefix = "Commit:" 220 + datePrefix = "Date:" 221 + authorDatePrefix = "AuthorDate:" 222 + commitDatePrefix = "CommitDate:" 223 + ) 224 + 225 + h := &PatchHeader{} 226 + 227 + prettyLine = strings.TrimPrefix(prettyLine, prettyHeaderPrefix) 228 + if i := strings.IndexByte(prettyLine, ' '); i > 0 { 229 + h.SHA = prettyLine[:i] 230 + } else { 231 + h.SHA = prettyLine 232 + } 233 + 234 + s := bufio.NewScanner(r) 235 + for s.Scan() { 236 + line := s.Text() 237 + 238 + // empty line marks end of fields, remaining lines are title/message 239 + if strings.TrimSpace(line) == "" { 240 + break 241 + } 242 + 243 + items := strings.SplitN(line, ":", 2) 244 + 245 + // we have "key: value" 246 + if len(items) == 2 { 247 + key := items[0] 248 + val := items[1] 249 + h.RawHeaders[key] = append(h.RawHeaders[key], val) 250 + } 251 + 252 + switch { 253 + case strings.HasPrefix(line, authorPrefix): 254 + u, err := ParsePatchIdentity(line[len(authorPrefix):]) 255 + if err != nil { 256 + return nil, err 257 + } 258 + h.Author = &u 259 + 260 + case strings.HasPrefix(line, commitPrefix): 261 + u, err := ParsePatchIdentity(line[len(commitPrefix):]) 262 + if err != nil { 263 + return nil, err 264 + } 265 + h.Committer = &u 266 + 267 + case strings.HasPrefix(line, datePrefix): 268 + d, err := ParsePatchDate(strings.TrimSpace(line[len(datePrefix):])) 269 + if err != nil { 270 + return nil, err 271 + } 272 + h.AuthorDate = d 273 + 274 + case strings.HasPrefix(line, authorDatePrefix): 275 + d, err := ParsePatchDate(strings.TrimSpace(line[len(authorDatePrefix):])) 276 + if err != nil { 277 + return nil, err 278 + } 279 + h.AuthorDate = d 280 + 281 + case strings.HasPrefix(line, commitDatePrefix): 282 + d, err := 
ParsePatchDate(strings.TrimSpace(line[len(commitDatePrefix):])) 283 + if err != nil { 284 + return nil, err 285 + } 286 + h.CommitterDate = d 287 + } 288 + } 289 + if s.Err() != nil { 290 + return nil, s.Err() 291 + } 292 + 293 + title, indent := scanMessageTitle(s) 294 + if s.Err() != nil { 295 + return nil, s.Err() 296 + } 297 + h.Title = title 298 + 299 + if title != "" { 300 + // Don't check for an appendix, pretty headers do not contain them 301 + body, _ := scanMessageBody(s, indent, false) 302 + if s.Err() != nil { 303 + return nil, s.Err() 304 + } 305 + h.Body = body 306 + } 307 + 308 + return h, nil 309 + } 310 + 311 + func scanMessageTitle(s *bufio.Scanner) (title string, indent string) { 312 + var b strings.Builder 313 + for i := 0; s.Scan(); i++ { 314 + line := s.Text() 315 + trimLine := strings.TrimSpace(line) 316 + if trimLine == "" { 317 + break 318 + } 319 + 320 + if i == 0 { 321 + if start := strings.IndexFunc(line, func(c rune) bool { return !unicode.IsSpace(c) }); start > 0 { 322 + indent = line[:start] 323 + } 324 + } 325 + if b.Len() > 0 { 326 + b.WriteByte(' ') 327 + } 328 + b.WriteString(trimLine) 329 + } 330 + return b.String(), indent 331 + } 332 + 333 + func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) { 334 + // Body and appendix 335 + var body, appendix strings.Builder 336 + c := &body 337 + var empty int 338 + for i := 0; s.Scan(); i++ { 339 + line := s.Text() 340 + 341 + line = strings.TrimRightFunc(line, unicode.IsSpace) 342 + line = strings.TrimPrefix(line, indent) 343 + 344 + if line == "" { 345 + empty++ 346 + continue 347 + } 348 + 349 + // If requested, parse out "appendix" information (often added 350 + // by `git format-patch` and removed by `git am`). 
351 + if separateAppendix && c == &body && line == "---" { 352 + c = &appendix 353 + continue 354 + } 355 + 356 + if c.Len() > 0 { 357 + c.WriteByte('\n') 358 + if empty > 0 { 359 + c.WriteByte('\n') 360 + } 361 + } 362 + empty = 0 363 + 364 + c.WriteString(line) 365 + } 366 + return body.String(), appendix.String() 367 + } 368 + 369 + func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) { 370 + msg, err := mail.ReadMessage(r) 371 + if err != nil { 372 + return nil, err 373 + } 374 + 375 + h := &PatchHeader{} 376 + h.RawHeaders = msg.Header 377 + 378 + if strings.HasPrefix(mailLine, mailHeaderPrefix) { 379 + mailLine = strings.TrimPrefix(mailLine, mailHeaderPrefix) 380 + if i := strings.IndexByte(mailLine, ' '); i > 0 { 381 + h.SHA = mailLine[:i] 382 + } 383 + } 384 + 385 + from := msg.Header.Get("From") 386 + if from != "" { 387 + u, err := ParsePatchIdentity(from) 388 + if err != nil { 389 + return nil, err 390 + } 391 + h.Author = &u 392 + } 393 + 394 + date := msg.Header.Get("Date") 395 + if date != "" { 396 + d, err := ParsePatchDate(date) 397 + if err != nil { 398 + return nil, err 399 + } 400 + h.AuthorDate = d 401 + } 402 + 403 + subject := msg.Header.Get("Subject") 404 + h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode) 405 + 406 + s := bufio.NewScanner(msg.Body) 407 + h.Body, h.BodyAppendix = scanMessageBody(s, "", true) 408 + if s.Err() != nil { 409 + return nil, s.Err() 410 + } 411 + 412 + return h, nil 413 + } 414 + 415 + func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) { 416 + switch mode { 417 + case SubjectCleanAll, SubjectCleanPatchOnly: 418 + case SubjectCleanWhitespace: 419 + return "", strings.TrimSpace(decodeSubject(s)) 420 + default: 421 + panic(fmt.Sprintf("unknown clean mode: %d", mode)) 422 + } 423 + 424 + // Based on the algorithm from Git in mailinfo.c:cleanup_subject() 425 + // If compatibility with `git am` drifts, go there to see if 
there are any updates. 426 + 427 + at := 0 428 + for at < len(s) { 429 + switch s[at] { 430 + case 'r', 'R': 431 + // Detect re:, Re:, rE: and RE: 432 + if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' { 433 + at += 3 434 + continue 435 + } 436 + 437 + case ' ', '\t', ':': 438 + // Delete whitespace and duplicate ':' characters 439 + at++ 440 + continue 441 + 442 + case '[': 443 + if i := strings.IndexByte(s[at:], ']'); i > 0 { 444 + if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") { 445 + at += i + 1 446 + continue 447 + } 448 + } 449 + } 450 + 451 + // Nothing was removed, end processing 452 + break 453 + } 454 + 455 + prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace) 456 + subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace) 457 + return 458 + } 459 + 460 + // Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result 461 + // of a `git format-patch` when the commit title has a non-ASCII character (i.e. an emoji). 462 + // See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject 463 + func decodeSubject(encoded string) string { 464 + if !strings.HasPrefix(encoded, "=?UTF-8?q?") { 465 + // not UTF-8 encoded 466 + return encoded 467 + } 468 + 469 + // If the subject is too long, `git format-patch` may produce a subject line across 470 + // multiple lines. When parsed, this can look like the following: 471 + // <UTF8-prefix><first-line> <UTF8-prefix><second-line> 472 + payload := " " + encoded 473 + payload = strings.ReplaceAll(payload, " =?UTF-8?q?", "") 474 + payload = strings.ReplaceAll(payload, "?=", "") 475 + 476 + decoded, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(payload))) 477 + if err != nil { 478 + // if err, abort decoding and return original subject 479 + return encoded 480 + } 481 + 482 + return string(decoded) 483 + }

+590

gitdiff/patch_header_test.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "testing" 5 + "time" 6 + ) 7 + 8 + func TestParsePatchDate(t *testing.T) { 9 + expected := time.Date(2020, 4, 9, 8, 7, 6, 0, time.UTC) 10 + 11 + tests := map[string]struct { 12 + Input string 13 + Output time.Time 14 + Err interface{} 15 + }{ 16 + "default": { 17 + Input: "Thu Apr 9 01:07:06 2020 -0700", 18 + Output: expected, 19 + }, 20 + "defaultLocal": { 21 + Input: "Thu Apr 9 01:07:06 2020", 22 + Output: time.Date(2020, 4, 9, 1, 7, 6, 0, time.Local), 23 + }, 24 + "iso": { 25 + Input: "2020-04-09 01:07:06 -0700", 26 + Output: expected, 27 + }, 28 + "isoStrict": { 29 + Input: "2020-04-09T01:07:06-07:00", 30 + Output: expected, 31 + }, 32 + "rfc": { 33 + Input: "Thu, 9 Apr 2020 01:07:06 -0700", 34 + Output: expected, 35 + }, 36 + "short": { 37 + Input: "2020-04-09", 38 + Output: time.Date(2020, 4, 9, 0, 0, 0, 0, time.Local), 39 + }, 40 + "raw": { 41 + Input: "1586419626 -0700", 42 + Output: expected, 43 + }, 44 + "unix": { 45 + Input: "1586419626", 46 + Output: expected, 47 + }, 48 + "unknownFormat": { 49 + Input: "4/9/2020 01:07:06 PDT", 50 + Err: "unknown date format", 51 + }, 52 + "empty": { 53 + Input: "", 54 + }, 55 + } 56 + 57 + for name, test := range tests { 58 + t.Run(name, func(t *testing.T) { 59 + d, err := ParsePatchDate(test.Input) 60 + if test.Err != nil { 61 + assertError(t, test.Err, err, "parsing date") 62 + return 63 + } 64 + if err != nil { 65 + t.Fatalf("unexpected error parsing date: %v", err) 66 + } 67 + if !test.Output.Equal(d) { 68 + t.Errorf("incorrect parsed date: expected %v, actual %v", test.Output, d) 69 + } 70 + }) 71 + } 72 + } 73 + 74 + func TestParsePatchHeader(t *testing.T) { 75 + expectedSHA := "61f5cd90bed4d204ee3feb3aa41ee91d4734855b" 76 + expectedIdentity := &PatchIdentity{ 77 + Name: "Morton Haypenny", 78 + Email: "mhaypenny@example.com", 79 + } 80 + expectedDate := time.Date(2020, 04, 11, 15, 21, 23, 0, time.FixedZone("PDT", -7*60*60)) 81 + expectedTitle := "A sample commit to 
test header parsing" 82 + expectedEmojiOneLineTitle := "🤖 Enabling auto-merging" 83 + expectedEmojiMultiLineTitle := "[IA64] Put ia64 config files on the Uwe Kleine-König diet" 84 + expectedBody := "The medium format shows the body, which\nmay wrap on to multiple lines.\n\nAnother body line." 85 + expectedBodyAppendix := "CC: Joe Smith <joe.smith@company.com>" 86 + 87 + tests := map[string]struct { 88 + Input string 89 + Options []PatchHeaderOption 90 + Header PatchHeader 91 + Err interface{} 92 + }{ 93 + "prettyShort": { 94 + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b 95 + Author: Morton Haypenny <mhaypenny@example.com> 96 + 97 + A sample commit to test header parsing 98 + `, 99 + Header: PatchHeader{ 100 + SHA: expectedSHA, 101 + Author: expectedIdentity, 102 + Title: expectedTitle, 103 + }, 104 + }, 105 + "prettyMedium": { 106 + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b 107 + Author: Morton Haypenny <mhaypenny@example.com> 108 + Date: Sat Apr 11 15:21:23 2020 -0700 109 + 110 + A sample commit to test header parsing 111 + 112 + The medium format shows the body, which 113 + may wrap on to multiple lines. 114 + 115 + Another body line. 116 + `, 117 + Header: PatchHeader{ 118 + SHA: expectedSHA, 119 + Author: expectedIdentity, 120 + AuthorDate: expectedDate, 121 + Title: expectedTitle, 122 + Body: expectedBody, 123 + }, 124 + }, 125 + "prettyFull": { 126 + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b 127 + Author: Morton Haypenny <mhaypenny@example.com> 128 + Commit: Morton Haypenny <mhaypenny@example.com> 129 + 130 + A sample commit to test header parsing 131 + 132 + The medium format shows the body, which 133 + may wrap on to multiple lines. 134 + 135 + Another body line. 
136 + `, 137 + Header: PatchHeader{ 138 + SHA: expectedSHA, 139 + Author: expectedIdentity, 140 + Committer: expectedIdentity, 141 + Title: expectedTitle, 142 + Body: expectedBody, 143 + }, 144 + }, 145 + "prettyFuller": { 146 + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b 147 + Author: Morton Haypenny <mhaypenny@example.com> 148 + AuthorDate: Sat Apr 11 15:21:23 2020 -0700 149 + Commit: Morton Haypenny <mhaypenny@example.com> 150 + CommitDate: Sat Apr 11 15:21:23 2020 -0700 151 + 152 + A sample commit to test header parsing 153 + 154 + The medium format shows the body, which 155 + may wrap on to multiple lines. 156 + 157 + Another body line. 158 + `, 159 + Header: PatchHeader{ 160 + SHA: expectedSHA, 161 + Author: expectedIdentity, 162 + AuthorDate: expectedDate, 163 + Committer: expectedIdentity, 164 + CommitterDate: expectedDate, 165 + Title: expectedTitle, 166 + Body: expectedBody, 167 + }, 168 + }, 169 + "prettyAppendix": { 170 + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b 171 + Author: Morton Haypenny <mhaypenny@example.com> 172 + AuthorDate: Sat Apr 11 15:21:23 2020 -0700 173 + Commit: Morton Haypenny <mhaypenny@example.com> 174 + CommitDate: Sat Apr 11 15:21:23 2020 -0700 175 + 176 + A sample commit to test header parsing 177 + 178 + The medium format shows the body, which 179 + may wrap on to multiple lines. 180 + 181 + Another body line. 
182 + --- 183 + CC: Joe Smith <joe.smith@company.com> 184 + `, 185 + Header: PatchHeader{ 186 + SHA: expectedSHA, 187 + Author: expectedIdentity, 188 + AuthorDate: expectedDate, 189 + Committer: expectedIdentity, 190 + CommitterDate: expectedDate, 191 + Title: expectedTitle, 192 + Body: expectedBody + "\n---\n" + expectedBodyAppendix, 193 + }, 194 + }, 195 + "mailbox": { 196 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 197 + From: Morton Haypenny <mhaypenny@example.com> 198 + Date: Sat, 11 Apr 2020 15:21:23 -0700 199 + Subject: [PATCH] A sample commit to test header parsing 200 + 201 + The medium format shows the body, which 202 + may wrap on to multiple lines. 203 + 204 + Another body line. 205 + `, 206 + Header: PatchHeader{ 207 + SHA: expectedSHA, 208 + Author: expectedIdentity, 209 + AuthorDate: expectedDate, 210 + Title: expectedTitle, 211 + Body: expectedBody, 212 + }, 213 + }, 214 + "mailboxPatchOnly": { 215 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 216 + From: Morton Haypenny <mhaypenny@example.com> 217 + Date: Sat, 11 Apr 2020 15:21:23 -0700 218 + Subject: [PATCH] [BUG-123] A sample commit to test header parsing 219 + 220 + The medium format shows the body, which 221 + may wrap on to multiple lines. 222 + 223 + Another body line. 
224 + `, 225 + Options: []PatchHeaderOption{ 226 + WithSubjectCleanMode(SubjectCleanPatchOnly), 227 + }, 228 + Header: PatchHeader{ 229 + SHA: expectedSHA, 230 + Author: expectedIdentity, 231 + AuthorDate: expectedDate, 232 + Title: "[BUG-123] " + expectedTitle, 233 + Body: expectedBody, 234 + }, 235 + }, 236 + "mailboxEmojiOneLine": { 237 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 238 + From: Morton Haypenny <mhaypenny@example.com> 239 + Date: Sat, 11 Apr 2020 15:21:23 -0700 240 + Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Enabling=20auto-merging?= 241 + 242 + The medium format shows the body, which 243 + may wrap on to multiple lines. 244 + 245 + Another body line. 246 + `, 247 + Header: PatchHeader{ 248 + SHA: expectedSHA, 249 + Author: expectedIdentity, 250 + AuthorDate: expectedDate, 251 + Title: expectedEmojiOneLineTitle, 252 + Body: expectedBody, 253 + }, 254 + }, 255 + "mailboxEmojiMultiLine": { 256 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 257 + From: Morton Haypenny <mhaypenny@example.com> 258 + Date: Sat, 11 Apr 2020 15:21:23 -0700 259 + Subject: [PATCH] =?UTF-8?q?[IA64]=20Put=20ia64=20config=20files=20on=20the=20?= 260 + =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig=20diet?= 261 + 262 + The medium format shows the body, which 263 + may wrap on to multiple lines. 264 + 265 + Another body line. 
266 + `, 267 + Header: PatchHeader{ 268 + SHA: expectedSHA, 269 + Author: expectedIdentity, 270 + AuthorDate: expectedDate, 271 + Title: expectedEmojiMultiLineTitle, 272 + Body: expectedBody, 273 + }, 274 + }, 275 + "mailboxRFC5322SpecialCharacters": { 276 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 277 + From: "dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com> 278 + Date: Sat, 11 Apr 2020 15:21:23 -0700 279 + Subject: [PATCH] A sample commit to test header parsing 280 + 281 + The medium format shows the body, which 282 + may wrap on to multiple lines. 283 + 284 + Another body line. 285 + `, 286 + Header: PatchHeader{ 287 + SHA: expectedSHA, 288 + Author: &PatchIdentity{ 289 + Name: "dependabot[bot]", 290 + Email: "12345+dependabot[bot]@users.noreply.github.com", 291 + }, 292 + AuthorDate: expectedDate, 293 + Title: expectedTitle, 294 + Body: expectedBody, 295 + }, 296 + }, 297 + "mailboxAppendix": { 298 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 299 + From: Morton Haypenny <mhaypenny@example.com> 300 + Date: Sat, 11 Apr 2020 15:21:23 -0700 301 + Subject: [PATCH] A sample commit to test header parsing 302 + 303 + The medium format shows the body, which 304 + may wrap on to multiple lines. 305 + 306 + Another body line. 307 + --- 308 + CC: Joe Smith <joe.smith@company.com> 309 + `, 310 + Header: PatchHeader{ 311 + SHA: expectedSHA, 312 + Author: expectedIdentity, 313 + AuthorDate: expectedDate, 314 + Title: expectedTitle, 315 + Body: expectedBody, 316 + BodyAppendix: expectedBodyAppendix, 317 + }, 318 + }, 319 + "mailboxMinimalNoName": { 320 + Input: `From: <mhaypenny@example.com> 321 + Subject: [PATCH] A sample commit to test header parsing 322 + 323 + The medium format shows the body, which 324 + may wrap on to multiple lines. 325 + 326 + Another body line. 
327 + `, 328 + Header: PatchHeader{ 329 + Author: &PatchIdentity{expectedIdentity.Email, expectedIdentity.Email}, 330 + Title: expectedTitle, 331 + Body: expectedBody, 332 + }, 333 + }, 334 + "mailboxMinimal": { 335 + Input: `From: Morton Haypenny <mhaypenny@example.com> 336 + Subject: [PATCH] A sample commit to test header parsing 337 + 338 + The medium format shows the body, which 339 + may wrap on to multiple lines. 340 + 341 + Another body line. 342 + `, 343 + Header: PatchHeader{ 344 + Author: expectedIdentity, 345 + Title: expectedTitle, 346 + Body: expectedBody, 347 + }, 348 + }, 349 + "unwrapTitle": { 350 + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b 351 + Author: Morton Haypenny <mhaypenny@example.com> 352 + Date: Sat Apr 11 15:21:23 2020 -0700 353 + 354 + A sample commit to test header parsing with a long 355 + title that is wrapped. 356 + `, 357 + Header: PatchHeader{ 358 + SHA: expectedSHA, 359 + Author: expectedIdentity, 360 + AuthorDate: expectedDate, 361 + Title: expectedTitle + " with a long title that is wrapped.", 362 + }, 363 + }, 364 + "normalizeBodySpace": { 365 + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b 366 + Author: Morton Haypenny <mhaypenny@example.com> 367 + Date: Sat Apr 11 15:21:23 2020 -0700 368 + 369 + A sample commit to test header parsing 370 + 371 + 372 + The medium format shows the body, which 373 + may wrap on to multiple lines. 374 + 375 + 376 + Another body line. 
377 + 378 + 379 + `, 380 + Header: PatchHeader{ 381 + SHA: expectedSHA, 382 + Author: expectedIdentity, 383 + AuthorDate: expectedDate, 384 + Title: expectedTitle, 385 + Body: expectedBody, 386 + }, 387 + }, 388 + "ignoreLeadingBlankLines": { 389 + Input: ` 390 + 391 + ` + " " + ` 392 + commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b 393 + Author: Morton Haypenny <mhaypenny@example.com> 394 + 395 + A sample commit to test header parsing 396 + `, 397 + Header: PatchHeader{ 398 + SHA: expectedSHA, 399 + Author: expectedIdentity, 400 + Title: expectedTitle, 401 + }, 402 + }, 403 + "emptyHeader": { 404 + Input: "", 405 + Header: PatchHeader{}, 406 + }, 407 + } 408 + 409 + for name, test := range tests { 410 + t.Run(name, func(t *testing.T) { 411 + h, err := ParsePatchHeader(test.Input, test.Options...) 412 + if test.Err != nil { 413 + assertError(t, test.Err, err, "parsing patch header") 414 + return 415 + } 416 + if err != nil { 417 + t.Fatalf("unexpected error parsing patch header: %v", err) 418 + } 419 + if h == nil { 420 + t.Fatalf("expected non-nil header, but got nil") 421 + } 422 + 423 + exp := test.Header 424 + act := *h 425 + 426 + if exp.SHA != act.SHA { 427 + t.Errorf("incorrect parsed SHA: expected %q, actual %q", exp.SHA, act.SHA) 428 + } 429 + 430 + assertPatchIdentity(t, "author", exp.Author, act.Author) 431 + if !exp.AuthorDate.Equal(act.AuthorDate) { 432 + t.Errorf("incorrect parsed author date: expected %v, but got %v", exp.AuthorDate, act.AuthorDate) 433 + } 434 + 435 + assertPatchIdentity(t, "committer", exp.Committer, act.Committer) 436 + if !exp.CommitterDate.Equal(act.CommitterDate) { 437 + t.Errorf("incorrect parsed committer date: expected %v, but got %v", exp.CommitterDate, act.CommitterDate) 438 + } 439 + 440 + if exp.Title != act.Title { 441 + t.Errorf("incorrect parsed title:\n expected: %q\n actual: %q", exp.Title, act.Title) 442 + } 443 + if exp.Body != act.Body { 444 + t.Errorf("incorrect parsed body:\n expected: %q\n actual: %q", 
exp.Body, act.Body) 445 + } 446 + if exp.BodyAppendix != act.BodyAppendix { 447 + t.Errorf("incorrect parsed body appendix:\n expected: %q\n actual: %q", 448 + exp.BodyAppendix, act.BodyAppendix) 449 + } 450 + }) 451 + } 452 + } 453 + 454 + func assertPatchIdentity(t *testing.T, kind string, exp, act *PatchIdentity) { 455 + switch { 456 + case exp == nil && act == nil: 457 + case exp == nil && act != nil: 458 + t.Errorf("incorrect parsed %s: expected nil, but got %+v", kind, act) 459 + case exp != nil && act == nil: 460 + t.Errorf("incorrect parsed %s: expected %+v, but got nil", kind, exp) 461 + case exp.Name != act.Name || exp.Email != act.Email: 462 + t.Errorf("incorrect parsed %s, expected %+v, bot got %+v", kind, exp, act) 463 + } 464 + } 465 + 466 + func TestCleanSubject(t *testing.T) { 467 + expectedSubject := "A sample commit to test header parsing" 468 + 469 + tests := map[string]struct { 470 + Input string 471 + Mode SubjectCleanMode 472 + Prefix string 473 + Subject string 474 + }{ 475 + "CleanAll/noPrefix": { 476 + Input: expectedSubject, 477 + Mode: SubjectCleanAll, 478 + Subject: expectedSubject, 479 + }, 480 + "CleanAll/patchPrefix": { 481 + Input: "[PATCH] " + expectedSubject, 482 + Mode: SubjectCleanAll, 483 + Prefix: "[PATCH] ", 484 + Subject: expectedSubject, 485 + }, 486 + "CleanAll/patchPrefixNoSpace": { 487 + Input: "[PATCH]" + expectedSubject, 488 + Mode: SubjectCleanAll, 489 + Prefix: "[PATCH]", 490 + Subject: expectedSubject, 491 + }, 492 + "CleanAll/patchPrefixContent": { 493 + Input: "[PATCH 3/7] " + expectedSubject, 494 + Mode: SubjectCleanAll, 495 + Prefix: "[PATCH 3/7] ", 496 + Subject: expectedSubject, 497 + }, 498 + "CleanAll/spacePrefix": { 499 + Input: " " + expectedSubject, 500 + Mode: SubjectCleanAll, 501 + Subject: expectedSubject, 502 + }, 503 + "CleanAll/replyLowerPrefix": { 504 + Input: "re: " + expectedSubject, 505 + Mode: SubjectCleanAll, 506 + Prefix: "re: ", 507 + Subject: expectedSubject, 508 + }, 509 + 
"CleanAll/replyMixedPrefix": { 510 + Input: "Re: " + expectedSubject, 511 + Mode: SubjectCleanAll, 512 + Prefix: "Re: ", 513 + Subject: expectedSubject, 514 + }, 515 + "CleanAll/replyCapsPrefix": { 516 + Input: "RE: " + expectedSubject, 517 + Mode: SubjectCleanAll, 518 + Prefix: "RE: ", 519 + Subject: expectedSubject, 520 + }, 521 + "CleanAll/replyDoublePrefix": { 522 + Input: "Re: re: " + expectedSubject, 523 + Mode: SubjectCleanAll, 524 + Prefix: "Re: re: ", 525 + Subject: expectedSubject, 526 + }, 527 + "CleanAll/noPrefixSubjectHasRe": { 528 + Input: "Reimplement parsing", 529 + Mode: SubjectCleanAll, 530 + Subject: "Reimplement parsing", 531 + }, 532 + "CleanAll/patchPrefixSubjectHasRe": { 533 + Input: "[PATCH 1/2] Reimplement parsing", 534 + Mode: SubjectCleanAll, 535 + Prefix: "[PATCH 1/2] ", 536 + Subject: "Reimplement parsing", 537 + }, 538 + "CleanAll/unclosedPrefix": { 539 + Input: "[Just to annoy people", 540 + Mode: SubjectCleanAll, 541 + Subject: "[Just to annoy people", 542 + }, 543 + "CleanAll/multiplePrefix": { 544 + Input: " Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject + " ", 545 + Mode: SubjectCleanAll, 546 + Prefix: "Re:Re: [PATCH 1/2][DRAFT] ", 547 + Subject: expectedSubject, 548 + }, 549 + "CleanPatchOnly/patchPrefix": { 550 + Input: "[PATCH] " + expectedSubject, 551 + Mode: SubjectCleanPatchOnly, 552 + Prefix: "[PATCH] ", 553 + Subject: expectedSubject, 554 + }, 555 + "CleanPatchOnly/mixedPrefix": { 556 + Input: "[PATCH] [TICKET-123] " + expectedSubject, 557 + Mode: SubjectCleanPatchOnly, 558 + Prefix: "[PATCH] ", 559 + Subject: "[TICKET-123] " + expectedSubject, 560 + }, 561 + "CleanPatchOnly/multiplePrefix": { 562 + Input: "Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject, 563 + Mode: SubjectCleanPatchOnly, 564 + Prefix: "Re:Re: [PATCH 1/2]", 565 + Subject: "[DRAFT] " + expectedSubject, 566 + }, 567 + "CleanWhitespace/leadingSpace": { 568 + Input: " [PATCH] " + expectedSubject, 569 + Mode: SubjectCleanWhitespace, 570 + Subject: "[PATCH] " + 
expectedSubject, 571 + }, 572 + "CleanWhitespace/trailingSpace": { 573 + Input: "[PATCH] " + expectedSubject + " ", 574 + Mode: SubjectCleanWhitespace, 575 + Subject: "[PATCH] " + expectedSubject, 576 + }, 577 + } 578 + 579 + for name, test := range tests { 580 + t.Run(name, func(t *testing.T) { 581 + prefix, subject := cleanSubject(test.Input, test.Mode) 582 + if prefix != test.Prefix { 583 + t.Errorf("incorrect prefix: expected %q, actual %q", test.Prefix, prefix) 584 + } 585 + if subject != test.Subject { 586 + t.Errorf("incorrect subject: expected %q, actual %q", test.Subject, subject) 587 + } 588 + }) 589 + } 590 + }

+166

gitdiff/patch_identity.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "fmt" 5 + "strings" 6 + ) 7 + 8 + // PatchIdentity identifies a person who authored or committed a patch. 9 + type PatchIdentity struct { 10 + Name string 11 + Email string 12 + } 13 + 14 + func (i PatchIdentity) String() string { 15 + name := i.Name 16 + if name == "" { 17 + name = `""` 18 + } 19 + return fmt.Sprintf("%s <%s>", name, i.Email) 20 + } 21 + 22 + // ParsePatchIdentity parses a patch identity string. A patch identity contains 23 + // an email address and an optional name in [RFC 5322] format. This is either a 24 + // plain email adddress or a name followed by an address in angle brackets: 25 + // 26 + // author@example.com 27 + // Author Name <author@example.com> 28 + // 29 + // If the input is not one of these formats, ParsePatchIdentity applies a 30 + // heuristic to separate the name and email portions. If both the name and 31 + // email are missing or empty, ParsePatchIdentity returns an error. It 32 + // otherwise does not validate the result. 33 + // 34 + // [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322 35 + func ParsePatchIdentity(s string) (PatchIdentity, error) { 36 + s = normalizeSpace(s) 37 + s = unquotePairs(s) 38 + 39 + var name, email string 40 + if at := strings.IndexByte(s, '@'); at >= 0 { 41 + start, end := at, at 42 + for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' { 43 + start-- 44 + } 45 + for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' { 46 + end++ 47 + } 48 + email = s[start+1 : end] 49 + 50 + // Adjust the boundaries so that we drop angle brackets, but keep 51 + // spaces when removing the email to form the name. 
52 + if start < 0 || s[start] != '<' { 53 + start++ 54 + } 55 + if end >= len(s) || s[end] != '>' { 56 + end-- 57 + } 58 + name = s[:start] + s[end+1:] 59 + } else { 60 + start, end := 0, 0 61 + for i := 0; i < len(s); i++ { 62 + if s[i] == '<' && start == 0 { 63 + start = i + 1 64 + } 65 + if s[i] == '>' && start > 0 { 66 + end = i 67 + break 68 + } 69 + } 70 + if start > 0 && end >= start { 71 + email = strings.TrimSpace(s[start:end]) 72 + name = s[:start-1] 73 + } 74 + } 75 + 76 + // After extracting the email, the name might contain extra whitespace 77 + // again and may be surrounded by comment characters. The git source gives 78 + // these examples of when this can happen: 79 + // 80 + // "Name <email@domain>" 81 + // "email@domain (Name)" 82 + // "Name <email@domain> (Comment)" 83 + // 84 + name = normalizeSpace(name) 85 + if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") { 86 + name = name[1 : len(name)-1] 87 + } 88 + name = strings.TrimSpace(name) 89 + 90 + // If the name is empty or contains email-like characters, use the email 91 + // instead (assuming one exists) 92 + if name == "" || strings.ContainsAny(name, "@<>") { 93 + name = email 94 + } 95 + 96 + if name == "" && email == "" { 97 + return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s) 98 + } 99 + return PatchIdentity{Name: name, Email: email}, nil 100 + } 101 + 102 + // unquotePairs process the RFC5322 tokens "quoted-string" and "comment" to 103 + // remove any "quoted-pairs" (backslash-espaced characters). It also removes 104 + // the quotes from any quoted strings, but leaves the comment delimiters. 
func unquotePairs(s string) string {
	var (
		out      strings.Builder
		inQuote  bool // inside a quoted-string
		depth    int  // nesting depth of comments
		verbatim bool // previous byte was a dropped backslash
	)

	for i := 0; i < len(s); i++ {
		c := s[i]

		// The byte following a dropped backslash is copied as-is and never
		// interpreted as a delimiter.
		if verbatim {
			verbatim = false
			out.WriteByte(c)
			continue
		}

		switch {
		case c == '\\' && (inQuote || depth > 0):
			// quoted-pair is only allowed in quoted-string/comment; drop
			// the backslash and keep the next byte literally.
			verbatim = true
			continue
		case c == '"' && depth == 0:
			// Quotes toggle quoted-string state and are dropped, except
			// inside comments where they are ordinary characters.
			inQuote = !inQuote
			continue
		case c == '(' && !inQuote:
			depth++
		case c == ')' && depth > 0:
			depth--
		}
		out.WriteByte(c)
	}
	return out.String()
}

// normalizeSpace removes leading and trailing whitespace from s and collapses
// every inner run of whitespace characters into a single space.
func normalizeSpace(s string) string {
	var sb strings.Builder
	gap := false // true when the previous byte was whitespace
	for i := 0; i < len(s); i++ {
		c := s[i]
		if isRFC5332Space(c) {
			gap = true
			continue
		}
		// Emit the separator lazily so that leading and trailing runs of
		// whitespace never reach the output.
		if gap && sb.Len() > 0 {
			sb.WriteByte(' ')
		}
		gap = false
		sb.WriteByte(c)
	}
	return sb.String()
}

// isRFC5332Space reports whether c is a tab, line feed, carriage return, or
// space.
func isRFC5332Space(c byte) bool {
	return c == '\t' || c == '\n' || c == '\r' || c == ' '
}

+127

gitdiff/patch_identity_test.go

··· 1 + package gitdiff 2 + 3 + import ( 4 + "testing" 5 + ) 6 + 7 + func TestParsePatchIdentity(t *testing.T) { 8 + tests := map[string]struct { 9 + Input string 10 + Output PatchIdentity 11 + Err interface{} 12 + }{ 13 + "simple": { 14 + Input: "Morton Haypenny <mhaypenny@example.com>", 15 + Output: PatchIdentity{ 16 + Name: "Morton Haypenny", 17 + Email: "mhaypenny@example.com", 18 + }, 19 + }, 20 + "extraWhitespace": { 21 + Input: "\t Morton Haypenny \r\n<mhaypenny@example.com> ", 22 + Output: PatchIdentity{ 23 + Name: "Morton Haypenny", 24 + Email: "mhaypenny@example.com", 25 + }, 26 + }, 27 + "trailingCharacters": { 28 + Input: "Morton Haypenny <mhaypenny@example.com> II", 29 + Output: PatchIdentity{ 30 + Name: "Morton Haypenny II", 31 + Email: "mhaypenny@example.com", 32 + }, 33 + }, 34 + "onlyEmail": { 35 + Input: "mhaypenny@example.com", 36 + Output: PatchIdentity{ 37 + Name: "mhaypenny@example.com", 38 + Email: "mhaypenny@example.com", 39 + }, 40 + }, 41 + "onlyEmailInBrackets": { 42 + Input: "<mhaypenny@example.com>", 43 + Output: PatchIdentity{ 44 + Name: "mhaypenny@example.com", 45 + Email: "mhaypenny@example.com", 46 + }, 47 + }, 48 + "rfc5322SpecialCharacters": { 49 + Input: `"dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>`, 50 + Output: PatchIdentity{ 51 + Name: "dependabot[bot]", 52 + Email: "12345+dependabot[bot]@users.noreply.github.com", 53 + }, 54 + }, 55 + "rfc5322QuotedPairs": { 56 + Input: `"Morton \"Old-Timer\" Haypenny" <"mhaypenny\+[1900]"@example.com> (III $PhD$)`, 57 + Output: PatchIdentity{ 58 + Name: `Morton "Old-Timer" Haypenny (III (PhD))`, 59 + Email: "mhaypenny+[1900]@example.com", 60 + }, 61 + }, 62 + "rfc5322QuotedPairsOutOfContext": { 63 + Input: `Morton \\Backslash Haypenny <mhaypenny@example.com>`, 64 + Output: PatchIdentity{ 65 + Name: `Morton \\Backslash Haypenny`, 66 + Email: "mhaypenny@example.com", 67 + }, 68 + }, 69 + "emptyEmail": { 70 + Input: "Morton Haypenny <>", 71 + Output: PatchIdentity{ 72 + 
Name: "Morton Haypenny", 73 + Email: "", 74 + }, 75 + }, 76 + "unclosedEmail": { 77 + Input: "Morton Haypenny <mhaypenny@example.com", 78 + Output: PatchIdentity{ 79 + Name: "Morton Haypenny", 80 + Email: "mhaypenny@example.com", 81 + }, 82 + }, 83 + "bogusEmail": { 84 + Input: "Morton Haypenny <mhaypenny>", 85 + Output: PatchIdentity{ 86 + Name: "Morton Haypenny", 87 + Email: "mhaypenny", 88 + }, 89 + }, 90 + "bogusEmailWithWhitespace": { 91 + Input: "Morton Haypenny < mhaypenny >", 92 + Output: PatchIdentity{ 93 + Name: "Morton Haypenny", 94 + Email: "mhaypenny", 95 + }, 96 + }, 97 + "missingEmail": { 98 + Input: "Morton Haypenny", 99 + Err: "invalid identity", 100 + }, 101 + "missingNameAndEmptyEmail": { 102 + Input: "<>", 103 + Err: "invalid identity", 104 + }, 105 + "empty": { 106 + Input: "", 107 + Err: "invalid identity", 108 + }, 109 + } 110 + 111 + for name, test := range tests { 112 + t.Run(name, func(t *testing.T) { 113 + id, err := ParsePatchIdentity(test.Input) 114 + if test.Err != nil { 115 + assertError(t, test.Err, err, "parsing identity") 116 + return 117 + } 118 + if err != nil { 119 + t.Fatalf("unexpected error parsing identity: %v", err) 120 + } 121 + 122 + if test.Output != id { 123 + t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) 124 + } 125 + }) 126 + } 127 + }

+1 -1

gitdiff/testdata/apply/bin.go

··· 1 - //+build ignore 1 + //go:build ignore 2 2 3 3 // bin.go is a helper CLI to manipulate binary diff data for testing purposes. 4 4 // It can decode patches generated by git using the standard parsing functions

gitdiff/testdata/apply/bin_file_modify.out