+8
-17
.github/workflows/go.yml
+8
-17
.github/workflows/go.yml
···
9
9
name: Verify
10
10
runs-on: ubuntu-latest
11
11
steps:
12
-
- name: Set up Go 1.13
13
-
uses: actions/setup-go@v1
12
+
- name: Set up Go 1.21
13
+
uses: actions/setup-go@v5
14
14
with:
15
-
go-version: 1.13
16
-
id: go
17
-
18
-
- name: Set up environment
19
-
run: |
20
-
# https://github.com/actions/setup-go/issues/14
21
-
echo "::set-env name=GOPATH::$(go env GOPATH)"
22
-
echo "::add-path::$(go env GOPATH)/bin"
23
-
24
-
- name: Install golangci-lint
25
-
run: curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.23.1
26
-
15
+
go-version: 1.21
16
+
27
17
- name: Check out code into the Go module directory
28
-
uses: actions/checkout@v1
18
+
uses: actions/checkout@v4
29
19
30
20
- name: Lint
31
-
run: golangci-lint run
21
+
uses: golangci/golangci-lint-action@v7
22
+
with:
23
+
version: v2.0
32
24
33
25
- name: Test
34
26
run: go test -v ./...
35
-
+41
-13
.golangci.yml
+41
-13
.golangci.yml
···
1
+
version: "2"
2
+
1
3
run:
2
4
tests: false
3
5
4
6
linters:
5
-
disable-all: true
7
+
default: none
6
8
enable:
7
-
- deadcode
8
9
- errcheck
9
-
- gofmt
10
-
- goimports
11
-
- golint
12
10
- govet
13
11
- ineffassign
14
-
- typecheck
12
+
- misspell
13
+
- revive
15
14
- unconvert
16
-
- varcheck
17
-
18
-
issues:
19
-
exclude-use-default: false
15
+
- unused
16
+
settings:
17
+
errcheck:
18
+
exclude-functions:
19
+
- (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write
20
+
- (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString
21
+
- (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte
22
+
- fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter)
23
+
revive:
24
+
rules:
25
+
- name: context-keys-type
26
+
- name: time-naming
27
+
- name: var-declaration
28
+
- name: unexported-return
29
+
- name: errorf
30
+
- name: blank-imports
31
+
- name: context-as-argument
32
+
- name: dot-imports
33
+
- name: error-return
34
+
- name: error-strings
35
+
- name: error-naming
36
+
- name: exported
37
+
- name: increment-decrement
38
+
- name: var-naming
39
+
- name: package-comments
40
+
- name: range
41
+
- name: receiver-naming
42
+
- name: indent-error-flow
20
43
21
-
linter-settings:
22
-
goimports:
23
-
local-prefixes: github.com/bluekeyes/go-gitdiff
44
+
formatters:
45
+
enable:
46
+
- gofmt
47
+
- goimports
48
+
settings:
49
+
goimports:
50
+
local-prefixes:
51
+
- github.com/bluekeyes/go-gitdiff
+17
-5
README.md
+17
-5
README.md
···
4
4
5
5
A Go library for parsing and applying patches generated by `git diff`, `git
6
6
show`, and `git format-patch`. It can also parse and apply unified diffs
7
-
generated by the standard `diff` tool.
7
+
generated by the standard GNU `diff` tool.
8
8
9
9
It supports standard line-oriented text patches and Git binary patches, and
10
10
aims to parse anything accepted by the `git apply` command.
···
29
29
30
30
// apply the changes in the patch to a source file
31
31
var output bytes.Buffer
32
-
if err := gitdiff.NewApplier(code).ApplyFile(&output, files[0]); err != nil {
32
+
if err := gitdiff.Apply(&output, code, files[0]); err != nil {
33
33
log.Fatal(err)
34
34
}
35
35
```
36
36
37
37
## Development Status
38
38
39
-
Mostly complete, but API changes are possible. Patch parsing and strict
40
-
application works and is well-covered by unit tests, but has not been validated
41
-
extensively against real-world patches.
39
+
The parsing API and types are complete and I expect them to remain stable. Version
40
+
0.7.0 introduced a new apply API that may change more in the future to support
41
+
non-strict patch application.
42
+
43
+
Parsing and strict application are well-covered by unit tests and the library
44
+
is used in a production application that parses and applies thousands of
45
+
patches every day. However, the space of all possible patches is large, so
46
+
there are likely undiscovered bugs.
47
+
48
+
The parsing code has also had a modest amount of fuzz testing.
42
49
43
50
## Why another git/unified diff parser?
44
51
···
71
78
72
79
- Numbers immediately followed by non-numeric characters
73
80
- Trailing characters on a line after valid or expected content
81
+
- Malformed file header lines (lines that start with `diff --git`)
74
82
75
83
2. Errors for invalid input are generally more verbose and specific than those
76
84
from `git apply`.
···
91
99
context of each fragment must exactly match the source file; `git apply`
92
100
implements a search algorithm that tries different lines and amounts of
93
101
context, with further options to normalize or ignore whitespace changes.
102
+
103
+
7. When parsing mail-formatted patch headers, leading and trailing whitespace
104
+
is always removed from `Subject` lines. There is no exact equivalent to `git
105
+
mailinfo -k`.
+30
-337
gitdiff/apply.go
+30
-337
gitdiff/apply.go
···
13
13
// Users can test if an error was caused by a conflict by using errors.Is with
14
14
// an empty Conflict:
15
15
//
16
-
// if errors.Is(err, &Conflict{}) {
17
-
// // handle conflict
18
-
// }
19
-
//
16
+
// if errors.Is(err, &Conflict{}) {
17
+
// // handle conflict
18
+
// }
20
19
type Conflict struct {
21
20
msg string
22
21
}
···
89
88
90
89
var (
91
90
errApplyInProgress = errors.New("gitdiff: incompatible apply in progress")
92
-
)
93
-
94
-
const (
95
-
applyInitial = iota
96
-
applyText
97
-
applyBinary
98
-
applyFile
91
+
errApplierClosed = errors.New("gitdiff: applier is closed")
99
92
)
100
93
101
-
// Apply is a convenience function that creates an Applier for src with default
102
-
// settings and applies the changes in f, writing the result to dst.
103
-
func Apply(dst io.Writer, src io.ReaderAt, f *File) error {
104
-
return NewApplier(src).ApplyFile(dst, f)
105
-
}
106
-
107
-
// Applier applies changes described in fragments to source data. If changes
108
-
// are described in multiple fragments, those fragments must be applied in
109
-
// order, usually by calling ApplyFile.
94
+
// Apply applies the changes in f to src, writing the result to dst. It can
95
+
// apply both text and binary changes.
110
96
//
111
-
// By default, Applier operates in "strict" mode, where fragment content and
112
-
// positions must exactly match those of the source.
113
-
//
114
-
// If an error occurs while applying, methods on Applier return instances of
115
-
// *ApplyError that annotate the wrapped error with additional information
116
-
// when available. If the error is because of a conflict between a fragment and
117
-
// the source, the wrapped error will be a *Conflict.
118
-
//
119
-
// While an Applier can apply both text and binary fragments, only one fragment
120
-
// type can be used without resetting the Applier. The first fragment applied
121
-
// sets the type for the Applier. Mixing fragment types or mixing
122
-
// fragment-level and file-level applies results in an error.
123
-
type Applier struct {
124
-
src io.ReaderAt
125
-
lineSrc LineReaderAt
126
-
nextLine int64
127
-
applyType int
128
-
}
129
-
130
-
// NewApplier creates an Applier that reads data from src. If src is a
131
-
// LineReaderAt, it is used directly to apply text fragments.
132
-
func NewApplier(src io.ReaderAt) *Applier {
133
-
a := new(Applier)
134
-
a.Reset(src)
135
-
return a
136
-
}
137
-
138
-
// Reset resets the input and internal state of the Applier. If src is nil, the
139
-
// existing source is reused.
140
-
func (a *Applier) Reset(src io.ReaderAt) {
141
-
if src != nil {
142
-
a.src = src
143
-
if lineSrc, ok := src.(LineReaderAt); ok {
144
-
a.lineSrc = lineSrc
145
-
} else {
146
-
a.lineSrc = &lineReaderAt{r: src}
97
+
// If an error occurs while applying, Apply returns an *ApplyError that
98
+
// annotates the error with additional information. If the error is because of
99
+
// a conflict with the source, the wrapped error will be a *Conflict.
100
+
func Apply(dst io.Writer, src io.ReaderAt, f *File) error {
101
+
if f.IsBinary {
102
+
if len(f.TextFragments) > 0 {
103
+
return applyError(errors.New("binary file contains text fragments"))
147
104
}
148
-
}
149
-
a.nextLine = 0
150
-
a.applyType = applyInitial
151
-
}
152
-
153
-
// ApplyFile applies the changes in all of the fragments of f and writes the
154
-
// result to dst.
155
-
func (a *Applier) ApplyFile(dst io.Writer, f *File) error {
156
-
if a.applyType != applyInitial {
157
-
return applyError(errApplyInProgress)
158
-
}
159
-
defer func() { a.applyType = applyFile }()
160
-
161
-
if f.IsBinary && len(f.TextFragments) > 0 {
162
-
return applyError(errors.New("binary file contains text fragments"))
163
-
}
164
-
if !f.IsBinary && f.BinaryFragment != nil {
165
-
return applyError(errors.New("text file contains binary fragment"))
105
+
if f.BinaryFragment == nil {
106
+
return applyError(errors.New("binary file does not contain a binary fragment"))
107
+
}
108
+
} else {
109
+
if f.BinaryFragment != nil {
110
+
return applyError(errors.New("text file contains a binary fragment"))
111
+
}
166
112
}
167
113
168
114
switch {
169
115
case f.BinaryFragment != nil:
170
-
return a.ApplyBinaryFragment(dst, f.BinaryFragment)
116
+
applier := NewBinaryApplier(dst, src)
117
+
if err := applier.ApplyFragment(f.BinaryFragment); err != nil {
118
+
return err
119
+
}
120
+
return applier.Close()
171
121
172
122
case len(f.TextFragments) > 0:
173
123
frags := make([]*TextFragment, len(f.TextFragments))
···
181
131
// right now, the application fails if fragments overlap, but it should be
182
132
// possible to precompute the result of applying them in order
183
133
134
+
applier := NewTextApplier(dst, src)
184
135
for i, frag := range frags {
185
-
if err := a.ApplyTextFragment(dst, frag); err != nil {
136
+
if err := applier.ApplyFragment(frag); err != nil {
186
137
return applyError(err, fragNum(i))
187
138
}
188
139
}
189
-
}
190
-
191
-
return applyError(a.Flush(dst))
192
-
}
193
-
194
-
// ApplyTextFragment applies the changes in the fragment f and writes unwritten
195
-
// data before the start of the fragment and the result to dst. If multiple
196
-
// text fragments apply to the same source, ApplyTextFragment must be called in
197
-
// order of increasing start position. As a result, each fragment can be
198
-
// applied at most once before a call to Reset.
199
-
func (a *Applier) ApplyTextFragment(dst io.Writer, f *TextFragment) error {
200
-
if a.applyType != applyInitial && a.applyType != applyText {
201
-
return applyError(errApplyInProgress)
202
-
}
203
-
defer func() { a.applyType = applyText }()
204
-
205
-
// application code assumes fragment fields are consistent
206
-
if err := f.Validate(); err != nil {
207
-
return applyError(err)
208
-
}
209
-
210
-
// lines are 0-indexed, positions are 1-indexed (but new files have position = 0)
211
-
fragStart := f.OldPosition - 1
212
-
if fragStart < 0 {
213
-
fragStart = 0
214
-
}
215
-
fragEnd := fragStart + f.OldLines
216
-
217
-
start := a.nextLine
218
-
if fragStart < start {
219
-
return applyError(&Conflict{"fragment overlaps with an applied fragment"})
220
-
}
221
-
222
-
if f.OldPosition == 0 {
223
-
ok, err := isLen(a.src, 0)
224
-
if err != nil {
225
-
return applyError(err)
226
-
}
227
-
if !ok {
228
-
return applyError(&Conflict{"cannot create new file from non-empty src"})
229
-
}
230
-
}
140
+
return applier.Close()
231
141
232
-
preimage := make([][]byte, fragEnd-start)
233
-
n, err := a.lineSrc.ReadLinesAt(preimage, start)
234
-
switch {
235
-
case err == nil:
236
-
case err == io.EOF && n == len(preimage): // last line of frag has no newline character
237
142
default:
238
-
return applyError(err, lineNum(start+int64(n)))
239
-
}
240
-
241
-
// copy leading data before the fragment starts
242
-
for i, line := range preimage[:fragStart-start] {
243
-
if _, err := dst.Write(line); err != nil {
244
-
a.nextLine = start + int64(i)
245
-
return applyError(err, lineNum(a.nextLine))
246
-
}
247
-
}
248
-
preimage = preimage[fragStart-start:]
249
-
250
-
// apply the changes in the fragment
251
-
used := int64(0)
252
-
for i, line := range f.Lines {
253
-
if err := applyTextLine(dst, line, preimage, used); err != nil {
254
-
a.nextLine = fragStart + used
255
-
return applyError(err, lineNum(a.nextLine), fragLineNum(i))
256
-
}
257
-
if line.Old() {
258
-
used++
259
-
}
260
-
}
261
-
a.nextLine = fragStart + used
262
-
263
-
// new position of +0,0 mean a full delete, so check for leftovers
264
-
if f.NewPosition == 0 && f.NewLines == 0 {
265
-
var b [1][]byte
266
-
n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine)
267
-
if err != nil && err != io.EOF {
268
-
return applyError(err, lineNum(a.nextLine))
269
-
}
270
-
if n > 0 {
271
-
return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine))
272
-
}
273
-
}
274
-
275
-
return nil
276
-
}
277
-
278
-
func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) {
279
-
if line.Old() && string(preimage[i]) != line.Line {
280
-
return &Conflict{"fragment line does not match src line"}
281
-
}
282
-
if line.New() {
283
-
_, err = io.WriteString(dst, line.Line)
284
-
}
285
-
return err
286
-
}
287
-
288
-
// Flush writes any data following the last applied fragment to dst.
289
-
func (a *Applier) Flush(dst io.Writer) (err error) {
290
-
switch a.applyType {
291
-
case applyInitial:
292
-
_, err = copyFrom(dst, a.src, 0)
293
-
case applyText:
294
-
_, err = copyLinesFrom(dst, a.lineSrc, a.nextLine)
295
-
case applyBinary:
296
-
// nothing to flush, binary apply "consumes" full source
297
-
}
298
-
return err
299
-
}
300
-
301
-
// ApplyBinaryFragment applies the changes in the fragment f and writes the
302
-
// result to dst. At most one binary fragment can be applied before a call to
303
-
// Reset.
304
-
func (a *Applier) ApplyBinaryFragment(dst io.Writer, f *BinaryFragment) error {
305
-
if a.applyType != applyInitial {
306
-
return applyError(errApplyInProgress)
307
-
}
308
-
defer func() { a.applyType = applyBinary }()
309
-
310
-
if f == nil {
311
-
return applyError(errors.New("nil fragment"))
312
-
}
313
-
314
-
switch f.Method {
315
-
case BinaryPatchLiteral:
316
-
if _, err := dst.Write(f.Data); err != nil {
317
-
return applyError(err)
318
-
}
319
-
case BinaryPatchDelta:
320
-
if err := applyBinaryDeltaFragment(dst, a.src, f.Data); err != nil {
321
-
return applyError(err)
322
-
}
323
-
default:
324
-
return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method))
325
-
}
326
-
return nil
327
-
}
328
-
329
-
func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error {
330
-
srcSize, delta := readBinaryDeltaSize(frag)
331
-
if err := checkBinarySrcSize(src, srcSize); err != nil {
143
+
// nothing to apply, just copy all the data
144
+
_, err := copyFrom(dst, src, 0)
332
145
return err
333
146
}
334
-
335
-
dstSize, delta := readBinaryDeltaSize(delta)
336
-
337
-
for len(delta) > 0 {
338
-
op := delta[0]
339
-
if op == 0 {
340
-
return errors.New("invalid delta opcode 0")
341
-
}
342
-
343
-
var n int64
344
-
var err error
345
-
switch op & 0x80 {
346
-
case 0x80:
347
-
n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src)
348
-
case 0x00:
349
-
n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:])
350
-
}
351
-
if err != nil {
352
-
return err
353
-
}
354
-
dstSize -= n
355
-
}
356
-
357
-
if dstSize != 0 {
358
-
return errors.New("corrupt binary delta: insufficient or extra data")
359
-
}
360
-
return nil
361
-
}
362
-
363
-
// readBinaryDeltaSize reads a variable length size from a delta-encoded binary
364
-
// fragment, returing the size and the unused data. Data is encoded as:
365
-
//
366
-
// [[1xxxxxxx]...] [0xxxxxxx]
367
-
//
368
-
// in little-endian order, with 7 bits of the value per byte.
369
-
func readBinaryDeltaSize(d []byte) (size int64, rest []byte) {
370
-
shift := uint(0)
371
-
for i, b := range d {
372
-
size |= int64(b&0x7F) << shift
373
-
shift += 7
374
-
if b <= 0x7F {
375
-
return size, d[i+1:]
376
-
}
377
-
}
378
-
return size, nil
379
-
}
380
-
381
-
// applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary
382
-
// fragment, returning the amount of data written and the usused part of the
383
-
// fragment. An add operation takes the form:
384
-
//
385
-
// [0xxxxxx][[data1]...]
386
-
//
387
-
// where the lower seven bits of the opcode is the number of data bytes
388
-
// following the opcode. See also pack-format.txt in the Git source.
389
-
func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) {
390
-
size := int(op)
391
-
if len(delta) < size {
392
-
return 0, delta, errors.New("corrupt binary delta: incomplete add")
393
-
}
394
-
_, err = w.Write(delta[:size])
395
-
return int64(size), delta[size:], err
396
-
}
397
-
398
-
// applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary
399
-
// fragment, returing the amount of data written and the unused part of the
400
-
// fragment. A copy operation takes the form:
401
-
//
402
-
// [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3]
403
-
//
404
-
// where the lower seven bits of the opcode determine which non-zero offset and
405
-
// size bytes are present in little-endian order: if bit 0 is set, offset1 is
406
-
// present, etc. If no offset or size bytes are present, offset is 0 and size
407
-
// is 0x10000. See also pack-format.txt in the Git source.
408
-
func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) {
409
-
const defaultSize = 0x10000
410
-
411
-
unpack := func(start, bits uint) (v int64) {
412
-
for i := uint(0); i < bits; i++ {
413
-
mask := byte(1 << (i + start))
414
-
if op&mask > 0 {
415
-
if len(delta) == 0 {
416
-
err = errors.New("corrupt binary delta: incomplete copy")
417
-
return
418
-
}
419
-
v |= int64(delta[0]) << (8 * i)
420
-
delta = delta[1:]
421
-
}
422
-
}
423
-
return
424
-
}
425
-
426
-
offset := unpack(0, 4)
427
-
size := unpack(4, 3)
428
-
if err != nil {
429
-
return 0, delta, err
430
-
}
431
-
if size == 0 {
432
-
size = defaultSize
433
-
}
434
-
435
-
// TODO(bkeyes): consider pooling these buffers
436
-
b := make([]byte, size)
437
-
if _, err := src.ReadAt(b, offset); err != nil {
438
-
return 0, delta, err
439
-
}
440
-
441
-
_, err = w.Write(b)
442
-
return size, delta, err
443
-
}
444
-
445
-
func checkBinarySrcSize(r io.ReaderAt, size int64) error {
446
-
ok, err := isLen(r, size)
447
-
if err != nil {
448
-
return err
449
-
}
450
-
if !ok {
451
-
return &Conflict{"fragment src size does not match actual src size"}
452
-
}
453
-
return nil
454
147
}
+206
gitdiff/apply_binary.go
+206
gitdiff/apply_binary.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"errors"
5
+
"fmt"
6
+
"io"
7
+
)
8
+
9
+
// BinaryApplier applies binary changes described in a fragment to source data.
10
+
// The applier must be closed after use.
11
+
type BinaryApplier struct {
12
+
dst io.Writer
13
+
src io.ReaderAt
14
+
15
+
closed bool
16
+
dirty bool
17
+
}
18
+
19
+
// NewBinaryApplier creates a BinaryApplier that reads data from src and
20
+
// writes modified data to dst.
21
+
func NewBinaryApplier(dst io.Writer, src io.ReaderAt) *BinaryApplier {
22
+
a := BinaryApplier{
23
+
dst: dst,
24
+
src: src,
25
+
}
26
+
return &a
27
+
}
28
+
29
+
// ApplyFragment applies the changes in the fragment f and writes the result to
30
+
// dst. ApplyFragment can be called at most once.
31
+
//
32
+
// If an error occurs while applying, ApplyFragment returns an *ApplyError that
33
+
// annotates the error with additional information. If the error is because of
34
+
// a conflict between a fragment and the source, the wrapped error will be a
35
+
// *Conflict.
36
+
func (a *BinaryApplier) ApplyFragment(f *BinaryFragment) error {
37
+
if f == nil {
38
+
return applyError(errors.New("nil fragment"))
39
+
}
40
+
if a.closed {
41
+
return applyError(errApplierClosed)
42
+
}
43
+
if a.dirty {
44
+
return applyError(errApplyInProgress)
45
+
}
46
+
47
+
// mark an apply as in progress, even if it fails before making changes
48
+
a.dirty = true
49
+
50
+
switch f.Method {
51
+
case BinaryPatchLiteral:
52
+
if _, err := a.dst.Write(f.Data); err != nil {
53
+
return applyError(err)
54
+
}
55
+
case BinaryPatchDelta:
56
+
if err := applyBinaryDeltaFragment(a.dst, a.src, f.Data); err != nil {
57
+
return applyError(err)
58
+
}
59
+
default:
60
+
return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method))
61
+
}
62
+
return nil
63
+
}
64
+
65
+
// Close writes any data following the last applied fragment and prevents
66
+
// future calls to ApplyFragment.
67
+
func (a *BinaryApplier) Close() (err error) {
68
+
if a.closed {
69
+
return nil
70
+
}
71
+
72
+
a.closed = true
73
+
if !a.dirty {
74
+
_, err = copyFrom(a.dst, a.src, 0)
75
+
} else {
76
+
// do nothing, applying a binary fragment copies all data
77
+
}
78
+
return err
79
+
}
80
+
81
+
func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error {
82
+
srcSize, delta := readBinaryDeltaSize(frag)
83
+
if err := checkBinarySrcSize(src, srcSize); err != nil {
84
+
return err
85
+
}
86
+
87
+
dstSize, delta := readBinaryDeltaSize(delta)
88
+
89
+
for len(delta) > 0 {
90
+
op := delta[0]
91
+
if op == 0 {
92
+
return errors.New("invalid delta opcode 0")
93
+
}
94
+
95
+
var n int64
96
+
var err error
97
+
switch op & 0x80 {
98
+
case 0x80:
99
+
n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src)
100
+
case 0x00:
101
+
n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:])
102
+
}
103
+
if err != nil {
104
+
return err
105
+
}
106
+
dstSize -= n
107
+
}
108
+
109
+
if dstSize != 0 {
110
+
return errors.New("corrupt binary delta: insufficient or extra data")
111
+
}
112
+
return nil
113
+
}
114
+
115
+
// readBinaryDeltaSize reads a variable length size from a delta-encoded binary
116
+
// fragment, returning the size and the unused data. Data is encoded as:
117
+
//
118
+
// [[1xxxxxxx]...] [0xxxxxxx]
119
+
//
120
+
// in little-endian order, with 7 bits of the value per byte.
121
+
func readBinaryDeltaSize(d []byte) (size int64, rest []byte) {
122
+
shift := uint(0)
123
+
for i, b := range d {
124
+
size |= int64(b&0x7F) << shift
125
+
shift += 7
126
+
if b <= 0x7F {
127
+
return size, d[i+1:]
128
+
}
129
+
}
130
+
return size, nil
131
+
}
132
+
133
+
// applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary
134
+
// fragment, returning the amount of data written and the unused part of the
135
+
// fragment. An add operation takes the form:
136
+
//
137
+
// [0xxxxxxx][[data1]...]
138
+
//
139
+
// where the lower seven bits of the opcode are the number of data bytes
140
+
// following the opcode. See also pack-format.txt in the Git source.
141
+
func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) {
142
+
size := int(op)
143
+
if len(delta) < size {
144
+
return 0, delta, errors.New("corrupt binary delta: incomplete add")
145
+
}
146
+
_, err = w.Write(delta[:size])
147
+
return int64(size), delta[size:], err
148
+
}
149
+
150
+
// applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary
151
+
// fragment, returning the amount of data written and the unused part of the
152
+
// fragment. A copy operation takes the form:
153
+
//
154
+
// [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3]
155
+
//
156
+
// where the lower seven bits of the opcode determine which non-zero offset and
157
+
// size bytes are present in little-endian order: if bit 0 is set, offset1 is
158
+
// present, etc. If no offset or size bytes are present, offset is 0 and size
159
+
// is 0x10000. See also pack-format.txt in the Git source.
160
+
func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) {
161
+
const defaultSize = 0x10000
162
+
163
+
unpack := func(start, bits uint) (v int64) {
164
+
for i := uint(0); i < bits; i++ {
165
+
mask := byte(1 << (i + start))
166
+
if op&mask > 0 {
167
+
if len(delta) == 0 {
168
+
err = errors.New("corrupt binary delta: incomplete copy")
169
+
return
170
+
}
171
+
v |= int64(delta[0]) << (8 * i)
172
+
delta = delta[1:]
173
+
}
174
+
}
175
+
return
176
+
}
177
+
178
+
offset := unpack(0, 4)
179
+
size := unpack(4, 3)
180
+
if err != nil {
181
+
return 0, delta, err
182
+
}
183
+
if size == 0 {
184
+
size = defaultSize
185
+
}
186
+
187
+
// TODO(bkeyes): consider pooling these buffers
188
+
b := make([]byte, size)
189
+
if _, err := src.ReadAt(b, offset); err != nil {
190
+
return 0, delta, err
191
+
}
192
+
193
+
_, err = w.Write(b)
194
+
return size, delta, err
195
+
}
196
+
197
+
func checkBinarySrcSize(r io.ReaderAt, size int64) error {
198
+
ok, err := isLen(r, size)
199
+
if err != nil {
200
+
return err
201
+
}
202
+
if !ok {
203
+
return &Conflict{"fragment src size does not match actual src size"}
204
+
}
205
+
return nil
206
+
}
+11
-73
gitdiff/apply_test.go
+11
-73
gitdiff/apply_test.go
···
9
9
"testing"
10
10
)
11
11
12
-
func TestApplierInvariants(t *testing.T) {
13
-
binary := &BinaryFragment{
14
-
Method: BinaryPatchLiteral,
15
-
Size: 2,
16
-
Data: []byte("\xbe\xef"),
17
-
}
18
-
19
-
text := &TextFragment{
20
-
NewPosition: 1,
21
-
NewLines: 1,
22
-
LinesAdded: 1,
23
-
Lines: []Line{
24
-
{Op: OpAdd, Line: "new line\n"},
25
-
},
26
-
}
27
-
28
-
file := &File{
29
-
TextFragments: []*TextFragment{text},
30
-
}
31
-
32
-
src := bytes.NewReader(nil)
33
-
dst := ioutil.Discard
34
-
35
-
assertInProgress := func(t *testing.T, kind string, err error) {
36
-
if !errors.Is(err, errApplyInProgress) {
37
-
t.Fatalf("expected in-progress error for %s apply, but got: %v", kind, err)
38
-
}
39
-
}
40
-
41
-
t.Run("binaryFirst", func(t *testing.T) {
42
-
a := NewApplier(src)
43
-
if err := a.ApplyBinaryFragment(dst, binary); err != nil {
44
-
t.Fatalf("unexpected error applying fragment: %v", err)
45
-
}
46
-
assertInProgress(t, "text", a.ApplyTextFragment(dst, text))
47
-
assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary))
48
-
assertInProgress(t, "file", a.ApplyFile(dst, file))
49
-
})
50
-
51
-
t.Run("textFirst", func(t *testing.T) {
52
-
a := NewApplier(src)
53
-
if err := a.ApplyTextFragment(dst, text); err != nil {
54
-
t.Fatalf("unexpected error applying fragment: %v", err)
55
-
}
56
-
// additional text fragments are allowed
57
-
if err := a.ApplyTextFragment(dst, text); err != nil {
58
-
t.Fatalf("unexpected error applying second fragment: %v", err)
59
-
}
60
-
assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary))
61
-
assertInProgress(t, "file", a.ApplyFile(dst, file))
62
-
})
63
-
64
-
t.Run("fileFirst", func(t *testing.T) {
65
-
a := NewApplier(src)
66
-
if err := a.ApplyFile(dst, file); err != nil {
67
-
t.Fatalf("unexpected error applying file: %v", err)
68
-
}
69
-
assertInProgress(t, "text", a.ApplyTextFragment(dst, text))
70
-
assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary))
71
-
assertInProgress(t, "file", a.ApplyFile(dst, file))
72
-
})
73
-
}
74
-
75
12
func TestApplyTextFragment(t *testing.T) {
76
13
tests := map[string]applyTest{
77
14
"createFile": {Files: getApplyFiles("text_fragment_new")},
···
85
22
"changeStart": {Files: getApplyFiles("text_fragment_change_start")},
86
23
"changeMiddle": {Files: getApplyFiles("text_fragment_change_middle")},
87
24
"changeEnd": {Files: getApplyFiles("text_fragment_change_end")},
25
+
"changeEndEOL": {Files: getApplyFiles("text_fragment_change_end_eol")},
88
26
"changeExact": {Files: getApplyFiles("text_fragment_change_exact")},
89
27
"changeSingleNoEOL": {Files: getApplyFiles("text_fragment_change_single_noeol")},
90
28
···
127
65
128
66
for name, test := range tests {
129
67
t.Run(name, func(t *testing.T) {
130
-
test.run(t, func(w io.Writer, applier *Applier, file *File) error {
68
+
test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error {
131
69
if len(file.TextFragments) != 1 {
132
70
t.Fatalf("patch should contain exactly one fragment, but it has %d", len(file.TextFragments))
133
71
}
134
-
return applier.ApplyTextFragment(w, file.TextFragments[0])
72
+
applier := NewTextApplier(dst, src)
73
+
return applier.ApplyFragment(file.TextFragments[0])
135
74
})
136
75
})
137
76
}
···
176
115
177
116
for name, test := range tests {
178
117
t.Run(name, func(t *testing.T) {
179
-
test.run(t, func(w io.Writer, applier *Applier, file *File) error {
180
-
return applier.ApplyBinaryFragment(w, file.BinaryFragment)
118
+
test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error {
119
+
applier := NewBinaryApplier(dst, src)
120
+
return applier.ApplyFragment(file.BinaryFragment)
181
121
})
182
122
})
183
123
}
···
216
156
217
157
for name, test := range tests {
218
158
t.Run(name, func(t *testing.T) {
219
-
test.run(t, func(w io.Writer, applier *Applier, file *File) error {
220
-
return applier.ApplyFile(w, file)
159
+
test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error {
160
+
return Apply(dst, src, file)
221
161
})
222
162
})
223
163
}
···
228
168
Err interface{}
229
169
}
230
170
231
-
func (at applyTest) run(t *testing.T, apply func(io.Writer, *Applier, *File) error) {
171
+
func (at applyTest) run(t *testing.T, apply func(io.Writer, io.ReaderAt, *File) error) {
232
172
src, patch, out := at.Files.Load(t)
233
173
234
174
files, _, err := Parse(bytes.NewReader(patch))
···
239
179
t.Fatalf("patch should contain exactly one file, but it has %d", len(files))
240
180
}
241
181
242
-
applier := NewApplier(bytes.NewReader(src))
243
-
244
182
var dst bytes.Buffer
245
-
err = apply(&dst, applier, files[0])
183
+
err = apply(&dst, bytes.NewReader(src), files[0])
246
184
if at.Err != nil {
247
185
assertError(t, at.Err, err, "applying fragment")
248
186
return
+152
gitdiff/apply_text.go
+152
gitdiff/apply_text.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"io"
5
+
)
6
+
7
+
// TextApplier applies changes described in text fragments to source data. If
8
+
// changes are described in multiple fragments, those fragments must be applied
9
+
// in order. The applier must be closed after use.
10
+
//
11
+
// By default, TextApplier operates in "strict" mode, where fragment content
12
+
// and positions must exactly match those of the source.
13
+
type TextApplier struct {
14
+
dst io.Writer
15
+
src io.ReaderAt
16
+
lineSrc LineReaderAt
17
+
nextLine int64
18
+
19
+
closed bool
20
+
dirty bool
21
+
}
22
+
23
+
// NewTextApplier creates a TextApplier that reads data from src and writes
24
+
// modified data to dst. If src implements LineReaderAt, it is used directly.
25
+
func NewTextApplier(dst io.Writer, src io.ReaderAt) *TextApplier {
26
+
a := TextApplier{
27
+
dst: dst,
28
+
src: src,
29
+
}
30
+
31
+
if lineSrc, ok := src.(LineReaderAt); ok {
32
+
a.lineSrc = lineSrc
33
+
} else {
34
+
a.lineSrc = &lineReaderAt{r: src}
35
+
}
36
+
37
+
return &a
38
+
}
39
+
40
+
// ApplyFragment applies the changes in the fragment f, writing unwritten data
41
+
// before the start of the fragment and any changes from the fragment. If
42
+
// multiple text fragments apply to the same content, ApplyFragment must be
43
+
// called in order of increasing start position. As a result, each fragment can
44
+
// be applied at most once.
45
+
//
46
+
// If an error occurs while applying, ApplyFragment returns an *ApplyError that
47
+
// annotates the error with additional information. If the error is because of
48
+
// a conflict between the fragment and the source, the wrapped error will be a
49
+
// *Conflict.
50
+
func (a *TextApplier) ApplyFragment(f *TextFragment) error {
51
+
if a.closed {
52
+
return applyError(errApplierClosed)
53
+
}
54
+
55
+
// mark an apply as in progress, even if it fails before making changes
56
+
a.dirty = true
57
+
58
+
// application code assumes fragment fields are consistent
59
+
if err := f.Validate(); err != nil {
60
+
return applyError(err)
61
+
}
62
+
63
+
// lines are 0-indexed, positions are 1-indexed (but new files have position = 0)
64
+
fragStart := f.OldPosition - 1
65
+
if fragStart < 0 {
66
+
fragStart = 0
67
+
}
68
+
fragEnd := fragStart + f.OldLines
69
+
70
+
start := a.nextLine
71
+
if fragStart < start {
72
+
return applyError(&Conflict{"fragment overlaps with an applied fragment"})
73
+
}
74
+
75
+
if f.OldPosition == 0 {
76
+
ok, err := isLen(a.src, 0)
77
+
if err != nil {
78
+
return applyError(err)
79
+
}
80
+
if !ok {
81
+
return applyError(&Conflict{"cannot create new file from non-empty src"})
82
+
}
83
+
}
84
+
85
+
preimage := make([][]byte, fragEnd-start)
86
+
n, err := a.lineSrc.ReadLinesAt(preimage, start)
87
+
if err != nil {
88
+
return applyError(err, lineNum(start+int64(n)))
89
+
}
90
+
91
+
// copy leading data before the fragment starts
92
+
for i, line := range preimage[:fragStart-start] {
93
+
if _, err := a.dst.Write(line); err != nil {
94
+
a.nextLine = start + int64(i)
95
+
return applyError(err, lineNum(a.nextLine))
96
+
}
97
+
}
98
+
preimage = preimage[fragStart-start:]
99
+
100
+
// apply the changes in the fragment
101
+
used := int64(0)
102
+
for i, line := range f.Lines {
103
+
if err := applyTextLine(a.dst, line, preimage, used); err != nil {
104
+
a.nextLine = fragStart + used
105
+
return applyError(err, lineNum(a.nextLine), fragLineNum(i))
106
+
}
107
+
if line.Old() {
108
+
used++
109
+
}
110
+
}
111
+
a.nextLine = fragStart + used
112
+
113
+
// new position of +0,0 mean a full delete, so check for leftovers
114
+
if f.NewPosition == 0 && f.NewLines == 0 {
115
+
var b [1][]byte
116
+
n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine)
117
+
if err != nil && err != io.EOF {
118
+
return applyError(err, lineNum(a.nextLine))
119
+
}
120
+
if n > 0 {
121
+
return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine))
122
+
}
123
+
}
124
+
125
+
return nil
126
+
}
127
+
128
+
func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) {
129
+
if line.Old() && string(preimage[i]) != line.Line {
130
+
return &Conflict{"fragment line does not match src line"}
131
+
}
132
+
if line.New() {
133
+
_, err = io.WriteString(dst, line.Line)
134
+
}
135
+
return err
136
+
}
137
+
138
+
// Close writes any data following the last applied fragment and prevents
139
+
// future calls to ApplyFragment.
140
+
func (a *TextApplier) Close() (err error) {
141
+
if a.closed {
142
+
return nil
143
+
}
144
+
145
+
a.closed = true
146
+
if !a.dirty {
147
+
_, err = copyFrom(a.dst, a.src, 0)
148
+
} else {
149
+
_, err = copyLinesFrom(a.dst, a.lineSrc, a.nextLine)
150
+
}
151
+
return err
152
+
}
+41
-2
gitdiff/base85.go
+41
-2
gitdiff/base85.go
···
19
19
}
20
20
21
21
// base85Decode decodes Base85-encoded data from src into dst. It uses the
22
-
// alphabet defined by base85.c in the Git source tree, which appears to be
23
-
// unique. src must contain at least len(dst) bytes of encoded data.
22
+
// alphabet defined by base85.c in the Git source tree. src must contain at
23
+
// least len(dst) bytes of encoded data.
24
24
func base85Decode(dst, src []byte) error {
25
25
var v uint32
26
26
var n, ndst int
···
50
50
}
51
51
return nil
52
52
}
53
+
54
+
// base85Encode encodes src in Base85, writing the result to dst. It uses the
55
+
// alphabet defined by base85.c in the Git source tree.
56
+
func base85Encode(dst, src []byte) {
57
+
var di, si int
58
+
59
+
encode := func(v uint32) {
60
+
dst[di+0] = b85Alpha[(v/(85*85*85*85))%85]
61
+
dst[di+1] = b85Alpha[(v/(85*85*85))%85]
62
+
dst[di+2] = b85Alpha[(v/(85*85))%85]
63
+
dst[di+3] = b85Alpha[(v/85)%85]
64
+
dst[di+4] = b85Alpha[v%85]
65
+
}
66
+
67
+
n := (len(src) / 4) * 4
68
+
for si < n {
69
+
encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3]))
70
+
si += 4
71
+
di += 5
72
+
}
73
+
74
+
var v uint32
75
+
switch len(src) - si {
76
+
case 3:
77
+
v |= uint32(src[si+2]) << 8
78
+
fallthrough
79
+
case 2:
80
+
v |= uint32(src[si+1]) << 16
81
+
fallthrough
82
+
case 1:
83
+
v |= uint32(src[si+0]) << 24
84
+
encode(v)
85
+
}
86
+
}
87
+
88
+
// base85Len returns the number of bytes needed to hold the Base85 encoding of
// n input bytes: five output bytes for every started group of four.
func base85Len(n int) int {
	groups := (n + 3) / 4
	return groups * 5
}
+58
gitdiff/base85_test.go
+58
gitdiff/base85_test.go
···
1
1
package gitdiff
2
2
3
3
import (
4
+
"bytes"
4
5
"testing"
5
6
)
6
7
···
58
59
})
59
60
}
60
61
}
62
+
63
+
func TestBase85Encode(t *testing.T) {
64
+
tests := map[string]struct {
65
+
Input []byte
66
+
Output string
67
+
}{
68
+
"zeroBytes": {
69
+
Input: []byte{},
70
+
Output: "",
71
+
},
72
+
"twoBytes": {
73
+
Input: []byte{0xCA, 0xFE},
74
+
Output: "%KiWV",
75
+
},
76
+
"fourBytes": {
77
+
Input: []byte{0x0, 0x0, 0xCA, 0xFE},
78
+
Output: "007GV",
79
+
},
80
+
"sixBytes": {
81
+
Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE},
82
+
Output: "007GV%KiWV",
83
+
},
84
+
}
85
+
86
+
for name, test := range tests {
87
+
t.Run(name, func(t *testing.T) {
88
+
dst := make([]byte, len(test.Output))
89
+
base85Encode(dst, test.Input)
90
+
for i, b := range test.Output {
91
+
if dst[i] != byte(b) {
92
+
t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i])
93
+
}
94
+
}
95
+
})
96
+
}
97
+
}
98
+
99
+
func FuzzBase85Roundtrip(f *testing.F) {
100
+
f.Add([]byte{0x2b, 0x0d})
101
+
f.Add([]byte{0xbc, 0xb4, 0x3f})
102
+
f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25})
103
+
f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8})
104
+
105
+
f.Fuzz(func(t *testing.T, in []byte) {
106
+
n := len(in)
107
+
dst := make([]byte, base85Len(n))
108
+
out := make([]byte, n)
109
+
110
+
base85Encode(dst, in)
111
+
if err := base85Decode(out, dst); err != nil {
112
+
t.Fatalf("unexpected error decoding base85 data: %v", err)
113
+
}
114
+
if !bytes.Equal(in, out) {
115
+
t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst))
116
+
}
117
+
})
118
+
}
+11
-4
gitdiff/binary.go
+11
-4
gitdiff/binary.go
···
50
50
}
51
51
52
52
func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) {
53
-
switch p.Line(0) {
54
-
case "GIT binary patch\n":
53
+
line := p.Line(0)
54
+
switch {
55
+
case line == "GIT binary patch\n":
55
56
hasData = true
56
-
case "Binary files differ\n":
57
-
case "Files differ\n":
57
+
case isBinaryNoDataMarker(line):
58
58
default:
59
59
return false, false, nil
60
60
}
···
63
63
return false, false, err
64
64
}
65
65
return true, hasData, nil
66
+
}
67
+
68
+
// isBinaryNoDataMarker reports whether line marks a binary file whose patch
// carries no data: a line that ends in " differ\n" and starts with either
// "Binary files " or "Files ". Both the short form ("Binary files differ")
// and the form that names the two files match.
func isBinaryNoDataMarker(line string) bool {
	if !strings.HasSuffix(line, " differ\n") {
		return false
	}
	for _, prefix := range []string{"Binary files ", "Files "} {
		if strings.HasPrefix(line, prefix) {
			return true
		}
	}
	return false
}
67
74
68
75
func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) {
+10
gitdiff/binary_test.go
+10
gitdiff/binary_test.go
···
25
25
IsBinary: true,
26
26
HasData: false,
27
27
},
28
+
"binaryFileNoPatchPaths": {
29
+
Input: "Binary files a/foo.bin and b/foo.bin differ\n",
30
+
IsBinary: true,
31
+
HasData: false,
32
+
},
33
+
"fileNoPatch": {
34
+
Input: "Files differ\n",
35
+
IsBinary: true,
36
+
HasData: false,
37
+
},
28
38
"textFile": {
29
39
Input: "@@ -10,14 +22,31 @@\n",
30
40
IsBinary: false,
+103
-27
gitdiff/file_header.go
+103
-27
gitdiff/file_header.go
···
57
57
return nil, "", err
58
58
}
59
59
}
60
-
return nil, "", nil
60
+
return nil, preamble.String(), nil
61
61
}
62
62
63
63
func (p *parser) ParseGitFileHeader() (*File, error) {
···
172
172
// If the names in the header do not match because the patch is a rename,
173
173
// return an empty default name.
174
174
func parseGitHeaderName(header string) (string, error) {
175
-
firstName, n, err := parseName(header, -1, 1)
176
-
if err != nil {
177
-
return "", err
175
+
header = strings.TrimSuffix(header, "\n")
176
+
if len(header) == 0 {
177
+
return "", nil
178
178
}
179
179
180
-
if n < len(header) && (header[n] == ' ' || header[n] == '\t') {
181
-
n++
182
-
}
180
+
var err error
181
+
var first, second string
182
+
183
+
// there are 4 cases to account for:
184
+
//
185
+
// 1) unquoted unquoted
186
+
// 2) unquoted "quoted"
187
+
// 3) "quoted" unquoted
188
+
// 4) "quoted" "quoted"
189
+
//
190
+
quote := strings.IndexByte(header, '"')
191
+
switch {
192
+
case quote < 0:
193
+
// case 1
194
+
first = header
195
+
196
+
case quote > 0:
197
+
// case 2
198
+
first = header[:quote-1]
199
+
if !isSpace(header[quote-1]) {
200
+
return "", fmt.Errorf("missing separator")
201
+
}
202
+
203
+
second, _, err = parseQuotedName(header[quote:])
204
+
if err != nil {
205
+
return "", err
206
+
}
207
+
208
+
case quote == 0:
209
+
// case 3 or case 4
210
+
var n int
211
+
first, n, err = parseQuotedName(header)
212
+
if err != nil {
213
+
return "", err
214
+
}
215
+
216
+
// git accepts multiple spaces after a quoted name, but not after an
217
+
// unquoted name, since the name might end with one or more spaces
218
+
for n < len(header) && isSpace(header[n]) {
219
+
n++
220
+
}
221
+
if n == len(header) {
222
+
return "", nil
223
+
}
183
224
184
-
secondName, _, err := parseName(header[n:], -1, 1)
185
-
if err != nil {
186
-
return "", err
225
+
if header[n] == '"' {
226
+
second, _, err = parseQuotedName(header[n:])
227
+
if err != nil {
228
+
return "", err
229
+
}
230
+
} else {
231
+
second = header[n:]
232
+
}
187
233
}
188
234
189
-
if firstName != secondName {
235
+
first = trimTreePrefix(first, 1)
236
+
if second != "" {
237
+
if first == trimTreePrefix(second, 1) {
238
+
return first, nil
239
+
}
190
240
return "", nil
191
241
}
192
-
return firstName, nil
242
+
243
+
// at this point, both names are unquoted (case 1)
244
+
// since names may contain spaces, we can't use a known separator
245
+
// instead, look for a split that produces two equal names
246
+
247
+
for i := 0; i < len(first)-1; i++ {
248
+
if !isSpace(first[i]) {
249
+
continue
250
+
}
251
+
second = trimTreePrefix(first[i+1:], 1)
252
+
if name := first[:i]; name == second {
253
+
return name, nil
254
+
}
255
+
}
256
+
return "", nil
193
257
}
194
258
195
259
// parseGitHeaderData parses a single line of metadata from a Git file header.
···
260
324
}
261
325
262
326
func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) {
263
-
f.OldMode, err = parseMode(line)
327
+
f.OldMode, err = parseMode(strings.TrimSpace(line))
264
328
return
265
329
}
266
330
267
331
func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) {
268
-
f.NewMode, err = parseMode(line)
332
+
f.NewMode, err = parseMode(strings.TrimSpace(line))
269
333
return
270
334
}
271
335
···
283
347
284
348
func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) {
285
349
f.IsCopy = true
286
-
f.OldName, _, err = parseName(line, -1, 0)
350
+
f.OldName, _, err = parseName(line, 0, 0)
287
351
return
288
352
}
289
353
290
354
func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) {
291
355
f.IsCopy = true
292
-
f.NewName, _, err = parseName(line, -1, 0)
356
+
f.NewName, _, err = parseName(line, 0, 0)
293
357
return
294
358
}
295
359
296
360
func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) {
297
361
f.IsRename = true
298
-
f.OldName, _, err = parseName(line, -1, 0)
362
+
f.OldName, _, err = parseName(line, 0, 0)
299
363
return
300
364
}
301
365
302
366
func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) {
303
367
f.IsRename = true
304
-
f.NewName, _, err = parseName(line, -1, 0)
368
+
f.NewName, _, err = parseName(line, 0, 0)
305
369
return
306
370
}
307
371
···
349
413
350
414
// parseName extracts a file name from the start of a string and returns the
351
415
// name and the index of the first character after the name. If the name is
352
-
// unquoted and term is non-negative, parsing stops at the first occurrence of
353
-
// term. Otherwise parsing of unquoted names stops at the first space or tab.
416
+
// unquoted and term is non-zero, parsing stops at the first occurrence of
417
+
// term.
354
418
//
355
419
// If the name is exactly "/dev/null", no further processing occurs. Otherwise,
356
420
// if dropPrefix is greater than zero, that number of prefix components
357
421
// separated by forward slashes are dropped from the name and any duplicate
358
422
// slashes are collapsed.
359
-
func parseName(s string, term rune, dropPrefix int) (name string, n int, err error) {
423
+
func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) {
360
424
if len(s) > 0 && s[0] == '"' {
361
425
name, n, err = parseQuotedName(s)
362
426
} else {
···
387
451
return name, n, err
388
452
}
389
453
390
-
func parseUnquotedName(s string, term rune) (name string, n int, err error) {
454
+
func parseUnquotedName(s string, term byte) (name string, n int, err error) {
391
455
for n = 0; n < len(s); n++ {
392
456
if s[n] == '\n' {
393
457
break
394
458
}
395
-
if term >= 0 && rune(s[n]) == term {
396
-
break
397
-
}
398
-
if term < 0 && (s[n] == ' ' || s[n] == '\t') {
459
+
if term > 0 && s[n] == term {
399
460
break
400
461
}
401
462
}
···
440
501
return b.String()
441
502
}
442
503
504
+
// trimTreePrefix removes up to n leading directory components from name,
// where a component is everything up to and including a '/'. If name runs out
// before n separators are found, the result is the empty string.
func trimTreePrefix(name string, n int) string {
	rest := name
	for ; n > 0 && len(rest) > 0; n-- {
		slash := strings.IndexByte(rest, '/')
		if slash < 0 {
			// no separator left: the whole remainder is consumed
			return ""
		}
		rest = rest[slash+1:]
	}
	return rest
}
514
+
443
515
// hasEpochTimestamp returns true if the string ends with a POSIX-formatted
444
516
// timestamp for the UNIX epoch after a tab character. According to git, this
445
517
// is used by GNU diff to mark creations and deletions.
···
455
527
456
528
// a valid timestamp can have optional ':' in zone specifier
457
529
// remove that if it exists so we have a single format
458
-
if ts[len(ts)-3] == ':' {
530
+
if len(ts) >= 3 && ts[len(ts)-3] == ':' {
459
531
ts = ts[:len(ts)-3] + ts[len(ts)-2:]
460
532
}
461
533
···
468
540
}
469
541
return true
470
542
}
543
+
544
+
// isSpace reports whether c is a space, tab, or newline.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\n':
		return true
	default:
		return false
	}
}
+64
-8
gitdiff/file_header_test.go
+64
-8
gitdiff/file_header_test.go
···
310
310
func TestParseName(t *testing.T) {
311
311
tests := map[string]struct {
312
312
Input string
313
-
Term rune
313
+
Term byte
314
314
Drop int
315
315
Output string
316
316
N int
···
334
334
"dropPrefix": {
335
335
Input: "a/dir/file.txt", Drop: 1, Output: "dir/file.txt", N: 14,
336
336
},
337
-
"multipleNames": {
338
-
Input: "dir/a.txt dir/b.txt", Term: -1, Output: "dir/a.txt", N: 9,
337
+
"unquotedWithSpaces": {
338
+
Input: "dir/with spaces.txt", Output: "dir/with spaces.txt", N: 19,
339
+
},
340
+
"unquotedWithTrailingSpaces": {
341
+
Input: "dir/with spaces.space ", Output: "dir/with spaces.space ", N: 23,
339
342
},
340
343
"devNull": {
341
344
Input: "/dev/null", Term: '\t', Drop: 1, Output: "/dev/null", N: 9,
342
345
},
343
-
"newlineAlwaysSeparates": {
344
-
Input: "dir/file.txt\n", Term: 0, Output: "dir/file.txt", N: 12,
346
+
"newlineSeparates": {
347
+
Input: "dir/file.txt\n", Output: "dir/file.txt", N: 12,
345
348
},
346
349
"emptyString": {
347
350
Input: "", Err: true,
···
483
486
OldMode: os.FileMode(0100644),
484
487
},
485
488
},
489
+
"oldModeWithTrailingSpace": {
490
+
Line: "old mode 100644\r\n",
491
+
OutputFile: &File{
492
+
OldMode: os.FileMode(0100644),
493
+
},
494
+
},
486
495
"invalidOldMode": {
487
496
Line: "old mode rw\n",
488
497
Err: true,
489
498
},
490
499
"newMode": {
491
500
Line: "new mode 100755\n",
501
+
OutputFile: &File{
502
+
NewMode: os.FileMode(0100755),
503
+
},
504
+
},
505
+
"newModeWithTrailingSpace": {
506
+
Line: "new mode 100755\r\n",
492
507
OutputFile: &File{
493
508
NewMode: os.FileMode(0100755),
494
509
},
···
508
523
},
509
524
"newFileMode": {
510
525
Line: "new file mode 100755\n",
526
+
DefaultName: "dir/file.txt",
527
+
OutputFile: &File{
528
+
NewName: "dir/file.txt",
529
+
NewMode: os.FileMode(0100755),
530
+
IsNew: true,
531
+
},
532
+
},
533
+
"newFileModeWithTrailingSpace": {
534
+
Line: "new file mode 100755\r\n",
511
535
DefaultName: "dir/file.txt",
512
536
OutputFile: &File{
513
537
NewName: "dir/file.txt",
···
630
654
Input: "a/dir/foo.txt b/dir/bar.txt",
631
655
Output: "",
632
656
},
633
-
"missingSecondName": {
634
-
Input: "a/dir/foo.txt",
635
-
Err: true,
657
+
"matchingNamesWithSpaces": {
658
+
Input: "a/dir/file with spaces.txt b/dir/file with spaces.txt",
659
+
Output: "dir/file with spaces.txt",
660
+
},
661
+
"matchingNamesWithTrailingSpaces": {
662
+
Input: "a/dir/spaces b/dir/spaces ",
663
+
Output: "dir/spaces ",
664
+
},
665
+
"matchingNamesQuoted": {
666
+
Input: `"a/dir/\"quotes\".txt" "b/dir/\"quotes\".txt"`,
667
+
Output: `dir/"quotes".txt`,
668
+
},
669
+
"matchingNamesFirstQuoted": {
670
+
Input: `"a/dir/file.txt" b/dir/file.txt`,
671
+
Output: "dir/file.txt",
672
+
},
673
+
"matchingNamesSecondQuoted": {
674
+
Input: `a/dir/file.txt "b/dir/file.txt"`,
675
+
Output: "dir/file.txt",
676
+
},
677
+
"noSecondName": {
678
+
Input: "a/dir/foo.txt",
679
+
Output: "",
680
+
},
681
+
"noSecondNameQuoted": {
682
+
Input: `"a/dir/foo.txt"`,
683
+
Output: "",
636
684
},
637
685
"invalidName": {
638
686
Input: `"a/dir/file.txt b/dir/file.txt`,
···
695
743
},
696
744
"notEpoch": {
697
745
Input: "+++ file.txt\t2019-03-21 12:34:56.789 -0700\n",
746
+
Output: false,
747
+
},
748
+
"notTimestamp": {
749
+
Input: "+++ file.txt\trandom text\n",
750
+
Output: false,
751
+
},
752
+
"notTimestampShort": {
753
+
Input: "+++ file.txt\t0\n",
698
754
Output: false,
699
755
},
700
756
}
+281
gitdiff/format.go
+281
gitdiff/format.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"bytes"
5
+
"compress/zlib"
6
+
"fmt"
7
+
"io"
8
+
"strconv"
9
+
)
10
+
11
+
// formatter writes diff text to an io.Writer. Its write methods never report
// failure: the first error from the underlying writer is stored in err and
// every later write is discarded.
type formatter struct {
	w   io.Writer // underlying destination
	err error     // first error returned by w, if any
}

// newFormatter returns a formatter that writes to w.
func newFormatter(w io.Writer) *formatter {
	fm := new(formatter)
	fm.w = w
	return fm
}

// Write forwards p to the underlying writer unless an earlier write failed.
// It always returns len(p) and a nil error; a failure is recorded in fm.err.
func (fm *formatter) Write(p []byte) (int, error) {
	if fm.err == nil {
		_, fm.err = fm.w.Write(p)
	}
	return len(p), nil
}

// WriteString writes s with the same semantics as Write.
func (fm *formatter) WriteString(s string) (int, error) {
	return fm.Write([]byte(s))
}

// WriteByte writes the single byte c with the same semantics as Write.
func (fm *formatter) WriteByte(c byte) error {
	_, err := fm.Write([]byte{c})
	return err
}
39
+
40
+
func (fm *formatter) WriteQuotedName(s string) {
41
+
qpos := 0
42
+
for i := 0; i < len(s); i++ {
43
+
ch := s[i]
44
+
if q, quoted := quoteByte(ch); quoted {
45
+
if qpos == 0 {
46
+
fm.WriteByte('"')
47
+
}
48
+
fm.WriteString(s[qpos:i])
49
+
fm.Write(q)
50
+
qpos = i + 1
51
+
}
52
+
}
53
+
fm.WriteString(s[qpos:])
54
+
if qpos > 0 {
55
+
fm.WriteByte('"')
56
+
}
57
+
}
58
+
59
+
// quoteEscapeTable maps each byte that has a single-character backslash
// escape to the character that follows the backslash.
var quoteEscapeTable = map[byte]byte{
	'\a': 'a',
	'\b': 'b',
	'\t': 't',
	'\n': 'n',
	'\v': 'v',
	'\f': 'f',
	'\r': 'r',
	'"':  '"',
	'\\': '\\',
}

// quoteByte returns the escaped form of b and true if b must be escaped in a
// quoted name, or nil and false if b can be written as is. Bytes listed in
// quoteEscapeTable use their short escape; any other byte below 0x20 or at or
// above 0x7F becomes a backslash followed by three octal digits.
func quoteByte(b byte) ([]byte, bool) {
	if esc, found := quoteEscapeTable[b]; found {
		return []byte{'\\', esc}, true
	}
	if b >= 0x20 && b < 0x7F {
		return nil, false
	}

	hi := '0' + ((b >> 6) & 0o3)
	mid := '0' + ((b >> 3) & 0o7)
	lo := '0' + (b & 0o7)
	return []byte{'\\', hi, mid, lo}, true
}
85
+
86
+
func (fm *formatter) FormatFile(f *File) {
87
+
fm.WriteString("diff --git ")
88
+
89
+
var aName, bName string
90
+
switch {
91
+
case f.OldName == "":
92
+
aName = f.NewName
93
+
bName = f.NewName
94
+
95
+
case f.NewName == "":
96
+
aName = f.OldName
97
+
bName = f.OldName
98
+
99
+
default:
100
+
aName = f.OldName
101
+
bName = f.NewName
102
+
}
103
+
104
+
fm.WriteQuotedName("a/" + aName)
105
+
fm.WriteByte(' ')
106
+
fm.WriteQuotedName("b/" + bName)
107
+
fm.WriteByte('\n')
108
+
109
+
if f.OldMode != 0 {
110
+
if f.IsDelete {
111
+
fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode)
112
+
} else if f.NewMode != 0 {
113
+
fmt.Fprintf(fm, "old mode %o\n", f.OldMode)
114
+
}
115
+
}
116
+
117
+
if f.NewMode != 0 {
118
+
if f.IsNew {
119
+
fmt.Fprintf(fm, "new file mode %o\n", f.NewMode)
120
+
} else if f.OldMode != 0 {
121
+
fmt.Fprintf(fm, "new mode %o\n", f.NewMode)
122
+
}
123
+
}
124
+
125
+
if f.Score > 0 {
126
+
if f.IsCopy || f.IsRename {
127
+
fmt.Fprintf(fm, "similarity index %d%%\n", f.Score)
128
+
} else {
129
+
fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score)
130
+
}
131
+
}
132
+
133
+
if f.IsCopy {
134
+
if f.OldName != "" {
135
+
fm.WriteString("copy from ")
136
+
fm.WriteQuotedName(f.OldName)
137
+
fm.WriteByte('\n')
138
+
}
139
+
if f.NewName != "" {
140
+
fm.WriteString("copy to ")
141
+
fm.WriteQuotedName(f.NewName)
142
+
fm.WriteByte('\n')
143
+
}
144
+
}
145
+
146
+
if f.IsRename {
147
+
if f.OldName != "" {
148
+
fm.WriteString("rename from ")
149
+
fm.WriteQuotedName(f.OldName)
150
+
fm.WriteByte('\n')
151
+
}
152
+
if f.NewName != "" {
153
+
fm.WriteString("rename to ")
154
+
fm.WriteQuotedName(f.NewName)
155
+
fm.WriteByte('\n')
156
+
}
157
+
}
158
+
159
+
if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" {
160
+
fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix)
161
+
162
+
// Mode is only included on the index line when it is not changing
163
+
if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) {
164
+
fmt.Fprintf(fm, " %o", f.OldMode)
165
+
}
166
+
167
+
fm.WriteByte('\n')
168
+
}
169
+
170
+
if f.IsBinary {
171
+
if f.BinaryFragment == nil {
172
+
fm.WriteString("Binary files ")
173
+
fm.WriteQuotedName("a/" + aName)
174
+
fm.WriteString(" and ")
175
+
fm.WriteQuotedName("b/" + bName)
176
+
fm.WriteString(" differ\n")
177
+
} else {
178
+
fm.WriteString("GIT binary patch\n")
179
+
fm.FormatBinaryFragment(f.BinaryFragment)
180
+
if f.ReverseBinaryFragment != nil {
181
+
fm.FormatBinaryFragment(f.ReverseBinaryFragment)
182
+
}
183
+
}
184
+
}
185
+
186
+
// The "---" and "+++" lines only appear for text patches with fragments
187
+
if len(f.TextFragments) > 0 {
188
+
fm.WriteString("--- ")
189
+
if f.OldName == "" {
190
+
fm.WriteString("/dev/null")
191
+
} else {
192
+
fm.WriteQuotedName("a/" + f.OldName)
193
+
}
194
+
fm.WriteByte('\n')
195
+
196
+
fm.WriteString("+++ ")
197
+
if f.NewName == "" {
198
+
fm.WriteString("/dev/null")
199
+
} else {
200
+
fm.WriteQuotedName("b/" + f.NewName)
201
+
}
202
+
fm.WriteByte('\n')
203
+
204
+
for _, frag := range f.TextFragments {
205
+
fm.FormatTextFragment(frag)
206
+
}
207
+
}
208
+
}
209
+
210
+
func (fm *formatter) FormatTextFragment(f *TextFragment) {
211
+
fm.FormatTextFragmentHeader(f)
212
+
fm.WriteByte('\n')
213
+
214
+
for _, line := range f.Lines {
215
+
fm.WriteString(line.Op.String())
216
+
fm.WriteString(line.Line)
217
+
if line.NoEOL() {
218
+
fm.WriteString("\n\\ No newline at end of file\n")
219
+
}
220
+
}
221
+
}
222
+
223
+
func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) {
224
+
fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines)
225
+
if f.Comment != "" {
226
+
fm.WriteByte(' ')
227
+
fm.WriteString(f.Comment)
228
+
}
229
+
}
230
+
231
+
func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) {
232
+
const (
233
+
maxBytesPerLine = 52
234
+
)
235
+
236
+
switch f.Method {
237
+
case BinaryPatchDelta:
238
+
fm.WriteString("delta ")
239
+
case BinaryPatchLiteral:
240
+
fm.WriteString("literal ")
241
+
}
242
+
fm.Write(strconv.AppendInt(nil, f.Size, 10))
243
+
fm.WriteByte('\n')
244
+
245
+
data := deflateBinaryChunk(f.Data)
246
+
n := (len(data) / maxBytesPerLine) * maxBytesPerLine
247
+
248
+
buf := make([]byte, base85Len(maxBytesPerLine))
249
+
for i := 0; i < n; i += maxBytesPerLine {
250
+
base85Encode(buf, data[i:i+maxBytesPerLine])
251
+
fm.WriteByte('z')
252
+
fm.Write(buf)
253
+
fm.WriteByte('\n')
254
+
}
255
+
if remainder := len(data) - n; remainder > 0 {
256
+
buf = buf[0:base85Len(remainder)]
257
+
258
+
sizeChar := byte(remainder)
259
+
if remainder <= 26 {
260
+
sizeChar = 'A' + sizeChar - 1
261
+
} else {
262
+
sizeChar = 'a' + sizeChar - 27
263
+
}
264
+
265
+
base85Encode(buf, data[n:])
266
+
fm.WriteByte(sizeChar)
267
+
fm.Write(buf)
268
+
fm.WriteByte('\n')
269
+
}
270
+
fm.WriteByte('\n')
271
+
}
272
+
273
+
// deflateBinaryChunk returns data compressed with zlib at the default level.
// Errors from the zlib writer are deliberately ignored; the writer's only
// destination is an in-memory buffer.
func deflateBinaryChunk(data []byte) []byte {
	compressed := new(bytes.Buffer)

	zw := zlib.NewWriter(compressed)
	_, _ = zw.Write(data)
	_ = zw.Close() // Close flushes the remaining compressed bytes

	return compressed.Bytes()
}
+157
gitdiff/format_roundtrip_test.go
+157
gitdiff/format_roundtrip_test.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"bytes"
5
+
"fmt"
6
+
"os"
7
+
"path/filepath"
8
+
"slices"
9
+
"testing"
10
+
)
11
+
12
+
func TestFormatRoundtrip(t *testing.T) {
13
+
patches := []struct {
14
+
File string
15
+
SkipTextCompare bool
16
+
}{
17
+
{File: "copy.patch"},
18
+
{File: "copy_modify.patch"},
19
+
{File: "delete.patch"},
20
+
{File: "mode.patch"},
21
+
{File: "mode_modify.patch"},
22
+
{File: "modify.patch"},
23
+
{File: "new.patch"},
24
+
{File: "new_empty.patch"},
25
+
{File: "new_mode.patch"},
26
+
{File: "rename.patch"},
27
+
{File: "rename_modify.patch"},
28
+
29
+
// Due to differences between Go's 'encoding/zlib' package and the zlib
30
+
// C library, binary patches cannot be compared directly as the patch
31
+
// data is slightly different when re-encoded by Go.
32
+
{File: "binary_modify.patch", SkipTextCompare: true},
33
+
{File: "binary_new.patch", SkipTextCompare: true},
34
+
{File: "binary_modify_nodata.patch"},
35
+
}
36
+
37
+
for _, patch := range patches {
38
+
t.Run(patch.File, func(t *testing.T) {
39
+
b, err := os.ReadFile(filepath.Join("testdata", "string", patch.File))
40
+
if err != nil {
41
+
t.Fatalf("failed to read patch: %v", err)
42
+
}
43
+
44
+
original := assertParseSingleFile(t, b, "patch")
45
+
str := original.String()
46
+
47
+
if !patch.SkipTextCompare {
48
+
if string(b) != str {
49
+
t.Errorf("incorrect patch text\nexpected: %q\n actual: %q\n", string(b), str)
50
+
}
51
+
}
52
+
53
+
reparsed := assertParseSingleFile(t, []byte(str), "formatted patch")
54
+
assertFilesEqual(t, original, reparsed)
55
+
})
56
+
}
57
+
}
58
+
59
+
func assertParseSingleFile(t *testing.T, b []byte, kind string) *File {
60
+
files, _, err := Parse(bytes.NewReader(b))
61
+
if err != nil {
62
+
t.Fatalf("failed to parse %s: %v", kind, err)
63
+
}
64
+
if len(files) != 1 {
65
+
t.Fatalf("expected %s to contain a single files, but found %d", kind, len(files))
66
+
}
67
+
return files[0]
68
+
}
69
+
70
+
func assertFilesEqual(t *testing.T, expected, actual *File) {
71
+
assertEqual(t, expected.OldName, actual.OldName, "OldName")
72
+
assertEqual(t, expected.NewName, actual.NewName, "NewName")
73
+
74
+
assertEqual(t, expected.IsNew, actual.IsNew, "IsNew")
75
+
assertEqual(t, expected.IsDelete, actual.IsDelete, "IsDelete")
76
+
assertEqual(t, expected.IsCopy, actual.IsCopy, "IsCopy")
77
+
assertEqual(t, expected.IsRename, actual.IsRename, "IsRename")
78
+
79
+
assertEqual(t, expected.OldMode, actual.OldMode, "OldMode")
80
+
assertEqual(t, expected.NewMode, actual.NewMode, "NewMode")
81
+
82
+
assertEqual(t, expected.OldOIDPrefix, actual.OldOIDPrefix, "OldOIDPrefix")
83
+
assertEqual(t, expected.NewOIDPrefix, actual.NewOIDPrefix, "NewOIDPrefix")
84
+
assertEqual(t, expected.Score, actual.Score, "Score")
85
+
86
+
if len(expected.TextFragments) == len(actual.TextFragments) {
87
+
for i := range expected.TextFragments {
88
+
prefix := fmt.Sprintf("TextFragments[%d].", i)
89
+
ef := expected.TextFragments[i]
90
+
af := actual.TextFragments[i]
91
+
92
+
assertEqual(t, ef.Comment, af.Comment, prefix+"Comment")
93
+
94
+
assertEqual(t, ef.OldPosition, af.OldPosition, prefix+"OldPosition")
95
+
assertEqual(t, ef.OldLines, af.OldLines, prefix+"OldLines")
96
+
97
+
assertEqual(t, ef.NewPosition, af.NewPosition, prefix+"NewPosition")
98
+
assertEqual(t, ef.NewLines, af.NewLines, prefix+"NewLines")
99
+
100
+
assertEqual(t, ef.LinesAdded, af.LinesAdded, prefix+"LinesAdded")
101
+
assertEqual(t, ef.LinesDeleted, af.LinesDeleted, prefix+"LinesDeleted")
102
+
103
+
assertEqual(t, ef.LeadingContext, af.LeadingContext, prefix+"LeadingContext")
104
+
assertEqual(t, ef.TrailingContext, af.TrailingContext, prefix+"TrailingContext")
105
+
106
+
if !slices.Equal(ef.Lines, af.Lines) {
107
+
t.Errorf("%sLines: expected %#v, actual %#v", prefix, ef.Lines, af.Lines)
108
+
}
109
+
}
110
+
} else {
111
+
t.Errorf("TextFragments: expected length %d, actual length %d", len(expected.TextFragments), len(actual.TextFragments))
112
+
}
113
+
114
+
assertEqual(t, expected.IsBinary, actual.IsBinary, "IsBinary")
115
+
116
+
if expected.BinaryFragment != nil {
117
+
if actual.BinaryFragment == nil {
118
+
t.Errorf("BinaryFragment: expected non-nil, actual is nil")
119
+
} else {
120
+
ef := expected.BinaryFragment
121
+
af := expected.BinaryFragment
122
+
123
+
assertEqual(t, ef.Method, af.Method, "BinaryFragment.Method")
124
+
assertEqual(t, ef.Size, af.Size, "BinaryFragment.Size")
125
+
126
+
if !slices.Equal(ef.Data, af.Data) {
127
+
t.Errorf("BinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data)
128
+
}
129
+
}
130
+
} else if actual.BinaryFragment != nil {
131
+
t.Errorf("BinaryFragment: expected nil, actual is non-nil")
132
+
}
133
+
134
+
if expected.ReverseBinaryFragment != nil {
135
+
if actual.ReverseBinaryFragment == nil {
136
+
t.Errorf("ReverseBinaryFragment: expected non-nil, actual is nil")
137
+
} else {
138
+
ef := expected.ReverseBinaryFragment
139
+
af := expected.ReverseBinaryFragment
140
+
141
+
assertEqual(t, ef.Method, af.Method, "ReverseBinaryFragment.Method")
142
+
assertEqual(t, ef.Size, af.Size, "ReverseBinaryFragment.Size")
143
+
144
+
if !slices.Equal(ef.Data, af.Data) {
145
+
t.Errorf("ReverseBinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data)
146
+
}
147
+
}
148
+
} else if actual.ReverseBinaryFragment != nil {
149
+
t.Errorf("ReverseBinaryFragment: expected nil, actual is non-nil")
150
+
}
151
+
}
152
+
153
+
// assertEqual reports a test error naming the compared value if expected and
// actual differ. The test continues after a mismatch.
func assertEqual[T comparable](t *testing.T, expected, actual T, name string) {
	// attribute failures to the calling test rather than to this helper
	t.Helper()

	if expected != actual {
		t.Errorf("%s: expected %#v, actual %#v", name, expected, actual)
	}
}
+28
gitdiff/format_test.go
+28
gitdiff/format_test.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"strings"
5
+
"testing"
6
+
)
7
+
8
+
func TestFormatter_WriteQuotedName(t *testing.T) {
9
+
tests := []struct {
10
+
Input string
11
+
Expected string
12
+
}{
13
+
{"noquotes.txt", `noquotes.txt`},
14
+
{"no quotes.txt", `no quotes.txt`},
15
+
{"new\nline", `"new\nline"`},
16
+
{"escape\x1B null\x00", `"escape\033 null\000"`},
17
+
{"snowman \u2603 snowman", `"snowman \342\230\203 snowman"`},
18
+
{"\"already quoted\"", `"\"already quoted\""`},
19
+
}
20
+
21
+
for _, test := range tests {
22
+
var b strings.Builder
23
+
newFormatter(&b).WriteQuotedName(test.Input)
24
+
if b.String() != test.Expected {
25
+
t.Errorf("expected %q, got %q", test.Expected, b.String())
26
+
}
27
+
}
28
+
}
+33
-2
gitdiff/gitdiff.go
+33
-2
gitdiff/gitdiff.go
···
4
4
"errors"
5
5
"fmt"
6
6
"os"
7
+
"strings"
7
8
)
8
9
9
10
// File describes changes to a single file. It can be either a text file or a
···
38
39
ReverseBinaryFragment *BinaryFragment
39
40
}
40
41
42
+
// String returns a git diff representation of this file. The value can be
43
+
// parsed by this library to obtain the same File, but may not be the same as
44
+
// the original input.
45
+
func (f *File) String() string {
46
+
var diff strings.Builder
47
+
newFormatter(&diff).FormatFile(f)
48
+
return diff.String()
49
+
}
50
+
41
51
// TextFragment describes changed lines starting at a specific line in a text file.
42
52
type TextFragment struct {
43
53
Comment string
···
57
67
Lines []Line
58
68
}
59
69
60
-
// Header returns the canonical header of this fragment.
70
+
// String returns a git diff format of this fragment. See [File.String] for
71
+
// more details on this format.
72
+
func (f *TextFragment) String() string {
73
+
var diff strings.Builder
74
+
newFormatter(&diff).FormatTextFragment(f)
75
+
return diff.String()
76
+
}
77
+
78
+
// Header returns a git diff header of this fragment. See [File.String] for
79
+
// more details on this format.
61
80
func (f *TextFragment) Header() string {
62
-
return fmt.Sprintf("@@ -%d,%d +%d,%d @@ %s", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines, f.Comment)
81
+
var hdr strings.Builder
82
+
newFormatter(&hdr).FormatTextFragmentHeader(f)
83
+
return hdr.String()
63
84
}
64
85
65
86
// Validate checks that the fragment is self-consistent and appliable. Validate
···
197
218
// BinaryPatchLiteral indicates the data is the exact file content
198
219
BinaryPatchLiteral
199
220
)
221
+
222
+
// String returns a git diff format of this fragment. Due to differences in
223
+
// zlib implementation between Go and Git, encoded binary data in the result
224
+
// will likely differ from what Git produces for the same input. See
225
+
// [File.String] for more details on this format.
226
+
func (f *BinaryFragment) String() string {
227
+
var diff strings.Builder
228
+
newFormatter(&diff).FormatBinaryFragment(f)
229
+
return diff.String()
230
+
}
+22
-22
gitdiff/io.go
+22
-22
gitdiff/io.go
···
5
5
"io"
6
6
)
7
7
8
+
const (
9
+
byteBufferSize = 32 * 1024 // from io.Copy
10
+
lineBufferSize = 32
11
+
indexBufferSize = 1024
12
+
)
13
+
8
14
// LineReaderAt is the interface that wraps the ReadLinesAt method.
9
15
//
10
-
// ReadLinesAt reads len(lines) into lines starting at line offset in the
11
-
// input source. It returns number of full lines read (0 <= n <= len(lines))
12
-
// and any error encountered. Line numbers are zero-indexed.
16
+
// ReadLinesAt reads len(lines) into lines starting at line offset. It returns
17
+
// the number of lines read (0 <= n <= len(lines)) and any error encountered.
18
+
// Line numbers are zero-indexed.
13
19
//
14
20
// If n < len(lines), ReadLinesAt returns a non-nil error explaining why more
15
21
// lines were not returned.
16
22
//
17
-
// Each full line includes the line ending character(s). If the last line of
18
-
// the input does not have a line ending character, ReadLinesAt returns the
19
-
// content of the line and io.EOF.
20
-
//
21
-
// If the content of the input source changes after the first call to
22
-
// ReadLinesAt, the behavior of future calls is undefined.
23
+
// Lines read by ReadLinesAt include the newline character. The last line does
24
+
// not have a final newline character if the input ends without one.
23
25
type LineReaderAt interface {
24
26
ReadLinesAt(lines [][]byte, offset int64) (n int, err error)
25
27
}
···
65
67
lines[n] = buf[start:end]
66
68
}
67
69
68
-
if n < count || buf[len(buf)-1] != '\n' {
70
+
if n < count {
69
71
return n, io.EOF
70
72
}
71
73
return n, nil
···
75
77
// for line or a read returns io.EOF. It returns an error if and only if there
76
78
// is an error reading data.
77
79
func (r *lineReaderAt) indexTo(line int64) error {
78
-
var buf [1024]byte
79
-
80
-
var offset int64
81
-
if len(r.index) > 0 {
82
-
offset = r.index[len(r.index)-1]
83
-
}
80
+
var buf [indexBufferSize]byte
84
81
82
+
offset := r.lastOffset()
85
83
for int64(len(r.index)) < line {
86
84
n, err := r.r.ReadAt(buf[:], offset)
87
85
if err != nil && err != io.EOF {
···
94
92
}
95
93
}
96
94
if err == io.EOF {
97
-
if n > 0 && buf[n-1] != '\n' {
95
+
if offset > r.lastOffset() {
98
96
r.index = append(r.index, offset)
99
97
}
100
98
r.eof = true
···
102
100
}
103
101
}
104
102
return nil
103
+
}
104
+
105
+
func (r *lineReaderAt) lastOffset() int64 {
106
+
if n := len(r.index); n > 0 {
107
+
return r.index[n-1]
108
+
}
109
+
return 0
105
110
}
106
111
107
112
// readBytes reads the bytes of the n lines starting at line and returns the
···
146
151
}
147
152
return false, err
148
153
}
149
-
150
-
const (
151
-
byteBufferSize = 32 * 1024 // from io.Copy
152
-
lineBufferSize = 32
153
-
)
154
154
155
155
// copyFrom writes bytes starting from offset off in src to dst stopping at the
156
156
// end of src or at the first error. copyFrom returns the number of bytes
+54
-2
gitdiff/io_test.go
+54
-2
gitdiff/io_test.go
···
9
9
)
10
10
11
11
func TestLineReaderAt(t *testing.T) {
12
+
const lineTemplate = "generated test line %d\n"
13
+
12
14
tests := map[string]struct {
13
15
InputLines int
14
16
Offset int64
···
41
43
InputLines: 4,
42
44
Offset: 2,
43
45
Count: 0,
46
+
},
47
+
"readAllLines": {
48
+
InputLines: 64,
49
+
Offset: 0,
50
+
Count: 64,
44
51
},
45
52
"readThroughEOF": {
46
53
InputLines: 16,
···
71
78
},
72
79
}
73
80
74
-
const lineTemplate = "generated test line %d\n"
75
-
76
81
for name, test := range tests {
77
82
t.Run(name, func(t *testing.T) {
78
83
var input bytes.Buffer
···
110
115
for i := 0; i < n; i++ {
111
116
if !bytes.Equal(output[i], lines[i]) {
112
117
t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], lines[i])
118
+
}
119
+
}
120
+
})
121
+
}
122
+
123
+
newlineTests := map[string]struct {
124
+
InputSize int
125
+
}{
126
+
"readLinesNoFinalNewline": {
127
+
InputSize: indexBufferSize + indexBufferSize/2,
128
+
},
129
+
"readLinesNoFinalNewlineBufferMultiple": {
130
+
InputSize: 4 * indexBufferSize,
131
+
},
132
+
}
133
+
134
+
for name, test := range newlineTests {
135
+
t.Run(name, func(t *testing.T) {
136
+
input := bytes.Repeat([]byte("0"), test.InputSize)
137
+
138
+
var output [][]byte
139
+
for i := 0; i < len(input); i++ {
140
+
last := i
141
+
i += rand.Intn(80)
142
+
if i < len(input)-1 { // last character of input must not be a newline
143
+
input[i] = '\n'
144
+
output = append(output, input[last:i+1])
145
+
} else {
146
+
output = append(output, input[last:])
147
+
}
148
+
}
149
+
150
+
r := &lineReaderAt{r: bytes.NewReader(input)}
151
+
lines := make([][]byte, len(output))
152
+
153
+
n, err := r.ReadLinesAt(lines, 0)
154
+
if err != nil {
155
+
t.Fatalf("unexpected error reading reading lines: %v", err)
156
+
}
157
+
158
+
if n != len(output) {
159
+
t.Fatalf("incorrect number of lines read: expected %d, actual %d", len(output), n)
160
+
}
161
+
162
+
for i, line := range lines {
163
+
if !bytes.Equal(output[i], line) {
164
+
t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], line)
113
165
}
114
166
}
115
167
})
+7
-3
gitdiff/parser.go
+7
-3
gitdiff/parser.go
···
12
12
// Parse parses a patch with changes to one or more files. Any content before
13
13
// the first file is returned as the second value. If an error occurs while
14
14
// parsing, it returns all files parsed before the error.
15
+
//
16
+
// Parse expects to receive a single patch. If the input may contain multiple
17
+
// patches (for example, if it is an mbox file), callers should split it into
18
+
// individual patches and call Parse on each one.
15
19
func Parse(r io.Reader) ([]*File, string, error) {
16
20
p := newParser(r)
17
21
···
29
33
if err != nil {
30
34
return files, preamble, err
31
35
}
36
+
if len(files) == 0 {
37
+
preamble = pre
38
+
}
32
39
if file == nil {
33
40
break
34
41
}
···
46
53
}
47
54
}
48
55
49
-
if len(files) == 0 {
50
-
preamble = pre
51
-
}
52
56
files = append(files, file)
53
57
}
54
58
+16
-2
gitdiff/parser_test.go
+16
-2
gitdiff/parser_test.go
···
281
281
--- could this be a header?
282
282
nope, it's just some dashes
283
283
`,
284
-
Output: nil,
285
-
Preamble: "",
284
+
Output: nil,
285
+
Preamble: `
286
+
this is a line
287
+
this is another line
288
+
--- could this be a header?
289
+
nope, it's just some dashes
290
+
`,
286
291
},
287
292
"detatchedFragmentLike": {
288
293
Input: `
···
290
295
@@ -1,3 +1,4 ~1,5 @@
291
296
`,
292
297
Output: nil,
298
+
Preamble: `
299
+
a wild fragment appears?
300
+
@@ -1,3 +1,4 ~1,5 @@
301
+
`,
293
302
},
294
303
"detatchedFragment": {
295
304
Input: `
···
425
434
},
426
435
},
427
436
Preamble: textPreamble,
437
+
},
438
+
"noFiles": {
439
+
InputFile: "testdata/no_files.patch",
440
+
Output: nil,
441
+
Preamble: textPreamble,
428
442
},
429
443
"newBinaryFile": {
430
444
InputFile: "testdata/new_binary_file.patch",
+156
-128
gitdiff/patch_header.go
+156
-128
gitdiff/patch_header.go
···
5
5
"errors"
6
6
"fmt"
7
7
"io"
8
+
"io/ioutil"
9
+
"mime/quotedprintable"
8
10
"net/mail"
9
11
"strconv"
10
12
"strings"
···
13
15
)
14
16
15
17
const (
16
-
mailHeaderPrefix = "From "
17
-
prettyHeaderPrefix = "commit "
18
+
mailHeaderPrefix = "From "
19
+
prettyHeaderPrefix = "commit "
20
+
mailMinimumHeaderPrefix = "From:"
18
21
)
19
22
20
23
// PatchHeader is a parsed version of the preamble content that appears before
···
49
52
// line, that line will be removed and everything after it will be
50
53
// placed in BodyAppendix.
51
54
BodyAppendix string
55
+
56
+
// All headers completely unparsed
57
+
RawHeaders map[string][]string
52
58
}
53
59
54
60
// Message returns the commit message for the header. The message consists of
···
65
71
return msg.String()
66
72
}
67
73
68
-
// PatchIdentity identifies a person who authored or committed a patch.
69
-
type PatchIdentity struct {
70
-
Name string
71
-
Email string
72
-
}
73
-
74
-
func (i PatchIdentity) String() string {
75
-
name := i.Name
76
-
if name == "" {
77
-
name = `""`
78
-
}
79
-
return fmt.Sprintf("%s <%s>", name, i.Email)
80
-
}
81
-
82
-
// ParsePatchIdentity parses a patch identity string. A valid string contains a
83
-
// non-empty name followed by an email address in angle brackets. Like Git,
84
-
// ParsePatchIdentity does not require that the email address is valid or
85
-
// properly formatted, only that it is non-empty. The name must not contain a
86
-
// left angle bracket, '<', and the email address must not contain a right
87
-
// angle bracket, '>'.
88
-
func ParsePatchIdentity(s string) (PatchIdentity, error) {
89
-
var emailStart, emailEnd int
90
-
for i, c := range s {
91
-
if c == '<' && emailStart == 0 {
92
-
emailStart = i + 1
93
-
}
94
-
if c == '>' && emailStart > 0 {
95
-
emailEnd = i
96
-
break
97
-
}
98
-
}
99
-
if emailStart > 0 && emailEnd == 0 {
100
-
return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s)
101
-
}
102
-
103
-
var name, email string
104
-
if emailStart > 0 {
105
-
name = strings.TrimSpace(s[:emailStart-1])
106
-
}
107
-
if emailStart > 0 && emailEnd > 0 {
108
-
email = strings.TrimSpace(s[emailStart:emailEnd])
109
-
}
110
-
if name == "" || email == "" {
111
-
return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s)
112
-
}
113
-
114
-
return PatchIdentity{Name: name, Email: email}, nil
115
-
}
116
-
117
74
// ParsePatchDate parses a patch date string. It returns the parsed time or an
118
75
// error if s has an unknown format. ParsePatchDate supports the iso, rfc,
119
76
// short, raw, unix, and default formats (with local variants) used by the
···
162
119
return time.Time{}, fmt.Errorf("unknown date format: %s", s)
163
120
}
164
121
165
-
// ParsePatchHeader parses a preamble string as returned by Parse into a
122
+
// A PatchHeaderOption modifies the behavior of ParsePatchHeader.
123
+
type PatchHeaderOption func(*patchHeaderOptions)
124
+
125
+
// SubjectCleanMode controls how ParsePatchHeader cleans subject lines when
126
+
// parsing mail-formatted patches.
127
+
type SubjectCleanMode int
128
+
129
+
const (
130
+
// SubjectCleanWhitespace removes leading and trailing whitespace.
131
+
SubjectCleanWhitespace SubjectCleanMode = iota
132
+
133
+
// SubjectCleanAll removes leading and trailing whitespace, leading "Re:",
134
+
// "re:", and ":" strings, and leading strings enclosed by '[' and ']'.
135
+
// This is the default behavior of git (see `git mailinfo`) and this
136
+
// package.
137
+
SubjectCleanAll
138
+
139
+
// SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes
140
+
// leading strings enclosed by '[' and ']' if they start with "PATCH".
141
+
SubjectCleanPatchOnly
142
+
)
143
+
144
+
// WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By
145
+
// default, uses SubjectCleanAll.
146
+
func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption {
147
+
return func(opts *patchHeaderOptions) {
148
+
opts.subjectCleanMode = m
149
+
}
150
+
}
151
+
152
+
type patchHeaderOptions struct {
153
+
subjectCleanMode SubjectCleanMode
154
+
}
155
+
156
+
// ParsePatchHeader parses the preamble string returned by [Parse] into a
166
157
// PatchHeader. Due to the variety of header formats, some fields of the parsed
167
158
// PatchHeader may be unset after parsing.
168
159
//
169
160
// Supported formats are the short, medium, full, fuller, and email pretty
170
-
// formats used by git diff, git log, and git show and the UNIX mailbox format
171
-
// used by git format-patch.
161
+
// formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox
162
+
// format used by `git format-patch`.
172
163
//
173
-
// If ParsePatchHeader detects that it is handling an email, it will
174
-
// remove extra content at the beginning of the title line, such as
175
-
// `[PATCH]` or `Re:` in the same way that `git mailinfo` does.
176
-
// SubjectPrefix will be set to the value of this removed string.
177
-
// (`git mailinfo` is the core part of `git am` that pulls information
178
-
// out of an individual mail.)
164
+
// When parsing mail-formatted headers, ParsePatchHeader tries to remove
165
+
// email-specific content from the title and body:
166
+
//
167
+
// - Based on the SubjectCleanMode, remove prefixes like reply markers and
168
+
// "[PATCH]" strings from the subject, saving any removed content in the
169
+
// SubjectPrefix field. Parsing always discards leading and trailing
170
+
// whitespace from the subject line. The default mode is SubjectCleanAll.
179
171
//
180
-
// Additionally, if ParsePatchHeader detects that it's handling an
181
-
// email, it will remove a `---` line and put anything after it into
182
-
// BodyAppendix.
172
+
// - If the body contains a "---" line (3 hyphens), remove that line and any
173
+
// content after it from the body and save it in the BodyAppendix field.
183
174
//
184
-
// Those wishing the effect of a plain `git am` should use
185
-
// `PatchHeader.Title + "\n" + PatchHeader.Body` (or
186
-
// `PatchHeader.Message()`). Those wishing to retain the subject
187
-
// prefix and appendix material should use `PatchHeader.SubjectPrefix
188
-
// + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" +
189
-
// PatchHeader.BodyAppendix`.
190
-
func ParsePatchHeader(s string) (*PatchHeader, error) {
191
-
r := bufio.NewReader(strings.NewReader(s))
175
+
// ParsePatchHeader tries to process content it does not understand without
176
+
// returning errors, but will return errors if well-identified content like
177
+
// dates or identities uses unknown or invalid formats.
178
+
func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) {
179
+
opts := patchHeaderOptions{
180
+
subjectCleanMode: SubjectCleanAll, // match git defaults
181
+
}
182
+
for _, optFn := range options {
183
+
optFn(&opts)
184
+
}
192
185
193
-
var line string
194
-
for {
195
-
var err error
196
-
line, err = r.ReadString('\n')
197
-
if err == io.EOF {
198
-
break
199
-
}
200
-
if err != nil {
201
-
return nil, err
202
-
}
186
+
header = strings.TrimSpace(header)
187
+
if header == "" {
188
+
return &PatchHeader{}, nil
189
+
}
203
190
204
-
line = strings.TrimSpace(line)
205
-
if len(line) > 0 {
206
-
break
207
-
}
191
+
var firstLine, rest string
192
+
if idx := strings.IndexByte(header, '\n'); idx >= 0 {
193
+
firstLine = header[:idx]
194
+
rest = header[idx+1:]
195
+
} else {
196
+
firstLine = header
197
+
rest = ""
208
198
}
209
199
210
200
switch {
211
-
case strings.HasPrefix(line, mailHeaderPrefix):
212
-
return parseHeaderMail(line, r)
213
-
case strings.HasPrefix(line, prettyHeaderPrefix):
214
-
return parseHeaderPretty(line, r)
201
+
case strings.HasPrefix(firstLine, mailHeaderPrefix):
202
+
return parseHeaderMail(firstLine, strings.NewReader(rest), opts)
203
+
204
+
case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix):
205
+
// With a minimum header, the first line is part of the actual mail
206
+
// content and needs to be parsed as part of the "rest"
207
+
return parseHeaderMail("", strings.NewReader(header), opts)
208
+
209
+
case strings.HasPrefix(firstLine, prettyHeaderPrefix):
210
+
return parseHeaderPretty(firstLine, strings.NewReader(rest))
215
211
}
212
+
216
213
return nil, errors.New("unrecognized patch header format")
217
214
}
218
215
···
227
224
228
225
h := &PatchHeader{}
229
226
230
-
prettyLine = prettyLine[len(prettyHeaderPrefix):]
227
+
prettyLine = strings.TrimPrefix(prettyLine, prettyHeaderPrefix)
231
228
if i := strings.IndexByte(prettyLine, ' '); i > 0 {
232
229
h.SHA = prettyLine[:i]
233
230
} else {
···
243
240
break
244
241
}
245
242
243
+
items := strings.SplitN(line, ":", 2)
244
+
245
+
// we have "key: value"
246
+
if len(items) == 2 {
247
+
key := items[0]
248
+
val := items[1]
249
+
h.RawHeaders[key] = append(h.RawHeaders[key], val)
250
+
}
251
+
246
252
switch {
247
253
case strings.HasPrefix(line, authorPrefix):
248
254
u, err := ParsePatchIdentity(line[len(authorPrefix):])
···
291
297
h.Title = title
292
298
293
299
if title != "" {
294
-
// Don't check for an appendix
300
+
// Don't check for an appendix, pretty headers do not contain them
295
301
body, _ := scanMessageBody(s, indent, false)
296
302
if s.Err() != nil {
297
303
return nil, s.Err()
···
360
366
return body.String(), appendix.String()
361
367
}
362
368
363
-
func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) {
369
+
func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) {
364
370
msg, err := mail.ReadMessage(r)
365
371
if err != nil {
366
372
return nil, err
367
373
}
368
374
369
375
h := &PatchHeader{}
376
+
h.RawHeaders = msg.Header
370
377
371
-
mailLine = mailLine[len(mailHeaderPrefix):]
372
-
if i := strings.IndexByte(mailLine, ' '); i > 0 {
373
-
h.SHA = mailLine[:i]
378
+
if strings.HasPrefix(mailLine, mailHeaderPrefix) {
379
+
mailLine = strings.TrimPrefix(mailLine, mailHeaderPrefix)
380
+
if i := strings.IndexByte(mailLine, ' '); i > 0 {
381
+
h.SHA = mailLine[:i]
382
+
}
374
383
}
375
384
376
-
addrs, err := msg.Header.AddressList("From")
377
-
if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) {
378
-
return nil, err
379
-
}
380
-
if len(addrs) > 0 {
381
-
addr := addrs[0]
382
-
if addr.Name == "" {
383
-
return nil, fmt.Errorf("invalid user string: %s", addr)
385
+
from := msg.Header.Get("From")
386
+
if from != "" {
387
+
u, err := ParsePatchIdentity(from)
388
+
if err != nil {
389
+
return nil, err
384
390
}
385
-
h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address}
391
+
h.Author = &u
386
392
}
387
393
388
394
date := msg.Header.Get("Date")
···
395
401
}
396
402
397
403
subject := msg.Header.Get("Subject")
398
-
h.SubjectPrefix, h.Title = parseSubject(subject)
404
+
h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode)
399
405
400
406
s := bufio.NewScanner(msg.Body)
401
407
h.Body, h.BodyAppendix = scanMessageBody(s, "", true)
···
406
412
return h, nil
407
413
}
408
414
409
-
// Takes an email subject and returns the patch prefix and commit
410
-
// title. i.e., `[PATCH v3 3/5] Implement foo` would return `[PATCH
411
-
// v3 3/5] ` and `Implement foo`
412
-
func parseSubject(s string) (string, string) {
413
-
// This is meant to be compatible with
414
-
// https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject().
415
-
// If compatibility with `git am` drifts, go there to see if there
416
-
// are any updates.
415
+
func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) {
416
+
switch mode {
417
+
case SubjectCleanAll, SubjectCleanPatchOnly:
418
+
case SubjectCleanWhitespace:
419
+
return "", strings.TrimSpace(decodeSubject(s))
420
+
default:
421
+
panic(fmt.Sprintf("unknown clean mode: %d", mode))
422
+
}
423
+
424
+
// Based on the algorithm from Git in mailinfo.c:cleanup_subject()
425
+
// If compatibility with `git am` drifts, go there to see if there are any updates.
417
426
418
427
at := 0
419
428
for at < len(s) {
420
429
switch s[at] {
421
430
case 'r', 'R':
422
431
// Detect re:, Re:, rE: and RE:
423
-
if at+2 < len(s) &&
424
-
(s[at+1] == 'e' || s[at+1] == 'E') &&
425
-
s[at+2] == ':' {
432
+
if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' {
426
433
at += 3
427
434
continue
428
435
}
···
433
440
continue
434
441
435
442
case '[':
436
-
// Look for closing parenthesis
437
-
j := at + 1
438
-
for ; j < len(s); j++ {
439
-
if s[j] == ']' {
440
-
break
443
+
if i := strings.IndexByte(s[at:], ']'); i > 0 {
444
+
if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") {
445
+
at += i + 1
446
+
continue
441
447
}
442
448
}
443
-
444
-
if j < len(s) {
445
-
at = j + 1
446
-
continue
447
-
}
448
449
}
449
450
450
-
// Only loop if we actually removed something
451
+
// Nothing was removed, end processing
451
452
break
452
453
}
453
454
454
-
return s[:at], s[at:]
455
+
prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace)
456
+
subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace)
457
+
return
458
+
}
459
+
460
+
// Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result
461
+
// of a `git format-patch` when the commit title has a non-ASCII character (e.g. an emoji).
462
+
// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject
463
+
func decodeSubject(encoded string) string {
464
+
if !strings.HasPrefix(encoded, "=?UTF-8?q?") {
465
+
// not UTF-8 encoded
466
+
return encoded
467
+
}
468
+
469
+
// If the subject is too long, `git format-patch` may produce a subject line across
470
+
// multiple lines. When parsed, this can look like the following:
471
+
// <UTF8-prefix><first-line> <UTF8-prefix><second-line>
472
+
payload := " " + encoded
473
+
payload = strings.ReplaceAll(payload, " =?UTF-8?q?", "")
474
+
payload = strings.ReplaceAll(payload, "?=", "")
475
+
476
+
decoded, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(payload)))
477
+
if err != nil {
478
+
// if err, abort decoding and return original subject
479
+
return encoded
480
+
}
481
+
482
+
return string(decoded)
455
483
}
+243
-104
gitdiff/patch_header_test.go
+243
-104
gitdiff/patch_header_test.go
···
5
5
"time"
6
6
)
7
7
8
-
func TestParsePatchIdentity(t *testing.T) {
9
-
tests := map[string]struct {
10
-
Input string
11
-
Output PatchIdentity
12
-
Err interface{}
13
-
}{
14
-
"simple": {
15
-
Input: "Morton Haypenny <mhaypenny@example.com>",
16
-
Output: PatchIdentity{
17
-
Name: "Morton Haypenny",
18
-
Email: "mhaypenny@example.com",
19
-
},
20
-
},
21
-
"extraWhitespace": {
22
-
Input: " Morton Haypenny <mhaypenny@example.com > ",
23
-
Output: PatchIdentity{
24
-
Name: "Morton Haypenny",
25
-
Email: "mhaypenny@example.com",
26
-
},
27
-
},
28
-
"trailingCharacters": {
29
-
Input: "Morton Haypenny <mhaypenny@example.com> unrelated garbage",
30
-
Output: PatchIdentity{
31
-
Name: "Morton Haypenny",
32
-
Email: "mhaypenny@example.com",
33
-
},
34
-
},
35
-
"missingName": {
36
-
Input: "<mhaypenny@example.com>",
37
-
Err: "invalid identity",
38
-
},
39
-
"missingEmail": {
40
-
Input: "Morton Haypenny",
41
-
Err: "invalid identity",
42
-
},
43
-
"unclosedEmail": {
44
-
Input: "Morton Haypenny <mhaypenny@example.com",
45
-
Err: "unclosed email",
46
-
},
47
-
}
48
-
49
-
for name, test := range tests {
50
-
t.Run(name, func(t *testing.T) {
51
-
id, err := ParsePatchIdentity(test.Input)
52
-
if test.Err != nil {
53
-
assertError(t, test.Err, err, "parsing identity")
54
-
return
55
-
}
56
-
if err != nil {
57
-
t.Fatalf("unexpected error parsing identity: %v", err)
58
-
}
59
-
60
-
if test.Output != id {
61
-
t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id)
62
-
}
63
-
})
64
-
}
65
-
}
66
-
67
8
func TestParsePatchDate(t *testing.T) {
68
9
expected := time.Date(2020, 4, 9, 8, 7, 6, 0, time.UTC)
69
10
···
138
79
}
139
80
expectedDate := time.Date(2020, 04, 11, 15, 21, 23, 0, time.FixedZone("PDT", -7*60*60))
140
81
expectedTitle := "A sample commit to test header parsing"
82
+
expectedEmojiOneLineTitle := "🤖 Enabling auto-merging"
83
+
expectedEmojiMultiLineTitle := "[IA64] Put ia64 config files on the Uwe Kleine-König diet"
141
84
expectedBody := "The medium format shows the body, which\nmay wrap on to multiple lines.\n\nAnother body line."
142
85
expectedBodyAppendix := "CC: Joe Smith <joe.smith@company.com>"
143
86
144
87
tests := map[string]struct {
145
-
Input string
146
-
Header PatchHeader
147
-
Err interface{}
88
+
Input string
89
+
Options []PatchHeaderOption
90
+
Header PatchHeader
91
+
Err interface{}
148
92
}{
149
93
"prettyShort": {
150
94
Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b
···
267
211
Body: expectedBody,
268
212
},
269
213
},
214
+
"mailboxPatchOnly": {
215
+
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
216
+
From: Morton Haypenny <mhaypenny@example.com>
217
+
Date: Sat, 11 Apr 2020 15:21:23 -0700
218
+
Subject: [PATCH] [BUG-123] A sample commit to test header parsing
219
+
220
+
The medium format shows the body, which
221
+
may wrap on to multiple lines.
222
+
223
+
Another body line.
224
+
`,
225
+
Options: []PatchHeaderOption{
226
+
WithSubjectCleanMode(SubjectCleanPatchOnly),
227
+
},
228
+
Header: PatchHeader{
229
+
SHA: expectedSHA,
230
+
Author: expectedIdentity,
231
+
AuthorDate: expectedDate,
232
+
Title: "[BUG-123] " + expectedTitle,
233
+
Body: expectedBody,
234
+
},
235
+
},
236
+
"mailboxEmojiOneLine": {
237
+
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
238
+
From: Morton Haypenny <mhaypenny@example.com>
239
+
Date: Sat, 11 Apr 2020 15:21:23 -0700
240
+
Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Enabling=20auto-merging?=
241
+
242
+
The medium format shows the body, which
243
+
may wrap on to multiple lines.
244
+
245
+
Another body line.
246
+
`,
247
+
Header: PatchHeader{
248
+
SHA: expectedSHA,
249
+
Author: expectedIdentity,
250
+
AuthorDate: expectedDate,
251
+
Title: expectedEmojiOneLineTitle,
252
+
Body: expectedBody,
253
+
},
254
+
},
255
+
"mailboxEmojiMultiLine": {
256
+
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
257
+
From: Morton Haypenny <mhaypenny@example.com>
258
+
Date: Sat, 11 Apr 2020 15:21:23 -0700
259
+
Subject: [PATCH] =?UTF-8?q?[IA64]=20Put=20ia64=20config=20files=20on=20the=20?=
260
+
=?UTF-8?q?Uwe=20Kleine-K=C3=B6nig=20diet?=
261
+
262
+
The medium format shows the body, which
263
+
may wrap on to multiple lines.
264
+
265
+
Another body line.
266
+
`,
267
+
Header: PatchHeader{
268
+
SHA: expectedSHA,
269
+
Author: expectedIdentity,
270
+
AuthorDate: expectedDate,
271
+
Title: expectedEmojiMultiLineTitle,
272
+
Body: expectedBody,
273
+
},
274
+
},
275
+
"mailboxRFC5322SpecialCharacters": {
276
+
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
277
+
From: "dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>
278
+
Date: Sat, 11 Apr 2020 15:21:23 -0700
279
+
Subject: [PATCH] A sample commit to test header parsing
280
+
281
+
The medium format shows the body, which
282
+
may wrap on to multiple lines.
283
+
284
+
Another body line.
285
+
`,
286
+
Header: PatchHeader{
287
+
SHA: expectedSHA,
288
+
Author: &PatchIdentity{
289
+
Name: "dependabot[bot]",
290
+
Email: "12345+dependabot[bot]@users.noreply.github.com",
291
+
},
292
+
AuthorDate: expectedDate,
293
+
Title: expectedTitle,
294
+
Body: expectedBody,
295
+
},
296
+
},
270
297
"mailboxAppendix": {
271
298
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
272
299
From: Morton Haypenny <mhaypenny@example.com>
···
289
316
BodyAppendix: expectedBodyAppendix,
290
317
},
291
318
},
319
+
"mailboxMinimalNoName": {
320
+
Input: `From: <mhaypenny@example.com>
321
+
Subject: [PATCH] A sample commit to test header parsing
322
+
323
+
The medium format shows the body, which
324
+
may wrap on to multiple lines.
325
+
326
+
Another body line.
327
+
`,
328
+
Header: PatchHeader{
329
+
Author: &PatchIdentity{expectedIdentity.Email, expectedIdentity.Email},
330
+
Title: expectedTitle,
331
+
Body: expectedBody,
332
+
},
333
+
},
334
+
"mailboxMinimal": {
335
+
Input: `From: Morton Haypenny <mhaypenny@example.com>
336
+
Subject: [PATCH] A sample commit to test header parsing
337
+
338
+
The medium format shows the body, which
339
+
may wrap on to multiple lines.
340
+
341
+
Another body line.
342
+
`,
343
+
Header: PatchHeader{
344
+
Author: expectedIdentity,
345
+
Title: expectedTitle,
346
+
Body: expectedBody,
347
+
},
348
+
},
292
349
"unwrapTitle": {
293
350
Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b
294
351
Author: Morton Haypenny <mhaypenny@example.com>
···
342
399
Author: expectedIdentity,
343
400
Title: expectedTitle,
344
401
},
402
+
},
403
+
"emptyHeader": {
404
+
Input: "",
405
+
Header: PatchHeader{},
345
406
},
346
407
}
347
408
348
409
for name, test := range tests {
349
410
t.Run(name, func(t *testing.T) {
350
-
h, err := ParsePatchHeader(test.Input)
411
+
h, err := ParsePatchHeader(test.Input, test.Options...)
351
412
if test.Err != nil {
352
413
assertError(t, test.Err, err, "parsing patch header")
353
414
return
···
402
463
}
403
464
}
404
465
405
-
func TestCleanupSubject(t *testing.T) {
406
-
exp := "A sample commit to test header parsing"
407
-
tests := map[string]string{
408
-
"plain": "",
409
-
"patch": "[PATCH] ",
410
-
"patchv5": "[PATCH v5] ",
411
-
"patchrfc": "[PATCH RFC] ",
412
-
"patchnospace": "[PATCH]",
413
-
"space": " ",
414
-
"re": "re: ",
415
-
"Re": "Re: ",
416
-
"RE": "rE: ",
417
-
"rere": "re: re: ",
418
-
}
466
+
func TestCleanSubject(t *testing.T) {
467
+
expectedSubject := "A sample commit to test header parsing"
419
468
420
-
for name, prefix := range tests {
421
-
gotprefix, gottitle := parseSubject(prefix + exp)
422
-
if gottitle != exp {
423
-
t.Errorf("%s: Incorrect parsing of prefix %s: got title %s, wanted %s",
424
-
name, prefix, gottitle, exp)
425
-
}
426
-
if gotprefix != prefix {
427
-
t.Errorf("%s: Incorrect parsing of prefix %s: got prefix %s",
428
-
name, prefix, gotprefix)
429
-
}
430
-
}
431
-
432
-
moretests := map[string]struct {
433
-
in, eprefix, etitle string
469
+
tests := map[string]struct {
470
+
Input string
471
+
Mode SubjectCleanMode
472
+
Prefix string
473
+
Subject string
434
474
}{
435
-
"Reimplement": {"Reimplement something", "", "Reimplement something"},
436
-
"patch-reimplement": {"[PATCH v5] Reimplement something", "[PATCH v5] ", "Reimplement something"},
437
-
"Openbracket": {"[Just to annoy people", "", "[Just to annoy people"},
475
+
"CleanAll/noPrefix": {
476
+
Input: expectedSubject,
477
+
Mode: SubjectCleanAll,
478
+
Subject: expectedSubject,
479
+
},
480
+
"CleanAll/patchPrefix": {
481
+
Input: "[PATCH] " + expectedSubject,
482
+
Mode: SubjectCleanAll,
483
+
Prefix: "[PATCH] ",
484
+
Subject: expectedSubject,
485
+
},
486
+
"CleanAll/patchPrefixNoSpace": {
487
+
Input: "[PATCH]" + expectedSubject,
488
+
Mode: SubjectCleanAll,
489
+
Prefix: "[PATCH]",
490
+
Subject: expectedSubject,
491
+
},
492
+
"CleanAll/patchPrefixContent": {
493
+
Input: "[PATCH 3/7] " + expectedSubject,
494
+
Mode: SubjectCleanAll,
495
+
Prefix: "[PATCH 3/7] ",
496
+
Subject: expectedSubject,
497
+
},
498
+
"CleanAll/spacePrefix": {
499
+
Input: " " + expectedSubject,
500
+
Mode: SubjectCleanAll,
501
+
Subject: expectedSubject,
502
+
},
503
+
"CleanAll/replyLowerPrefix": {
504
+
Input: "re: " + expectedSubject,
505
+
Mode: SubjectCleanAll,
506
+
Prefix: "re: ",
507
+
Subject: expectedSubject,
508
+
},
509
+
"CleanAll/replyMixedPrefix": {
510
+
Input: "Re: " + expectedSubject,
511
+
Mode: SubjectCleanAll,
512
+
Prefix: "Re: ",
513
+
Subject: expectedSubject,
514
+
},
515
+
"CleanAll/replyCapsPrefix": {
516
+
Input: "RE: " + expectedSubject,
517
+
Mode: SubjectCleanAll,
518
+
Prefix: "RE: ",
519
+
Subject: expectedSubject,
520
+
},
521
+
"CleanAll/replyDoublePrefix": {
522
+
Input: "Re: re: " + expectedSubject,
523
+
Mode: SubjectCleanAll,
524
+
Prefix: "Re: re: ",
525
+
Subject: expectedSubject,
526
+
},
527
+
"CleanAll/noPrefixSubjectHasRe": {
528
+
Input: "Reimplement parsing",
529
+
Mode: SubjectCleanAll,
530
+
Subject: "Reimplement parsing",
531
+
},
532
+
"CleanAll/patchPrefixSubjectHasRe": {
533
+
Input: "[PATCH 1/2] Reimplement parsing",
534
+
Mode: SubjectCleanAll,
535
+
Prefix: "[PATCH 1/2] ",
536
+
Subject: "Reimplement parsing",
537
+
},
538
+
"CleanAll/unclosedPrefix": {
539
+
Input: "[Just to annoy people",
540
+
Mode: SubjectCleanAll,
541
+
Subject: "[Just to annoy people",
542
+
},
543
+
"CleanAll/multiplePrefix": {
544
+
Input: " Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject + " ",
545
+
Mode: SubjectCleanAll,
546
+
Prefix: "Re:Re: [PATCH 1/2][DRAFT] ",
547
+
Subject: expectedSubject,
548
+
},
549
+
"CleanPatchOnly/patchPrefix": {
550
+
Input: "[PATCH] " + expectedSubject,
551
+
Mode: SubjectCleanPatchOnly,
552
+
Prefix: "[PATCH] ",
553
+
Subject: expectedSubject,
554
+
},
555
+
"CleanPatchOnly/mixedPrefix": {
556
+
Input: "[PATCH] [TICKET-123] " + expectedSubject,
557
+
Mode: SubjectCleanPatchOnly,
558
+
Prefix: "[PATCH] ",
559
+
Subject: "[TICKET-123] " + expectedSubject,
560
+
},
561
+
"CleanPatchOnly/multiplePrefix": {
562
+
Input: "Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject,
563
+
Mode: SubjectCleanPatchOnly,
564
+
Prefix: "Re:Re: [PATCH 1/2]",
565
+
Subject: "[DRAFT] " + expectedSubject,
566
+
},
567
+
"CleanWhitespace/leadingSpace": {
568
+
Input: " [PATCH] " + expectedSubject,
569
+
Mode: SubjectCleanWhitespace,
570
+
Subject: "[PATCH] " + expectedSubject,
571
+
},
572
+
"CleanWhitespace/trailingSpace": {
573
+
Input: "[PATCH] " + expectedSubject + " ",
574
+
Mode: SubjectCleanWhitespace,
575
+
Subject: "[PATCH] " + expectedSubject,
576
+
},
438
577
}
439
578
440
-
for name, test := range moretests {
441
-
prefix, title := parseSubject(test.in)
442
-
if title != test.etitle {
443
-
t.Errorf("%s: Incorrect parsing of %s: got title %s, wanted %s",
444
-
name, test.in, title, test.etitle)
445
-
}
446
-
if prefix != test.eprefix {
447
-
t.Errorf("%s: Incorrect parsing of %s: got prefix %s, wanted %s",
448
-
name, test.in, title, test.etitle)
449
-
}
579
+
for name, test := range tests {
580
+
t.Run(name, func(t *testing.T) {
581
+
prefix, subject := cleanSubject(test.Input, test.Mode)
582
+
if prefix != test.Prefix {
583
+
t.Errorf("incorrect prefix: expected %q, actual %q", test.Prefix, prefix)
584
+
}
585
+
if subject != test.Subject {
586
+
t.Errorf("incorrect subject: expected %q, actual %q", test.Subject, subject)
587
+
}
588
+
})
450
589
}
451
590
}
+166
gitdiff/patch_identity.go
+166
gitdiff/patch_identity.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"fmt"
5
+
"strings"
6
+
)
7
+
8
+
// PatchIdentity identifies a person who authored or committed a patch. It
// pairs a display name with an email address.
type PatchIdentity struct {
	Name  string
	Email string
}

// String formats the identity as "Name <Email>". An empty name is rendered as
// a pair of double quotes so that the output always has a name portion.
func (i PatchIdentity) String() string {
	if i.Name == "" {
		return `"" <` + i.Email + ">"
	}
	return i.Name + " <" + i.Email + ">"
}
21
+
22
+
// ParsePatchIdentity parses a patch identity string. A patch identity contains
23
+
// an email address and an optional name in [RFC 5322] format. This is either a
24
+
// plain email adddress or a name followed by an address in angle brackets:
25
+
//
26
+
// author@example.com
27
+
// Author Name <author@example.com>
28
+
//
29
+
// If the input is not one of these formats, ParsePatchIdentity applies a
30
+
// heuristic to separate the name and email portions. If both the name and
31
+
// email are missing or empty, ParsePatchIdentity returns an error. It
32
+
// otherwise does not validate the result.
33
+
//
34
+
// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322
35
+
func ParsePatchIdentity(s string) (PatchIdentity, error) {
36
+
s = normalizeSpace(s)
37
+
s = unquotePairs(s)
38
+
39
+
var name, email string
40
+
if at := strings.IndexByte(s, '@'); at >= 0 {
41
+
start, end := at, at
42
+
for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' {
43
+
start--
44
+
}
45
+
for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' {
46
+
end++
47
+
}
48
+
email = s[start+1 : end]
49
+
50
+
// Adjust the boundaries so that we drop angle brackets, but keep
51
+
// spaces when removing the email to form the name.
52
+
if start < 0 || s[start] != '<' {
53
+
start++
54
+
}
55
+
if end >= len(s) || s[end] != '>' {
56
+
end--
57
+
}
58
+
name = s[:start] + s[end+1:]
59
+
} else {
60
+
start, end := 0, 0
61
+
for i := 0; i < len(s); i++ {
62
+
if s[i] == '<' && start == 0 {
63
+
start = i + 1
64
+
}
65
+
if s[i] == '>' && start > 0 {
66
+
end = i
67
+
break
68
+
}
69
+
}
70
+
if start > 0 && end >= start {
71
+
email = strings.TrimSpace(s[start:end])
72
+
name = s[:start-1]
73
+
}
74
+
}
75
+
76
+
// After extracting the email, the name might contain extra whitespace
77
+
// again and may be surrounded by comment characters. The git source gives
78
+
// these examples of when this can happen:
79
+
//
80
+
// "Name <email@domain>"
81
+
// "email@domain (Name)"
82
+
// "Name <email@domain> (Comment)"
83
+
//
84
+
name = normalizeSpace(name)
85
+
if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") {
86
+
name = name[1 : len(name)-1]
87
+
}
88
+
name = strings.TrimSpace(name)
89
+
90
+
// If the name is empty or contains email-like characters, use the email
91
+
// instead (assuming one exists)
92
+
if name == "" || strings.ContainsAny(name, "@<>") {
93
+
name = email
94
+
}
95
+
96
+
if name == "" && email == "" {
97
+
return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s)
98
+
}
99
+
return PatchIdentity{Name: name, Email: email}, nil
100
+
}
101
+
102
+
// unquotePairs processes the RFC 5322 tokens "quoted-string" and "comment" to
// remove any "quoted-pairs" (backslash-escaped characters). It also removes
// the quotes from any quoted strings, but leaves the comment delimiters.
func unquotePairs(s string) string {
	var (
		sb       strings.Builder
		inQuote  bool // inside a "..." quoted-string
		depth    int  // nesting depth of (...) comments
		unescape bool // previous byte was a dropped backslash
	)

	for i := 0; i < len(s); i++ {
		c := s[i]

		// The byte following a dropped backslash is copied verbatim and
		// never interpreted as a delimiter.
		if unescape {
			unescape = false
			sb.WriteByte(c)
			continue
		}

		switch {
		case c == '\\' && (inQuote || depth > 0):
			// quoted-pair is only allowed in quoted-string/comment; outside
			// of those a backslash is ordinary text and is kept
			unescape = true
			continue
		case c == '"' && depth == 0:
			// drop the quote; quotes inside comments are kept as text
			inQuote = !inQuote
			continue
		case c == '(' && !inQuote:
			depth++
		case c == ')' && depth > 0:
			depth--
		}
		sb.WriteByte(c)
	}
	return sb.String()
}
143
+
144
+
// normalizeSpace trims leading and trailing whitespace from s and converts
145
+
// inner sequences of one or more whitespace characters to single spaces.
146
+
func normalizeSpace(s string) string {
147
+
var sb strings.Builder
148
+
for i := 0; i < len(s); i++ {
149
+
c := s[i]
150
+
if !isRFC5332Space(c) {
151
+
if sb.Len() > 0 && isRFC5332Space(s[i-1]) {
152
+
sb.WriteByte(' ')
153
+
}
154
+
sb.WriteByte(c)
155
+
}
156
+
}
157
+
return sb.String()
158
+
}
159
+
160
+
// isRFC5332Space reports whether c is a tab, line feed, carriage return, or
// space character.
//
// NOTE(review): the "5332" in the name looks like a transposition of RFC 5322,
// the RFC cited in the ParsePatchIdentity documentation; confirm before
// renaming.
func isRFC5332Space(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
+127
gitdiff/patch_identity_test.go
+127
gitdiff/patch_identity_test.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"testing"
5
+
)
6
+
7
+
func TestParsePatchIdentity(t *testing.T) {
8
+
tests := map[string]struct {
9
+
Input string
10
+
Output PatchIdentity
11
+
Err interface{}
12
+
}{
13
+
"simple": {
14
+
Input: "Morton Haypenny <mhaypenny@example.com>",
15
+
Output: PatchIdentity{
16
+
Name: "Morton Haypenny",
17
+
Email: "mhaypenny@example.com",
18
+
},
19
+
},
20
+
"extraWhitespace": {
21
+
Input: "\t Morton Haypenny \r\n<mhaypenny@example.com> ",
22
+
Output: PatchIdentity{
23
+
Name: "Morton Haypenny",
24
+
Email: "mhaypenny@example.com",
25
+
},
26
+
},
27
+
"trailingCharacters": {
28
+
Input: "Morton Haypenny <mhaypenny@example.com> II",
29
+
Output: PatchIdentity{
30
+
Name: "Morton Haypenny II",
31
+
Email: "mhaypenny@example.com",
32
+
},
33
+
},
34
+
"onlyEmail": {
35
+
Input: "mhaypenny@example.com",
36
+
Output: PatchIdentity{
37
+
Name: "mhaypenny@example.com",
38
+
Email: "mhaypenny@example.com",
39
+
},
40
+
},
41
+
"onlyEmailInBrackets": {
42
+
Input: "<mhaypenny@example.com>",
43
+
Output: PatchIdentity{
44
+
Name: "mhaypenny@example.com",
45
+
Email: "mhaypenny@example.com",
46
+
},
47
+
},
48
+
"rfc5322SpecialCharacters": {
49
+
Input: `"dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>`,
50
+
Output: PatchIdentity{
51
+
Name: "dependabot[bot]",
52
+
Email: "12345+dependabot[bot]@users.noreply.github.com",
53
+
},
54
+
},
55
+
"rfc5322QuotedPairs": {
56
+
Input: `"Morton \"Old-Timer\" Haypenny" <"mhaypenny\+[1900]"@example.com> (III \(PhD\))`,
57
+
Output: PatchIdentity{
58
+
Name: `Morton "Old-Timer" Haypenny (III (PhD))`,
59
+
Email: "mhaypenny+[1900]@example.com",
60
+
},
61
+
},
62
+
"rfc5322QuotedPairsOutOfContext": {
63
+
Input: `Morton \\Backslash Haypenny <mhaypenny@example.com>`,
64
+
Output: PatchIdentity{
65
+
Name: `Morton \\Backslash Haypenny`,
66
+
Email: "mhaypenny@example.com",
67
+
},
68
+
},
69
+
"emptyEmail": {
70
+
Input: "Morton Haypenny <>",
71
+
Output: PatchIdentity{
72
+
Name: "Morton Haypenny",
73
+
Email: "",
74
+
},
75
+
},
76
+
"unclosedEmail": {
77
+
Input: "Morton Haypenny <mhaypenny@example.com",
78
+
Output: PatchIdentity{
79
+
Name: "Morton Haypenny",
80
+
Email: "mhaypenny@example.com",
81
+
},
82
+
},
83
+
"bogusEmail": {
84
+
Input: "Morton Haypenny <mhaypenny>",
85
+
Output: PatchIdentity{
86
+
Name: "Morton Haypenny",
87
+
Email: "mhaypenny",
88
+
},
89
+
},
90
+
"bogusEmailWithWhitespace": {
91
+
Input: "Morton Haypenny < mhaypenny >",
92
+
Output: PatchIdentity{
93
+
Name: "Morton Haypenny",
94
+
Email: "mhaypenny",
95
+
},
96
+
},
97
+
"missingEmail": {
98
+
Input: "Morton Haypenny",
99
+
Err: "invalid identity",
100
+
},
101
+
"missingNameAndEmptyEmail": {
102
+
Input: "<>",
103
+
Err: "invalid identity",
104
+
},
105
+
"empty": {
106
+
Input: "",
107
+
Err: "invalid identity",
108
+
},
109
+
}
110
+
111
+
for name, test := range tests {
112
+
t.Run(name, func(t *testing.T) {
113
+
id, err := ParsePatchIdentity(test.Input)
114
+
if test.Err != nil {
115
+
assertError(t, test.Err, err, "parsing identity")
116
+
return
117
+
}
118
+
if err != nil {
119
+
t.Fatalf("unexpected error parsing identity: %v", err)
120
+
}
121
+
122
+
if test.Output != id {
123
+
t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id)
124
+
}
125
+
})
126
+
}
127
+
}
+1
-1
gitdiff/testdata/apply/bin.go
+1
-1
gitdiff/testdata/apply/bin.go
+10
gitdiff/testdata/apply/text_fragment_change_end_eol.patch
+10
gitdiff/testdata/apply/text_fragment_change_end_eol.patch
···
1
+
diff --git a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src
2
+
index a92d664..8cf2f17 100644
3
+
--- a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src
4
+
+++ b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src
5
+
@@ -1,3 +1,3 @@
6
+
line 1
7
+
line 2
8
+
-line 3
9
+
+line 3
10
+
\ No newline at end of file
+8
gitdiff/testdata/no_files.patch
+8
gitdiff/testdata/no_files.patch
+9
gitdiff/testdata/string/binary_modify.patch
+9
gitdiff/testdata/string/binary_modify.patch
+3
gitdiff/testdata/string/binary_modify_nodata.patch
+3
gitdiff/testdata/string/binary_modify_nodata.patch
+11
gitdiff/testdata/string/binary_new.patch
+11
gitdiff/testdata/string/binary_new.patch
···
1
+
diff --git a/file.bin b/file.bin
2
+
new file mode 100644
3
+
index 0000000000000000000000000000000000000000..a7f4d5d6975ec021016c02b6d58345ebf434f38c
4
+
GIT binary patch
5
+
literal 72
6
+
zcmV-O0Jr~td-`u6JcK&{KDK=<a#;v1^LR5&K)zQ0=Goz82(?nJ6_nD`f#8O9p}}{P
7
+
eiXim+rDI+BDadMQmMsO5Sw@;DbrCA+PamP;Ng_@F
8
+
9
+
literal 0
10
+
HcmV?d00001
11
+
+4
gitdiff/testdata/string/copy.patch
+4
gitdiff/testdata/string/copy.patch
+21
gitdiff/testdata/string/copy_modify.patch
+21
gitdiff/testdata/string/copy_modify.patch
···
1
+
diff --git a/file.txt b/numbers.txt
2
+
similarity index 57%
3
+
copy from file.txt
4
+
copy to numbers.txt
5
+
index c9e9e05..6c4a3e0 100644
6
+
--- a/file.txt
7
+
+++ b/numbers.txt
8
+
@@ -1,6 +1,6 @@
9
+
one
10
+
two
11
+
-three
12
+
+three three three
13
+
four
14
+
five
15
+
six
16
+
@@ -8,3 +8,5 @@ seven
17
+
eight
18
+
nine
19
+
ten
20
+
+eleven
21
+
+twelve
+16
gitdiff/testdata/string/delete.patch
+16
gitdiff/testdata/string/delete.patch
+3
gitdiff/testdata/string/mode.patch
+3
gitdiff/testdata/string/mode.patch
+10
gitdiff/testdata/string/mode_modify.patch
+10
gitdiff/testdata/string/mode_modify.patch
+16
gitdiff/testdata/string/modify.patch
+16
gitdiff/testdata/string/modify.patch
+16
gitdiff/testdata/string/new.patch
+16
gitdiff/testdata/string/new.patch
+3
gitdiff/testdata/string/new_empty.patch
+3
gitdiff/testdata/string/new_empty.patch
+16
gitdiff/testdata/string/new_mode.patch
+16
gitdiff/testdata/string/new_mode.patch
+4
gitdiff/testdata/string/rename.patch
+4
gitdiff/testdata/string/rename.patch
+18
gitdiff/testdata/string/rename_modify.patch
+18
gitdiff/testdata/string/rename_modify.patch
···
1
+
diff --git a/file.txt b/numbers.txt
2
+
similarity index 77%
3
+
rename from file.txt
4
+
rename to numbers.txt
5
+
index c9e9e05..a6b31d6 100644
6
+
--- a/file.txt
7
+
+++ b/numbers.txt
8
+
@@ -3,8 +3,9 @@ two
9
+
three
10
+
four
11
+
five
12
+
-six
13
+
+ six
14
+
seven
15
+
eight
16
+
nine
17
+
ten
18
+
+eleven
+30
-18
gitdiff/text.go
+30
-18
gitdiff/text.go
···
79
79
return p.Errorf(0, "no content following fragment header")
80
80
}
81
81
82
-
isNoNewlineLine := func(s string) bool {
83
-
// test for "\ No newline at end of file" by prefix because the text
84
-
// changes by locale (git claims all versions are at least 12 chars)
85
-
return len(s) >= 12 && s[:2] == "\\ "
86
-
}
87
-
88
82
oldLines, newLines := frag.OldLines, frag.NewLines
89
-
for {
83
+
for oldLines > 0 || newLines > 0 {
90
84
line := p.Line(0)
91
85
op, data := line[0], line[1:]
92
86
···
113
107
frag.LinesAdded++
114
108
frag.TrailingContext = 0
115
109
frag.Lines = append(frag.Lines, Line{OpAdd, data})
116
-
default:
110
+
case '\\':
117
111
// this may appear in middle of fragment if it's for a deleted line
118
-
if isNoNewlineLine(line) {
119
-
last := &frag.Lines[len(frag.Lines)-1]
120
-
last.Line = strings.TrimSuffix(last.Line, "\n")
112
+
if isNoNewlineMarker(line) {
113
+
removeLastNewline(frag)
121
114
break
122
115
}
116
+
fallthrough
117
+
default:
123
118
// TODO(bkeyes): if this is because we hit the next header, it
124
119
// would be helpful to return the miscounts line error. We could
125
120
// either test for the common headers ("@@ -", "diff --git") or
126
121
// assume any invalid op ends the fragment; git returns the same
127
122
// generic error in all cases so either is compatible
128
123
return p.Errorf(0, "invalid line operation: %q", op)
129
-
}
130
-
131
-
next := p.Line(1)
132
-
if oldLines <= 0 && newLines <= 0 && !isNoNewlineLine(next) {
133
-
break
134
124
}
135
125
136
126
if err := p.Next(); err != nil {
···
145
135
hdr := max(frag.OldLines-oldLines, frag.NewLines-newLines) + 1
146
136
return p.Errorf(-hdr, "fragment header miscounts lines: %+d old, %+d new", -oldLines, -newLines)
147
137
}
138
+
if frag.LinesAdded == 0 && frag.LinesDeleted == 0 {
139
+
return p.Errorf(0, "fragment contains no changes")
140
+
}
148
141
149
-
if err := p.Next(); err != nil && err != io.EOF {
150
-
return err
142
+
// check for a final "no newline" marker since it is not included in the
143
+
// counters used to stop the loop above
144
+
if isNoNewlineMarker(p.Line(0)) {
145
+
removeLastNewline(frag)
146
+
if err := p.Next(); err != nil && err != io.EOF {
147
+
return err
148
+
}
151
149
}
150
+
152
151
return nil
152
+
}
153
+
154
+
// isNoNewlineMarker reports whether s looks like the "\ No newline at end of
// file" marker line of a text fragment.
func isNoNewlineMarker(s string) bool {
	// The marker text changes by locale, so only the leading "\ " is
	// compared; git claims all versions are at least 12 chars long.
	const minMarkerLen = 12
	return len(s) >= minMarkerLen && strings.HasPrefix(s, "\\ ")
}
159
+
160
+
func removeLastNewline(frag *TextFragment) {
161
+
if len(frag.Lines) > 0 {
162
+
last := &frag.Lines[len(frag.Lines)-1]
163
+
last.Line = strings.TrimSuffix(last.Line, "\n")
164
+
}
153
165
}
154
166
155
167
func parseRange(s string) (start int64, end int64, err error) {
+18
gitdiff/text_test.go
+18
gitdiff/text_test.go
···
317
317
},
318
318
Err: true,
319
319
},
320
+
"onlyContext": {
321
+
Input: ` context line
322
+
context line
323
+
`,
324
+
Fragment: TextFragment{
325
+
OldLines: 2,
326
+
NewLines: 2,
327
+
},
328
+
Err: true,
329
+
},
330
+
"unexpectedNoNewlineMarker": {
331
+
Input: `\ No newline at end of file`,
332
+
Fragment: TextFragment{
333
+
OldLines: 1,
334
+
NewLines: 1,
335
+
},
336
+
Err: true,
337
+
},
320
338
}
321
339
322
340
for name, test := range tests {