+6
-6
.github/workflows/go.yml
+6
-6
.github/workflows/go.yml
···
9
9
name: Verify
10
10
runs-on: ubuntu-latest
11
11
steps:
12
-
- name: Set up Go 1.16
13
-
uses: actions/setup-go@v2
12
+
- name: Set up Go 1.21
13
+
uses: actions/setup-go@v5
14
14
with:
15
-
go-version: 1.16
15
+
go-version: 1.21
16
16
17
17
- name: Check out code into the Go module directory
18
-
uses: actions/checkout@v2
18
+
uses: actions/checkout@v4
19
19
20
20
- name: Lint
21
-
uses: golangci/golangci-lint-action@v2
21
+
uses: golangci/golangci-lint-action@v7
22
22
with:
23
-
version: v1.28
23
+
version: v2.0
24
24
25
25
- name: Test
26
26
run: go test -v ./...
+41
-13
.golangci.yml
+41
-13
.golangci.yml
···
1
+
version: "2"
2
+
1
3
run:
2
4
tests: false
3
5
4
6
linters:
5
-
disable-all: true
7
+
default: none
6
8
enable:
7
-
- deadcode
8
9
- errcheck
9
-
- gofmt
10
-
- goimports
11
-
- golint
12
10
- govet
13
11
- ineffassign
14
-
- typecheck
12
+
- misspell
13
+
- revive
15
14
- unconvert
16
-
- varcheck
17
-
18
-
issues:
19
-
exclude-use-default: false
15
+
- unused
16
+
settings:
17
+
errcheck:
18
+
exclude-functions:
19
+
- (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write
20
+
- (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString
21
+
- (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte
22
+
- fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter)
23
+
revive:
24
+
rules:
25
+
- name: context-keys-type
26
+
- name: time-naming
27
+
- name: var-declaration
28
+
- name: unexported-return
29
+
- name: errorf
30
+
- name: blank-imports
31
+
- name: context-as-argument
32
+
- name: dot-imports
33
+
- name: error-return
34
+
- name: error-strings
35
+
- name: error-naming
36
+
- name: exported
37
+
- name: increment-decrement
38
+
- name: var-naming
39
+
- name: package-comments
40
+
- name: range
41
+
- name: receiver-naming
42
+
- name: indent-error-flow
20
43
21
-
linter-settings:
22
-
goimports:
23
-
local-prefixes: github.com/bluekeyes/go-gitdiff
44
+
formatters:
45
+
enable:
46
+
- gofmt
47
+
- goimports
48
+
settings:
49
+
goimports:
50
+
local-prefixes:
51
+
- github.com/bluekeyes/go-gitdiff
+15
-8
README.md
+15
-8
README.md
···
4
4
5
5
A Go library for parsing and applying patches generated by `git diff`, `git
6
6
show`, and `git format-patch`. It can also parse and apply unified diffs
7
-
generated by the standard `diff` tool.
7
+
generated by the standard GNU `diff` tool.
8
8
9
9
It supports standard line-oriented text patches and Git binary patches, and
10
10
aims to parse anything accepted by the `git apply` command.
···
29
29
30
30
// apply the changes in the patch to a source file
31
31
var output bytes.Buffer
32
-
if err := gitdiff.NewApplier(code).ApplyFile(&output, files[0]); err != nil {
32
+
if err := gitdiff.Apply(&output, code, files[0]); err != nil {
33
33
log.Fatal(err)
34
34
}
35
35
```
36
36
37
37
## Development Status
38
38
39
-
Mostly complete. API changes are possible, particularly for patch application,
40
-
but I expect the parsing interface and types to remain stable.
39
+
The parsing API and types are complete, and I expect them to remain stable. Version
40
+
0.7.0 introduced a new apply API that may change more in the future to support
41
+
non-strict patch application.
42
+
43
+
Parsing and strict application are well-covered by unit tests and the library
44
+
is used in a production application that parses and applies thousands of
45
+
patches every day. However, the space of all possible patches is large, so
46
+
there are likely undiscovered bugs.
41
47
42
-
Patch parsing and strict application are well-covered by unit tests and the
43
-
library is used in a production application that parses and applies thousands of
44
-
patches every day, but the space of all possible patches is large, so there are
45
-
likely undiscovered bugs.
48
+
The parsing code has also had a modest amount of fuzz testing.
46
49
47
50
## Why another git/unified diff parser?
48
51
···
96
99
context of each fragment must exactly match the source file; `git apply`
97
100
implements a search algorithm that tries different lines and amounts of
98
101
context, with further options to normalize or ignore whitespace changes.
102
+
103
+
7. When parsing mail-formatted patch headers, leading and trailing whitespace
104
+
is always removed from `Subject` lines. There is no exact equivalent to `git
105
+
mailinfo -k`.
+30
-334
gitdiff/apply.go
+30
-334
gitdiff/apply.go
···
13
13
// Users can test if an error was caused by a conflict by using errors.Is with
14
14
// an empty Conflict:
15
15
//
16
-
// if errors.Is(err, &Conflict{}) {
17
-
// // handle conflict
18
-
// }
19
-
//
16
+
// if errors.Is(err, &Conflict{}) {
17
+
// // handle conflict
18
+
// }
20
19
type Conflict struct {
21
20
msg string
22
21
}
···
89
88
90
89
var (
91
90
errApplyInProgress = errors.New("gitdiff: incompatible apply in progress")
92
-
)
93
-
94
-
const (
95
-
applyInitial = iota
96
-
applyText
97
-
applyBinary
98
-
applyFile
91
+
errApplierClosed = errors.New("gitdiff: applier is closed")
99
92
)
100
93
101
-
// Apply is a convenience function that creates an Applier for src with default
102
-
// settings and applies the changes in f, writing the result to dst.
94
+
// Apply applies the changes in f to src, writing the result to dst. It can
95
+
// apply both text and binary changes.
96
+
//
97
+
// If an error occurs while applying, Apply returns an *ApplyError that
98
+
// annotates the error with additional information. If the error is because of
99
+
// a conflict with the source, the wrapped error will be a *Conflict.
103
100
func Apply(dst io.Writer, src io.ReaderAt, f *File) error {
104
-
return NewApplier(src).ApplyFile(dst, f)
105
-
}
106
-
107
-
// Applier applies changes described in fragments to source data. If changes
108
-
// are described in multiple fragments, those fragments must be applied in
109
-
// order, usually by calling ApplyFile.
110
-
//
111
-
// By default, Applier operates in "strict" mode, where fragment content and
112
-
// positions must exactly match those of the source.
113
-
//
114
-
// If an error occurs while applying, methods on Applier return instances of
115
-
// *ApplyError that annotate the wrapped error with additional information
116
-
// when available. If the error is because of a conflict between a fragment and
117
-
// the source, the wrapped error will be a *Conflict.
118
-
//
119
-
// While an Applier can apply both text and binary fragments, only one fragment
120
-
// type can be used without resetting the Applier. The first fragment applied
121
-
// sets the type for the Applier. Mixing fragment types or mixing
122
-
// fragment-level and file-level applies results in an error.
123
-
type Applier struct {
124
-
src io.ReaderAt
125
-
lineSrc LineReaderAt
126
-
nextLine int64
127
-
applyType int
128
-
}
129
-
130
-
// NewApplier creates an Applier that reads data from src. If src is a
131
-
// LineReaderAt, it is used directly to apply text fragments.
132
-
func NewApplier(src io.ReaderAt) *Applier {
133
-
a := new(Applier)
134
-
a.Reset(src)
135
-
return a
136
-
}
137
-
138
-
// Reset resets the input and internal state of the Applier. If src is nil, the
139
-
// existing source is reused.
140
-
func (a *Applier) Reset(src io.ReaderAt) {
141
-
if src != nil {
142
-
a.src = src
143
-
if lineSrc, ok := src.(LineReaderAt); ok {
144
-
a.lineSrc = lineSrc
145
-
} else {
146
-
a.lineSrc = &lineReaderAt{r: src}
101
+
if f.IsBinary {
102
+
if len(f.TextFragments) > 0 {
103
+
return applyError(errors.New("binary file contains text fragments"))
104
+
}
105
+
if f.BinaryFragment == nil {
106
+
return applyError(errors.New("binary file does not contain a binary fragment"))
147
107
}
148
-
}
149
-
a.nextLine = 0
150
-
a.applyType = applyInitial
151
-
}
152
-
153
-
// ApplyFile applies the changes in all of the fragments of f and writes the
154
-
// result to dst.
155
-
func (a *Applier) ApplyFile(dst io.Writer, f *File) error {
156
-
if a.applyType != applyInitial {
157
-
return applyError(errApplyInProgress)
158
-
}
159
-
defer func() { a.applyType = applyFile }()
160
-
161
-
if f.IsBinary && len(f.TextFragments) > 0 {
162
-
return applyError(errors.New("binary file contains text fragments"))
163
-
}
164
-
if !f.IsBinary && f.BinaryFragment != nil {
165
-
return applyError(errors.New("text file contains binary fragment"))
108
+
} else {
109
+
if f.BinaryFragment != nil {
110
+
return applyError(errors.New("text file contains a binary fragment"))
111
+
}
166
112
}
167
113
168
114
switch {
169
115
case f.BinaryFragment != nil:
170
-
return a.ApplyBinaryFragment(dst, f.BinaryFragment)
116
+
applier := NewBinaryApplier(dst, src)
117
+
if err := applier.ApplyFragment(f.BinaryFragment); err != nil {
118
+
return err
119
+
}
120
+
return applier.Close()
171
121
172
122
case len(f.TextFragments) > 0:
173
123
frags := make([]*TextFragment, len(f.TextFragments))
···
181
131
// right now, the application fails if fragments overlap, but it should be
182
132
// possible to precompute the result of applying them in order
183
133
134
+
applier := NewTextApplier(dst, src)
184
135
for i, frag := range frags {
185
-
if err := a.ApplyTextFragment(dst, frag); err != nil {
136
+
if err := applier.ApplyFragment(frag); err != nil {
186
137
return applyError(err, fragNum(i))
187
138
}
188
139
}
189
-
}
190
-
191
-
return applyError(a.Flush(dst))
192
-
}
193
-
194
-
// ApplyTextFragment applies the changes in the fragment f and writes unwritten
195
-
// data before the start of the fragment and the result to dst. If multiple
196
-
// text fragments apply to the same source, ApplyTextFragment must be called in
197
-
// order of increasing start position. As a result, each fragment can be
198
-
// applied at most once before a call to Reset.
199
-
func (a *Applier) ApplyTextFragment(dst io.Writer, f *TextFragment) error {
200
-
if a.applyType != applyInitial && a.applyType != applyText {
201
-
return applyError(errApplyInProgress)
202
-
}
203
-
defer func() { a.applyType = applyText }()
204
-
205
-
// application code assumes fragment fields are consistent
206
-
if err := f.Validate(); err != nil {
207
-
return applyError(err)
208
-
}
209
-
210
-
// lines are 0-indexed, positions are 1-indexed (but new files have position = 0)
211
-
fragStart := f.OldPosition - 1
212
-
if fragStart < 0 {
213
-
fragStart = 0
214
-
}
215
-
fragEnd := fragStart + f.OldLines
216
-
217
-
start := a.nextLine
218
-
if fragStart < start {
219
-
return applyError(&Conflict{"fragment overlaps with an applied fragment"})
220
-
}
221
-
222
-
if f.OldPosition == 0 {
223
-
ok, err := isLen(a.src, 0)
224
-
if err != nil {
225
-
return applyError(err)
226
-
}
227
-
if !ok {
228
-
return applyError(&Conflict{"cannot create new file from non-empty src"})
229
-
}
230
-
}
231
-
232
-
preimage := make([][]byte, fragEnd-start)
233
-
n, err := a.lineSrc.ReadLinesAt(preimage, start)
234
-
if err != nil {
235
-
return applyError(err, lineNum(start+int64(n)))
236
-
}
237
-
238
-
// copy leading data before the fragment starts
239
-
for i, line := range preimage[:fragStart-start] {
240
-
if _, err := dst.Write(line); err != nil {
241
-
a.nextLine = start + int64(i)
242
-
return applyError(err, lineNum(a.nextLine))
243
-
}
244
-
}
245
-
preimage = preimage[fragStart-start:]
246
-
247
-
// apply the changes in the fragment
248
-
used := int64(0)
249
-
for i, line := range f.Lines {
250
-
if err := applyTextLine(dst, line, preimage, used); err != nil {
251
-
a.nextLine = fragStart + used
252
-
return applyError(err, lineNum(a.nextLine), fragLineNum(i))
253
-
}
254
-
if line.Old() {
255
-
used++
256
-
}
257
-
}
258
-
a.nextLine = fragStart + used
259
-
260
-
// new position of +0,0 mean a full delete, so check for leftovers
261
-
if f.NewPosition == 0 && f.NewLines == 0 {
262
-
var b [1][]byte
263
-
n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine)
264
-
if err != nil && err != io.EOF {
265
-
return applyError(err, lineNum(a.nextLine))
266
-
}
267
-
if n > 0 {
268
-
return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine))
269
-
}
270
-
}
271
-
272
-
return nil
273
-
}
274
-
275
-
func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) {
276
-
if line.Old() && string(preimage[i]) != line.Line {
277
-
return &Conflict{"fragment line does not match src line"}
278
-
}
279
-
if line.New() {
280
-
_, err = io.WriteString(dst, line.Line)
281
-
}
282
-
return err
283
-
}
284
-
285
-
// Flush writes any data following the last applied fragment to dst.
286
-
func (a *Applier) Flush(dst io.Writer) (err error) {
287
-
switch a.applyType {
288
-
case applyInitial:
289
-
_, err = copyFrom(dst, a.src, 0)
290
-
case applyText:
291
-
_, err = copyLinesFrom(dst, a.lineSrc, a.nextLine)
292
-
case applyBinary:
293
-
// nothing to flush, binary apply "consumes" full source
294
-
}
295
-
return err
296
-
}
140
+
return applier.Close()
297
141
298
-
// ApplyBinaryFragment applies the changes in the fragment f and writes the
299
-
// result to dst. At most one binary fragment can be applied before a call to
300
-
// Reset.
301
-
func (a *Applier) ApplyBinaryFragment(dst io.Writer, f *BinaryFragment) error {
302
-
if a.applyType != applyInitial {
303
-
return applyError(errApplyInProgress)
304
-
}
305
-
defer func() { a.applyType = applyBinary }()
306
-
307
-
if f == nil {
308
-
return applyError(errors.New("nil fragment"))
309
-
}
310
-
311
-
switch f.Method {
312
-
case BinaryPatchLiteral:
313
-
if _, err := dst.Write(f.Data); err != nil {
314
-
return applyError(err)
315
-
}
316
-
case BinaryPatchDelta:
317
-
if err := applyBinaryDeltaFragment(dst, a.src, f.Data); err != nil {
318
-
return applyError(err)
319
-
}
320
142
default:
321
-
return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method))
322
-
}
323
-
return nil
324
-
}
325
-
326
-
func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error {
327
-
srcSize, delta := readBinaryDeltaSize(frag)
328
-
if err := checkBinarySrcSize(src, srcSize); err != nil {
329
-
return err
330
-
}
331
-
332
-
dstSize, delta := readBinaryDeltaSize(delta)
333
-
334
-
for len(delta) > 0 {
335
-
op := delta[0]
336
-
if op == 0 {
337
-
return errors.New("invalid delta opcode 0")
338
-
}
339
-
340
-
var n int64
341
-
var err error
342
-
switch op & 0x80 {
343
-
case 0x80:
344
-
n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src)
345
-
case 0x00:
346
-
n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:])
347
-
}
348
-
if err != nil {
349
-
return err
350
-
}
351
-
dstSize -= n
352
-
}
353
-
354
-
if dstSize != 0 {
355
-
return errors.New("corrupt binary delta: insufficient or extra data")
356
-
}
357
-
return nil
358
-
}
359
-
360
-
// readBinaryDeltaSize reads a variable length size from a delta-encoded binary
361
-
// fragment, returing the size and the unused data. Data is encoded as:
362
-
//
363
-
// [[1xxxxxxx]...] [0xxxxxxx]
364
-
//
365
-
// in little-endian order, with 7 bits of the value per byte.
366
-
func readBinaryDeltaSize(d []byte) (size int64, rest []byte) {
367
-
shift := uint(0)
368
-
for i, b := range d {
369
-
size |= int64(b&0x7F) << shift
370
-
shift += 7
371
-
if b <= 0x7F {
372
-
return size, d[i+1:]
373
-
}
374
-
}
375
-
return size, nil
376
-
}
377
-
378
-
// applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary
379
-
// fragment, returning the amount of data written and the usused part of the
380
-
// fragment. An add operation takes the form:
381
-
//
382
-
// [0xxxxxx][[data1]...]
383
-
//
384
-
// where the lower seven bits of the opcode is the number of data bytes
385
-
// following the opcode. See also pack-format.txt in the Git source.
386
-
func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) {
387
-
size := int(op)
388
-
if len(delta) < size {
389
-
return 0, delta, errors.New("corrupt binary delta: incomplete add")
390
-
}
391
-
_, err = w.Write(delta[:size])
392
-
return int64(size), delta[size:], err
393
-
}
394
-
395
-
// applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary
396
-
// fragment, returing the amount of data written and the unused part of the
397
-
// fragment. A copy operation takes the form:
398
-
//
399
-
// [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3]
400
-
//
401
-
// where the lower seven bits of the opcode determine which non-zero offset and
402
-
// size bytes are present in little-endian order: if bit 0 is set, offset1 is
403
-
// present, etc. If no offset or size bytes are present, offset is 0 and size
404
-
// is 0x10000. See also pack-format.txt in the Git source.
405
-
func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) {
406
-
const defaultSize = 0x10000
407
-
408
-
unpack := func(start, bits uint) (v int64) {
409
-
for i := uint(0); i < bits; i++ {
410
-
mask := byte(1 << (i + start))
411
-
if op&mask > 0 {
412
-
if len(delta) == 0 {
413
-
err = errors.New("corrupt binary delta: incomplete copy")
414
-
return
415
-
}
416
-
v |= int64(delta[0]) << (8 * i)
417
-
delta = delta[1:]
418
-
}
419
-
}
420
-
return
421
-
}
422
-
423
-
offset := unpack(0, 4)
424
-
size := unpack(4, 3)
425
-
if err != nil {
426
-
return 0, delta, err
427
-
}
428
-
if size == 0 {
429
-
size = defaultSize
430
-
}
431
-
432
-
// TODO(bkeyes): consider pooling these buffers
433
-
b := make([]byte, size)
434
-
if _, err := src.ReadAt(b, offset); err != nil {
435
-
return 0, delta, err
436
-
}
437
-
438
-
_, err = w.Write(b)
439
-
return size, delta, err
440
-
}
441
-
442
-
func checkBinarySrcSize(r io.ReaderAt, size int64) error {
443
-
ok, err := isLen(r, size)
444
-
if err != nil {
143
+
// nothing to apply, just copy all the data
144
+
_, err := copyFrom(dst, src, 0)
445
145
return err
446
146
}
447
-
if !ok {
448
-
return &Conflict{"fragment src size does not match actual src size"}
449
-
}
450
-
return nil
451
147
}
+206
gitdiff/apply_binary.go
+206
gitdiff/apply_binary.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"errors"
5
+
"fmt"
6
+
"io"
7
+
)
8
+
9
+
// BinaryApplier applies binary changes described in a fragment to source data.
10
+
// The applier must be closed after use.
11
+
type BinaryApplier struct {
12
+
dst io.Writer
13
+
src io.ReaderAt
14
+
15
+
closed bool
16
+
dirty bool
17
+
}
18
+
19
+
// NewBinaryApplier creates a BinaryApplier that reads data from src and
20
+
// writes modified data to dst.
21
+
func NewBinaryApplier(dst io.Writer, src io.ReaderAt) *BinaryApplier {
22
+
a := BinaryApplier{
23
+
dst: dst,
24
+
src: src,
25
+
}
26
+
return &a
27
+
}
28
+
29
+
// ApplyFragment applies the changes in the fragment f and writes the result to
30
+
// dst. ApplyFragment can be called at most once.
31
+
//
32
+
// If an error occurs while applying, ApplyFragment returns an *ApplyError that
33
+
// annotates the error with additional information. If the error is because of
34
+
// a conflict between a fragment and the source, the wrapped error will be a
35
+
// *Conflict.
36
+
func (a *BinaryApplier) ApplyFragment(f *BinaryFragment) error {
37
+
if f == nil {
38
+
return applyError(errors.New("nil fragment"))
39
+
}
40
+
if a.closed {
41
+
return applyError(errApplierClosed)
42
+
}
43
+
if a.dirty {
44
+
return applyError(errApplyInProgress)
45
+
}
46
+
47
+
// mark an apply as in progress, even if it fails before making changes
48
+
a.dirty = true
49
+
50
+
switch f.Method {
51
+
case BinaryPatchLiteral:
52
+
if _, err := a.dst.Write(f.Data); err != nil {
53
+
return applyError(err)
54
+
}
55
+
case BinaryPatchDelta:
56
+
if err := applyBinaryDeltaFragment(a.dst, a.src, f.Data); err != nil {
57
+
return applyError(err)
58
+
}
59
+
default:
60
+
return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method))
61
+
}
62
+
return nil
63
+
}
64
+
65
+
// Close writes any data following the last applied fragment and prevents
66
+
// future calls to ApplyFragment.
67
+
func (a *BinaryApplier) Close() (err error) {
68
+
if a.closed {
69
+
return nil
70
+
}
71
+
72
+
a.closed = true
73
+
if !a.dirty {
74
+
_, err = copyFrom(a.dst, a.src, 0)
75
+
} else {
76
+
// do nothing, applying a binary fragment copies all data
77
+
}
78
+
return err
79
+
}
80
+
81
+
func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error {
82
+
srcSize, delta := readBinaryDeltaSize(frag)
83
+
if err := checkBinarySrcSize(src, srcSize); err != nil {
84
+
return err
85
+
}
86
+
87
+
dstSize, delta := readBinaryDeltaSize(delta)
88
+
89
+
for len(delta) > 0 {
90
+
op := delta[0]
91
+
if op == 0 {
92
+
return errors.New("invalid delta opcode 0")
93
+
}
94
+
95
+
var n int64
96
+
var err error
97
+
switch op & 0x80 {
98
+
case 0x80:
99
+
n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src)
100
+
case 0x00:
101
+
n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:])
102
+
}
103
+
if err != nil {
104
+
return err
105
+
}
106
+
dstSize -= n
107
+
}
108
+
109
+
if dstSize != 0 {
110
+
return errors.New("corrupt binary delta: insufficient or extra data")
111
+
}
112
+
return nil
113
+
}
114
+
115
+
// readBinaryDeltaSize decodes the variable-length size at the start of a
// delta-encoded binary fragment, returning the size and the unused data. The
// size is encoded as:
//
//	[[1xxxxxxx]...] [0xxxxxxx]
//
// in little-endian order, with 7 bits of the value per byte; a byte with a
// clear high bit ends the size. If d ends before such a byte is seen, the bits
// accumulated so far are returned with a nil remainder.
func readBinaryDeltaSize(d []byte) (size int64, rest []byte) {
	for pos := 0; pos < len(d); pos++ {
		cur := d[pos]
		// each byte contributes its low seven bits, least-significant group first
		size |= int64(cur&0x7F) << (7 * uint(pos))
		if cur&0x80 == 0 {
			return size, d[pos+1:]
		}
	}
	return size, nil
}
132
+
133
+
// applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary
// fragment, returning the size of the added data and the unused part of the
// fragment. An add operation takes the form:
//
//	[0xxxxxxx][[data1]...]
//
// where the lower seven bits of the opcode give the number of data bytes
// following the opcode. See also pack-format.txt in the Git source.
//
// If delta holds fewer bytes than the opcode requires, nothing is written and
// delta is returned unchanged along with an error.
func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) {
	count := int(op)
	if count > len(delta) {
		return 0, delta, errors.New("corrupt binary delta: incomplete add")
	}

	literal, remaining := delta[:count], delta[count:]
	_, err = w.Write(literal)
	return int64(count), remaining, err
}
149
+
150
+
// applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary
// fragment, returning the amount of data written and the unused part of the
// fragment. A copy operation takes the form:
//
//	[1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3]
//
// where the lower seven bits of the opcode determine which non-zero offset and
// size bytes are present in little-endian order: if bit 0 is set, offset1 is
// present, etc. If no offset or size bytes are present, offset is 0 and size
// is 0x10000. See also pack-format.txt in the Git source.
//
// An error is returned if delta ends before all flagged bytes are read, if
// src cannot supply size bytes at offset, or if writing to w fails.
func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) {
	const defaultSize = 0x10000

	// unpack assembles a little-endian value from the bytes of delta flagged
	// by bits [start, start+bits) of op, consuming each byte it uses. If delta
	// runs out, it records the failure in the enclosing err.
	unpack := func(start, bits uint) (v int64) {
		for i := uint(0); i < bits; i++ {
			mask := byte(1 << (i + start))
			if op&mask > 0 {
				if len(delta) == 0 {
					err = errors.New("corrupt binary delta: incomplete copy")
					return
				}
				v |= int64(delta[0]) << (8 * i)
				delta = delta[1:]
			}
		}
		return
	}

	offset := unpack(0, 4)
	size := unpack(4, 3)
	if err != nil {
		return 0, delta, err
	}
	if size == 0 {
		size = defaultSize
	}

	// TODO(bkeyes): consider pooling these buffers
	b := make([]byte, size)
	read, readErr := src.ReadAt(b, offset)
	// io.ReaderAt may report io.EOF together with a complete read when the
	// requested range ends exactly at the end of the source; that is a
	// successful read, not a failure.
	if readErr != nil && !(readErr == io.EOF && read == len(b)) {
		return 0, delta, readErr
	}

	_, err = w.Write(b)
	return size, delta, err
}
196
+
197
+
func checkBinarySrcSize(r io.ReaderAt, size int64) error {
198
+
ok, err := isLen(r, size)
199
+
if err != nil {
200
+
return err
201
+
}
202
+
if !ok {
203
+
return &Conflict{"fragment src size does not match actual src size"}
204
+
}
205
+
return nil
206
+
}
+11
-73
gitdiff/apply_test.go
+11
-73
gitdiff/apply_test.go
···
9
9
"testing"
10
10
)
11
11
12
-
func TestApplierInvariants(t *testing.T) {
13
-
binary := &BinaryFragment{
14
-
Method: BinaryPatchLiteral,
15
-
Size: 2,
16
-
Data: []byte("\xbe\xef"),
17
-
}
18
-
19
-
text := &TextFragment{
20
-
NewPosition: 1,
21
-
NewLines: 1,
22
-
LinesAdded: 1,
23
-
Lines: []Line{
24
-
{Op: OpAdd, Line: "new line\n"},
25
-
},
26
-
}
27
-
28
-
file := &File{
29
-
TextFragments: []*TextFragment{text},
30
-
}
31
-
32
-
src := bytes.NewReader(nil)
33
-
dst := ioutil.Discard
34
-
35
-
assertInProgress := func(t *testing.T, kind string, err error) {
36
-
if !errors.Is(err, errApplyInProgress) {
37
-
t.Fatalf("expected in-progress error for %s apply, but got: %v", kind, err)
38
-
}
39
-
}
40
-
41
-
t.Run("binaryFirst", func(t *testing.T) {
42
-
a := NewApplier(src)
43
-
if err := a.ApplyBinaryFragment(dst, binary); err != nil {
44
-
t.Fatalf("unexpected error applying fragment: %v", err)
45
-
}
46
-
assertInProgress(t, "text", a.ApplyTextFragment(dst, text))
47
-
assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary))
48
-
assertInProgress(t, "file", a.ApplyFile(dst, file))
49
-
})
50
-
51
-
t.Run("textFirst", func(t *testing.T) {
52
-
a := NewApplier(src)
53
-
if err := a.ApplyTextFragment(dst, text); err != nil {
54
-
t.Fatalf("unexpected error applying fragment: %v", err)
55
-
}
56
-
// additional text fragments are allowed
57
-
if err := a.ApplyTextFragment(dst, text); err != nil {
58
-
t.Fatalf("unexpected error applying second fragment: %v", err)
59
-
}
60
-
assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary))
61
-
assertInProgress(t, "file", a.ApplyFile(dst, file))
62
-
})
63
-
64
-
t.Run("fileFirst", func(t *testing.T) {
65
-
a := NewApplier(src)
66
-
if err := a.ApplyFile(dst, file); err != nil {
67
-
t.Fatalf("unexpected error applying file: %v", err)
68
-
}
69
-
assertInProgress(t, "text", a.ApplyTextFragment(dst, text))
70
-
assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary))
71
-
assertInProgress(t, "file", a.ApplyFile(dst, file))
72
-
})
73
-
}
74
-
75
12
func TestApplyTextFragment(t *testing.T) {
76
13
tests := map[string]applyTest{
77
14
"createFile": {Files: getApplyFiles("text_fragment_new")},
···
85
22
"changeStart": {Files: getApplyFiles("text_fragment_change_start")},
86
23
"changeMiddle": {Files: getApplyFiles("text_fragment_change_middle")},
87
24
"changeEnd": {Files: getApplyFiles("text_fragment_change_end")},
25
+
"changeEndEOL": {Files: getApplyFiles("text_fragment_change_end_eol")},
88
26
"changeExact": {Files: getApplyFiles("text_fragment_change_exact")},
89
27
"changeSingleNoEOL": {Files: getApplyFiles("text_fragment_change_single_noeol")},
90
28
···
127
65
128
66
for name, test := range tests {
129
67
t.Run(name, func(t *testing.T) {
130
-
test.run(t, func(w io.Writer, applier *Applier, file *File) error {
68
+
test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error {
131
69
if len(file.TextFragments) != 1 {
132
70
t.Fatalf("patch should contain exactly one fragment, but it has %d", len(file.TextFragments))
133
71
}
134
-
return applier.ApplyTextFragment(w, file.TextFragments[0])
72
+
applier := NewTextApplier(dst, src)
73
+
return applier.ApplyFragment(file.TextFragments[0])
135
74
})
136
75
})
137
76
}
···
176
115
177
116
for name, test := range tests {
178
117
t.Run(name, func(t *testing.T) {
179
-
test.run(t, func(w io.Writer, applier *Applier, file *File) error {
180
-
return applier.ApplyBinaryFragment(w, file.BinaryFragment)
118
+
test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error {
119
+
applier := NewBinaryApplier(dst, src)
120
+
return applier.ApplyFragment(file.BinaryFragment)
181
121
})
182
122
})
183
123
}
···
216
156
217
157
for name, test := range tests {
218
158
t.Run(name, func(t *testing.T) {
219
-
test.run(t, func(w io.Writer, applier *Applier, file *File) error {
220
-
return applier.ApplyFile(w, file)
159
+
test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error {
160
+
return Apply(dst, src, file)
221
161
})
222
162
})
223
163
}
···
228
168
Err interface{}
229
169
}
230
170
231
-
func (at applyTest) run(t *testing.T, apply func(io.Writer, *Applier, *File) error) {
171
+
func (at applyTest) run(t *testing.T, apply func(io.Writer, io.ReaderAt, *File) error) {
232
172
src, patch, out := at.Files.Load(t)
233
173
234
174
files, _, err := Parse(bytes.NewReader(patch))
···
239
179
t.Fatalf("patch should contain exactly one file, but it has %d", len(files))
240
180
}
241
181
242
-
applier := NewApplier(bytes.NewReader(src))
243
-
244
182
var dst bytes.Buffer
245
-
err = apply(&dst, applier, files[0])
183
+
err = apply(&dst, bytes.NewReader(src), files[0])
246
184
if at.Err != nil {
247
185
assertError(t, at.Err, err, "applying fragment")
248
186
return
+152
gitdiff/apply_text.go
+152
gitdiff/apply_text.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"io"
5
+
)
6
+
7
+
// TextApplier applies changes described in text fragments to source data. If
8
+
// changes are described in multiple fragments, those fragments must be applied
9
+
// in order. The applier must be closed after use.
10
+
//
11
+
// By default, TextApplier operates in "strict" mode, where fragment content
12
+
// and positions must exactly match those of the source.
13
+
type TextApplier struct {
14
+
dst io.Writer
15
+
src io.ReaderAt
16
+
lineSrc LineReaderAt
17
+
nextLine int64
18
+
19
+
closed bool
20
+
dirty bool
21
+
}
22
+
23
+
// NewTextApplier creates a TextApplier that reads data from src and writes
24
+
// modified data to dst. If src implements LineReaderAt, it is used directly.
25
+
func NewTextApplier(dst io.Writer, src io.ReaderAt) *TextApplier {
26
+
a := TextApplier{
27
+
dst: dst,
28
+
src: src,
29
+
}
30
+
31
+
if lineSrc, ok := src.(LineReaderAt); ok {
32
+
a.lineSrc = lineSrc
33
+
} else {
34
+
a.lineSrc = &lineReaderAt{r: src}
35
+
}
36
+
37
+
return &a
38
+
}
39
+
40
+
// ApplyFragment applies the changes in the fragment f, writing unwritten data
41
+
// before the start of the fragment and any changes from the fragment. If
42
+
// multiple text fragments apply to the same content, ApplyFragment must be
43
+
// called in order of increasing start position. As a result, each fragment can
44
+
// be applied at most once.
45
+
//
46
+
// If an error occurs while applying, ApplyFragment returns an *ApplyError that
47
+
// annotates the error with additional information. If the error is because of
48
+
// a conflict between the fragment and the source, the wrapped error will be a
49
+
// *Conflict.
50
+
func (a *TextApplier) ApplyFragment(f *TextFragment) error {
51
+
if a.closed {
52
+
return applyError(errApplierClosed)
53
+
}
54
+
55
+
// mark an apply as in progress, even if it fails before making changes
56
+
a.dirty = true
57
+
58
+
// application code assumes fragment fields are consistent
59
+
if err := f.Validate(); err != nil {
60
+
return applyError(err)
61
+
}
62
+
63
+
// lines are 0-indexed, positions are 1-indexed (but new files have position = 0)
64
+
fragStart := f.OldPosition - 1
65
+
if fragStart < 0 {
66
+
fragStart = 0
67
+
}
68
+
fragEnd := fragStart + f.OldLines
69
+
70
+
start := a.nextLine
71
+
if fragStart < start {
72
+
return applyError(&Conflict{"fragment overlaps with an applied fragment"})
73
+
}
74
+
75
+
if f.OldPosition == 0 {
76
+
ok, err := isLen(a.src, 0)
77
+
if err != nil {
78
+
return applyError(err)
79
+
}
80
+
if !ok {
81
+
return applyError(&Conflict{"cannot create new file from non-empty src"})
82
+
}
83
+
}
84
+
85
+
preimage := make([][]byte, fragEnd-start)
86
+
n, err := a.lineSrc.ReadLinesAt(preimage, start)
87
+
if err != nil {
88
+
return applyError(err, lineNum(start+int64(n)))
89
+
}
90
+
91
+
// copy leading data before the fragment starts
92
+
for i, line := range preimage[:fragStart-start] {
93
+
if _, err := a.dst.Write(line); err != nil {
94
+
a.nextLine = start + int64(i)
95
+
return applyError(err, lineNum(a.nextLine))
96
+
}
97
+
}
98
+
preimage = preimage[fragStart-start:]
99
+
100
+
// apply the changes in the fragment
101
+
used := int64(0)
102
+
for i, line := range f.Lines {
103
+
if err := applyTextLine(a.dst, line, preimage, used); err != nil {
104
+
a.nextLine = fragStart + used
105
+
return applyError(err, lineNum(a.nextLine), fragLineNum(i))
106
+
}
107
+
if line.Old() {
108
+
used++
109
+
}
110
+
}
111
+
a.nextLine = fragStart + used
112
+
113
+
// a new position of +0,0 means a full delete, so check for leftovers
114
+
if f.NewPosition == 0 && f.NewLines == 0 {
115
+
var b [1][]byte
116
+
n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine)
117
+
if err != nil && err != io.EOF {
118
+
return applyError(err, lineNum(a.nextLine))
119
+
}
120
+
if n > 0 {
121
+
return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine))
122
+
}
123
+
}
124
+
125
+
return nil
126
+
}
127
+
128
+
func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) {
129
+
if line.Old() && string(preimage[i]) != line.Line {
130
+
return &Conflict{"fragment line does not match src line"}
131
+
}
132
+
if line.New() {
133
+
_, err = io.WriteString(dst, line.Line)
134
+
}
135
+
return err
136
+
}
137
+
138
+
// Close writes any data following the last applied fragment and prevents
139
+
// future calls to ApplyFragment.
140
+
func (a *TextApplier) Close() (err error) {
141
+
if a.closed {
142
+
return nil
143
+
}
144
+
145
+
a.closed = true
146
+
if !a.dirty {
147
+
_, err = copyFrom(a.dst, a.src, 0)
148
+
} else {
149
+
_, err = copyLinesFrom(a.dst, a.lineSrc, a.nextLine)
150
+
}
151
+
return err
152
+
}
+41
-2
gitdiff/base85.go
+41
-2
gitdiff/base85.go
···
19
19
}
20
20
21
21
// base85Decode decodes Base85-encoded data from src into dst. It uses the
22
-
// alphabet defined by base85.c in the Git source tree, which appears to be
23
-
// unique. src must contain at least len(dst) bytes of encoded data.
22
+
// alphabet defined by base85.c in the Git source tree. src must contain at
23
+
// least len(dst) bytes of encoded data.
24
24
func base85Decode(dst, src []byte) error {
25
25
var v uint32
26
26
var n, ndst int
···
50
50
}
51
51
return nil
52
52
}
53
+
54
+
// base85Encode encodes src in Base85, writing the result to dst. It uses the
55
+
// alphabet defined by base85.c in the Git source tree.
56
+
func base85Encode(dst, src []byte) {
57
+
var di, si int
58
+
59
+
encode := func(v uint32) {
60
+
dst[di+0] = b85Alpha[(v/(85*85*85*85))%85]
61
+
dst[di+1] = b85Alpha[(v/(85*85*85))%85]
62
+
dst[di+2] = b85Alpha[(v/(85*85))%85]
63
+
dst[di+3] = b85Alpha[(v/85)%85]
64
+
dst[di+4] = b85Alpha[v%85]
65
+
}
66
+
67
+
n := (len(src) / 4) * 4
68
+
for si < n {
69
+
encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3]))
70
+
si += 4
71
+
di += 5
72
+
}
73
+
74
+
var v uint32
75
+
switch len(src) - si {
76
+
case 3:
77
+
v |= uint32(src[si+2]) << 8
78
+
fallthrough
79
+
case 2:
80
+
v |= uint32(src[si+1]) << 16
81
+
fallthrough
82
+
case 1:
83
+
v |= uint32(src[si+0]) << 24
84
+
encode(v)
85
+
}
86
+
}
87
+
88
+
// base85Len returns the number of bytes needed to hold the Base85 encoding of
// n input bytes: five output bytes for every group of four input bytes, with
// a partial final group counted as a full one.
func base85Len(n int) int {
	groups := (n + 3) / 4
	return 5 * groups
}
+58
gitdiff/base85_test.go
+58
gitdiff/base85_test.go
···
1
1
package gitdiff
2
2
3
3
import (
4
+
"bytes"
4
5
"testing"
5
6
)
6
7
···
58
59
})
59
60
}
60
61
}
62
+
63
+
func TestBase85Encode(t *testing.T) {
64
+
tests := map[string]struct {
65
+
Input []byte
66
+
Output string
67
+
}{
68
+
"zeroBytes": {
69
+
Input: []byte{},
70
+
Output: "",
71
+
},
72
+
"twoBytes": {
73
+
Input: []byte{0xCA, 0xFE},
74
+
Output: "%KiWV",
75
+
},
76
+
"fourBytes": {
77
+
Input: []byte{0x0, 0x0, 0xCA, 0xFE},
78
+
Output: "007GV",
79
+
},
80
+
"sixBytes": {
81
+
Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE},
82
+
Output: "007GV%KiWV",
83
+
},
84
+
}
85
+
86
+
for name, test := range tests {
87
+
t.Run(name, func(t *testing.T) {
88
+
dst := make([]byte, len(test.Output))
89
+
base85Encode(dst, test.Input)
90
+
for i, b := range test.Output {
91
+
if dst[i] != byte(b) {
92
+
t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i])
93
+
}
94
+
}
95
+
})
96
+
}
97
+
}
98
+
99
+
func FuzzBase85Roundtrip(f *testing.F) {
100
+
f.Add([]byte{0x2b, 0x0d})
101
+
f.Add([]byte{0xbc, 0xb4, 0x3f})
102
+
f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25})
103
+
f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8})
104
+
105
+
f.Fuzz(func(t *testing.T, in []byte) {
106
+
n := len(in)
107
+
dst := make([]byte, base85Len(n))
108
+
out := make([]byte, n)
109
+
110
+
base85Encode(dst, in)
111
+
if err := base85Decode(out, dst); err != nil {
112
+
t.Fatalf("unexpected error decoding base85 data: %v", err)
113
+
}
114
+
if !bytes.Equal(in, out) {
115
+
t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst))
116
+
}
117
+
})
118
+
}
+11
-4
gitdiff/binary.go
+11
-4
gitdiff/binary.go
···
50
50
}
51
51
52
52
func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) {
53
-
switch p.Line(0) {
54
-
case "GIT binary patch\n":
53
+
line := p.Line(0)
54
+
switch {
55
+
case line == "GIT binary patch\n":
55
56
hasData = true
56
-
case "Binary files differ\n":
57
-
case "Files differ\n":
57
+
case isBinaryNoDataMarker(line):
58
58
default:
59
59
return false, false, nil
60
60
}
···
63
63
return false, false, err
64
64
}
65
65
return true, hasData, nil
66
+
}
67
+
68
+
// isBinaryNoDataMarker reports whether line is a marker for a binary change
// that carries no patch data. A marker ends with " differ\n" and starts with
// either "Binary files " or "Files "; anything between the prefix and suffix
// (such as the two file paths) is accepted.
func isBinaryNoDataMarker(line string) bool {
	if !strings.HasSuffix(line, " differ\n") {
		return false
	}
	for _, prefix := range []string{"Binary files ", "Files "} {
		if strings.HasPrefix(line, prefix) {
			return true
		}
	}
	return false
}
67
74
68
75
func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) {
+10
gitdiff/binary_test.go
+10
gitdiff/binary_test.go
···
25
25
IsBinary: true,
26
26
HasData: false,
27
27
},
28
+
"binaryFileNoPatchPaths": {
29
+
Input: "Binary files a/foo.bin and b/foo.bin differ\n",
30
+
IsBinary: true,
31
+
HasData: false,
32
+
},
33
+
"fileNoPatch": {
34
+
Input: "Files differ\n",
35
+
IsBinary: true,
36
+
HasData: false,
37
+
},
28
38
"textFile": {
29
39
Input: "@@ -10,14 +22,31 @@\n",
30
40
IsBinary: false,
+4
-4
gitdiff/file_header.go
+4
-4
gitdiff/file_header.go
···
57
57
return nil, "", err
58
58
}
59
59
}
60
-
return nil, "", nil
60
+
return nil, preamble.String(), nil
61
61
}
62
62
63
63
func (p *parser) ParseGitFileHeader() (*File, error) {
···
324
324
}
325
325
326
326
func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) {
327
-
f.OldMode, err = parseMode(line)
327
+
f.OldMode, err = parseMode(strings.TrimSpace(line))
328
328
return
329
329
}
330
330
331
331
func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) {
332
-
f.NewMode, err = parseMode(line)
332
+
f.NewMode, err = parseMode(strings.TrimSpace(line))
333
333
return
334
334
}
335
335
···
527
527
528
528
// a valid timestamp can have optional ':' in zone specifier
529
529
// remove that if it exists so we have a single format
530
-
if ts[len(ts)-3] == ':' {
530
+
if len(ts) >= 3 && ts[len(ts)-3] == ':' {
531
531
ts = ts[:len(ts)-3] + ts[len(ts)-2:]
532
532
}
533
533
+29
gitdiff/file_header_test.go
+29
gitdiff/file_header_test.go
···
486
486
OldMode: os.FileMode(0100644),
487
487
},
488
488
},
489
+
"oldModeWithTrailingSpace": {
490
+
Line: "old mode 100644\r\n",
491
+
OutputFile: &File{
492
+
OldMode: os.FileMode(0100644),
493
+
},
494
+
},
489
495
"invalidOldMode": {
490
496
Line: "old mode rw\n",
491
497
Err: true,
···
496
502
NewMode: os.FileMode(0100755),
497
503
},
498
504
},
505
+
"newModeWithTrailingSpace": {
506
+
Line: "new mode 100755\r\n",
507
+
OutputFile: &File{
508
+
NewMode: os.FileMode(0100755),
509
+
},
510
+
},
499
511
"invalidNewMode": {
500
512
Line: "new mode rwx\n",
501
513
Err: true,
···
511
523
},
512
524
"newFileMode": {
513
525
Line: "new file mode 100755\n",
526
+
DefaultName: "dir/file.txt",
527
+
OutputFile: &File{
528
+
NewName: "dir/file.txt",
529
+
NewMode: os.FileMode(0100755),
530
+
IsNew: true,
531
+
},
532
+
},
533
+
"newFileModeWithTrailingSpace": {
534
+
Line: "new file mode 100755\r\n",
514
535
DefaultName: "dir/file.txt",
515
536
OutputFile: &File{
516
537
NewName: "dir/file.txt",
···
722
743
},
723
744
"notEpoch": {
724
745
Input: "+++ file.txt\t2019-03-21 12:34:56.789 -0700\n",
746
+
Output: false,
747
+
},
748
+
"notTimestamp": {
749
+
Input: "+++ file.txt\trandom text\n",
750
+
Output: false,
751
+
},
752
+
"notTimestampShort": {
753
+
Input: "+++ file.txt\t0\n",
725
754
Output: false,
726
755
},
727
756
}
+281
gitdiff/format.go
+281
gitdiff/format.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"bytes"
5
+
"compress/zlib"
6
+
"fmt"
7
+
"io"
8
+
"strconv"
9
+
)
10
+
11
+
// formatter wraps an io.Writer and remembers the first write error in err.
// Its write methods never report failure to the caller: once an error has
// been recorded, later writes are silently dropped.
type formatter struct {
	w   io.Writer
	err error
}

// newFormatter returns a formatter that writes to w.
func newFormatter(w io.Writer) *formatter {
	fm := new(formatter)
	fm.w = w
	return fm
}

// Write forwards p to the underlying writer unless an earlier write failed.
// It always returns len(p) and a nil error; a failure is stored in fm.err.
func (fm *formatter) Write(p []byte) (int, error) {
	if fm.err == nil {
		_, fm.err = fm.w.Write(p)
	}
	return len(p), nil
}

// WriteString writes s through Write. It always returns len(s) and a nil
// error.
func (fm *formatter) WriteString(s string) (int, error) {
	_, _ = fm.Write([]byte(s))
	return len(s), nil
}

// WriteByte writes the single byte c through Write. It always returns nil.
func (fm *formatter) WriteByte(c byte) error {
	_, _ = fm.Write([]byte{c})
	return nil
}
39
+
40
+
func (fm *formatter) WriteQuotedName(s string) {
41
+
qpos := 0
42
+
for i := 0; i < len(s); i++ {
43
+
ch := s[i]
44
+
if q, quoted := quoteByte(ch); quoted {
45
+
if qpos == 0 {
46
+
fm.WriteByte('"')
47
+
}
48
+
fm.WriteString(s[qpos:i])
49
+
fm.Write(q)
50
+
qpos = i + 1
51
+
}
52
+
}
53
+
fm.WriteString(s[qpos:])
54
+
if qpos > 0 {
55
+
fm.WriteByte('"')
56
+
}
57
+
}
58
+
59
+
// quoteEscapeTable maps bytes that have a single-character backslash escape
// to the character that follows the backslash.
var quoteEscapeTable = map[byte]byte{
	'\a': 'a',
	'\b': 'b',
	'\t': 't',
	'\n': 'n',
	'\v': 'v',
	'\f': 'f',
	'\r': 'r',
	'"':  '"',
	'\\': '\\',
}

// quoteByte returns the escaped form of b and true if b must be escaped, or
// nil and false if b can be written as-is. Bytes listed in quoteEscapeTable
// use their two-character escape; any other byte below 0x20 or at or above
// 0x7F is written as a backslash followed by three octal digits.
func quoteByte(b byte) ([]byte, bool) {
	if esc, found := quoteEscapeTable[b]; found {
		return []byte{'\\', esc}, true
	}

	if b >= 0x20 && b < 0x7F {
		return nil, false
	}

	hi := '0' + ((b >> 6) & 0o3)
	mid := '0' + ((b >> 3) & 0o7)
	lo := '0' + (b & 0o7)
	return []byte{'\\', hi, mid, lo}, true
}
85
+
86
+
func (fm *formatter) FormatFile(f *File) {
87
+
fm.WriteString("diff --git ")
88
+
89
+
var aName, bName string
90
+
switch {
91
+
case f.OldName == "":
92
+
aName = f.NewName
93
+
bName = f.NewName
94
+
95
+
case f.NewName == "":
96
+
aName = f.OldName
97
+
bName = f.OldName
98
+
99
+
default:
100
+
aName = f.OldName
101
+
bName = f.NewName
102
+
}
103
+
104
+
fm.WriteQuotedName("a/" + aName)
105
+
fm.WriteByte(' ')
106
+
fm.WriteQuotedName("b/" + bName)
107
+
fm.WriteByte('\n')
108
+
109
+
if f.OldMode != 0 {
110
+
if f.IsDelete {
111
+
fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode)
112
+
} else if f.NewMode != 0 {
113
+
fmt.Fprintf(fm, "old mode %o\n", f.OldMode)
114
+
}
115
+
}
116
+
117
+
if f.NewMode != 0 {
118
+
if f.IsNew {
119
+
fmt.Fprintf(fm, "new file mode %o\n", f.NewMode)
120
+
} else if f.OldMode != 0 {
121
+
fmt.Fprintf(fm, "new mode %o\n", f.NewMode)
122
+
}
123
+
}
124
+
125
+
if f.Score > 0 {
126
+
if f.IsCopy || f.IsRename {
127
+
fmt.Fprintf(fm, "similarity index %d%%\n", f.Score)
128
+
} else {
129
+
fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score)
130
+
}
131
+
}
132
+
133
+
if f.IsCopy {
134
+
if f.OldName != "" {
135
+
fm.WriteString("copy from ")
136
+
fm.WriteQuotedName(f.OldName)
137
+
fm.WriteByte('\n')
138
+
}
139
+
if f.NewName != "" {
140
+
fm.WriteString("copy to ")
141
+
fm.WriteQuotedName(f.NewName)
142
+
fm.WriteByte('\n')
143
+
}
144
+
}
145
+
146
+
if f.IsRename {
147
+
if f.OldName != "" {
148
+
fm.WriteString("rename from ")
149
+
fm.WriteQuotedName(f.OldName)
150
+
fm.WriteByte('\n')
151
+
}
152
+
if f.NewName != "" {
153
+
fm.WriteString("rename to ")
154
+
fm.WriteQuotedName(f.NewName)
155
+
fm.WriteByte('\n')
156
+
}
157
+
}
158
+
159
+
if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" {
160
+
fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix)
161
+
162
+
// Mode is only included on the index line when it is not changing
163
+
if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) {
164
+
fmt.Fprintf(fm, " %o", f.OldMode)
165
+
}
166
+
167
+
fm.WriteByte('\n')
168
+
}
169
+
170
+
if f.IsBinary {
171
+
if f.BinaryFragment == nil {
172
+
fm.WriteString("Binary files ")
173
+
fm.WriteQuotedName("a/" + aName)
174
+
fm.WriteString(" and ")
175
+
fm.WriteQuotedName("b/" + bName)
176
+
fm.WriteString(" differ\n")
177
+
} else {
178
+
fm.WriteString("GIT binary patch\n")
179
+
fm.FormatBinaryFragment(f.BinaryFragment)
180
+
if f.ReverseBinaryFragment != nil {
181
+
fm.FormatBinaryFragment(f.ReverseBinaryFragment)
182
+
}
183
+
}
184
+
}
185
+
186
+
// The "---" and "+++" lines only appear for text patches with fragments
187
+
if len(f.TextFragments) > 0 {
188
+
fm.WriteString("--- ")
189
+
if f.OldName == "" {
190
+
fm.WriteString("/dev/null")
191
+
} else {
192
+
fm.WriteQuotedName("a/" + f.OldName)
193
+
}
194
+
fm.WriteByte('\n')
195
+
196
+
fm.WriteString("+++ ")
197
+
if f.NewName == "" {
198
+
fm.WriteString("/dev/null")
199
+
} else {
200
+
fm.WriteQuotedName("b/" + f.NewName)
201
+
}
202
+
fm.WriteByte('\n')
203
+
204
+
for _, frag := range f.TextFragments {
205
+
fm.FormatTextFragment(frag)
206
+
}
207
+
}
208
+
}
209
+
210
+
func (fm *formatter) FormatTextFragment(f *TextFragment) {
211
+
fm.FormatTextFragmentHeader(f)
212
+
fm.WriteByte('\n')
213
+
214
+
for _, line := range f.Lines {
215
+
fm.WriteString(line.Op.String())
216
+
fm.WriteString(line.Line)
217
+
if line.NoEOL() {
218
+
fm.WriteString("\n\\ No newline at end of file\n")
219
+
}
220
+
}
221
+
}
222
+
223
+
func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) {
224
+
fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines)
225
+
if f.Comment != "" {
226
+
fm.WriteByte(' ')
227
+
fm.WriteString(f.Comment)
228
+
}
229
+
}
230
+
231
+
func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) {
232
+
const (
233
+
maxBytesPerLine = 52
234
+
)
235
+
236
+
switch f.Method {
237
+
case BinaryPatchDelta:
238
+
fm.WriteString("delta ")
239
+
case BinaryPatchLiteral:
240
+
fm.WriteString("literal ")
241
+
}
242
+
fm.Write(strconv.AppendInt(nil, f.Size, 10))
243
+
fm.WriteByte('\n')
244
+
245
+
data := deflateBinaryChunk(f.Data)
246
+
n := (len(data) / maxBytesPerLine) * maxBytesPerLine
247
+
248
+
buf := make([]byte, base85Len(maxBytesPerLine))
249
+
for i := 0; i < n; i += maxBytesPerLine {
250
+
base85Encode(buf, data[i:i+maxBytesPerLine])
251
+
fm.WriteByte('z')
252
+
fm.Write(buf)
253
+
fm.WriteByte('\n')
254
+
}
255
+
if remainder := len(data) - n; remainder > 0 {
256
+
buf = buf[0:base85Len(remainder)]
257
+
258
+
sizeChar := byte(remainder)
259
+
if remainder <= 26 {
260
+
sizeChar = 'A' + sizeChar - 1
261
+
} else {
262
+
sizeChar = 'a' + sizeChar - 27
263
+
}
264
+
265
+
base85Encode(buf, data[n:])
266
+
fm.WriteByte(sizeChar)
267
+
fm.Write(buf)
268
+
fm.WriteByte('\n')
269
+
}
270
+
fm.WriteByte('\n')
271
+
}
272
+
273
+
// deflateBinaryChunk returns data compressed as a zlib stream using the
// default settings of compress/zlib. Errors from the zlib writer are
// discarded: the destination is an in-memory bytes.Buffer, whose Write is
// documented to always return a nil error.
func deflateBinaryChunk(data []byte) []byte {
	compressed := new(bytes.Buffer)

	zw := zlib.NewWriter(compressed)
	_, _ = zw.Write(data)
	_ = zw.Close() // Close flushes the remaining compressed bytes

	return compressed.Bytes()
}
+157
gitdiff/format_roundtrip_test.go
+157
gitdiff/format_roundtrip_test.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"bytes"
5
+
"fmt"
6
+
"os"
7
+
"path/filepath"
8
+
"slices"
9
+
"testing"
10
+
)
11
+
12
+
func TestFormatRoundtrip(t *testing.T) {
13
+
patches := []struct {
14
+
File string
15
+
SkipTextCompare bool
16
+
}{
17
+
{File: "copy.patch"},
18
+
{File: "copy_modify.patch"},
19
+
{File: "delete.patch"},
20
+
{File: "mode.patch"},
21
+
{File: "mode_modify.patch"},
22
+
{File: "modify.patch"},
23
+
{File: "new.patch"},
24
+
{File: "new_empty.patch"},
25
+
{File: "new_mode.patch"},
26
+
{File: "rename.patch"},
27
+
{File: "rename_modify.patch"},
28
+
29
+
// Due to differences between Go's 'encoding/zlib' package and the zlib
30
+
// C library, binary patches cannot be compared directly as the patch
31
+
// data is slightly different when re-encoded by Go.
32
+
{File: "binary_modify.patch", SkipTextCompare: true},
33
+
{File: "binary_new.patch", SkipTextCompare: true},
34
+
{File: "binary_modify_nodata.patch"},
35
+
}
36
+
37
+
for _, patch := range patches {
38
+
t.Run(patch.File, func(t *testing.T) {
39
+
b, err := os.ReadFile(filepath.Join("testdata", "string", patch.File))
40
+
if err != nil {
41
+
t.Fatalf("failed to read patch: %v", err)
42
+
}
43
+
44
+
original := assertParseSingleFile(t, b, "patch")
45
+
str := original.String()
46
+
47
+
if !patch.SkipTextCompare {
48
+
if string(b) != str {
49
+
t.Errorf("incorrect patch text\nexpected: %q\n actual: %q\n", string(b), str)
50
+
}
51
+
}
52
+
53
+
reparsed := assertParseSingleFile(t, []byte(str), "formatted patch")
54
+
assertFilesEqual(t, original, reparsed)
55
+
})
56
+
}
57
+
}
58
+
59
+
func assertParseSingleFile(t *testing.T, b []byte, kind string) *File {
60
+
files, _, err := Parse(bytes.NewReader(b))
61
+
if err != nil {
62
+
t.Fatalf("failed to parse %s: %v", kind, err)
63
+
}
64
+
if len(files) != 1 {
65
+
t.Fatalf("expected %s to contain a single files, but found %d", kind, len(files))
66
+
}
67
+
return files[0]
68
+
}
69
+
70
+
func assertFilesEqual(t *testing.T, expected, actual *File) {
71
+
assertEqual(t, expected.OldName, actual.OldName, "OldName")
72
+
assertEqual(t, expected.NewName, actual.NewName, "NewName")
73
+
74
+
assertEqual(t, expected.IsNew, actual.IsNew, "IsNew")
75
+
assertEqual(t, expected.IsDelete, actual.IsDelete, "IsDelete")
76
+
assertEqual(t, expected.IsCopy, actual.IsCopy, "IsCopy")
77
+
assertEqual(t, expected.IsRename, actual.IsRename, "IsRename")
78
+
79
+
assertEqual(t, expected.OldMode, actual.OldMode, "OldMode")
80
+
assertEqual(t, expected.NewMode, actual.NewMode, "NewMode")
81
+
82
+
assertEqual(t, expected.OldOIDPrefix, actual.OldOIDPrefix, "OldOIDPrefix")
83
+
assertEqual(t, expected.NewOIDPrefix, actual.NewOIDPrefix, "NewOIDPrefix")
84
+
assertEqual(t, expected.Score, actual.Score, "Score")
85
+
86
+
if len(expected.TextFragments) == len(actual.TextFragments) {
87
+
for i := range expected.TextFragments {
88
+
prefix := fmt.Sprintf("TextFragments[%d].", i)
89
+
ef := expected.TextFragments[i]
90
+
af := actual.TextFragments[i]
91
+
92
+
assertEqual(t, ef.Comment, af.Comment, prefix+"Comment")
93
+
94
+
assertEqual(t, ef.OldPosition, af.OldPosition, prefix+"OldPosition")
95
+
assertEqual(t, ef.OldLines, af.OldLines, prefix+"OldLines")
96
+
97
+
assertEqual(t, ef.NewPosition, af.NewPosition, prefix+"NewPosition")
98
+
assertEqual(t, ef.NewLines, af.NewLines, prefix+"NewLines")
99
+
100
+
assertEqual(t, ef.LinesAdded, af.LinesAdded, prefix+"LinesAdded")
101
+
assertEqual(t, ef.LinesDeleted, af.LinesDeleted, prefix+"LinesDeleted")
102
+
103
+
assertEqual(t, ef.LeadingContext, af.LeadingContext, prefix+"LeadingContext")
104
+
assertEqual(t, ef.TrailingContext, af.TrailingContext, prefix+"TrailingContext")
105
+
106
+
if !slices.Equal(ef.Lines, af.Lines) {
107
+
t.Errorf("%sLines: expected %#v, actual %#v", prefix, ef.Lines, af.Lines)
108
+
}
109
+
}
110
+
} else {
111
+
t.Errorf("TextFragments: expected length %d, actual length %d", len(expected.TextFragments), len(actual.TextFragments))
112
+
}
113
+
114
+
assertEqual(t, expected.IsBinary, actual.IsBinary, "IsBinary")
115
+
116
+
if expected.BinaryFragment != nil {
117
+
if actual.BinaryFragment == nil {
118
+
t.Errorf("BinaryFragment: expected non-nil, actual is nil")
119
+
} else {
120
+
ef := expected.BinaryFragment
121
+
af := expected.BinaryFragment
122
+
123
+
assertEqual(t, ef.Method, af.Method, "BinaryFragment.Method")
124
+
assertEqual(t, ef.Size, af.Size, "BinaryFragment.Size")
125
+
126
+
if !slices.Equal(ef.Data, af.Data) {
127
+
t.Errorf("BinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data)
128
+
}
129
+
}
130
+
} else if actual.BinaryFragment != nil {
131
+
t.Errorf("BinaryFragment: expected nil, actual is non-nil")
132
+
}
133
+
134
+
if expected.ReverseBinaryFragment != nil {
135
+
if actual.ReverseBinaryFragment == nil {
136
+
t.Errorf("ReverseBinaryFragment: expected non-nil, actual is nil")
137
+
} else {
138
+
ef := expected.ReverseBinaryFragment
139
+
af := expected.ReverseBinaryFragment
140
+
141
+
assertEqual(t, ef.Method, af.Method, "ReverseBinaryFragment.Method")
142
+
assertEqual(t, ef.Size, af.Size, "ReverseBinaryFragment.Size")
143
+
144
+
if !slices.Equal(ef.Data, af.Data) {
145
+
t.Errorf("ReverseBinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data)
146
+
}
147
+
}
148
+
} else if actual.ReverseBinaryFragment != nil {
149
+
t.Errorf("ReverseBinaryFragment: expected nil, actual is non-nil")
150
+
}
151
+
}
152
+
153
+
// assertEqual reports a test error naming the field when expected and actual
// are not equal under ==. The test continues running after a mismatch.
func assertEqual[T comparable](t *testing.T, expected, actual T, name string) {
	// attribute the failure to the calling assertion, not to this helper
	t.Helper()

	if expected != actual {
		t.Errorf("%s: expected %#v, actual %#v", name, expected, actual)
	}
}
+28
gitdiff/format_test.go
+28
gitdiff/format_test.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"strings"
5
+
"testing"
6
+
)
7
+
8
+
func TestFormatter_WriteQuotedName(t *testing.T) {
9
+
tests := []struct {
10
+
Input string
11
+
Expected string
12
+
}{
13
+
{"noquotes.txt", `noquotes.txt`},
14
+
{"no quotes.txt", `no quotes.txt`},
15
+
{"new\nline", `"new\nline"`},
16
+
{"escape\x1B null\x00", `"escape\033 null\000"`},
17
+
{"snowman \u2603 snowman", `"snowman \342\230\203 snowman"`},
18
+
{"\"already quoted\"", `"\"already quoted\""`},
19
+
}
20
+
21
+
for _, test := range tests {
22
+
var b strings.Builder
23
+
newFormatter(&b).WriteQuotedName(test.Input)
24
+
if b.String() != test.Expected {
25
+
t.Errorf("expected %q, got %q", test.Expected, b.String())
26
+
}
27
+
}
28
+
}
+33
-2
gitdiff/gitdiff.go
+33
-2
gitdiff/gitdiff.go
···
4
4
"errors"
5
5
"fmt"
6
6
"os"
7
+
"strings"
7
8
)
8
9
9
10
// File describes changes to a single file. It can be either a text file or a
···
38
39
ReverseBinaryFragment *BinaryFragment
39
40
}
40
41
42
+
// String returns a git diff representation of this file. The value can be
43
+
// parsed by this library to obtain the same File, but may not be the same as
44
+
// the original input.
45
+
func (f *File) String() string {
46
+
var diff strings.Builder
47
+
newFormatter(&diff).FormatFile(f)
48
+
return diff.String()
49
+
}
50
+
41
51
// TextFragment describes changed lines starting at a specific line in a text file.
42
52
type TextFragment struct {
43
53
Comment string
···
57
67
Lines []Line
58
68
}
59
69
60
-
// Header returns the canonical header of this fragment.
70
+
// String returns a git diff format of this fragment. See [File.String] for
71
+
// more details on this format.
72
+
func (f *TextFragment) String() string {
73
+
var diff strings.Builder
74
+
newFormatter(&diff).FormatTextFragment(f)
75
+
return diff.String()
76
+
}
77
+
78
+
// Header returns a git diff header of this fragment. See [File.String] for
79
+
// more details on this format.
61
80
func (f *TextFragment) Header() string {
62
-
return fmt.Sprintf("@@ -%d,%d +%d,%d @@ %s", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines, f.Comment)
81
+
var hdr strings.Builder
82
+
newFormatter(&hdr).FormatTextFragmentHeader(f)
83
+
return hdr.String()
63
84
}
64
85
65
86
// Validate checks that the fragment is self-consistent and appliable. Validate
···
197
218
// BinaryPatchLiteral indicates the data is the exact file content
198
219
BinaryPatchLiteral
199
220
)
221
+
222
+
// String returns a git diff format of this fragment. Due to differences in
223
+
// zlib implementation between Go and Git, encoded binary data in the result
224
+
// will likely differ from what Git produces for the same input. See
225
+
// [File.String] for more details on this format.
226
+
func (f *BinaryFragment) String() string {
227
+
var diff strings.Builder
228
+
newFormatter(&diff).FormatBinaryFragment(f)
229
+
return diff.String()
230
+
}
+7
-3
gitdiff/parser.go
+7
-3
gitdiff/parser.go
···
12
12
// Parse parses a patch with changes to one or more files. Any content before
13
13
// the first file is returned as the second value. If an error occurs while
14
14
// parsing, it returns all files parsed before the error.
15
+
//
16
+
// Parse expects to receive a single patch. If the input may contain multiple
17
+
// patches (for example, if it is an mbox file), callers should split it into
18
+
// individual patches and call Parse on each one.
15
19
func Parse(r io.Reader) ([]*File, string, error) {
16
20
p := newParser(r)
17
21
···
29
33
if err != nil {
30
34
return files, preamble, err
31
35
}
36
+
if len(files) == 0 {
37
+
preamble = pre
38
+
}
32
39
if file == nil {
33
40
break
34
41
}
···
46
53
}
47
54
}
48
55
49
-
if len(files) == 0 {
50
-
preamble = pre
51
-
}
52
56
files = append(files, file)
53
57
}
54
58
+16
-2
gitdiff/parser_test.go
+16
-2
gitdiff/parser_test.go
···
281
281
--- could this be a header?
282
282
nope, it's just some dashes
283
283
`,
284
-
Output: nil,
285
-
Preamble: "",
284
+
Output: nil,
285
+
Preamble: `
286
+
this is a line
287
+
this is another line
288
+
--- could this be a header?
289
+
nope, it's just some dashes
290
+
`,
286
291
},
287
292
"detatchedFragmentLike": {
288
293
Input: `
···
290
295
@@ -1,3 +1,4 ~1,5 @@
291
296
`,
292
297
Output: nil,
298
+
Preamble: `
299
+
a wild fragment appears?
300
+
@@ -1,3 +1,4 ~1,5 @@
301
+
`,
293
302
},
294
303
"detatchedFragment": {
295
304
Input: `
···
425
434
},
426
435
},
427
436
Preamble: textPreamble,
437
+
},
438
+
"noFiles": {
439
+
InputFile: "testdata/no_files.patch",
440
+
Output: nil,
441
+
Preamble: textPreamble,
428
442
},
429
443
"newBinaryFile": {
430
444
InputFile: "testdata/new_binary_file.patch",
+125
-129
gitdiff/patch_header.go
+125
-129
gitdiff/patch_header.go
···
5
5
"errors"
6
6
"fmt"
7
7
"io"
8
+
"io/ioutil"
8
9
"mime/quotedprintable"
9
10
"net/mail"
10
11
"strconv"
···
51
52
// line, that line will be removed and everything after it will be
52
53
// placed in BodyAppendix.
53
54
BodyAppendix string
55
+
56
+
// All headers completely unparsed
57
+
RawHeaders map[string][]string
54
58
}
55
59
56
60
// Message returns the commit message for the header. The message consists of
···
67
71
return msg.String()
68
72
}
69
73
70
-
// PatchIdentity identifies a person who authored or committed a patch.
71
-
type PatchIdentity struct {
72
-
Name string
73
-
Email string
74
-
}
75
-
76
-
func (i PatchIdentity) String() string {
77
-
name := i.Name
78
-
if name == "" {
79
-
name = `""`
80
-
}
81
-
return fmt.Sprintf("%s <%s>", name, i.Email)
82
-
}
83
-
84
-
// ParsePatchIdentity parses a patch identity string. A valid string contains a
85
-
// non-empty name followed by an email address in angle brackets. Like Git,
86
-
// ParsePatchIdentity does not require that the email address is valid or
87
-
// properly formatted, only that it is non-empty. The name must not contain a
88
-
// left angle bracket, '<', and the email address must not contain a right
89
-
// angle bracket, '>'.
90
-
func ParsePatchIdentity(s string) (PatchIdentity, error) {
91
-
var emailStart, emailEnd int
92
-
for i, c := range s {
93
-
if c == '<' && emailStart == 0 {
94
-
emailStart = i + 1
95
-
}
96
-
if c == '>' && emailStart > 0 {
97
-
emailEnd = i
98
-
break
99
-
}
100
-
}
101
-
if emailStart > 0 && emailEnd == 0 {
102
-
return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s)
103
-
}
104
-
105
-
var name, email string
106
-
if emailStart > 0 {
107
-
name = strings.TrimSpace(s[:emailStart-1])
108
-
}
109
-
if emailStart > 0 && emailEnd > 0 {
110
-
email = strings.TrimSpace(s[emailStart:emailEnd])
111
-
}
112
-
if name == "" || email == "" {
113
-
return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s)
114
-
}
115
-
116
-
return PatchIdentity{Name: name, Email: email}, nil
117
-
}
118
-
119
74
// ParsePatchDate parses a patch date string. It returns the parsed time or an
120
75
// error if s has an unknown format. ParsePatchDate supports the iso, rfc,
121
76
// short, raw, unix, and default formats (with local variants) used by the
···
164
119
return time.Time{}, fmt.Errorf("unknown date format: %s", s)
165
120
}
166
121
167
-
// ParsePatchHeader parses a preamble string as returned by Parse into a
122
+
// A PatchHeaderOption modifies the behavior of ParsePatchHeader.
123
+
type PatchHeaderOption func(*patchHeaderOptions)
124
+
125
+
// SubjectCleanMode controls how ParsePatchHeader cleans subject lines when
126
+
// parsing mail-formatted patches.
127
+
type SubjectCleanMode int
128
+
129
+
const (
130
+
// SubjectCleanWhitespace removes leading and trailing whitespace.
131
+
SubjectCleanWhitespace SubjectCleanMode = iota
132
+
133
+
// SubjectCleanAll removes leading and trailing whitespace, leading "Re:",
134
+
// "re:", and ":" strings, and leading strings enclosed by '[' and ']'.
135
+
// This is the default behavior of git (see `git mailinfo`) and this
136
+
// package.
137
+
SubjectCleanAll
138
+
139
+
// SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes
140
+
// leading strings enclosed by '[' and ']' if they start with "PATCH".
141
+
SubjectCleanPatchOnly
142
+
)
143
+
144
+
// WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By
145
+
// default, uses SubjectCleanAll.
146
+
func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption {
147
+
return func(opts *patchHeaderOptions) {
148
+
opts.subjectCleanMode = m
149
+
}
150
+
}
151
+
152
+
type patchHeaderOptions struct {
153
+
subjectCleanMode SubjectCleanMode
154
+
}
155
+
156
+
// ParsePatchHeader parses the preamble string returned by [Parse] into a
168
157
// PatchHeader. Due to the variety of header formats, some fields of the parsed
169
158
// PatchHeader may be unset after parsing.
170
159
//
171
160
// Supported formats are the short, medium, full, fuller, and email pretty
172
-
// formats used by git diff, git log, and git show and the UNIX mailbox format
173
-
// used by git format-patch.
161
+
// formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox
162
+
// format used by `git format-patch`.
163
+
//
164
+
// When parsing mail-formatted headers, ParsePatchHeader tries to remove
165
+
// email-specific content from the title and body:
174
166
//
175
-
// If ParsePatchHeader detects that it is handling an email, it will
176
-
// remove extra content at the beginning of the title line, such as
177
-
// `[PATCH]` or `Re:` in the same way that `git mailinfo` does.
178
-
// SubjectPrefix will be set to the value of this removed string.
179
-
// (`git mailinfo` is the core part of `git am` that pulls information
180
-
// out of an individual mail.)
167
+
// - Based on the SubjectCleanMode, remove prefixes like reply markers and
168
+
// "[PATCH]" strings from the subject, saving any removed content in the
169
+
// SubjectPrefix field. Parsing always discards leading and trailing
170
+
// whitespace from the subject line. The default mode is SubjectCleanAll.
181
171
//
182
-
// Additionally, if ParsePatchHeader detects that it's handling an
183
-
// email, it will remove a `---` line and put anything after it into
184
-
// BodyAppendix.
172
+
// - If the body contains a "---" line (3 hyphens), remove that line and any
173
+
// content after it from the body and save it in the BodyAppendix field.
185
174
//
186
-
// Those wishing the effect of a plain `git am` should use
187
-
// `PatchHeader.Title + "\n" + PatchHeader.Body` (or
188
-
// `PatchHeader.Message()`). Those wishing to retain the subject
189
-
// prefix and appendix material should use `PatchHeader.SubjectPrefix
190
-
// + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" +
191
-
// PatchHeader.BodyAppendix`.
192
-
func ParsePatchHeader(s string) (*PatchHeader, error) {
193
-
r := bufio.NewReader(strings.NewReader(s))
175
+
// ParsePatchHeader tries to process content it does not understand without
176
+
// returning errors, but will return errors if well-identified content like
177
+
// dates or identities uses unknown or invalid formats.
178
+
func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) {
179
+
opts := patchHeaderOptions{
180
+
subjectCleanMode: SubjectCleanAll, // match git defaults
181
+
}
182
+
for _, optFn := range options {
183
+
optFn(&opts)
184
+
}
194
185
195
-
var line string
196
-
for {
197
-
var err error
198
-
line, err = r.ReadString('\n')
199
-
if err == io.EOF {
200
-
break
201
-
}
202
-
if err != nil {
203
-
return nil, err
204
-
}
186
+
header = strings.TrimSpace(header)
187
+
if header == "" {
188
+
return &PatchHeader{}, nil
189
+
}
205
190
206
-
line = strings.TrimSpace(line)
207
-
if len(line) > 0 {
208
-
break
209
-
}
191
+
var firstLine, rest string
192
+
if idx := strings.IndexByte(header, '\n'); idx >= 0 {
193
+
firstLine = header[:idx]
194
+
rest = header[idx+1:]
195
+
} else {
196
+
firstLine = header
197
+
rest = ""
210
198
}
211
199
212
200
switch {
213
-
case strings.HasPrefix(line, mailHeaderPrefix):
214
-
return parseHeaderMail(line, r)
215
-
case strings.HasPrefix(line, mailMinimumHeaderPrefix):
216
-
r = bufio.NewReader(strings.NewReader(s))
217
-
return parseHeaderMail("", r)
218
-
case strings.HasPrefix(line, prettyHeaderPrefix):
219
-
return parseHeaderPretty(line, r)
201
+
case strings.HasPrefix(firstLine, mailHeaderPrefix):
202
+
return parseHeaderMail(firstLine, strings.NewReader(rest), opts)
203
+
204
+
case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix):
205
+
// With a minimum header, the first line is part of the actual mail
206
+
// content and needs to be parsed as part of the "rest"
207
+
return parseHeaderMail("", strings.NewReader(header), opts)
208
+
209
+
case strings.HasPrefix(firstLine, prettyHeaderPrefix):
210
+
return parseHeaderPretty(firstLine, strings.NewReader(rest))
220
211
}
212
+
221
213
return nil, errors.New("unrecognized patch header format")
222
214
}
223
215
···
232
224
233
225
h := &PatchHeader{}
234
226
235
-
prettyLine = prettyLine[len(prettyHeaderPrefix):]
227
+
prettyLine = strings.TrimPrefix(prettyLine, prettyHeaderPrefix)
236
228
if i := strings.IndexByte(prettyLine, ' '); i > 0 {
237
229
h.SHA = prettyLine[:i]
238
230
} else {
···
248
240
break
249
241
}
250
242
243
+
items := strings.SplitN(line, ":", 2)
244
+
245
+
// we have "key: value"
246
+
if len(items) == 2 {
247
+
key := items[0]
248
+
val := items[1]
249
+
h.RawHeaders[key] = append(h.RawHeaders[key], val)
250
+
}
251
+
251
252
switch {
252
253
case strings.HasPrefix(line, authorPrefix):
253
254
u, err := ParsePatchIdentity(line[len(authorPrefix):])
···
296
297
h.Title = title
297
298
298
299
if title != "" {
299
-
// Don't check for an appendix
300
+
// Don't check for an appendix, pretty headers do not contain them
300
301
body, _ := scanMessageBody(s, indent, false)
301
302
if s.Err() != nil {
302
303
return nil, s.Err()
···
365
366
return body.String(), appendix.String()
366
367
}
367
368
368
-
func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) {
369
+
func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) {
369
370
msg, err := mail.ReadMessage(r)
370
371
if err != nil {
371
372
return nil, err
372
373
}
373
374
374
375
h := &PatchHeader{}
376
+
h.RawHeaders = msg.Header
375
377
376
-
if len(mailLine) > len(mailHeaderPrefix) {
377
-
mailLine = mailLine[len(mailHeaderPrefix):]
378
+
if strings.HasPrefix(mailLine, mailHeaderPrefix) {
379
+
mailLine = strings.TrimPrefix(mailLine, mailHeaderPrefix)
378
380
if i := strings.IndexByte(mailLine, ' '); i > 0 {
379
381
h.SHA = mailLine[:i]
380
382
}
381
383
}
382
384
383
-
addrs, err := msg.Header.AddressList("From")
384
-
if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) {
385
-
return nil, err
386
-
}
387
-
if len(addrs) > 0 {
388
-
addr := addrs[0]
389
-
if addr.Name == "" {
390
-
addr.Name = addr.Address
385
+
from := msg.Header.Get("From")
386
+
if from != "" {
387
+
u, err := ParsePatchIdentity(from)
388
+
if err != nil {
389
+
return nil, err
391
390
}
392
-
h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address}
391
+
h.Author = &u
393
392
}
394
393
395
394
date := msg.Header.Get("Date")
···
402
401
}
403
402
404
403
subject := msg.Header.Get("Subject")
405
-
h.SubjectPrefix, h.Title = parseSubject(subject)
404
+
h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode)
406
405
407
406
s := bufio.NewScanner(msg.Body)
408
407
h.Body, h.BodyAppendix = scanMessageBody(s, "", true)
···
413
412
return h, nil
414
413
}
415
414
416
-
// Takes an email subject and returns the patch prefix and commit
417
-
// title. i.e., `[PATCH v3 3/5] Implement foo` would return `[PATCH
418
-
// v3 3/5] ` and `Implement foo`
419
-
func parseSubject(s string) (string, string) {
420
-
// This is meant to be compatible with
421
-
// https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject().
422
-
// If compatibility with `git am` drifts, go there to see if there
423
-
// are any updates.
415
+
func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) {
416
+
switch mode {
417
+
case SubjectCleanAll, SubjectCleanPatchOnly:
418
+
case SubjectCleanWhitespace:
419
+
return "", strings.TrimSpace(decodeSubject(s))
420
+
default:
421
+
panic(fmt.Sprintf("unknown clean mode: %d", mode))
422
+
}
423
+
424
+
// Based on the algorithm from Git in mailinfo.c:cleanup_subject()
425
+
// If compatibility with `git am` drifts, go there to see if there are any updates.
424
426
425
427
at := 0
426
428
for at < len(s) {
427
429
switch s[at] {
428
430
case 'r', 'R':
429
431
// Detect re:, Re:, rE: and RE:
430
-
if at+2 < len(s) &&
431
-
(s[at+1] == 'e' || s[at+1] == 'E') &&
432
-
s[at+2] == ':' {
432
+
if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' {
433
433
at += 3
434
434
continue
435
435
}
···
440
440
continue
441
441
442
442
case '[':
443
-
// Look for closing parenthesis
444
-
j := at + 1
445
-
for ; j < len(s); j++ {
446
-
if s[j] == ']' {
447
-
break
443
+
if i := strings.IndexByte(s[at:], ']'); i > 0 {
444
+
if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") {
445
+
at += i + 1
446
+
continue
448
447
}
449
-
}
450
-
451
-
if j < len(s) {
452
-
at = j + 1
453
-
continue
454
448
}
455
449
}
456
450
457
-
// Only loop if we actually removed something
451
+
// Nothing was removed, end processing
458
452
break
459
453
}
460
454
461
-
return s[:at], decodeSubject(s[at:])
455
+
prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace)
456
+
subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace)
457
+
return
462
458
}
463
459
464
460
// Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result
···
477
473
payload = strings.ReplaceAll(payload, " =?UTF-8?q?", "")
478
474
payload = strings.ReplaceAll(payload, "?=", "")
479
475
480
-
decoded, err := io.ReadAll(quotedprintable.NewReader(strings.NewReader(payload)))
476
+
decoded, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(payload)))
481
477
if err != nil {
482
478
// if err, abort decoding and return original subject
483
479
return encoded
+172
-104
gitdiff/patch_header_test.go
+172
-104
gitdiff/patch_header_test.go
···
5
5
"time"
6
6
)
7
7
8
-
func TestParsePatchIdentity(t *testing.T) {
9
-
tests := map[string]struct {
10
-
Input string
11
-
Output PatchIdentity
12
-
Err interface{}
13
-
}{
14
-
"simple": {
15
-
Input: "Morton Haypenny <mhaypenny@example.com>",
16
-
Output: PatchIdentity{
17
-
Name: "Morton Haypenny",
18
-
Email: "mhaypenny@example.com",
19
-
},
20
-
},
21
-
"extraWhitespace": {
22
-
Input: " Morton Haypenny <mhaypenny@example.com > ",
23
-
Output: PatchIdentity{
24
-
Name: "Morton Haypenny",
25
-
Email: "mhaypenny@example.com",
26
-
},
27
-
},
28
-
"trailingCharacters": {
29
-
Input: "Morton Haypenny <mhaypenny@example.com> unrelated garbage",
30
-
Output: PatchIdentity{
31
-
Name: "Morton Haypenny",
32
-
Email: "mhaypenny@example.com",
33
-
},
34
-
},
35
-
"missingName": {
36
-
Input: "<mhaypenny@example.com>",
37
-
Err: "invalid identity",
38
-
},
39
-
"missingEmail": {
40
-
Input: "Morton Haypenny",
41
-
Err: "invalid identity",
42
-
},
43
-
"unclosedEmail": {
44
-
Input: "Morton Haypenny <mhaypenny@example.com",
45
-
Err: "unclosed email",
46
-
},
47
-
}
48
-
49
-
for name, test := range tests {
50
-
t.Run(name, func(t *testing.T) {
51
-
id, err := ParsePatchIdentity(test.Input)
52
-
if test.Err != nil {
53
-
assertError(t, test.Err, err, "parsing identity")
54
-
return
55
-
}
56
-
if err != nil {
57
-
t.Fatalf("unexpected error parsing identity: %v", err)
58
-
}
59
-
60
-
if test.Output != id {
61
-
t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id)
62
-
}
63
-
})
64
-
}
65
-
}
66
-
67
8
func TestParsePatchDate(t *testing.T) {
68
9
expected := time.Date(2020, 4, 9, 8, 7, 6, 0, time.UTC)
69
10
···
144
85
expectedBodyAppendix := "CC: Joe Smith <joe.smith@company.com>"
145
86
146
87
tests := map[string]struct {
147
-
Input string
148
-
Header PatchHeader
149
-
Err interface{}
88
+
Input string
89
+
Options []PatchHeaderOption
90
+
Header PatchHeader
91
+
Err interface{}
150
92
}{
151
93
"prettyShort": {
152
94
Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b
···
269
211
Body: expectedBody,
270
212
},
271
213
},
214
+
"mailboxPatchOnly": {
215
+
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
216
+
From: Morton Haypenny <mhaypenny@example.com>
217
+
Date: Sat, 11 Apr 2020 15:21:23 -0700
218
+
Subject: [PATCH] [BUG-123] A sample commit to test header parsing
219
+
220
+
The medium format shows the body, which
221
+
may wrap on to multiple lines.
222
+
223
+
Another body line.
224
+
`,
225
+
Options: []PatchHeaderOption{
226
+
WithSubjectCleanMode(SubjectCleanPatchOnly),
227
+
},
228
+
Header: PatchHeader{
229
+
SHA: expectedSHA,
230
+
Author: expectedIdentity,
231
+
AuthorDate: expectedDate,
232
+
Title: "[BUG-123] " + expectedTitle,
233
+
Body: expectedBody,
234
+
},
235
+
},
272
236
"mailboxEmojiOneLine": {
273
237
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
274
238
From: Morton Haypenny <mhaypenny@example.com>
···
308
272
Body: expectedBody,
309
273
},
310
274
},
275
+
"mailboxRFC5322SpecialCharacters": {
276
+
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
277
+
From: "dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>
278
+
Date: Sat, 11 Apr 2020 15:21:23 -0700
279
+
Subject: [PATCH] A sample commit to test header parsing
280
+
281
+
The medium format shows the body, which
282
+
may wrap on to multiple lines.
283
+
284
+
Another body line.
285
+
`,
286
+
Header: PatchHeader{
287
+
SHA: expectedSHA,
288
+
Author: &PatchIdentity{
289
+
Name: "dependabot[bot]",
290
+
Email: "12345+dependabot[bot]@users.noreply.github.com",
291
+
},
292
+
AuthorDate: expectedDate,
293
+
Title: expectedTitle,
294
+
Body: expectedBody,
295
+
},
296
+
},
311
297
"mailboxAppendix": {
312
298
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
313
299
From: Morton Haypenny <mhaypenny@example.com>
···
414
400
Title: expectedTitle,
415
401
},
416
402
},
403
+
"emptyHeader": {
404
+
Input: "",
405
+
Header: PatchHeader{},
406
+
},
417
407
}
418
408
419
409
for name, test := range tests {
420
410
t.Run(name, func(t *testing.T) {
421
-
h, err := ParsePatchHeader(test.Input)
411
+
h, err := ParsePatchHeader(test.Input, test.Options...)
422
412
if test.Err != nil {
423
413
assertError(t, test.Err, err, "parsing patch header")
424
414
return
···
473
463
}
474
464
}
475
465
476
-
func TestCleanupSubject(t *testing.T) {
477
-
exp := "A sample commit to test header parsing"
478
-
tests := map[string]string{
479
-
"plain": "",
480
-
"patch": "[PATCH] ",
481
-
"patchv5": "[PATCH v5] ",
482
-
"patchrfc": "[PATCH RFC] ",
483
-
"patchnospace": "[PATCH]",
484
-
"space": " ",
485
-
"re": "re: ",
486
-
"Re": "Re: ",
487
-
"RE": "rE: ",
488
-
"rere": "re: re: ",
489
-
}
490
-
491
-
for name, prefix := range tests {
492
-
gotprefix, gottitle := parseSubject(prefix + exp)
493
-
if gottitle != exp {
494
-
t.Errorf("%s: Incorrect parsing of prefix %s: got title %s, wanted %s",
495
-
name, prefix, gottitle, exp)
496
-
}
497
-
if gotprefix != prefix {
498
-
t.Errorf("%s: Incorrect parsing of prefix %s: got prefix %s",
499
-
name, prefix, gotprefix)
500
-
}
501
-
}
466
+
func TestCleanSubject(t *testing.T) {
467
+
expectedSubject := "A sample commit to test header parsing"
502
468
503
-
moretests := map[string]struct {
504
-
in, eprefix, etitle string
469
+
tests := map[string]struct {
470
+
Input string
471
+
Mode SubjectCleanMode
472
+
Prefix string
473
+
Subject string
505
474
}{
506
-
"Reimplement": {"Reimplement something", "", "Reimplement something"},
507
-
"patch-reimplement": {"[PATCH v5] Reimplement something", "[PATCH v5] ", "Reimplement something"},
508
-
"Openbracket": {"[Just to annoy people", "", "[Just to annoy people"},
475
+
"CleanAll/noPrefix": {
476
+
Input: expectedSubject,
477
+
Mode: SubjectCleanAll,
478
+
Subject: expectedSubject,
479
+
},
480
+
"CleanAll/patchPrefix": {
481
+
Input: "[PATCH] " + expectedSubject,
482
+
Mode: SubjectCleanAll,
483
+
Prefix: "[PATCH] ",
484
+
Subject: expectedSubject,
485
+
},
486
+
"CleanAll/patchPrefixNoSpace": {
487
+
Input: "[PATCH]" + expectedSubject,
488
+
Mode: SubjectCleanAll,
489
+
Prefix: "[PATCH]",
490
+
Subject: expectedSubject,
491
+
},
492
+
"CleanAll/patchPrefixContent": {
493
+
Input: "[PATCH 3/7] " + expectedSubject,
494
+
Mode: SubjectCleanAll,
495
+
Prefix: "[PATCH 3/7] ",
496
+
Subject: expectedSubject,
497
+
},
498
+
"CleanAll/spacePrefix": {
499
+
Input: " " + expectedSubject,
500
+
Mode: SubjectCleanAll,
501
+
Subject: expectedSubject,
502
+
},
503
+
"CleanAll/replyLowerPrefix": {
504
+
Input: "re: " + expectedSubject,
505
+
Mode: SubjectCleanAll,
506
+
Prefix: "re: ",
507
+
Subject: expectedSubject,
508
+
},
509
+
"CleanAll/replyMixedPrefix": {
510
+
Input: "Re: " + expectedSubject,
511
+
Mode: SubjectCleanAll,
512
+
Prefix: "Re: ",
513
+
Subject: expectedSubject,
514
+
},
515
+
"CleanAll/replyCapsPrefix": {
516
+
Input: "RE: " + expectedSubject,
517
+
Mode: SubjectCleanAll,
518
+
Prefix: "RE: ",
519
+
Subject: expectedSubject,
520
+
},
521
+
"CleanAll/replyDoublePrefix": {
522
+
Input: "Re: re: " + expectedSubject,
523
+
Mode: SubjectCleanAll,
524
+
Prefix: "Re: re: ",
525
+
Subject: expectedSubject,
526
+
},
527
+
"CleanAll/noPrefixSubjectHasRe": {
528
+
Input: "Reimplement parsing",
529
+
Mode: SubjectCleanAll,
530
+
Subject: "Reimplement parsing",
531
+
},
532
+
"CleanAll/patchPrefixSubjectHasRe": {
533
+
Input: "[PATCH 1/2] Reimplement parsing",
534
+
Mode: SubjectCleanAll,
535
+
Prefix: "[PATCH 1/2] ",
536
+
Subject: "Reimplement parsing",
537
+
},
538
+
"CleanAll/unclosedPrefix": {
539
+
Input: "[Just to annoy people",
540
+
Mode: SubjectCleanAll,
541
+
Subject: "[Just to annoy people",
542
+
},
543
+
"CleanAll/multiplePrefix": {
544
+
Input: " Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject + " ",
545
+
Mode: SubjectCleanAll,
546
+
Prefix: "Re:Re: [PATCH 1/2][DRAFT] ",
547
+
Subject: expectedSubject,
548
+
},
549
+
"CleanPatchOnly/patchPrefix": {
550
+
Input: "[PATCH] " + expectedSubject,
551
+
Mode: SubjectCleanPatchOnly,
552
+
Prefix: "[PATCH] ",
553
+
Subject: expectedSubject,
554
+
},
555
+
"CleanPatchOnly/mixedPrefix": {
556
+
Input: "[PATCH] [TICKET-123] " + expectedSubject,
557
+
Mode: SubjectCleanPatchOnly,
558
+
Prefix: "[PATCH] ",
559
+
Subject: "[TICKET-123] " + expectedSubject,
560
+
},
561
+
"CleanPatchOnly/multiplePrefix": {
562
+
Input: "Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject,
563
+
Mode: SubjectCleanPatchOnly,
564
+
Prefix: "Re:Re: [PATCH 1/2]",
565
+
Subject: "[DRAFT] " + expectedSubject,
566
+
},
567
+
"CleanWhitespace/leadingSpace": {
568
+
Input: " [PATCH] " + expectedSubject,
569
+
Mode: SubjectCleanWhitespace,
570
+
Subject: "[PATCH] " + expectedSubject,
571
+
},
572
+
"CleanWhitespace/trailingSpace": {
573
+
Input: "[PATCH] " + expectedSubject + " ",
574
+
Mode: SubjectCleanWhitespace,
575
+
Subject: "[PATCH] " + expectedSubject,
576
+
},
509
577
}
510
578
511
-
for name, test := range moretests {
512
-
prefix, title := parseSubject(test.in)
513
-
if title != test.etitle {
514
-
t.Errorf("%s: Incorrect parsing of %s: got title %s, wanted %s",
515
-
name, test.in, title, test.etitle)
516
-
}
517
-
if prefix != test.eprefix {
518
-
t.Errorf("%s: Incorrect parsing of %s: got prefix %s, wanted %s",
519
-
name, test.in, title, test.etitle)
520
-
}
579
+
for name, test := range tests {
580
+
t.Run(name, func(t *testing.T) {
581
+
prefix, subject := cleanSubject(test.Input, test.Mode)
582
+
if prefix != test.Prefix {
583
+
t.Errorf("incorrect prefix: expected %q, actual %q", test.Prefix, prefix)
584
+
}
585
+
if subject != test.Subject {
586
+
t.Errorf("incorrect subject: expected %q, actual %q", test.Subject, subject)
587
+
}
588
+
})
521
589
}
522
590
}
+166
gitdiff/patch_identity.go
+166
gitdiff/patch_identity.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"fmt"
5
+
"strings"
6
+
)
7
+
8
+
// PatchIdentity identifies a person who authored or committed a patch.
9
+
type PatchIdentity struct {
10
+
Name string
11
+
Email string
12
+
}
13
+
14
+
func (i PatchIdentity) String() string {
15
+
name := i.Name
16
+
if name == "" {
17
+
name = `""`
18
+
}
19
+
return fmt.Sprintf("%s <%s>", name, i.Email)
20
+
}
21
+
22
+
// ParsePatchIdentity parses a patch identity string. A patch identity contains
23
+
// an email address and an optional name in [RFC 5322] format. This is either a
24
+
// plain email address or a name followed by an address in angle brackets:
25
+
//
26
+
// author@example.com
27
+
// Author Name <author@example.com>
28
+
//
29
+
// If the input is not one of these formats, ParsePatchIdentity applies a
30
+
// heuristic to separate the name and email portions. If both the name and
31
+
// email are missing or empty, ParsePatchIdentity returns an error. It
32
+
// otherwise does not validate the result.
33
+
//
34
+
// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322
35
+
func ParsePatchIdentity(s string) (PatchIdentity, error) {
36
+
s = normalizeSpace(s)
37
+
s = unquotePairs(s)
38
+
39
+
var name, email string
40
+
if at := strings.IndexByte(s, '@'); at >= 0 {
41
+
start, end := at, at
42
+
for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' {
43
+
start--
44
+
}
45
+
for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' {
46
+
end++
47
+
}
48
+
email = s[start+1 : end]
49
+
50
+
// Adjust the boundaries so that we drop angle brackets, but keep
51
+
// spaces when removing the email to form the name.
52
+
if start < 0 || s[start] != '<' {
53
+
start++
54
+
}
55
+
if end >= len(s) || s[end] != '>' {
56
+
end--
57
+
}
58
+
name = s[:start] + s[end+1:]
59
+
} else {
60
+
start, end := 0, 0
61
+
for i := 0; i < len(s); i++ {
62
+
if s[i] == '<' && start == 0 {
63
+
start = i + 1
64
+
}
65
+
if s[i] == '>' && start > 0 {
66
+
end = i
67
+
break
68
+
}
69
+
}
70
+
if start > 0 && end >= start {
71
+
email = strings.TrimSpace(s[start:end])
72
+
name = s[:start-1]
73
+
}
74
+
}
75
+
76
+
// After extracting the email, the name might contain extra whitespace
77
+
// again and may be surrounded by comment characters. The git source gives
78
+
// these examples of when this can happen:
79
+
//
80
+
// "Name <email@domain>"
81
+
// "email@domain (Name)"
82
+
// "Name <email@domain> (Comment)"
83
+
//
84
+
name = normalizeSpace(name)
85
+
if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") {
86
+
name = name[1 : len(name)-1]
87
+
}
88
+
name = strings.TrimSpace(name)
89
+
90
+
// If the name is empty or contains email-like characters, use the email
91
+
// instead (assuming one exists)
92
+
if name == "" || strings.ContainsAny(name, "@<>") {
93
+
name = email
94
+
}
95
+
96
+
if name == "" && email == "" {
97
+
return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s)
98
+
}
99
+
return PatchIdentity{Name: name, Email: email}, nil
100
+
}
101
+
102
+
// unquotePairs processes the RFC5322 tokens "quoted-string" and "comment" to
103
+
// remove any "quoted-pairs" (backslash-escaped characters). It also removes
104
+
// the quotes from any quoted strings, but leaves the comment delimiters.
105
+
func unquotePairs(s string) string {
106
+
quote := false
107
+
comments := 0
108
+
escaped := false
109
+
110
+
var out strings.Builder
111
+
for i := 0; i < len(s); i++ {
112
+
if escaped {
113
+
escaped = false
114
+
} else {
115
+
switch s[i] {
116
+
case '\\':
117
+
// quoted-pair is only allowed in quoted-string/comment
118
+
if quote || comments > 0 {
119
+
escaped = true
120
+
continue // drop '\' character
121
+
}
122
+
123
+
case '"':
124
+
if comments == 0 {
125
+
quote = !quote
126
+
continue // drop '"' character
127
+
}
128
+
129
+
case '(':
130
+
if !quote {
131
+
comments++
132
+
}
133
+
case ')':
134
+
if comments > 0 {
135
+
comments--
136
+
}
137
+
}
138
+
}
139
+
out.WriteByte(s[i])
140
+
}
141
+
return out.String()
142
+
}
143
+
144
+
// normalizeSpace trims leading and trailing whitespace from s and converts
145
+
// inner sequences of one or more whitespace characters to single spaces.
146
+
func normalizeSpace(s string) string {
147
+
var sb strings.Builder
148
+
for i := 0; i < len(s); i++ {
149
+
c := s[i]
150
+
if !isRFC5332Space(c) {
151
+
if sb.Len() > 0 && isRFC5332Space(s[i-1]) {
152
+
sb.WriteByte(' ')
153
+
}
154
+
sb.WriteByte(c)
155
+
}
156
+
}
157
+
return sb.String()
158
+
}
159
+
160
+
func isRFC5332Space(c byte) bool {
161
+
switch c {
162
+
case '\t', '\n', '\r', ' ':
163
+
return true
164
+
}
165
+
return false
166
+
}
+127
gitdiff/patch_identity_test.go
+127
gitdiff/patch_identity_test.go
···
1
+
package gitdiff
2
+
3
+
import (
4
+
"testing"
5
+
)
6
+
7
+
func TestParsePatchIdentity(t *testing.T) {
8
+
tests := map[string]struct {
9
+
Input string
10
+
Output PatchIdentity
11
+
Err interface{}
12
+
}{
13
+
"simple": {
14
+
Input: "Morton Haypenny <mhaypenny@example.com>",
15
+
Output: PatchIdentity{
16
+
Name: "Morton Haypenny",
17
+
Email: "mhaypenny@example.com",
18
+
},
19
+
},
20
+
"extraWhitespace": {
21
+
Input: "\t Morton Haypenny \r\n<mhaypenny@example.com> ",
22
+
Output: PatchIdentity{
23
+
Name: "Morton Haypenny",
24
+
Email: "mhaypenny@example.com",
25
+
},
26
+
},
27
+
"trailingCharacters": {
28
+
Input: "Morton Haypenny <mhaypenny@example.com> II",
29
+
Output: PatchIdentity{
30
+
Name: "Morton Haypenny II",
31
+
Email: "mhaypenny@example.com",
32
+
},
33
+
},
34
+
"onlyEmail": {
35
+
Input: "mhaypenny@example.com",
36
+
Output: PatchIdentity{
37
+
Name: "mhaypenny@example.com",
38
+
Email: "mhaypenny@example.com",
39
+
},
40
+
},
41
+
"onlyEmailInBrackets": {
42
+
Input: "<mhaypenny@example.com>",
43
+
Output: PatchIdentity{
44
+
Name: "mhaypenny@example.com",
45
+
Email: "mhaypenny@example.com",
46
+
},
47
+
},
48
+
"rfc5322SpecialCharacters": {
49
+
Input: `"dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>`,
50
+
Output: PatchIdentity{
51
+
Name: "dependabot[bot]",
52
+
Email: "12345+dependabot[bot]@users.noreply.github.com",
53
+
},
54
+
},
55
+
"rfc5322QuotedPairs": {
56
+
Input: `"Morton \"Old-Timer\" Haypenny" <"mhaypenny\+[1900]"@example.com> (III \(PhD\))`,
57
+
Output: PatchIdentity{
58
+
Name: `Morton "Old-Timer" Haypenny (III (PhD))`,
59
+
Email: "mhaypenny+[1900]@example.com",
60
+
},
61
+
},
62
+
"rfc5322QuotedPairsOutOfContext": {
63
+
Input: `Morton \\Backslash Haypenny <mhaypenny@example.com>`,
64
+
Output: PatchIdentity{
65
+
Name: `Morton \\Backslash Haypenny`,
66
+
Email: "mhaypenny@example.com",
67
+
},
68
+
},
69
+
"emptyEmail": {
70
+
Input: "Morton Haypenny <>",
71
+
Output: PatchIdentity{
72
+
Name: "Morton Haypenny",
73
+
Email: "",
74
+
},
75
+
},
76
+
"unclosedEmail": {
77
+
Input: "Morton Haypenny <mhaypenny@example.com",
78
+
Output: PatchIdentity{
79
+
Name: "Morton Haypenny",
80
+
Email: "mhaypenny@example.com",
81
+
},
82
+
},
83
+
"bogusEmail": {
84
+
Input: "Morton Haypenny <mhaypenny>",
85
+
Output: PatchIdentity{
86
+
Name: "Morton Haypenny",
87
+
Email: "mhaypenny",
88
+
},
89
+
},
90
+
"bogusEmailWithWhitespace": {
91
+
Input: "Morton Haypenny < mhaypenny >",
92
+
Output: PatchIdentity{
93
+
Name: "Morton Haypenny",
94
+
Email: "mhaypenny",
95
+
},
96
+
},
97
+
"missingEmail": {
98
+
Input: "Morton Haypenny",
99
+
Err: "invalid identity",
100
+
},
101
+
"missingNameAndEmptyEmail": {
102
+
Input: "<>",
103
+
Err: "invalid identity",
104
+
},
105
+
"empty": {
106
+
Input: "",
107
+
Err: "invalid identity",
108
+
},
109
+
}
110
+
111
+
for name, test := range tests {
112
+
t.Run(name, func(t *testing.T) {
113
+
id, err := ParsePatchIdentity(test.Input)
114
+
if test.Err != nil {
115
+
assertError(t, test.Err, err, "parsing identity")
116
+
return
117
+
}
118
+
if err != nil {
119
+
t.Fatalf("unexpected error parsing identity: %v", err)
120
+
}
121
+
122
+
if test.Output != id {
123
+
t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id)
124
+
}
125
+
})
126
+
}
127
+
}
+1
-1
gitdiff/testdata/apply/bin.go
+1
-1
gitdiff/testdata/apply/bin.go
+10
gitdiff/testdata/apply/text_fragment_change_end_eol.patch
+10
gitdiff/testdata/apply/text_fragment_change_end_eol.patch
···
1
+
diff --git a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src
2
+
index a92d664..8cf2f17 100644
3
+
--- a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src
4
+
+++ b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src
5
+
@@ -1,3 +1,3 @@
6
+
line 1
7
+
line 2
8
+
-line 3
9
+
+line 3
10
+
\ No newline at end of file
+8
gitdiff/testdata/no_files.patch
+8
gitdiff/testdata/no_files.patch
+9
gitdiff/testdata/string/binary_modify.patch
+9
gitdiff/testdata/string/binary_modify.patch
+3
gitdiff/testdata/string/binary_modify_nodata.patch
+3
gitdiff/testdata/string/binary_modify_nodata.patch
+11
gitdiff/testdata/string/binary_new.patch
+11
gitdiff/testdata/string/binary_new.patch
···
1
+
diff --git a/file.bin b/file.bin
2
+
new file mode 100644
3
+
index 0000000000000000000000000000000000000000..a7f4d5d6975ec021016c02b6d58345ebf434f38c
4
+
GIT binary patch
5
+
literal 72
6
+
zcmV-O0Jr~td-`u6JcK&{KDK=<a#;v1^LR5&K)zQ0=Goz82(?nJ6_nD`f#8O9p}}{P
7
+
eiXim+rDI+BDadMQmMsO5Sw@;DbrCA+PamP;Ng_@F
8
+
9
+
literal 0
10
+
HcmV?d00001
11
+
+4
gitdiff/testdata/string/copy.patch
+4
gitdiff/testdata/string/copy.patch
+21
gitdiff/testdata/string/copy_modify.patch
+21
gitdiff/testdata/string/copy_modify.patch
···
1
+
diff --git a/file.txt b/numbers.txt
2
+
similarity index 57%
3
+
copy from file.txt
4
+
copy to numbers.txt
5
+
index c9e9e05..6c4a3e0 100644
6
+
--- a/file.txt
7
+
+++ b/numbers.txt
8
+
@@ -1,6 +1,6 @@
9
+
one
10
+
two
11
+
-three
12
+
+three three three
13
+
four
14
+
five
15
+
six
16
+
@@ -8,3 +8,5 @@ seven
17
+
eight
18
+
nine
19
+
ten
20
+
+eleven
21
+
+twelve
+16
gitdiff/testdata/string/delete.patch
+16
gitdiff/testdata/string/delete.patch
+3
gitdiff/testdata/string/mode.patch
+3
gitdiff/testdata/string/mode.patch
+10
gitdiff/testdata/string/mode_modify.patch
+10
gitdiff/testdata/string/mode_modify.patch
+16
gitdiff/testdata/string/modify.patch
+16
gitdiff/testdata/string/modify.patch
+16
gitdiff/testdata/string/new.patch
+16
gitdiff/testdata/string/new.patch
+3
gitdiff/testdata/string/new_empty.patch
+3
gitdiff/testdata/string/new_empty.patch
+16
gitdiff/testdata/string/new_mode.patch
+16
gitdiff/testdata/string/new_mode.patch
+4
gitdiff/testdata/string/rename.patch
+4
gitdiff/testdata/string/rename.patch
+18
gitdiff/testdata/string/rename_modify.patch
+18
gitdiff/testdata/string/rename_modify.patch
···
1
+
diff --git a/file.txt b/numbers.txt
2
+
similarity index 77%
3
+
rename from file.txt
4
+
rename to numbers.txt
5
+
index c9e9e05..a6b31d6 100644
6
+
--- a/file.txt
7
+
+++ b/numbers.txt
8
+
@@ -3,8 +3,9 @@ two
9
+
three
10
+
four
11
+
five
12
+
-six
13
+
+ six
14
+
seven
15
+
eight
16
+
nine
17
+
ten
18
+
+eleven
+30
-18
gitdiff/text.go
+30
-18
gitdiff/text.go
···
79
79
return p.Errorf(0, "no content following fragment header")
80
80
}
81
81
82
-
isNoNewlineLine := func(s string) bool {
83
-
// test for "\ No newline at end of file" by prefix because the text
84
-
// changes by locale (git claims all versions are at least 12 chars)
85
-
return len(s) >= 12 && s[:2] == "\\ "
86
-
}
87
-
88
82
oldLines, newLines := frag.OldLines, frag.NewLines
89
-
for {
83
+
for oldLines > 0 || newLines > 0 {
90
84
line := p.Line(0)
91
85
op, data := line[0], line[1:]
92
86
···
113
107
frag.LinesAdded++
114
108
frag.TrailingContext = 0
115
109
frag.Lines = append(frag.Lines, Line{OpAdd, data})
116
-
default:
110
+
case '\\':
117
111
// this may appear in middle of fragment if it's for a deleted line
118
-
if isNoNewlineLine(line) {
119
-
last := &frag.Lines[len(frag.Lines)-1]
120
-
last.Line = strings.TrimSuffix(last.Line, "\n")
112
+
if isNoNewlineMarker(line) {
113
+
removeLastNewline(frag)
121
114
break
122
115
}
116
+
fallthrough
117
+
default:
123
118
// TODO(bkeyes): if this is because we hit the next header, it
124
119
// would be helpful to return the miscounts line error. We could
125
120
// either test for the common headers ("@@ -", "diff --git") or
126
121
// assume any invalid op ends the fragment; git returns the same
127
122
// generic error in all cases so either is compatible
128
123
return p.Errorf(0, "invalid line operation: %q", op)
129
-
}
130
-
131
-
next := p.Line(1)
132
-
if oldLines <= 0 && newLines <= 0 && !isNoNewlineLine(next) {
133
-
break
134
124
}
135
125
136
126
if err := p.Next(); err != nil {
···
145
135
hdr := max(frag.OldLines-oldLines, frag.NewLines-newLines) + 1
146
136
return p.Errorf(-hdr, "fragment header miscounts lines: %+d old, %+d new", -oldLines, -newLines)
147
137
}
138
+
if frag.LinesAdded == 0 && frag.LinesDeleted == 0 {
139
+
return p.Errorf(0, "fragment contains no changes")
140
+
}
148
141
149
-
if err := p.Next(); err != nil && err != io.EOF {
150
-
return err
142
+
// check for a final "no newline" marker since it is not included in the
143
+
// counters used to stop the loop above
144
+
if isNoNewlineMarker(p.Line(0)) {
145
+
removeLastNewline(frag)
146
+
if err := p.Next(); err != nil && err != io.EOF {
147
+
return err
148
+
}
151
149
}
150
+
152
151
return nil
152
+
}
153
+
154
+
func isNoNewlineMarker(s string) bool {
155
+
// test for "\ No newline at end of file" by prefix because the text
156
+
// changes by locale (git claims all versions are at least 12 chars)
157
+
return len(s) >= 12 && s[:2] == "\\ "
158
+
}
159
+
160
+
func removeLastNewline(frag *TextFragment) {
161
+
if len(frag.Lines) > 0 {
162
+
last := &frag.Lines[len(frag.Lines)-1]
163
+
last.Line = strings.TrimSuffix(last.Line, "\n")
164
+
}
153
165
}
154
166
155
167
func parseRange(s string) (start int64, end int64, err error) {
+18
gitdiff/text_test.go
+18
gitdiff/text_test.go
···
317
317
},
318
318
Err: true,
319
319
},
320
+
"onlyContext": {
321
+
Input: ` context line
322
+
context line
323
+
`,
324
+
Fragment: TextFragment{
325
+
OldLines: 2,
326
+
NewLines: 2,
327
+
},
328
+
Err: true,
329
+
},
330
+
"unexpectedNoNewlineMarker": {
331
+
Input: `\ No newline at end of file`,
332
+
Fragment: TextFragment{
333
+
OldLines: 1,
334
+
NewLines: 1,
335
+
},
336
+
Err: true,
337
+
},
320
338
}
321
339
322
340
for name, test := range tests {