// Fork of go-gitdiff with jj support.
1package gitdiff
2
3import (
4 "fmt"
5 "io"
6 "os"
7 "strconv"
8 "strings"
9 "time"
10)
11
// devNull is the placeholder path that appears in a file header in place of
// a real name; parseName returns it without any cleaning.
const devNull = "/dev/null"
15
16// ParseNextFileHeader finds and parses the next file header in the stream. If
17// a header is found, it returns a file and all input before the header. It
18// returns nil if no headers are found before the end of the input.
19func (p *parser) ParseNextFileHeader() (*File, string, error) {
20 var preamble strings.Builder
21 var file *File
22 for {
23 // check for disconnected fragment headers (corrupt patch)
24 frag, err := p.ParseTextFragmentHeader()
25 if err != nil {
26 // not a valid header, nothing to worry about
27 goto NextLine
28 }
29 if frag != nil {
30 return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header())
31 }
32
33 // check for a git-generated patch
34 file, err = p.ParseGitFileHeader()
35 if err != nil {
36 return nil, "", err
37 }
38 if file != nil {
39 return file, preamble.String(), nil
40 }
41
42 // check for a "traditional" patch
43 file, err = p.ParseTraditionalFileHeader()
44 if err != nil {
45 return nil, "", err
46 }
47 if file != nil {
48 return file, preamble.String(), nil
49 }
50
51 NextLine:
52 preamble.WriteString(p.Line(0))
53 if err := p.Next(); err != nil {
54 if err == io.EOF {
55 break
56 }
57 return nil, "", err
58 }
59 }
60 return nil, preamble.String(), nil
61}
62
63func (p *parser) ParseGitFileHeader() (*File, error) {
64 const prefix = "diff --git "
65
66 if !strings.HasPrefix(p.Line(0), prefix) {
67 return nil, nil
68 }
69 header := p.Line(0)[len(prefix):]
70
71 defaultName, err := parseGitHeaderName(header)
72 if err != nil {
73 return nil, p.Errorf(0, "git file header: %v", err)
74 }
75
76 f := &File{}
77 for {
78 end, err := parseGitHeaderData(f, p.Line(1), defaultName)
79 if err != nil {
80 return nil, p.Errorf(1, "git file header: %v", err)
81 }
82
83 if err := p.Next(); err != nil {
84 if err == io.EOF {
85 break
86 }
87 return nil, err
88 }
89
90 if end {
91 break
92 }
93 }
94
95 if f.OldName == "" && f.NewName == "" {
96 if defaultName == "" {
97 return nil, p.Errorf(0, "git file header: missing filename information")
98 }
99 f.OldName = defaultName
100 f.NewName = defaultName
101 }
102
103 if (f.NewName == "" && !f.IsDelete) || (f.OldName == "" && !f.IsNew) {
104 return nil, p.Errorf(0, "git file header: missing filename information")
105 }
106
107 return f, nil
108}
109
110func (p *parser) ParseTraditionalFileHeader() (*File, error) {
111 const shortestValidFragHeader = "@@ -1 +1 @@\n"
112 const (
113 oldPrefix = "--- "
114 newPrefix = "+++ "
115 )
116
117 oldLine, newLine := p.Line(0), p.Line(1)
118
119 if !strings.HasPrefix(oldLine, oldPrefix) || !strings.HasPrefix(newLine, newPrefix) {
120 return nil, nil
121 }
122 // heuristic: only a file header if followed by a (probable) fragment header
123 if len(p.Line(2)) < len(shortestValidFragHeader) || !strings.HasPrefix(p.Line(2), "@@ -") {
124 return nil, nil
125 }
126
127 // advance past the first two lines so parser is after the header
128 // no EOF check needed because we know there are >=3 valid lines
129 if err := p.Next(); err != nil {
130 return nil, err
131 }
132 if err := p.Next(); err != nil {
133 return nil, err
134 }
135
136 oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0)
137 if err != nil {
138 return nil, p.Errorf(0, "file header: %v", err)
139 }
140
141 newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0)
142 if err != nil {
143 return nil, p.Errorf(1, "file header: %v", err)
144 }
145
146 f := &File{}
147 switch {
148 case oldName == devNull || hasEpochTimestamp(oldLine):
149 f.IsNew = true
150 f.NewName = newName
151 case newName == devNull || hasEpochTimestamp(newLine):
152 f.IsDelete = true
153 f.OldName = oldName
154 default:
155 // if old name is a prefix of new name, use that instead
156 // this avoids picking variants like "file.bak" or "file~"
157 if strings.HasPrefix(newName, oldName) {
158 f.OldName = oldName
159 f.NewName = oldName
160 } else {
161 f.OldName = newName
162 f.NewName = newName
163 }
164 }
165
166 return f, nil
167}
168
169// parseGitHeaderName extracts a default file name from the Git file header
170// line. This is required for mode-only changes and creation/deletion of empty
171// files. Other types of patch include the file name(s) in the header data.
172// If the names in the header do not match because the patch is a rename,
173// return an empty default name.
174func parseGitHeaderName(header string) (string, error) {
175 header = strings.TrimSuffix(header, "\n")
176 if len(header) == 0 {
177 return "", nil
178 }
179
180 var err error
181 var first, second string
182
183 // there are 4 cases to account for:
184 //
185 // 1) unquoted unquoted
186 // 2) unquoted "quoted"
187 // 3) "quoted" unquoted
188 // 4) "quoted" "quoted"
189 //
190 quote := strings.IndexByte(header, '"')
191 switch {
192 case quote < 0:
193 // case 1
194 first = header
195
196 case quote > 0:
197 // case 2
198 first = header[:quote-1]
199 if !isSpace(header[quote-1]) {
200 return "", fmt.Errorf("missing separator")
201 }
202
203 second, _, err = parseQuotedName(header[quote:])
204 if err != nil {
205 return "", err
206 }
207
208 case quote == 0:
209 // case 3 or case 4
210 var n int
211 first, n, err = parseQuotedName(header)
212 if err != nil {
213 return "", err
214 }
215
216 // git accepts multiple spaces after a quoted name, but not after an
217 // unquoted name, since the name might end with one or more spaces
218 for n < len(header) && isSpace(header[n]) {
219 n++
220 }
221 if n == len(header) {
222 return "", nil
223 }
224
225 if header[n] == '"' {
226 second, _, err = parseQuotedName(header[n:])
227 if err != nil {
228 return "", err
229 }
230 } else {
231 second = header[n:]
232 }
233 }
234
235 first = trimTreePrefix(first, 1)
236 if second != "" {
237 if first == trimTreePrefix(second, 1) {
238 return first, nil
239 }
240 return "", nil
241 }
242
243 // at this point, both names are unquoted (case 1)
244 // since names may contain spaces, we can't use a known separator
245 // instead, look for a split that produces two equal names
246
247 for i := 0; i < len(first)-1; i++ {
248 if !isSpace(first[i]) {
249 continue
250 }
251 second = trimTreePrefix(first[i+1:], 1)
252 if name := first[:i]; name == second {
253 return name, nil
254 }
255 }
256 return "", nil
257}
258
259// parseGitHeaderData parses a single line of metadata from a Git file header.
260// It returns true when header parsing is complete; in that case, line was the
261// first line of non-header content.
262func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) {
263 if len(line) > 0 && line[len(line)-1] == '\n' {
264 line = line[:len(line)-1]
265 }
266
267 for _, hdr := range []struct {
268 prefix string
269 end bool
270 parse func(*File, string, string) error
271 }{
272 {"@@ -", true, nil},
273 {"--- ", false, parseGitHeaderOldName},
274 {"+++ ", false, parseGitHeaderNewName},
275 {"old mode ", false, parseGitHeaderOldMode},
276 {"new mode ", false, parseGitHeaderNewMode},
277 {"deleted file mode ", false, parseGitHeaderDeletedMode},
278 {"new file mode ", false, parseGitHeaderCreatedMode},
279 {"copy from ", false, parseGitHeaderCopyFrom},
280 {"copy to ", false, parseGitHeaderCopyTo},
281 {"rename old ", false, parseGitHeaderRenameFrom},
282 {"rename new ", false, parseGitHeaderRenameTo},
283 {"rename from ", false, parseGitHeaderRenameFrom},
284 {"rename to ", false, parseGitHeaderRenameTo},
285 {"similarity index ", false, parseGitHeaderScore},
286 {"dissimilarity index ", false, parseGitHeaderScore},
287 {"index ", false, parseGitHeaderIndex},
288 } {
289 if strings.HasPrefix(line, hdr.prefix) {
290 if hdr.parse != nil {
291 err = hdr.parse(f, line[len(hdr.prefix):], defaultName)
292 }
293 return hdr.end, err
294 }
295 }
296
297 // unknown line indicates the end of the header
298 // this usually happens if the diff is empty
299 return true, nil
300}
301
302func parseGitHeaderOldName(f *File, line, defaultName string) error {
303 name, _, err := parseName(line, '\t', 1)
304 if err != nil {
305 return err
306 }
307 if f.OldName == "" && !f.IsNew {
308 f.OldName = name
309 return nil
310 }
311 return verifyGitHeaderName(name, f.OldName, f.IsNew, "old")
312}
313
314func parseGitHeaderNewName(f *File, line, defaultName string) error {
315 name, _, err := parseName(line, '\t', 1)
316 if err != nil {
317 return err
318 }
319 if f.NewName == "" && !f.IsDelete {
320 f.NewName = name
321 return nil
322 }
323 return verifyGitHeaderName(name, f.NewName, f.IsDelete, "new")
324}
325
326func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) {
327 f.OldMode, err = parseMode(strings.TrimSpace(line))
328 return
329}
330
331func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) {
332 f.NewMode, err = parseMode(strings.TrimSpace(line))
333 return
334}
335
336func parseGitHeaderDeletedMode(f *File, line, defaultName string) error {
337 f.IsDelete = true
338 f.OldName = defaultName
339 return parseGitHeaderOldMode(f, line, defaultName)
340}
341
342func parseGitHeaderCreatedMode(f *File, line, defaultName string) error {
343 f.IsNew = true
344 f.NewName = defaultName
345 return parseGitHeaderNewMode(f, line, defaultName)
346}
347
348func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) {
349 f.IsCopy = true
350 f.OldName, _, err = parseName(line, 0, 0)
351 return
352}
353
354func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) {
355 f.IsCopy = true
356 f.NewName, _, err = parseName(line, 0, 0)
357 return
358}
359
360func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) {
361 f.IsRename = true
362 f.OldName, _, err = parseName(line, 0, 0)
363 return
364}
365
366func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) {
367 f.IsRename = true
368 f.NewName, _, err = parseName(line, 0, 0)
369 return
370}
371
372func parseGitHeaderScore(f *File, line, defaultName string) error {
373 score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32)
374 if err != nil {
375 nerr := err.(*strconv.NumError)
376 return fmt.Errorf("invalid score line: %v", nerr.Err)
377 }
378 if score <= 100 {
379 f.Score = int(score)
380 }
381 return nil
382}
383
384func parseGitHeaderIndex(f *File, line, defaultName string) error {
385 const sep = ".."
386
387 // note that git stops parsing if the OIDs are too long to be valid
388 // checking this requires knowing if the repository uses SHA1 or SHA256
389 // hashes, which we don't know, so we just skip that check
390
391 parts := strings.SplitN(line, " ", 2)
392 oids := strings.SplitN(parts[0], sep, 2)
393
394 if len(oids) < 2 {
395 return fmt.Errorf("invalid index line: missing %q", sep)
396 }
397 f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1]
398
399 if len(parts) > 1 {
400 return parseGitHeaderOldMode(f, parts[1], defaultName)
401 }
402 return nil
403}
404
// parseMode parses s as an octal file mode.
//
// It returns the zero mode and an "invalid mode line" error when s is not a
// valid octal number, does not fit in 32 bits, or is negative. Negative
// values are rejected explicitly because converting them to os.FileMode
// would silently wrap around to a large, meaningless mode.
func parseMode(s string) (os.FileMode, error) {
	mode, err := strconv.ParseInt(s, 8, 32)
	if err != nil {
		// report only the underlying cause, not the strconv call details
		if nerr, ok := err.(*strconv.NumError); ok {
			err = nerr.Err
		}
		return os.FileMode(0), fmt.Errorf("invalid mode line: %v", err)
	}
	if mode < 0 {
		return os.FileMode(0), fmt.Errorf("invalid mode line: %v", strconv.ErrRange)
	}
	return os.FileMode(mode), nil
}
413
414// parseName extracts a file name from the start of a string and returns the
415// name and the index of the first character after the name. If the name is
416// unquoted and term is non-zero, parsing stops at the first occurrence of
417// term.
418//
419// If the name is exactly "/dev/null", no further processing occurs. Otherwise,
420// if dropPrefix is greater than zero, that number of prefix components
421// separated by forward slashes are dropped from the name and any duplicate
422// slashes are collapsed.
423func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) {
424 if len(s) > 0 && s[0] == '"' {
425 name, n, err = parseQuotedName(s)
426 } else {
427 name, n, err = parseUnquotedName(s, term)
428 }
429 if err != nil {
430 return "", 0, err
431 }
432 if name == devNull {
433 return name, n, nil
434 }
435 return cleanName(name, dropPrefix), n, nil
436}
437
// parseQuotedName extracts a double-quoted name from the start of s. It
// returns the unquoted name and the index of the first byte after the
// closing quote.
//
// Escape sequences are tracked while scanning, so a quote that follows an
// escaped backslash (as in `"dir\\" rest`) is recognized as the closing
// quote instead of being mistaken for an escaped quote. An error is returned
// when s is empty, when the scan stops after only two bytes (for example an
// empty quoted name), or when the quoted text cannot be unquoted.
func parseQuotedName(s string) (name string, n int, err error) {
	if len(s) == 0 {
		return "", 0, fmt.Errorf("missing name")
	}

	// s[0] is taken to be the opening quote; find the matching closing quote
	escaped := false
	for n = 1; n < len(s); n++ {
		if escaped {
			// this byte belongs to an escape sequence, whatever it is
			escaped = false
			continue
		}
		if s[n] == '\\' {
			escaped = true
			continue
		}
		if s[n] == '"' {
			n++
			break
		}
	}
	if n == 2 {
		return "", 0, fmt.Errorf("missing name")
	}
	if name, err = strconv.Unquote(s[:n]); err != nil {
		return "", 0, err
	}
	return name, n, nil
}
453
// parseUnquotedName extracts an unquoted name from the start of s. The name
// ends at the first newline, at the first occurrence of term when term is
// non-zero, or at the end of s. It returns the name and its length, or an
// error when the name would be empty.
func parseUnquotedName(s string, term byte) (name string, n int, err error) {
	end := len(s)
	for i := 0; i < len(s); i++ {
		if c := s[i]; c == '\n' || (term > 0 && c == term) {
			end = i
			break
		}
	}
	if end == 0 {
		return "", 0, fmt.Errorf("missing name")
	}
	return s[:end], end, nil
}
468
469// verifyGitHeaderName checks a parsed name against state set by previous lines
470func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error {
471 if existing != "" {
472 if isNull {
473 return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing)
474 }
475 if existing != parsed {
476 return fmt.Errorf("inconsistent %s filename", side)
477 }
478 }
479 if isNull && parsed != devNull {
480 return fmt.Errorf("expected %s", devNull)
481 }
482 return nil
483}
484
// cleanName collapses runs of slashes in name into a single slash and drops
// up to drop leading slash-separated components. Each dropped component
// discards everything written so far, including the slash that ended it.
func cleanName(name string, drop int) string {
	var out strings.Builder
	last := len(name) - 1
	for i := 0; i <= last; i++ {
		c := name[i]
		if c != '/' {
			out.WriteByte(c)
			continue
		}

		switch {
		case i < last && name[i+1] == '/':
			// part of a run of slashes: only the final one is considered
		case drop > 0:
			// this slash ends a component that must be dropped
			drop--
			out.Reset()
		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}
503
// trimTreePrefix removes up to n leading directory components from name,
// returning what follows the n-th slash. If name contains fewer than n
// slashes (and n is positive), the result is empty.
func trimTreePrefix(name string, n int) string {
	for pos := 0; pos < len(name); pos++ {
		if n <= 0 {
			// enough components were skipped: keep the remainder
			return name[pos:]
		}
		if name[pos] == '/' {
			n--
		}
	}
	return ""
}
514
// hasEpochTimestamp reports whether s ends, after its first tab character,
// with a POSIX-formatted timestamp equal to the UNIX epoch. According to
// git, GNU diff uses such a timestamp to mark creations and deletions.
func hasEpochTimestamp(s string) bool {
	const posixTimeLayout = "2006-01-02 15:04:05.9 -0700"

	tab := strings.IndexRune(s, '\t')
	if tab < 0 {
		return false
	}
	stamp := strings.TrimSuffix(s[tab+1:], "\n")

	// the zone specifier may contain an optional ':'; strip it so that a
	// single layout covers both forms
	if colon := len(stamp) - 3; colon >= 0 && stamp[colon] == ':' {
		stamp = stamp[:colon] + stamp[colon+1:]
	}

	parsed, err := time.Parse(posixTimeLayout, stamp)
	return err == nil && parsed.Equal(time.Unix(0, 0))
}
543
// isSpace reports whether c is a space, a tab, or a newline.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\n':
		return true
	}
	return false
}