// Fork of go-gitdiff with jj support.

1package gitdiff
2
3import (
4 "fmt"
5 "io"
6 "os"
7 "strconv"
8 "strings"
9 "time"
10)
11
// devNull is the placeholder path that takes the place of a file name on the
// side of a patch where the file does not exist.
const devNull = "/dev/null"
15
16// ParseNextFileHeader finds and parses the next file header in the stream. If
17// a header is found, it returns a file and all input before the header. It
18// returns nil if no headers are found before the end of the input.
19func (p *parser) ParseNextFileHeader() (*File, string, error) {
20 var preamble strings.Builder
21 var file *File
22 for {
23 // check for disconnected fragment headers (corrupt patch)
24 frag, err := p.ParseTextFragmentHeader()
25 if err != nil {
26 // not a valid header, nothing to worry about
27 goto NextLine
28 }
29 if frag != nil {
30 return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header())
31 }
32
33 // check for a git-generated patch
34 file, err = p.ParseGitFileHeader()
35 if err != nil {
36 return nil, "", err
37 }
38 if file != nil {
39 return file, preamble.String(), nil
40 }
41
42 // check for a "traditional" patch
43 file, err = p.ParseTraditionalFileHeader()
44 if err != nil {
45 return nil, "", err
46 }
47 if file != nil {
48 return file, preamble.String(), nil
49 }
50
51 NextLine:
52 preamble.WriteString(p.Line(0))
53 if err := p.Next(); err != nil {
54 if err == io.EOF {
55 break
56 }
57 return nil, "", err
58 }
59 }
60 return nil, "", nil
61}
62
63func (p *parser) ParseGitFileHeader() (*File, error) {
64 const prefix = "diff --git "
65
66 if !strings.HasPrefix(p.Line(0), prefix) {
67 return nil, nil
68 }
69 header := p.Line(0)[len(prefix):]
70
71 defaultName, err := parseGitHeaderName(header)
72 if err != nil {
73 return nil, p.Errorf(0, "git file header: %v", err)
74 }
75
76 f := &File{}
77 for {
78 end, err := parseGitHeaderData(f, p.Line(1), defaultName)
79 if err != nil {
80 return nil, p.Errorf(1, "git file header: %v", err)
81 }
82
83 if err := p.Next(); err != nil {
84 if err == io.EOF {
85 break
86 }
87 return nil, err
88 }
89
90 if end {
91 break
92 }
93 }
94
95 if f.OldName == "" && f.NewName == "" {
96 if defaultName == "" {
97 return nil, p.Errorf(0, "git file header: missing filename information")
98 }
99 f.OldName = defaultName
100 f.NewName = defaultName
101 }
102
103 if (f.NewName == "" && !f.IsDelete) || (f.OldName == "" && !f.IsNew) {
104 return nil, p.Errorf(0, "git file header: missing filename information")
105 }
106
107 return f, nil
108}
109
110func (p *parser) ParseTraditionalFileHeader() (*File, error) {
111 const shortestValidFragHeader = "@@ -1 +1 @@\n"
112 const (
113 oldPrefix = "--- "
114 newPrefix = "+++ "
115 )
116
117 oldLine, newLine := p.Line(0), p.Line(1)
118
119 if !strings.HasPrefix(oldLine, oldPrefix) || !strings.HasPrefix(newLine, newPrefix) {
120 return nil, nil
121 }
122 // heuristic: only a file header if followed by a (probable) fragment header
123 if len(p.Line(2)) < len(shortestValidFragHeader) || !strings.HasPrefix(p.Line(2), "@@ -") {
124 return nil, nil
125 }
126
127 // advance past the first two lines so parser is after the header
128 // no EOF check needed because we know there are >=3 valid lines
129 if err := p.Next(); err != nil {
130 return nil, err
131 }
132 if err := p.Next(); err != nil {
133 return nil, err
134 }
135
136 oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0)
137 if err != nil {
138 return nil, p.Errorf(0, "file header: %v", err)
139 }
140
141 newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0)
142 if err != nil {
143 return nil, p.Errorf(1, "file header: %v", err)
144 }
145
146 f := &File{}
147 switch {
148 case oldName == devNull || hasEpochTimestamp(oldLine):
149 f.IsNew = true
150 f.NewName = newName
151 case newName == devNull || hasEpochTimestamp(newLine):
152 f.IsDelete = true
153 f.OldName = oldName
154 default:
155 // if old name is a prefix of new name, use that instead
156 // this avoids picking variants like "file.bak" or "file~"
157 if strings.HasPrefix(newName, oldName) {
158 f.OldName = oldName
159 f.NewName = oldName
160 } else {
161 f.OldName = newName
162 f.NewName = newName
163 }
164 }
165
166 return f, nil
167}
168
169// parseGitHeaderName extracts a default file name from the Git file header
170// line. This is required for mode-only changes and creation/deletion of empty
171// files. Other types of patch include the file name(s) in the header data.
172// If the names in the header do not match because the patch is a rename,
173// return an empty default name.
174func parseGitHeaderName(header string) (string, error) {
175 firstName, n, err := parseName(header, -1, 1)
176 if err != nil {
177 return "", err
178 }
179
180 if n < len(header) && (header[n] == ' ' || header[n] == '\t') {
181 n++
182 }
183
184 secondName, _, err := parseName(header[n:], -1, 1)
185 if err != nil {
186 return "", err
187 }
188
189 if firstName != secondName {
190 return "", nil
191 }
192 return firstName, nil
193}
194
195// parseGitHeaderData parses a single line of metadata from a Git file header.
196// It returns true when header parsing is complete; in that case, line was the
197// first line of non-header content.
198func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) {
199 if len(line) > 0 && line[len(line)-1] == '\n' {
200 line = line[:len(line)-1]
201 }
202
203 for _, hdr := range []struct {
204 prefix string
205 end bool
206 parse func(*File, string, string) error
207 }{
208 {"@@ -", true, nil},
209 {"--- ", false, parseGitHeaderOldName},
210 {"+++ ", false, parseGitHeaderNewName},
211 {"old mode ", false, parseGitHeaderOldMode},
212 {"new mode ", false, parseGitHeaderNewMode},
213 {"deleted file mode ", false, parseGitHeaderDeletedMode},
214 {"new file mode ", false, parseGitHeaderCreatedMode},
215 {"copy from ", false, parseGitHeaderCopyFrom},
216 {"copy to ", false, parseGitHeaderCopyTo},
217 {"rename old ", false, parseGitHeaderRenameFrom},
218 {"rename new ", false, parseGitHeaderRenameTo},
219 {"rename from ", false, parseGitHeaderRenameFrom},
220 {"rename to ", false, parseGitHeaderRenameTo},
221 {"similarity index ", false, parseGitHeaderScore},
222 {"dissimilarity index ", false, parseGitHeaderScore},
223 {"index ", false, parseGitHeaderIndex},
224 } {
225 if strings.HasPrefix(line, hdr.prefix) {
226 if hdr.parse != nil {
227 err = hdr.parse(f, line[len(hdr.prefix):], defaultName)
228 }
229 return hdr.end, err
230 }
231 }
232
233 // unknown line indicates the end of the header
234 // this usually happens if the diff is empty
235 return true, nil
236}
237
238func parseGitHeaderOldName(f *File, line, defaultName string) error {
239 name, _, err := parseName(line, '\t', 1)
240 if err != nil {
241 return err
242 }
243 if f.OldName == "" && !f.IsNew {
244 f.OldName = name
245 return nil
246 }
247 return verifyGitHeaderName(name, f.OldName, f.IsNew, "old")
248}
249
250func parseGitHeaderNewName(f *File, line, defaultName string) error {
251 name, _, err := parseName(line, '\t', 1)
252 if err != nil {
253 return err
254 }
255 if f.NewName == "" && !f.IsDelete {
256 f.NewName = name
257 return nil
258 }
259 return verifyGitHeaderName(name, f.NewName, f.IsDelete, "new")
260}
261
262func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) {
263 f.OldMode, err = parseMode(line)
264 return
265}
266
267func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) {
268 f.NewMode, err = parseMode(line)
269 return
270}
271
272func parseGitHeaderDeletedMode(f *File, line, defaultName string) error {
273 f.IsDelete = true
274 f.OldName = defaultName
275 return parseGitHeaderOldMode(f, line, defaultName)
276}
277
278func parseGitHeaderCreatedMode(f *File, line, defaultName string) error {
279 f.IsNew = true
280 f.NewName = defaultName
281 return parseGitHeaderNewMode(f, line, defaultName)
282}
283
284func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) {
285 f.IsCopy = true
286 f.OldName, _, err = parseName(line, -1, 0)
287 return
288}
289
290func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) {
291 f.IsCopy = true
292 f.NewName, _, err = parseName(line, -1, 0)
293 return
294}
295
296func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) {
297 f.IsRename = true
298 f.OldName, _, err = parseName(line, -1, 0)
299 return
300}
301
302func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) {
303 f.IsRename = true
304 f.NewName, _, err = parseName(line, -1, 0)
305 return
306}
307
308func parseGitHeaderScore(f *File, line, defaultName string) error {
309 score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32)
310 if err != nil {
311 nerr := err.(*strconv.NumError)
312 return fmt.Errorf("invalid score line: %v", nerr.Err)
313 }
314 if score <= 100 {
315 f.Score = int(score)
316 }
317 return nil
318}
319
320func parseGitHeaderIndex(f *File, line, defaultName string) error {
321 const sep = ".."
322
323 // note that git stops parsing if the OIDs are too long to be valid
324 // checking this requires knowing if the repository uses SHA1 or SHA256
325 // hashes, which we don't know, so we just skip that check
326
327 parts := strings.SplitN(line, " ", 2)
328 oids := strings.SplitN(parts[0], sep, 2)
329
330 if len(oids) < 2 {
331 return fmt.Errorf("invalid index line: missing %q", sep)
332 }
333 f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1]
334
335 if len(parts) > 1 {
336 return parseGitHeaderOldMode(f, parts[1], defaultName)
337 }
338 return nil
339}
340
// parseMode converts a string of octal digits into a file mode. The numeric
// value is used directly as the os.FileMode bits. On failure it returns a
// zero mode and an error that carries only the underlying cause reported by
// strconv.ParseInt.
func parseMode(s string) (os.FileMode, error) {
	bits, err := strconv.ParseInt(s, 8, 32)
	if err == nil {
		return os.FileMode(bits), nil
	}
	cause := err.(*strconv.NumError).Err
	return os.FileMode(0), fmt.Errorf("invalid mode line: %v", cause)
}
349
350// parseName extracts a file name from the start of a string and returns the
351// name and the index of the first character after the name. If the name is
352// unquoted and term is non-negative, parsing stops at the first occurrence of
353// term. Otherwise parsing of unquoted names stops at the first space or tab.
354//
355// If the name is exactly "/dev/null", no further processing occurs. Otherwise,
356// if dropPrefix is greater than zero, that number of prefix components
357// separated by forward slashes are dropped from the name and any duplicate
358// slashes are collapsed.
359func parseName(s string, term rune, dropPrefix int) (name string, n int, err error) {
360 if len(s) > 0 && s[0] == '"' {
361 name, n, err = parseQuotedName(s)
362 } else {
363 name, n, err = parseUnquotedName(s, term)
364 }
365 if err != nil {
366 return "", 0, err
367 }
368 if name == devNull {
369 return name, n, nil
370 }
371 return cleanName(name, dropPrefix), n, nil
372}
373
// parseQuotedName extracts a double-quoted name from the start of s, which is
// expected to begin with a '"' character. It returns the unquoted name and
// the index of the first character after the closing quote. An empty quoted
// name or text that strconv.Unquote rejects results in an error.
//
// Each backslash escape is skipped as a unit while looking for the closing
// quote. This way a name that ends in an escaped backslash, such as "a\\",
// ends at its own closing quote instead of running on into whatever follows
// it on the line.
func parseQuotedName(s string) (name string, n int, err error) {
	for n = 1; n < len(s); n++ {
		if s[n] == '\\' {
			// the character after a backslash can never close the name
			n++
			continue
		}
		if s[n] == '"' {
			n++
			break
		}
	}
	// a dangling backslash at the end of the input pushes n past len(s)
	if n > len(s) {
		n = len(s)
	}
	if n == 2 {
		return "", 0, fmt.Errorf("missing name")
	}
	if name, err = strconv.Unquote(s[:n]); err != nil {
		return "", 0, err
	}
	return name, n, nil
}
389
// parseUnquotedName reads an unquoted name from the start of s and returns it
// with its length in bytes. The name always ends at a newline. It also ends
// at the first byte equal to term when term is non-negative, or at the first
// space or tab when term is negative. An empty name is an error.
func parseUnquotedName(s string, term rune) (name string, n int, err error) {
	ends := func(c byte) bool {
		switch {
		case c == '\n':
			return true
		case term >= 0:
			return rune(c) == term
		default:
			return c == ' ' || c == '\t'
		}
	}

	for n < len(s) && !ends(s[n]) {
		n++
	}
	if n == 0 {
		return "", 0, fmt.Errorf("missing name")
	}
	return s[:n], n, nil
}
407
408// verifyGitHeaderName checks a parsed name against state set by previous lines
409func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error {
410 if existing != "" {
411 if isNull {
412 return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing)
413 }
414 if existing != parsed {
415 return fmt.Errorf("inconsistent %s filename", side)
416 }
417 }
418 if isNull && parsed != devNull {
419 return fmt.Errorf("expected %s", devNull)
420 }
421 return nil
422}
423
// cleanName collapses runs of slashes in name into a single slash and removes
// up to drop leading slash-separated components. A component is removed by
// discarding everything gathered so far each time a separator is reached
// while drop is still positive.
func cleanName(name string, drop int) string {
	var out strings.Builder
	for i, c := range []byte(name) {
		isSlash := c == '/'
		switch {
		case isSlash && i+1 < len(name) && name[i+1] == '/':
			// only the last slash of a run counts as a separator
		case isSlash && drop > 0:
			drop--
			out.Reset()
		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}
442
// hasEpochTimestamp returns true if the string ends with a POSIX-formatted
// timestamp for the UNIX epoch after a tab character. According to git, this
// is used by GNU diff to mark creations and deletions.
//
// Everything after the first tab, minus one trailing newline, is treated as
// the timestamp. Text that is missing, too short, or not a valid timestamp
// yields false.
func hasEpochTimestamp(s string) bool {
	const posixTimeLayout = "2006-01-02 15:04:05.9 -0700"

	start := strings.IndexRune(s, '\t')
	if start < 0 {
		return false
	}

	ts := strings.TrimSuffix(s[start+1:], "\n")

	// A valid timestamp can have an optional ':' in the zone specifier;
	// remove it if present so a single layout covers both forms. The length
	// guard keeps short text such as "0" from indexing out of range.
	if len(ts) >= 3 && ts[len(ts)-3] == ':' {
		ts = ts[:len(ts)-3] + ts[len(ts)-2:]
	}

	t, err := time.Parse(posixTimeLayout, ts)
	if err != nil {
		return false
	}
	return t.Equal(time.Unix(0, 0))
}