fork of go-gitdiff with jj support
at v0.5.1 14 kB view raw
1package gitdiff 2 3import ( 4 "fmt" 5 "io" 6 "os" 7 "strconv" 8 "strings" 9 "time" 10) 11 12const ( 13 devNull = "/dev/null" 14) 15 16// ParseNextFileHeader finds and parses the next file header in the stream. If 17// a header is found, it returns a file and all input before the header. It 18// returns nil if no headers are found before the end of the input. 19func (p *parser) ParseNextFileHeader() (*File, string, error) { 20 var preamble strings.Builder 21 var file *File 22 for { 23 // check for disconnected fragment headers (corrupt patch) 24 frag, err := p.ParseTextFragmentHeader() 25 if err != nil { 26 // not a valid header, nothing to worry about 27 goto NextLine 28 } 29 if frag != nil { 30 return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header()) 31 } 32 33 // check for a git-generated patch 34 file, err = p.ParseGitFileHeader() 35 if err != nil { 36 return nil, "", err 37 } 38 if file != nil { 39 return file, preamble.String(), nil 40 } 41 42 // check for a "traditional" patch 43 file, err = p.ParseTraditionalFileHeader() 44 if err != nil { 45 return nil, "", err 46 } 47 if file != nil { 48 return file, preamble.String(), nil 49 } 50 51 NextLine: 52 preamble.WriteString(p.Line(0)) 53 if err := p.Next(); err != nil { 54 if err == io.EOF { 55 break 56 } 57 return nil, "", err 58 } 59 } 60 return nil, "", nil 61} 62 63func (p *parser) ParseGitFileHeader() (*File, error) { 64 const prefix = "diff --git " 65 66 if !strings.HasPrefix(p.Line(0), prefix) { 67 return nil, nil 68 } 69 header := p.Line(0)[len(prefix):] 70 71 defaultName, err := parseGitHeaderName(header) 72 if err != nil { 73 return nil, p.Errorf(0, "git file header: %v", err) 74 } 75 76 f := &File{} 77 for { 78 end, err := parseGitHeaderData(f, p.Line(1), defaultName) 79 if err != nil { 80 return nil, p.Errorf(1, "git file header: %v", err) 81 } 82 83 if err := p.Next(); err != nil { 84 if err == io.EOF { 85 break 86 } 87 return nil, err 88 } 89 90 if end { 91 break 92 } 
93 } 94 95 if f.OldName == "" && f.NewName == "" { 96 if defaultName == "" { 97 return nil, p.Errorf(0, "git file header: missing filename information") 98 } 99 f.OldName = defaultName 100 f.NewName = defaultName 101 } 102 103 if (f.NewName == "" && !f.IsDelete) || (f.OldName == "" && !f.IsNew) { 104 return nil, p.Errorf(0, "git file header: missing filename information") 105 } 106 107 return f, nil 108} 109 110func (p *parser) ParseTraditionalFileHeader() (*File, error) { 111 const shortestValidFragHeader = "@@ -1 +1 @@\n" 112 const ( 113 oldPrefix = "--- " 114 newPrefix = "+++ " 115 ) 116 117 oldLine, newLine := p.Line(0), p.Line(1) 118 119 if !strings.HasPrefix(oldLine, oldPrefix) || !strings.HasPrefix(newLine, newPrefix) { 120 return nil, nil 121 } 122 // heuristic: only a file header if followed by a (probable) fragment header 123 if len(p.Line(2)) < len(shortestValidFragHeader) || !strings.HasPrefix(p.Line(2), "@@ -") { 124 return nil, nil 125 } 126 127 // advance past the first two lines so parser is after the header 128 // no EOF check needed because we know there are >=3 valid lines 129 if err := p.Next(); err != nil { 130 return nil, err 131 } 132 if err := p.Next(); err != nil { 133 return nil, err 134 } 135 136 oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0) 137 if err != nil { 138 return nil, p.Errorf(0, "file header: %v", err) 139 } 140 141 newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0) 142 if err != nil { 143 return nil, p.Errorf(1, "file header: %v", err) 144 } 145 146 f := &File{} 147 switch { 148 case oldName == devNull || hasEpochTimestamp(oldLine): 149 f.IsNew = true 150 f.NewName = newName 151 case newName == devNull || hasEpochTimestamp(newLine): 152 f.IsDelete = true 153 f.OldName = oldName 154 default: 155 // if old name is a prefix of new name, use that instead 156 // this avoids picking variants like "file.bak" or "file~" 157 if strings.HasPrefix(newName, oldName) { 158 f.OldName = oldName 159 f.NewName = 
oldName 160 } else { 161 f.OldName = newName 162 f.NewName = newName 163 } 164 } 165 166 return f, nil 167} 168 169// parseGitHeaderName extracts a default file name from the Git file header 170// line. This is required for mode-only changes and creation/deletion of empty 171// files. Other types of patch include the file name(s) in the header data. 172// If the names in the header do not match because the patch is a rename, 173// return an empty default name. 174func parseGitHeaderName(header string) (string, error) { 175 header = strings.TrimSuffix(header, "\n") 176 if len(header) == 0 { 177 return "", nil 178 } 179 180 var err error 181 var first, second string 182 183 // there are 4 cases to account for: 184 // 185 // 1) unquoted unquoted 186 // 2) unquoted "quoted" 187 // 3) "quoted" unquoted 188 // 4) "quoted" "quoted" 189 // 190 quote := strings.IndexByte(header, '"') 191 switch { 192 case quote < 0: 193 // case 1 194 first = header 195 196 case quote > 0: 197 // case 2 198 first = header[:quote-1] 199 if !isSpace(header[quote-1]) { 200 return "", fmt.Errorf("missing separator") 201 } 202 203 second, _, err = parseQuotedName(header[quote:]) 204 if err != nil { 205 return "", err 206 } 207 208 case quote == 0: 209 // case 3 or case 4 210 var n int 211 first, n, err = parseQuotedName(header) 212 if err != nil { 213 return "", err 214 } 215 216 // git accepts multiple spaces after a quoted name, but not after an 217 // unquoted name, since the name might end with one or more spaces 218 for n < len(header) && isSpace(header[n]) { 219 n++ 220 } 221 if n == len(header) { 222 return "", nil 223 } 224 225 if header[n] == '"' { 226 second, _, err = parseQuotedName(header[n:]) 227 if err != nil { 228 return "", err 229 } 230 } else { 231 second = header[n:] 232 } 233 } 234 235 first = trimTreePrefix(first, 1) 236 if second != "" { 237 if first == trimTreePrefix(second, 1) { 238 return first, nil 239 } 240 return "", nil 241 } 242 243 // at this point, both names are 
unquoted (case 1) 244 // since names may contain spaces, we can't use a known separator 245 // instead, look for a split that produces two equal names 246 247 for i := 0; i < len(first)-1; i++ { 248 if !isSpace(first[i]) { 249 continue 250 } 251 second = trimTreePrefix(first[i+1:], 1) 252 if name := first[:i]; name == second { 253 return name, nil 254 } 255 } 256 return "", nil 257} 258 259// parseGitHeaderData parses a single line of metadata from a Git file header. 260// It returns true when header parsing is complete; in that case, line was the 261// first line of non-header content. 262func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) { 263 if len(line) > 0 && line[len(line)-1] == '\n' { 264 line = line[:len(line)-1] 265 } 266 267 for _, hdr := range []struct { 268 prefix string 269 end bool 270 parse func(*File, string, string) error 271 }{ 272 {"@@ -", true, nil}, 273 {"--- ", false, parseGitHeaderOldName}, 274 {"+++ ", false, parseGitHeaderNewName}, 275 {"old mode ", false, parseGitHeaderOldMode}, 276 {"new mode ", false, parseGitHeaderNewMode}, 277 {"deleted file mode ", false, parseGitHeaderDeletedMode}, 278 {"new file mode ", false, parseGitHeaderCreatedMode}, 279 {"copy from ", false, parseGitHeaderCopyFrom}, 280 {"copy to ", false, parseGitHeaderCopyTo}, 281 {"rename old ", false, parseGitHeaderRenameFrom}, 282 {"rename new ", false, parseGitHeaderRenameTo}, 283 {"rename from ", false, parseGitHeaderRenameFrom}, 284 {"rename to ", false, parseGitHeaderRenameTo}, 285 {"similarity index ", false, parseGitHeaderScore}, 286 {"dissimilarity index ", false, parseGitHeaderScore}, 287 {"index ", false, parseGitHeaderIndex}, 288 } { 289 if strings.HasPrefix(line, hdr.prefix) { 290 if hdr.parse != nil { 291 err = hdr.parse(f, line[len(hdr.prefix):], defaultName) 292 } 293 return hdr.end, err 294 } 295 } 296 297 // unknown line indicates the end of the header 298 // this usually happens if the diff is empty 299 return true, nil 300} 
301 302func parseGitHeaderOldName(f *File, line, defaultName string) error { 303 name, _, err := parseName(line, '\t', 1) 304 if err != nil { 305 return err 306 } 307 if f.OldName == "" && !f.IsNew { 308 f.OldName = name 309 return nil 310 } 311 return verifyGitHeaderName(name, f.OldName, f.IsNew, "old") 312} 313 314func parseGitHeaderNewName(f *File, line, defaultName string) error { 315 name, _, err := parseName(line, '\t', 1) 316 if err != nil { 317 return err 318 } 319 if f.NewName == "" && !f.IsDelete { 320 f.NewName = name 321 return nil 322 } 323 return verifyGitHeaderName(name, f.NewName, f.IsDelete, "new") 324} 325 326func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) { 327 f.OldMode, err = parseMode(line) 328 return 329} 330 331func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) { 332 f.NewMode, err = parseMode(line) 333 return 334} 335 336func parseGitHeaderDeletedMode(f *File, line, defaultName string) error { 337 f.IsDelete = true 338 f.OldName = defaultName 339 return parseGitHeaderOldMode(f, line, defaultName) 340} 341 342func parseGitHeaderCreatedMode(f *File, line, defaultName string) error { 343 f.IsNew = true 344 f.NewName = defaultName 345 return parseGitHeaderNewMode(f, line, defaultName) 346} 347 348func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) { 349 f.IsCopy = true 350 f.OldName, _, err = parseName(line, 0, 0) 351 return 352} 353 354func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) { 355 f.IsCopy = true 356 f.NewName, _, err = parseName(line, 0, 0) 357 return 358} 359 360func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) { 361 f.IsRename = true 362 f.OldName, _, err = parseName(line, 0, 0) 363 return 364} 365 366func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) { 367 f.IsRename = true 368 f.NewName, _, err = parseName(line, 0, 0) 369 return 370} 371 372func parseGitHeaderScore(f *File, line, 
defaultName string) error { 373 score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32) 374 if err != nil { 375 nerr := err.(*strconv.NumError) 376 return fmt.Errorf("invalid score line: %v", nerr.Err) 377 } 378 if score <= 100 { 379 f.Score = int(score) 380 } 381 return nil 382} 383 384func parseGitHeaderIndex(f *File, line, defaultName string) error { 385 const sep = ".." 386 387 // note that git stops parsing if the OIDs are too long to be valid 388 // checking this requires knowing if the repository uses SHA1 or SHA256 389 // hashes, which we don't know, so we just skip that check 390 391 parts := strings.SplitN(line, " ", 2) 392 oids := strings.SplitN(parts[0], sep, 2) 393 394 if len(oids) < 2 { 395 return fmt.Errorf("invalid index line: missing %q", sep) 396 } 397 f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1] 398 399 if len(parts) > 1 { 400 return parseGitHeaderOldMode(f, parts[1], defaultName) 401 } 402 return nil 403} 404 405func parseMode(s string) (os.FileMode, error) { 406 mode, err := strconv.ParseInt(s, 8, 32) 407 if err != nil { 408 nerr := err.(*strconv.NumError) 409 return os.FileMode(0), fmt.Errorf("invalid mode line: %v", nerr.Err) 410 } 411 return os.FileMode(mode), nil 412} 413 414// parseName extracts a file name from the start of a string and returns the 415// name and the index of the first character after the name. If the name is 416// unquoted and term is non-zero, parsing stops at the first occurrence of 417// term. 418// 419// If the name is exactly "/dev/null", no further processing occurs. Otherwise, 420// if dropPrefix is greater than zero, that number of prefix components 421// separated by forward slashes are dropped from the name and any duplicate 422// slashes are collapsed. 
423func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) { 424 if len(s) > 0 && s[0] == '"' { 425 name, n, err = parseQuotedName(s) 426 } else { 427 name, n, err = parseUnquotedName(s, term) 428 } 429 if err != nil { 430 return "", 0, err 431 } 432 if name == devNull { 433 return name, n, nil 434 } 435 return cleanName(name, dropPrefix), n, nil 436} 437 438func parseQuotedName(s string) (name string, n int, err error) { 439 for n = 1; n < len(s); n++ { 440 if s[n] == '"' && s[n-1] != '\\' { 441 n++ 442 break 443 } 444 } 445 if n == 2 { 446 return "", 0, fmt.Errorf("missing name") 447 } 448 if name, err = strconv.Unquote(s[:n]); err != nil { 449 return "", 0, err 450 } 451 return name, n, err 452} 453 454func parseUnquotedName(s string, term byte) (name string, n int, err error) { 455 for n = 0; n < len(s); n++ { 456 if s[n] == '\n' { 457 break 458 } 459 if term > 0 && s[n] == term { 460 break 461 } 462 } 463 if n == 0 { 464 return "", 0, fmt.Errorf("missing name") 465 } 466 return s[:n], n, nil 467} 468 469// verifyGitHeaderName checks a parsed name against state set by previous lines 470func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error { 471 if existing != "" { 472 if isNull { 473 return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing) 474 } 475 if existing != parsed { 476 return fmt.Errorf("inconsistent %s filename", side) 477 } 478 } 479 if isNull && parsed != devNull { 480 return fmt.Errorf("expected %s", devNull) 481 } 482 return nil 483} 484 485// cleanName removes double slashes and drops prefix segments. 
func cleanName(name string, drop int) string {
	var b strings.Builder
	for i := 0; i < len(name); i++ {
		if name[i] == '/' {
			// skip every slash that is directly followed by another slash
			if i < len(name)-1 && name[i+1] == '/' {
				continue
			}
			// the last slash of a run ends a component: while components
			// remain to be dropped, discard everything collected so far
			if drop > 0 {
				drop--
				b.Reset()
				continue
			}
		}
		b.WriteByte(name[i])
	}
	return b.String()
}

// trimTreePrefix removes up to n leading directory components from name. If
// name contains fewer than n slashes, the result is empty.
func trimTreePrefix(name string, n int) string {
	i := 0
	for ; i < len(name) && n > 0; i++ {
		if name[i] == '/' {
			n--
		}
	}
	return name[i:]
}

// hasEpochTimestamp returns true if the string ends with a POSIX-formatted
// timestamp for the UNIX epoch after a tab character. According to git, this
// is used by GNU diff to mark creations and deletions.
//
// Strings without a tab, and strings whose text after the first tab is not a
// valid timestamp (including empty or very short text), yield false.
func hasEpochTimestamp(s string) bool {
	const posixTimeLayout = "2006-01-02 15:04:05.9 -0700"

	start := strings.IndexByte(s, '\t')
	if start < 0 {
		return false
	}

	ts := strings.TrimSuffix(s[start+1:], "\n")

	// a valid timestamp can have optional ':' in zone specifier
	// remove that if it exists so we have a single format
	// the length check keeps short or empty text from indexing out of range
	if len(ts) >= 3 && ts[len(ts)-3] == ':' {
		ts = ts[:len(ts)-3] + ts[len(ts)-2:]
	}

	t, err := time.Parse(posixTimeLayout, ts)
	if err != nil {
		return false
	}
	return t.Equal(time.Unix(0, 0))
}

// isSpace reports whether c is a space, tab, or newline.
func isSpace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n'
}