fork of go-gitdiff with jj support
at v0.1.0 12 kB view raw
1package gitdiff 2 3import ( 4 "fmt" 5 "io" 6 "os" 7 "strconv" 8 "strings" 9 "time" 10) 11 12const ( 13 devNull = "/dev/null" 14) 15 16// ParseNextFileHeader finds and parses the next file header in the stream. If 17// a header is found, it returns a file and all input before the header. It 18// returns nil if no headers are found before the end of the input. 19func (p *parser) ParseNextFileHeader() (*File, string, error) { 20 var preamble strings.Builder 21 var file *File 22 for { 23 // check for disconnected fragment headers (corrupt patch) 24 frag, err := p.ParseTextFragmentHeader() 25 if err != nil { 26 // not a valid header, nothing to worry about 27 goto NextLine 28 } 29 if frag != nil { 30 return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header()) 31 } 32 33 // check for a git-generated patch 34 file, err = p.ParseGitFileHeader() 35 if err != nil { 36 return nil, "", err 37 } 38 if file != nil { 39 return file, preamble.String(), nil 40 } 41 42 // check for a "traditional" patch 43 file, err = p.ParseTraditionalFileHeader() 44 if err != nil { 45 return nil, "", err 46 } 47 if file != nil { 48 return file, preamble.String(), nil 49 } 50 51 NextLine: 52 preamble.WriteString(p.Line(0)) 53 if err := p.Next(); err != nil { 54 if err == io.EOF { 55 break 56 } 57 return nil, "", err 58 } 59 } 60 return nil, "", nil 61} 62 63func (p *parser) ParseGitFileHeader() (*File, error) { 64 const prefix = "diff --git " 65 66 if !strings.HasPrefix(p.Line(0), prefix) { 67 return nil, nil 68 } 69 header := p.Line(0)[len(prefix):] 70 71 defaultName, err := parseGitHeaderName(header) 72 if err != nil { 73 return nil, p.Errorf(0, "git file header: %v", err) 74 } 75 76 f := &File{} 77 for { 78 end, err := parseGitHeaderData(f, p.Line(1), defaultName) 79 if err != nil { 80 return nil, p.Errorf(1, "git file header: %v", err) 81 } 82 83 if err := p.Next(); err != nil { 84 if err == io.EOF { 85 break 86 } 87 return nil, err 88 } 89 90 if end { 91 break 92 } 
93 } 94 95 if f.OldName == "" && f.NewName == "" { 96 if defaultName == "" { 97 return nil, p.Errorf(0, "git file header: missing filename information") 98 } 99 f.OldName = defaultName 100 f.NewName = defaultName 101 } 102 103 if (f.NewName == "" && !f.IsDelete) || (f.OldName == "" && !f.IsNew) { 104 return nil, p.Errorf(0, "git file header: missing filename information") 105 } 106 107 return f, nil 108} 109 110func (p *parser) ParseTraditionalFileHeader() (*File, error) { 111 const shortestValidFragHeader = "@@ -1 +1 @@\n" 112 const ( 113 oldPrefix = "--- " 114 newPrefix = "+++ " 115 ) 116 117 oldLine, newLine := p.Line(0), p.Line(1) 118 119 if !strings.HasPrefix(oldLine, oldPrefix) || !strings.HasPrefix(newLine, newPrefix) { 120 return nil, nil 121 } 122 // heuristic: only a file header if followed by a (probable) fragment header 123 if len(p.Line(2)) < len(shortestValidFragHeader) || !strings.HasPrefix(p.Line(2), "@@ -") { 124 return nil, nil 125 } 126 127 // advance past the first two lines so parser is after the header 128 // no EOF check needed because we know there are >=3 valid lines 129 if err := p.Next(); err != nil { 130 return nil, err 131 } 132 if err := p.Next(); err != nil { 133 return nil, err 134 } 135 136 oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0) 137 if err != nil { 138 return nil, p.Errorf(0, "file header: %v", err) 139 } 140 141 newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0) 142 if err != nil { 143 return nil, p.Errorf(1, "file header: %v", err) 144 } 145 146 f := &File{} 147 switch { 148 case oldName == devNull || hasEpochTimestamp(oldLine): 149 f.IsNew = true 150 f.NewName = newName 151 case newName == devNull || hasEpochTimestamp(newLine): 152 f.IsDelete = true 153 f.OldName = oldName 154 default: 155 // if old name is a prefix of new name, use that instead 156 // this avoids picking variants like "file.bak" or "file~" 157 if strings.HasPrefix(newName, oldName) { 158 f.OldName = oldName 159 f.NewName = 
oldName 160 } else { 161 f.OldName = newName 162 f.NewName = newName 163 } 164 } 165 166 return f, nil 167} 168 169// parseGitHeaderName extracts a default file name from the Git file header 170// line. This is required for mode-only changes and creation/deletion of empty 171// files. Other types of patch include the file name(s) in the header data. 172// If the names in the header do not match because the patch is a rename, 173// return an empty default name. 174func parseGitHeaderName(header string) (string, error) { 175 firstName, n, err := parseName(header, -1, 1) 176 if err != nil { 177 return "", err 178 } 179 180 if n < len(header) && (header[n] == ' ' || header[n] == '\t') { 181 n++ 182 } 183 184 secondName, _, err := parseName(header[n:], -1, 1) 185 if err != nil { 186 return "", err 187 } 188 189 if firstName != secondName { 190 return "", nil 191 } 192 return firstName, nil 193} 194 195// parseGitHeaderData parses a single line of metadata from a Git file header. 196// It returns true when header parsing is complete; in that case, line was the 197// first line of non-header content. 
198func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) { 199 if len(line) > 0 && line[len(line)-1] == '\n' { 200 line = line[:len(line)-1] 201 } 202 203 for _, hdr := range []struct { 204 prefix string 205 end bool 206 parse func(*File, string, string) error 207 }{ 208 {"@@ -", true, nil}, 209 {"--- ", false, parseGitHeaderOldName}, 210 {"+++ ", false, parseGitHeaderNewName}, 211 {"old mode ", false, parseGitHeaderOldMode}, 212 {"new mode ", false, parseGitHeaderNewMode}, 213 {"deleted file mode ", false, parseGitHeaderDeletedMode}, 214 {"new file mode ", false, parseGitHeaderCreatedMode}, 215 {"copy from ", false, parseGitHeaderCopyFrom}, 216 {"copy to ", false, parseGitHeaderCopyTo}, 217 {"rename old ", false, parseGitHeaderRenameFrom}, 218 {"rename new ", false, parseGitHeaderRenameTo}, 219 {"rename from ", false, parseGitHeaderRenameFrom}, 220 {"rename to ", false, parseGitHeaderRenameTo}, 221 {"similarity index ", false, parseGitHeaderScore}, 222 {"dissimilarity index ", false, parseGitHeaderScore}, 223 {"index ", false, parseGitHeaderIndex}, 224 } { 225 if strings.HasPrefix(line, hdr.prefix) { 226 if hdr.parse != nil { 227 err = hdr.parse(f, line[len(hdr.prefix):], defaultName) 228 } 229 return hdr.end, err 230 } 231 } 232 233 // unknown line indicates the end of the header 234 // this usually happens if the diff is empty 235 return true, nil 236} 237 238func parseGitHeaderOldName(f *File, line, defaultName string) error { 239 name, _, err := parseName(line, '\t', 1) 240 if err != nil { 241 return err 242 } 243 if f.OldName == "" && !f.IsNew { 244 f.OldName = name 245 return nil 246 } 247 return verifyGitHeaderName(name, f.OldName, f.IsNew, "old") 248} 249 250func parseGitHeaderNewName(f *File, line, defaultName string) error { 251 name, _, err := parseName(line, '\t', 1) 252 if err != nil { 253 return err 254 } 255 if f.NewName == "" && !f.IsDelete { 256 f.NewName = name 257 return nil 258 } 259 return verifyGitHeaderName(name, 
f.NewName, f.IsDelete, "new") 260} 261 262func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) { 263 f.OldMode, err = parseMode(line) 264 return 265} 266 267func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) { 268 f.NewMode, err = parseMode(line) 269 return 270} 271 272func parseGitHeaderDeletedMode(f *File, line, defaultName string) error { 273 f.IsDelete = true 274 f.OldName = defaultName 275 return parseGitHeaderOldMode(f, line, defaultName) 276} 277 278func parseGitHeaderCreatedMode(f *File, line, defaultName string) error { 279 f.IsNew = true 280 f.NewName = defaultName 281 return parseGitHeaderNewMode(f, line, defaultName) 282} 283 284func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) { 285 f.IsCopy = true 286 f.OldName, _, err = parseName(line, -1, 0) 287 return 288} 289 290func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) { 291 f.IsCopy = true 292 f.NewName, _, err = parseName(line, -1, 0) 293 return 294} 295 296func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) { 297 f.IsRename = true 298 f.OldName, _, err = parseName(line, -1, 0) 299 return 300} 301 302func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) { 303 f.IsRename = true 304 f.NewName, _, err = parseName(line, -1, 0) 305 return 306} 307 308func parseGitHeaderScore(f *File, line, defaultName string) error { 309 score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32) 310 if err != nil { 311 nerr := err.(*strconv.NumError) 312 return fmt.Errorf("invalid score line: %v", nerr.Err) 313 } 314 if score <= 100 { 315 f.Score = int(score) 316 } 317 return nil 318} 319 320func parseGitHeaderIndex(f *File, line, defaultName string) error { 321 const sep = ".." 
322 323 // note that git stops parsing if the OIDs are too long to be valid 324 // checking this requires knowing if the repository uses SHA1 or SHA256 325 // hashes, which we don't know, so we just skip that check 326 327 parts := strings.SplitN(line, " ", 2) 328 oids := strings.SplitN(parts[0], sep, 2) 329 330 if len(oids) < 2 { 331 return fmt.Errorf("invalid index line: missing %q", sep) 332 } 333 f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1] 334 335 if len(parts) > 1 { 336 return parseGitHeaderOldMode(f, parts[1], defaultName) 337 } 338 return nil 339} 340 341func parseMode(s string) (os.FileMode, error) { 342 mode, err := strconv.ParseInt(s, 8, 32) 343 if err != nil { 344 nerr := err.(*strconv.NumError) 345 return os.FileMode(0), fmt.Errorf("invalid mode line: %v", nerr.Err) 346 } 347 return os.FileMode(mode), nil 348} 349 350// parseName extracts a file name from the start of a string and returns the 351// name and the index of the first character after the name. If the name is 352// unquoted and term is non-negative, parsing stops at the first occurrence of 353// term. Otherwise parsing of unquoted names stops at the first space or tab. 354// 355// If the name is exactly "/dev/null", no further processing occurs. Otherwise, 356// if dropPrefix is greater than zero, that number of prefix components 357// separated by forward slashes are dropped from the name and any duplicate 358// slashes are collapsed. 
359func parseName(s string, term rune, dropPrefix int) (name string, n int, err error) { 360 if len(s) > 0 && s[0] == '"' { 361 name, n, err = parseQuotedName(s) 362 } else { 363 name, n, err = parseUnquotedName(s, term) 364 } 365 if err != nil { 366 return "", 0, err 367 } 368 if name == devNull { 369 return name, n, nil 370 } 371 return cleanName(name, dropPrefix), n, nil 372} 373 374func parseQuotedName(s string) (name string, n int, err error) { 375 for n = 1; n < len(s); n++ { 376 if s[n] == '"' && s[n-1] != '\\' { 377 n++ 378 break 379 } 380 } 381 if n == 2 { 382 return "", 0, fmt.Errorf("missing name") 383 } 384 if name, err = strconv.Unquote(s[:n]); err != nil { 385 return "", 0, err 386 } 387 return name, n, err 388} 389 390func parseUnquotedName(s string, term rune) (name string, n int, err error) { 391 for n = 0; n < len(s); n++ { 392 if s[n] == '\n' { 393 break 394 } 395 if term >= 0 && rune(s[n]) == term { 396 break 397 } 398 if term < 0 && (s[n] == ' ' || s[n] == '\t') { 399 break 400 } 401 } 402 if n == 0 { 403 return "", 0, fmt.Errorf("missing name") 404 } 405 return s[:n], n, nil 406} 407 408// verifyGitHeaderName checks a parsed name against state set by previous lines 409func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error { 410 if existing != "" { 411 if isNull { 412 return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing) 413 } 414 if existing != parsed { 415 return fmt.Errorf("inconsistent %s filename", side) 416 } 417 } 418 if isNull && parsed != devNull { 419 return fmt.Errorf("expected %s", devNull) 420 } 421 return nil 422} 423 424// cleanName removes double slashes and drops prefix segments. 
func cleanName(name string, drop int) string {
	var b strings.Builder
	for i := 0; i < len(name); i++ {
		if name[i] == '/' {
			// in a run of slashes only the last one counts as a separator
			if i < len(name)-1 && name[i+1] == '/' {
				continue
			}
			// while segments remain to be dropped, discard everything
			// written so far along with the separator itself
			if drop > 0 {
				drop--
				b.Reset()
				continue
			}
		}
		b.WriteByte(name[i])
	}
	return b.String()
}

// hasEpochTimestamp returns true if the string ends with a POSIX-formatted
// timestamp for the UNIX epoch after a tab character. According to git, this
// is used by GNU diff to mark creations and deletions.
//
// The text between the first tab and an optional trailing newline is treated
// as the timestamp. Anything that does not parse as a timestamp, including
// text that is too short to be one, yields false.
func hasEpochTimestamp(s string) bool {
	const posixTimeLayout = "2006-01-02 15:04:05.9 -0700"

	start := strings.IndexRune(s, '\t')
	if start < 0 {
		return false
	}

	ts := strings.TrimSuffix(s[start+1:], "\n")

	// a valid timestamp can have optional ':' in zone specifier
	// remove that if it exists so we have a single format
	// (the length check keeps short or empty text from indexing out of range)
	if len(ts) >= 3 && ts[len(ts)-3] == ':' {
		ts = ts[:len(ts)-3] + ts[len(ts)-2:]
	}

	t, err := time.Parse(posixTimeLayout, ts)
	if err != nil {
		return false
	}
	return t.Equal(time.Unix(0, 0))
}