fork of go-gitdiff with jj support
at v0.5.0 11 kB view raw
1package gitdiff 2 3import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io" 8 "net/mail" 9 "strconv" 10 "strings" 11 "time" 12 "unicode" 13) 14 15const ( 16 mailHeaderPrefix = "From " 17 prettyHeaderPrefix = "commit " 18) 19 20// PatchHeader is a parsed version of the preamble content that appears before 21// the first diff in a patch. It includes metadata about the patch, such as the 22// author and a subject. 23type PatchHeader struct { 24 // The SHA of the commit the patch was generated from. Empty if the SHA is 25 // not included in the header. 26 SHA string 27 28 // The author details of the patch. If these details are not included in 29 // the header, Author is nil and AuthorDate is the zero time. 30 Author *PatchIdentity 31 AuthorDate time.Time 32 33 // The committer details of the patch. If these details are not included in 34 // the header, Committer is nil and CommitterDate is the zero time. 35 Committer *PatchIdentity 36 CommitterDate time.Time 37 38 // The title and body of the commit message describing the changes in the 39 // patch. Empty if no message is included in the header. 40 Title string 41 Body string 42 43 // If the preamble looks like an email, ParsePatchHeader will 44 // remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the 45 // Title and place them here. 46 SubjectPrefix string 47 48 // If the preamble looks like an email, and it contains a `---` 49 // line, that line will be removed and everything after it will be 50 // placed in BodyAppendix. 51 BodyAppendix string 52} 53 54// Message returns the commit message for the header. The message consists of 55// the title and the body separated by an empty line. 56func (h *PatchHeader) Message() string { 57 var msg strings.Builder 58 if h != nil { 59 msg.WriteString(h.Title) 60 if h.Body != "" { 61 msg.WriteString("\n\n") 62 msg.WriteString(h.Body) 63 } 64 } 65 return msg.String() 66} 67 68// PatchIdentity identifies a person who authored or committed a patch. 
type PatchIdentity struct {
	Name  string
	Email string
}

// String formats the identity as `Name <Email>`. An empty name is rendered as
// a pair of double quotes so that the output always has a name component.
func (i PatchIdentity) String() string {
	name := i.Name
	if name == "" {
		name = `""`
	}
	return fmt.Sprintf("%s <%s>", name, i.Email)
}

// ParsePatchIdentity parses a patch identity string. A valid string contains a
// non-empty name followed by an email address in angle brackets. Like Git,
// ParsePatchIdentity does not require that the email address is valid or
// properly formatted, only that it is non-empty. The name must not contain a
// left angle bracket, '<', and the email address must not contain a right
// angle bracket, '>'.
//
// Surrounding whitespace is trimmed from both the name and the email, and
// anything after the first '>' that follows the first '<' is ignored. An error
// is returned if the email section is never closed or if either part is empty.
func ParsePatchIdentity(s string) (PatchIdentity, error) {
	// The name is everything before the first '<'.
	open := strings.IndexByte(s, '<')
	if open < 0 {
		return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s)
	}

	// The email runs up to the first '>' after that '<'.
	rest := s[open+1:]
	end := strings.IndexByte(rest, '>')
	if end < 0 {
		return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s)
	}

	name := strings.TrimSpace(s[:open])
	email := strings.TrimSpace(rest[:end])
	if name == "" || email == "" {
		return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s)
	}

	return PatchIdentity{Name: name, Email: email}, nil
}

// ParsePatchDate parses a patch date string. It returns the parsed time or an
// error if s has an unknown format. ParsePatchDate supports the iso, rfc,
// short, raw, unix, and default formats (with local variants) used by the
// --date flag in Git.
func ParsePatchDate(s string) (time.Time, error) {
	const (
		isoFormat          = "2006-01-02 15:04:05 -0700"
		isoStrictFormat    = "2006-01-02T15:04:05-07:00"
		rfc2822Format      = "Mon, 2 Jan 2006 15:04:05 -0700"
		shortFormat        = "2006-01-02"
		defaultFormat      = "Mon Jan 2 15:04:05 2006 -0700"
		defaultLocalFormat = "Mon Jan 2 15:04:05 2006"
	)

	// An empty string is not an error: it yields the zero time.
	if s == "" {
		return time.Time{}, nil
	}

	// Layouts without an explicit offset are interpreted in the local zone.
	for _, layout := range []string{
		isoFormat,
		isoStrictFormat,
		rfc2822Format,
		shortFormat,
		defaultFormat,
		defaultLocalFormat,
	} {
		if t, err := time.ParseInLocation(layout, s, time.Local); err == nil {
			return t, nil
		}
	}

	// unix format: seconds since the epoch
	if unix, err := strconv.ParseInt(s, 10, 64); err == nil {
		return time.Unix(unix, 0), nil
	}

	// raw format: seconds since the epoch followed by a numeric zone offset
	if space := strings.IndexByte(s, ' '); space > 0 {
		unix, uerr := strconv.ParseInt(s[:space], 10, 64)
		zone, zerr := time.Parse("-0700", s[space+1:])
		if uerr == nil && zerr == nil {
			// Build the zone from the parsed offset instead of reusing
			// zone.Location(): time.Parse may hand back time.Local when the
			// offset matches it, and the local offset can differ at the
			// timestamp being returned.
			_, offset := zone.Zone()
			return time.Unix(unix, 0).In(time.FixedZone("", offset)), nil
		}
	}

	return time.Time{}, fmt.Errorf("unknown date format: %s", s)
}

// ParsePatchHeader parses a preamble string as returned by Parse into a
// PatchHeader. Due to the variety of header formats, some fields of the parsed
// PatchHeader may be unset after parsing.
//
// Supported formats are the short, medium, full, fuller, and email pretty
// formats used by git diff, git log, and git show and the UNIX mailbox format
// used by git format-patch.
//
// If ParsePatchHeader detects that it is handling an email, it will
// remove extra content at the beginning of the title line, such as
// `[PATCH]` or `Re:` in the same way that `git mailinfo` does.
// SubjectPrefix will be set to the value of this removed string.
// (`git mailinfo` is the core part of `git am` that pulls information
// out of an individual mail.)
179// 180// Additionally, if ParsePatchHeader detects that it's handling an 181// email, it will remove a `---` line and put anything after it into 182// BodyAppendix. 183// 184// Those wishing the effect of a plain `git am` should use 185// `PatchHeader.Title + "\n" + PatchHeader.Body` (or 186// `PatchHeader.Message()`). Those wishing to retain the subject 187// prefix and appendix material should use `PatchHeader.SubjectPrefix 188// + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" + 189// PatchHeader.BodyAppendix`. 190func ParsePatchHeader(s string) (*PatchHeader, error) { 191 r := bufio.NewReader(strings.NewReader(s)) 192 193 var line string 194 for { 195 var err error 196 line, err = r.ReadString('\n') 197 if err == io.EOF { 198 break 199 } 200 if err != nil { 201 return nil, err 202 } 203 204 line = strings.TrimSpace(line) 205 if len(line) > 0 { 206 break 207 } 208 } 209 210 switch { 211 case strings.HasPrefix(line, mailHeaderPrefix): 212 return parseHeaderMail(line, r) 213 case strings.HasPrefix(line, prettyHeaderPrefix): 214 return parseHeaderPretty(line, r) 215 } 216 return nil, errors.New("unrecognized patch header format") 217} 218 219func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { 220 const ( 221 authorPrefix = "Author:" 222 commitPrefix = "Commit:" 223 datePrefix = "Date:" 224 authorDatePrefix = "AuthorDate:" 225 commitDatePrefix = "CommitDate:" 226 ) 227 228 h := &PatchHeader{} 229 230 prettyLine = prettyLine[len(prettyHeaderPrefix):] 231 if i := strings.IndexByte(prettyLine, ' '); i > 0 { 232 h.SHA = prettyLine[:i] 233 } else { 234 h.SHA = prettyLine 235 } 236 237 s := bufio.NewScanner(r) 238 for s.Scan() { 239 line := s.Text() 240 241 // empty line marks end of fields, remaining lines are title/message 242 if strings.TrimSpace(line) == "" { 243 break 244 } 245 246 switch { 247 case strings.HasPrefix(line, authorPrefix): 248 u, err := ParsePatchIdentity(line[len(authorPrefix):]) 249 if err != nil { 250 return nil, 
err 251 } 252 h.Author = &u 253 254 case strings.HasPrefix(line, commitPrefix): 255 u, err := ParsePatchIdentity(line[len(commitPrefix):]) 256 if err != nil { 257 return nil, err 258 } 259 h.Committer = &u 260 261 case strings.HasPrefix(line, datePrefix): 262 d, err := ParsePatchDate(strings.TrimSpace(line[len(datePrefix):])) 263 if err != nil { 264 return nil, err 265 } 266 h.AuthorDate = d 267 268 case strings.HasPrefix(line, authorDatePrefix): 269 d, err := ParsePatchDate(strings.TrimSpace(line[len(authorDatePrefix):])) 270 if err != nil { 271 return nil, err 272 } 273 h.AuthorDate = d 274 275 case strings.HasPrefix(line, commitDatePrefix): 276 d, err := ParsePatchDate(strings.TrimSpace(line[len(commitDatePrefix):])) 277 if err != nil { 278 return nil, err 279 } 280 h.CommitterDate = d 281 } 282 } 283 if s.Err() != nil { 284 return nil, s.Err() 285 } 286 287 title, indent := scanMessageTitle(s) 288 if s.Err() != nil { 289 return nil, s.Err() 290 } 291 h.Title = title 292 293 if title != "" { 294 // Don't check for an appendix 295 body, _ := scanMessageBody(s, indent, false) 296 if s.Err() != nil { 297 return nil, s.Err() 298 } 299 h.Body = body 300 } 301 302 return h, nil 303} 304 305func scanMessageTitle(s *bufio.Scanner) (title string, indent string) { 306 var b strings.Builder 307 for i := 0; s.Scan(); i++ { 308 line := s.Text() 309 trimLine := strings.TrimSpace(line) 310 if trimLine == "" { 311 break 312 } 313 314 if i == 0 { 315 if start := strings.IndexFunc(line, func(c rune) bool { return !unicode.IsSpace(c) }); start > 0 { 316 indent = line[:start] 317 } 318 } 319 if b.Len() > 0 { 320 b.WriteByte(' ') 321 } 322 b.WriteString(trimLine) 323 } 324 return b.String(), indent 325} 326 327func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) { 328 // Body and appendix 329 var body, appendix strings.Builder 330 c := &body 331 var empty int 332 for i := 0; s.Scan(); i++ { 333 line := s.Text() 334 335 line = 
strings.TrimRightFunc(line, unicode.IsSpace) 336 line = strings.TrimPrefix(line, indent) 337 338 if line == "" { 339 empty++ 340 continue 341 } 342 343 // If requested, parse out "appendix" information (often added 344 // by `git format-patch` and removed by `git am`). 345 if separateAppendix && c == &body && line == "---" { 346 c = &appendix 347 continue 348 } 349 350 if c.Len() > 0 { 351 c.WriteByte('\n') 352 if empty > 0 { 353 c.WriteByte('\n') 354 } 355 } 356 empty = 0 357 358 c.WriteString(line) 359 } 360 return body.String(), appendix.String() 361} 362 363func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { 364 msg, err := mail.ReadMessage(r) 365 if err != nil { 366 return nil, err 367 } 368 369 h := &PatchHeader{} 370 371 mailLine = mailLine[len(mailHeaderPrefix):] 372 if i := strings.IndexByte(mailLine, ' '); i > 0 { 373 h.SHA = mailLine[:i] 374 } 375 376 addrs, err := msg.Header.AddressList("From") 377 if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) { 378 return nil, err 379 } 380 if len(addrs) > 0 { 381 addr := addrs[0] 382 if addr.Name == "" { 383 return nil, fmt.Errorf("invalid user string: %s", addr) 384 } 385 h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address} 386 } 387 388 date := msg.Header.Get("Date") 389 if date != "" { 390 d, err := ParsePatchDate(date) 391 if err != nil { 392 return nil, err 393 } 394 h.AuthorDate = d 395 } 396 397 subject := msg.Header.Get("Subject") 398 h.SubjectPrefix, h.Title = parseSubject(subject) 399 400 s := bufio.NewScanner(msg.Body) 401 h.Body, h.BodyAppendix = scanMessageBody(s, "", true) 402 if s.Err() != nil { 403 return nil, s.Err() 404 } 405 406 return h, nil 407} 408 409// Takes an email subject and returns the patch prefix and commit 410// title. 
// i.e., `[PATCH v3 3/5] Implement foo` would return `[PATCH v3 3/5] ` and
// `Implement foo`. The two results always concatenate back to s.
func parseSubject(s string) (string, string) {
	// This is meant to be compatible with
	// https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject().
	// If compatibility with `git am` drifts, go there to see if there
	// are any updates.

	// at is the number of leading bytes of s that belong to the prefix.
	at := 0

scan:
	for at < len(s) {
		switch s[at] {
		case 'r', 'R':
			// Detect re:, Re:, rE: and RE:
			if at+2 < len(s) &&
				(s[at+1] == 'e' || s[at+1] == 'E') &&
				s[at+2] == ':' {
				at += 3
				continue
			}

		case ' ', '\t', ':':
			// Delete whitespace and duplicate ':' characters
			at++
			continue

		case '[':
			// Look for the closing bracket; an unclosed '[' is part of the
			// title, not the prefix.
			if end := strings.IndexByte(s[at+1:], ']'); end >= 0 {
				at += end + 2
				continue
			}
		}

		// Nothing was removed at this position, so the title starts here.
		break scan
	}

	return s[:at], s[at:]
}