fork of go-gitdiff with jj support
at v0.5.2 13 kB view raw
package gitdiff

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"mime/quotedprintable"
	"net/mail"
	"strconv"
	"strings"
	"time"
	"unicode"
)

// Prefixes that ParsePatchHeader compares against the first non-blank line of
// a preamble to decide which header parser to use.
const (
	// mailHeaderPrefix starts the first line of a header that is parsed as
	// mail; any SHA is read from the text that follows the prefix.
	mailHeaderPrefix = "From "
	// prettyHeaderPrefix starts the first line of a pretty-format header; the
	// SHA is read from the text that follows the prefix.
	prettyHeaderPrefix = "commit "
	// mailMinimumHeaderPrefix marks a preamble that starts directly with a
	// "From:" mail header field; the whole preamble is then parsed as mail.
	mailMinimumHeaderPrefix = "From:"
)

// PatchHeader is a parsed version of the preamble content that appears before
// the first diff in a patch. It includes metadata about the patch, such as the
// author and a subject.
type PatchHeader struct {
	// The SHA of the commit the patch was generated from. Empty if the SHA is
	// not included in the header.
	SHA string

	// The author details of the patch. If these details are not included in
	// the header, Author is nil and AuthorDate is the zero time.
	Author     *PatchIdentity
	AuthorDate time.Time

	// The committer details of the patch. If these details are not included in
	// the header, Committer is nil and CommitterDate is the zero time.
	Committer     *PatchIdentity
	CommitterDate time.Time

	// The title and body of the commit message describing the changes in the
	// patch. Empty if no message is included in the header.
	Title string
	Body  string

	// If the preamble looks like an email, ParsePatchHeader will
	// remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the
	// Title and place them here.
	SubjectPrefix string

	// If the preamble looks like an email, and it contains a `---`
	// line, that line will be removed and everything after it will be
	// placed in BodyAppendix.
	BodyAppendix string
}

// Message returns the commit message for the header. The message consists of
// the title and the body separated by an empty line.
58func (h *PatchHeader) Message() string { 59 var msg strings.Builder 60 if h != nil { 61 msg.WriteString(h.Title) 62 if h.Body != "" { 63 msg.WriteString("\n\n") 64 msg.WriteString(h.Body) 65 } 66 } 67 return msg.String() 68} 69 70// PatchIdentity identifies a person who authored or committed a patch. 71type PatchIdentity struct { 72 Name string 73 Email string 74} 75 76func (i PatchIdentity) String() string { 77 name := i.Name 78 if name == "" { 79 name = `""` 80 } 81 return fmt.Sprintf("%s <%s>", name, i.Email) 82} 83 84// ParsePatchIdentity parses a patch identity string. A valid string contains a 85// non-empty name followed by an email address in angle brackets. Like Git, 86// ParsePatchIdentity does not require that the email address is valid or 87// properly formatted, only that it is non-empty. The name must not contain a 88// left angle bracket, '<', and the email address must not contain a right 89// angle bracket, '>'. 90func ParsePatchIdentity(s string) (PatchIdentity, error) { 91 var emailStart, emailEnd int 92 for i, c := range s { 93 if c == '<' && emailStart == 0 { 94 emailStart = i + 1 95 } 96 if c == '>' && emailStart > 0 { 97 emailEnd = i 98 break 99 } 100 } 101 if emailStart > 0 && emailEnd == 0 { 102 return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s) 103 } 104 105 var name, email string 106 if emailStart > 0 { 107 name = strings.TrimSpace(s[:emailStart-1]) 108 } 109 if emailStart > 0 && emailEnd > 0 { 110 email = strings.TrimSpace(s[emailStart:emailEnd]) 111 } 112 if name == "" || email == "" { 113 return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s) 114 } 115 116 return PatchIdentity{Name: name, Email: email}, nil 117} 118 119// ParsePatchDate parses a patch date string. It returns the parsed time or an 120// error if s has an unknown format. ParsePatchDate supports the iso, rfc, 121// short, raw, unix, and default formats (with local variants) used by the 122// --date flag in Git. 
func ParsePatchDate(s string) (time.Time, error) {
	// An empty string is not an error; it simply carries no date.
	if s == "" {
		return time.Time{}, nil
	}

	// Layouts are tried in this order: iso, iso-strict, rfc2822, short,
	// default, default-local. Layouts without a zone are read as local time.
	layouts := [...]string{
		"2006-01-02 15:04:05 -0700",
		"2006-01-02T15:04:05-07:00",
		"Mon, 2 Jan 2006 15:04:05 -0700",
		"2006-01-02",
		"Mon Jan 2 15:04:05 2006 -0700",
		"Mon Jan 2 15:04:05 2006",
	}
	for _, layout := range layouts {
		parsed, err := time.ParseInLocation(layout, s, time.Local)
		if err == nil {
			return parsed, nil
		}
	}

	// unix format: the whole string is a count of seconds since the epoch.
	if secs, err := strconv.ParseInt(s, 10, 64); err == nil {
		return time.Unix(secs, 0), nil
	}

	// raw format: seconds since the epoch, a space, then a zone offset.
	if sep := strings.IndexByte(s, ' '); sep > 0 {
		secs, secsErr := strconv.ParseInt(s[:sep], 10, 64)
		offset, offsetErr := time.Parse("-0700", s[sep+1:])
		if secsErr == nil && offsetErr == nil {
			// Only the location of the parsed offset is used.
			return time.Unix(secs, 0).In(offset.Location()), nil
		}
	}

	return time.Time{}, fmt.Errorf("unknown date format: %s", s)
}

// ParsePatchHeader parses a preamble string as returned by Parse into a
// PatchHeader. Due to the variety of header formats, some fields of the parsed
// PatchHeader may be unset after parsing.
//
// Supported formats are the short, medium, full, fuller, and email pretty
// formats used by git diff, git log, and git show and the UNIX mailbox format
// used by git format-patch.
//
// If ParsePatchHeader detects that it is handling an email, it will
// remove extra content at the beginning of the title line, such as
// `[PATCH]` or `Re:` in the same way that `git mailinfo` does.
// SubjectPrefix will be set to the value of this removed string.
// (`git mailinfo` is the core part of `git am` that pulls information
// out of an individual mail.)
181// 182// Additionally, if ParsePatchHeader detects that it's handling an 183// email, it will remove a `---` line and put anything after it into 184// BodyAppendix. 185// 186// Those wishing the effect of a plain `git am` should use 187// `PatchHeader.Title + "\n" + PatchHeader.Body` (or 188// `PatchHeader.Message()`). Those wishing to retain the subject 189// prefix and appendix material should use `PatchHeader.SubjectPrefix 190// + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" + 191// PatchHeader.BodyAppendix`. 192func ParsePatchHeader(s string) (*PatchHeader, error) { 193 r := bufio.NewReader(strings.NewReader(s)) 194 195 var line string 196 for { 197 var err error 198 line, err = r.ReadString('\n') 199 if err == io.EOF { 200 break 201 } 202 if err != nil { 203 return nil, err 204 } 205 206 line = strings.TrimSpace(line) 207 if len(line) > 0 { 208 break 209 } 210 } 211 212 switch { 213 case strings.HasPrefix(line, mailHeaderPrefix): 214 return parseHeaderMail(line, r) 215 case strings.HasPrefix(line, mailMinimumHeaderPrefix): 216 r = bufio.NewReader(strings.NewReader(s)) 217 return parseHeaderMail("", r) 218 case strings.HasPrefix(line, prettyHeaderPrefix): 219 return parseHeaderPretty(line, r) 220 } 221 return nil, errors.New("unrecognized patch header format") 222} 223 224func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { 225 const ( 226 authorPrefix = "Author:" 227 commitPrefix = "Commit:" 228 datePrefix = "Date:" 229 authorDatePrefix = "AuthorDate:" 230 commitDatePrefix = "CommitDate:" 231 ) 232 233 h := &PatchHeader{} 234 235 prettyLine = prettyLine[len(prettyHeaderPrefix):] 236 if i := strings.IndexByte(prettyLine, ' '); i > 0 { 237 h.SHA = prettyLine[:i] 238 } else { 239 h.SHA = prettyLine 240 } 241 242 s := bufio.NewScanner(r) 243 for s.Scan() { 244 line := s.Text() 245 246 // empty line marks end of fields, remaining lines are title/message 247 if strings.TrimSpace(line) == "" { 248 break 249 } 250 251 switch { 252 
case strings.HasPrefix(line, authorPrefix): 253 u, err := ParsePatchIdentity(line[len(authorPrefix):]) 254 if err != nil { 255 return nil, err 256 } 257 h.Author = &u 258 259 case strings.HasPrefix(line, commitPrefix): 260 u, err := ParsePatchIdentity(line[len(commitPrefix):]) 261 if err != nil { 262 return nil, err 263 } 264 h.Committer = &u 265 266 case strings.HasPrefix(line, datePrefix): 267 d, err := ParsePatchDate(strings.TrimSpace(line[len(datePrefix):])) 268 if err != nil { 269 return nil, err 270 } 271 h.AuthorDate = d 272 273 case strings.HasPrefix(line, authorDatePrefix): 274 d, err := ParsePatchDate(strings.TrimSpace(line[len(authorDatePrefix):])) 275 if err != nil { 276 return nil, err 277 } 278 h.AuthorDate = d 279 280 case strings.HasPrefix(line, commitDatePrefix): 281 d, err := ParsePatchDate(strings.TrimSpace(line[len(commitDatePrefix):])) 282 if err != nil { 283 return nil, err 284 } 285 h.CommitterDate = d 286 } 287 } 288 if s.Err() != nil { 289 return nil, s.Err() 290 } 291 292 title, indent := scanMessageTitle(s) 293 if s.Err() != nil { 294 return nil, s.Err() 295 } 296 h.Title = title 297 298 if title != "" { 299 // Don't check for an appendix 300 body, _ := scanMessageBody(s, indent, false) 301 if s.Err() != nil { 302 return nil, s.Err() 303 } 304 h.Body = body 305 } 306 307 return h, nil 308} 309 310func scanMessageTitle(s *bufio.Scanner) (title string, indent string) { 311 var b strings.Builder 312 for i := 0; s.Scan(); i++ { 313 line := s.Text() 314 trimLine := strings.TrimSpace(line) 315 if trimLine == "" { 316 break 317 } 318 319 if i == 0 { 320 if start := strings.IndexFunc(line, func(c rune) bool { return !unicode.IsSpace(c) }); start > 0 { 321 indent = line[:start] 322 } 323 } 324 if b.Len() > 0 { 325 b.WriteByte(' ') 326 } 327 b.WriteString(trimLine) 328 } 329 return b.String(), indent 330} 331 332func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) { 333 // Body and appendix 334 var body, 
appendix strings.Builder 335 c := &body 336 var empty int 337 for i := 0; s.Scan(); i++ { 338 line := s.Text() 339 340 line = strings.TrimRightFunc(line, unicode.IsSpace) 341 line = strings.TrimPrefix(line, indent) 342 343 if line == "" { 344 empty++ 345 continue 346 } 347 348 // If requested, parse out "appendix" information (often added 349 // by `git format-patch` and removed by `git am`). 350 if separateAppendix && c == &body && line == "---" { 351 c = &appendix 352 continue 353 } 354 355 if c.Len() > 0 { 356 c.WriteByte('\n') 357 if empty > 0 { 358 c.WriteByte('\n') 359 } 360 } 361 empty = 0 362 363 c.WriteString(line) 364 } 365 return body.String(), appendix.String() 366} 367 368func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { 369 msg, err := mail.ReadMessage(r) 370 if err != nil { 371 return nil, err 372 } 373 374 h := &PatchHeader{} 375 376 if len(mailLine) > len(mailHeaderPrefix) { 377 mailLine = mailLine[len(mailHeaderPrefix):] 378 if i := strings.IndexByte(mailLine, ' '); i > 0 { 379 h.SHA = mailLine[:i] 380 } 381 } 382 383 addrs, err := msg.Header.AddressList("From") 384 if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) { 385 return nil, err 386 } 387 if len(addrs) > 0 { 388 addr := addrs[0] 389 if addr.Name == "" { 390 addr.Name = addr.Address 391 } 392 h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address} 393 } 394 395 date := msg.Header.Get("Date") 396 if date != "" { 397 d, err := ParsePatchDate(date) 398 if err != nil { 399 return nil, err 400 } 401 h.AuthorDate = d 402 } 403 404 subject := msg.Header.Get("Subject") 405 h.SubjectPrefix, h.Title = parseSubject(subject) 406 407 s := bufio.NewScanner(msg.Body) 408 h.Body, h.BodyAppendix = scanMessageBody(s, "", true) 409 if s.Err() != nil { 410 return nil, s.Err() 411 } 412 413 return h, nil 414} 415 416// Takes an email subject and returns the patch prefix and commit 417// title. 
// i.e., `[PATCH v3 3/5] Implement foo` would return `[PATCH
// v3 3/5] ` and `Implement foo`
func parseSubject(s string) (string, string) {
	// This mirrors cleanup_subject() in Git's mailinfo.c
	// (https://github.com/git/git/blob/master/mailinfo.c). Check there
	// first if the results ever disagree with `git am`.

	pos := 0
strip:
	for pos < len(s) {
		c := s[pos]
		switch {
		case c == ' ' || c == '\t' || c == ':':
			// Whitespace and stray ':' characters belong to the prefix.
			pos++

		case (c == 'r' || c == 'R') && pos+2 < len(s) &&
			(s[pos+1] == 'e' || s[pos+1] == 'E') && s[pos+2] == ':':
			// Any capitalization of "re:".
			pos += 3

		case c == '[':
			// A bracketed tag only counts if it has a closing bracket.
			closing := strings.IndexByte(s[pos+1:], ']')
			if closing < 0 {
				break strip
			}
			pos += closing + 2

		default:
			// Nothing more to strip; the title starts here.
			break strip
		}
	}

	return s[:pos], decodeSubject(s[pos:])
}

// decodeSubject decodes a subject that starts with a quoted-printable UTF-8
// encoded word, which is what `git format-patch` emits when the commit title
// contains a non-ASCII character (an emoji, for example). Any other subject
// is returned unchanged, as is a subject that fails to decode.
// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject
func decodeSubject(encoded string) string {
	const wordStart = "=?UTF-8?q?"

	if !strings.HasPrefix(encoded, wordStart) {
		// not UTF-8 encoded
		return encoded
	}

	// A long subject may be split by `git format-patch` into several encoded
	// words, which look like this once the header has been unfolded:
	//   <UTF8-prefix><first-line> <UTF8-prefix><second-line>
	// A leading space lets one replacement remove every word start together
	// with the space that separates it from the previous word.
	joined := strings.ReplaceAll(" "+encoded, " "+wordStart, "")
	joined = strings.ReplaceAll(joined, "?=", "")

	raw, err := io.ReadAll(quotedprintable.NewReader(strings.NewReader(joined)))
	if err != nil {
		// if err, abort decoding and return original subject
		return encoded
	}
	return string(raw)
}