fork of go-gitdiff with jj support

Add option to control patch subject cleaning (#36)

When processing mail-formatted patches, the default cleanup removed all
leading content in square brackets, but this pattern is often used to
identify tickets or other information that should remain in the commit
title. Git supports disabling this with the `-k` and `-b` flags, which we
simulate with the new SubjectCleanMode options.

Use WithSubjectCleanMode(SubjectCleanPatchOnly) to only remove bracketed
strings that contain "PATCH", keeping others that are (probably) part of
the actual commit message.

Note that because of the mail parsing library, we cannot replicate the
`-k` flag exactly and always clean leading and trailing whitespace.

authored by Billy Keyes and committed by GitHub 03daf965 dc43dbf8

Changed files
+232 -93
gitdiff
+4
README.md
··· 101 101 context of each fragment must exactly match the source file; `git apply` 102 102 implements a search algorithm that tries different lines and amounts of 103 103 context, with further options to normalize or ignore whitespace changes. 104 + 105 + 7. When parsing mail-formatted patch headers, leading and trailing whitespace 106 + is always removed from `Subject` lines. There is no exact equivalent to `git 107 + mailinfo -k`.
+81 -47
gitdiff/patch_header.go
··· 165 165 return time.Time{}, fmt.Errorf("unknown date format: %s", s) 166 166 } 167 167 168 - // ParsePatchHeader parses a preamble string as returned by Parse into a 168 + // A PatchHeaderOption modifies the behavior of ParsePatchHeader. 169 + type PatchHeaderOption func(*patchHeaderOptions) 170 + 171 + // SubjectCleanMode controls how ParsePatchHeader cleans subject lines when 172 + // parsing mail-formatted patches. 173 + type SubjectCleanMode int 174 + 175 + const ( 176 + // SubjectCleanWhitespace removes leading and trailing whitespace. 177 + SubjectCleanWhitespace SubjectCleanMode = iota 178 + 179 + // SubjectCleanAll removes leading and trailing whitespace, leading "Re:", 180 + // "re:", and ":" strings, and leading strings enclosed by '[' and ']'. 181 + // This is the default behavior of git (see `git mailinfo`) and this 182 + // package. 183 + SubjectCleanAll 184 + 185 + // SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes 186 + // leading strings enclosed by '[' and ']' if they contain "PATCH". 187 + SubjectCleanPatchOnly 188 + ) 189 + 190 + // WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By 191 + // default, uses SubjectCleanAll. 192 + func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption { 193 + return func(opts *patchHeaderOptions) { 194 + opts.subjectCleanMode = m 195 + } 196 + } 197 + 198 + type patchHeaderOptions struct { 199 + subjectCleanMode SubjectCleanMode 200 + } 201 + 202 + // ParsePatchHeader parses the preamble string returned by [Parse] into a 169 203 // PatchHeader. Due to the variety of header formats, some fields of the parsed 170 204 // PatchHeader may be unset after parsing. 171 205 // 172 206 // Supported formats are the short, medium, full, fuller, and email pretty 173 - // formats used by git diff, git log, and git show and the UNIX mailbox format 174 - // used by git format-patch. 
207 + // formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox 208 + // format used by `git format-patch`. 175 209 // 176 - // If ParsePatchHeader detects that it is handling an email, it will 177 - // remove extra content at the beginning of the title line, such as 178 - // `[PATCH]` or `Re:` in the same way that `git mailinfo` does. 179 - // SubjectPrefix will be set to the value of this removed string. 180 - // (`git mailinfo` is the core part of `git am` that pulls information 181 - // out of an individual mail.) 210 + // When parsing mail-formatted headers, ParsePatchHeader tries to remove 211 + // email-specific content from the title and body: 182 212 // 183 - // Additionally, if ParsePatchHeader detects that it's handling an 184 - // email, it will remove a `---` line and put anything after it into 185 - // BodyAppendix. 213 + // - Based on the SubjectCleanMode, remove prefixes like reply markers and 214 + // "[PATCH]" strings from the subject, saving any removed content in the 215 + // SubjectPrefix field. Parsing always discards leading and trailing 216 + // whitespace from the subject line. The default mode is SubjectCleanAll. 186 217 // 187 - // Those wishing the effect of a plain `git am` should use 188 - // `PatchHeader.Title + "\n" + PatchHeader.Body` (or 189 - // `PatchHeader.Message()`). Those wishing to retain the subject 190 - // prefix and appendix material should use `PatchHeader.SubjectPrefix 191 - // + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" + 192 - // PatchHeader.BodyAppendix`. 193 - func ParsePatchHeader(header string) (*PatchHeader, error) { 194 - header = strings.TrimSpace(header) 218 + // - If the body contains a "---" line (3 hyphens), remove that line and any 219 + // content after it from the body and save it in the BodyAppendix field. 
220 + // 221 + // ParsePatchHeader tries to process content it does not understand without 222 + // returning errors, but will return errors if well-identified content like 223 + // dates or identities uses unknown or invalid formats. 224 + func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) { 225 + opts := patchHeaderOptions{ 226 + subjectCleanMode: SubjectCleanAll, // match git defaults 227 + } 228 + for _, optFn := range options { 229 + optFn(&opts) 230 + } 195 231 232 + header = strings.TrimSpace(header) 196 233 if header == "" { 197 234 return &PatchHeader{}, nil 198 235 } ··· 208 245 209 246 switch { 210 247 case strings.HasPrefix(firstLine, mailHeaderPrefix): 211 - return parseHeaderMail(firstLine, strings.NewReader(rest)) 248 + return parseHeaderMail(firstLine, strings.NewReader(rest), opts) 212 249 213 250 case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix): 214 251 // With a minimum header, the first line is part of the actual mail 215 252 // content and needs to be parsed as part of the "rest" 216 - return parseHeaderMail("", strings.NewReader(header)) 253 + return parseHeaderMail("", strings.NewReader(header), opts) 217 254 218 255 case strings.HasPrefix(firstLine, prettyHeaderPrefix): 219 256 return parseHeaderPretty(firstLine, strings.NewReader(rest)) ··· 366 403 return body.String(), appendix.String() 367 404 } 368 405 369 - func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { 406 + func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) { 370 407 msg, err := mail.ReadMessage(r) 371 408 if err != nil { 372 409 return nil, err ··· 403 440 } 404 441 405 442 subject := msg.Header.Get("Subject") 406 - h.SubjectPrefix, h.Title = parseSubject(subject) 443 + h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode) 407 444 408 445 s := bufio.NewScanner(msg.Body) 409 446 h.Body, h.BodyAppendix = scanMessageBody(s, "", true) ··· 414 451 
return h, nil 415 452 } 416 453 417 - // Takes an email subject and returns the patch prefix and commit 418 - // title. i.e., `[PATCH v3 3/5] Implement foo` would return `[PATCH 419 - // v3 3/5] ` and `Implement foo` 420 - func parseSubject(s string) (string, string) { 421 - // This is meant to be compatible with 422 - // https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject(). 423 - // If compatibility with `git am` drifts, go there to see if there 424 - // are any updates. 454 + func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) { 455 + switch mode { 456 + case SubjectCleanAll, SubjectCleanPatchOnly: 457 + case SubjectCleanWhitespace: 458 + return "", strings.TrimSpace(decodeSubject(s)) 459 + default: 460 + panic(fmt.Sprintf("unknown clean mode: %d", mode)) 461 + } 462 + 463 + // Based on the algorithm from Git in mailinfo.c:cleanup_subject() 464 + // If compatibility with `git am` drifts, go there to see if there are any updates. 425 465 426 466 at := 0 427 467 for at < len(s) { 428 468 switch s[at] { 429 469 case 'r', 'R': 430 470 // Detect re:, Re:, rE: and RE: 431 - if at+2 < len(s) && 432 - (s[at+1] == 'e' || s[at+1] == 'E') && 433 - s[at+2] == ':' { 471 + if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' { 434 472 at += 3 435 473 continue 436 474 } ··· 441 479 continue 442 480 443 481 case '[': 444 - // Look for closing parenthesis 445 - j := at + 1 446 - for ; j < len(s); j++ { 447 - if s[j] == ']' { 448 - break 482 + if i := strings.IndexByte(s[at:], ']'); i > 0 { 483 + if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") { 484 + at += i + 1 485 + continue 449 486 } 450 487 } 451 - 452 - if j < len(s) { 453 - at = j + 1 454 - continue 455 - } 456 488 } 457 489 458 - // Only loop if we actually removed something 490 + // Nothing was removed, end processing 459 491 break 460 492 } 461 493 462 - return s[:at], decodeSubject(s[at:]) 494 + prefix = strings.TrimLeftFunc(s[:at], 
unicode.IsSpace) 495 + subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace) 496 + return 463 497 } 464 498 465 499 // Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result
+147 -46
gitdiff/patch_header_test.go
··· 144 144 expectedBodyAppendix := "CC: Joe Smith <joe.smith@company.com>" 145 145 146 146 tests := map[string]struct { 147 - Input string 148 - Header PatchHeader 149 - Err interface{} 147 + Input string 148 + Options []PatchHeaderOption 149 + Header PatchHeader 150 + Err interface{} 150 151 }{ 151 152 "prettyShort": { 152 153 Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b ··· 269 270 Body: expectedBody, 270 271 }, 271 272 }, 273 + "mailboxPatchOnly": { 274 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 275 + From: Morton Haypenny <mhaypenny@example.com> 276 + Date: Sat, 11 Apr 2020 15:21:23 -0700 277 + Subject: [PATCH] [BUG-123] A sample commit to test header parsing 278 + 279 + The medium format shows the body, which 280 + may wrap on to multiple lines. 281 + 282 + Another body line. 283 + `, 284 + Options: []PatchHeaderOption{ 285 + WithSubjectCleanMode(SubjectCleanPatchOnly), 286 + }, 287 + Header: PatchHeader{ 288 + SHA: expectedSHA, 289 + Author: expectedIdentity, 290 + AuthorDate: expectedDate, 291 + Title: "[BUG-123] " + expectedTitle, 292 + Body: expectedBody, 293 + }, 294 + }, 272 295 "mailboxEmojiOneLine": { 273 296 Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 274 297 From: Morton Haypenny <mhaypenny@example.com> ··· 414 437 Title: expectedTitle, 415 438 }, 416 439 }, 417 - "empty": { 440 + "emptyHeader": { 418 441 Input: "", 419 442 Header: PatchHeader{}, 420 443 }, ··· 422 445 423 446 for name, test := range tests { 424 447 t.Run(name, func(t *testing.T) { 425 - h, err := ParsePatchHeader(test.Input) 448 + h, err := ParsePatchHeader(test.Input, test.Options...) 
426 449 if test.Err != nil { 427 450 assertError(t, test.Err, err, "parsing patch header") 428 451 return ··· 477 500 } 478 501 } 479 502 480 - func TestCleanupSubject(t *testing.T) { 481 - exp := "A sample commit to test header parsing" 482 - tests := map[string]string{ 483 - "plain": "", 484 - "patch": "[PATCH] ", 485 - "patchv5": "[PATCH v5] ", 486 - "patchrfc": "[PATCH RFC] ", 487 - "patchnospace": "[PATCH]", 488 - "space": " ", 489 - "re": "re: ", 490 - "Re": "Re: ", 491 - "RE": "rE: ", 492 - "rere": "re: re: ", 493 - } 503 + func TestCleanSubject(t *testing.T) { 504 + expectedSubject := "A sample commit to test header parsing" 494 505 495 - for name, prefix := range tests { 496 - gotprefix, gottitle := parseSubject(prefix + exp) 497 - if gottitle != exp { 498 - t.Errorf("%s: Incorrect parsing of prefix %s: got title %s, wanted %s", 499 - name, prefix, gottitle, exp) 500 - } 501 - if gotprefix != prefix { 502 - t.Errorf("%s: Incorrect parsing of prefix %s: got prefix %s", 503 - name, prefix, gotprefix) 504 - } 505 - } 506 - 507 - moretests := map[string]struct { 508 - in, eprefix, etitle string 506 + tests := map[string]struct { 507 + Input string 508 + Mode SubjectCleanMode 509 + Prefix string 510 + Subject string 509 511 }{ 510 - "Reimplement": {"Reimplement something", "", "Reimplement something"}, 511 - "patch-reimplement": {"[PATCH v5] Reimplement something", "[PATCH v5] ", "Reimplement something"}, 512 - "Openbracket": {"[Just to annoy people", "", "[Just to annoy people"}, 512 + "CleanAll/noPrefix": { 513 + Input: expectedSubject, 514 + Mode: SubjectCleanAll, 515 + Subject: expectedSubject, 516 + }, 517 + "CleanAll/patchPrefix": { 518 + Input: "[PATCH] " + expectedSubject, 519 + Mode: SubjectCleanAll, 520 + Prefix: "[PATCH] ", 521 + Subject: expectedSubject, 522 + }, 523 + "CleanAll/patchPrefixNoSpace": { 524 + Input: "[PATCH]" + expectedSubject, 525 + Mode: SubjectCleanAll, 526 + Prefix: "[PATCH]", 527 + Subject: expectedSubject, 528 + }, 529 + 
"CleanAll/patchPrefixContent": { 530 + Input: "[PATCH 3/7] " + expectedSubject, 531 + Mode: SubjectCleanAll, 532 + Prefix: "[PATCH 3/7] ", 533 + Subject: expectedSubject, 534 + }, 535 + "CleanAll/spacePrefix": { 536 + Input: " " + expectedSubject, 537 + Mode: SubjectCleanAll, 538 + Subject: expectedSubject, 539 + }, 540 + "CleanAll/replyLowerPrefix": { 541 + Input: "re: " + expectedSubject, 542 + Mode: SubjectCleanAll, 543 + Prefix: "re: ", 544 + Subject: expectedSubject, 545 + }, 546 + "CleanAll/replyMixedPrefix": { 547 + Input: "Re: " + expectedSubject, 548 + Mode: SubjectCleanAll, 549 + Prefix: "Re: ", 550 + Subject: expectedSubject, 551 + }, 552 + "CleanAll/replyCapsPrefix": { 553 + Input: "RE: " + expectedSubject, 554 + Mode: SubjectCleanAll, 555 + Prefix: "RE: ", 556 + Subject: expectedSubject, 557 + }, 558 + "CleanAll/replyDoublePrefix": { 559 + Input: "Re: re: " + expectedSubject, 560 + Mode: SubjectCleanAll, 561 + Prefix: "Re: re: ", 562 + Subject: expectedSubject, 563 + }, 564 + "CleanAll/noPrefixSubjectHasRe": { 565 + Input: "Reimplement parsing", 566 + Mode: SubjectCleanAll, 567 + Subject: "Reimplement parsing", 568 + }, 569 + "CleanAll/patchPrefixSubjectHasRe": { 570 + Input: "[PATCH 1/2] Reimplement parsing", 571 + Mode: SubjectCleanAll, 572 + Prefix: "[PATCH 1/2] ", 573 + Subject: "Reimplement parsing", 574 + }, 575 + "CleanAll/unclosedPrefix": { 576 + Input: "[Just to annoy people", 577 + Mode: SubjectCleanAll, 578 + Subject: "[Just to annoy people", 579 + }, 580 + "CleanAll/multiplePrefix": { 581 + Input: " Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject + " ", 582 + Mode: SubjectCleanAll, 583 + Prefix: "Re:Re: [PATCH 1/2][DRAFT] ", 584 + Subject: expectedSubject, 585 + }, 586 + "CleanPatchOnly/patchPrefix": { 587 + Input: "[PATCH] " + expectedSubject, 588 + Mode: SubjectCleanPatchOnly, 589 + Prefix: "[PATCH] ", 590 + Subject: expectedSubject, 591 + }, 592 + "CleanPatchOnly/mixedPrefix": { 593 + Input: "[PATCH] [TICKET-123] " + expectedSubject, 594 + 
Mode: SubjectCleanPatchOnly, 595 + Prefix: "[PATCH] ", 596 + Subject: "[TICKET-123] " + expectedSubject, 597 + }, 598 + "CleanPatchOnly/multiplePrefix": { 599 + Input: "Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject, 600 + Mode: SubjectCleanPatchOnly, 601 + Prefix: "Re:Re: [PATCH 1/2]", 602 + Subject: "[DRAFT] " + expectedSubject, 603 + }, 604 + "CleanWhitespace/leadingSpace": { 605 + Input: " [PATCH] " + expectedSubject, 606 + Mode: SubjectCleanWhitespace, 607 + Subject: "[PATCH] " + expectedSubject, 608 + }, 609 + "CleanWhitespace/trailingSpace": { 610 + Input: "[PATCH] " + expectedSubject + " ", 611 + Mode: SubjectCleanWhitespace, 612 + Subject: "[PATCH] " + expectedSubject, 613 + }, 513 614 } 514 615 515 - for name, test := range moretests { 516 - prefix, title := parseSubject(test.in) 517 - if title != test.etitle { 518 - t.Errorf("%s: Incorrect parsing of %s: got title %s, wanted %s", 519 - name, test.in, title, test.etitle) 520 - } 521 - if prefix != test.eprefix { 522 - t.Errorf("%s: Incorrect parsing of %s: got prefix %s, wanted %s", 523 - name, test.in, title, test.etitle) 524 - } 616 + for name, test := range tests { 617 + t.Run(name, func(t *testing.T) { 618 + prefix, subject := cleanSubject(test.Input, test.Mode) 619 + if prefix != test.Prefix { 620 + t.Errorf("incorrect prefix: expected %q, actual %q", test.Prefix, prefix) 621 + } 622 + if subject != test.Subject { 623 + t.Errorf("incorrect subject: expected %q, actual %q", test.Subject, subject) 624 + } 625 + }) 525 626 } 526 627 }