fork of go-gitdiff with jj support

Remove "appendix" information from commit message (#19)

...when parsing emails, similar to `git am`.

Add a new field, `BodyAppendix` to PatchHeader.

Modify `scanMessageBody` to accept a boolean argument indicating
whether to separate out the appendix. It does this by keeping two
string builders and switching from the body builder to the appendix
builder when it finds the first `---` line.

Handling the newlines at the end as expected requires moving things
around a bit.

First, we were trimming space from the line once to decide whether the
line was empty, and then trimming space again if we determined it
wasn't empty. This only needs to be done once.

Then, do all the trimming (both of whitespace and the prefix) first,
before deciding what to do about the line.

Request that the appendix be separated out into `BodyAppendix` when
parsing a mail, but not when parsing a commit message.

Add some tests to verify that it works as expected.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Co-authored-by: George Dunlap <george.dunlap@citrix.com>

authored by George Dunlap and committed by GitHub 1bd59c4f 379b8934

Changed files
+95 -16
gitdiff
+42 -16
gitdiff/patch_header.go
··· 44 44 // remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the 45 45 // Title and place them here. 46 46 SubjectPrefix string 47 + 48 + // If the preamble looks like an email, and it contains a `---` 49 + // line, that line will be removed and everything after it will be 50 + // placed in BodyAppendix. 51 + BodyAppendix string 47 52 } 48 53 49 54 // Message returns the commit message for the header. The message consists of ··· 165 170 // formats used by git diff, git log, and git show and the UNIX mailbox format 166 171 // used by git format-patch. 167 172 // 168 - // If ParsePatchHeader detect that it is handling an email, it will 173 + // If ParsePatchHeader detects that it is handling an email, it will 169 174 // remove extra content at the beginning of the title line, such as 170 175 // `[PATCH]` or `Re:` in the same way that `git mailinfo` does. 171 176 // SubjectPrefix will be set to the value of this removed string. 172 177 // (`git mailinfo` is the core part of `git am` that pulls information 173 - // out of an individual mail.) Unline `git mailinfo`, 174 - // ParsePatchHeader does not at the moment remove commit states or 175 - // other extraneous matter after a `---` line. 178 + // out of an individual mail.) 179 + // 180 + // Additionally, if ParsePatchHeader detects that it's handling an 181 + // email, it will remove a `---` line and put anything after it into 182 + // BodyAppendix. 183 + // 184 + // Those wishing the effect of a plain `git am` should use 185 + // `PatchHeader.Title + "\n" + PatchHeader.Body` (or 186 + // `PatchHeader.Message()`). Those wishing to retain the subject 187 + // prefix and appendix material should use `PatchHeader.SubjectPrefix 188 + // + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" + 189 + // PatchHeader.BodyAppendix`. 
176 190 func ParsePatchHeader(s string) (*PatchHeader, error) { 177 191 r := bufio.NewReader(strings.NewReader(s)) 178 192 ··· 277 291 h.Title = title 278 292 279 293 if title != "" { 280 - body := scanMessageBody(s, indent) 294 + // Don't check for an appendix 295 + body, _ := scanMessageBody(s, indent, false) 281 296 if s.Err() != nil { 282 297 return nil, s.Err() 283 298 } ··· 309 324 return b.String(), indent 310 325 } 311 326 312 - func scanMessageBody(s *bufio.Scanner, indent string) string { 313 - var b strings.Builder 327 + func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) { 328 + // Body and appendix 329 + var body, appendix strings.Builder 330 + c := &body 314 331 var empty int 315 332 for i := 0; s.Scan(); i++ { 316 333 line := s.Text() 317 - if strings.TrimSpace(line) == "" { 334 + 335 + line = strings.TrimRightFunc(line, unicode.IsSpace) 336 + line = strings.TrimPrefix(line, indent) 337 + 338 + if line == "" { 318 339 empty++ 319 340 continue 320 341 } 321 342 322 - if b.Len() > 0 { 323 - b.WriteByte('\n') 343 + // If requested, parse out "appendix" information (often added 344 + // by `git format-patch` and removed by `git am`). 
345 + if separateAppendix && c == &body && line == "---" { 346 + c = &appendix 347 + continue 348 + } 349 + 350 + if c.Len() > 0 { 351 + c.WriteByte('\n') 324 352 if empty > 0 { 325 - b.WriteByte('\n') 353 + c.WriteByte('\n') 326 354 } 327 355 } 328 356 empty = 0 329 357 330 - line = strings.TrimRightFunc(line, unicode.IsSpace) 331 - line = strings.TrimPrefix(line, indent) 332 - b.WriteString(line) 358 + c.WriteString(line) 333 359 } 334 - return b.String() 360 + return body.String(), appendix.String() 335 361 } 336 362 337 363 func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { ··· 372 398 h.SubjectPrefix, h.Title = parseSubject(subject) 373 399 374 400 s := bufio.NewScanner(msg.Body) 375 - h.Body = scanMessageBody(s, "") 401 + h.Body, h.BodyAppendix = scanMessageBody(s, "", true) 376 402 if s.Err() != nil { 377 403 return nil, s.Err() 378 404 }
+53
gitdiff/patch_header_test.go
··· 139 139 expectedDate := time.Date(2020, 04, 11, 15, 21, 23, 0, time.FixedZone("PDT", -7*60*60)) 140 140 expectedTitle := "A sample commit to test header parsing" 141 141 expectedBody := "The medium format shows the body, which\nmay wrap on to multiple lines.\n\nAnother body line." 142 + expectedBodyAppendix := "CC: Joe Smith <joe.smith@company.com>" 142 143 143 144 tests := map[string]struct { 144 145 Input string ··· 219 220 CommitterDate: expectedDate, 220 221 Title: expectedTitle, 221 222 Body: expectedBody, 223 + }, 224 + }, 225 + "prettyAppendix": { 226 + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b 227 + Author: Morton Haypenny <mhaypenny@example.com> 228 + AuthorDate: Sat Apr 11 15:21:23 2020 -0700 229 + Commit: Morton Haypenny <mhaypenny@example.com> 230 + CommitDate: Sat Apr 11 15:21:23 2020 -0700 231 + 232 + A sample commit to test header parsing 233 + 234 + The medium format shows the body, which 235 + may wrap on to multiple lines. 236 + 237 + Another body line. 238 + --- 239 + CC: Joe Smith <joe.smith@company.com> 240 + `, 241 + Header: PatchHeader{ 242 + SHA: expectedSHA, 243 + Author: expectedIdentity, 244 + AuthorDate: expectedDate, 245 + Committer: expectedIdentity, 246 + CommitterDate: expectedDate, 247 + Title: expectedTitle, 248 + Body: expectedBody + "\n---\n" + expectedBodyAppendix, 222 249 }, 223 250 }, 224 251 "mailbox": { ··· 240 267 Body: expectedBody, 241 268 }, 242 269 }, 270 + "mailboxAppendix": { 271 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 272 + From: Morton Haypenny <mhaypenny@example.com> 273 + Date: Sat, 11 Apr 2020 15:21:23 -0700 274 + Subject: [PATCH] A sample commit to test header parsing 275 + 276 + The medium format shows the body, which 277 + may wrap on to multiple lines. 278 + 279 + Another body line. 
280 + --- 281 + CC: Joe Smith <joe.smith@company.com> 282 + `, 283 + Header: PatchHeader{ 284 + SHA: expectedSHA, 285 + Author: expectedIdentity, 286 + AuthorDate: expectedDate, 287 + Title: expectedTitle, 288 + Body: expectedBody, 289 + BodyAppendix: expectedBodyAppendix, 290 + }, 291 + }, 243 292 "unwrapTitle": { 244 293 Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b 245 294 Author: Morton Haypenny <mhaypenny@example.com> ··· 332 381 } 333 382 if exp.Body != act.Body { 334 383 t.Errorf("incorrect parsed body:\n expected: %q\n actual: %q", exp.Body, act.Body) 384 + } 385 + if exp.BodyAppendix != act.BodyAppendix { 386 + t.Errorf("incorrect parsed body appendix:\n expected: %q\n actual: %q", 387 + exp.BodyAppendix, act.BodyAppendix) 335 388 } 336 389 }) 337 390 }