fork of go-gitdiff with jj support

Decode quoted-printable UTF8 in email subjects (#25)

authored by Javier Campanini and committed by GitHub b5756546 3772c9eb

Changed files
+68 -1
gitdiff
+27 -1
gitdiff/patch_header.go
// decodeSubject decodes RFC 2047 encoded-words in a subject line. Only the
// UTF-8 charset with the "Q" (quoted-printable style) encoding is supported.
// This format is the result of a `git format-patch` when the commit title has
// a non-ASCII character (e.g. an emoji).
// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject
//
// The subject is only decoded when it starts with an encoded-word; any other
// input is returned unchanged. Text outside of encoded-words is copied through
// verbatim, and an encoded-word without a closing "?=" is left as literal text.
// If any encoded-word fails to decode, the original subject is returned.
func decodeSubject(encoded string) string {
	const (
		wordPrefix = "=?UTF-8?q?"
		wordSuffix = "?="
	)

	// The charset and encoding tokens of an encoded-word are case-insensitive,
	// so "=?utf-8?Q?" must be accepted as well.
	startsWord := func(s string) bool {
		return len(s) >= len(wordPrefix) && strings.EqualFold(s[:len(wordPrefix)], wordPrefix)
	}

	if !startsWord(encoded) {
		// not UTF-8 encoded
		return encoded
	}

	// If the subject is too long, `git format-patch` may produce a subject line
	// across multiple lines. When parsed, this can look like the following:
	//   <UTF8-prefix><first-line>?= <UTF8-prefix><second-line>?=
	// so each encoded-word is located and decoded on its own.
	var subject strings.Builder
	rest := encoded
	afterWord := false
	for len(rest) > 0 {
		start := 0
		for start < len(rest) && !startsWord(rest[start:]) {
			start++
		}
		if start == len(rest) {
			// no further encoded-words
			break
		}

		// Search for the terminator after the prefix so that the "?=" formed by
		// the end of the prefix and a leading "=XX" escape is not matched.
		word := rest[start+len(wordPrefix):]
		end := strings.Index(word, wordSuffix)
		if end < 0 {
			// unterminated encoded-word: keep it as literal text
			break
		}

		// Whitespace that only separates two adjacent encoded-words is not part
		// of the subject; any other text between words is kept as is.
		if gap := rest[:start]; !afterWord || strings.Trim(gap, " \t\r\n") != "" {
			subject.WriteString(gap)
		}

		// In the Q encoding "_" stands for a space. Rewrite it as "=20" rather
		// than " " because the quoted-printable reader drops trailing whitespace.
		text, err := io.ReadAll(quotedprintable.NewReader(strings.NewReader(
			strings.ReplaceAll(word[:end], "_", "=20"),
		)))
		if err != nil {
			// if err, abort decoding and return original subject
			return encoded
		}
		subject.Write(text)

		rest = word[end+len(wordSuffix):]
		afterWord = true
	}
	subject.WriteString(rest)

	return subject.String()
}
+41
gitdiff/patch_header_test.go
··· 138 138 } 139 139 expectedDate := time.Date(2020, 04, 11, 15, 21, 23, 0, time.FixedZone("PDT", -7*60*60)) 140 140 expectedTitle := "A sample commit to test header parsing" 141 + expectedEmojiOneLineTitle := "🤖 Enabling auto-merging" 142 + expectedEmojiMultiLineTitle := "[IA64] Put ia64 config files on the Uwe Kleine-König diet" 141 143 expectedBody := "The medium format shows the body, which\nmay wrap on to multiple lines.\n\nAnother body line." 142 144 expectedBodyAppendix := "CC: Joe Smith <joe.smith@company.com>" 143 145 ··· 264 266 Author: expectedIdentity, 265 267 AuthorDate: expectedDate, 266 268 Title: expectedTitle, 269 + Body: expectedBody, 270 + }, 271 + }, 272 + "mailboxEmojiOneLine": { 273 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 274 + From: Morton Haypenny <mhaypenny@example.com> 275 + Date: Sat, 11 Apr 2020 15:21:23 -0700 276 + Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Enabling=20auto-merging?= 277 + 278 + The medium format shows the body, which 279 + may wrap on to multiple lines. 280 + 281 + Another body line. 282 + `, 283 + Header: PatchHeader{ 284 + SHA: expectedSHA, 285 + Author: expectedIdentity, 286 + AuthorDate: expectedDate, 287 + Title: expectedEmojiOneLineTitle, 288 + Body: expectedBody, 289 + }, 290 + }, 291 + "mailboxEmojiMultiLine": { 292 + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 293 + From: Morton Haypenny <mhaypenny@example.com> 294 + Date: Sat, 11 Apr 2020 15:21:23 -0700 295 + Subject: [PATCH] =?UTF-8?q?[IA64]=20Put=20ia64=20config=20files=20on=20the=20?= 296 + =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig=20diet?= 297 + 298 + The medium format shows the body, which 299 + may wrap on to multiple lines. 300 + 301 + Another body line. 302 + `, 303 + Header: PatchHeader{ 304 + SHA: expectedSHA, 305 + Author: expectedIdentity, 306 + AuthorDate: expectedDate, 307 + Title: expectedEmojiMultiLineTitle, 267 308 Body: expectedBody, 268 309 }, 269 310 },