fork of go-gitdiff with jj support
1package gitdiff
2
3import (
4 "bufio"
5 "errors"
6 "fmt"
7 "io"
8 "io/ioutil"
9 "mime/quotedprintable"
10 "net/mail"
11 "strconv"
12 "strings"
13 "time"
14 "unicode"
15)
16
// Prefixes of the first header line. ParsePatchHeader uses them to decide
// which header format it is looking at.
const (
	// mailHeaderPrefix starts the first line of a mailbox-formatted header.
	// parseHeaderMail reads the SHA from the text that follows it.
	mailHeaderPrefix = "From "

	// prettyHeaderPrefix starts the first line of a pretty-formatted header.
	// parseHeaderPretty reads the SHA from the text that follows it.
	prettyHeaderPrefix = "commit "

	// mailMinimumHeaderPrefix starts a mail header that has no leading
	// "From " line; the first line is then already a mail header field.
	mailMinimumHeaderPrefix = "From:"
)
22
// PatchHeader is a parsed version of the preamble content that appears before
// the first diff in a patch. It includes metadata about the patch, such as the
// author and a subject. Fields that the parsed header format does not provide
// keep their zero values.
type PatchHeader struct {
	// The SHA of the commit the patch was generated from. Empty if the SHA is
	// not included in the header.
	SHA string

	// The author details of the patch. If these details are not included in
	// the header, Author is nil and AuthorDate is the zero time.
	Author     *PatchIdentity
	AuthorDate time.Time

	// The committer details of the patch. If these details are not included in
	// the header, Committer is nil and CommitterDate is the zero time. Only
	// pretty-formatted headers set these fields; the mail parser does not.
	Committer     *PatchIdentity
	CommitterDate time.Time

	// The title and body of the commit message describing the changes in the
	// patch. Empty if no message is included in the header.
	Title string
	Body  string

	// If the preamble looks like an email, ParsePatchHeader will
	// remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the
	// Title and place them here. Always empty for pretty-formatted headers.
	SubjectPrefix string

	// If the preamble looks like an email, and it contains a `---`
	// line, that line will be removed and everything after it will be
	// placed in BodyAppendix. Always empty for pretty-formatted headers.
	BodyAppendix string
}
56
57// Message returns the commit message for the header. The message consists of
58// the title and the body separated by an empty line.
59func (h *PatchHeader) Message() string {
60 var msg strings.Builder
61 if h != nil {
62 msg.WriteString(h.Title)
63 if h.Body != "" {
64 msg.WriteString("\n\n")
65 msg.WriteString(h.Body)
66 }
67 }
68 return msg.String()
69}
70
// ParsePatchDate converts a patch date string into a time.Time. It accepts the
// iso, rfc, short, raw, unix, and default formats (with local variants) used
// by the --date flag in Git.
//
// An empty string is not an error: it yields the zero time. Formats that do
// not carry a zone offset are interpreted in time.Local. Any other unknown
// format produces an error.
func ParsePatchDate(s string) (time.Time, error) {
	if s == "" {
		return time.Time{}, nil
	}

	// Layouts in the reference-time notation of the time package, tried in
	// order until one of them matches.
	layouts := [...]string{
		"2006-01-02 15:04:05 -0700",      // iso
		"2006-01-02T15:04:05-07:00",      // iso-strict
		"Mon, 2 Jan 2006 15:04:05 -0700", // rfc2822
		"2006-01-02",                     // short
		"Mon Jan 2 15:04:05 2006 -0700",  // default
		"Mon Jan 2 15:04:05 2006",        // default-local
	}
	for i := range layouts {
		parsed, err := time.ParseInLocation(layouts[i], s, time.Local)
		if err == nil {
			return parsed, nil
		}
	}

	// unix format: the whole string is a number of seconds since the epoch
	if secs, err := strconv.ParseInt(s, 10, 64); err == nil {
		return time.Unix(secs, 0), nil
	}

	// raw format: "<seconds> <zone offset>"
	sep := strings.IndexByte(s, ' ')
	if sep > 0 {
		secs, secsErr := strconv.ParseInt(s[:sep], 10, 64)
		zone, zoneErr := time.Parse("-0700", s[sep+1:])
		if secsErr == nil && zoneErr == nil {
			return time.Unix(secs, 0).In(zone.Location()), nil
		}
	}

	return time.Time{}, fmt.Errorf("unknown date format: %s", s)
}
118
// A PatchHeaderOption modifies the behavior of ParsePatchHeader. Options are
// applied in the order they are passed, on top of the defaults, so a later
// option overrides an earlier one that sets the same value.
type PatchHeaderOption func(*patchHeaderOptions)
121
// SubjectCleanMode controls how ParsePatchHeader cleans subject lines when
// parsing mail-formatted patches. It has no effect on pretty-formatted
// headers.
type SubjectCleanMode int

const (
	// SubjectCleanWhitespace removes leading and trailing whitespace. It is
	// the zero value of SubjectCleanMode, but not the default used by
	// ParsePatchHeader.
	SubjectCleanWhitespace SubjectCleanMode = iota

	// SubjectCleanAll removes leading and trailing whitespace, leading "Re:"
	// markers in any letter case, leading ":" characters, and leading
	// strings enclosed by '[' and ']'.
	// This is the default behavior of git (see `git mailinfo`) and this
	// package.
	SubjectCleanAll

	// SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes
	// leading strings enclosed by '[' and ']' if they contain "PATCH".
	SubjectCleanPatchOnly
)
140
141// WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By
142// default, uses SubjectCleanAll.
143func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption {
144 return func(opts *patchHeaderOptions) {
145 opts.subjectCleanMode = m
146 }
147}
148
// patchHeaderOptions collects the settings that PatchHeaderOption functions
// can change. ParsePatchHeader creates one with its defaults and passes it to
// the mail header parser.
type patchHeaderOptions struct {
	// subjectCleanMode selects how the mail subject is cleaned.
	subjectCleanMode SubjectCleanMode
}
152
153// ParsePatchHeader parses the preamble string returned by [Parse] into a
154// PatchHeader. Due to the variety of header formats, some fields of the parsed
155// PatchHeader may be unset after parsing.
156//
157// Supported formats are the short, medium, full, fuller, and email pretty
158// formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox
159// format used by `git format-patch`.
160//
161// When parsing mail-formatted headers, ParsePatchHeader tries to remove
162// email-specific content from the title and body:
163//
164// - Based on the SubjectCleanMode, remove prefixes like reply markers and
165// "[PATCH]" strings from the subject, saving any removed content in the
166// SubjectPrefix field. Parsing always discards leading and trailing
167// whitespace from the subject line. The default mode is SubjectCleanAll.
168//
169// - If the body contains a "---" line (3 hyphens), remove that line and any
170// content after it from the body and save it in the BodyAppendix field.
171//
172// ParsePatchHeader tries to process content it does not understand wthout
173// returning errors, but will return errors if well-identified content like
174// dates or identies uses unknown or invalid formats.
175func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) {
176 opts := patchHeaderOptions{
177 subjectCleanMode: SubjectCleanAll, // match git defaults
178 }
179 for _, optFn := range options {
180 optFn(&opts)
181 }
182
183 header = strings.TrimSpace(header)
184 if header == "" {
185 return &PatchHeader{}, nil
186 }
187
188 var firstLine, rest string
189 if idx := strings.IndexByte(header, '\n'); idx >= 0 {
190 firstLine = header[:idx]
191 rest = header[idx+1:]
192 } else {
193 firstLine = header
194 rest = ""
195 }
196
197 switch {
198 case strings.HasPrefix(firstLine, mailHeaderPrefix):
199 return parseHeaderMail(firstLine, strings.NewReader(rest), opts)
200
201 case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix):
202 // With a minimum header, the first line is part of the actual mail
203 // content and needs to be parsed as part of the "rest"
204 return parseHeaderMail("", strings.NewReader(header), opts)
205
206 case strings.HasPrefix(firstLine, prettyHeaderPrefix):
207 return parseHeaderPretty(firstLine, strings.NewReader(rest))
208 }
209
210 return nil, errors.New("unrecognized patch header format")
211}
212
213func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) {
214 const (
215 authorPrefix = "Author:"
216 commitPrefix = "Commit:"
217 datePrefix = "Date:"
218 authorDatePrefix = "AuthorDate:"
219 commitDatePrefix = "CommitDate:"
220 )
221
222 h := &PatchHeader{}
223
224 prettyLine = strings.TrimPrefix(prettyLine, prettyHeaderPrefix)
225 if i := strings.IndexByte(prettyLine, ' '); i > 0 {
226 h.SHA = prettyLine[:i]
227 } else {
228 h.SHA = prettyLine
229 }
230
231 s := bufio.NewScanner(r)
232 for s.Scan() {
233 line := s.Text()
234
235 // empty line marks end of fields, remaining lines are title/message
236 if strings.TrimSpace(line) == "" {
237 break
238 }
239
240 switch {
241 case strings.HasPrefix(line, authorPrefix):
242 u, err := ParsePatchIdentity(line[len(authorPrefix):])
243 if err != nil {
244 return nil, err
245 }
246 h.Author = &u
247
248 case strings.HasPrefix(line, commitPrefix):
249 u, err := ParsePatchIdentity(line[len(commitPrefix):])
250 if err != nil {
251 return nil, err
252 }
253 h.Committer = &u
254
255 case strings.HasPrefix(line, datePrefix):
256 d, err := ParsePatchDate(strings.TrimSpace(line[len(datePrefix):]))
257 if err != nil {
258 return nil, err
259 }
260 h.AuthorDate = d
261
262 case strings.HasPrefix(line, authorDatePrefix):
263 d, err := ParsePatchDate(strings.TrimSpace(line[len(authorDatePrefix):]))
264 if err != nil {
265 return nil, err
266 }
267 h.AuthorDate = d
268
269 case strings.HasPrefix(line, commitDatePrefix):
270 d, err := ParsePatchDate(strings.TrimSpace(line[len(commitDatePrefix):]))
271 if err != nil {
272 return nil, err
273 }
274 h.CommitterDate = d
275 }
276 }
277 if s.Err() != nil {
278 return nil, s.Err()
279 }
280
281 title, indent := scanMessageTitle(s)
282 if s.Err() != nil {
283 return nil, s.Err()
284 }
285 h.Title = title
286
287 if title != "" {
288 // Don't check for an appendix, pretty headers do not contain them
289 body, _ := scanMessageBody(s, indent, false)
290 if s.Err() != nil {
291 return nil, s.Err()
292 }
293 h.Body = body
294 }
295
296 return h, nil
297}
298
// scanMessageTitle reads lines from s until it reaches a blank line or the
// end of input, and joins them into a single title. Each line is stripped of
// surrounding whitespace and the lines are joined with single spaces. The
// terminating blank line is consumed.
//
// indent is the leading whitespace of the first title line, or empty if that
// line is not indented or there is no title. Callers must check s.Err() to
// detect read failures.
func scanMessageTitle(s *bufio.Scanner) (title string, indent string) {
	var parts []string
	for s.Scan() {
		raw := s.Text()
		text := strings.TrimSpace(raw)
		if text == "" {
			break
		}

		if len(parts) == 0 {
			// Whatever TrimLeftFunc removes from the first line is its indent.
			unindented := strings.TrimLeftFunc(raw, unicode.IsSpace)
			indent = raw[:len(raw)-len(unindented)]
		}
		parts = append(parts, text)
	}
	return strings.Join(parts, " "), indent
}
320
// scanMessageBody reads the remaining lines from s and returns the message
// body and, optionally, its appendix.
//
// Every line has trailing whitespace removed and then the indent prefix
// removed, if present. Blank lines before the first text line and after the
// last one are dropped, and any run of blank lines between two text lines is
// reduced to a single empty line.
//
// If separateAppendix is true, the first line that equals "---" is removed
// and all following text is returned as the second result instead of the
// body; later "---" lines are kept as ordinary text. If separateAppendix is
// false, the second result is always empty. Callers must check s.Err() to
// detect read failures.
func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) {
	var body, appendix strings.Builder
	out := &body          // section currently being written
	pendingBlank := false // a blank line was seen since the last text line

	for s.Scan() {
		text := strings.TrimRightFunc(s.Text(), unicode.IsSpace)
		text = strings.TrimPrefix(text, indent)

		switch {
		case text == "":
			pendingBlank = true

		case separateAppendix && out == &body && text == "---":
			// If requested, parse out "appendix" information (often added
			// by `git format-patch` and removed by `git am`).
			out = &appendix

		default:
			if out.Len() > 0 {
				if pendingBlank {
					out.WriteString("\n\n")
				} else {
					out.WriteByte('\n')
				}
			}
			pendingBlank = false
			out.WriteString(text)
		}
	}
	return body.String(), appendix.String()
}
356
357func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) {
358 msg, err := mail.ReadMessage(r)
359 if err != nil {
360 return nil, err
361 }
362
363 h := &PatchHeader{}
364
365 if strings.HasPrefix(mailLine, mailHeaderPrefix) {
366 mailLine = strings.TrimPrefix(mailLine, mailHeaderPrefix)
367 if i := strings.IndexByte(mailLine, ' '); i > 0 {
368 h.SHA = mailLine[:i]
369 }
370 }
371
372 from := msg.Header.Get("From")
373 if from != "" {
374 u, err := ParsePatchIdentity(from)
375 if err != nil {
376 return nil, err
377 }
378 h.Author = &u
379 }
380
381 date := msg.Header.Get("Date")
382 if date != "" {
383 d, err := ParsePatchDate(date)
384 if err != nil {
385 return nil, err
386 }
387 h.AuthorDate = d
388 }
389
390 subject := msg.Header.Get("Subject")
391 h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode)
392
393 s := bufio.NewScanner(msg.Body)
394 h.Body, h.BodyAppendix = scanMessageBody(s, "", true)
395 if s.Err() != nil {
396 return nil, s.Err()
397 }
398
399 return h, nil
400}
401
402func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) {
403 switch mode {
404 case SubjectCleanAll, SubjectCleanPatchOnly:
405 case SubjectCleanWhitespace:
406 return "", strings.TrimSpace(decodeSubject(s))
407 default:
408 panic(fmt.Sprintf("unknown clean mode: %d", mode))
409 }
410
411 // Based on the algorithm from Git in mailinfo.c:cleanup_subject()
412 // If compatibility with `git am` drifts, go there to see if there are any updates.
413
414 at := 0
415 for at < len(s) {
416 switch s[at] {
417 case 'r', 'R':
418 // Detect re:, Re:, rE: and RE:
419 if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' {
420 at += 3
421 continue
422 }
423
424 case ' ', '\t', ':':
425 // Delete whitespace and duplicate ':' characters
426 at++
427 continue
428
429 case '[':
430 if i := strings.IndexByte(s[at:], ']'); i > 0 {
431 if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") {
432 at += i + 1
433 continue
434 }
435 }
436 }
437
438 // Nothing was removed, end processing
439 break
440 }
441
442 prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace)
443 subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace)
444 return
445}
446
// decodeSubject decodes a subject line. Currently it only supports
// quoted-printable UTF-8, which `git format-patch` produces when the commit
// title has a non-ASCII character (i.e. an emoji).
// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject
//
// A subject that does not start with the "=?UTF-8?q?" marker is returned
// unchanged, as is a subject whose quoted-printable content cannot be decoded.
func decodeSubject(encoded string) string {
	const (
		wordStart = "=?UTF-8?q?"
		wordEnd   = "?="
	)

	if !strings.HasPrefix(encoded, wordStart) {
		// not UTF-8 encoded
		return encoded
	}

	// If the subject is too long, `git format-patch` may produce a subject line across
	// multiple lines. When parsed, this can look like the following:
	// <UTF8-prefix><first-line> <UTF8-prefix><second-line>
	// Adding a leading space lets the first marker be removed by the same
	// " <UTF8-prefix>" pattern as the following ones.
	qp := strings.ReplaceAll(" "+encoded, " "+wordStart, "")
	qp = strings.ReplaceAll(qp, wordEnd, "")

	raw, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(qp)))
	if err != nil {
		// if err, abort decoding and return original subject
		return encoded
	}
	return string(raw)
}
470}