fork of go-gitdiff with jj support
1package gitdiff
2
3import (
4 "bufio"
5 "errors"
6 "fmt"
7 "io"
8 "mime/quotedprintable"
9 "net/mail"
10 "strconv"
11 "strings"
12 "time"
13 "unicode"
14)
15
// Prefixes matched against the first non-empty line of a patch header to
// decide which parser handles it.
const (
	// mailHeaderPrefix begins the "From <sha> ..." separator line that opens
	// a UNIX mailbox style header.
	mailHeaderPrefix = "From "

	// mailMinimumHeaderPrefix begins a mail style header that has no
	// separator line and opens directly with the From: field.
	mailMinimumHeaderPrefix = "From:"

	// prettyHeaderPrefix begins the "commit <sha>" line that opens a pretty
	// format header.
	prettyHeaderPrefix = "commit "
)
21
22// PatchHeader is a parsed version of the preamble content that appears before
23// the first diff in a patch. It includes metadata about the patch, such as the
24// author and a subject.
25type PatchHeader struct {
26 // The SHA of the commit the patch was generated from. Empty if the SHA is
27 // not included in the header.
28 SHA string
29
30 // The author details of the patch. If these details are not included in
31 // the header, Author is nil and AuthorDate is the zero time.
32 Author *PatchIdentity
33 AuthorDate time.Time
34
35 // The committer details of the patch. If these details are not included in
36 // the header, Committer is nil and CommitterDate is the zero time.
37 Committer *PatchIdentity
38 CommitterDate time.Time
39
40 // The title and body of the commit message describing the changes in the
41 // patch. Empty if no message is included in the header.
42 Title string
43 Body string
44
45 // If the preamble looks like an email, ParsePatchHeader will
46 // remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the
47 // Title and place them here.
48 SubjectPrefix string
49
50 // If the preamble looks like an email, and it contains a `---`
51 // line, that line will be removed and everything after it will be
52 // placed in BodyAppendix.
53 BodyAppendix string
54}
55
56// Message returns the commit message for the header. The message consists of
57// the title and the body separated by an empty line.
58func (h *PatchHeader) Message() string {
59 var msg strings.Builder
60 if h != nil {
61 msg.WriteString(h.Title)
62 if h.Body != "" {
63 msg.WriteString("\n\n")
64 msg.WriteString(h.Body)
65 }
66 }
67 return msg.String()
68}
69
// PatchIdentity is the name and email address of a person who authored or
// committed a patch.
type PatchIdentity struct {
	Name  string
	Email string
}

// String formats the identity as "Name <Email>". An empty name is rendered
// as a pair of double quotes so the result always has a name part.
func (i PatchIdentity) String() string {
	if i.Name == "" {
		return fmt.Sprintf("%s <%s>", `""`, i.Email)
	}
	return fmt.Sprintf("%s <%s>", i.Name, i.Email)
}
83
84// ParsePatchIdentity parses a patch identity string. A valid string contains a
85// non-empty name followed by an email address in angle brackets. Like Git,
86// ParsePatchIdentity does not require that the email address is valid or
87// properly formatted, only that it is non-empty. The name must not contain a
88// left angle bracket, '<', and the email address must not contain a right
89// angle bracket, '>'.
90func ParsePatchIdentity(s string) (PatchIdentity, error) {
91 var emailStart, emailEnd int
92 for i, c := range s {
93 if c == '<' && emailStart == 0 {
94 emailStart = i + 1
95 }
96 if c == '>' && emailStart > 0 {
97 emailEnd = i
98 break
99 }
100 }
101 if emailStart > 0 && emailEnd == 0 {
102 return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s)
103 }
104
105 var name, email string
106 if emailStart > 0 {
107 name = strings.TrimSpace(s[:emailStart-1])
108 }
109 if emailStart > 0 && emailEnd > 0 {
110 email = strings.TrimSpace(s[emailStart:emailEnd])
111 }
112 if name == "" || email == "" {
113 return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s)
114 }
115
116 return PatchIdentity{Name: name, Email: email}, nil
117}
118
// ParsePatchDate parses a patch date string. It returns the parsed time or an
// error if s has an unknown format. ParsePatchDate supports the iso,
// iso-strict, rfc, short, raw, unix, and default formats (with local
// variants) used by the --date flag in Git.
//
// An empty string is not an error: it yields the zero time. Formats without
// a zone offset are interpreted in the local time zone. The raw format
// ("<unix seconds> <+/-hhmm>") always yields a time whose zone offset is the
// one written in s.
func ParsePatchDate(s string) (time.Time, error) {
	const (
		isoFormat          = "2006-01-02 15:04:05 -0700"
		isoStrictFormat    = "2006-01-02T15:04:05-07:00"
		rfc2822Format      = "Mon, 2 Jan 2006 15:04:05 -0700"
		shortFormat        = "2006-01-02"
		defaultFormat      = "Mon Jan 2 15:04:05 2006 -0700"
		defaultLocalFormat = "Mon Jan 2 15:04:05 2006"
	)

	if s == "" {
		return time.Time{}, nil
	}

	// The first layout that parses wins.
	layouts := [...]string{
		isoFormat,
		isoStrictFormat,
		rfc2822Format,
		shortFormat,
		defaultFormat,
		defaultLocalFormat,
	}
	for _, layout := range layouts {
		if t, err := time.ParseInLocation(layout, s, time.Local); err == nil {
			return t, nil
		}
	}

	// unix format: seconds since the epoch
	if sec, err := strconv.ParseInt(s, 10, 64); err == nil {
		return time.Unix(sec, 0), nil
	}

	// raw format: seconds since the epoch followed by a zone offset
	if space := strings.IndexByte(s, ' '); space > 0 {
		sec, secErr := strconv.ParseInt(s[:space], 10, 64)
		zone, zoneErr := time.Parse("-0700", s[space+1:])
		if secErr == nil && zoneErr == nil {
			// time.Parse may attach the local location when the offset
			// happens to match it, and the local zone's rules (for example
			// DST) could then select a different offset for the actual
			// timestamp. Build a fixed zone from the parsed offset so the
			// result always carries the offset that was written.
			_, offset := zone.Zone()
			return time.Unix(sec, 0).In(time.FixedZone("", offset)), nil
		}
	}

	return time.Time{}, fmt.Errorf("unknown date format: %s", s)
}
166
167// ParsePatchHeader parses a preamble string as returned by Parse into a
168// PatchHeader. Due to the variety of header formats, some fields of the parsed
169// PatchHeader may be unset after parsing.
170//
171// Supported formats are the short, medium, full, fuller, and email pretty
172// formats used by git diff, git log, and git show and the UNIX mailbox format
173// used by git format-patch.
174//
175// If ParsePatchHeader detects that it is handling an email, it will
176// remove extra content at the beginning of the title line, such as
177// `[PATCH]` or `Re:` in the same way that `git mailinfo` does.
178// SubjectPrefix will be set to the value of this removed string.
179// (`git mailinfo` is the core part of `git am` that pulls information
180// out of an individual mail.)
181//
182// Additionally, if ParsePatchHeader detects that it's handling an
183// email, it will remove a `---` line and put anything after it into
184// BodyAppendix.
185//
186// Those wishing the effect of a plain `git am` should use
187// `PatchHeader.Title + "\n" + PatchHeader.Body` (or
188// `PatchHeader.Message()`). Those wishing to retain the subject
189// prefix and appendix material should use `PatchHeader.SubjectPrefix
190// + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" +
191// PatchHeader.BodyAppendix`.
192func ParsePatchHeader(s string) (*PatchHeader, error) {
193 r := bufio.NewReader(strings.NewReader(s))
194
195 var line string
196 for {
197 var err error
198 line, err = r.ReadString('\n')
199 if err == io.EOF {
200 break
201 }
202 if err != nil {
203 return nil, err
204 }
205
206 line = strings.TrimSpace(line)
207 if len(line) > 0 {
208 break
209 }
210 }
211
212 switch {
213 case strings.HasPrefix(line, mailHeaderPrefix):
214 return parseHeaderMail(line, r)
215 case strings.HasPrefix(line, mailMinimumHeaderPrefix):
216 r = bufio.NewReader(strings.NewReader(s))
217 return parseHeaderMail("", r)
218 case strings.HasPrefix(line, prettyHeaderPrefix):
219 return parseHeaderPretty(line, r)
220 }
221 return nil, errors.New("unrecognized patch header format")
222}
223
224func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) {
225 const (
226 authorPrefix = "Author:"
227 commitPrefix = "Commit:"
228 datePrefix = "Date:"
229 authorDatePrefix = "AuthorDate:"
230 commitDatePrefix = "CommitDate:"
231 )
232
233 h := &PatchHeader{}
234
235 prettyLine = prettyLine[len(prettyHeaderPrefix):]
236 if i := strings.IndexByte(prettyLine, ' '); i > 0 {
237 h.SHA = prettyLine[:i]
238 } else {
239 h.SHA = prettyLine
240 }
241
242 s := bufio.NewScanner(r)
243 for s.Scan() {
244 line := s.Text()
245
246 // empty line marks end of fields, remaining lines are title/message
247 if strings.TrimSpace(line) == "" {
248 break
249 }
250
251 switch {
252 case strings.HasPrefix(line, authorPrefix):
253 u, err := ParsePatchIdentity(line[len(authorPrefix):])
254 if err != nil {
255 return nil, err
256 }
257 h.Author = &u
258
259 case strings.HasPrefix(line, commitPrefix):
260 u, err := ParsePatchIdentity(line[len(commitPrefix):])
261 if err != nil {
262 return nil, err
263 }
264 h.Committer = &u
265
266 case strings.HasPrefix(line, datePrefix):
267 d, err := ParsePatchDate(strings.TrimSpace(line[len(datePrefix):]))
268 if err != nil {
269 return nil, err
270 }
271 h.AuthorDate = d
272
273 case strings.HasPrefix(line, authorDatePrefix):
274 d, err := ParsePatchDate(strings.TrimSpace(line[len(authorDatePrefix):]))
275 if err != nil {
276 return nil, err
277 }
278 h.AuthorDate = d
279
280 case strings.HasPrefix(line, commitDatePrefix):
281 d, err := ParsePatchDate(strings.TrimSpace(line[len(commitDatePrefix):]))
282 if err != nil {
283 return nil, err
284 }
285 h.CommitterDate = d
286 }
287 }
288 if s.Err() != nil {
289 return nil, s.Err()
290 }
291
292 title, indent := scanMessageTitle(s)
293 if s.Err() != nil {
294 return nil, s.Err()
295 }
296 h.Title = title
297
298 if title != "" {
299 // Don't check for an appendix
300 body, _ := scanMessageBody(s, indent, false)
301 if s.Err() != nil {
302 return nil, s.Err()
303 }
304 h.Body = body
305 }
306
307 return h, nil
308}
309
// scanMessageTitle reads lines from s up to and including the first blank
// line (or the end of input) and joins them, trimmed and separated by single
// spaces, into the title. indent is the leading whitespace of the first
// title line, or empty if that line is not indented. If the very first line
// read is blank, both results are empty.
func scanMessageTitle(s *bufio.Scanner) (title string, indent string) {
	var parts []string
	for s.Scan() {
		raw := s.Text()
		text := strings.TrimSpace(raw)
		if text == "" {
			break
		}

		if len(parts) == 0 {
			// Only the first line determines the indent: it is whatever
			// trimming leading whitespace removes from that line.
			indent = raw[:len(raw)-len(strings.TrimLeftFunc(raw, unicode.IsSpace))]
		}
		parts = append(parts, text)
	}
	return strings.Join(parts, " "), indent
}
331
// scanMessageBody reads the remaining lines from s and returns the message
// body and, optionally, its appendix.
//
// Each line has trailing whitespace removed and then one leading occurrence
// of indent. Blank lines at the start and end of a section are dropped and
// any run of blank lines between two text lines becomes a single blank line.
//
// If separateAppendix is true, the first line that is exactly "---" is
// dropped and every later line goes to the appendix (often added by
// `git format-patch` and removed by `git am`). Otherwise the appendix is
// empty.
func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) {
	var body, appendix strings.Builder

	dst := &body     // section currently being written
	sawBlank := false // a blank line was seen since the last text line
	for s.Scan() {
		text := strings.TrimPrefix(strings.TrimRightFunc(s.Text(), unicode.IsSpace), indent)

		switch {
		case text == "":
			sawBlank = true

		case separateAppendix && dst == &body && text == "---":
			dst = &appendix

		default:
			// Separators are written lazily, before the next text line, so
			// that trailing blank lines never reach the output.
			if dst.Len() > 0 {
				sep := "\n"
				if sawBlank {
					sep = "\n\n"
				}
				dst.WriteString(sep)
			}
			sawBlank = false
			dst.WriteString(text)
		}
	}
	return body.String(), appendix.String()
}
367
368func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) {
369 msg, err := mail.ReadMessage(r)
370 if err != nil {
371 return nil, err
372 }
373
374 h := &PatchHeader{}
375
376 if len(mailLine) > len(mailHeaderPrefix) {
377 mailLine = mailLine[len(mailHeaderPrefix):]
378 if i := strings.IndexByte(mailLine, ' '); i > 0 {
379 h.SHA = mailLine[:i]
380 }
381 }
382
383 addrs, err := msg.Header.AddressList("From")
384 if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) {
385 return nil, err
386 }
387 if len(addrs) > 0 {
388 addr := addrs[0]
389 if addr.Name == "" {
390 addr.Name = addr.Address
391 }
392 h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address}
393 }
394
395 date := msg.Header.Get("Date")
396 if date != "" {
397 d, err := ParsePatchDate(date)
398 if err != nil {
399 return nil, err
400 }
401 h.AuthorDate = d
402 }
403
404 subject := msg.Header.Get("Subject")
405 h.SubjectPrefix, h.Title = parseSubject(subject)
406
407 s := bufio.NewScanner(msg.Body)
408 h.Body, h.BodyAppendix = scanMessageBody(s, "", true)
409 if s.Err() != nil {
410 return nil, s.Err()
411 }
412
413 return h, nil
414}
415
416// Takes an email subject and returns the patch prefix and commit
417// title. i.e., `[PATCH v3 3/5] Implement foo` would return `[PATCH
418// v3 3/5] ` and `Implement foo`
419func parseSubject(s string) (string, string) {
420 // This is meant to be compatible with
421 // https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject().
422 // If compatibility with `git am` drifts, go there to see if there
423 // are any updates.
424
425 at := 0
426 for at < len(s) {
427 switch s[at] {
428 case 'r', 'R':
429 // Detect re:, Re:, rE: and RE:
430 if at+2 < len(s) &&
431 (s[at+1] == 'e' || s[at+1] == 'E') &&
432 s[at+2] == ':' {
433 at += 3
434 continue
435 }
436
437 case ' ', '\t', ':':
438 // Delete whitespace and duplicate ':' characters
439 at++
440 continue
441
442 case '[':
443 // Look for closing parenthesis
444 j := at + 1
445 for ; j < len(s); j++ {
446 if s[j] == ']' {
447 break
448 }
449 }
450
451 if j < len(s) {
452 at = j + 1
453 continue
454 }
455 }
456
457 // Only loop if we actually removed something
458 break
459 }
460
461 return s[:at], decodeSubject(s[at:])
462}
463
// decodeSubject decodes an encoded subject line. Only quoted-printable UTF-8
// is supported, which is what `git format-patch` produces when the commit
// title has a non-ASCII character (e.g. an emoji). A subject that does not
// start with the `=?UTF-8?q?` marker, or that fails to decode, is returned
// unchanged.
// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject
func decodeSubject(encoded string) string {
	const (
		wordStart = "=?UTF-8?q?"
		wordEnd   = "?="
	)

	if !strings.HasPrefix(encoded, wordStart) {
		// not UTF-8 encoded
		return encoded
	}

	// A long subject may be split by `git format-patch` across several
	// lines, which after parsing looks like:
	//   <marker><first-line> <marker><second-line>
	// Prefixing a space lets one replacement remove the leading marker and
	// every " <marker>" that joins two parts.
	payload := strings.ReplaceAll(" "+encoded, " "+wordStart, "")
	payload = strings.ReplaceAll(payload, wordEnd, "")

	var decoded strings.Builder
	if _, err := io.Copy(&decoded, quotedprintable.NewReader(strings.NewReader(payload))); err != nil {
		// Decoding failed: fall back to the subject as it was given.
		return encoded
	}
	return decoded.String()
}