// Fork of go-gitdiff with jj support.
1package gitdiff
2
3import (
4 "bufio"
5 "errors"
6 "fmt"
7 "io"
8 "net/mail"
9 "strconv"
10 "strings"
11 "time"
12 "unicode"
13)
14
const (
	// mailHeaderPrefix begins the first non-blank line of a preamble that
	// ParsePatchHeader hands to parseHeaderMail.
	mailHeaderPrefix   = "From "
	// prettyHeaderPrefix begins the first non-blank line of a preamble that
	// ParsePatchHeader hands to parseHeaderPretty.
	prettyHeaderPrefix = "commit "
)
19
20// PatchHeader is a parsed version of the preamble content that appears before
21// the first diff in a patch. It includes metadata about the patch, such as the
22// author and a subject.
23type PatchHeader struct {
24 // The SHA of the commit the patch was generated from. Empty if the SHA is
25 // not included in the header.
26 SHA string
27
28 // The author details of the patch. If these details are not included in
29 // the header, Author is nil and AuthorDate is the zero time.
30 Author *PatchIdentity
31 AuthorDate time.Time
32
33 // The committer details of the patch. If these details are not included in
34 // the header, Committer is nil and CommitterDate is the zero time.
35 Committer *PatchIdentity
36 CommitterDate time.Time
37
38 // The title and body of the commit message describing the changes in the
39 // patch. Empty if no message is included in the header.
40 Title string
41 Body string
42
43 // If the preamble looks like an email, ParsePatchHeader will
44 // remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the
45 // Title and place them here.
46 SubjectPrefix string
47
48 // If the preamble looks like an email, and it contains a `---`
49 // line, that line will be removed and everything after it will be
50 // placed in BodyAppendix.
51 BodyAppendix string
52}
53
54// Message returns the commit message for the header. The message consists of
55// the title and the body separated by an empty line.
56func (h *PatchHeader) Message() string {
57 var msg strings.Builder
58 if h != nil {
59 msg.WriteString(h.Title)
60 if h.Body != "" {
61 msg.WriteString("\n\n")
62 msg.WriteString(h.Body)
63 }
64 }
65 return msg.String()
66}
67
// PatchIdentity names a person who authored or committed a patch.
type PatchIdentity struct {
	Name  string
	Email string
}

// String formats the identity as "Name <Email>". An empty name is rendered
// as a pair of double quotes so the output always has a name component.
func (i PatchIdentity) String() string {
	if i.Name == "" {
		return fmt.Sprintf("%s <%s>", `""`, i.Email)
	}
	return fmt.Sprintf("%s <%s>", i.Name, i.Email)
}
81
82// ParsePatchIdentity parses a patch identity string. A valid string contains a
83// non-empty name followed by an email address in angle brackets. Like Git,
84// ParsePatchIdentity does not require that the email address is valid or
85// properly formatted, only that it is non-empty. The name must not contain a
86// left angle bracket, '<', and the email address must not contain a right
87// angle bracket, '>'.
88func ParsePatchIdentity(s string) (PatchIdentity, error) {
89 var emailStart, emailEnd int
90 for i, c := range s {
91 if c == '<' && emailStart == 0 {
92 emailStart = i + 1
93 }
94 if c == '>' && emailStart > 0 {
95 emailEnd = i
96 break
97 }
98 }
99 if emailStart > 0 && emailEnd == 0 {
100 return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s)
101 }
102
103 var name, email string
104 if emailStart > 0 {
105 name = strings.TrimSpace(s[:emailStart-1])
106 }
107 if emailStart > 0 && emailEnd > 0 {
108 email = strings.TrimSpace(s[emailStart:emailEnd])
109 }
110 if name == "" || email == "" {
111 return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s)
112 }
113
114 return PatchIdentity{Name: name, Email: email}, nil
115}
116
// ParsePatchDate parses a patch date string. It returns the parsed time, or
// an error if s is in an unknown format. The iso, iso-strict, rfc, short,
// raw, unix, and default formats (with local variants) used by the --date
// flag in Git are supported. Formats without a zone offset are interpreted
// in the local time zone. An empty string yields the zero time and no error.
func ParsePatchDate(s string) (time.Time, error) {
	if s == "" {
		return time.Time{}, nil
	}

	// Textual layouts, tried in order until one matches.
	layouts := [...]string{
		"2006-01-02 15:04:05 -0700",      // iso
		"2006-01-02T15:04:05-07:00",      // iso-strict
		"Mon, 2 Jan 2006 15:04:05 -0700", // rfc
		"2006-01-02",                     // short
		"Mon Jan 2 15:04:05 2006 -0700",  // default
		"Mon Jan 2 15:04:05 2006",        // default-local
	}
	for _, layout := range layouts {
		t, err := time.ParseInLocation(layout, s, time.Local)
		if err == nil {
			return t, nil
		}
	}

	// unix format: a bare count of seconds since the epoch
	if secs, err := strconv.ParseInt(s, 10, 64); err == nil {
		return time.Unix(secs, 0), nil
	}

	// raw format: seconds since the epoch, a space, then a zone offset
	if sp := strings.IndexByte(s, ' '); sp > 0 {
		if secs, err := strconv.ParseInt(s[:sp], 10, 64); err == nil {
			if zone, err := time.Parse("-0700", s[sp+1:]); err == nil {
				return time.Unix(secs, 0).In(zone.Location()), nil
			}
		}
	}

	return time.Time{}, fmt.Errorf("unknown date format: %s", s)
}
164
165// ParsePatchHeader parses a preamble string as returned by Parse into a
166// PatchHeader. Due to the variety of header formats, some fields of the parsed
167// PatchHeader may be unset after parsing.
168//
169// Supported formats are the short, medium, full, fuller, and email pretty
170// formats used by git diff, git log, and git show and the UNIX mailbox format
171// used by git format-patch.
172//
173// If ParsePatchHeader detects that it is handling an email, it will
174// remove extra content at the beginning of the title line, such as
175// `[PATCH]` or `Re:` in the same way that `git mailinfo` does.
176// SubjectPrefix will be set to the value of this removed string.
177// (`git mailinfo` is the core part of `git am` that pulls information
178// out of an individual mail.)
179//
180// Additionally, if ParsePatchHeader detects that it's handling an
181// email, it will remove a `---` line and put anything after it into
182// BodyAppendix.
183//
184// Those wishing the effect of a plain `git am` should use
185// `PatchHeader.Title + "\n" + PatchHeader.Body` (or
186// `PatchHeader.Message()`). Those wishing to retain the subject
187// prefix and appendix material should use `PatchHeader.SubjectPrefix
188// + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" +
189// PatchHeader.BodyAppendix`.
190func ParsePatchHeader(s string) (*PatchHeader, error) {
191 r := bufio.NewReader(strings.NewReader(s))
192
193 var line string
194 for {
195 var err error
196 line, err = r.ReadString('\n')
197 if err == io.EOF {
198 break
199 }
200 if err != nil {
201 return nil, err
202 }
203
204 line = strings.TrimSpace(line)
205 if len(line) > 0 {
206 break
207 }
208 }
209
210 switch {
211 case strings.HasPrefix(line, mailHeaderPrefix):
212 return parseHeaderMail(line, r)
213 case strings.HasPrefix(line, prettyHeaderPrefix):
214 return parseHeaderPretty(line, r)
215 }
216 return nil, errors.New("unrecognized patch header format")
217}
218
219func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) {
220 const (
221 authorPrefix = "Author:"
222 commitPrefix = "Commit:"
223 datePrefix = "Date:"
224 authorDatePrefix = "AuthorDate:"
225 commitDatePrefix = "CommitDate:"
226 )
227
228 h := &PatchHeader{}
229
230 prettyLine = prettyLine[len(prettyHeaderPrefix):]
231 if i := strings.IndexByte(prettyLine, ' '); i > 0 {
232 h.SHA = prettyLine[:i]
233 } else {
234 h.SHA = prettyLine
235 }
236
237 s := bufio.NewScanner(r)
238 for s.Scan() {
239 line := s.Text()
240
241 // empty line marks end of fields, remaining lines are title/message
242 if strings.TrimSpace(line) == "" {
243 break
244 }
245
246 switch {
247 case strings.HasPrefix(line, authorPrefix):
248 u, err := ParsePatchIdentity(line[len(authorPrefix):])
249 if err != nil {
250 return nil, err
251 }
252 h.Author = &u
253
254 case strings.HasPrefix(line, commitPrefix):
255 u, err := ParsePatchIdentity(line[len(commitPrefix):])
256 if err != nil {
257 return nil, err
258 }
259 h.Committer = &u
260
261 case strings.HasPrefix(line, datePrefix):
262 d, err := ParsePatchDate(strings.TrimSpace(line[len(datePrefix):]))
263 if err != nil {
264 return nil, err
265 }
266 h.AuthorDate = d
267
268 case strings.HasPrefix(line, authorDatePrefix):
269 d, err := ParsePatchDate(strings.TrimSpace(line[len(authorDatePrefix):]))
270 if err != nil {
271 return nil, err
272 }
273 h.AuthorDate = d
274
275 case strings.HasPrefix(line, commitDatePrefix):
276 d, err := ParsePatchDate(strings.TrimSpace(line[len(commitDatePrefix):]))
277 if err != nil {
278 return nil, err
279 }
280 h.CommitterDate = d
281 }
282 }
283 if s.Err() != nil {
284 return nil, s.Err()
285 }
286
287 title, indent := scanMessageTitle(s)
288 if s.Err() != nil {
289 return nil, s.Err()
290 }
291 h.Title = title
292
293 if title != "" {
294 // Don't check for an appendix
295 body, _ := scanMessageBody(s, indent, false)
296 if s.Err() != nil {
297 return nil, s.Err()
298 }
299 h.Body = body
300 }
301
302 return h, nil
303}
304
// scanMessageTitle reads lines from s until a blank line or the end of input
// and joins them, trimmed of surrounding whitespace, with single spaces to
// form the title. indent is the leading whitespace of the first title line,
// or empty if that line is not indented. The terminating blank line is
// consumed. Callers should check s.Err() afterwards.
func scanMessageTitle(s *bufio.Scanner) (title string, indent string) {
	notSpace := func(c rune) bool { return !unicode.IsSpace(c) }

	var parts []string
	for s.Scan() {
		raw := s.Text()
		text := strings.TrimSpace(raw)
		if text == "" {
			break
		}

		// Only the first title line determines the indent.
		if len(parts) == 0 {
			if n := strings.IndexFunc(raw, notSpace); n > 0 {
				indent = raw[:n]
			}
		}
		parts = append(parts, text)
	}
	return strings.Join(parts, " "), indent
}
326
// scanMessageBody reads the remaining lines from s and returns the message
// body and, optionally, an appendix.
//
// Trailing whitespace and then the indent prefix are removed from every
// line. Blank lines at the start and end of a section are dropped, and any
// run of blank lines between two text lines collapses to one empty line.
//
// When separateAppendix is true, the first line that equals "---" after
// trimming is dropped and all following lines go to the appendix (material
// often added by `git format-patch` and removed by `git am`). Later "---"
// lines are kept as ordinary appendix text. When separateAppendix is false
// the appendix is always empty. Callers should check s.Err() afterwards.
func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) {
	var body, appendix strings.Builder
	out := &body
	inAppendix := false
	pendingBlank := false

	for s.Scan() {
		text := strings.TrimRightFunc(s.Text(), unicode.IsSpace)
		text = strings.TrimPrefix(text, indent)

		if text == "" {
			pendingBlank = true
			continue
		}

		if separateAppendix && !inAppendix && text == "---" {
			out, inAppendix = &appendix, true
			continue
		}

		// Separators are written lazily, so blank lines only appear
		// between two lines of text within the same section.
		if out.Len() > 0 {
			if pendingBlank {
				out.WriteString("\n\n")
			} else {
				out.WriteByte('\n')
			}
		}
		pendingBlank = false

		out.WriteString(text)
	}
	return body.String(), appendix.String()
}
362
363func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) {
364 msg, err := mail.ReadMessage(r)
365 if err != nil {
366 return nil, err
367 }
368
369 h := &PatchHeader{}
370
371 mailLine = mailLine[len(mailHeaderPrefix):]
372 if i := strings.IndexByte(mailLine, ' '); i > 0 {
373 h.SHA = mailLine[:i]
374 }
375
376 addrs, err := msg.Header.AddressList("From")
377 if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) {
378 return nil, err
379 }
380 if len(addrs) > 0 {
381 addr := addrs[0]
382 if addr.Name == "" {
383 return nil, fmt.Errorf("invalid user string: %s", addr)
384 }
385 h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address}
386 }
387
388 date := msg.Header.Get("Date")
389 if date != "" {
390 d, err := ParsePatchDate(date)
391 if err != nil {
392 return nil, err
393 }
394 h.AuthorDate = d
395 }
396
397 subject := msg.Header.Get("Subject")
398 h.SubjectPrefix, h.Title = parseSubject(subject)
399
400 s := bufio.NewScanner(msg.Body)
401 h.Body, h.BodyAppendix = scanMessageBody(s, "", true)
402 if s.Err() != nil {
403 return nil, s.Err()
404 }
405
406 return h, nil
407}
408
// parseSubject splits an email subject into the patch prefix and the commit
// title. For example, `[PATCH v3 3/5] Implement foo` yields `[PATCH v3 3/5] `
// and `Implement foo`.
//
// The prefix is the longest leading run made up of `re:` markers in any
// letter case, bracketed groups such as `[PATCH]`, spaces, tabs, and colons.
// An opening bracket without a matching closing bracket ends the prefix.
//
// This is meant to be compatible with
// https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject().
// If compatibility with `git am` drifts, go there to see if there are any
// updates.
func parseSubject(s string) (string, string) {
	pos := 0
	for pos < len(s) {
		// next stays equal to pos unless something removable starts here.
		next := pos

		switch s[pos] {
		case ' ', '\t', ':':
			// Whitespace and stray ':' characters belong to the prefix.
			next = pos + 1

		case 'r', 'R':
			// Matches re:, Re:, rE: and RE:
			if pos+2 < len(s) && (s[pos+1] == 'e' || s[pos+1] == 'E') && s[pos+2] == ':' {
				next = pos + 3
			}

		case '[':
			// Skip the whole group if a closing bracket exists.
			for end := pos + 1; end < len(s); end++ {
				if s[end] == ']' {
					next = end + 1
					break
				}
			}
		}

		if next == pos {
			// Nothing was removed, so the title starts here.
			break
		}
		pos = next
	}

	return s[:pos], s[pos:]
}
455}