// Fork of go-gitdiff with jj support.
1package gitdiff
2
3import (
4 "fmt"
5 "strings"
6)
7
// PatchIdentity identifies a person who authored or committed a patch.
type PatchIdentity struct {
	// Name is the person's display name.
	Name string
	// Email is the person's email address.
	Email string
}

// String renders the identity as "Name <Email>". When Name is empty, a
// literal pair of double quotes is printed in its place so that the name
// position is never blank.
func (i PatchIdentity) String() string {
	shown := `""`
	if i.Name != "" {
		shown = i.Name
	}
	return fmt.Sprintf("%s <%s>", shown, i.Email)
}
21
22// ParsePatchIdentity parses a patch identity string. A patch identity contains
23// an email address and an optional name in [RFC 5322] format. This is either a
24// plain email adddress or a name followed by an address in angle brackets:
25//
26// author@example.com
27// Author Name <author@example.com>
28//
29// If the input is not one of these formats, ParsePatchIdentity applies a
30// heuristic to separate the name and email portions. If both the name and
31// email are missing or empty, ParsePatchIdentity returns an error. It
32// otherwise does not validate the result.
33//
34// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322
35func ParsePatchIdentity(s string) (PatchIdentity, error) {
36 s = normalizeSpace(s)
37 s = unquotePairs(s)
38
39 var name, email string
40 if at := strings.IndexByte(s, '@'); at >= 0 {
41 start, end := at, at
42 for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' {
43 start--
44 }
45 for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' {
46 end++
47 }
48 email = s[start+1 : end]
49
50 // Adjust the boundaries so that we drop angle brackets, but keep
51 // spaces when removing the email to form the name.
52 if start < 0 || s[start] != '<' {
53 start++
54 }
55 if end >= len(s) || s[end] != '>' {
56 end--
57 }
58 name = s[:start] + s[end+1:]
59 } else {
60 start, end := 0, 0
61 for i := 0; i < len(s); i++ {
62 if s[i] == '<' && start == 0 {
63 start = i + 1
64 }
65 if s[i] == '>' && start > 0 {
66 end = i
67 break
68 }
69 }
70 if start > 0 && end >= start {
71 email = strings.TrimSpace(s[start:end])
72 name = s[:start-1]
73 }
74 }
75
76 // After extracting the email, the name might contain extra whitespace
77 // again and may be surrounded by comment characters. The git source gives
78 // these examples of when this can happen:
79 //
80 // "Name <email@domain>"
81 // "email@domain (Name)"
82 // "Name <email@domain> (Comment)"
83 //
84 name = normalizeSpace(name)
85 if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") {
86 name = name[1 : len(name)-1]
87 }
88 name = strings.TrimSpace(name)
89
90 // If the name is empty or contains email-like characters, use the email
91 // instead (assuming one exists)
92 if name == "" || strings.ContainsAny(name, "@<>") {
93 name = email
94 }
95
96 if name == "" && email == "" {
97 return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s)
98 }
99 return PatchIdentity{Name: name, Email: email}, nil
100}
101
// unquotePairs processes the RFC 5322 tokens "quoted-string" and "comment" to
// remove any "quoted-pairs" (backslash-escaped characters). It also removes
// the double quotes that delimit quoted strings, but leaves the parentheses
// that delimit comments in place.
func unquotePairs(s string) string {
	var (
		out      strings.Builder
		inQuote  bool // inside a quoted-string
		depth    int  // nesting level of open comments
		verbatim bool // previous byte was an escaping backslash
	)

	for i := 0; i < len(s); i++ {
		c := s[i]

		// A byte that follows an escaping backslash is copied as-is and
		// never interpreted as a delimiter.
		if verbatim {
			verbatim = false
			out.WriteByte(c)
			continue
		}

		switch {
		case c == '\\' && (inQuote || depth > 0):
			// quoted-pair is only meaningful in quoted-string/comment;
			// drop the backslash and keep the byte after it.
			verbatim = true
			continue
		case c == '"' && depth == 0:
			// Quote delimiters are dropped; inside a comment they are
			// ordinary characters and fall through to be copied.
			inQuote = !inQuote
			continue
		case c == '(' && !inQuote:
			depth++
		case c == ')' && depth > 0:
			depth--
		}
		out.WriteByte(c)
	}
	return out.String()
}
143
144// normalizeSpace trims leading and trailing whitespace from s and converts
145// inner sequences of one or more whitespace characters to single spaces.
146func normalizeSpace(s string) string {
147 var sb strings.Builder
148 for i := 0; i < len(s); i++ {
149 c := s[i]
150 if !isRFC5332Space(c) {
151 if sb.Len() > 0 && isRFC5332Space(s[i-1]) {
152 sb.WriteByte(' ')
153 }
154 sb.WriteByte(c)
155 }
156 }
157 return sb.String()
158}
159
// isRFC5332Space reports whether c is a space, horizontal tab, line feed, or
// carriage return.
//
// NOTE: the identifier says 5332, while the RFC cited in this file is 5322.
// The name is left unchanged because other functions call it.
func isRFC5332Space(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}