// Fork of go-gitdiff with jj support (at v0.3.0).

1package gitdiff 2 3import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io" 8 "net/mail" 9 "strconv" 10 "strings" 11 "time" 12 "unicode" 13) 14 15const ( 16 mailHeaderPrefix = "From " 17 prettyHeaderPrefix = "commit " 18) 19 20// PatchHeader is a parsed version of the preamble content that appears before 21// the first diff in a patch. It includes metadata about the patch, such as the 22// author and a subject. 23type PatchHeader struct { 24 // The SHA of the commit the patch was generated from. Empty if the SHA is 25 // not included in the header. 26 SHA string 27 28 // The author details of the patch. Nil if author information is not 29 // included in the header. 30 Author *PatchIdentity 31 AuthorDate *PatchDate 32 33 // The committer details of the patch. Nil if committer information is not 34 // included in the header. 35 Committer *PatchIdentity 36 CommitterDate *PatchDate 37 38 // The title and message summarizing the changes in the patch. Empty if a 39 // title or message is not included in the header. 40 Title string 41 Message string 42} 43 44// PatchIdentity identifies a person who authored or committed a patch. 45type PatchIdentity struct { 46 Name string 47 Email string 48} 49 50func (i PatchIdentity) String() string { 51 name := i.Name 52 if name == "" { 53 name = `""` 54 } 55 return fmt.Sprintf("%s <%s>", name, i.Email) 56} 57 58// ParsePatchIdentity parses a patch identity string. A valid string contains a 59// non-empty name followed by an email address in angle brackets. Like Git, 60// ParsePatchIdentity does not require that the email addresses is valid or 61// properly formatted, only that it is non-empty. The name must not contain a 62// left angle bracket, '<', and the email address must not contain a right 63// angle bracket, '>'. 
64func ParsePatchIdentity(s string) (PatchIdentity, error) { 65 var emailStart, emailEnd int 66 for i, c := range s { 67 if c == '<' && emailStart == 0 { 68 emailStart = i + 1 69 } 70 if c == '>' && emailStart > 0 { 71 emailEnd = i 72 break 73 } 74 } 75 if emailStart > 0 && emailEnd == 0 { 76 return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s) 77 } 78 79 var name, email string 80 if emailStart > 0 { 81 name = strings.TrimSpace(s[:emailStart-1]) 82 } 83 if emailStart > 0 && emailEnd > 0 { 84 email = strings.TrimSpace(s[emailStart:emailEnd]) 85 } 86 if name == "" || email == "" { 87 return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s) 88 } 89 90 return PatchIdentity{Name: name, Email: email}, nil 91} 92 93// PatchDate is the timestamp when a patch was authored or committed. It 94// contains a raw string version of the date and a parsed version if the date 95// is in a known format. 96type PatchDate struct { 97 Parsed time.Time 98 Raw string 99} 100 101// IsParsed returns true if the PatchDate has a parsed time. 102func (d PatchDate) IsParsed() bool { 103 return !d.Parsed.IsZero() 104} 105 106// ParsePatchDate parses a patch date string. If s is in a supported format, 107// the PatchDate has both the Raw and Parsed initialized. 108// 109// ParsePatchDate supports the iso, rfc, short, raw, unix, and default formats 110// (with local variants) used by the --date flag in Git. 
111func ParsePatchDate(s string) PatchDate { 112 const ( 113 isoFormat = "2006-01-02 15:04:05 -0700" 114 isoStrictFormat = "2006-01-02T15:04:05-07:00" 115 rfc2822Format = "Mon, 02 Jan 2006 15:04:05 -0700" 116 shortFormat = "2006-01-02" 117 defaultFormat = "Mon Jan 02 15:04:05 2006 -0700" 118 defaultLocalFormat = "Mon Jan 02 15:04:05 2006" 119 ) 120 121 d := PatchDate{Raw: s} 122 123 for _, fmt := range []string{ 124 isoFormat, 125 isoStrictFormat, 126 rfc2822Format, 127 shortFormat, 128 defaultFormat, 129 defaultLocalFormat, 130 } { 131 if t, err := time.ParseInLocation(fmt, s, time.Local); err == nil { 132 d.Parsed = t 133 return d 134 } 135 } 136 137 // unix format 138 if unix, err := strconv.ParseInt(s, 10, 64); err == nil { 139 d.Parsed = time.Unix(unix, 0) 140 return d 141 } 142 143 // raw format 144 if space := strings.IndexByte(s, ' '); space > 0 { 145 unix, uerr := strconv.ParseInt(s[:space], 10, 64) 146 zone, zerr := time.Parse("-0700", s[space+1:]) 147 if uerr == nil && zerr == nil { 148 d.Parsed = time.Unix(unix, 0).In(zone.Location()) 149 return d 150 } 151 } 152 153 return d 154} 155 156// ParsePatchHeader parses a preamble string as returned by Parse into a 157// PatchHeader. Due to the variety of header formats, some fields of the parsed 158// PatchHeader may be unset after parsing. 159// 160// Supported formats are the short, medium, full, fuller, and email pretty 161// formats used by git diff, git log, and git show and the UNIX mailbox format 162// used by git format-patch. 163// 164// ParsePatchHeader makes no assumptions about the format of the patch title or 165// message other than removing leading and trailing whitespace on each line and 166// condensing blank lines. In particular, it does not remove the extra content 167// that git format-patch adds to make emailed patches friendlier, like subject 168// prefixes or commit stats. 
169func ParsePatchHeader(s string) (*PatchHeader, error) { 170 r := bufio.NewReader(strings.NewReader(s)) 171 172 var line string 173 for { 174 var err error 175 line, err = r.ReadString('\n') 176 if err == io.EOF { 177 break 178 } 179 if err != nil { 180 return nil, err 181 } 182 183 line = strings.TrimSpace(line) 184 if len(line) > 0 { 185 break 186 } 187 } 188 189 switch { 190 case strings.HasPrefix(line, mailHeaderPrefix): 191 return parseHeaderMail(line, r) 192 case strings.HasPrefix(line, prettyHeaderPrefix): 193 return parseHeaderPretty(line, r) 194 } 195 return nil, errors.New("unrecognized patch header format") 196} 197 198func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { 199 const ( 200 authorPrefix = "Author:" 201 commitPrefix = "Commit:" 202 datePrefix = "Date:" 203 authorDatePrefix = "AuthorDate:" 204 commitDatePrefix = "CommitDate:" 205 ) 206 207 h := &PatchHeader{} 208 209 prettyLine = prettyLine[len(prettyHeaderPrefix):] 210 if i := strings.IndexByte(prettyLine, ' '); i > 0 { 211 h.SHA = prettyLine[:i] 212 } else { 213 h.SHA = prettyLine 214 } 215 216 s := bufio.NewScanner(r) 217 for s.Scan() { 218 line := s.Text() 219 220 // empty line marks end of fields, remaining lines are title/message 221 if strings.TrimSpace(line) == "" { 222 break 223 } 224 225 switch { 226 case strings.HasPrefix(line, authorPrefix): 227 u, err := ParsePatchIdentity(line[len(authorPrefix):]) 228 if err != nil { 229 return nil, err 230 } 231 h.Author = &u 232 233 case strings.HasPrefix(line, commitPrefix): 234 u, err := ParsePatchIdentity(line[len(commitPrefix):]) 235 if err != nil { 236 return nil, err 237 } 238 h.Committer = &u 239 240 case strings.HasPrefix(line, datePrefix): 241 d := ParsePatchDate(strings.TrimSpace(line[len(datePrefix):])) 242 h.AuthorDate = &d 243 244 case strings.HasPrefix(line, authorDatePrefix): 245 d := ParsePatchDate(strings.TrimSpace(line[len(authorDatePrefix):])) 246 h.AuthorDate = &d 247 248 case 
strings.HasPrefix(line, commitDatePrefix): 249 d := ParsePatchDate(strings.TrimSpace(line[len(commitDatePrefix):])) 250 h.CommitterDate = &d 251 } 252 } 253 if s.Err() != nil { 254 return nil, s.Err() 255 } 256 257 title, indent := scanPatchTitle(s) 258 if s.Err() != nil { 259 return nil, s.Err() 260 } 261 h.Title = title 262 263 if title != "" { 264 msg := scanPatchMessage(s, indent) 265 if s.Err() != nil { 266 return nil, s.Err() 267 } 268 h.Message = msg 269 } 270 271 return h, nil 272} 273 274func scanPatchTitle(s *bufio.Scanner) (title string, indent string) { 275 var b strings.Builder 276 for i := 0; s.Scan(); i++ { 277 line := s.Text() 278 trimLine := strings.TrimSpace(line) 279 if trimLine == "" { 280 break 281 } 282 283 if i == 0 { 284 if start := strings.IndexFunc(line, func(c rune) bool { return !unicode.IsSpace(c) }); start > 0 { 285 indent = line[:start] 286 } 287 } 288 if b.Len() > 0 { 289 b.WriteByte(' ') 290 } 291 b.WriteString(trimLine) 292 } 293 return b.String(), indent 294} 295 296func scanPatchMessage(s *bufio.Scanner, indent string) string { 297 var b strings.Builder 298 var empty int 299 for i := 0; s.Scan(); i++ { 300 line := s.Text() 301 if strings.TrimSpace(line) == "" { 302 empty++ 303 continue 304 } 305 306 if b.Len() > 0 { 307 b.WriteByte('\n') 308 if empty > 0 { 309 b.WriteByte('\n') 310 } 311 } 312 empty = 0 313 314 line = strings.TrimRightFunc(line, unicode.IsSpace) 315 line = strings.TrimPrefix(line, indent) 316 b.WriteString(line) 317 } 318 return b.String() 319} 320 321func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { 322 msg, err := mail.ReadMessage(r) 323 if err != nil { 324 return nil, err 325 } 326 327 h := &PatchHeader{} 328 329 mailLine = mailLine[len(mailHeaderPrefix):] 330 if i := strings.IndexByte(mailLine, ' '); i > 0 { 331 h.SHA = mailLine[:i] 332 } 333 334 addrs, err := msg.Header.AddressList("From") 335 if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) { 336 return nil, err 337 } 338 
if len(addrs) > 0 { 339 addr := addrs[0] 340 if addr.Name == "" { 341 return nil, fmt.Errorf("invalid user string: %s", addr) 342 } 343 h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address} 344 } 345 346 date := msg.Header.Get("Date") 347 if date != "" { 348 d := ParsePatchDate(date) 349 h.AuthorDate = &d 350 } 351 352 h.Title = msg.Header.Get("Subject") 353 354 s := bufio.NewScanner(msg.Body) 355 h.Message = scanPatchMessage(s, "") 356 if s.Err() != nil { 357 return nil, s.Err() 358 } 359 360 return h, nil 361}