Fast implementation of Git in pure Go
at master 600 lines 14 kB view raw
1package furgit 2 3import ( 4 "bufio" 5 "bytes" 6 "encoding/binary" 7 "errors" 8 "fmt" 9 "os" 10 "path/filepath" 11 "strings" 12 "sync" 13 "syscall" 14 15 "codeberg.org/lindenii/furgit/internal/bloom" 16) 17 18const ( 19 commitGraphSignature = 0x43475048 // "CGPH" 20 commitGraphVersion = 1 21) 22 23const ( 24 commitGraphChunkOIDF = 0x4f494446 // "OIDF" 25 commitGraphChunkOIDL = 0x4f49444c // "OIDL" 26 commitGraphChunkCDAT = 0x43444154 // "CDAT" 27 commitGraphChunkGDA2 = 0x47444132 // "GDA2" 28 commitGraphChunkGDO2 = 0x47444f32 // "GDO2" 29 commitGraphChunkEDGE = 0x45444745 // "EDGE" 30 commitGraphChunkBIDX = 0x42494458 // "BIDX" 31 commitGraphChunkBDAT = 0x42444154 // "BDAT" 32 commitGraphChunkBASE = 0x42415345 // "BASE" 33) 34 35const ( 36 commitGraphHeaderSize = 8 37 commitGraphChunkEntrySize = 12 38 commitGraphFanoutSize = 256 * 4 39) 40 41type commitGraph struct { 42 repo *Repository 43 44 filename string 45 data []byte 46 dataLen int 47 48 numChunks uint8 49 numBase uint8 50 hashAlgo hashAlgorithm 51 52 numCommits uint32 53 numCommitsInBase uint32 54 55 base *commitGraph 56 57 chunkOIDFanout []byte 58 chunkOIDLookup []byte 59 chunkCommit []byte 60 61 chunkGeneration []byte 62 chunkGenerationOv []byte 63 readGeneration bool 64 65 chunkExtraEdges []byte 66 67 chunkBloomIndex []byte 68 chunkBloomData []byte 69 70 chunkBaseGraphs []byte 71 72 bloomSettings *bloom.Settings 73 74 closeOnce sync.Once 75} 76 77func (cg *commitGraph) Close() error { 78 if cg == nil { 79 return nil 80 } 81 var closeErr error 82 cg.closeOnce.Do(func() { 83 if len(cg.data) > 0 { 84 if err := syscall.Munmap(cg.data); closeErr == nil { 85 closeErr = err 86 } 87 cg.data = nil 88 } 89 }) 90 if cg.base != nil { 91 _ = cg.base.Close() 92 } 93 return closeErr 94} 95 96func (repo *Repository) CommitGraph() (*commitGraph, error) { 97 if repo == nil { 98 return nil, ErrInvalidObject 99 } 100 repo.commitGraphOnce.Do(func() { 101 repo.commitGraph, repo.commitGraphErr = repo.loadCommitGraph() 102 }) 103 return repo.commitGraph, repo.commitGraphErr 104} 105 106func (repo *Repository) loadCommitGraph() (*commitGraph, error) { 107 cg, err := repo.loadCommitGraphSingle() 108 if err == nil { 109 return cg, nil 110 } 111 if !errors.Is(err, ErrNotFound) { 112 return nil, err 113 } 114 return repo.loadCommitGraphChain() 115} 116 117func (repo *Repository) loadCommitGraphSingle() (*commitGraph, error) { 118 path := repo.repoPath(filepath.Join("objects", "info", "commit-graph")) 119 return repo.loadCommitGraphFile(path) 120} 121 122func (repo *Repository) loadCommitGraphChain() (*commitGraph, error) { 123 chainPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", "commit-graph-chain")) 124 f, err := os.Open(chainPath) 125 if err != nil { 126 if os.IsNotExist(err) { 127 return nil, ErrNotFound 128 } 129 return nil, err 130 } 131 defer func() { _ = f.Close() }() 132 133 var hashes []string 134 scanner := bufio.NewScanner(f) 135 for scanner.Scan() { 136 line := strings.TrimSpace(scanner.Text()) 137 if line == "" { 138 continue 139 } 140 hashes = append(hashes, line) 141 } 142 if err := scanner.Err(); err != nil { 143 return nil, err 144 } 145 if len(hashes) == 0 { 146 return nil, ErrNotFound 147 } 148 149 var chain *commitGraph 150 for i, h := range hashes { 151 graphPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", fmt.Sprintf("graph-%s.graph", h))) 152 g, err := repo.loadCommitGraphFile(graphPath) 153 if err != nil { 154 return nil, err 155 } 156 if i > 0 { 157 if g.chunkBaseGraphs == nil { 158 return nil, ErrInvalidObject 159 } 160 if len(g.chunkBaseGraphs) < i*repo.hashAlgo.Size() { 161 return nil, ErrInvalidObject 162 } 163 for j := 0; j < i; j++ { 164 want := hashes[j] 165 start := j * repo.hashAlgo.Size() 166 end := start + repo.hashAlgo.Size() 167 got := hexEncode(g.chunkBaseGraphs[start:end]) 168 if got != want { 169 return nil, ErrInvalidObject 170 } 171 } 172 } 173 if chain != nil { 174 if chain.numCommits+chain.numCommitsInBase > ^uint32(0) { 175 return nil, ErrInvalidObject 176 } 177 g.numCommitsInBase = chain.numCommits + chain.numCommitsInBase 178 } 179 g.base = chain 180 chain = g 181 } 182 validateCommitGraphChain(chain) 183 return chain, nil 184} 185 186func validateCommitGraphChain(head *commitGraph) { 187 if head == nil { 188 return 189 } 190 readGeneration := true 191 for g := head; g != nil; g = g.base { 192 readGeneration = readGeneration && g.readGeneration 193 } 194 if !readGeneration { 195 for g := head; g != nil; g = g.base { 196 g.readGeneration = false 197 } 198 } 199 200 var settings *bloom.Settings 201 for g := head; g != nil; g = g.base { 202 if g.bloomSettings == nil { 203 continue 204 } 205 if settings == nil { 206 settings = g.bloomSettings 207 continue 208 } 209 if settings.HashVersion != g.bloomSettings.HashVersion || 210 settings.NumHashes != g.bloomSettings.NumHashes || 211 settings.BitsPerEntry != g.bloomSettings.BitsPerEntry { 212 g.chunkBloomIndex = nil 213 g.chunkBloomData = nil 214 g.bloomSettings = nil 215 } 216 } 217} 218 219func (repo *Repository) loadCommitGraphFile(path string) (*commitGraph, error) { 220 f, err := os.Open(path) 221 if err != nil { 222 if os.IsNotExist(err) { 223 return nil, ErrNotFound 224 } 225 return nil, err 226 } 227 stat, err := f.Stat() 228 if err != nil { 229 _ = f.Close() 230 return nil, err 231 } 232 if stat.Size() < int64(commitGraphHeaderSize+commitGraphFanoutSize) { 233 _ = f.Close() 234 return nil, ErrInvalidObject 235 } 236 region, err := syscall.Mmap( 237 int(f.Fd()), 238 0, 239 int(stat.Size()), 240 syscall.PROT_READ, 241 syscall.MAP_PRIVATE, 242 ) 243 if err != nil { 244 _ = f.Close() 245 return nil, err 246 } 247 err = f.Close() 248 if err != nil { 249 _ = syscall.Munmap(region) 250 return nil, err 251 } 252 cg, err := parseCommitGraph(repo, path, region) 253 if err != nil { 254 _ = syscall.Munmap(region) 255 return nil, err 256 } 257 cg.data = region 258 return cg, nil 259} 260 261func parseCommitGraph(repo *Repository, filename string, data []byte) (*commitGraph, error) { 262 if len(data) < commitGraphHeaderSize { 263 return nil, ErrInvalidObject 264 } 265 sig := binary.BigEndian.Uint32(data[0:4]) 266 if sig != commitGraphSignature { 267 return nil, ErrInvalidObject 268 } 269 version := data[4] 270 if version != commitGraphVersion { 271 return nil, ErrInvalidObject 272 } 273 hashVersion := data[5] 274 if hashVersion != hashAlgoVersion(repo.hashAlgo) { 275 return nil, ErrInvalidObject 276 } 277 numChunks := data[6] 278 numBase := data[7] 279 280 tocStart := commitGraphHeaderSize 281 tocLen := (int(numChunks) + 1) * commitGraphChunkEntrySize 282 if len(data) < tocStart+tocLen+repo.hashAlgo.Size() { 283 return nil, ErrInvalidObject 284 } 285 286 trailerStart := len(data) - repo.hashAlgo.Size() 287 288 type chunkEntry struct { 289 id uint32 290 offset uint64 291 } 292 entries := make([]chunkEntry, 0, numChunks+1) 293 for i := 0; i < int(numChunks)+1; i++ { 294 pos := tocStart + i*commitGraphChunkEntrySize 295 id := binary.BigEndian.Uint32(data[pos : pos+4]) 296 offset := binary.BigEndian.Uint64(data[pos+4 : pos+12]) 297 entries = append(entries, chunkEntry{id: id, offset: offset}) 298 } 299 300 chunks := map[uint32][]byte{} 301 for i := 0; i < len(entries)-1; i++ { 302 id := entries[i].id 303 if id == 0 { 304 break 305 } 306 start := int(entries[i].offset) 307 end := int(entries[i+1].offset) 308 if start < tocStart+tocLen || end < start || end > trailerStart { 309 return nil, ErrInvalidObject 310 } 311 chunks[id] = data[start:end] 312 } 313 314 cg := &commitGraph{ 315 repo: repo, 316 filename: filename, 317 dataLen: len(data), 318 hashAlgo: repo.hashAlgo, 319 numChunks: numChunks, 320 numBase: numBase, 321 } 322 323 oidf := chunks[commitGraphChunkOIDF] 324 if len(oidf) != commitGraphFanoutSize { 325 return nil, ErrInvalidObject 326 } 327 cg.chunkOIDFanout = oidf 328 cg.numCommits = binary.BigEndian.Uint32(oidf[len(oidf)-4:]) 329 for i := 0; i < 255; i++ { 330 if binary.BigEndian.Uint32(oidf[i*4:(i+1)*4]) > 331 binary.BigEndian.Uint32(oidf[(i+1)*4:(i+2)*4]) { 332 return nil, ErrInvalidObject 333 } 334 } 335 336 oidl := chunks[commitGraphChunkOIDL] 337 if len(oidl) != int(cg.numCommits)*repo.hashAlgo.Size() { 338 return nil, ErrInvalidObject 339 } 340 cg.chunkOIDLookup = oidl 341 342 cdat := chunks[commitGraphChunkCDAT] 343 if len(cdat) != int(cg.numCommits)*commitGraphCommitDataSize(repo.hashAlgo) { 344 return nil, ErrInvalidObject 345 } 346 cg.chunkCommit = cdat 347 348 if gda2 := chunks[commitGraphChunkGDA2]; len(gda2) != 0 { 349 if len(gda2) != int(cg.numCommits)*4 { 350 return nil, ErrInvalidObject 351 } 352 cg.chunkGeneration = gda2 353 cg.readGeneration = true 354 } 355 if gdo2 := chunks[commitGraphChunkGDO2]; len(gdo2) != 0 { 356 cg.chunkGenerationOv = gdo2 357 } 358 359 if edge := chunks[commitGraphChunkEDGE]; len(edge) != 0 { 360 if len(edge)%4 != 0 { 361 return nil, ErrInvalidObject 362 } 363 cg.chunkExtraEdges = edge 364 } 365 366 if base := chunks[commitGraphChunkBASE]; len(base) != 0 { 367 if numBase == 0 { 368 return nil, ErrInvalidObject 369 } 370 if len(base) != int(numBase)*repo.hashAlgo.Size() { 371 return nil, ErrInvalidObject 372 } 373 cg.chunkBaseGraphs = base 374 } else if numBase != 0 { 375 return nil, ErrInvalidObject 376 } 377 378 if bidx := chunks[commitGraphChunkBIDX]; len(bidx) != 0 { 379 if len(bidx) != int(cg.numCommits)*4 { 380 return nil, ErrInvalidObject 381 } 382 cg.chunkBloomIndex = bidx 383 } 384 if bdat := chunks[commitGraphChunkBDAT]; len(bdat) != 0 { 385 if len(bdat) < bloom.DataHeaderSize { 386 return nil, ErrInvalidObject 387 } 388 cg.chunkBloomData = bdat 389 settings, err := bloom.ParseSettings(bdat) 390 if err != nil { 391 return nil, err 392 } 393 cg.bloomSettings = settings 394 } 395 396 if (cg.chunkBloomIndex != nil) != (cg.chunkBloomData != nil) { 397 cg.chunkBloomIndex = nil 398 cg.chunkBloomData = nil 399 cg.bloomSettings = nil 400 } 401 if cg.chunkBloomIndex != nil && cg.chunkBloomData != nil { 402 for i := 0; i < int(cg.numCommits); i++ { 403 cur := binary.BigEndian.Uint32(cg.chunkBloomIndex[i*4 : i*4+4]) 404 if i > 0 { 405 prev := binary.BigEndian.Uint32(cg.chunkBloomIndex[(i-1)*4 : i*4]) 406 if cur < prev { 407 return nil, ErrInvalidObject 408 } 409 } 410 if int(cur) > len(cg.chunkBloomData)-bloom.DataHeaderSize { 411 return nil, ErrInvalidObject 412 } 413 } 414 } 415 416 if err := verifyCommitGraphTrailer(repo, data); err != nil { 417 return nil, err 418 } 419 420 return cg, nil 421} 422 423func commitGraphCommitDataSize(algo hashAlgorithm) int { 424 return algo.Size() + 16 425} 426 427func hashAlgoVersion(algo hashAlgorithm) byte { 428 switch algo { 429 case hashAlgoSHA1: 430 return 1 431 case hashAlgoSHA256: 432 return 2 433 default: 434 return 0 435 } 436} 437 438func verifyCommitGraphTrailer(repo *Repository, data []byte) error { 439 if repo == nil { 440 return ErrInvalidObject 441 } 442 hashSize := repo.hashAlgo.Size() 443 if len(data) < hashSize { 444 return ErrInvalidObject 445 } 446 want := data[len(data)-hashSize:] 447 h, err := repo.hashAlgo.New() 448 if err != nil { 449 return err 450 } 451 _, _ = h.Write(data[:len(data)-hashSize]) 452 got := h.Sum(nil) 453 if !bytes.Equal(got, want) { 454 return ErrInvalidObject 455 } 456 return nil 457} 458 459func hexEncode(b []byte) string { 460 const hex = "0123456789abcdef" 461 out := make([]byte, len(b)*2) 462 for i, v := range b { 463 out[i*2] = hex[v>>4] 464 out[i*2+1] = hex[v&0x0f] 465 } 466 return string(out) 467} 468 469func (cg *commitGraph) lookupOID(id Hash) (uint32, bool) { 470 if cg == nil { 471 return 0, false 472 } 473 return commitGraphBsearchOID(cg, id) 474} 475 476func commitGraphBsearchOID(cg *commitGraph, id Hash) (uint32, bool) { 477 if cg == nil || len(cg.chunkOIDLookup) == 0 { 478 return 0, false 479 } 480 hashSize := cg.hashAlgo.Size() 481 first := int(id.data[0]) 482 fanout := cg.chunkOIDFanout 483 var start uint32 484 if first > 0 { 485 start = binary.BigEndian.Uint32(fanout[(first-1)*4 : first*4]) 486 } 487 end := binary.BigEndian.Uint32(fanout[first*4 : (first+1)*4]) 488 lo := int(start) 489 hi := int(end) - 1 490 for lo <= hi { 491 mid := lo + (hi-lo)/2 492 pos := mid * hashSize 493 cmp := bytes.Compare(cg.chunkOIDLookup[pos:pos+hashSize], id.data[:hashSize]) 494 if cmp == 0 { 495 return uint32(mid) + cg.numCommitsInBase, true 496 } 497 if cmp < 0 { 498 lo = mid + 1 499 } else { 500 hi = mid - 1 501 } 502 } 503 if cg.base != nil { 504 return commitGraphBsearchOID(cg.base, id) 505 } 506 return 0, false 507} 508 509func (cg *commitGraph) loadOID(pos uint32) (Hash, error) { 510 g := cg 511 for g != nil && pos < g.numCommitsInBase { 512 g = g.base 513 } 514 if g == nil { 515 return Hash{}, ErrInvalidObject 516 } 517 if pos >= g.numCommits+g.numCommitsInBase { 518 return Hash{}, ErrInvalidObject 519 } 520 lex := pos - g.numCommitsInBase 521 hashSize := g.hashAlgo.Size() 522 start := int(lex) * hashSize 523 end := start + hashSize 524 var out Hash 525 copy(out.data[:], g.chunkOIDLookup[start:end]) 526 out.algo = g.hashAlgo 527 return out, nil 528} 529 530func (cg *commitGraph) commitDataAt(pos uint32) (*commitGraphCommit, error) { 531 g := cg 532 for g != nil && pos < g.numCommitsInBase { 533 g = g.base 534 } 535 if g == nil { 536 return nil, ErrInvalidObject 537 } 538 if pos >= g.numCommits+g.numCommitsInBase { 539 return nil, ErrInvalidObject 540 } 541 lex := pos - g.numCommitsInBase 542 stride := commitGraphCommitDataSize(g.hashAlgo) 543 start := int(lex) * stride 544 end := start + stride 545 if end > len(g.chunkCommit) { 546 return nil, ErrInvalidObject 547 } 548 buf := g.chunkCommit[start:end] 549 550 var tree Hash 551 hashSize := g.hashAlgo.Size() 552 copy(tree.data[:], buf[:hashSize]) 553 tree.algo = g.hashAlgo 554 555 p1 := binary.BigEndian.Uint32(buf[hashSize : hashSize+4]) 556 p2 := binary.BigEndian.Uint32(buf[hashSize+4 : hashSize+8]) 557 558 genTime := binary.BigEndian.Uint32(buf[hashSize+8 : hashSize+12]) 559 timeLow := binary.BigEndian.Uint32(buf[hashSize+12 : hashSize+16]) 560 561 timeHigh := genTime & 0x3 562 commitTime := (uint64(timeHigh) << 32) | uint64(timeLow) 563 generation := uint64(genTime >> 2) 564 565 if g.readGeneration { 566 genOffset := binary.BigEndian.Uint32(g.chunkGeneration[int(lex)*4:]) 567 if genOffset&correctedGenerationOverflow != 0 { 568 off := genOffset ^ correctedGenerationOverflow 569 idx := int(off) * 8 570 if g.chunkGenerationOv == nil || idx+8 > len(g.chunkGenerationOv) { 571 return nil, ErrInvalidObject 572 } 573 offset := binary.BigEndian.Uint64(g.chunkGenerationOv[idx : idx+8]) 574 generation = commitTime + offset 575 } else { 576 generation = commitTime + uint64(genOffset) 577 } 578 } 579 580 var parents []uint32 581 var err error 582 parents, err = commitGraphAppendParent(parents, g, p1) 583 if err != nil { 584 return nil, err 585 } 586 parents, err = commitGraphAppendParent(parents, g, p2) 587 if err != nil { 588 return nil, err 589 } 590 591 return &commitGraphCommit{ 592 Position: pos, 593 Tree: tree, 594 Parents: parents, 595 CommitTime: commitTime, 596 Generation: generation, 597 Graph: g, 598 graphLexPos: lex, 599 }, nil 600}