Fast implementation of Git in pure Go
1package furgit
2
3import (
4 "bufio"
5 "bytes"
6 "encoding/binary"
7 "errors"
8 "fmt"
9 "os"
10 "path/filepath"
11 "strings"
12 "sync"
13 "syscall"
14
15 "codeberg.org/lindenii/furgit/internal/bloom"
16)
17
18const (
19 commitGraphSignature = 0x43475048 // "CGPH"
20 commitGraphVersion = 1
21)
22
23const (
24 commitGraphChunkOIDF = 0x4f494446 // "OIDF"
25 commitGraphChunkOIDL = 0x4f49444c // "OIDL"
26 commitGraphChunkCDAT = 0x43444154 // "CDAT"
27 commitGraphChunkGDA2 = 0x47444132 // "GDA2"
28 commitGraphChunkGDO2 = 0x47444f32 // "GDO2"
29 commitGraphChunkEDGE = 0x45444745 // "EDGE"
30 commitGraphChunkBIDX = 0x42494458 // "BIDX"
31 commitGraphChunkBDAT = 0x42444154 // "BDAT"
32 commitGraphChunkBASE = 0x42415345 // "BASE"
33)
34
35const (
36 commitGraphHeaderSize = 8
37 commitGraphChunkEntrySize = 12
38 commitGraphFanoutSize = 256 * 4
39)
40
41type commitGraph struct {
42 repo *Repository
43
44 filename string
45 data []byte
46 dataLen int
47
48 numChunks uint8
49 numBase uint8
50 hashAlgo hashAlgorithm
51
52 numCommits uint32
53 numCommitsInBase uint32
54
55 base *commitGraph
56
57 chunkOIDFanout []byte
58 chunkOIDLookup []byte
59 chunkCommit []byte
60
61 chunkGeneration []byte
62 chunkGenerationOv []byte
63 readGeneration bool
64
65 chunkExtraEdges []byte
66
67 chunkBloomIndex []byte
68 chunkBloomData []byte
69
70 chunkBaseGraphs []byte
71
72 bloomSettings *bloom.Settings
73
74 closeOnce sync.Once
75}
76
77func (cg *commitGraph) Close() error {
78 if cg == nil {
79 return nil
80 }
81 var closeErr error
82 cg.closeOnce.Do(func() {
83 if len(cg.data) > 0 {
84 if err := syscall.Munmap(cg.data); closeErr == nil {
85 closeErr = err
86 }
87 cg.data = nil
88 }
89 })
90 if cg.base != nil {
91 _ = cg.base.Close()
92 }
93 return closeErr
94}
95
96func (repo *Repository) CommitGraph() (*commitGraph, error) {
97 if repo == nil {
98 return nil, ErrInvalidObject
99 }
100 repo.commitGraphOnce.Do(func() {
101 repo.commitGraph, repo.commitGraphErr = repo.loadCommitGraph()
102 })
103 return repo.commitGraph, repo.commitGraphErr
104}
105
106func (repo *Repository) loadCommitGraph() (*commitGraph, error) {
107 cg, err := repo.loadCommitGraphSingle()
108 if err == nil {
109 return cg, nil
110 }
111 if !errors.Is(err, ErrNotFound) {
112 return nil, err
113 }
114 return repo.loadCommitGraphChain()
115}
116
117func (repo *Repository) loadCommitGraphSingle() (*commitGraph, error) {
118 path := repo.repoPath(filepath.Join("objects", "info", "commit-graph"))
119 return repo.loadCommitGraphFile(path)
120}
121
122func (repo *Repository) loadCommitGraphChain() (*commitGraph, error) {
123 chainPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", "commit-graph-chain"))
124 f, err := os.Open(chainPath)
125 if err != nil {
126 if os.IsNotExist(err) {
127 return nil, ErrNotFound
128 }
129 return nil, err
130 }
131 defer func() { _ = f.Close() }()
132
133 var hashes []string
134 scanner := bufio.NewScanner(f)
135 for scanner.Scan() {
136 line := strings.TrimSpace(scanner.Text())
137 if line == "" {
138 continue
139 }
140 hashes = append(hashes, line)
141 }
142 if err := scanner.Err(); err != nil {
143 return nil, err
144 }
145 if len(hashes) == 0 {
146 return nil, ErrNotFound
147 }
148
149 var chain *commitGraph
150 for i, h := range hashes {
151 graphPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", fmt.Sprintf("graph-%s.graph", h)))
152 g, err := repo.loadCommitGraphFile(graphPath)
153 if err != nil {
154 return nil, err
155 }
156 if i > 0 {
157 if g.chunkBaseGraphs == nil {
158 return nil, ErrInvalidObject
159 }
160 if len(g.chunkBaseGraphs) < i*repo.hashAlgo.Size() {
161 return nil, ErrInvalidObject
162 }
163 for j := 0; j < i; j++ {
164 want := hashes[j]
165 start := j * repo.hashAlgo.Size()
166 end := start + repo.hashAlgo.Size()
167 got := hexEncode(g.chunkBaseGraphs[start:end])
168 if got != want {
169 return nil, ErrInvalidObject
170 }
171 }
172 }
173 if chain != nil {
174 if chain.numCommits+chain.numCommitsInBase > ^uint32(0) {
175 return nil, ErrInvalidObject
176 }
177 g.numCommitsInBase = chain.numCommits + chain.numCommitsInBase
178 }
179 g.base = chain
180 chain = g
181 }
182 validateCommitGraphChain(chain)
183 return chain, nil
184}
185
186func validateCommitGraphChain(head *commitGraph) {
187 if head == nil {
188 return
189 }
190 readGeneration := true
191 for g := head; g != nil; g = g.base {
192 readGeneration = readGeneration && g.readGeneration
193 }
194 if !readGeneration {
195 for g := head; g != nil; g = g.base {
196 g.readGeneration = false
197 }
198 }
199
200 var settings *bloom.Settings
201 for g := head; g != nil; g = g.base {
202 if g.bloomSettings == nil {
203 continue
204 }
205 if settings == nil {
206 settings = g.bloomSettings
207 continue
208 }
209 if settings.HashVersion != g.bloomSettings.HashVersion ||
210 settings.NumHashes != g.bloomSettings.NumHashes ||
211 settings.BitsPerEntry != g.bloomSettings.BitsPerEntry {
212 g.chunkBloomIndex = nil
213 g.chunkBloomData = nil
214 g.bloomSettings = nil
215 }
216 }
217}
218
219func (repo *Repository) loadCommitGraphFile(path string) (*commitGraph, error) {
220 f, err := os.Open(path)
221 if err != nil {
222 if os.IsNotExist(err) {
223 return nil, ErrNotFound
224 }
225 return nil, err
226 }
227 stat, err := f.Stat()
228 if err != nil {
229 _ = f.Close()
230 return nil, err
231 }
232 if stat.Size() < int64(commitGraphHeaderSize+commitGraphFanoutSize) {
233 _ = f.Close()
234 return nil, ErrInvalidObject
235 }
236 region, err := syscall.Mmap(
237 int(f.Fd()),
238 0,
239 int(stat.Size()),
240 syscall.PROT_READ,
241 syscall.MAP_PRIVATE,
242 )
243 if err != nil {
244 _ = f.Close()
245 return nil, err
246 }
247 err = f.Close()
248 if err != nil {
249 _ = syscall.Munmap(region)
250 return nil, err
251 }
252 cg, err := parseCommitGraph(repo, path, region)
253 if err != nil {
254 _ = syscall.Munmap(region)
255 return nil, err
256 }
257 cg.data = region
258 return cg, nil
259}
260
261func parseCommitGraph(repo *Repository, filename string, data []byte) (*commitGraph, error) {
262 if len(data) < commitGraphHeaderSize {
263 return nil, ErrInvalidObject
264 }
265 sig := binary.BigEndian.Uint32(data[0:4])
266 if sig != commitGraphSignature {
267 return nil, ErrInvalidObject
268 }
269 version := data[4]
270 if version != commitGraphVersion {
271 return nil, ErrInvalidObject
272 }
273 hashVersion := data[5]
274 if hashVersion != hashAlgoVersion(repo.hashAlgo) {
275 return nil, ErrInvalidObject
276 }
277 numChunks := data[6]
278 numBase := data[7]
279
280 tocStart := commitGraphHeaderSize
281 tocLen := (int(numChunks) + 1) * commitGraphChunkEntrySize
282 if len(data) < tocStart+tocLen+repo.hashAlgo.Size() {
283 return nil, ErrInvalidObject
284 }
285
286 trailerStart := len(data) - repo.hashAlgo.Size()
287
288 type chunkEntry struct {
289 id uint32
290 offset uint64
291 }
292 entries := make([]chunkEntry, 0, numChunks+1)
293 for i := 0; i < int(numChunks)+1; i++ {
294 pos := tocStart + i*commitGraphChunkEntrySize
295 id := binary.BigEndian.Uint32(data[pos : pos+4])
296 offset := binary.BigEndian.Uint64(data[pos+4 : pos+12])
297 entries = append(entries, chunkEntry{id: id, offset: offset})
298 }
299
300 chunks := map[uint32][]byte{}
301 for i := 0; i < len(entries)-1; i++ {
302 id := entries[i].id
303 if id == 0 {
304 break
305 }
306 start := int(entries[i].offset)
307 end := int(entries[i+1].offset)
308 if start < tocStart+tocLen || end < start || end > trailerStart {
309 return nil, ErrInvalidObject
310 }
311 chunks[id] = data[start:end]
312 }
313
314 cg := &commitGraph{
315 repo: repo,
316 filename: filename,
317 dataLen: len(data),
318 hashAlgo: repo.hashAlgo,
319 numChunks: numChunks,
320 numBase: numBase,
321 }
322
323 oidf := chunks[commitGraphChunkOIDF]
324 if len(oidf) != commitGraphFanoutSize {
325 return nil, ErrInvalidObject
326 }
327 cg.chunkOIDFanout = oidf
328 cg.numCommits = binary.BigEndian.Uint32(oidf[len(oidf)-4:])
329 for i := 0; i < 255; i++ {
330 if binary.BigEndian.Uint32(oidf[i*4:(i+1)*4]) >
331 binary.BigEndian.Uint32(oidf[(i+1)*4:(i+2)*4]) {
332 return nil, ErrInvalidObject
333 }
334 }
335
336 oidl := chunks[commitGraphChunkOIDL]
337 if len(oidl) != int(cg.numCommits)*repo.hashAlgo.Size() {
338 return nil, ErrInvalidObject
339 }
340 cg.chunkOIDLookup = oidl
341
342 cdat := chunks[commitGraphChunkCDAT]
343 if len(cdat) != int(cg.numCommits)*commitGraphCommitDataSize(repo.hashAlgo) {
344 return nil, ErrInvalidObject
345 }
346 cg.chunkCommit = cdat
347
348 if gda2 := chunks[commitGraphChunkGDA2]; len(gda2) != 0 {
349 if len(gda2) != int(cg.numCommits)*4 {
350 return nil, ErrInvalidObject
351 }
352 cg.chunkGeneration = gda2
353 cg.readGeneration = true
354 }
355 if gdo2 := chunks[commitGraphChunkGDO2]; len(gdo2) != 0 {
356 cg.chunkGenerationOv = gdo2
357 }
358
359 if edge := chunks[commitGraphChunkEDGE]; len(edge) != 0 {
360 if len(edge)%4 != 0 {
361 return nil, ErrInvalidObject
362 }
363 cg.chunkExtraEdges = edge
364 }
365
366 if base := chunks[commitGraphChunkBASE]; len(base) != 0 {
367 if numBase == 0 {
368 return nil, ErrInvalidObject
369 }
370 if len(base) != int(numBase)*repo.hashAlgo.Size() {
371 return nil, ErrInvalidObject
372 }
373 cg.chunkBaseGraphs = base
374 } else if numBase != 0 {
375 return nil, ErrInvalidObject
376 }
377
378 if bidx := chunks[commitGraphChunkBIDX]; len(bidx) != 0 {
379 if len(bidx) != int(cg.numCommits)*4 {
380 return nil, ErrInvalidObject
381 }
382 cg.chunkBloomIndex = bidx
383 }
384 if bdat := chunks[commitGraphChunkBDAT]; len(bdat) != 0 {
385 if len(bdat) < bloom.DataHeaderSize {
386 return nil, ErrInvalidObject
387 }
388 cg.chunkBloomData = bdat
389 settings, err := bloom.ParseSettings(bdat)
390 if err != nil {
391 return nil, err
392 }
393 cg.bloomSettings = settings
394 }
395
396 if (cg.chunkBloomIndex != nil) != (cg.chunkBloomData != nil) {
397 cg.chunkBloomIndex = nil
398 cg.chunkBloomData = nil
399 cg.bloomSettings = nil
400 }
401 if cg.chunkBloomIndex != nil && cg.chunkBloomData != nil {
402 for i := 0; i < int(cg.numCommits); i++ {
403 cur := binary.BigEndian.Uint32(cg.chunkBloomIndex[i*4 : i*4+4])
404 if i > 0 {
405 prev := binary.BigEndian.Uint32(cg.chunkBloomIndex[(i-1)*4 : i*4])
406 if cur < prev {
407 return nil, ErrInvalidObject
408 }
409 }
410 if int(cur) > len(cg.chunkBloomData)-bloom.DataHeaderSize {
411 return nil, ErrInvalidObject
412 }
413 }
414 }
415
416 if err := verifyCommitGraphTrailer(repo, data); err != nil {
417 return nil, err
418 }
419
420 return cg, nil
421}
422
423func commitGraphCommitDataSize(algo hashAlgorithm) int {
424 return algo.Size() + 16
425}
426
427func hashAlgoVersion(algo hashAlgorithm) byte {
428 switch algo {
429 case hashAlgoSHA1:
430 return 1
431 case hashAlgoSHA256:
432 return 2
433 default:
434 return 0
435 }
436}
437
438func verifyCommitGraphTrailer(repo *Repository, data []byte) error {
439 if repo == nil {
440 return ErrInvalidObject
441 }
442 hashSize := repo.hashAlgo.Size()
443 if len(data) < hashSize {
444 return ErrInvalidObject
445 }
446 want := data[len(data)-hashSize:]
447 h, err := repo.hashAlgo.New()
448 if err != nil {
449 return err
450 }
451 _, _ = h.Write(data[:len(data)-hashSize])
452 got := h.Sum(nil)
453 if !bytes.Equal(got, want) {
454 return ErrInvalidObject
455 }
456 return nil
457}
458
459func hexEncode(b []byte) string {
460 const hex = "0123456789abcdef"
461 out := make([]byte, len(b)*2)
462 for i, v := range b {
463 out[i*2] = hex[v>>4]
464 out[i*2+1] = hex[v&0x0f]
465 }
466 return string(out)
467}
468
469func (cg *commitGraph) lookupOID(id Hash) (uint32, bool) {
470 if cg == nil {
471 return 0, false
472 }
473 return commitGraphBsearchOID(cg, id)
474}
475
476func commitGraphBsearchOID(cg *commitGraph, id Hash) (uint32, bool) {
477 if cg == nil || len(cg.chunkOIDLookup) == 0 {
478 return 0, false
479 }
480 hashSize := cg.hashAlgo.Size()
481 first := int(id.data[0])
482 fanout := cg.chunkOIDFanout
483 var start uint32
484 if first > 0 {
485 start = binary.BigEndian.Uint32(fanout[(first-1)*4 : first*4])
486 }
487 end := binary.BigEndian.Uint32(fanout[first*4 : (first+1)*4])
488 lo := int(start)
489 hi := int(end) - 1
490 for lo <= hi {
491 mid := lo + (hi-lo)/2
492 pos := mid * hashSize
493 cmp := bytes.Compare(cg.chunkOIDLookup[pos:pos+hashSize], id.data[:hashSize])
494 if cmp == 0 {
495 return uint32(mid) + cg.numCommitsInBase, true
496 }
497 if cmp < 0 {
498 lo = mid + 1
499 } else {
500 hi = mid - 1
501 }
502 }
503 if cg.base != nil {
504 return commitGraphBsearchOID(cg.base, id)
505 }
506 return 0, false
507}
508
509func (cg *commitGraph) loadOID(pos uint32) (Hash, error) {
510 g := cg
511 for g != nil && pos < g.numCommitsInBase {
512 g = g.base
513 }
514 if g == nil {
515 return Hash{}, ErrInvalidObject
516 }
517 if pos >= g.numCommits+g.numCommitsInBase {
518 return Hash{}, ErrInvalidObject
519 }
520 lex := pos - g.numCommitsInBase
521 hashSize := g.hashAlgo.Size()
522 start := int(lex) * hashSize
523 end := start + hashSize
524 var out Hash
525 copy(out.data[:], g.chunkOIDLookup[start:end])
526 out.algo = g.hashAlgo
527 return out, nil
528}
529
530func (cg *commitGraph) commitDataAt(pos uint32) (*commitGraphCommit, error) {
531 g := cg
532 for g != nil && pos < g.numCommitsInBase {
533 g = g.base
534 }
535 if g == nil {
536 return nil, ErrInvalidObject
537 }
538 if pos >= g.numCommits+g.numCommitsInBase {
539 return nil, ErrInvalidObject
540 }
541 lex := pos - g.numCommitsInBase
542 stride := commitGraphCommitDataSize(g.hashAlgo)
543 start := int(lex) * stride
544 end := start + stride
545 if end > len(g.chunkCommit) {
546 return nil, ErrInvalidObject
547 }
548 buf := g.chunkCommit[start:end]
549
550 var tree Hash
551 hashSize := g.hashAlgo.Size()
552 copy(tree.data[:], buf[:hashSize])
553 tree.algo = g.hashAlgo
554
555 p1 := binary.BigEndian.Uint32(buf[hashSize : hashSize+4])
556 p2 := binary.BigEndian.Uint32(buf[hashSize+4 : hashSize+8])
557
558 genTime := binary.BigEndian.Uint32(buf[hashSize+8 : hashSize+12])
559 timeLow := binary.BigEndian.Uint32(buf[hashSize+12 : hashSize+16])
560
561 timeHigh := genTime & 0x3
562 commitTime := (uint64(timeHigh) << 32) | uint64(timeLow)
563 generation := uint64(genTime >> 2)
564
565 if g.readGeneration {
566 genOffset := binary.BigEndian.Uint32(g.chunkGeneration[int(lex)*4:])
567 if genOffset&correctedGenerationOverflow != 0 {
568 off := genOffset ^ correctedGenerationOverflow
569 idx := int(off) * 8
570 if g.chunkGenerationOv == nil || idx+8 > len(g.chunkGenerationOv) {
571 return nil, ErrInvalidObject
572 }
573 offset := binary.BigEndian.Uint64(g.chunkGenerationOv[idx : idx+8])
574 generation = commitTime + offset
575 } else {
576 generation = commitTime + uint64(genOffset)
577 }
578 }
579
580 var parents []uint32
581 var err error
582 parents, err = commitGraphAppendParent(parents, g, p1)
583 if err != nil {
584 return nil, err
585 }
586 parents, err = commitGraphAppendParent(parents, g, p2)
587 if err != nil {
588 return nil, err
589 }
590
591 return &commitGraphCommit{
592 Position: pos,
593 Tree: tree,
594 Parents: parents,
595 CommitTime: commitTime,
596 Generation: generation,
597 Graph: g,
598 graphLexPos: lex,
599 }, nil
600}