package furgit import ( "bufio" "bytes" "encoding/binary" "errors" "fmt" "os" "path/filepath" "strings" "sync" "syscall" "codeberg.org/lindenii/furgit/internal/bloom" ) const ( commitGraphSignature = 0x43475048 // "CGPH" commitGraphVersion = 1 ) const ( commitGraphChunkOIDF = 0x4f494446 // "OIDF" commitGraphChunkOIDL = 0x4f49444c // "OIDL" commitGraphChunkCDAT = 0x43444154 // "CDAT" commitGraphChunkGDA2 = 0x47444132 // "GDA2" commitGraphChunkGDO2 = 0x47444f32 // "GDO2" commitGraphChunkEDGE = 0x45444745 // "EDGE" commitGraphChunkBIDX = 0x42494458 // "BIDX" commitGraphChunkBDAT = 0x42444154 // "BDAT" commitGraphChunkBASE = 0x42415345 // "BASE" ) const ( commitGraphHeaderSize = 8 commitGraphChunkEntrySize = 12 commitGraphFanoutSize = 256 * 4 ) type commitGraph struct { repo *Repository filename string data []byte dataLen int numChunks uint8 numBase uint8 hashAlgo hashAlgorithm numCommits uint32 numCommitsInBase uint32 base *commitGraph chunkOIDFanout []byte chunkOIDLookup []byte chunkCommit []byte chunkGeneration []byte chunkGenerationOv []byte readGeneration bool chunkExtraEdges []byte chunkBloomIndex []byte chunkBloomData []byte chunkBaseGraphs []byte bloomSettings *bloom.Settings closeOnce sync.Once } func (cg *commitGraph) Close() error { if cg == nil { return nil } var closeErr error cg.closeOnce.Do(func() { if len(cg.data) > 0 { if err := syscall.Munmap(cg.data); closeErr == nil { closeErr = err } cg.data = nil } }) if cg.base != nil { _ = cg.base.Close() } return closeErr } func (repo *Repository) CommitGraph() (*commitGraph, error) { if repo == nil { return nil, ErrInvalidObject } repo.commitGraphOnce.Do(func() { repo.commitGraph, repo.commitGraphErr = repo.loadCommitGraph() }) return repo.commitGraph, repo.commitGraphErr } func (repo *Repository) loadCommitGraph() (*commitGraph, error) { cg, err := repo.loadCommitGraphSingle() if err == nil { return cg, nil } if !errors.Is(err, ErrNotFound) { return nil, err } return repo.loadCommitGraphChain() } func (repo *Repository) loadCommitGraphSingle() (*commitGraph, error) { path := repo.repoPath(filepath.Join("objects", "info", "commit-graph")) return repo.loadCommitGraphFile(path) } func (repo *Repository) loadCommitGraphChain() (*commitGraph, error) { chainPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", "commit-graph-chain")) f, err := os.Open(chainPath) if err != nil { if os.IsNotExist(err) { return nil, ErrNotFound } return nil, err } defer func() { _ = f.Close() }() var hashes []string scanner := bufio.NewScanner(f) for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) if line == "" { continue } hashes = append(hashes, line) } if err := scanner.Err(); err != nil { return nil, err } if len(hashes) == 0 { return nil, ErrNotFound } var chain *commitGraph for i, h := range hashes { graphPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", fmt.Sprintf("graph-%s.graph", h))) g, err := repo.loadCommitGraphFile(graphPath) if err != nil { return nil, err } if i > 0 { if g.chunkBaseGraphs == nil { return nil, ErrInvalidObject } if len(g.chunkBaseGraphs) < i*repo.hashAlgo.Size() { return nil, ErrInvalidObject } for j := 0; j < i; j++ { want := hashes[j] start := j * repo.hashAlgo.Size() end := start + repo.hashAlgo.Size() got := hexEncode(g.chunkBaseGraphs[start:end]) if got != want { return nil, ErrInvalidObject } } } if chain != nil { if chain.numCommits+chain.numCommitsInBase > ^uint32(0) { return nil, ErrInvalidObject } g.numCommitsInBase = chain.numCommits + chain.numCommitsInBase } g.base = chain chain = g } validateCommitGraphChain(chain) return chain, nil } func validateCommitGraphChain(head *commitGraph) { if head == nil { return } readGeneration := true for g := head; g != nil; g = g.base { readGeneration = readGeneration && g.readGeneration } if !readGeneration { for g := head; g != nil; g = g.base { g.readGeneration = false } } var settings *bloom.Settings for g := head; g != nil; g = g.base { if g.bloomSettings == nil { continue } if settings == nil { settings = g.bloomSettings continue } if settings.HashVersion != g.bloomSettings.HashVersion || settings.NumHashes != g.bloomSettings.NumHashes || settings.BitsPerEntry != g.bloomSettings.BitsPerEntry { g.chunkBloomIndex = nil g.chunkBloomData = nil g.bloomSettings = nil } } } func (repo *Repository) loadCommitGraphFile(path string) (*commitGraph, error) { f, err := os.Open(path) if err != nil { if os.IsNotExist(err) { return nil, ErrNotFound } return nil, err } stat, err := f.Stat() if err != nil { _ = f.Close() return nil, err } if stat.Size() < int64(commitGraphHeaderSize+commitGraphFanoutSize) { _ = f.Close() return nil, ErrInvalidObject } region, err := syscall.Mmap( int(f.Fd()), 0, int(stat.Size()), syscall.PROT_READ, syscall.MAP_PRIVATE, ) if err != nil { _ = f.Close() return nil, err } err = f.Close() if err != nil { _ = syscall.Munmap(region) return nil, err } cg, err := parseCommitGraph(repo, path, region) if err != nil { _ = syscall.Munmap(region) return nil, err } cg.data = region return cg, nil } func parseCommitGraph(repo *Repository, filename string, data []byte) (*commitGraph, error) { if len(data) < commitGraphHeaderSize { return nil, ErrInvalidObject } sig := binary.BigEndian.Uint32(data[0:4]) if sig != commitGraphSignature { return nil, ErrInvalidObject } version := data[4] if version != commitGraphVersion { return nil, ErrInvalidObject } hashVersion := data[5] if hashVersion != hashAlgoVersion(repo.hashAlgo) { return nil, ErrInvalidObject } numChunks := data[6] numBase := data[7] tocStart := commitGraphHeaderSize tocLen := (int(numChunks) + 1) * commitGraphChunkEntrySize if len(data) < tocStart+tocLen+repo.hashAlgo.Size() { return nil, ErrInvalidObject } trailerStart := len(data) - repo.hashAlgo.Size() type chunkEntry struct { id uint32 offset uint64 } entries := make([]chunkEntry, 0, numChunks+1) for i := 0; i < int(numChunks)+1; i++ { pos := tocStart + i*commitGraphChunkEntrySize id := binary.BigEndian.Uint32(data[pos : pos+4]) offset := binary.BigEndian.Uint64(data[pos+4 : pos+12]) entries = append(entries, chunkEntry{id: id, offset: offset}) } chunks := map[uint32][]byte{} for i := 0; i < len(entries)-1; i++ { id := entries[i].id if id == 0 { break } start := int(entries[i].offset) end := int(entries[i+1].offset) if start < tocStart+tocLen || end < start || end > trailerStart { return nil, ErrInvalidObject } chunks[id] = data[start:end] } cg := &commitGraph{ repo: repo, filename: filename, dataLen: len(data), hashAlgo: repo.hashAlgo, numChunks: numChunks, numBase: numBase, } oidf := chunks[commitGraphChunkOIDF] if len(oidf) != commitGraphFanoutSize { return nil, ErrInvalidObject } cg.chunkOIDFanout = oidf cg.numCommits = binary.BigEndian.Uint32(oidf[len(oidf)-4:]) for i := 0; i < 255; i++ { if binary.BigEndian.Uint32(oidf[i*4:(i+1)*4]) > binary.BigEndian.Uint32(oidf[(i+1)*4:(i+2)*4]) { return nil, ErrInvalidObject } } oidl := chunks[commitGraphChunkOIDL] if len(oidl) != int(cg.numCommits)*repo.hashAlgo.Size() { return nil, ErrInvalidObject } cg.chunkOIDLookup = oidl cdat := chunks[commitGraphChunkCDAT] if len(cdat) != int(cg.numCommits)*commitGraphCommitDataSize(repo.hashAlgo) { return nil, ErrInvalidObject } cg.chunkCommit = cdat if gda2 := chunks[commitGraphChunkGDA2]; len(gda2) != 0 { if len(gda2) != int(cg.numCommits)*4 { return nil, ErrInvalidObject } cg.chunkGeneration = gda2 cg.readGeneration = true } if gdo2 := chunks[commitGraphChunkGDO2]; len(gdo2) != 0 { cg.chunkGenerationOv = gdo2 } if edge := chunks[commitGraphChunkEDGE]; len(edge) != 0 { if len(edge)%4 != 0 { return nil, ErrInvalidObject } cg.chunkExtraEdges = edge } if base := chunks[commitGraphChunkBASE]; len(base) != 0 { if numBase == 0 { return nil, ErrInvalidObject } if len(base) != int(numBase)*repo.hashAlgo.Size() { return nil, ErrInvalidObject } cg.chunkBaseGraphs = base } else if numBase != 0 { return nil, ErrInvalidObject } if bidx := chunks[commitGraphChunkBIDX]; len(bidx) != 0 { if len(bidx) != int(cg.numCommits)*4 { return nil, ErrInvalidObject } cg.chunkBloomIndex = bidx } if bdat := chunks[commitGraphChunkBDAT]; len(bdat) != 0 { if len(bdat) < bloom.DataHeaderSize { return nil, ErrInvalidObject } cg.chunkBloomData = bdat settings, err := bloom.ParseSettings(bdat) if err != nil { return nil, err } cg.bloomSettings = settings } if (cg.chunkBloomIndex != nil) != (cg.chunkBloomData != nil) { cg.chunkBloomIndex = nil cg.chunkBloomData = nil cg.bloomSettings = nil } if cg.chunkBloomIndex != nil && cg.chunkBloomData != nil { for i := 0; i < int(cg.numCommits); i++ { cur := binary.BigEndian.Uint32(cg.chunkBloomIndex[i*4 : i*4+4]) if i > 0 { prev := binary.BigEndian.Uint32(cg.chunkBloomIndex[(i-1)*4 : i*4]) if cur < prev { return nil, ErrInvalidObject } } if int(cur) > len(cg.chunkBloomData)-bloom.DataHeaderSize { return nil, ErrInvalidObject } } } if err := verifyCommitGraphTrailer(repo, data); err != nil { return nil, err } return cg, nil } func commitGraphCommitDataSize(algo hashAlgorithm) int { return algo.Size() + 16 } func hashAlgoVersion(algo hashAlgorithm) byte { switch algo { case hashAlgoSHA1: return 1 case hashAlgoSHA256: return 2 default: return 0 } } func verifyCommitGraphTrailer(repo *Repository, data []byte) error { if repo == nil { return ErrInvalidObject } hashSize := repo.hashAlgo.Size() if len(data) < hashSize { return ErrInvalidObject } want := data[len(data)-hashSize:] h, err := repo.hashAlgo.New() if err != nil { return err } _, _ = h.Write(data[:len(data)-hashSize]) got := h.Sum(nil) if !bytes.Equal(got, want) { return ErrInvalidObject } return nil } func hexEncode(b []byte) string { const hex = "0123456789abcdef" out := make([]byte, len(b)*2) for i, v := range b { out[i*2] = hex[v>>4] out[i*2+1] = hex[v&0x0f] } return string(out) } func (cg *commitGraph) lookupOID(id Hash) (uint32, bool) { if cg == nil { return 0, false } return commitGraphBsearchOID(cg, id) } func commitGraphBsearchOID(cg *commitGraph, id Hash) (uint32, bool) { if cg == nil || len(cg.chunkOIDLookup) == 0 { return 0, false } hashSize := cg.hashAlgo.Size() first := int(id.data[0]) fanout := cg.chunkOIDFanout var start uint32 if first > 0 { start = binary.BigEndian.Uint32(fanout[(first-1)*4 : first*4]) } end := binary.BigEndian.Uint32(fanout[first*4 : (first+1)*4]) lo := int(start) hi := int(end) - 1 for lo <= hi { mid := lo + (hi-lo)/2 pos := mid * hashSize cmp := bytes.Compare(cg.chunkOIDLookup[pos:pos+hashSize], id.data[:hashSize]) if cmp == 0 { return uint32(mid) + cg.numCommitsInBase, true } if cmp < 0 { lo = mid + 1 } else { hi = mid - 1 } } if cg.base != nil { return commitGraphBsearchOID(cg.base, id) } return 0, false } func (cg *commitGraph) loadOID(pos uint32) (Hash, error) { g := cg for g != nil && pos < g.numCommitsInBase { g = g.base } if g == nil { return Hash{}, ErrInvalidObject } if pos >= g.numCommits+g.numCommitsInBase { return Hash{}, ErrInvalidObject } lex := pos - g.numCommitsInBase hashSize := g.hashAlgo.Size() start := int(lex) * hashSize end := start + hashSize var out Hash copy(out.data[:], g.chunkOIDLookup[start:end]) out.algo = g.hashAlgo return out, nil } func (cg *commitGraph) commitDataAt(pos uint32) (*commitGraphCommit, error) { g := cg for g != nil && pos < g.numCommitsInBase { g = g.base } if g == nil { return nil, ErrInvalidObject } if pos >= g.numCommits+g.numCommitsInBase { return nil, ErrInvalidObject } lex := pos - g.numCommitsInBase stride := commitGraphCommitDataSize(g.hashAlgo) start := int(lex) * stride end := start + stride if end > len(g.chunkCommit) { return nil, ErrInvalidObject } buf := g.chunkCommit[start:end] var tree Hash hashSize := g.hashAlgo.Size() copy(tree.data[:], buf[:hashSize]) tree.algo = g.hashAlgo p1 := binary.BigEndian.Uint32(buf[hashSize : hashSize+4]) p2 := binary.BigEndian.Uint32(buf[hashSize+4 : hashSize+8]) genTime := binary.BigEndian.Uint32(buf[hashSize+8 : hashSize+12]) timeLow := binary.BigEndian.Uint32(buf[hashSize+12 : hashSize+16]) timeHigh := genTime & 0x3 commitTime := (uint64(timeHigh) << 32) | uint64(timeLow) generation := uint64(genTime >> 2) if g.readGeneration { genOffset := binary.BigEndian.Uint32(g.chunkGeneration[int(lex)*4:]) if genOffset&correctedGenerationOverflow != 0 { off := genOffset ^ correctedGenerationOverflow idx := int(off) * 8 if g.chunkGenerationOv == nil || idx+8 > len(g.chunkGenerationOv) { return nil, ErrInvalidObject } offset := binary.BigEndian.Uint64(g.chunkGenerationOv[idx : idx+8]) generation = commitTime + offset } else { generation = commitTime + uint64(genOffset) } } var parents []uint32 var err error parents, err = commitGraphAppendParent(parents, g, p1) if err != nil { return nil, err } parents, err = commitGraphAppendParent(parents, g, p2) if err != nil { return nil, err } return &commitGraphCommit{ Position: pos, Tree: tree, Parents: parents, CommitTime: commitTime, Generation: generation, Graph: g, graphLexPos: lex, }, nil }