at master 250 lines 5.4 kB view raw
1package git 2 3import ( 4 "bufio" 5 "context" 6 "crypto/sha256" 7 "fmt" 8 "io" 9 "iter" 10 "os/exec" 11 "path" 12 "strconv" 13 "strings" 14 "time" 15 16 "github.com/dgraph-io/ristretto" 17 "github.com/go-git/go-git/v5/plumbing" 18 "tangled.org/core/sets" 19 "tangled.org/core/types" 20) 21 22var ( 23 commitCache *ristretto.Cache 24) 25 26func init() { 27 cache, _ := ristretto.NewCache(&ristretto.Config{ 28 NumCounters: 1e7, 29 MaxCost: 1 << 30, 30 BufferItems: 64, 31 TtlTickerDurationInSec: 120, 32 }) 33 commitCache = cache 34} 35 36// processReader wraps a reader and ensures the associated process is cleaned up 37type processReader struct { 38 io.Reader 39 cmd *exec.Cmd 40 stdout io.ReadCloser 41} 42 43func (pr *processReader) Close() error { 44 if err := pr.stdout.Close(); err != nil { 45 return err 46 } 47 return pr.cmd.Wait() 48} 49 50func (g *GitRepo) streamingGitLog(ctx context.Context, extraArgs ...string) (io.ReadCloser, error) { 51 args := []string{} 52 args = append(args, "log") 53 args = append(args, g.h.String()) 54 args = append(args, extraArgs...) 55 56 cmd := exec.CommandContext(ctx, "git", args...) 57 cmd.Dir = g.path 58 59 stdout, err := cmd.StdoutPipe() 60 if err != nil { 61 return nil, err 62 } 63 64 if err := cmd.Start(); err != nil { 65 return nil, err 66 } 67 68 return &processReader{ 69 Reader: stdout, 70 cmd: cmd, 71 stdout: stdout, 72 }, nil 73} 74 75type commit struct { 76 hash plumbing.Hash 77 when time.Time 78 files sets.Set[string] 79 message string 80} 81 82func newCommit() commit { 83 return commit{ 84 files: sets.New[string](), 85 } 86} 87 88type lastCommitDir struct { 89 dir string 90 entries []string 91} 92 93func (l lastCommitDir) children() iter.Seq[string] { 94 return func(yield func(string) bool) { 95 for _, child := range l.entries { 96 if !yield(path.Join(l.dir, child)) { 97 return 98 } 99 } 100 } 101} 102 103func cacheKey(g *GitRepo, path string) string { 104 sep := byte(':') 105 hash := sha256.Sum256(fmt.Append([]byte{}, g.path, sep, g.h.String(), sep, path)) 106 return fmt.Sprintf("%x", hash) 107} 108 109func (g *GitRepo) lastCommitDirIn(ctx context.Context, parent lastCommitDir, timeout time.Duration) (map[string]commit, error) { 110 ctx, cancel := context.WithTimeout(ctx, timeout) 111 defer cancel() 112 return g.lastCommitDir(ctx, parent) 113} 114 115func (g *GitRepo) lastCommitDir(ctx context.Context, parent lastCommitDir) (map[string]commit, error) { 116 filesToDo := sets.Collect(parent.children()) 117 filesDone := make(map[string]commit) 118 119 for p := range filesToDo.All() { 120 cacheKey := cacheKey(g, p) 121 if cached, ok := commitCache.Get(cacheKey); ok { 122 filesDone[p] = cached.(commit) 123 filesToDo.Remove(p) 124 } else { 125 filesToDo.Insert(p) 126 } 127 } 128 129 if filesToDo.IsEmpty() { 130 return filesDone, nil 131 } 132 133 ctx, cancel := context.WithCancel(ctx) 134 defer cancel() 135 136 pathSpec := "." 137 if parent.dir != "" { 138 pathSpec = parent.dir 139 } 140 if filesToDo.Len() == 1 { 141 // this is an optimization for the scenario where we want to calculate 142 // the last commit for just one path, we can directly set the pathspec to that path 143 for s := range filesToDo.All() { 144 pathSpec = s 145 } 146 } 147 148 output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=unix", "--name-only", "--", pathSpec) 149 if err != nil { 150 return nil, err 151 } 152 defer output.Close() // Ensure the git process is properly cleaned up 153 154 reader := bufio.NewReader(output) 155 current := newCommit() 156 for { 157 line, err := reader.ReadString('\n') 158 if err != nil && err != io.EOF { 159 return nil, err 160 } 161 line = strings.TrimSpace(line) 162 163 if line == "" { 164 if !current.hash.IsZero() { 165 // we have a fully parsed commit 166 for f := range current.files.All() { 167 if filesToDo.Contains(f) { 168 filesDone[f] = current 169 filesToDo.Remove(f) 170 commitCache.Set(cacheKey(g, f), current, 0) 171 } 172 } 173 174 if filesToDo.IsEmpty() { 175 break 176 } 177 current = newCommit() 178 } 179 } else if current.hash.IsZero() { 180 parts := strings.SplitN(line, ",", 3) 181 if len(parts) == 3 { 182 current.hash = plumbing.NewHash(parts[0]) 183 epochTime, _ := strconv.ParseInt(parts[1], 10, 64) 184 current.when = time.Unix(epochTime, 0) 185 current.message = parts[2] 186 } 187 } else { 188 // all ancestors along this path should also be included 189 file := path.Clean(line) 190 current.files.Insert(file) 191 for _, a := range ancestors(file) { 192 current.files.Insert(a) 193 } 194 } 195 196 if err == io.EOF { 197 break 198 } 199 } 200 201 return filesDone, nil 202} 203 204// LastCommitFile returns the last commit information for a specific file path 205func (g *GitRepo) LastCommitFile(ctx context.Context, filePath string) (*types.LastCommitInfo, error) { 206 parent, child := path.Split(filePath) 207 parent = path.Clean(parent) 208 if parent == "." { 209 parent = "" 210 } 211 212 lastCommitDir := lastCommitDir{ 213 dir: parent, 214 entries: []string{child}, 215 } 216 217 times, err := g.lastCommitDirIn(ctx, lastCommitDir, 2*time.Second) 218 if err != nil { 219 return nil, fmt.Errorf("calculate commit time: %w", err) 220 } 221 222 // extract the only element of the map, the commit info of the current path 223 var commitInfo *commit 224 for _, c := range times { 225 commitInfo = &c 226 } 227 228 if commitInfo == nil { 229 return nil, fmt.Errorf("no commit found for path: %s", filePath) 230 } 231 232 return &types.LastCommitInfo{ 233 Hash: commitInfo.hash, 234 Message: commitInfo.message, 235 When: commitInfo.when, 236 }, nil 237} 238 239func ancestors(p string) []string { 240 var ancestors []string 241 242 for { 243 p = path.Dir(p) 244 if p == "." || p == "/" { 245 break 246 } 247 ancestors = append(ancestors, p) 248 } 249 return ancestors 250}