1package git
2
3import (
4 "bufio"
5 "context"
6 "crypto/sha256"
7 "fmt"
8 "io"
9 "iter"
10 "os/exec"
11 "path"
12 "strconv"
13 "strings"
14 "time"
15
16 "github.com/dgraph-io/ristretto"
17 "github.com/go-git/go-git/v5/plumbing"
18 "tangled.org/core/sets"
19 "tangled.org/core/types"
20)
21
var (
 // commitCache memoizes last-commit lookups for the whole package. It is
 // created once in init, keyed by cacheKey(repo, path), and stores commit
 // values.
 commitCache *ristretto.Cache
)
25
26func init() {
27 cache, _ := ristretto.NewCache(&ristretto.Config{
28 NumCounters: 1e7,
29 MaxCost: 1 << 30,
30 BufferItems: 64,
31 TtlTickerDurationInSec: 120,
32 })
33 commitCache = cache
34}
35
// processReader exposes a child process's standard output as an
// io.ReadCloser and ties the lifetime of the process to the reader: closing
// the reader also reaps the process.
type processReader struct {
	io.Reader
	cmd    *exec.Cmd     // the started command that produces the output
	stdout io.ReadCloser // the command's stdout pipe, closed by Close
}

// Close closes the stdout pipe and then waits for the command to exit.
//
// The command is always waited for, even when closing the pipe fails;
// returning early would leave the child process unreaped. When both steps
// fail, the pipe-close error is returned because it happened first.
func (pr *processReader) Close() error {
	closeErr := pr.stdout.Close()
	waitErr := pr.cmd.Wait()
	if closeErr != nil {
		return closeErr
	}
	return waitErr
}
49
50func (g *GitRepo) streamingGitLog(ctx context.Context, extraArgs ...string) (io.ReadCloser, error) {
51 args := []string{}
52 args = append(args, "log")
53 args = append(args, g.h.String())
54 args = append(args, extraArgs...)
55
56 cmd := exec.CommandContext(ctx, "git", args...)
57 cmd.Dir = g.path
58
59 stdout, err := cmd.StdoutPipe()
60 if err != nil {
61 return nil, err
62 }
63
64 if err := cmd.Start(); err != nil {
65 return nil, err
66 }
67
68 return &processReader{
69 Reader: stdout,
70 cmd: cmd,
71 stdout: stdout,
72 }, nil
73}
74
// commit is the subset of a git commit needed to answer "which commit last
// touched this path", as parsed from `git log --name-only` output.
type commit struct {
 // hash is the commit's object id.
 hash plumbing.Hash
 // when is the commit date parsed from the log's unix-timestamp field.
 when time.Time
 // files holds every path the log lists for this commit, plus all of their
 // ancestor directories.
 files sets.Set[string]
 // message is the third comma-separated field of the log header line.
 message string
}
81
82func newCommit() commit {
83 return commit{
84 files: sets.New[string](),
85 }
86}
87
88type lastCommitDir struct {
89 dir string
90 entries []string
91}
92
93func (l lastCommitDir) children() iter.Seq[string] {
94 return func(yield func(string) bool) {
95 for _, child := range l.entries {
96 if !yield(path.Join(l.dir, child)) {
97 return
98 }
99 }
100 }
101}
102
103func cacheKey(g *GitRepo, path string) string {
104 sep := byte(':')
105 hash := sha256.Sum256(fmt.Append([]byte{}, g.path, sep, g.h.String(), sep, path))
106 return fmt.Sprintf("%x", hash)
107}
108
109func (g *GitRepo) lastCommitDirIn(ctx context.Context, parent lastCommitDir, timeout time.Duration) (map[string]commit, error) {
110 ctx, cancel := context.WithTimeout(ctx, timeout)
111 defer cancel()
112 return g.lastCommitDir(ctx, parent)
113}
114
115func (g *GitRepo) lastCommitDir(ctx context.Context, parent lastCommitDir) (map[string]commit, error) {
116 filesToDo := sets.Collect(parent.children())
117 filesDone := make(map[string]commit)
118
119 for p := range filesToDo.All() {
120 cacheKey := cacheKey(g, p)
121 if cached, ok := commitCache.Get(cacheKey); ok {
122 filesDone[p] = cached.(commit)
123 filesToDo.Remove(p)
124 } else {
125 filesToDo.Insert(p)
126 }
127 }
128
129 if filesToDo.IsEmpty() {
130 return filesDone, nil
131 }
132
133 ctx, cancel := context.WithCancel(ctx)
134 defer cancel()
135
136 pathSpec := "."
137 if parent.dir != "" {
138 pathSpec = parent.dir
139 }
140 if filesToDo.Len() == 1 {
141 // this is an optimization for the scenario where we want to calculate
142 // the last commit for just one path, we can directly set the pathspec to that path
143 for s := range filesToDo.All() {
144 pathSpec = s
145 }
146 }
147
148 output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=unix", "--name-only", "--", pathSpec)
149 if err != nil {
150 return nil, err
151 }
152 defer output.Close() // Ensure the git process is properly cleaned up
153
154 reader := bufio.NewReader(output)
155 current := newCommit()
156 for {
157 line, err := reader.ReadString('\n')
158 if err != nil && err != io.EOF {
159 return nil, err
160 }
161 line = strings.TrimSpace(line)
162
163 if line == "" {
164 if !current.hash.IsZero() {
165 // we have a fully parsed commit
166 for f := range current.files.All() {
167 if filesToDo.Contains(f) {
168 filesDone[f] = current
169 filesToDo.Remove(f)
170 commitCache.Set(cacheKey(g, f), current, 0)
171 }
172 }
173
174 if filesToDo.IsEmpty() {
175 break
176 }
177 current = newCommit()
178 }
179 } else if current.hash.IsZero() {
180 parts := strings.SplitN(line, ",", 3)
181 if len(parts) == 3 {
182 current.hash = plumbing.NewHash(parts[0])
183 epochTime, _ := strconv.ParseInt(parts[1], 10, 64)
184 current.when = time.Unix(epochTime, 0)
185 current.message = parts[2]
186 }
187 } else {
188 // all ancestors along this path should also be included
189 file := path.Clean(line)
190 current.files.Insert(file)
191 for _, a := range ancestors(file) {
192 current.files.Insert(a)
193 }
194 }
195
196 if err == io.EOF {
197 break
198 }
199 }
200
201 return filesDone, nil
202}
203
204// LastCommitFile returns the last commit information for a specific file path
205func (g *GitRepo) LastCommitFile(ctx context.Context, filePath string) (*types.LastCommitInfo, error) {
206 parent, child := path.Split(filePath)
207 parent = path.Clean(parent)
208 if parent == "." {
209 parent = ""
210 }
211
212 lastCommitDir := lastCommitDir{
213 dir: parent,
214 entries: []string{child},
215 }
216
217 times, err := g.lastCommitDirIn(ctx, lastCommitDir, 2*time.Second)
218 if err != nil {
219 return nil, fmt.Errorf("calculate commit time: %w", err)
220 }
221
222 // extract the only element of the map, the commit info of the current path
223 var commitInfo *commit
224 for _, c := range times {
225 commitInfo = &c
226 }
227
228 if commitInfo == nil {
229 return nil, fmt.Errorf("no commit found for path: %s", filePath)
230 }
231
232 return &types.LastCommitInfo{
233 Hash: commitInfo.hash,
234 Message: commitInfo.message,
235 When: commitInfo.when,
236 }, nil
237}
238
// ancestors lists every parent directory of p, nearest first, stopping
// before the root ("." for relative paths, "/" for absolute ones). It
// returns nil when p has no such parent.
func ancestors(p string) []string {
	var chain []string
	for dir := path.Dir(p); dir != "." && dir != "/"; dir = path.Dir(dir) {
		chain = append(chain, dir)
	}
	return chain
}