knotserver: improve performance of commit listing #171

closed
opened by oppi.li targeting master from push-mrwusmywonor

RepoIndex and RepoLog should no longer slow down as repositories grow. All endpoints remain backwards compatible.

Signed-off-by: oppiliappan me@oppi.li

Changed files
+203 -40
knotserver
+17 -22
knotserver/routes.go
··· 87 87 } 88 88 } 89 89 90 - commits, err := gr.Commits() 91 - total := len(commits) 90 + commits, err := gr.Commits(0, 60) // a good preview of commits in this repo 92 91 if err != nil { 93 92 writeError(w, err.Error(), http.StatusInternalServerError) 94 93 l.Error("fetching commits", "error", err.Error()) 95 94 return 96 95 } 97 - if len(commits) > 10 { 98 - commits = commits[:10] 96 + 97 + total, err := gr.TotalCommits() 98 + if err != nil { 99 + writeError(w, err.Error(), http.StatusInternalServerError) 100 + l.Error("fetching commits", "error", err.Error()) 101 + return 99 102 } 100 103 101 104 branches, err := gr.Branches() ··· 349 352 return 350 353 } 351 354 352 - commits, err := gr.Commits() 353 - if err != nil { 354 - writeError(w, err.Error(), http.StatusInternalServerError) 355 - l.Error("fetching commits", "error", err.Error()) 356 - return 357 - } 358 - 359 355 // Get page parameters 360 356 page := 1 361 357 pageSize := 30 ··· 372 368 } 373 369 } 374 370 375 - // Calculate pagination 376 - start := (page - 1) * pageSize 377 - end := start + pageSize 378 - total := len(commits) 371 + // convert to offset/limit 372 + offset := (page - 1) * pageSize 373 + limit := pageSize 379 374 380 - if start >= total { 381 - commits = []*object.Commit{} 382 - } else { 383 - if end > total { 384 - end = total 385 - } 386 - commits = commits[start:end] 375 + commits, err := gr.Commits(offset, limit) 376 + if err != nil { 377 + writeError(w, err.Error(), http.StatusInternalServerError) 378 + l.Error("fetching commits", "error", err.Error()) 379 + return 387 380 } 388 381 382 + total := len(commits) 383 + 389 384 resp := types.RepoLogResponse{ 390 385 Commits: commits, 391 386 Ref: ref,
+167
knotserver/git/last_commit.go
··· 1 + package git 2 + 3 + import ( 4 + "bufio" 5 + "context" 6 + "fmt" 7 + "io" 8 + "os/exec" 9 + "path" 10 + "strings" 11 + "sync" 12 + "time" 13 + 14 + "github.com/dgraph-io/ristretto" 15 + "github.com/go-git/go-git/v5/plumbing" 16 + "github.com/go-git/go-git/v5/plumbing/object" 17 + ) 18 + 19 + var ( 20 + commitCache *ristretto.Cache 21 + cacheMu sync.RWMutex 22 + ) 23 + 24 + func init() { 25 + cache, _ := ristretto.NewCache(&ristretto.Config{ 26 + NumCounters: 1e7, 27 + MaxCost: 1 << 30, 28 + BufferItems: 64, 29 + TtlTickerDurationInSec: 120, 30 + }) 31 + commitCache = cache 32 + } 33 + 34 + func (g *GitRepo) streamingGitLog(ctx context.Context, extraArgs ...string) (io.Reader, error) { 35 + args := []string{} 36 + args = append(args, "log") 37 + args = append(args, g.h.String()) 38 + args = append(args, extraArgs...) 39 + 40 + cmd := exec.CommandContext(ctx, "git", args...) 41 + cmd.Dir = g.path 42 + 43 + stdout, err := cmd.StdoutPipe() 44 + if err != nil { 45 + return nil, err 46 + } 47 + 48 + if err := cmd.Start(); err != nil { 49 + return nil, err 50 + } 51 + 52 + return stdout, nil 53 + } 54 + 55 + type commit struct { 56 + hash plumbing.Hash 57 + when time.Time 58 + files []string 59 + message string 60 + } 61 + 62 + func cacheKey(g *GitRepo, path string) string { 63 + return fmt.Sprintf("%s:%s:%s", g.path, g.h.String(), path) 64 + } 65 + 66 + func (g *GitRepo) calculateCommitTime(subtree *object.Tree, parent string) (map[string]commit, error) { 67 + filesToDo := make(map[string]struct{}) 68 + filesDone := make(map[string]commit) 69 + for _, e := range subtree.Entries { 70 + fpath := path.Clean(path.Join(parent, e.Name)) 71 + filesToDo[fpath] = struct{}{} 72 + } 73 + 74 + for _, e := range subtree.Entries { 75 + f := path.Clean(path.Join(parent, e.Name)) 76 + cacheKey := cacheKey(g, f) 77 + if cached, ok := commitCache.Get(cacheKey); ok { 78 + filesDone[f] = cached.(commit) 79 + delete(filesToDo, f) 80 + } else { 81 + filesToDo[f] = struct{}{} 82 + } 83 
+ } 84 + 85 + if len(filesToDo) == 0 { 86 + return filesDone, nil 87 + } 88 + 89 + ctx, cancel := context.WithCancel(context.Background()) 90 + defer cancel() 91 + 92 + pathSpec := "." 93 + if parent != "" { 94 + pathSpec = parent 95 + } 96 + output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=iso", "--name-only", "--", pathSpec) 97 + if err != nil { 98 + return nil, err 99 + } 100 + 101 + reader := bufio.NewReader(output) 102 + var current commit 103 + for { 104 + line, err := reader.ReadString('\n') 105 + if err != nil && err != io.EOF { 106 + return nil, err 107 + } 108 + line = strings.TrimSpace(line) 109 + 110 + if line == "" { 111 + if !current.hash.IsZero() { 112 + // we have a fully parsed commit 113 + for _, f := range current.files { 114 + if _, ok := filesToDo[f]; ok { 115 + filesDone[f] = current 116 + delete(filesToDo, f) 117 + commitCache.Set(cacheKey(g, f), current, 3600) 118 + } 119 + } 120 + 121 + if len(filesToDo) == 0 { 122 + fmt.Println("finished at commit", current.hash, current.when) 123 + cancel() 124 + break 125 + } 126 + current = commit{} 127 + } 128 + } else if current.hash.IsZero() { 129 + parts := strings.SplitN(line, ",", 3) 130 + if len(parts) == 3 { 131 + current.hash = plumbing.NewHash(parts[0]) 132 + current.when, _ = time.Parse("2006-01-02 15:04:05 -0700", parts[1]) 133 + current.message = parts[2] 134 + } 135 + } else { 136 + // all ancestors along this path should also be included 137 + file := path.Clean(line) 138 + ancestors := ancestors(file) 139 + current.files = append(current.files, file) 140 + current.files = append(current.files, ancestors...) 141 + } 142 + 143 + if err == io.EOF { 144 + break 145 + } 146 + } 147 + 148 + return filesDone, nil 149 + } 150 + 151 + func ancestors(p string) []string { 152 + var ancestors []string 153 + 154 + for { 155 + p = path.Dir(p) 156 + if p == "." 
|| p == "/" { 157 + break 158 + } 159 + ancestors = append(ancestors, p) 160 + } 161 + return ancestors 162 + } 163 + 164 + type Cache interface { 165 + Put(key string, val any, timeout int64) error 166 + Get(key string) any 167 + }
+19 -18
knotserver/git/tree.go
··· 2 2 3 3 import ( 4 4 "fmt" 5 + "path" 5 6 "time" 6 7 7 8 "github.com/go-git/go-git/v5/plumbing/object" ··· 41 42 return files, nil 42 43 } 43 44 44 - func (g *GitRepo) makeNiceTree(t *object.Tree, parent string) []types.NiceTree { 45 + func (g *GitRepo) makeNiceTree(subtree *object.Tree, parent string) []types.NiceTree { 45 46 nts := []types.NiceTree{} 46 47 47 - for _, e := range t.Entries { 48 + times, err := g.calculateCommitTime(subtree, parent) 49 + if err != nil { 50 + return nts 51 + } 52 + 53 + for _, e := range subtree.Entries { 48 54 mode, _ := e.Mode.ToOSFileMode() 49 - sz, _ := t.Size(e.Name) 55 + sz, _ := subtree.Size(e.Name) 50 56 51 - var fpath string 52 - if parent != "" { 53 - fpath = fmt.Sprintf("%s/%s", parent, e.Name) 54 - } else { 55 - fpath = e.Name 57 + fpath := path.Join(parent, e.Name) 58 + 59 + lastCommit := &types.LastCommitInfo{ 60 + Hash: g.h, 61 + Message: "", 62 + When: time.Now(), 56 63 } 57 - lastCommit, err := g.LastCommitForPath(fpath) 58 - if err != nil { 59 - fmt.Println("error getting last commit time:", err) 60 - // We don't want to skip the file, so worst case lets just 61 - // populate it with "defaults". 62 - lastCommit = &types.LastCommitInfo{ 63 - Hash: g.h, 64 - Message: "", 65 - When: time.Now(), 66 - } 64 + if t, ok := times[fpath]; ok { 65 + lastCommit.Hash = t.hash 66 + lastCommit.Message = t.message 67 + lastCommit.When = t.when 67 68 } 68 69 69 70 nts = append(nts, types.NiceTree{