From 03ac78d47e2800c4f9e2db5678f9eeda886b05dd Mon Sep 17 00:00:00 2001 From: oppiliappan Date: Wed, 28 May 2025 15:26:59 +0100 Subject: [PATCH] knotserver: improve performance of commit listing Change-Id: ylurqrxkzkyzoylrpzqklxmknslwmxry RepoIndex and RepoLog should no longer take longer on bigger repositories. all endpoints are still backwards compatible. Signed-off-by: oppiliappan --- knotserver/git/git.go | 54 +++++++++++++++++++++++++++++++++++++------ knotserver/routes.go | 39 ++++++++++++++----------------- 2 files changed, 64 insertions(+), 29 deletions(-) diff --git a/knotserver/git/git.go b/knotserver/git/git.go index 9fd23b2..3630a80 100644 --- a/knotserver/git/git.go +++ b/knotserver/git/git.go @@ -142,21 +142,61 @@ func PlainOpen(path string) (*GitRepo, error) { return &g, nil } -func (g *GitRepo) Commits() ([]*object.Commit, error) { - ci, err := g.r.Log(&git.LogOptions{From: g.h}) +func (g *GitRepo) Commits(offset, limit int) ([]*object.Commit, error) { + commits := []*object.Commit{} + + output, err := g.revList( + fmt.Sprintf("--skip=%d", offset), + fmt.Sprintf("--max-count=%d", limit), + ) if err != nil { return nil, fmt.Errorf("commits from ref: %w", err) } - commits := []*object.Commit{} - ci.ForEach(func(c *object.Commit) error { - commits = append(commits, c) - return nil - }) + lines := strings.Split(strings.TrimSpace(string(output)), "\n") + if len(lines) == 1 && lines[0] == "" { + return commits, nil + } + + for _, item := range lines { + obj, err := g.r.CommitObject(plumbing.NewHash(item)) + if err != nil { + continue + } + commits = append(commits, obj) + } return commits, nil } +func (g *GitRepo) TotalCommits() (int, error) { + output, err := g.revList( + fmt.Sprintf("--count"), + ) + if err != nil { + return 0, fmt.Errorf("failed to run rev-list", err) + } + + count, err := strconv.Atoi(strings.TrimSpace(string(output))) + if err != nil { + return 0, err + } + + return count, nil +} + +func (g *GitRepo) revList(extraArgs ...string) 
([]byte, error) { + var args []string + args = append(args, "rev-list") + args = append(args, g.h.String()) + args = append(args, extraArgs...) + + cmd := exec.Command("git", args...) + cmd.Dir = g.path + + return cmd.Output() +} + func (g *GitRepo) Commit(h plumbing.Hash) (*object.Commit, error) { return g.r.CommitObject(h) } diff --git a/knotserver/routes.go b/knotserver/routes.go index 29dca92..068fb28 100644 --- a/knotserver/routes.go +++ b/knotserver/routes.go @@ -87,15 +87,18 @@ func (h *Handle) RepoIndex(w http.ResponseWriter, r *http.Request) { } } - commits, err := gr.Commits() - total := len(commits) + commits, err := gr.Commits(0, 60) // a good preview of commits in this repo if err != nil { writeError(w, err.Error(), http.StatusInternalServerError) l.Error("fetching commits", "error", err.Error()) return } - if len(commits) > 10 { - commits = commits[:10] + + total, err := gr.TotalCommits() + if err != nil { + writeError(w, err.Error(), http.StatusInternalServerError) + l.Error("fetching commits", "error", err.Error()) + return } branches, err := gr.Branches() @@ -349,13 +352,6 @@ func (h *Handle) Log(w http.ResponseWriter, r *http.Request) { return } - commits, err := gr.Commits() - if err != nil { - writeError(w, err.Error(), http.StatusInternalServerError) - l.Error("fetching commits", "error", err.Error()) - return - } - // Get page parameters page := 1 pageSize := 30 @@ -372,20 +368,19 @@ func (h *Handle) Log(w http.ResponseWriter, r *http.Request) { } } - // Calculate pagination - start := (page - 1) * pageSize - end := start + pageSize - total := len(commits) + // convert to offset/limit + offset := (page - 1) * pageSize + limit := pageSize - if start >= total { - commits = []*object.Commit{} - } else { - if end > total { - end = total - } - commits = commits[start:end] + commits, err := gr.Commits(offset, limit) + if err != nil { + writeError(w, err.Error(), http.StatusInternalServerError) + l.Error("fetching commits", "error", err.Error()) + 
return } + total := len(commits) + resp := types.RepoLogResponse{ Commits: commits, Ref: ref, -- 2.43.0 From de9bc9b154a4c745c1aa797e4527cb4fa84fe4e7 Mon Sep 17 00:00:00 2001 From: oppiliappan Date: Wed, 28 May 2025 18:42:14 +0100 Subject: [PATCH] knotserver: improve performance of last_commit_time Change-Id: mrwusmywonorkzrquzrowrnpmrnsylou uses `git log --name-only` to speed things along. the performance tradeoff here is that active repositories tend to load last-commit times really quickly, but directories with one file that was modified 20k commits ago will result in the entire log being walked through. the git-log output is parsed as it streams to avoid having to wait for the entire command to complete, and as soon as the entire directory's listing is hydrated, we cancel the command. most of the time, this results in hydration of subdirectories also (this gets cached), resulting in a good experience when browsing a repo (typically moving down directories gets faster with each click, as fewer files need hydration). 
Signed-off-by: oppiliappan --- knotserver/git/git.go | 17 ---- knotserver/git/last_commit.go | 167 ++++++++++++++++++++++++++++++++++ knotserver/git/tree.go | 37 ++++---- 3 files changed, 186 insertions(+), 35 deletions(-) create mode 100644 knotserver/git/last_commit.go diff --git a/knotserver/git/git.go b/knotserver/git/git.go index 3630a80..7d83303 100644 --- a/knotserver/git/git.go +++ b/knotserver/git/git.go @@ -11,31 +11,14 @@ import ( "sort" "strconv" "strings" - "sync" "time" - "github.com/dgraph-io/ristretto" "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/object" "tangled.sh/tangled.sh/core/types" ) -var ( - commitCache *ristretto.Cache - cacheMu sync.RWMutex -) - -func init() { - cache, _ := ristretto.NewCache(&ristretto.Config{ - NumCounters: 1e7, - MaxCost: 1 << 30, - BufferItems: 64, - TtlTickerDurationInSec: 120, - }) - commitCache = cache -} - var ( ErrBinaryFile = fmt.Errorf("binary file") ErrNotBinaryFile = fmt.Errorf("not binary file") diff --git a/knotserver/git/last_commit.go b/knotserver/git/last_commit.go new file mode 100644 index 0000000..3dc75a4 --- /dev/null +++ b/knotserver/git/last_commit.go @@ -0,0 +1,167 @@ +package git + +import ( + "bufio" + "context" + "fmt" + "io" + "os/exec" + "path" + "strings" + "sync" + "time" + + "github.com/dgraph-io/ristretto" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" +) + +var ( + commitCache *ristretto.Cache + cacheMu sync.RWMutex +) + +func init() { + cache, _ := ristretto.NewCache(&ristretto.Config{ + NumCounters: 1e7, + MaxCost: 1 << 30, + BufferItems: 64, + TtlTickerDurationInSec: 120, + }) + commitCache = cache +} + +func (g *GitRepo) streamingGitLog(ctx context.Context, extraArgs ...string) (io.Reader, error) { + args := []string{} + args = append(args, "log") + args = append(args, g.h.String()) + args = append(args, extraArgs...) + + cmd := exec.CommandContext(ctx, "git", args...) 
+ cmd.Dir = g.path + + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + + if err := cmd.Start(); err != nil { + return nil, err + } + + return stdout, nil +} + +type commit struct { + hash plumbing.Hash + when time.Time + files []string + message string +} + +func cacheKey(g *GitRepo, path string) string { + return fmt.Sprintf("%s:%s:%s", g.path, g.h.String(), path) +} + +func (g *GitRepo) calculateCommitTime(subtree *object.Tree, parent string) (map[string]commit, error) { + filesToDo := make(map[string]struct{}) + filesDone := make(map[string]commit) + for _, e := range subtree.Entries { + fpath := path.Clean(path.Join(parent, e.Name)) + filesToDo[fpath] = struct{}{} + } + + for _, e := range subtree.Entries { + f := path.Clean(path.Join(parent, e.Name)) + cacheKey := cacheKey(g, f) + if cached, ok := commitCache.Get(cacheKey); ok { + filesDone[f] = cached.(commit) + delete(filesToDo, f) + } else { + filesToDo[f] = struct{}{} + } + } + + if len(filesToDo) == 0 { + return filesDone, nil + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + pathSpec := "." 
+ if parent != "" { + pathSpec = parent + } + output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=iso", "--name-only", "--", pathSpec) + if err != nil { + return nil, err + } + + reader := bufio.NewReader(output) + var current commit + for { + line, err := reader.ReadString('\n') + if err != nil && err != io.EOF { + return nil, err + } + line = strings.TrimSpace(line) + + if line == "" { + if !current.hash.IsZero() { + // we have a fully parsed commit + for _, f := range current.files { + if _, ok := filesToDo[f]; ok { + filesDone[f] = current + delete(filesToDo, f) + commitCache.Set(cacheKey(g, f), current, 3600) + } + } + + if len(filesToDo) == 0 { + fmt.Println("finished at commit", current.hash, current.when) + cancel() + break + } + current = commit{} + } + } else if current.hash.IsZero() { + parts := strings.SplitN(line, ",", 3) + if len(parts) == 3 { + current.hash = plumbing.NewHash(parts[0]) + current.when, _ = time.Parse("2006-01-02 15:04:05 -0700", parts[1]) + current.message = parts[2] + } + } else { + // all ancestors along this path should also be included + file := path.Clean(line) + ancestors := ancestors(file) + current.files = append(current.files, file) + current.files = append(current.files, ancestors...) + } + + if err == io.EOF { + break + } + } + + return filesDone, nil +} + +func ancestors(p string) []string { + var ancestors []string + + for { + p = path.Dir(p) + if p == "." 
|| p == "/" { + break + } + ancestors = append(ancestors, p) + } + return ancestors +} + +type Cache interface { + Put(key string, val any, timeout int64) error + Get(key string) any +} diff --git a/knotserver/git/tree.go b/knotserver/git/tree.go index 4ad9974..fbbe288 100644 --- a/knotserver/git/tree.go +++ b/knotserver/git/tree.go @@ -2,6 +2,7 @@ package git import ( "fmt" + "path" "time" "github.com/go-git/go-git/v5/plumbing/object" @@ -41,29 +42,29 @@ func (g *GitRepo) FileTree(path string) ([]types.NiceTree, error) { return files, nil } -func (g *GitRepo) makeNiceTree(t *object.Tree, parent string) []types.NiceTree { +func (g *GitRepo) makeNiceTree(subtree *object.Tree, parent string) []types.NiceTree { nts := []types.NiceTree{} - for _, e := range t.Entries { + times, err := g.calculateCommitTime(subtree, parent) + if err != nil { + return nts + } + + for _, e := range subtree.Entries { mode, _ := e.Mode.ToOSFileMode() - sz, _ := t.Size(e.Name) + sz, _ := subtree.Size(e.Name) - var fpath string - if parent != "" { - fpath = fmt.Sprintf("%s/%s", parent, e.Name) - } else { - fpath = e.Name + fpath := path.Join(parent, e.Name) + + lastCommit := &types.LastCommitInfo{ + Hash: g.h, + Message: "", + When: time.Now(), } - lastCommit, err := g.LastCommitForPath(fpath) - if err != nil { - fmt.Println("error getting last commit time:", err) - // We don't want to skip the file, so worst case lets just - // populate it with "defaults". - lastCommit = &types.LastCommitInfo{ - Hash: g.h, - Message: "", - When: time.Now(), - } + if t, ok := times[fpath]; ok { + lastCommit.Hash = t.hash + lastCommit.Message = t.message + lastCommit.When = t.when } nts = append(nts, types.NiceTree{ -- 2.43.0