Monorepo for Tangled tangled.org

knotserver: git: improve performance of last_commit_time

Uses `git log --name-only` to speed things along. The performance
tradeoff here is that active repositories tend to load last-commit times
very quickly, but a directory containing even one file that was last
modified 20k commits ago will result in the entire log being walked.

The git-log output is parsed as it streams, to avoid having to wait for
the entire command to complete; as soon as the entire directory's
listing is hydrated, we cancel the command. Most of the time, this also
hydrates subdirectories (the results are cached), resulting in a good
experience when browsing a repo (typically, moving down directories gets
faster with each click, as fewer files need hydration).

Signed-off-by: oppiliappan <me@oppi.li>

Changed files
+205 -97
appview
pages
templates
knotserver
+4
appview/pages/templates/repo/index.html
··· 133 133 </div> 134 134 </a> 135 135 136 + {{ if .LastCommit }} 136 137 <time class="text-xs text-gray-500 dark:text-gray-400" 137 138 >{{ timeFmt .LastCommit.When }}</time 138 139 > 140 + {{ end }} 139 141 </div> 140 142 </div> 141 143 {{ end }} ··· 154 156 </div> 155 157 </a> 156 158 159 + {{ if .LastCommit }} 157 160 <time class="text-xs text-gray-500 dark:text-gray-400" 158 161 >{{ timeFmt .LastCommit.When }}</time 159 162 > 163 + {{ end }} 160 164 </div> 161 165 </div> 162 166 {{ end }}
+6 -2
appview/pages/templates/repo/tree.html
··· 62 62 {{ i "folder" "size-4 fill-current" }}{{ .Name }} 63 63 </div> 64 64 </a> 65 - <time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time> 65 + {{ if .LastCommit}} 66 + <time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time> 67 + {{ end }} 66 68 </div> 67 69 </div> 68 70 {{ end }} ··· 77 79 {{ i "file" "size-4" }}{{ .Name }} 78 80 </div> 79 81 </a> 80 - <time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time> 82 + {{ if .LastCommit}} 83 + <time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time> 84 + {{ end }} 81 85 </div> 82 86 </div> 83 87 {{ end }}
-69
knotserver/git/git.go
··· 2 2 3 3 import ( 4 4 "archive/tar" 5 - "bytes" 6 5 "fmt" 7 6 "io" 8 7 "io/fs" ··· 11 10 "sort" 12 11 "strconv" 13 12 "strings" 14 - "sync" 15 13 "time" 16 14 17 - "github.com/dgraph-io/ristretto" 18 15 "github.com/go-git/go-git/v5" 19 16 "github.com/go-git/go-git/v5/plumbing" 20 17 "github.com/go-git/go-git/v5/plumbing/object" 21 18 "tangled.sh/tangled.sh/core/types" 22 19 ) 23 - 24 - var ( 25 - commitCache *ristretto.Cache 26 - cacheMu sync.RWMutex 27 - ) 28 - 29 - func init() { 30 - cache, _ := ristretto.NewCache(&ristretto.Config{ 31 - NumCounters: 1e7, 32 - MaxCost: 1 << 30, 33 - BufferItems: 64, 34 - TtlTickerDurationInSec: 120, 35 - }) 36 - commitCache = cache 37 - } 38 20 39 21 var ( 40 22 ErrBinaryFile = fmt.Errorf("binary file") ··· 448 430 } 449 431 450 432 return nil 451 - } 452 - 453 - func (g *GitRepo) LastCommitForPath(path string) (*types.LastCommitInfo, error) { 454 - cacheKey := fmt.Sprintf("%s:%s", g.h.String(), path) 455 - cacheMu.RLock() 456 - if commitInfo, found := commitCache.Get(cacheKey); found { 457 - cacheMu.RUnlock() 458 - return commitInfo.(*types.LastCommitInfo), nil 459 - } 460 - cacheMu.RUnlock() 461 - 462 - cmd := exec.Command("git", "-C", g.path, "log", g.h.String(), "-1", "--format=%H %ct", "--", path) 463 - 464 - var out bytes.Buffer 465 - cmd.Stdout = &out 466 - cmd.Stderr = &out 467 - 468 - if err := cmd.Run(); err != nil { 469 - return nil, fmt.Errorf("failed to get commit hash: %w", err) 470 - } 471 - 472 - output := strings.TrimSpace(out.String()) 473 - if output == "" { 474 - return nil, fmt.Errorf("no commits found for path: %s", path) 475 - } 476 - 477 - parts := strings.SplitN(output, " ", 2) 478 - if len(parts) < 2 { 479 - return nil, fmt.Errorf("unexpected commit log format") 480 - } 481 - 482 - commitHash := parts[0] 483 - commitTimeUnix, err := strconv.ParseInt(parts[1], 10, 64) 484 - if err != nil { 485 - return nil, fmt.Errorf("parsing commit time: %w", err) 486 - } 487 - commitTime := time.Unix(commitTimeUnix, 
0) 488 - 489 - hash := plumbing.NewHash(commitHash) 490 - 491 - commitInfo := &types.LastCommitInfo{ 492 - Hash: hash, 493 - Message: "", 494 - When: commitTime, 495 - } 496 - 497 - cacheMu.Lock() 498 - commitCache.Set(cacheKey, commitInfo, 1) 499 - cacheMu.Unlock() 500 - 501 - return commitInfo, nil 502 433 } 503 434 504 435 func newInfoWrapper(
+168
knotserver/git/last_commit.go
··· 1 + package git 2 + 3 + import ( 4 + "bufio" 5 + "context" 6 + "crypto/sha256" 7 + "fmt" 8 + "io" 9 + "os/exec" 10 + "path" 11 + "strings" 12 + "time" 13 + 14 + "github.com/dgraph-io/ristretto" 15 + "github.com/go-git/go-git/v5/plumbing" 16 + "github.com/go-git/go-git/v5/plumbing/object" 17 + ) 18 + 19 + var ( 20 + commitCache *ristretto.Cache 21 + ) 22 + 23 + func init() { 24 + cache, _ := ristretto.NewCache(&ristretto.Config{ 25 + NumCounters: 1e7, 26 + MaxCost: 1 << 30, 27 + BufferItems: 64, 28 + TtlTickerDurationInSec: 120, 29 + }) 30 + commitCache = cache 31 + } 32 + 33 + func (g *GitRepo) streamingGitLog(ctx context.Context, extraArgs ...string) (io.Reader, error) { 34 + args := []string{} 35 + args = append(args, "log") 36 + args = append(args, g.h.String()) 37 + args = append(args, extraArgs...) 38 + 39 + cmd := exec.CommandContext(ctx, "git", args...) 40 + cmd.Dir = g.path 41 + 42 + stdout, err := cmd.StdoutPipe() 43 + if err != nil { 44 + return nil, err 45 + } 46 + 47 + if err := cmd.Start(); err != nil { 48 + return nil, err 49 + } 50 + 51 + return stdout, nil 52 + } 53 + 54 + type commit struct { 55 + hash plumbing.Hash 56 + when time.Time 57 + files []string 58 + message string 59 + } 60 + 61 + func cacheKey(g *GitRepo, path string) string { 62 + sep := byte(':') 63 + hash := sha256.Sum256(fmt.Append([]byte{}, g.path, sep, g.h.String(), sep, path)) 64 + return fmt.Sprintf("%x", hash) 65 + } 66 + 67 + func (g *GitRepo) calculateCommitTimeIn(ctx context.Context, subtree *object.Tree, parent string, timeout time.Duration) (map[string]commit, error) { 68 + ctx, cancel := context.WithTimeout(ctx, timeout) 69 + defer cancel() 70 + return g.calculateCommitTime(ctx, subtree, parent) 71 + } 72 + 73 + func (g *GitRepo) calculateCommitTime(ctx context.Context, subtree *object.Tree, parent string) (map[string]commit, error) { 74 + filesToDo := make(map[string]struct{}) 75 + filesDone := make(map[string]commit) 76 + for _, e := range subtree.Entries { 77 + 
fpath := path.Clean(path.Join(parent, e.Name)) 78 + filesToDo[fpath] = struct{}{} 79 + } 80 + 81 + for _, e := range subtree.Entries { 82 + f := path.Clean(path.Join(parent, e.Name)) 83 + cacheKey := cacheKey(g, f) 84 + if cached, ok := commitCache.Get(cacheKey); ok { 85 + filesDone[f] = cached.(commit) 86 + delete(filesToDo, f) 87 + } else { 88 + filesToDo[f] = struct{}{} 89 + } 90 + } 91 + 92 + if len(filesToDo) == 0 { 93 + return filesDone, nil 94 + } 95 + 96 + ctx, cancel := context.WithCancel(ctx) 97 + defer cancel() 98 + 99 + pathSpec := "." 100 + if parent != "" { 101 + pathSpec = parent 102 + } 103 + output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=iso", "--name-only", "--", pathSpec) 104 + if err != nil { 105 + return nil, err 106 + } 107 + 108 + reader := bufio.NewReader(output) 109 + var current commit 110 + for { 111 + line, err := reader.ReadString('\n') 112 + if err != nil && err != io.EOF { 113 + return nil, err 114 + } 115 + line = strings.TrimSpace(line) 116 + 117 + if line == "" { 118 + if !current.hash.IsZero() { 119 + // we have a fully parsed commit 120 + for _, f := range current.files { 121 + if _, ok := filesToDo[f]; ok { 122 + filesDone[f] = current 123 + delete(filesToDo, f) 124 + commitCache.Set(cacheKey(g, f), current, 0) 125 + } 126 + } 127 + 128 + if len(filesToDo) == 0 { 129 + cancel() 130 + break 131 + } 132 + current = commit{} 133 + } 134 + } else if current.hash.IsZero() { 135 + parts := strings.SplitN(line, ",", 3) 136 + if len(parts) == 3 { 137 + current.hash = plumbing.NewHash(parts[0]) 138 + current.when, _ = time.Parse("2006-01-02 15:04:05 -0700", parts[1]) 139 + current.message = parts[2] 140 + } 141 + } else { 142 + // all ancestors along this path should also be included 143 + file := path.Clean(line) 144 + ancestors := ancestors(file) 145 + current.files = append(current.files, file) 146 + current.files = append(current.files, ancestors...) 
147 + } 148 + 149 + if err == io.EOF { 150 + break 151 + } 152 + } 153 + 154 + return filesDone, nil 155 + } 156 + 157 + func ancestors(p string) []string { 158 + var ancestors []string 159 + 160 + for { 161 + p = path.Dir(p) 162 + if p == "." || p == "/" { 163 + break 164 + } 165 + ancestors = append(ancestors, p) 166 + } 167 + return ancestors 168 + }
+20 -20
knotserver/git/tree.go
··· 1 1 package git 2 2 3 3 import ( 4 + "context" 4 5 "fmt" 6 + "path" 5 7 "time" 6 8 7 9 "github.com/go-git/go-git/v5/plumbing/object" 8 10 "tangled.sh/tangled.sh/core/types" 9 11 ) 10 12 11 - func (g *GitRepo) FileTree(path string) ([]types.NiceTree, error) { 13 + func (g *GitRepo) FileTree(ctx context.Context, path string) ([]types.NiceTree, error) { 12 14 c, err := g.r.CommitObject(g.h) 13 15 if err != nil { 14 16 return nil, fmt.Errorf("commit object: %w", err) ··· 21 23 } 22 24 23 25 if path == "" { 24 - files = g.makeNiceTree(tree, "") 26 + files = g.makeNiceTree(ctx, tree, "") 25 27 } else { 26 28 o, err := tree.FindEntry(path) 27 29 if err != nil { ··· 34 36 return nil, err 35 37 } 36 38 37 - files = g.makeNiceTree(subtree, path) 39 + files = g.makeNiceTree(ctx, subtree, path) 38 40 } 39 41 } 40 42 41 43 return files, nil 42 44 } 43 45 44 - func (g *GitRepo) makeNiceTree(t *object.Tree, parent string) []types.NiceTree { 46 + func (g *GitRepo) makeNiceTree(ctx context.Context, subtree *object.Tree, parent string) []types.NiceTree { 45 47 nts := []types.NiceTree{} 46 48 47 - for _, e := range t.Entries { 49 + times, err := g.calculateCommitTimeIn(ctx, subtree, parent, 2*time.Second) 50 + if err != nil { 51 + return nts 52 + } 53 + 54 + for _, e := range subtree.Entries { 48 55 mode, _ := e.Mode.ToOSFileMode() 49 - sz, _ := t.Size(e.Name) 56 + sz, _ := subtree.Size(e.Name) 50 57 51 - var fpath string 52 - if parent != "" { 53 - fpath = fmt.Sprintf("%s/%s", parent, e.Name) 54 - } else { 55 - fpath = e.Name 56 - } 57 - lastCommit, err := g.LastCommitForPath(fpath) 58 - if err != nil { 59 - fmt.Println("error getting last commit time:", err) 60 - // We don't want to skip the file, so worst case lets just 61 - // populate it with "defaults". 
58 + fpath := path.Join(parent, e.Name) 59 + 60 + var lastCommit *types.LastCommitInfo 61 + if t, ok := times[fpath]; ok { 62 62 lastCommit = &types.LastCommitInfo{ 63 - Hash: g.h, 64 - Message: "", 65 - When: time.Now(), 63 + Hash: t.hash, 64 + Message: t.message, 65 + When: t.when, 66 66 } 67 67 } 68 68
+7 -6
knotserver/routes.go
··· 2 2 3 3 import ( 4 4 "compress/gzip" 5 + "context" 5 6 "crypto/hmac" 6 7 "crypto/sha256" 7 8 "encoding/hex" ··· 142 143 } 143 144 } 144 145 145 - files, err := gr.FileTree("") 146 + files, err := gr.FileTree(r.Context(), "") 146 147 if err != nil { 147 148 writeError(w, err.Error(), http.StatusInternalServerError) 148 149 l.Error("file tree", "error", err.Error()) ··· 190 191 return 191 192 } 192 193 193 - files, err := gr.FileTree(treePath) 194 + files, err := gr.FileTree(r.Context(), treePath) 194 195 if err != nil { 195 196 writeError(w, err.Error(), http.StatusInternalServerError) 196 197 l.Error("file tree", "error", err.Error()) ··· 725 726 726 727 languageFileCount := make(map[string]int) 727 728 728 - err = recurseEntireTree(gr, func(absPath string) { 729 + err = recurseEntireTree(r.Context(), gr, func(absPath string) { 729 730 lang, safe := enry.GetLanguageByExtension(absPath) 730 731 if len(lang) == 0 || !safe { 731 732 content, _ := gr.FileContentN(absPath, 1024) ··· 758 759 return 759 760 } 760 761 761 - func recurseEntireTree(git *git.GitRepo, callback func(absPath string), filePath string) error { 762 - files, err := git.FileTree(filePath) 762 + func recurseEntireTree(ctx context.Context, git *git.GitRepo, callback func(absPath string), filePath string) error { 763 + files, err := git.FileTree(ctx, filePath) 763 764 if err != nil { 764 765 log.Println(err) 765 766 return err ··· 768 769 for _, file := range files { 769 770 absPath := path.Join(filePath, file.Name) 770 771 if !file.IsFile { 771 - return recurseEntireTree(git, callback, absPath) 772 + return recurseEntireTree(ctx, git, callback, absPath) 772 773 } 773 774 callback(absPath) 774 775 }