Monorepo for Tangled tangled.org

knotserver: git: improve performance of last_commit_time

uses `git log --name-only` to speed things along. the performance
tradeoff here is that active repositories tend to load last-commit times
really quickly, but directories with one file that was modified 20k
commits ago will result in the entire log being walked through.

git-log command is parsed as it streams to avoid having to wait for the
entire command to complete, and as soon as the entire directory's
listing is hydrated, we cancel the command. most of the time, this
results in hydration of subdirectories also (this gets cached),
resulting in good experience when browsing a repo (typically moving down
directories gets faster with each click, as fewer files need hydration).

Signed-off-by: oppiliappan <me@oppi.li>

Changed files
+205 -97
appview
pages
templates
knotserver
+4
appview/pages/templates/repo/index.html
··· 133 </div> 134 </a> 135 136 <time class="text-xs text-gray-500 dark:text-gray-400" 137 >{{ timeFmt .LastCommit.When }}</time 138 > 139 </div> 140 </div> 141 {{ end }} ··· 154 </div> 155 </a> 156 157 <time class="text-xs text-gray-500 dark:text-gray-400" 158 >{{ timeFmt .LastCommit.When }}</time 159 > 160 </div> 161 </div> 162 {{ end }}
··· 133 </div> 134 </a> 135 136 + {{ if .LastCommit }} 137 <time class="text-xs text-gray-500 dark:text-gray-400" 138 >{{ timeFmt .LastCommit.When }}</time 139 > 140 + {{ end }} 141 </div> 142 </div> 143 {{ end }} ··· 156 </div> 157 </a> 158 159 + {{ if .LastCommit }} 160 <time class="text-xs text-gray-500 dark:text-gray-400" 161 >{{ timeFmt .LastCommit.When }}</time 162 > 163 + {{ end }} 164 </div> 165 </div> 166 {{ end }}
+6 -2
appview/pages/templates/repo/tree.html
··· 62 {{ i "folder" "size-4 fill-current" }}{{ .Name }} 63 </div> 64 </a> 65 - <time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time> 66 </div> 67 </div> 68 {{ end }} ··· 77 {{ i "file" "size-4" }}{{ .Name }} 78 </div> 79 </a> 80 - <time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time> 81 </div> 82 </div> 83 {{ end }}
··· 62 {{ i "folder" "size-4 fill-current" }}{{ .Name }} 63 </div> 64 </a> 65 + {{ if .LastCommit}} 66 + <time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time> 67 + {{ end }} 68 </div> 69 </div> 70 {{ end }} ··· 79 {{ i "file" "size-4" }}{{ .Name }} 80 </div> 81 </a> 82 + {{ if .LastCommit}} 83 + <time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time> 84 + {{ end }} 85 </div> 86 </div> 87 {{ end }}
-69
knotserver/git/git.go
··· 2 3 import ( 4 "archive/tar" 5 - "bytes" 6 "fmt" 7 "io" 8 "io/fs" ··· 11 "sort" 12 "strconv" 13 "strings" 14 - "sync" 15 "time" 16 17 - "github.com/dgraph-io/ristretto" 18 "github.com/go-git/go-git/v5" 19 "github.com/go-git/go-git/v5/plumbing" 20 "github.com/go-git/go-git/v5/plumbing/object" 21 "tangled.sh/tangled.sh/core/types" 22 ) 23 - 24 - var ( 25 - commitCache *ristretto.Cache 26 - cacheMu sync.RWMutex 27 - ) 28 - 29 - func init() { 30 - cache, _ := ristretto.NewCache(&ristretto.Config{ 31 - NumCounters: 1e7, 32 - MaxCost: 1 << 30, 33 - BufferItems: 64, 34 - TtlTickerDurationInSec: 120, 35 - }) 36 - commitCache = cache 37 - } 38 39 var ( 40 ErrBinaryFile = fmt.Errorf("binary file") ··· 448 } 449 450 return nil 451 - } 452 - 453 - func (g *GitRepo) LastCommitForPath(path string) (*types.LastCommitInfo, error) { 454 - cacheKey := fmt.Sprintf("%s:%s", g.h.String(), path) 455 - cacheMu.RLock() 456 - if commitInfo, found := commitCache.Get(cacheKey); found { 457 - cacheMu.RUnlock() 458 - return commitInfo.(*types.LastCommitInfo), nil 459 - } 460 - cacheMu.RUnlock() 461 - 462 - cmd := exec.Command("git", "-C", g.path, "log", g.h.String(), "-1", "--format=%H %ct", "--", path) 463 - 464 - var out bytes.Buffer 465 - cmd.Stdout = &out 466 - cmd.Stderr = &out 467 - 468 - if err := cmd.Run(); err != nil { 469 - return nil, fmt.Errorf("failed to get commit hash: %w", err) 470 - } 471 - 472 - output := strings.TrimSpace(out.String()) 473 - if output == "" { 474 - return nil, fmt.Errorf("no commits found for path: %s", path) 475 - } 476 - 477 - parts := strings.SplitN(output, " ", 2) 478 - if len(parts) < 2 { 479 - return nil, fmt.Errorf("unexpected commit log format") 480 - } 481 - 482 - commitHash := parts[0] 483 - commitTimeUnix, err := strconv.ParseInt(parts[1], 10, 64) 484 - if err != nil { 485 - return nil, fmt.Errorf("parsing commit time: %w", err) 486 - } 487 - commitTime := time.Unix(commitTimeUnix, 0) 488 - 489 - hash := plumbing.NewHash(commitHash) 490 - 491 - commitInfo := &types.LastCommitInfo{ 492 - Hash: hash, 493 - Message: "", 494 - When: commitTime, 495 - } 496 - 497 - cacheMu.Lock() 498 - commitCache.Set(cacheKey, commitInfo, 1) 499 - cacheMu.Unlock() 500 - 501 - return commitInfo, nil 502 } 503 504 func newInfoWrapper(
··· 2 3 import ( 4 "archive/tar" 5 "fmt" 6 "io" 7 "io/fs" ··· 10 "sort" 11 "strconv" 12 "strings" 13 "time" 14 15 "github.com/go-git/go-git/v5" 16 "github.com/go-git/go-git/v5/plumbing" 17 "github.com/go-git/go-git/v5/plumbing/object" 18 "tangled.sh/tangled.sh/core/types" 19 ) 20 21 var ( 22 ErrBinaryFile = fmt.Errorf("binary file") ··· 430 } 431 432 return nil 433 } 434 435 func newInfoWrapper(
+168
knotserver/git/last_commit.go
···
··· 1 + package git 2 + 3 + import ( 4 + "bufio" 5 + "context" 6 + "crypto/sha256" 7 + "fmt" 8 + "io" 9 + "os/exec" 10 + "path" 11 + "strings" 12 + "time" 13 + 14 + "github.com/dgraph-io/ristretto" 15 + "github.com/go-git/go-git/v5/plumbing" 16 + "github.com/go-git/go-git/v5/plumbing/object" 17 + ) 18 + 19 + var ( 20 + commitCache *ristretto.Cache 21 + ) 22 + 23 + func init() { 24 + cache, _ := ristretto.NewCache(&ristretto.Config{ 25 + NumCounters: 1e7, 26 + MaxCost: 1 << 30, 27 + BufferItems: 64, 28 + TtlTickerDurationInSec: 120, 29 + }) 30 + commitCache = cache 31 + } 32 + 33 + func (g *GitRepo) streamingGitLog(ctx context.Context, extraArgs ...string) (io.Reader, error) { 34 + args := []string{} 35 + args = append(args, "log") 36 + args = append(args, g.h.String()) 37 + args = append(args, extraArgs...) 38 + 39 + cmd := exec.CommandContext(ctx, "git", args...) 40 + cmd.Dir = g.path 41 + 42 + stdout, err := cmd.StdoutPipe() 43 + if err != nil { 44 + return nil, err 45 + } 46 + 47 + if err := cmd.Start(); err != nil { 48 + return nil, err 49 + } 50 + 51 + return stdout, nil 52 + } 53 + 54 + type commit struct { 55 + hash plumbing.Hash 56 + when time.Time 57 + files []string 58 + message string 59 + } 60 + 61 + func cacheKey(g *GitRepo, path string) string { 62 + sep := byte(':') 63 + hash := sha256.Sum256(fmt.Append([]byte{}, g.path, sep, g.h.String(), sep, path)) 64 + return fmt.Sprintf("%x", hash) 65 + } 66 + 67 + func (g *GitRepo) calculateCommitTimeIn(ctx context.Context, subtree *object.Tree, parent string, timeout time.Duration) (map[string]commit, error) { 68 + ctx, cancel := context.WithTimeout(ctx, timeout) 69 + defer cancel() 70 + return g.calculateCommitTime(ctx, subtree, parent) 71 + } 72 + 73 + func (g *GitRepo) calculateCommitTime(ctx context.Context, subtree *object.Tree, parent string) (map[string]commit, error) { 74 + filesToDo := make(map[string]struct{}) 75 + filesDone := make(map[string]commit) 76 + for _, e := range subtree.Entries { 77 + fpath := path.Clean(path.Join(parent, e.Name)) 78 + filesToDo[fpath] = struct{}{} 79 + } 80 + 81 + for _, e := range subtree.Entries { 82 + f := path.Clean(path.Join(parent, e.Name)) 83 + cacheKey := cacheKey(g, f) 84 + if cached, ok := commitCache.Get(cacheKey); ok { 85 + filesDone[f] = cached.(commit) 86 + delete(filesToDo, f) 87 + } else { 88 + filesToDo[f] = struct{}{} 89 + } 90 + } 91 + 92 + if len(filesToDo) == 0 { 93 + return filesDone, nil 94 + } 95 + 96 + ctx, cancel := context.WithCancel(ctx) 97 + defer cancel() 98 + 99 + pathSpec := "." 100 + if parent != "" { 101 + pathSpec = parent 102 + } 103 + output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=iso", "--name-only", "--", pathSpec) 104 + if err != nil { 105 + return nil, err 106 + } 107 + 108 + reader := bufio.NewReader(output) 109 + var current commit 110 + for { 111 + line, err := reader.ReadString('\n') 112 + if err != nil && err != io.EOF { 113 + return nil, err 114 + } 115 + line = strings.TrimSpace(line) 116 + 117 + if line == "" { 118 + if !current.hash.IsZero() { 119 + // we have a fully parsed commit 120 + for _, f := range current.files { 121 + if _, ok := filesToDo[f]; ok { 122 + filesDone[f] = current 123 + delete(filesToDo, f) 124 + commitCache.Set(cacheKey(g, f), current, 0) 125 + } 126 + } 127 + 128 + if len(filesToDo) == 0 { 129 + cancel() 130 + break 131 + } 132 + current = commit{} 133 + } 134 + } else if current.hash.IsZero() { 135 + parts := strings.SplitN(line, ",", 3) 136 + if len(parts) == 3 { 137 + current.hash = plumbing.NewHash(parts[0]) 138 + current.when, _ = time.Parse("2006-01-02 15:04:05 -0700", parts[1]) 139 + current.message = parts[2] 140 + } 141 + } else { 142 + // all ancestors along this path should also be included 143 + file := path.Clean(line) 144 + ancestors := ancestors(file) 145 + current.files = append(current.files, file) 146 + current.files = append(current.files, ancestors...) 147 + } 148 + 149 + if err == io.EOF { 150 + break 151 + } 152 + } 153 + 154 + return filesDone, nil 155 + } 156 + 157 + func ancestors(p string) []string { 158 + var ancestors []string 159 + 160 + for { 161 + p = path.Dir(p) 162 + if p == "." || p == "/" { 163 + break 164 + } 165 + ancestors = append(ancestors, p) 166 + } 167 + return ancestors 168 + }
+20 -20
knotserver/git/tree.go
··· 1 package git 2 3 import ( 4 "fmt" 5 "time" 6 7 "github.com/go-git/go-git/v5/plumbing/object" 8 "tangled.sh/tangled.sh/core/types" 9 ) 10 11 - func (g *GitRepo) FileTree(path string) ([]types.NiceTree, error) { 12 c, err := g.r.CommitObject(g.h) 13 if err != nil { 14 return nil, fmt.Errorf("commit object: %w", err) ··· 21 } 22 23 if path == "" { 24 - files = g.makeNiceTree(tree, "") 25 } else { 26 o, err := tree.FindEntry(path) 27 if err != nil { ··· 34 return nil, err 35 } 36 37 - files = g.makeNiceTree(subtree, path) 38 } 39 } 40 41 return files, nil 42 } 43 44 - func (g *GitRepo) makeNiceTree(t *object.Tree, parent string) []types.NiceTree { 45 nts := []types.NiceTree{} 46 47 - for _, e := range t.Entries { 48 mode, _ := e.Mode.ToOSFileMode() 49 - sz, _ := t.Size(e.Name) 50 51 - var fpath string 52 - if parent != "" { 53 - fpath = fmt.Sprintf("%s/%s", parent, e.Name) 54 - } else { 55 - fpath = e.Name 56 - } 57 - lastCommit, err := g.LastCommitForPath(fpath) 58 - if err != nil { 59 - fmt.Println("error getting last commit time:", err) 60 - // We don't want to skip the file, so worst case lets just 61 - // populate it with "defaults". 62 lastCommit = &types.LastCommitInfo{ 63 - Hash: g.h, 64 - Message: "", 65 - When: time.Now(), 66 } 67 } 68
··· 1 package git 2 3 import ( 4 + "context" 5 "fmt" 6 + "path" 7 "time" 8 9 "github.com/go-git/go-git/v5/plumbing/object" 10 "tangled.sh/tangled.sh/core/types" 11 ) 12 13 + func (g *GitRepo) FileTree(ctx context.Context, path string) ([]types.NiceTree, error) { 14 c, err := g.r.CommitObject(g.h) 15 if err != nil { 16 return nil, fmt.Errorf("commit object: %w", err) ··· 23 } 24 25 if path == "" { 26 + files = g.makeNiceTree(ctx, tree, "") 27 } else { 28 o, err := tree.FindEntry(path) 29 if err != nil { ··· 36 return nil, err 37 } 38 39 + files = g.makeNiceTree(ctx, subtree, path) 40 } 41 } 42 43 return files, nil 44 } 45 46 + func (g *GitRepo) makeNiceTree(ctx context.Context, subtree *object.Tree, parent string) []types.NiceTree { 47 nts := []types.NiceTree{} 48 49 + times, err := g.calculateCommitTimeIn(ctx, subtree, parent, 2*time.Second) 50 + if err != nil { 51 + return nts 52 + } 53 + 54 + for _, e := range subtree.Entries { 55 mode, _ := e.Mode.ToOSFileMode() 56 + sz, _ := subtree.Size(e.Name) 57 58 + fpath := path.Join(parent, e.Name) 59 + 60 + var lastCommit *types.LastCommitInfo 61 + if t, ok := times[fpath]; ok { 62 lastCommit = &types.LastCommitInfo{ 63 + Hash: t.hash, 64 + Message: t.message, 65 + When: t.when, 66 } 67 } 68
+7 -6
knotserver/routes.go
··· 2 3 import ( 4 "compress/gzip" 5 "crypto/hmac" 6 "crypto/sha256" 7 "encoding/hex" ··· 142 } 143 } 144 145 - files, err := gr.FileTree("") 146 if err != nil { 147 writeError(w, err.Error(), http.StatusInternalServerError) 148 l.Error("file tree", "error", err.Error()) ··· 190 return 191 } 192 193 - files, err := gr.FileTree(treePath) 194 if err != nil { 195 writeError(w, err.Error(), http.StatusInternalServerError) 196 l.Error("file tree", "error", err.Error()) ··· 725 726 languageFileCount := make(map[string]int) 727 728 - err = recurseEntireTree(gr, func(absPath string) { 729 lang, safe := enry.GetLanguageByExtension(absPath) 730 if len(lang) == 0 || !safe { 731 content, _ := gr.FileContentN(absPath, 1024) ··· 758 return 759 } 760 761 - func recurseEntireTree(git *git.GitRepo, callback func(absPath string), filePath string) error { 762 - files, err := git.FileTree(filePath) 763 if err != nil { 764 log.Println(err) 765 return err ··· 768 for _, file := range files { 769 absPath := path.Join(filePath, file.Name) 770 if !file.IsFile { 771 - return recurseEntireTree(git, callback, absPath) 772 } 773 callback(absPath) 774 }
··· 2 3 import ( 4 "compress/gzip" 5 + "context" 6 "crypto/hmac" 7 "crypto/sha256" 8 "encoding/hex" ··· 143 } 144 } 145 146 + files, err := gr.FileTree(r.Context(), "") 147 if err != nil { 148 writeError(w, err.Error(), http.StatusInternalServerError) 149 l.Error("file tree", "error", err.Error()) ··· 191 return 192 } 193 194 + files, err := gr.FileTree(r.Context(), treePath) 195 if err != nil { 196 writeError(w, err.Error(), http.StatusInternalServerError) 197 l.Error("file tree", "error", err.Error()) ··· 726 727 languageFileCount := make(map[string]int) 728 729 + err = recurseEntireTree(r.Context(), gr, func(absPath string) { 730 lang, safe := enry.GetLanguageByExtension(absPath) 731 if len(lang) == 0 || !safe { 732 content, _ := gr.FileContentN(absPath, 1024) ··· 759 return 760 } 761 762 + func recurseEntireTree(ctx context.Context, git *git.GitRepo, callback func(absPath string), filePath string) error { 763 + files, err := git.FileTree(ctx, filePath) 764 if err != nil { 765 log.Println(err) 766 return err ··· 769 for _, file := range files { 770 absPath := path.Join(filePath, file.Name) 771 if !file.IsFile { 772 + return recurseEntireTree(ctx, git, callback, absPath) 773 } 774 callback(absPath) 775 }