Mirror of @tangled.org/core. Running on a Raspberry Pi Zero 2 (Please be gentle).

knotserver: git: improve performance of last_commit_time

uses `git log --name-only` to speed things along. the performance
tradeoff here is that active repositories tend to load last-commit times
really quickly, but directories with one file that was modified 20k
commits ago will result in the entire log being walked through.

the output of the git-log command is parsed as it streams, so we do not
have to wait for the entire command to complete; as soon as the listing
for the whole directory is hydrated, we cancel the command. most of the
time, this also hydrates subdirectories (this gets cached), resulting in
a good experience when browsing a repo (typically, moving down
directories gets faster with each click, as fewer files need hydration).

Signed-off-by: oppiliappan <me@oppi.li>

authored by oppi.li and committed by

Tangled d4f57e8d 52cd32d1

+206 -98
+4
appview/pages/templates/repo/index.html
··· 133 133 </div> 134 134 </a> 135 135 136 + {{ if .LastCommit }} 136 137 <time class="text-xs text-gray-500 dark:text-gray-400" 137 138 >{{ timeFmt .LastCommit.When }}</time 138 139 > 140 + {{ end }} 139 141 </div> 140 142 </div> 141 143 {{ end }} ··· 156 154 </div> 157 155 </a> 158 156 157 + {{ if .LastCommit }} 159 158 <time class="text-xs text-gray-500 dark:text-gray-400" 160 159 >{{ timeFmt .LastCommit.When }}</time 161 160 > 161 + {{ end }} 162 162 </div> 163 163 </div> 164 164 {{ end }}
+6 -2
appview/pages/templates/repo/tree.html
··· 62 62 {{ i "folder" "size-4 fill-current" }}{{ .Name }} 63 63 </div> 64 64 </a> 65 - <time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time> 65 + {{ if .LastCommit}} 66 + <time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time> 67 + {{ end }} 66 68 </div> 67 69 </div> 68 70 {{ end }} ··· 79 77 {{ i "file" "size-4" }}{{ .Name }} 80 78 </div> 81 79 </a> 82 - <time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time> 80 + {{ if .LastCommit}} 81 + <time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time> 82 + {{ end }} 83 83 </div> 84 84 </div> 85 85 {{ end }}
-69
knotserver/git/git.go
··· 2 2 3 3 import ( 4 4 "archive/tar" 5 - "bytes" 6 5 "fmt" 7 6 "io" 8 7 "io/fs" ··· 10 11 "sort" 11 12 "strconv" 12 13 "strings" 13 - "sync" 14 14 "time" 15 15 16 - "github.com/dgraph-io/ristretto" 17 16 "github.com/go-git/go-git/v5" 18 17 "github.com/go-git/go-git/v5/plumbing" 19 18 "github.com/go-git/go-git/v5/plumbing/object" 20 19 "tangled.sh/tangled.sh/core/types" 21 20 ) 22 - 23 - var ( 24 - commitCache *ristretto.Cache 25 - cacheMu sync.RWMutex 26 - ) 27 - 28 - func init() { 29 - cache, _ := ristretto.NewCache(&ristretto.Config{ 30 - NumCounters: 1e7, 31 - MaxCost: 1 << 30, 32 - BufferItems: 64, 33 - TtlTickerDurationInSec: 120, 34 - }) 35 - commitCache = cache 36 - } 37 21 38 22 var ( 39 23 ErrBinaryFile = fmt.Errorf("binary file") ··· 430 448 } 431 449 432 450 return nil 433 - } 434 - 435 - func (g *GitRepo) LastCommitForPath(path string) (*types.LastCommitInfo, error) { 436 - cacheKey := fmt.Sprintf("%s:%s", g.h.String(), path) 437 - cacheMu.RLock() 438 - if commitInfo, found := commitCache.Get(cacheKey); found { 439 - cacheMu.RUnlock() 440 - return commitInfo.(*types.LastCommitInfo), nil 441 - } 442 - cacheMu.RUnlock() 443 - 444 - cmd := exec.Command("git", "-C", g.path, "log", g.h.String(), "-1", "--format=%H %ct", "--", path) 445 - 446 - var out bytes.Buffer 447 - cmd.Stdout = &out 448 - cmd.Stderr = &out 449 - 450 - if err := cmd.Run(); err != nil { 451 - return nil, fmt.Errorf("failed to get commit hash: %w", err) 452 - } 453 - 454 - output := strings.TrimSpace(out.String()) 455 - if output == "" { 456 - return nil, fmt.Errorf("no commits found for path: %s", path) 457 - } 458 - 459 - parts := strings.SplitN(output, " ", 2) 460 - if len(parts) < 2 { 461 - return nil, fmt.Errorf("unexpected commit log format") 462 - } 463 - 464 - commitHash := parts[0] 465 - commitTimeUnix, err := strconv.ParseInt(parts[1], 10, 64) 466 - if err != nil { 467 - return nil, fmt.Errorf("parsing commit time: %w", err) 468 - } 469 - commitTime := time.Unix(commitTimeUnix, 
0) 470 - 471 - hash := plumbing.NewHash(commitHash) 472 - 473 - commitInfo := &types.LastCommitInfo{ 474 - Hash: hash, 475 - Message: "", 476 - When: commitTime, 477 - } 478 - 479 - cacheMu.Lock() 480 - commitCache.Set(cacheKey, commitInfo, 1) 481 - cacheMu.Unlock() 482 - 483 - return commitInfo, nil 484 451 } 485 452 486 453 func newInfoWrapper(
+168
knotserver/git/last_commit.go
··· 1 + package git 2 + 3 + import ( 4 + "bufio" 5 + "context" 6 + "crypto/sha256" 7 + "fmt" 8 + "io" 9 + "os/exec" 10 + "path" 11 + "strings" 12 + "time" 13 + 14 + "github.com/dgraph-io/ristretto" 15 + "github.com/go-git/go-git/v5/plumbing" 16 + "github.com/go-git/go-git/v5/plumbing/object" 17 + ) 18 + 19 + var ( 20 + commitCache *ristretto.Cache 21 + ) 22 + 23 + func init() { 24 + cache, _ := ristretto.NewCache(&ristretto.Config{ 25 + NumCounters: 1e7, 26 + MaxCost: 1 << 30, 27 + BufferItems: 64, 28 + TtlTickerDurationInSec: 120, 29 + }) 30 + commitCache = cache 31 + } 32 + 33 + func (g *GitRepo) streamingGitLog(ctx context.Context, extraArgs ...string) (io.Reader, error) { 34 + args := []string{} 35 + args = append(args, "log") 36 + args = append(args, g.h.String()) 37 + args = append(args, extraArgs...) 38 + 39 + cmd := exec.CommandContext(ctx, "git", args...) 40 + cmd.Dir = g.path 41 + 42 + stdout, err := cmd.StdoutPipe() 43 + if err != nil { 44 + return nil, err 45 + } 46 + 47 + if err := cmd.Start(); err != nil { 48 + return nil, err 49 + } 50 + 51 + return stdout, nil 52 + } 53 + 54 + type commit struct { 55 + hash plumbing.Hash 56 + when time.Time 57 + files []string 58 + message string 59 + } 60 + 61 + func cacheKey(g *GitRepo, path string) string { 62 + sep := byte(':') 63 + hash := sha256.Sum256(fmt.Append([]byte{}, g.path, sep, g.h.String(), sep, path)) 64 + return fmt.Sprintf("%x", hash) 65 + } 66 + 67 + func (g *GitRepo) calculateCommitTimeIn(ctx context.Context, subtree *object.Tree, parent string, timeout time.Duration) (map[string]commit, error) { 68 + ctx, cancel := context.WithTimeout(ctx, timeout) 69 + defer cancel() 70 + return g.calculateCommitTime(ctx, subtree, parent) 71 + } 72 + 73 + func (g *GitRepo) calculateCommitTime(ctx context.Context, subtree *object.Tree, parent string) (map[string]commit, error) { 74 + filesToDo := make(map[string]struct{}) 75 + filesDone := make(map[string]commit) 76 + for _, e := range subtree.Entries { 77 + 
fpath := path.Clean(path.Join(parent, e.Name)) 78 + filesToDo[fpath] = struct{}{} 79 + } 80 + 81 + for _, e := range subtree.Entries { 82 + f := path.Clean(path.Join(parent, e.Name)) 83 + cacheKey := cacheKey(g, f) 84 + if cached, ok := commitCache.Get(cacheKey); ok { 85 + filesDone[f] = cached.(commit) 86 + delete(filesToDo, f) 87 + } else { 88 + filesToDo[f] = struct{}{} 89 + } 90 + } 91 + 92 + if len(filesToDo) == 0 { 93 + return filesDone, nil 94 + } 95 + 96 + ctx, cancel := context.WithCancel(ctx) 97 + defer cancel() 98 + 99 + pathSpec := "." 100 + if parent != "" { 101 + pathSpec = parent 102 + } 103 + output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=iso", "--name-only", "--", pathSpec) 104 + if err != nil { 105 + return nil, err 106 + } 107 + 108 + reader := bufio.NewReader(output) 109 + var current commit 110 + for { 111 + line, err := reader.ReadString('\n') 112 + if err != nil && err != io.EOF { 113 + return nil, err 114 + } 115 + line = strings.TrimSpace(line) 116 + 117 + if line == "" { 118 + if !current.hash.IsZero() { 119 + // we have a fully parsed commit 120 + for _, f := range current.files { 121 + if _, ok := filesToDo[f]; ok { 122 + filesDone[f] = current 123 + delete(filesToDo, f) 124 + commitCache.Set(cacheKey(g, f), current, 0) 125 + } 126 + } 127 + 128 + if len(filesToDo) == 0 { 129 + cancel() 130 + break 131 + } 132 + current = commit{} 133 + } 134 + } else if current.hash.IsZero() { 135 + parts := strings.SplitN(line, ",", 3) 136 + if len(parts) == 3 { 137 + current.hash = plumbing.NewHash(parts[0]) 138 + current.when, _ = time.Parse("2006-01-02 15:04:05 -0700", parts[1]) 139 + current.message = parts[2] 140 + } 141 + } else { 142 + // all ancestors along this path should also be included 143 + file := path.Clean(line) 144 + ancestors := ancestors(file) 145 + current.files = append(current.files, file) 146 + current.files = append(current.files, ancestors...) 
147 + } 148 + 149 + if err == io.EOF { 150 + break 151 + } 152 + } 153 + 154 + return filesDone, nil 155 + } 156 + 157 + func ancestors(p string) []string { 158 + var ancestors []string 159 + 160 + for { 161 + p = path.Dir(p) 162 + if p == "." || p == "/" { 163 + break 164 + } 165 + ancestors = append(ancestors, p) 166 + } 167 + return ancestors 168 + }
+21 -21
knotserver/git/tree.go
··· 1 1 package git 2 2 3 3 import ( 4 + "context" 4 5 "fmt" 6 + "path" 5 7 "time" 6 8 7 9 "github.com/go-git/go-git/v5/plumbing/object" 8 10 "tangled.sh/tangled.sh/core/types" 9 11 ) 10 12 11 - func (g *GitRepo) FileTree(path string) ([]types.NiceTree, error) { 13 + func (g *GitRepo) FileTree(ctx context.Context, path string) ([]types.NiceTree, error) { 12 14 c, err := g.r.CommitObject(g.h) 13 15 if err != nil { 14 16 return nil, fmt.Errorf("commit object: %w", err) ··· 23 21 } 24 22 25 23 if path == "" { 26 - files = g.makeNiceTree(tree, "") 24 + files = g.makeNiceTree(ctx, tree, "") 27 25 } else { 28 26 o, err := tree.FindEntry(path) 29 27 if err != nil { ··· 36 34 return nil, err 37 35 } 38 36 39 - files = g.makeNiceTree(subtree, path) 37 + files = g.makeNiceTree(ctx, subtree, path) 40 38 } 41 39 } 42 40 43 41 return files, nil 44 42 } 45 43 46 - func (g *GitRepo) makeNiceTree(t *object.Tree, parent string) []types.NiceTree { 44 + func (g *GitRepo) makeNiceTree(ctx context.Context, subtree *object.Tree, parent string) []types.NiceTree { 47 45 nts := []types.NiceTree{} 48 46 49 - for _, e := range t.Entries { 50 - mode, _ := e.Mode.ToOSFileMode() 51 - sz, _ := t.Size(e.Name) 47 + times, err := g.calculateCommitTimeIn(ctx, subtree, parent, 2*time.Second) 48 + if err != nil { 49 + return nts 50 + } 52 51 53 - var fpath string 54 - if parent != "" { 55 - fpath = fmt.Sprintf("%s/%s", parent, e.Name) 56 - } else { 57 - fpath = e.Name 58 - } 59 - lastCommit, err := g.LastCommitForPath(fpath) 60 - if err != nil { 61 - fmt.Println("error getting last commit time:", err) 62 - // We don't want to skip the file, so worst case lets just 63 - // populate it with "defaults". 
52 + for _, e := range subtree.Entries { 53 + mode, _ := e.Mode.ToOSFileMode() 54 + sz, _ := subtree.Size(e.Name) 55 + 56 + fpath := path.Join(parent, e.Name) 57 + 58 + var lastCommit *types.LastCommitInfo 59 + if t, ok := times[fpath]; ok { 64 60 lastCommit = &types.LastCommitInfo{ 65 - Hash: g.h, 66 - Message: "", 67 - When: time.Now(), 61 + Hash: t.hash, 62 + Message: t.message, 63 + When: t.when, 68 64 } 69 65 } 70 66
+7 -6
knotserver/routes.go
··· 2 2 3 3 import ( 4 4 "compress/gzip" 5 + "context" 5 6 "crypto/hmac" 6 7 "crypto/sha256" 7 8 "encoding/hex" ··· 143 142 } 144 143 } 145 144 146 - files, err := gr.FileTree("") 145 + files, err := gr.FileTree(r.Context(), "") 147 146 if err != nil { 148 147 writeError(w, err.Error(), http.StatusInternalServerError) 149 148 l.Error("file tree", "error", err.Error()) ··· 191 190 return 192 191 } 193 192 194 - files, err := gr.FileTree(treePath) 193 + files, err := gr.FileTree(r.Context(), treePath) 195 194 if err != nil { 196 195 writeError(w, err.Error(), http.StatusInternalServerError) 197 196 l.Error("file tree", "error", err.Error()) ··· 726 725 727 726 languageFileCount := make(map[string]int) 728 727 729 - err = recurseEntireTree(gr, func(absPath string) { 728 + err = recurseEntireTree(r.Context(), gr, func(absPath string) { 730 729 lang, safe := enry.GetLanguageByExtension(absPath) 731 730 if len(lang) == 0 || !safe { 732 731 content, _ := gr.FileContentN(absPath, 1024) ··· 759 758 return 760 759 } 761 760 762 - func recurseEntireTree(git *git.GitRepo, callback func(absPath string), filePath string) error { 763 - files, err := git.FileTree(filePath) 761 + func recurseEntireTree(ctx context.Context, git *git.GitRepo, callback func(absPath string), filePath string) error { 762 + files, err := git.FileTree(ctx, filePath) 764 763 if err != nil { 765 764 log.Println(err) 766 765 return err ··· 769 768 for _, file := range files { 770 769 absPath := path.Join(filePath, file.Name) 771 770 if !file.IsFile { 772 - return recurseEntireTree(git, callback, absPath) 771 + return recurseEntireTree(ctx, git, callback, absPath) 773 772 } 774 773 callback(absPath) 775 774 }