loading up the forgejo repo on tangled to test page performance
at forgejo 199 lines 5.9 kB view raw
1// Copyright 2019 The Gitea Authors. All rights reserved. 2// SPDX-License-Identifier: MIT 3 4package code 5 6import ( 7 "context" 8 "strconv" 9 "strings" 10 11 repo_model "forgejo.org/models/repo" 12 "forgejo.org/modules/git" 13 "forgejo.org/modules/indexer/code/internal" 14 "forgejo.org/modules/log" 15 "forgejo.org/modules/setting" 16) 17 18func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) { 19 stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) 20 if err != nil { 21 return "", err 22 } 23 return strings.TrimSpace(stdout), nil 24} 25 26// getRepoChanges returns changes to repo since last indexer update 27func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { 28 status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode) 29 if err != nil { 30 return nil, err 31 } 32 33 needGenesis := len(status.CommitSha) == 0 34 if !needGenesis { 35 hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision) 36 stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) 37 needGenesis = len(stdout) == 0 38 } 39 40 if needGenesis { 41 return genesisChanges(ctx, repo, revision) 42 } 43 return nonGenesisChanges(ctx, repo, revision) 44} 45 46func isIndexable(entry *git.TreeEntry) bool { 47 if !entry.IsRegular() && !entry.IsExecutable() { 48 return false 49 } 50 name := strings.ToLower(entry.Name()) 51 for _, g := range setting.Indexer.ExcludePatterns { 52 if g.Match(name) { 53 return false 54 } 55 } 56 for _, g := range setting.Indexer.IncludePatterns { 57 if g.Match(name) { 58 return true 59 } 60 } 61 return len(setting.Indexer.IncludePatterns) == 0 62} 63 64// parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command 65func parseGitLsTreeOutput(stdout []byte) ([]internal.FileUpdate, error) { 66 entries, err := git.ParseTreeEntries(stdout) 67 if err != nil { 68 return nil, err 69 } 70 idxCount := 0 71 updates := make([]internal.FileUpdate, len(entries)) 72 for _, entry := range entries { 73 if isIndexable(entry) { 74 updates[idxCount] = internal.FileUpdate{ 75 Filename: entry.Name(), 76 BlobSha: entry.ID.String(), 77 Size: entry.Size(), 78 Sized: true, 79 } 80 idxCount++ 81 } 82 } 83 return updates[:idxCount], nil 84} 85 86// genesisChanges get changes to add repo to the indexer for the first time 87func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { 88 var changes internal.RepoChanges 89 stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) 90 if runErr != nil { 91 return nil, runErr 92 } 93 94 var err error 95 changes.Updates, err = parseGitLsTreeOutput(stdout) 96 return &changes, err 97} 98 99// nonGenesisChanges get changes since the previous indexer update 100func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { 101 diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) 102 stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) 103 if runErr != nil { 104 // previous commit sha may have been removed by a force push, so 105 // try rebuilding from scratch 106 log.Warn("git diff: %v", runErr) 107 if err := (*globalIndexer.Load()).Delete(ctx, repo.ID); err != nil { 108 return nil, err 109 } 110 return genesisChanges(ctx, repo, revision) 111 } 112 113 var changes internal.RepoChanges 114 var err error 115 updatedFilenames := make([]string, 0, 10) 116 117 updateChanges := func() error { 118 cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). 119 AddDashesAndList(updatedFilenames...) 120 lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) 121 if err != nil { 122 return err 123 } 124 125 updates, err1 := parseGitLsTreeOutput(lsTreeStdout) 126 if err1 != nil { 127 return err1 128 } 129 changes.Updates = append(changes.Updates, updates...) 130 return nil 131 } 132 lines := strings.Split(stdout, "\n") 133 for _, line := range lines { 134 line = strings.TrimSpace(line) 135 if len(line) == 0 { 136 continue 137 } 138 fields := strings.Split(line, "\t") 139 if len(fields) < 2 { 140 log.Warn("Unparsable output for diff --name-status: `%s`)", line) 141 continue 142 } 143 filename := fields[1] 144 if len(filename) == 0 { 145 continue 146 } else if filename[0] == '"' { 147 filename, err = strconv.Unquote(filename) 148 if err != nil { 149 return nil, err 150 } 151 } 152 153 switch status := fields[0][0]; status { 154 case 'M', 'A': 155 updatedFilenames = append(updatedFilenames, filename) 156 case 'D': 157 changes.RemovedFilenames = append(changes.RemovedFilenames, filename) 158 case 'R', 'C': 159 if len(fields) < 3 { 160 log.Warn("Unparsable output for diff --name-status: `%s`)", line) 161 continue 162 } 163 dest := fields[2] 164 if len(dest) == 0 { 165 log.Warn("Unparsable output for diff --name-status: `%s`)", line) 166 continue 167 } 168 if dest[0] == '"' { 169 dest, err = strconv.Unquote(dest) 170 if err != nil { 171 return nil, err 172 } 173 } 174 if status == 'R' { 175 changes.RemovedFilenames = append(changes.RemovedFilenames, filename) 176 } 177 updatedFilenames = append(updatedFilenames, dest) 178 default: 179 log.Warn("Unrecognized status: %c (line=%s)", status, line) 180 } 181 182 // According to https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/command-line-string-limitation#more-information 183 // the command line length should less than 8191 characters, assume filepath is 256, then 8191/256 = 31, so we use 30 184 if len(updatedFilenames) >= 30 { 185 if err := updateChanges(); err != nil { 186 return nil, err 187 } 188 updatedFilenames = updatedFilenames[0:0] 189 } 190 } 191 192 if len(updatedFilenames) > 0 { 193 if err := updateChanges(); err != nil { 194 return nil, err 195 } 196 } 197 198 return &changes, err 199}