1// Copyright 2019 The Gitea Authors. All rights reserved.
2// SPDX-License-Identifier: MIT
3
4package code
5
6import (
7 "context"
8 "strconv"
9 "strings"
10
11 repo_model "forgejo.org/models/repo"
12 "forgejo.org/modules/git"
13 "forgejo.org/modules/indexer/code/internal"
14 "forgejo.org/modules/log"
15 "forgejo.org/modules/setting"
16)
17
18func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) {
19 stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
20 if err != nil {
21 return "", err
22 }
23 return strings.TrimSpace(stdout), nil
24}
25
26// getRepoChanges returns changes to repo since last indexer update
27func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) {
28 status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode)
29 if err != nil {
30 return nil, err
31 }
32
33 needGenesis := len(status.CommitSha) == 0
34 if !needGenesis {
35 hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision)
36 stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
37 needGenesis = len(stdout) == 0
38 }
39
40 if needGenesis {
41 return genesisChanges(ctx, repo, revision)
42 }
43 return nonGenesisChanges(ctx, repo, revision)
44}
45
46func isIndexable(entry *git.TreeEntry) bool {
47 if !entry.IsRegular() && !entry.IsExecutable() {
48 return false
49 }
50 name := strings.ToLower(entry.Name())
51 for _, g := range setting.Indexer.ExcludePatterns {
52 if g.Match(name) {
53 return false
54 }
55 }
56 for _, g := range setting.Indexer.IncludePatterns {
57 if g.Match(name) {
58 return true
59 }
60 }
61 return len(setting.Indexer.IncludePatterns) == 0
62}
63
64// parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command
65func parseGitLsTreeOutput(stdout []byte) ([]internal.FileUpdate, error) {
66 entries, err := git.ParseTreeEntries(stdout)
67 if err != nil {
68 return nil, err
69 }
70 idxCount := 0
71 updates := make([]internal.FileUpdate, len(entries))
72 for _, entry := range entries {
73 if isIndexable(entry) {
74 updates[idxCount] = internal.FileUpdate{
75 Filename: entry.Name(),
76 BlobSha: entry.ID.String(),
77 Size: entry.Size(),
78 Sized: true,
79 }
80 idxCount++
81 }
82 }
83 return updates[:idxCount], nil
84}
85
86// genesisChanges get changes to add repo to the indexer for the first time
87func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) {
88 var changes internal.RepoChanges
89 stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()})
90 if runErr != nil {
91 return nil, runErr
92 }
93
94 var err error
95 changes.Updates, err = parseGitLsTreeOutput(stdout)
96 return &changes, err
97}
98
99// nonGenesisChanges get changes since the previous indexer update
100func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) {
101 diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision)
102 stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
103 if runErr != nil {
104 // previous commit sha may have been removed by a force push, so
105 // try rebuilding from scratch
106 log.Warn("git diff: %v", runErr)
107 if err := (*globalIndexer.Load()).Delete(ctx, repo.ID); err != nil {
108 return nil, err
109 }
110 return genesisChanges(ctx, repo, revision)
111 }
112
113 var changes internal.RepoChanges
114 var err error
115 updatedFilenames := make([]string, 0, 10)
116
117 updateChanges := func() error {
118 cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision).
119 AddDashesAndList(updatedFilenames...)
120 lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()})
121 if err != nil {
122 return err
123 }
124
125 updates, err1 := parseGitLsTreeOutput(lsTreeStdout)
126 if err1 != nil {
127 return err1
128 }
129 changes.Updates = append(changes.Updates, updates...)
130 return nil
131 }
132 lines := strings.Split(stdout, "\n")
133 for _, line := range lines {
134 line = strings.TrimSpace(line)
135 if len(line) == 0 {
136 continue
137 }
138 fields := strings.Split(line, "\t")
139 if len(fields) < 2 {
140 log.Warn("Unparsable output for diff --name-status: `%s`)", line)
141 continue
142 }
143 filename := fields[1]
144 if len(filename) == 0 {
145 continue
146 } else if filename[0] == '"' {
147 filename, err = strconv.Unquote(filename)
148 if err != nil {
149 return nil, err
150 }
151 }
152
153 switch status := fields[0][0]; status {
154 case 'M', 'A':
155 updatedFilenames = append(updatedFilenames, filename)
156 case 'D':
157 changes.RemovedFilenames = append(changes.RemovedFilenames, filename)
158 case 'R', 'C':
159 if len(fields) < 3 {
160 log.Warn("Unparsable output for diff --name-status: `%s`)", line)
161 continue
162 }
163 dest := fields[2]
164 if len(dest) == 0 {
165 log.Warn("Unparsable output for diff --name-status: `%s`)", line)
166 continue
167 }
168 if dest[0] == '"' {
169 dest, err = strconv.Unquote(dest)
170 if err != nil {
171 return nil, err
172 }
173 }
174 if status == 'R' {
175 changes.RemovedFilenames = append(changes.RemovedFilenames, filename)
176 }
177 updatedFilenames = append(updatedFilenames, dest)
178 default:
179 log.Warn("Unrecognized status: %c (line=%s)", status, line)
180 }
181
182 // According to https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/command-line-string-limitation#more-information
183 // the command line length should less than 8191 characters, assume filepath is 256, then 8191/256 = 31, so we use 30
184 if len(updatedFilenames) >= 30 {
185 if err := updateChanges(); err != nil {
186 return nil, err
187 }
188 updatedFilenames = updatedFilenames[0:0]
189 }
190 }
191
192 if len(updatedFilenames) > 0 {
193 if err := updateChanges(); err != nil {
194 return nil, err
195 }
196 }
197
198 return &changes, err
199}