// loading up the forgejo repo on tangled to test page performance
// at forgejo 16 kB view raw
1// Copyright 2015 The Gogs Authors. All rights reserved. 2// Copyright 2016 The Gitea Authors. All rights reserved. 3// SPDX-License-Identifier: MIT 4 5package git 6 7import ( 8 "bytes" 9 "context" 10 "errors" 11 "fmt" 12 "io" 13 "os" 14 "os/exec" 15 "runtime/trace" 16 "strings" 17 "time" 18 19 "forgejo.org/modules/git/internal" //nolint:depguard // only this file can use the internal type CmdArg, other files and packages should use AddXxx functions 20 "forgejo.org/modules/log" 21 "forgejo.org/modules/process" 22 "forgejo.org/modules/util" 23) 24 25// TrustedCmdArgs returns the trusted arguments for git command. 26// It's mainly for passing user-provided and trusted arguments to git command 27// In most cases, it shouldn't be used. Use AddXxx function instead 28type TrustedCmdArgs []internal.CmdArg 29 30var ( 31 // globalCommandArgs global command args for external package setting 32 globalCommandArgs TrustedCmdArgs 33 34 // defaultCommandExecutionTimeout default command execution timeout duration 35 defaultCommandExecutionTimeout = 360 * time.Second 36) 37 38// DefaultLocale is the default LC_ALL to run git commands in. 39const DefaultLocale = "C" 40 41// Command represents a command with its subcommands or arguments. 42type Command struct { 43 prog string 44 args []string 45 parentContext context.Context 46 desc string 47 globalArgsLength int 48 brokenArgs []string 49} 50 51func (c *Command) String() string { 52 return c.toString(false) 53} 54 55func (c *Command) toString(sanitizing bool) string { 56 // WARNING: this function is for debugging purposes only. It's much better than old code (which only joins args with space), 57 // It's impossible to make a simple and 100% correct implementation of argument quoting for different platforms. 
58 debugQuote := func(s string) string { 59 if strings.ContainsAny(s, " `'\"\t\r\n") { 60 return fmt.Sprintf("%q", s) 61 } 62 return s 63 } 64 a := make([]string, 0, len(c.args)+1) 65 a = append(a, debugQuote(c.prog)) 66 for _, arg := range c.args { 67 if sanitizing && (strings.Contains(arg, "://") && strings.Contains(arg, "@")) { 68 a = append(a, debugQuote(util.SanitizeCredentialURLs(arg))) 69 } else { 70 a = append(a, debugQuote(arg)) 71 } 72 } 73 return strings.Join(a, " ") 74} 75 76// NewCommand creates and returns a new Git Command based on given command and arguments. 77// Each argument should be safe to be trusted. User-provided arguments should be passed to AddDynamicArguments instead. 78func NewCommand(ctx context.Context, args ...internal.CmdArg) *Command { 79 // Make an explicit copy of globalCommandArgs, otherwise append might overwrite it 80 cargs := make([]string, 0, len(globalCommandArgs)+len(args)) 81 for _, arg := range globalCommandArgs { 82 cargs = append(cargs, string(arg)) 83 } 84 for _, arg := range args { 85 cargs = append(cargs, string(arg)) 86 } 87 return &Command{ 88 prog: GitExecutable, 89 args: cargs, 90 parentContext: ctx, 91 globalArgsLength: len(globalCommandArgs), 92 } 93} 94 95// NewCommandContextNoGlobals creates and returns a new Git Command based on given command and arguments only with the specify args and don't care global command args 96// Each argument should be safe to be trusted. User-provided arguments should be passed to AddDynamicArguments instead. 
97func NewCommandContextNoGlobals(ctx context.Context, args ...internal.CmdArg) *Command { 98 cargs := make([]string, 0, len(args)) 99 for _, arg := range args { 100 cargs = append(cargs, string(arg)) 101 } 102 return &Command{ 103 prog: GitExecutable, 104 args: cargs, 105 parentContext: ctx, 106 } 107} 108 109// SetParentContext sets the parent context for this command 110func (c *Command) SetParentContext(ctx context.Context) *Command { 111 c.parentContext = ctx 112 return c 113} 114 115// SetDescription sets the description for this command which be returned on c.String() 116func (c *Command) SetDescription(desc string) *Command { 117 c.desc = desc 118 return c 119} 120 121// isSafeArgumentValue checks if the argument is safe to be used as a value (not an option) 122func isSafeArgumentValue(s string) bool { 123 return s == "" || s[0] != '-' 124} 125 126// isValidArgumentOption checks if the argument is a valid option (starting with '-'). 127// It doesn't check whether the option is supported or not 128func isValidArgumentOption(s string) bool { 129 return s != "" && s[0] == '-' 130} 131 132// AddArguments adds new git arguments (option/value) to the command. It only accepts string literals, or trusted CmdArg. 133// Type CmdArg is in the internal package, so it can not be used outside of this package directly, 134// it makes sure that user-provided arguments won't cause RCE risks. 135// User-provided arguments should be passed by other AddXxx functions 136func (c *Command) AddArguments(args ...internal.CmdArg) *Command { 137 for _, arg := range args { 138 c.args = append(c.args, string(arg)) 139 } 140 return c 141} 142 143// AddOptionValues adds a new option with a list of non-option values 144// For example: AddOptionValues("--opt", val) means 2 arguments: {"--opt", val}. 145// The values are treated as dynamic argument values. It equals to: AddArguments("--opt") then AddDynamicArguments(val). 
146func (c *Command) AddOptionValues(opt internal.CmdArg, args ...string) *Command { 147 if !isValidArgumentOption(string(opt)) { 148 c.brokenArgs = append(c.brokenArgs, string(opt)) 149 return c 150 } 151 c.args = append(c.args, string(opt)) 152 c.AddDynamicArguments(args...) 153 return c 154} 155 156// AddGitGrepExpression adds an expression option (-e) to git-grep command 157// It is different from AddOptionValues in that it allows the actual expression 158// to not be filtered out for leading dashes (which is otherwise a security feature 159// of AddOptionValues). 160func (c *Command) AddGitGrepExpression(exp string) *Command { 161 if c.args[len(globalCommandArgs)] != "grep" { 162 panic("function called on a non-grep git program: " + c.args[0]) 163 } 164 c.args = append(c.args, "-e", exp) 165 return c 166} 167 168// AddOptionFormat adds a new option with a format string and arguments 169// For example: AddOptionFormat("--opt=%s %s", val1, val2) means 1 argument: {"--opt=val1 val2"}. 170func (c *Command) AddOptionFormat(opt string, args ...any) *Command { 171 if !isValidArgumentOption(opt) { 172 c.brokenArgs = append(c.brokenArgs, opt) 173 return c 174 } 175 // a quick check to make sure the format string matches the number of arguments, to find low-level mistakes ASAP 176 if strings.Count(strings.ReplaceAll(opt, "%%", ""), "%") != len(args) { 177 c.brokenArgs = append(c.brokenArgs, opt) 178 return c 179 } 180 s := fmt.Sprintf(opt, args...) 181 c.args = append(c.args, s) 182 return c 183} 184 185// AddDynamicArguments adds new dynamic argument values to the command. 186// The arguments may come from user input and can not be trusted, so no leading '-' is allowed to avoid passing options. 
187// TODO: in the future, this function can be renamed to AddArgumentValues 188func (c *Command) AddDynamicArguments(args ...string) *Command { 189 for _, arg := range args { 190 if !isSafeArgumentValue(arg) { 191 c.brokenArgs = append(c.brokenArgs, arg) 192 } 193 } 194 if len(c.brokenArgs) != 0 { 195 return c 196 } 197 c.args = append(c.args, args...) 198 return c 199} 200 201// AddDashesAndList adds the "--" and then add the list as arguments, it's usually for adding file list 202// At the moment, this function can be only called once, maybe in future it can be refactored to support multiple calls (if necessary) 203func (c *Command) AddDashesAndList(list ...string) *Command { 204 c.args = append(c.args, "--") 205 // Some old code also checks `arg != ""`, IMO it's not necessary. 206 // If the check is needed, the list should be prepared before the call to this function 207 c.args = append(c.args, list...) 208 return c 209} 210 211// ToTrustedCmdArgs converts a list of strings (trusted as argument) to TrustedCmdArgs 212// In most cases, it shouldn't be used. Use NewCommand().AddXxx() function instead 213func ToTrustedCmdArgs(args []string) TrustedCmdArgs { 214 ret := make(TrustedCmdArgs, len(args)) 215 for i, arg := range args { 216 ret[i] = internal.CmdArg(arg) 217 } 218 return ret 219} 220 221// RunOpts represents parameters to run the command. If UseContextTimeout is specified, then Timeout is ignored. 
222type RunOpts struct { 223 Env []string 224 Timeout time.Duration 225 UseContextTimeout bool 226 227 // Dir is the working dir for the git command, however: 228 // FIXME: this could be incorrect in many cases, for example: 229 // * /some/path/.git 230 // * /some/path/.git/gitea-data/data/repositories/user/repo.git 231 // If "user/repo.git" is invalid/broken, then running git command in it will use "/some/path/.git", and produce unexpected results 232 // The correct approach is to use `--git-dir" global argument 233 Dir string 234 235 Stdout, Stderr io.Writer 236 237 // Stdin is used for passing input to the command 238 // The caller must make sure the Stdin writer is closed properly to finish the Run function. 239 // Otherwise, the Run function may hang for long time or forever, especially when the Git's context deadline is not the same as the caller's. 240 // Some common mistakes: 241 // * `defer stdinWriter.Close()` then call `cmd.Run()`: the Run() would never return if the command is killed by timeout 242 // * `go { case <- parentContext.Done(): stdinWriter.Close() }` with `cmd.Run(DefaultTimeout)`: the command would have been killed by timeout but the Run doesn't return until stdinWriter.Close() 243 // * `go { if stdoutReader.Read() err != nil: stdinWriter.Close() }` with `cmd.Run()`: the stdoutReader may never return error if the command is killed by timeout 244 // In the future, ideally the git module itself should have full control of the stdin, to avoid such problems and make it easier to refactor to a better architecture. 
245 Stdin io.Reader 246 247 PipelineFunc func(context.Context, context.CancelFunc) error 248} 249 250func commonBaseEnvs() []string { 251 // at the moment, do not set "GIT_CONFIG_NOSYSTEM", users may have put some configs like "receive.certNonceSeed" in it 252 envs := []string{ 253 "HOME=" + HomeDir(), // make Gitea use internal git config only, to prevent conflicts with user's git config 254 "GIT_NO_REPLACE_OBJECTS=1", // ignore replace references (https://git-scm.com/docs/git-replace) 255 } 256 257 // some environment variables should be passed to git command 258 passThroughEnvKeys := []string{ 259 "GNUPGHOME", // git may call gnupg to do commit signing 260 } 261 for _, key := range passThroughEnvKeys { 262 if val, ok := os.LookupEnv(key); ok { 263 envs = append(envs, key+"="+val) 264 } 265 } 266 return envs 267} 268 269// CommonGitCmdEnvs returns the common environment variables for a "git" command. 270func CommonGitCmdEnvs() []string { 271 return append(commonBaseEnvs(), []string{ 272 "LC_ALL=" + DefaultLocale, 273 "GIT_TERMINAL_PROMPT=0", // avoid prompting for credentials interactively, supported since git v2.3 274 }...) 
275} 276 277// CommonCmdServEnvs is like CommonGitCmdEnvs, but it only returns minimal required environment variables for the "gitea serv" command 278func CommonCmdServEnvs() []string { 279 return commonBaseEnvs() 280} 281 282var ErrBrokenCommand = errors.New("git command is broken") 283 284// Run runs the command with the RunOpts 285func (c *Command) Run(opts *RunOpts) error { 286 if len(c.brokenArgs) != 0 { 287 log.Error("git command is broken: %s, broken args: %s", c.String(), strings.Join(c.brokenArgs, " ")) 288 return ErrBrokenCommand 289 } 290 if opts == nil { 291 opts = &RunOpts{} 292 } 293 294 // We must not change the provided options 295 timeout := opts.Timeout 296 if timeout <= 0 { 297 timeout = defaultCommandExecutionTimeout 298 } 299 300 if len(opts.Dir) == 0 { 301 log.Debug("git.Command.Run: %s", c) 302 } else { 303 log.Debug("git.Command.RunDir(%s): %s", opts.Dir, c) 304 } 305 306 desc := c.desc 307 if desc == "" { 308 if opts.Dir == "" { 309 desc = fmt.Sprintf("git: %s", c.toString(true)) 310 } else { 311 desc = fmt.Sprintf("git(dir:%s): %s", opts.Dir, c.toString(true)) 312 } 313 } 314 315 var ctx context.Context 316 var cancel context.CancelFunc 317 var finished context.CancelFunc 318 319 if opts.UseContextTimeout { 320 ctx, cancel, finished = process.GetManager().AddTypedContext(c.parentContext, desc, process.GitProcessType, true) 321 } else { 322 ctx, cancel, finished = process.GetManager().AddTypedContextTimeout(c.parentContext, timeout, desc, process.GitProcessType, true) 323 } 324 defer finished() 325 326 trace.Log(ctx, "command", desc) 327 startTime := time.Now() 328 329 cmd := exec.CommandContext(ctx, c.prog, c.args...) 330 if opts.Env == nil { 331 cmd.Env = os.Environ() 332 } else { 333 cmd.Env = opts.Env 334 } 335 336 process.SetSysProcAttribute(cmd) 337 cmd.Env = append(cmd.Env, CommonGitCmdEnvs()...) 
338 cmd.Dir = opts.Dir 339 cmd.Stdout = opts.Stdout 340 cmd.Stderr = opts.Stderr 341 cmd.Stdin = opts.Stdin 342 if err := cmd.Start(); err != nil { 343 return err 344 } 345 346 if opts.PipelineFunc != nil { 347 err := opts.PipelineFunc(ctx, cancel) 348 if err != nil { 349 cancel() 350 _ = cmd.Wait() 351 return err 352 } 353 } 354 355 err := cmd.Wait() 356 elapsed := time.Since(startTime) 357 if elapsed > time.Second { 358 log.Debug("slow git.Command.Run: %s (%s)", c, elapsed) 359 } 360 361 if err != nil && ctx.Err() != context.DeadlineExceeded { 362 return err 363 } 364 365 return ctx.Err() 366} 367 368type RunStdError interface { 369 error 370 Unwrap() error 371 Stderr() string 372} 373 374type runStdError struct { 375 err error 376 stderr string 377 errMsg string 378} 379 380func (r *runStdError) Error() string { 381 // the stderr must be in the returned error text, some code only checks `strings.Contains(err.Error(), "git error")` 382 if r.errMsg == "" { 383 r.errMsg = ConcatenateError(r.err, r.stderr).Error() 384 } 385 return r.errMsg 386} 387 388func (r *runStdError) Unwrap() error { 389 return r.err 390} 391 392func (r *runStdError) Stderr() string { 393 return r.stderr 394} 395 396func IsErrorExitCode(err error, code int) bool { 397 var exitError *exec.ExitError 398 if errors.As(err, &exitError) { 399 return exitError.ExitCode() == code 400 } 401 return false 402} 403 404// RunStdString runs the command with options and returns stdout/stderr as string. and store stderr to returned error (err combined with stderr). 
405func (c *Command) RunStdString(opts *RunOpts) (stdout, stderr string, runErr RunStdError) { 406 stdoutBytes, stderrBytes, err := c.RunStdBytes(opts) 407 stdout = util.UnsafeBytesToString(stdoutBytes) 408 stderr = util.UnsafeBytesToString(stderrBytes) 409 if err != nil { 410 return stdout, stderr, &runStdError{err: err, stderr: stderr} 411 } 412 // even if there is no err, there could still be some stderr output, so we just return stdout/stderr as they are 413 return stdout, stderr, nil 414} 415 416// RunStdBytes runs the command with options and returns stdout/stderr as bytes. and store stderr to returned error (err combined with stderr). 417func (c *Command) RunStdBytes(opts *RunOpts) (stdout, stderr []byte, runErr RunStdError) { 418 if opts == nil { 419 opts = &RunOpts{} 420 } 421 if opts.Stdout != nil || opts.Stderr != nil { 422 // we must panic here, otherwise there would be bugs if developers set Stdin/Stderr by mistake, and it would be very difficult to debug 423 panic("stdout and stderr field must be nil when using RunStdBytes") 424 } 425 stdoutBuf := &bytes.Buffer{} 426 stderrBuf := &bytes.Buffer{} 427 428 // We must not change the provided options as it could break future calls - therefore make a copy. 
429 newOpts := &RunOpts{ 430 Env: opts.Env, 431 Timeout: opts.Timeout, 432 UseContextTimeout: opts.UseContextTimeout, 433 Dir: opts.Dir, 434 Stdout: stdoutBuf, 435 Stderr: stderrBuf, 436 Stdin: opts.Stdin, 437 PipelineFunc: opts.PipelineFunc, 438 } 439 440 err := c.Run(newOpts) 441 stderr = stderrBuf.Bytes() 442 if err != nil { 443 return nil, stderr, &runStdError{err: err, stderr: util.UnsafeBytesToString(stderr)} 444 } 445 // even if there is no err, there could still be some stderr output 446 return stdoutBuf.Bytes(), stderr, nil 447} 448 449// AllowLFSFiltersArgs return globalCommandArgs with lfs filter, it should only be used for tests 450func AllowLFSFiltersArgs() TrustedCmdArgs { 451 // Now here we should explicitly allow lfs filters to run 452 filteredLFSGlobalArgs := make(TrustedCmdArgs, len(globalCommandArgs)) 453 j := 0 454 for _, arg := range globalCommandArgs { 455 if strings.Contains(string(arg), "lfs") { 456 j-- 457 } else { 458 filteredLFSGlobalArgs[j] = arg 459 j++ 460 } 461 } 462 return filteredLFSGlobalArgs[:j] 463}