[mirror] Scalable static site server for Git forges (like GitHub Pages)
at main 17 kB view raw
1package git_pages 2 3import ( 4 "context" 5 "crypto/tls" 6 "errors" 7 "flag" 8 "fmt" 9 "io" 10 "log" 11 "log/slog" 12 "net" 13 "net/http" 14 "net/http/httputil" 15 "net/url" 16 "os" 17 "path" 18 "runtime/debug" 19 "strings" 20 "time" 21 22 automemlimit "github.com/KimMachineGun/automemlimit/memlimit" 23 "github.com/c2h5oh/datasize" 24 "github.com/fatih/color" 25 "github.com/kankanreno/go-snowflake" 26 "github.com/prometheus/client_golang/prometheus/promhttp" 27 "google.golang.org/protobuf/proto" 28) 29 30var config *Config 31var wildcards []*WildcardPattern 32var fallback http.Handler 33var backend Backend 34 35func configureFeatures(ctx context.Context) (err error) { 36 if len(config.Features) > 0 { 37 logc.Println(ctx, "features:", strings.Join(config.Features, ", ")) 38 } 39 return 40} 41 42func configureMemLimit(ctx context.Context) (err error) { 43 // Avoid being OOM killed by not garbage collecting early enough. 44 memlimitBefore := datasize.ByteSize(debug.SetMemoryLimit(-1)) 45 automemlimit.SetGoMemLimitWithOpts( 46 automemlimit.WithLogger(slog.New(slog.DiscardHandler)), 47 automemlimit.WithProvider( 48 automemlimit.ApplyFallback( 49 automemlimit.FromCgroup, 50 automemlimit.FromSystem, 51 ), 52 ), 53 automemlimit.WithRatio(float64(config.Limits.MaxHeapSizeRatio)), 54 ) 55 memlimitAfter := datasize.ByteSize(debug.SetMemoryLimit(-1)) 56 if memlimitBefore == memlimitAfter { 57 logc.Println(ctx, "memlimit: now", memlimitBefore.HR()) 58 } else { 59 logc.Println(ctx, "memlimit: was", memlimitBefore.HR(), "now", memlimitAfter.HR()) 60 } 61 return 62} 63 64func configureWildcards(_ context.Context) (err error) { 65 newWildcards, err := TranslateWildcards(config.Wildcard) 66 if err != nil { 67 return err 68 } else { 69 wildcards = newWildcards 70 return nil 71 } 72} 73 74func configureFallback(_ context.Context) (err error) { 75 if config.Fallback.ProxyTo != nil { 76 fallbackURL := &config.Fallback.ProxyTo.URL 77 fallback = &httputil.ReverseProxy{ 78 Rewrite: func(r *httputil.ProxyRequest) { 79 r.SetURL(fallbackURL) 80 r.Out.Host = r.In.Host 81 r.Out.Header["X-Forwarded-For"] = r.In.Header["X-Forwarded-For"] 82 }, 83 Transport: &http.Transport{ 84 TLSClientConfig: &tls.Config{ 85 InsecureSkipVerify: config.Fallback.Insecure, 86 }, 87 }, 88 } 89 } 90 return 91} 92 93// Thread-unsafe, must be called only during initial configuration. 94func configureAudit(_ context.Context) (err error) { 95 snowflake.SetStartTime(time.Date(2025, 12, 1, 0, 0, 0, 0, time.UTC)) 96 snowflake.SetMachineID(config.Audit.NodeID) 97 return 98} 99 100func listen(ctx context.Context, name string, listen string) net.Listener { 101 if listen == "-" { 102 return nil 103 } 104 105 protocol, address, ok := strings.Cut(listen, "/") 106 if !ok { 107 logc.Fatalf(ctx, "%s: %s: malformed endpoint", name, listen) 108 } 109 110 listener, err := net.Listen(protocol, address) 111 if err != nil { 112 logc.Fatalf(ctx, "%s: %s\n", name, err) 113 } 114 115 return listener 116} 117 118func panicHandler(handler http.Handler) http.Handler { 119 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 120 defer func() { 121 if err := recover(); err != nil { 122 logc.Printf(r.Context(), "panic: %s %s %s: %s\n%s", 123 r.Method, r.Host, r.URL.Path, err, string(debug.Stack())) 124 http.Error(w, 125 fmt.Sprintf("internal server error: %s", err), 126 http.StatusInternalServerError, 127 ) 128 } 129 }() 130 handler.ServeHTTP(w, r) 131 }) 132} 133 134func serve(ctx context.Context, listener net.Listener, handler http.Handler) { 135 if listener != nil { 136 server := http.Server{Handler: handler} 137 server.Protocols = new(http.Protocols) 138 server.Protocols.SetHTTP1(true) 139 server.Protocols.SetUnencryptedHTTP2(true) 140 logc.Fatalln(ctx, server.Serve(listener)) 141 } 142} 143 144func webRootArg(arg string) string { 145 switch strings.Count(arg, "/") { 146 case 0: 147 return arg + "/.index" 148 case 1: 149 return arg 150 default: 151 logc.Fatalln(context.Background(), 152 "webroot argument must be either 'domain.tld' or 'domain.tld/dir") 153 return "" 154 } 155} 156 157func fileOutputArg() (writer io.WriteCloser) { 158 var err error 159 if flag.NArg() == 0 { 160 writer = os.Stdout 161 } else { 162 writer, err = os.Create(flag.Arg(0)) 163 if err != nil { 164 logc.Fatalln(context.Background(), err) 165 } 166 } 167 return 168} 169 170func usage() { 171 fmt.Fprintf(os.Stderr, "Usage:\n") 172 fmt.Fprintf(os.Stderr, "(server) "+ 173 "git-pages [-config <file>|-no-config]\n") 174 fmt.Fprintf(os.Stderr, "(info) "+ 175 "git-pages {-print-config-env-vars|-print-config}\n") 176 fmt.Fprintf(os.Stderr, "(debug) "+ 177 "git-pages {-list-blobs|-list-manifests}\n") 178 fmt.Fprintf(os.Stderr, "(debug) "+ 179 "git-pages {-get-blob|-get-manifest|-get-archive|-update-site} <ref> [file]\n") 180 fmt.Fprintf(os.Stderr, "(admin) "+ 181 "git-pages {-freeze-domain <domain>|-unfreeze-domain <domain>}\n") 182 fmt.Fprintf(os.Stderr, "(audit) "+ 183 "git-pages {-audit-log|-audit-read <id>|-audit-server <endpoint> <program> [args...]}\n") 184 fmt.Fprintf(os.Stderr, "(maint) "+ 185 "git-pages {-run-migration <name>|-trace-garbage}\n") 186 flag.PrintDefaults() 187} 188 189func Main() { 190 ctx := context.Background() 191 192 flag.Usage = usage 193 configTomlPath := flag.String("config", "", 194 "load configuration from `filename` (default: 'config.toml')") 195 noConfig := flag.Bool("no-config", false, 196 "run without configuration file (configure via environment variables)") 197 printConfigEnvVars := flag.Bool("print-config-env-vars", false, 198 "print every recognized configuration environment variable and exit") 199 printConfig := flag.Bool("print-config", false, 200 "print configuration as JSON and exit") 201 listBlobs := flag.Bool("list-blobs", false, 202 "enumerate every blob with its metadata") 203 listManifests := flag.Bool("list-manifests", false, 204 "enumerate every manifest with its metadata") 205 getBlob := flag.String("get-blob", "", 206 "write contents of `blob` ('sha256-xxxxxxx...xxx')") 207 getManifest := flag.String("get-manifest", "", 208 "write manifest for `site` (either 'domain.tld' or 'domain.tld/dir') as ProtoJSON") 209 getArchive := flag.String("get-archive", "", 210 "write archive for `site` (either 'domain.tld' or 'domain.tld/dir') in tar format") 211 updateSite := flag.String("update-site", "", 212 "update `site` (either 'domain.tld' or 'domain.tld/dir') from archive or repository URL") 213 freezeDomain := flag.String("freeze-domain", "", 214 "prevent any site uploads to a given `domain`") 215 unfreezeDomain := flag.String("unfreeze-domain", "", 216 "allow site uploads to a `domain` again after it has been frozen") 217 auditLog := flag.Bool("audit-log", false, 218 "display audit log") 219 auditRead := flag.String("audit-read", "", 220 "extract contents of audit record `id` to files '<id>-*'") 221 auditRollback := flag.String("audit-rollback", "", 222 "restore site from contents of audit record `id`") 223 auditServer := flag.String("audit-server", "", 224 "listen for notifications on `endpoint` and spawn a process for each audit event") 225 runMigration := flag.String("run-migration", "", 226 "run a store `migration` (one of: create-domain-markers)") 227 traceGarbage := flag.Bool("trace-garbage", false, 228 "estimate total size of unreachable blobs") 229 flag.Parse() 230 231 var cliOperations int 232 for _, selected := range []bool{ 233 *listBlobs, 234 *listManifests, 235 *getBlob != "", 236 *getManifest != "", 237 *getArchive != "", 238 *updateSite != "", 239 *freezeDomain != "", 240 *unfreezeDomain != "", 241 *auditLog, 242 *auditRead != "", 243 *auditRollback != "", 244 *auditServer != "", 245 *runMigration != "", 246 *traceGarbage, 247 } { 248 if selected { 249 cliOperations++ 250 } 251 } 252 if cliOperations > 1 { 253 logc.Fatalln(ctx, "-list-blobs, -list-manifests, -get-blob, -get-manifest, -get-archive, "+ 254 "-update-site, -freeze-domain, -unfreeze-domain, -audit-log, -audit-read, "+ 255 "-audit-rollback, -audit-server, -run-migration, and -trace-garbage are "+ 256 "mutually exclusive") 257 } 258 259 if *configTomlPath != "" && *noConfig { 260 logc.Fatalln(ctx, "-no-config and -config are mutually exclusive") 261 } 262 263 if *printConfigEnvVars { 264 PrintConfigEnvVars() 265 return 266 } 267 268 var err error 269 if *configTomlPath == "" && !*noConfig { 270 *configTomlPath = "config.toml" 271 } 272 if config, err = Configure(*configTomlPath); err != nil { 273 logc.Fatalln(ctx, "config:", err) 274 } 275 276 if *printConfig { 277 fmt.Println(config.TOML()) 278 return 279 } 280 281 InitObservability() 282 defer FiniObservability() 283 284 if err = errors.Join( 285 configureFeatures(ctx), 286 configureMemLimit(ctx), 287 configureWildcards(ctx), 288 configureFallback(ctx), 289 configureAudit(ctx), 290 ); err != nil { 291 logc.Fatalln(ctx, err) 292 } 293 294 // The server has its own logic for creating the backend. 295 if cliOperations > 0 { 296 if backend, err = CreateBackend(ctx, &config.Storage); err != nil { 297 logc.Fatalln(ctx, err) 298 } 299 } 300 301 switch { 302 case *listBlobs: 303 for metadata, err := range backend.EnumerateBlobs(ctx) { 304 if err != nil { 305 logc.Fatalln(ctx, err) 306 } 307 fmt.Fprintf(color.Output, "%s %s %s\n", 308 metadata.Name, 309 color.HiWhiteString(metadata.LastModified.UTC().Format(time.RFC3339)), 310 color.HiGreenString(fmt.Sprint(metadata.Size)), 311 ) 312 } 313 314 case *listManifests: 315 for metadata, err := range backend.EnumerateManifests(ctx) { 316 if err != nil { 317 logc.Fatalln(ctx, err) 318 } 319 fmt.Fprintf(color.Output, "%s %s %s\n", 320 metadata.Name, 321 color.HiWhiteString(metadata.LastModified.UTC().Format(time.RFC3339)), 322 color.HiGreenString(fmt.Sprint(metadata.Size)), 323 ) 324 } 325 326 case *getBlob != "": 327 reader, _, err := backend.GetBlob(ctx, *getBlob) 328 if err != nil { 329 logc.Fatalln(ctx, err) 330 } 331 io.Copy(fileOutputArg(), reader) 332 333 case *getManifest != "": 334 webRoot := webRootArg(*getManifest) 335 manifest, _, err := backend.GetManifest(ctx, webRoot, GetManifestOptions{}) 336 if err != nil { 337 logc.Fatalln(ctx, err) 338 } 339 fmt.Fprintln(fileOutputArg(), string(ManifestJSON(manifest))) 340 341 case *getArchive != "": 342 webRoot := webRootArg(*getArchive) 343 manifest, metadata, err := 344 backend.GetManifest(ctx, webRoot, GetManifestOptions{}) 345 if err != nil { 346 logc.Fatalln(ctx, err) 347 } 348 if err = CollectTar(ctx, fileOutputArg(), manifest, metadata); err != nil { 349 logc.Fatalln(ctx, err) 350 } 351 352 case *updateSite != "": 353 ctx = WithPrincipal(ctx) 354 GetPrincipal(ctx).CliAdmin = proto.Bool(true) 355 356 if flag.NArg() != 1 { 357 logc.Fatalln(ctx, "update source must be provided as the argument") 358 } 359 360 sourceURL, err := url.Parse(flag.Arg(0)) 361 if err != nil { 362 logc.Fatalln(ctx, err) 363 } 364 365 var result UpdateResult 366 if sourceURL.Scheme == "" { 367 file, err := os.Open(sourceURL.Path) 368 if err != nil { 369 logc.Fatalln(ctx, err) 370 } 371 defer file.Close() 372 373 var contentType string 374 switch { 375 case strings.HasSuffix(sourceURL.Path, ".zip"): 376 contentType = "application/zip" 377 case strings.HasSuffix(sourceURL.Path, ".tar"): 378 contentType = "application/x-tar" 379 case strings.HasSuffix(sourceURL.Path, ".tar.gz"): 380 contentType = "application/x-tar+gzip" 381 case strings.HasSuffix(sourceURL.Path, ".tar.zst"): 382 contentType = "application/x-tar+zstd" 383 default: 384 log.Fatalf("cannot determine content type from filename %q\n", sourceURL) 385 } 386 387 webRoot := webRootArg(*updateSite) 388 result = UpdateFromArchive(ctx, webRoot, contentType, file) 389 } else { 390 branch := "pages" 391 if sourceURL.Fragment != "" { 392 branch, sourceURL.Fragment = sourceURL.Fragment, "" 393 } 394 395 webRoot := webRootArg(*updateSite) 396 result = UpdateFromRepository(ctx, webRoot, sourceURL.String(), branch) 397 } 398 399 switch result.outcome { 400 case UpdateError: 401 logc.Printf(ctx, "error: %s\n", result.err) 402 os.Exit(2) 403 case UpdateTimeout: 404 logc.Println(ctx, "timeout") 405 os.Exit(1) 406 case UpdateCreated: 407 logc.Println(ctx, "created") 408 case UpdateReplaced: 409 logc.Println(ctx, "replaced") 410 case UpdateDeleted: 411 logc.Println(ctx, "deleted") 412 case UpdateNoChange: 413 logc.Println(ctx, "no-change") 414 } 415 416 case *freezeDomain != "" || *unfreezeDomain != "": 417 ctx = WithPrincipal(ctx) 418 GetPrincipal(ctx).CliAdmin = proto.Bool(true) 419 420 var domain string 421 var freeze bool 422 if *freezeDomain != "" { 423 domain = *freezeDomain 424 freeze = true 425 } else { 426 domain = *unfreezeDomain 427 freeze = false 428 } 429 430 if freeze { 431 if err = backend.FreezeDomain(ctx, domain); err != nil { 432 logc.Fatalln(ctx, err) 433 } 434 logc.Println(ctx, "frozen") 435 } else { 436 if err = backend.UnfreezeDomain(ctx, domain); err != nil { 437 logc.Fatalln(ctx, err) 438 } 439 logc.Println(ctx, "thawed") 440 } 441 442 case *auditLog: 443 ch := make(chan *AuditRecord) 444 ids := []AuditID{} 445 for id, err := range backend.SearchAuditLog(ctx, SearchAuditLogOptions{}) { 446 if err != nil { 447 logc.Fatalln(ctx, err) 448 } 449 go func() { 450 if record, err := backend.QueryAuditLog(ctx, id); err != nil { 451 logc.Fatalln(ctx, err) 452 } else { 453 ch <- record 454 } 455 }() 456 ids = append(ids, id) 457 } 458 459 records := map[AuditID]*AuditRecord{} 460 for len(records) < len(ids) { 461 record := <-ch 462 records[record.GetAuditID()] = record 463 } 464 465 for _, id := range ids { 466 record := records[id] 467 fmt.Fprintf(color.Output, "%s %s %s %s %s\n", 468 record.GetAuditID().String(), 469 color.HiWhiteString(record.GetTimestamp().AsTime().UTC().Format(time.RFC3339)), 470 color.HiMagentaString(record.DescribePrincipal()), 471 color.HiGreenString(record.DescribeResource()), 472 record.GetEvent(), 473 ) 474 } 475 476 case *auditRead != "": 477 id, err := ParseAuditID(*auditRead) 478 if err != nil { 479 logc.Fatalln(ctx, err) 480 } 481 482 record, err := backend.QueryAuditLog(ctx, id) 483 if err != nil { 484 logc.Fatalln(ctx, err) 485 } 486 487 if err = ExtractAuditRecord(ctx, id, record, "."); err != nil { 488 logc.Fatalln(ctx, err) 489 } 490 491 case *auditRollback != "": 492 ctx = WithPrincipal(ctx) 493 GetPrincipal(ctx).CliAdmin = proto.Bool(true) 494 495 id, err := ParseAuditID(*auditRollback) 496 if err != nil { 497 logc.Fatalln(ctx, err) 498 } 499 500 record, err := backend.QueryAuditLog(ctx, id) 501 if err != nil { 502 logc.Fatalln(ctx, err) 503 } 504 505 if record.GetManifest() == nil || record.GetDomain() == "" || record.GetProject() == "" { 506 logc.Fatalln(ctx, "no manifest in audit record") 507 } 508 509 webRoot := path.Join(record.GetDomain(), record.GetProject()) 510 err = backend.StageManifest(ctx, record.GetManifest()) 511 if err != nil { 512 logc.Fatalln(ctx, err) 513 } 514 err = backend.CommitManifest(ctx, webRoot, record.GetManifest(), ModifyManifestOptions{}) 515 if err != nil { 516 logc.Fatalln(ctx, err) 517 } 518 519 case *auditServer != "": 520 if flag.NArg() < 1 { 521 logc.Fatalln(ctx, "handler path not provided") 522 } 523 524 processor, err := AuditEventProcessor(flag.Arg(0), flag.Args()[1:]) 525 if err != nil { 526 logc.Fatalln(ctx, err) 527 } 528 529 serve(ctx, listen(ctx, "audit", *auditServer), ObserveHTTPHandler(processor)) 530 531 case *runMigration != "": 532 if err = RunMigration(ctx, *runMigration); err != nil { 533 logc.Fatalln(ctx, err) 534 } 535 536 case *traceGarbage: 537 if err = TraceGarbage(ctx); err != nil { 538 logc.Fatalln(ctx, err) 539 } 540 541 default: 542 // Hook a signal (SIGHUP on *nix, nothing on Windows) for reloading the configuration 543 // at runtime. This is useful because it preserves S3 backend cache contents. Failed 544 // configuration reloads will not crash the process; you may want to check the syntax 545 // first with `git-pages -config ... -print-config` since there is no other feedback. 546 // 547 // Note that not all of the configuration is updated on reload. Listeners are kept as-is. 548 // The backend is not recreated (this is intentional as it allows preserving the cache). 549 OnReload(func() { 550 if newConfig, err := Configure(*configTomlPath); err != nil { 551 logc.Println(ctx, "config: reload err:", err) 552 } else { 553 // From https://go.dev/ref/mem: 554 // > A read r of a memory location x holding a value that is not larger than 555 // > a machine word must observe some write w such that r does not happen before 556 // > w and there is no write w' such that w happens before w' and w' happens 557 // > before r. That is, each read must observe a value written by a preceding or 558 // > concurrent write. 559 config = newConfig 560 if err = errors.Join( 561 configureFeatures(ctx), 562 configureMemLimit(ctx), 563 configureWildcards(ctx), 564 configureFallback(ctx), 565 ); err != nil { 566 // At this point the configuration is in an in-between, corrupted state, so 567 // the only reasonable choice is to crash. 568 logc.Fatalln(ctx, "config: reload fail:", err) 569 } else { 570 logc.Println(ctx, "config: reload ok") 571 } 572 } 573 }) 574 575 // Start listening on all ports before initializing the backend, otherwise if the backend 576 // spends some time initializing (which the S3 backend does) a proxy like Caddy can race 577 // with git-pages on startup and return errors for requests that would have been served 578 // just 0.5s later. 579 pagesListener := listen(ctx, "pages", config.Server.Pages) 580 caddyListener := listen(ctx, "caddy", config.Server.Caddy) 581 metricsListener := listen(ctx, "metrics", config.Server.Metrics) 582 583 if backend, err = CreateBackend(ctx, &config.Storage); err != nil { 584 logc.Fatalln(ctx, err) 585 } 586 backend = NewObservedBackend(backend) 587 588 middleware := chainHTTPMiddleware( 589 panicHandler, 590 remoteAddrMiddleware, 591 ObserveHTTPHandler, 592 ) 593 go serve(ctx, pagesListener, middleware(http.HandlerFunc(ServePages))) 594 go serve(ctx, caddyListener, middleware(http.HandlerFunc(ServeCaddy))) 595 go serve(ctx, metricsListener, promhttp.Handler()) 596 597 if config.Insecure { 598 logc.Println(ctx, "serve: ready (INSECURE)") 599 } else { 600 logc.Println(ctx, "serve: ready") 601 } 602 603 WaitForInterrupt() 604 logc.Println(ctx, "serve: exiting") 605 } 606}