package git_pages

import (
	"context"
	"crypto/tls"
	"errors"
	"flag"
	"fmt"
	"io"
	"log/slog"
	"net"
	"net/http"
	"net/http/httputil"
	"net/url"
	"os"
	"path"
	"runtime/debug"
	"strings"
	"time"

	automemlimit "github.com/KimMachineGun/automemlimit/memlimit"
	"github.com/c2h5oh/datasize"
	"github.com/fatih/color"
	"github.com/kankanreno/go-snowflake"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"google.golang.org/protobuf/proto"
)

// Package-level state. Written during startup (and, except for backend and
// listeners, on configuration reload) and read by handlers defined elsewhere
// in this package.
var config *Config
var wildcards []*WildcardPattern
var fallback http.Handler
var backend Backend

// configureFeatures logs the list of enabled feature flags, if any.
func configureFeatures(ctx context.Context) (err error) {
	if len(config.Features) > 0 {
		logc.Println(ctx, "features:", strings.Join(config.Features, ", "))
	}
	return
}

// configureMemLimit sets the Go soft memory limit from the cgroup limit (or,
// failing that, total system memory) scaled by Limits.MaxHeapSizeRatio.
// Avoid being OOM killed by not garbage collecting early enough.
func configureMemLimit(ctx context.Context) (err error) {
	// debug.SetMemoryLimit(-1) does not change the limit; it returns the
	// current one, which lets us log before/after values.
	memlimitBefore := datasize.ByteSize(debug.SetMemoryLimit(-1))
	// Best effort: if no limit can be determined, the previous limit is kept.
	automemlimit.SetGoMemLimitWithOpts(
		automemlimit.WithLogger(slog.New(slog.DiscardHandler)),
		automemlimit.WithProvider(
			automemlimit.ApplyFallback(
				automemlimit.FromCgroup,
				automemlimit.FromSystem,
			),
		),
		automemlimit.WithRatio(float64(config.Limits.MaxHeapSizeRatio)),
	)
	memlimitAfter := datasize.ByteSize(debug.SetMemoryLimit(-1))
	if memlimitBefore == memlimitAfter {
		logc.Println(ctx, "memlimit: now", memlimitBefore.HR())
	} else {
		logc.Println(ctx, "memlimit: was", memlimitBefore.HR(), "now", memlimitAfter.HR())
	}
	return
}

// configureWildcards compiles config.Wildcard into the package-level wildcard
// pattern list.
func configureWildcards(_ context.Context) (err error) {
	newWildcards, err := TranslateWildcards(config.Wildcard)
	if err != nil {
		return err
	}
	wildcards = newWildcards
	return nil
}

// configureFallback installs a reverse proxy for requests no site matches,
// when Fallback.ProxyTo is configured; otherwise the fallback is left as-is.
func configureFallback(_ context.Context) (err error) {
	if config.Fallback.ProxyTo != nil {
		fallbackURL := &config.Fallback.ProxyTo.URL
		fallback = &httputil.ReverseProxy{
			Rewrite: func(r *httputil.ProxyRequest) {
				r.SetURL(fallbackURL)
				// Preserve the original Host and client chain for upstream.
				r.Out.Host = r.In.Host
				r.Out.Header["X-Forwarded-For"] = r.In.Header["X-Forwarded-For"]
			},
			Transport: &http.Transport{
				TLSClientConfig: &tls.Config{
					InsecureSkipVerify: config.Fallback.Insecure,
				},
			},
		}
	}
	return
}

// configureAudit seeds the snowflake generator used for audit record IDs.
// Thread-unsafe, must be called only during initial configuration.
func configureAudit(_ context.Context) (err error) {
	snowflake.SetStartTime(time.Date(2025, 12, 1, 0, 0, 0, 0, time.UTC))
	snowflake.SetMachineID(config.Audit.NodeID)
	return
}

// listen opens a listener for a "protocol/address" endpoint string, returning
// nil when the endpoint is "-" (disabled). Any failure is fatal.
func listen(ctx context.Context, name string, listen string) net.Listener {
	if listen == "-" {
		return nil
	}
	protocol, address, ok := strings.Cut(listen, "/")
	if !ok {
		logc.Fatalf(ctx, "%s: %s: malformed endpoint", name, listen)
	}
	listener, err := net.Listen(protocol, address)
	if err != nil {
		logc.Fatalf(ctx, "%s: %s\n", name, err)
	}
	return listener
}

// panicHandler wraps an HTTP handler so that a panic in it is logged with a
// stack trace and converted into a 500 response instead of killing the server.
func panicHandler(handler http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		defer func() {
			if err := recover(); err != nil {
				logc.Printf(r.Context(), "panic: %s %s %s: %s\n%s",
					r.Method, r.Host, r.URL.Path, err, string(debug.Stack()))
				http.Error(w,
					fmt.Sprintf("internal server error: %s", err),
					http.StatusInternalServerError,
				)
			}
		}()
		handler.ServeHTTP(w, r)
	})
}

// serve runs an HTTP/1 (and unencrypted HTTP/2) server on the listener;
// a nil listener (disabled endpoint) is a no-op. Serve errors are fatal.
func serve(ctx context.Context, listener net.Listener, handler http.Handler) {
	if listener != nil {
		server := http.Server{Handler: handler}
		server.Protocols = new(http.Protocols)
		server.Protocols.SetHTTP1(true)
		server.Protocols.SetUnencryptedHTTP2(true)
		logc.Fatalln(ctx, server.Serve(listener))
	}
}

// webRootArg normalizes a CLI site argument: a bare 'domain.tld' refers to
// the '.index' project, while 'domain.tld/dir' is used verbatim.
func webRootArg(arg string) string {
	switch strings.Count(arg, "/") {
	case 0:
		return arg + "/.index"
	case 1:
		return arg
	default:
		logc.Fatalln(context.Background(),
			"webroot argument must be either 'domain.tld' or 'domain.tld/dir'")
		return ""
	}
}

// fileOutputArg returns the writer a CLI command should write its output to:
// a file named by the first positional argument, or stdout when absent.
func fileOutputArg() (writer io.WriteCloser) {
	var err error
	if flag.NArg() == 0 {
		writer = os.Stdout
	} else {
		writer, err = os.Create(flag.Arg(0))
		if err != nil {
			logc.Fatalln(context.Background(), err)
		}
	}
	return
}

// usage prints a synopsis of every operating mode followed by flag defaults.
func usage() {
	fmt.Fprintf(os.Stderr, "Usage:\n")
	fmt.Fprintf(os.Stderr, "(server) "+
		"git-pages [-config <filename>|-no-config]\n")
	fmt.Fprintf(os.Stderr, "(info) "+
		"git-pages {-print-config-env-vars|-print-config}\n")
	fmt.Fprintf(os.Stderr, "(debug) "+
		"git-pages {-list-blobs|-list-manifests}\n")
	fmt.Fprintf(os.Stderr, "(debug) "+
		"git-pages {-get-blob|-get-manifest|-get-archive|-update-site} [file]\n")
	fmt.Fprintf(os.Stderr, "(admin) "+
		"git-pages {-freeze-domain <domain>|-unfreeze-domain <domain>}\n")
	fmt.Fprintf(os.Stderr, "(audit) "+
		"git-pages {-audit-log|-audit-read <id>|-audit-rollback <id>|-audit-server <endpoint> [args...]}\n")
	fmt.Fprintf(os.Stderr, "(maint) "+
		"git-pages {-run-migration <migration>|-trace-garbage}\n")
	flag.PrintDefaults()
}

// Main is the entry point shared by the server and all CLI operating modes.
// It parses flags, loads configuration, and either performs a single CLI
// operation or starts the pages/caddy/metrics servers.
func Main() {
	ctx := context.Background()

	flag.Usage = usage
	configTomlPath := flag.String("config", "",
		"load configuration from `filename` (default: 'config.toml')")
	noConfig := flag.Bool("no-config", false,
		"run without configuration file (configure via environment variables)")
	printConfigEnvVars := flag.Bool("print-config-env-vars", false,
		"print every recognized configuration environment variable and exit")
	printConfig := flag.Bool("print-config", false,
		"print configuration as JSON and exit")
	listBlobs := flag.Bool("list-blobs", false,
		"enumerate every blob with its metadata")
	listManifests := flag.Bool("list-manifests", false,
		"enumerate every manifest with its metadata")
	getBlob := flag.String("get-blob", "",
		"write contents of `blob` ('sha256-xxxxxxx...xxx')")
	getManifest := flag.String("get-manifest", "",
		"write manifest for `site` (either 'domain.tld' or 'domain.tld/dir') as ProtoJSON")
	getArchive := flag.String("get-archive", "",
		"write archive for `site` (either 'domain.tld' or 'domain.tld/dir') in tar format")
	updateSite := flag.String("update-site", "",
		"update `site` (either 'domain.tld' or 'domain.tld/dir') from archive or repository URL")
	freezeDomain := flag.String("freeze-domain", "",
		"prevent any site uploads to a given `domain`")
	unfreezeDomain := flag.String("unfreeze-domain", "",
		"allow site uploads to a `domain` again after it has been frozen")
	auditLog := flag.Bool("audit-log", false,
		"display audit log")
	auditRead := flag.String("audit-read", "",
		"extract contents of audit record `id` to files '<id>-*'")
	auditRollback := flag.String("audit-rollback", "",
		"restore site from contents of audit record `id`")
	auditServer := flag.String("audit-server", "",
		"listen for notifications on `endpoint` and spawn a process for each audit event")
	runMigration := flag.String("run-migration", "",
		"run a store `migration` (one of: create-domain-markers)")
	traceGarbage := flag.Bool("trace-garbage", false,
		"estimate total size of unreachable blobs")
	flag.Parse()

	// At most one CLI operation may be selected; none means "run the server".
	var cliOperations int
	for _, selected := range []bool{
		*listBlobs, *listManifests, *getBlob != "", *getManifest != "", *getArchive != "",
		*updateSite != "", *freezeDomain != "", *unfreezeDomain != "", *auditLog,
		*auditRead != "", *auditRollback != "", *auditServer != "", *runMigration != "",
		*traceGarbage,
	} {
		if selected {
			cliOperations++
		}
	}
	if cliOperations > 1 {
		logc.Fatalln(ctx,
			"-list-blobs, -list-manifests, -get-blob, -get-manifest, -get-archive, "+
				"-update-site, -freeze-domain, -unfreeze-domain, -audit-log, -audit-read, "+
				"-audit-rollback, -audit-server, -run-migration, and -trace-garbage are "+
				"mutually exclusive")
	}
	if *configTomlPath != "" && *noConfig {
		logc.Fatalln(ctx, "-no-config and -config are mutually exclusive")
	}

	if *printConfigEnvVars {
		PrintConfigEnvVars()
		return
	}

	var err error
	if *configTomlPath == "" && !*noConfig {
		*configTomlPath = "config.toml"
	}
	if config, err = Configure(*configTomlPath); err != nil {
		logc.Fatalln(ctx, "config:", err)
	}
	if *printConfig {
		// NOTE(review): the -print-config help text says JSON, but this emits
		// config.TOML(); confirm which is intended and align the two.
		fmt.Println(config.TOML())
		return
	}

	InitObservability()
	defer FiniObservability()

	if err = errors.Join(
		configureFeatures(ctx),
		configureMemLimit(ctx),
		configureWildcards(ctx),
		configureFallback(ctx),
		configureAudit(ctx),
	); err != nil {
		logc.Fatalln(ctx, err)
	}

	// The server has its own logic for creating the backend.
	if cliOperations > 0 {
		if backend, err = CreateBackend(ctx, &config.Storage); err != nil {
			logc.Fatalln(ctx, err)
		}
	}

	switch {
	case *listBlobs:
		for metadata, err := range backend.EnumerateBlobs(ctx) {
			if err != nil {
				logc.Fatalln(ctx, err)
			}
			fmt.Fprintf(color.Output, "%s %s %s\n",
				metadata.Name,
				color.HiWhiteString(metadata.LastModified.UTC().Format(time.RFC3339)),
				color.HiGreenString(fmt.Sprint(metadata.Size)),
			)
		}

	case *listManifests:
		for metadata, err := range backend.EnumerateManifests(ctx) {
			if err != nil {
				logc.Fatalln(ctx, err)
			}
			fmt.Fprintf(color.Output, "%s %s %s\n",
				metadata.Name,
				color.HiWhiteString(metadata.LastModified.UTC().Format(time.RFC3339)),
				color.HiGreenString(fmt.Sprint(metadata.Size)),
			)
		}

	case *getBlob != "":
		reader, _, err := backend.GetBlob(ctx, *getBlob)
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		// Report a failed or truncated copy instead of silently exiting 0.
		if _, err = io.Copy(fileOutputArg(), reader); err != nil {
			logc.Fatalln(ctx, err)
		}

	case *getManifest != "":
		webRoot := webRootArg(*getManifest)
		manifest, _, err := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		fmt.Fprintln(fileOutputArg(), string(ManifestJSON(manifest)))

	case *getArchive != "":
		webRoot := webRootArg(*getArchive)
		manifest, metadata, err := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		if err = CollectTar(ctx, fileOutputArg(), manifest, metadata); err != nil {
			logc.Fatalln(ctx, err)
		}

	case *updateSite != "":
		ctx = WithPrincipal(ctx)
		GetPrincipal(ctx).CliAdmin = proto.Bool(true)
		if flag.NArg() != 1 {
			logc.Fatalln(ctx, "update source must be provided as the argument")
		}
		sourceURL, err := url.Parse(flag.Arg(0))
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		var result UpdateResult
		if sourceURL.Scheme == "" {
			// A schemeless source is a local archive file; its format is
			// derived from the file extension.
			file, err := os.Open(sourceURL.Path)
			if err != nil {
				logc.Fatalln(ctx, err)
			}
			defer file.Close()
			var contentType string
			switch {
			case strings.HasSuffix(sourceURL.Path, ".zip"):
				contentType = "application/zip"
			case strings.HasSuffix(sourceURL.Path, ".tar"):
				contentType = "application/x-tar"
			case strings.HasSuffix(sourceURL.Path, ".tar.gz"):
				contentType = "application/x-tar+gzip"
			case strings.HasSuffix(sourceURL.Path, ".tar.zst"):
				contentType = "application/x-tar+zstd"
			default:
				logc.Fatalf(ctx, "cannot determine content type from filename %q\n",
					sourceURL.Path)
			}
			webRoot := webRootArg(*updateSite)
			result = UpdateFromArchive(ctx, webRoot, contentType, file)
		} else {
			// Otherwise the source is a repository URL; an optional #fragment
			// selects the branch (default "pages").
			branch := "pages"
			if sourceURL.Fragment != "" {
				branch, sourceURL.Fragment = sourceURL.Fragment, ""
			}
			webRoot := webRootArg(*updateSite)
			result = UpdateFromRepository(ctx, webRoot, sourceURL.String(), branch)
		}
		switch result.outcome {
		case UpdateError:
			logc.Printf(ctx, "error: %s\n", result.err)
			os.Exit(2)
		case UpdateTimeout:
			logc.Println(ctx, "timeout")
			os.Exit(1)
		case UpdateCreated:
			logc.Println(ctx, "created")
		case UpdateReplaced:
			logc.Println(ctx, "replaced")
		case UpdateDeleted:
			logc.Println(ctx, "deleted")
		case UpdateNoChange:
			logc.Println(ctx, "no-change")
		}

	case *freezeDomain != "" || *unfreezeDomain != "":
		ctx = WithPrincipal(ctx)
		GetPrincipal(ctx).CliAdmin = proto.Bool(true)
		var domain string
		var freeze bool
		if *freezeDomain != "" {
			domain = *freezeDomain
			freeze = true
		} else {
			domain = *unfreezeDomain
			freeze = false
		}
		if freeze {
			if err = backend.FreezeDomain(ctx, domain); err != nil {
				logc.Fatalln(ctx, err)
			}
			logc.Println(ctx, "frozen")
		} else {
			if err = backend.UnfreezeDomain(ctx, domain); err != nil {
				logc.Fatalln(ctx, err)
			}
			logc.Println(ctx, "thawed")
		}

	case *auditLog:
		// Fetch every record concurrently (one goroutine per ID; the fan-out
		// is unbounded, sized by the number of audit records), then print
		// them in the order the search returned their IDs.
		ch := make(chan *AuditRecord)
		ids := []AuditID{}
		for id, err := range backend.SearchAuditLog(ctx, SearchAuditLogOptions{}) {
			if err != nil {
				logc.Fatalln(ctx, err)
			}
			go func() {
				if record, err := backend.QueryAuditLog(ctx, id); err != nil {
					logc.Fatalln(ctx, err)
				} else {
					ch <- record
				}
			}()
			ids = append(ids, id)
		}
		records := map[AuditID]*AuditRecord{}
		for len(records) < len(ids) {
			record := <-ch
			records[record.GetAuditID()] = record
		}
		for _, id := range ids {
			record := records[id]
			fmt.Fprintf(color.Output, "%s %s %s %s %s\n",
				record.GetAuditID().String(),
				color.HiWhiteString(record.GetTimestamp().AsTime().UTC().Format(time.RFC3339)),
				color.HiMagentaString(record.DescribePrincipal()),
				color.HiGreenString(record.DescribeResource()),
				record.GetEvent(),
			)
		}

	case *auditRead != "":
		id, err := ParseAuditID(*auditRead)
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		record, err := backend.QueryAuditLog(ctx, id)
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		if err = ExtractAuditRecord(ctx, id, record, "."); err != nil {
			logc.Fatalln(ctx, err)
		}

	case *auditRollback != "":
		ctx = WithPrincipal(ctx)
		GetPrincipal(ctx).CliAdmin = proto.Bool(true)
		id, err := ParseAuditID(*auditRollback)
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		record, err := backend.QueryAuditLog(ctx, id)
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		if record.GetManifest() == nil || record.GetDomain() == "" || record.GetProject() == "" {
			logc.Fatalln(ctx, "no manifest in audit record")
		}
		webRoot := path.Join(record.GetDomain(), record.GetProject())
		err = backend.StageManifest(ctx, record.GetManifest())
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		err = backend.CommitManifest(ctx, webRoot, record.GetManifest(), ModifyManifestOptions{})
		if err != nil {
			logc.Fatalln(ctx, err)
		}

	case *auditServer != "":
		if flag.NArg() < 1 {
			logc.Fatalln(ctx, "handler path not provided")
		}
		processor, err := AuditEventProcessor(flag.Arg(0), flag.Args()[1:])
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		serve(ctx, listen(ctx, "audit", *auditServer), ObserveHTTPHandler(processor))

	case *runMigration != "":
		if err = RunMigration(ctx, *runMigration); err != nil {
			logc.Fatalln(ctx, err)
		}

	case *traceGarbage:
		if err = TraceGarbage(ctx); err != nil {
			logc.Fatalln(ctx, err)
		}

	default:
		// Hook a signal (SIGHUP on *nix, nothing on Windows) for reloading the
		// configuration at runtime. This is useful because it preserves S3
		// backend cache contents. Failed configuration reloads will not crash
		// the process; you may want to check the syntax first with
		// `git-pages -config ... -print-config` since there is no other
		// feedback.
		//
		// Note that not all of the configuration is updated on reload.
		// Listeners are kept as-is. The backend is not recreated (this is
		// intentional as it allows preserving the cache).
		OnReload(func() {
			if newConfig, err := Configure(*configTomlPath); err != nil {
				logc.Println(ctx, "config: reload err:", err)
			} else {
				// From https://go.dev/ref/mem:
				// > A read r of a memory location x holding a value that is not larger than
				// > a machine word must observe some write w such that r does not happen before
				// > w and there is no write w' such that w happens before w' and w' happens
				// > before r. That is, each read must observe a value written by a preceding or
				// > concurrent write.
				config = newConfig
				if err = errors.Join(
					configureFeatures(ctx),
					configureMemLimit(ctx),
					configureWildcards(ctx),
					configureFallback(ctx),
				); err != nil {
					// At this point the configuration is in an in-between, corrupted state, so
					// the only reasonable choice is to crash.
					logc.Fatalln(ctx, "config: reload fail:", err)
				} else {
					logc.Println(ctx, "config: reload ok")
				}
			}
		})

		// Start listening on all ports before initializing the backend, otherwise if the backend
		// spends some time initializing (which the S3 backend does) a proxy like Caddy can race
		// with git-pages on startup and return errors for requests that would have been served
		// just 0.5s later.
		pagesListener := listen(ctx, "pages", config.Server.Pages)
		caddyListener := listen(ctx, "caddy", config.Server.Caddy)
		metricsListener := listen(ctx, "metrics", config.Server.Metrics)

		if backend, err = CreateBackend(ctx, &config.Storage); err != nil {
			logc.Fatalln(ctx, err)
		}
		backend = NewObservedBackend(backend)

		middleware := chainHTTPMiddleware(
			panicHandler,
			remoteAddrMiddleware,
			ObserveHTTPHandler,
		)
		go serve(ctx, pagesListener, middleware(http.HandlerFunc(ServePages)))
		go serve(ctx, caddyListener, middleware(http.HandlerFunc(ServeCaddy)))
		go serve(ctx, metricsListener, promhttp.Handler())

		if config.Insecure {
			logc.Println(ctx, "serve: ready (INSECURE)")
		} else {
			logc.Println(ctx, "serve: ready")
		}
		WaitForInterrupt()
		logc.Println(ctx, "serve: exiting")
	}
}