[mirror] Scalable static site server for Git forges (like GitHub Pages)
1package git_pages
2
3import (
4 "context"
5 "crypto/tls"
6 "errors"
7 "flag"
8 "fmt"
9 "io"
10 "log"
11 "log/slog"
12 "net"
13 "net/http"
14 "net/http/httputil"
15 "net/url"
16 "os"
17 "path"
18 "runtime/debug"
19 "strings"
20 "time"
21
22 automemlimit "github.com/KimMachineGun/automemlimit/memlimit"
23 "github.com/c2h5oh/datasize"
24 "github.com/fatih/color"
25 "github.com/kankanreno/go-snowflake"
26 "github.com/prometheus/client_golang/prometheus/promhttp"
27 "google.golang.org/protobuf/proto"
28)
29
30var config *Config
31var wildcards []*WildcardPattern
32var fallback http.Handler
33var backend Backend
34
35func configureFeatures(ctx context.Context) (err error) {
36 if len(config.Features) > 0 {
37 logc.Println(ctx, "features:", strings.Join(config.Features, ", "))
38 }
39 return
40}
41
42func configureMemLimit(ctx context.Context) (err error) {
43 // Avoid being OOM killed by not garbage collecting early enough.
44 memlimitBefore := datasize.ByteSize(debug.SetMemoryLimit(-1))
45 automemlimit.SetGoMemLimitWithOpts(
46 automemlimit.WithLogger(slog.New(slog.DiscardHandler)),
47 automemlimit.WithProvider(
48 automemlimit.ApplyFallback(
49 automemlimit.FromCgroup,
50 automemlimit.FromSystem,
51 ),
52 ),
53 automemlimit.WithRatio(float64(config.Limits.MaxHeapSizeRatio)),
54 )
55 memlimitAfter := datasize.ByteSize(debug.SetMemoryLimit(-1))
56 if memlimitBefore == memlimitAfter {
57 logc.Println(ctx, "memlimit: now", memlimitBefore.HR())
58 } else {
59 logc.Println(ctx, "memlimit: was", memlimitBefore.HR(), "now", memlimitAfter.HR())
60 }
61 return
62}
63
64func configureWildcards(_ context.Context) (err error) {
65 newWildcards, err := TranslateWildcards(config.Wildcard)
66 if err != nil {
67 return err
68 } else {
69 wildcards = newWildcards
70 return nil
71 }
72}
73
74func configureFallback(_ context.Context) (err error) {
75 if config.Fallback.ProxyTo != nil {
76 fallbackURL := &config.Fallback.ProxyTo.URL
77 fallback = &httputil.ReverseProxy{
78 Rewrite: func(r *httputil.ProxyRequest) {
79 r.SetURL(fallbackURL)
80 r.Out.Host = r.In.Host
81 r.Out.Header["X-Forwarded-For"] = r.In.Header["X-Forwarded-For"]
82 },
83 Transport: &http.Transport{
84 TLSClientConfig: &tls.Config{
85 InsecureSkipVerify: config.Fallback.Insecure,
86 },
87 },
88 }
89 }
90 return
91}
92
93// Thread-unsafe, must be called only during initial configuration.
94func configureAudit(_ context.Context) (err error) {
95 snowflake.SetStartTime(time.Date(2025, 12, 1, 0, 0, 0, 0, time.UTC))
96 snowflake.SetMachineID(config.Audit.NodeID)
97 return
98}
99
100func listen(ctx context.Context, name string, listen string) net.Listener {
101 if listen == "-" {
102 return nil
103 }
104
105 protocol, address, ok := strings.Cut(listen, "/")
106 if !ok {
107 logc.Fatalf(ctx, "%s: %s: malformed endpoint", name, listen)
108 }
109
110 listener, err := net.Listen(protocol, address)
111 if err != nil {
112 logc.Fatalf(ctx, "%s: %s\n", name, err)
113 }
114
115 return listener
116}
117
118func panicHandler(handler http.Handler) http.Handler {
119 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
120 defer func() {
121 if err := recover(); err != nil {
122 logc.Printf(r.Context(), "panic: %s %s %s: %s\n%s",
123 r.Method, r.Host, r.URL.Path, err, string(debug.Stack()))
124 http.Error(w,
125 fmt.Sprintf("internal server error: %s", err),
126 http.StatusInternalServerError,
127 )
128 }
129 }()
130 handler.ServeHTTP(w, r)
131 })
132}
133
134func serve(ctx context.Context, listener net.Listener, handler http.Handler) {
135 if listener != nil {
136 server := http.Server{Handler: handler}
137 server.Protocols = new(http.Protocols)
138 server.Protocols.SetHTTP1(true)
139 server.Protocols.SetUnencryptedHTTP2(true)
140 logc.Fatalln(ctx, server.Serve(listener))
141 }
142}
143
144func webRootArg(arg string) string {
145 switch strings.Count(arg, "/") {
146 case 0:
147 return arg + "/.index"
148 case 1:
149 return arg
150 default:
151 logc.Fatalln(context.Background(),
152 "webroot argument must be either 'domain.tld' or 'domain.tld/dir")
153 return ""
154 }
155}
156
157func fileOutputArg() (writer io.WriteCloser) {
158 var err error
159 if flag.NArg() == 0 {
160 writer = os.Stdout
161 } else {
162 writer, err = os.Create(flag.Arg(0))
163 if err != nil {
164 logc.Fatalln(context.Background(), err)
165 }
166 }
167 return
168}
169
// usage prints a synopsis of every invocation mode to standard error, followed
// by the per-flag help generated by the flag package. It is installed as
// flag.Usage by Main.
func usage() {
	fmt.Fprintf(os.Stderr, "Usage:\n")
	fmt.Fprintf(os.Stderr, "(server) "+
		"git-pages [-config <file>|-no-config]\n")
	fmt.Fprintf(os.Stderr, "(info) "+
		"git-pages {-print-config-env-vars|-print-config}\n")
	fmt.Fprintf(os.Stderr, "(debug) "+
		"git-pages {-list-blobs|-list-manifests}\n")
	fmt.Fprintf(os.Stderr, "(debug) "+
		"git-pages {-get-blob|-get-manifest|-get-archive|-update-site} <ref> [file]\n")
	fmt.Fprintf(os.Stderr, "(admin) "+
		"git-pages {-freeze-domain <domain>|-unfreeze-domain <domain>}\n")
	// -audit-rollback is a registered flag and must be listed alongside the
	// other audit operations.
	fmt.Fprintf(os.Stderr, "(audit) "+
		"git-pages {-audit-log|-audit-read <id>|-audit-rollback <id>|"+
		"-audit-server <endpoint> <program> [args...]}\n")
	fmt.Fprintf(os.Stderr, "(maint) "+
		"git-pages {-run-migration <name>|-trace-garbage}\n")
	flag.PrintDefaults()
}
188
189func Main() {
190 ctx := context.Background()
191
192 flag.Usage = usage
193 configTomlPath := flag.String("config", "",
194 "load configuration from `filename` (default: 'config.toml')")
195 noConfig := flag.Bool("no-config", false,
196 "run without configuration file (configure via environment variables)")
197 printConfigEnvVars := flag.Bool("print-config-env-vars", false,
198 "print every recognized configuration environment variable and exit")
199 printConfig := flag.Bool("print-config", false,
200 "print configuration as JSON and exit")
201 listBlobs := flag.Bool("list-blobs", false,
202 "enumerate every blob with its metadata")
203 listManifests := flag.Bool("list-manifests", false,
204 "enumerate every manifest with its metadata")
205 getBlob := flag.String("get-blob", "",
206 "write contents of `blob` ('sha256-xxxxxxx...xxx')")
207 getManifest := flag.String("get-manifest", "",
208 "write manifest for `site` (either 'domain.tld' or 'domain.tld/dir') as ProtoJSON")
209 getArchive := flag.String("get-archive", "",
210 "write archive for `site` (either 'domain.tld' or 'domain.tld/dir') in tar format")
211 updateSite := flag.String("update-site", "",
212 "update `site` (either 'domain.tld' or 'domain.tld/dir') from archive or repository URL")
213 freezeDomain := flag.String("freeze-domain", "",
214 "prevent any site uploads to a given `domain`")
215 unfreezeDomain := flag.String("unfreeze-domain", "",
216 "allow site uploads to a `domain` again after it has been frozen")
217 auditLog := flag.Bool("audit-log", false,
218 "display audit log")
219 auditRead := flag.String("audit-read", "",
220 "extract contents of audit record `id` to files '<id>-*'")
221 auditRollback := flag.String("audit-rollback", "",
222 "restore site from contents of audit record `id`")
223 auditServer := flag.String("audit-server", "",
224 "listen for notifications on `endpoint` and spawn a process for each audit event")
225 runMigration := flag.String("run-migration", "",
226 "run a store `migration` (one of: create-domain-markers)")
227 traceGarbage := flag.Bool("trace-garbage", false,
228 "estimate total size of unreachable blobs")
229 flag.Parse()
230
231 var cliOperations int
232 for _, selected := range []bool{
233 *listBlobs,
234 *listManifests,
235 *getBlob != "",
236 *getManifest != "",
237 *getArchive != "",
238 *updateSite != "",
239 *freezeDomain != "",
240 *unfreezeDomain != "",
241 *auditLog,
242 *auditRead != "",
243 *auditRollback != "",
244 *auditServer != "",
245 *runMigration != "",
246 *traceGarbage,
247 } {
248 if selected {
249 cliOperations++
250 }
251 }
252 if cliOperations > 1 {
253 logc.Fatalln(ctx, "-list-blobs, -list-manifests, -get-blob, -get-manifest, -get-archive, "+
254 "-update-site, -freeze-domain, -unfreeze-domain, -audit-log, -audit-read, "+
255 "-audit-rollback, -audit-server, -run-migration, and -trace-garbage are "+
256 "mutually exclusive")
257 }
258
259 if *configTomlPath != "" && *noConfig {
260 logc.Fatalln(ctx, "-no-config and -config are mutually exclusive")
261 }
262
263 if *printConfigEnvVars {
264 PrintConfigEnvVars()
265 return
266 }
267
268 var err error
269 if *configTomlPath == "" && !*noConfig {
270 *configTomlPath = "config.toml"
271 }
272 if config, err = Configure(*configTomlPath); err != nil {
273 logc.Fatalln(ctx, "config:", err)
274 }
275
276 if *printConfig {
277 fmt.Println(config.TOML())
278 return
279 }
280
281 InitObservability()
282 defer FiniObservability()
283
284 if err = errors.Join(
285 configureFeatures(ctx),
286 configureMemLimit(ctx),
287 configureWildcards(ctx),
288 configureFallback(ctx),
289 configureAudit(ctx),
290 ); err != nil {
291 logc.Fatalln(ctx, err)
292 }
293
294 // The server has its own logic for creating the backend.
295 if cliOperations > 0 {
296 if backend, err = CreateBackend(ctx, &config.Storage); err != nil {
297 logc.Fatalln(ctx, err)
298 }
299 }
300
301 switch {
302 case *listBlobs:
303 for metadata, err := range backend.EnumerateBlobs(ctx) {
304 if err != nil {
305 logc.Fatalln(ctx, err)
306 }
307 fmt.Fprintf(color.Output, "%s %s %s\n",
308 metadata.Name,
309 color.HiWhiteString(metadata.LastModified.UTC().Format(time.RFC3339)),
310 color.HiGreenString(fmt.Sprint(metadata.Size)),
311 )
312 }
313
314 case *listManifests:
315 for metadata, err := range backend.EnumerateManifests(ctx) {
316 if err != nil {
317 logc.Fatalln(ctx, err)
318 }
319 fmt.Fprintf(color.Output, "%s %s %s\n",
320 metadata.Name,
321 color.HiWhiteString(metadata.LastModified.UTC().Format(time.RFC3339)),
322 color.HiGreenString(fmt.Sprint(metadata.Size)),
323 )
324 }
325
326 case *getBlob != "":
327 reader, _, err := backend.GetBlob(ctx, *getBlob)
328 if err != nil {
329 logc.Fatalln(ctx, err)
330 }
331 io.Copy(fileOutputArg(), reader)
332
333 case *getManifest != "":
334 webRoot := webRootArg(*getManifest)
335 manifest, _, err := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
336 if err != nil {
337 logc.Fatalln(ctx, err)
338 }
339 fmt.Fprintln(fileOutputArg(), string(ManifestJSON(manifest)))
340
341 case *getArchive != "":
342 webRoot := webRootArg(*getArchive)
343 manifest, metadata, err :=
344 backend.GetManifest(ctx, webRoot, GetManifestOptions{})
345 if err != nil {
346 logc.Fatalln(ctx, err)
347 }
348 if err = CollectTar(ctx, fileOutputArg(), manifest, metadata); err != nil {
349 logc.Fatalln(ctx, err)
350 }
351
352 case *updateSite != "":
353 ctx = WithPrincipal(ctx)
354 GetPrincipal(ctx).CliAdmin = proto.Bool(true)
355
356 if flag.NArg() != 1 {
357 logc.Fatalln(ctx, "update source must be provided as the argument")
358 }
359
360 sourceURL, err := url.Parse(flag.Arg(0))
361 if err != nil {
362 logc.Fatalln(ctx, err)
363 }
364
365 var result UpdateResult
366 if sourceURL.Scheme == "" {
367 file, err := os.Open(sourceURL.Path)
368 if err != nil {
369 logc.Fatalln(ctx, err)
370 }
371 defer file.Close()
372
373 var contentType string
374 switch {
375 case strings.HasSuffix(sourceURL.Path, ".zip"):
376 contentType = "application/zip"
377 case strings.HasSuffix(sourceURL.Path, ".tar"):
378 contentType = "application/x-tar"
379 case strings.HasSuffix(sourceURL.Path, ".tar.gz"):
380 contentType = "application/x-tar+gzip"
381 case strings.HasSuffix(sourceURL.Path, ".tar.zst"):
382 contentType = "application/x-tar+zstd"
383 default:
384 log.Fatalf("cannot determine content type from filename %q\n", sourceURL)
385 }
386
387 webRoot := webRootArg(*updateSite)
388 result = UpdateFromArchive(ctx, webRoot, contentType, file)
389 } else {
390 branch := "pages"
391 if sourceURL.Fragment != "" {
392 branch, sourceURL.Fragment = sourceURL.Fragment, ""
393 }
394
395 webRoot := webRootArg(*updateSite)
396 result = UpdateFromRepository(ctx, webRoot, sourceURL.String(), branch)
397 }
398
399 switch result.outcome {
400 case UpdateError:
401 logc.Printf(ctx, "error: %s\n", result.err)
402 os.Exit(2)
403 case UpdateTimeout:
404 logc.Println(ctx, "timeout")
405 os.Exit(1)
406 case UpdateCreated:
407 logc.Println(ctx, "created")
408 case UpdateReplaced:
409 logc.Println(ctx, "replaced")
410 case UpdateDeleted:
411 logc.Println(ctx, "deleted")
412 case UpdateNoChange:
413 logc.Println(ctx, "no-change")
414 }
415
416 case *freezeDomain != "" || *unfreezeDomain != "":
417 ctx = WithPrincipal(ctx)
418 GetPrincipal(ctx).CliAdmin = proto.Bool(true)
419
420 var domain string
421 var freeze bool
422 if *freezeDomain != "" {
423 domain = *freezeDomain
424 freeze = true
425 } else {
426 domain = *unfreezeDomain
427 freeze = false
428 }
429
430 if freeze {
431 if err = backend.FreezeDomain(ctx, domain); err != nil {
432 logc.Fatalln(ctx, err)
433 }
434 logc.Println(ctx, "frozen")
435 } else {
436 if err = backend.UnfreezeDomain(ctx, domain); err != nil {
437 logc.Fatalln(ctx, err)
438 }
439 logc.Println(ctx, "thawed")
440 }
441
442 case *auditLog:
443 ch := make(chan *AuditRecord)
444 ids := []AuditID{}
445 for id, err := range backend.SearchAuditLog(ctx, SearchAuditLogOptions{}) {
446 if err != nil {
447 logc.Fatalln(ctx, err)
448 }
449 go func() {
450 if record, err := backend.QueryAuditLog(ctx, id); err != nil {
451 logc.Fatalln(ctx, err)
452 } else {
453 ch <- record
454 }
455 }()
456 ids = append(ids, id)
457 }
458
459 records := map[AuditID]*AuditRecord{}
460 for len(records) < len(ids) {
461 record := <-ch
462 records[record.GetAuditID()] = record
463 }
464
465 for _, id := range ids {
466 record := records[id]
467 fmt.Fprintf(color.Output, "%s %s %s %s %s\n",
468 record.GetAuditID().String(),
469 color.HiWhiteString(record.GetTimestamp().AsTime().UTC().Format(time.RFC3339)),
470 color.HiMagentaString(record.DescribePrincipal()),
471 color.HiGreenString(record.DescribeResource()),
472 record.GetEvent(),
473 )
474 }
475
476 case *auditRead != "":
477 id, err := ParseAuditID(*auditRead)
478 if err != nil {
479 logc.Fatalln(ctx, err)
480 }
481
482 record, err := backend.QueryAuditLog(ctx, id)
483 if err != nil {
484 logc.Fatalln(ctx, err)
485 }
486
487 if err = ExtractAuditRecord(ctx, id, record, "."); err != nil {
488 logc.Fatalln(ctx, err)
489 }
490
491 case *auditRollback != "":
492 ctx = WithPrincipal(ctx)
493 GetPrincipal(ctx).CliAdmin = proto.Bool(true)
494
495 id, err := ParseAuditID(*auditRollback)
496 if err != nil {
497 logc.Fatalln(ctx, err)
498 }
499
500 record, err := backend.QueryAuditLog(ctx, id)
501 if err != nil {
502 logc.Fatalln(ctx, err)
503 }
504
505 if record.GetManifest() == nil || record.GetDomain() == "" || record.GetProject() == "" {
506 logc.Fatalln(ctx, "no manifest in audit record")
507 }
508
509 webRoot := path.Join(record.GetDomain(), record.GetProject())
510 err = backend.StageManifest(ctx, record.GetManifest())
511 if err != nil {
512 logc.Fatalln(ctx, err)
513 }
514 err = backend.CommitManifest(ctx, webRoot, record.GetManifest(), ModifyManifestOptions{})
515 if err != nil {
516 logc.Fatalln(ctx, err)
517 }
518
519 case *auditServer != "":
520 if flag.NArg() < 1 {
521 logc.Fatalln(ctx, "handler path not provided")
522 }
523
524 processor, err := AuditEventProcessor(flag.Arg(0), flag.Args()[1:])
525 if err != nil {
526 logc.Fatalln(ctx, err)
527 }
528
529 serve(ctx, listen(ctx, "audit", *auditServer), ObserveHTTPHandler(processor))
530
531 case *runMigration != "":
532 if err = RunMigration(ctx, *runMigration); err != nil {
533 logc.Fatalln(ctx, err)
534 }
535
536 case *traceGarbage:
537 if err = TraceGarbage(ctx); err != nil {
538 logc.Fatalln(ctx, err)
539 }
540
541 default:
542 // Hook a signal (SIGHUP on *nix, nothing on Windows) for reloading the configuration
543 // at runtime. This is useful because it preserves S3 backend cache contents. Failed
544 // configuration reloads will not crash the process; you may want to check the syntax
545 // first with `git-pages -config ... -print-config` since there is no other feedback.
546 //
547 // Note that not all of the configuration is updated on reload. Listeners are kept as-is.
548 // The backend is not recreated (this is intentional as it allows preserving the cache).
549 OnReload(func() {
550 if newConfig, err := Configure(*configTomlPath); err != nil {
551 logc.Println(ctx, "config: reload err:", err)
552 } else {
553 // From https://go.dev/ref/mem:
554 // > A read r of a memory location x holding a value that is not larger than
555 // > a machine word must observe some write w such that r does not happen before
556 // > w and there is no write w' such that w happens before w' and w' happens
557 // > before r. That is, each read must observe a value written by a preceding or
558 // > concurrent write.
559 config = newConfig
560 if err = errors.Join(
561 configureFeatures(ctx),
562 configureMemLimit(ctx),
563 configureWildcards(ctx),
564 configureFallback(ctx),
565 ); err != nil {
566 // At this point the configuration is in an in-between, corrupted state, so
567 // the only reasonable choice is to crash.
568 logc.Fatalln(ctx, "config: reload fail:", err)
569 } else {
570 logc.Println(ctx, "config: reload ok")
571 }
572 }
573 })
574
575 // Start listening on all ports before initializing the backend, otherwise if the backend
576 // spends some time initializing (which the S3 backend does) a proxy like Caddy can race
577 // with git-pages on startup and return errors for requests that would have been served
578 // just 0.5s later.
579 pagesListener := listen(ctx, "pages", config.Server.Pages)
580 caddyListener := listen(ctx, "caddy", config.Server.Caddy)
581 metricsListener := listen(ctx, "metrics", config.Server.Metrics)
582
583 if backend, err = CreateBackend(ctx, &config.Storage); err != nil {
584 logc.Fatalln(ctx, err)
585 }
586 backend = NewObservedBackend(backend)
587
588 middleware := chainHTTPMiddleware(
589 panicHandler,
590 remoteAddrMiddleware,
591 ObserveHTTPHandler,
592 )
593 go serve(ctx, pagesListener, middleware(http.HandlerFunc(ServePages)))
594 go serve(ctx, caddyListener, middleware(http.HandlerFunc(ServeCaddy)))
595 go serve(ctx, metricsListener, promhttp.Handler())
596
597 if config.Insecure {
598 logc.Println(ctx, "serve: ready (INSECURE)")
599 } else {
600 logc.Println(ctx, "serve: ready")
601 }
602
603 WaitForInterrupt()
604 logc.Println(ctx, "serve: exiting")
605 }
606}