[mirror] Scalable static site server for Git forges (like GitHub Pages)

Add basic garbage tracer.

This isn't a concurrent GC and it cannot provide a reliable result;
the output is just an estimate.

+1 -1
flake.nix
··· 43 43 "-s -w" 44 44 ]; 45 45 46 - vendorHash = "sha256-D5v6LpJZ+a2Dzdir/YzyFBwY/K4laTr58beywzXOsTM="; 46 + vendorHash = "sha256-wwsxHEwCySO2Ykttf6C+GZupMWczVYkAhSVwaVZHNko="; 47 47 }; 48 48 in 49 49 {
+1
go.mod
··· 8 8 github.com/KimMachineGun/automemlimit v0.7.5 9 9 github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 10 10 github.com/creasty/defaults v1.8.0 11 + github.com/dghubble/trie v0.1.0 11 12 github.com/fatih/color v1.18.0 12 13 github.com/getsentry/sentry-go v0.40.0 13 14 github.com/getsentry/sentry-go/slog v0.40.0
+2
go.sum
··· 27 27 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 28 28 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 29 29 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 30 + github.com/dghubble/trie v0.1.0 h1:kJnjBLFFElBwS60N4tkPvnLhnpcDxbBjIulgI8CpNGM= 31 + github.com/dghubble/trie v0.1.0/go.mod h1:sOmnzfBNH7H92ow2292dDFWNsVQuh/izuD7otCYb1ak= 30 32 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= 31 33 github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= 32 34 github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o=
+1 -1
src/backend_s3.go
··· 780 780 ctx context.Context, opts SearchAuditLogOptions, 781 781 ) iter.Seq2[AuditID, error] { 782 782 return func(yield func(AuditID, error) bool) { 783 - logc.Printf(ctx, "s3: query audit\n") 783 + logc.Printf(ctx, "s3: search audit\n") 784 784 785 785 ctx, cancel := context.WithCancel(ctx) 786 786 defer cancel()
+87
src/garbage.go
··· 1 + package git_pages 2 + 3 + import ( 4 + "context" 5 + "fmt" 6 + 7 + "github.com/c2h5oh/datasize" 8 + "github.com/dghubble/trie" 9 + ) 10 + 11 + func trieReduce(data trie.Trier) (items, total int64) { 12 + data.Walk(func(key string, value any) error { 13 + items += 1 14 + total += *value.(*int64) 15 + return nil 16 + }) 17 + return 18 + } 19 + 20 + func TraceGarbage(ctx context.Context) error { 21 + allBlobs := trie.NewRuneTrie() 22 + liveBlobs := trie.NewRuneTrie() 23 + 24 + traceManifest := func(manifestName string, manifest *Manifest) error { 25 + for _, entry := range manifest.GetContents() { 26 + if entry.GetType() == Type_ExternalFile { 27 + blobName := string(entry.Data) 28 + if size := allBlobs.Get(blobName); size == nil { 29 + return fmt.Errorf("%s: dangling reference %s", manifestName, blobName) 30 + } else { 31 + liveBlobs.Put(blobName, size) 32 + } 33 + } 34 + } 35 + return nil 36 + } 37 + 38 + // Enumerate all blobs. 39 + for metadata, err := range backend.EnumerateBlobs(ctx) { 40 + if err != nil { 41 + return fmt.Errorf("trace blobs err: %w", err) 42 + } 43 + allBlobs.Put(metadata.Name, &metadata.Size) 44 + } 45 + 46 + // Enumerate blobs live via site manifests. 47 + for metadata, err := range backend.EnumerateManifests(ctx) { 48 + if err != nil { 49 + return fmt.Errorf("trace sites err: %w", err) 50 + } 51 + manifest, _, err := backend.GetManifest(ctx, metadata.Name, GetManifestOptions{}) 52 + if err != nil { 53 + return fmt.Errorf("trace sites err: %w", err) 54 + } 55 + err = traceManifest(metadata.Name, manifest) 56 + if err != nil { 57 + return fmt.Errorf("trace sites err: %w", err) 58 + } 59 + } 60 + 61 + // Enumerate blobs live via audit records. 62 + for auditID, err := range backend.SearchAuditLog(ctx, SearchAuditLogOptions{}) { 63 + if err != nil { 64 + return fmt.Errorf("trace audit err: %w", err) 65 + } 66 + auditRecord, err := backend.QueryAuditLog(ctx, auditID) 67 + if err != nil { 68 + return fmt.Errorf("trace audit err: %w", err) 69 + } 70 + if auditRecord.Manifest != nil { 71 + err = traceManifest(auditID.String(), auditRecord.Manifest) 72 + if err != nil { 73 + return fmt.Errorf("trace audit err: %w", err) 74 + } 75 + } 76 + } 77 + 78 + allBlobsCount, allBlobsSize := trieReduce(allBlobs) 79 + logc.Printf(ctx, "trace all: %d blobs, %s", 80 + allBlobsCount, datasize.ByteSize(allBlobsSize).HR()) 81 + 82 + liveBlobsCount, liveBlobsSize := trieReduce(liveBlobs) 83 + logc.Printf(ctx, "trace live: %d blobs, %s", 84 + liveBlobsCount, datasize.ByteSize(liveBlobsSize).HR()) 85 + 86 + return nil 87 + }
+49 -38
src/main.go
··· 170 170 fmt.Fprintf(os.Stderr, "Usage:\n") 171 171 fmt.Fprintf(os.Stderr, "(server) "+ 172 172 "git-pages [-config <file>|-no-config]\n") 173 + fmt.Fprintf(os.Stderr, "(info) "+ 174 + "git-pages {-print-config-env-vars|-print-config}\n") 173 175 fmt.Fprintf(os.Stderr, "(debug) "+ 174 176 "git-pages {-list-blobs|-list-manifests}\n") 175 177 fmt.Fprintf(os.Stderr, "(debug) "+ 176 178 "git-pages {-get-blob|-get-manifest|-get-archive|-update-site} <ref> [file]\n") 177 179 fmt.Fprintf(os.Stderr, "(admin) "+ 178 - "git-pages {-run-migration <name>|-freeze-domain <domain>|-unfreeze-domain <domain>}\n") 180 + "git-pages {-freeze-domain <domain>|-unfreeze-domain <domain>}\n") 179 181 fmt.Fprintf(os.Stderr, "(audit) "+ 180 182 "git-pages {-audit-log|-audit-read <id>|-audit-server <endpoint> <program> [args...]}\n") 181 - fmt.Fprintf(os.Stderr, "(info) "+ 182 - "git-pages {-print-config-env-vars|-print-config}\n") 183 + fmt.Fprintf(os.Stderr, "(maint) "+ 184 + "git-pages {-run-migration <name>|-trace-garbage}\n") 183 185 flag.PrintDefaults() 184 186 } 185 187 ··· 187 189 ctx := context.Background() 188 190 189 191 flag.Usage = usage 190 - printConfigEnvVars := flag.Bool("print-config-env-vars", false, 191 - "print every recognized configuration environment variable and exit") 192 - printConfig := flag.Bool("print-config", false, 193 - "print configuration as JSON and exit") 194 192 configTomlPath := flag.String("config", "", 195 193 "load configuration from `filename` (default: 'config.toml')") 196 194 noConfig := flag.Bool("no-config", false, 197 195 "run without configuration file (configure via environment variables)") 198 - runMigration := flag.String("run-migration", "", 199 - "run a store `migration` (one of: create-domain-markers)") 200 - getBlob := flag.String("get-blob", "", 201 - "write contents of `blob` ('sha256-xxxxxxx...xxx')") 196 + printConfigEnvVars := flag.Bool("print-config-env-vars", false, 197 + "print every recognized configuration environment variable and exit") 198 + printConfig := flag.Bool("print-config", false, 199 + "print configuration as JSON and exit") 202 200 listBlobs := flag.Bool("list-blobs", false, 203 201 "enumerate every blob with its metadata") 204 - getManifest := flag.String("get-manifest", "", 205 - "write manifest for `site` (either 'domain.tld' or 'domain.tld/dir') as ProtoJSON") 206 202 listManifests := flag.Bool("list-manifests", false, 207 203 "enumerate every manifest with its metadata") 204 + getBlob := flag.String("get-blob", "", 205 + "write contents of `blob` ('sha256-xxxxxxx...xxx')") 206 + getManifest := flag.String("get-manifest", "", 207 + "write manifest for `site` (either 'domain.tld' or 'domain.tld/dir') as ProtoJSON") 208 208 getArchive := flag.String("get-archive", "", 209 209 "write archive for `site` (either 'domain.tld' or 'domain.tld/dir') in tar format") 210 210 updateSite := flag.String("update-site", "", ··· 219 219 "extract contents of audit record `id` to files '<id>-*'") 220 220 auditServer := flag.String("audit-server", "", 221 221 "listen for notifications on `endpoint` and spawn a process for each audit event") 222 + runMigration := flag.String("run-migration", "", 223 + "run a store `migration` (one of: create-domain-markers)") 224 + traceGarbage := flag.Bool("trace-garbage", false, 225 + "estimate total size of unreachable blobs") 222 226 flag.Parse() 223 227 224 228 var cliOperations int 225 229 for _, selected := range []bool{ 226 - *runMigration != "", 227 - *getBlob != "", 228 230 *listBlobs, 229 - *getManifest != "", 230 231 *listManifests, 232 + *getBlob != "", 233 + *getManifest != "", 231 234 *getArchive != "", 232 235 *updateSite != "", 233 236 *freezeDomain != "", ··· 235 238 *auditLog, 236 239 *auditRead != "", 237 240 *auditServer != "", 241 + *runMigration != "", 242 + *traceGarbage, 238 243 } { 239 244 if selected { 240 245 cliOperations++ 241 246 } 242 247 } 243 248 if cliOperations > 1 { 244 - logc.Fatalln(ctx, "-get-blob, -get-manifest, -get-archive, -update-site, "+ 245 - "-freeze, -unfreeze, -audit-log, and -audit-read are mutually exclusive") 249 + logc.Fatalln(ctx, "-list-blobs, -list-manifests, -get-blob, -get-manifest, -get-archive, "+ 250 + "-update-site, -freeze-domain, -unfreeze-domain, -audit-log, -audit-read, "+ 251 + "-audit-server, -run-migration, and -trace-garbage are mutually exclusive") 246 252 } 247 253 248 254 if *configTomlPath != "" && *noConfig { ··· 288 294 } 289 295 290 296 switch { 291 - case *runMigration != "": 292 - if err := RunMigration(ctx, *runMigration); err != nil { 293 - logc.Fatalln(ctx, err) 294 - } 295 - 296 - case *getBlob != "": 297 - reader, _, err := backend.GetBlob(ctx, *getBlob) 298 - if err != nil { 299 - logc.Fatalln(ctx, err) 300 - } 301 - io.Copy(fileOutputArg(), reader) 302 - 303 297 case *listBlobs: 304 298 for metadata, err := range backend.EnumerateBlobs(ctx) { 305 299 if err != nil { ··· 312 306 ) 313 307 } 314 308 315 - case *getManifest != "": 316 - webRoot := webRootArg(*getManifest) 317 - manifest, _, err := backend.GetManifest(ctx, webRoot, GetManifestOptions{}) 318 - if err != nil { 319 - logc.Fatalln(ctx, err) 320 - } 321 - fmt.Fprintln(fileOutputArg(), string(ManifestJSON(manifest))) 322 - 323 309 case *listManifests: 324 310 for metadata, err := range backend.EnumerateManifests(ctx) { 325 311 if err != nil { ··· 331 317 color.HiGreenString(fmt.Sprint(metadata.Size)), 332 318 ) 333 319 } 320 + 321 + case *getBlob != "": 322 + reader, _, err := backend.GetBlob(ctx, *getBlob) 323 + if err != nil { 324 + logc.Fatalln(ctx, err) 325 + } 326 + io.Copy(fileOutputArg(), reader) 327 + 328 + case *getManifest != "": 329 + webRoot := webRootArg(*getManifest) 330 + manifest, _, err := backend.GetManifest(ctx, webRoot, GetManifestOptions{}) 331 + if err != nil { 332 + logc.Fatalln(ctx, err) 333 + } 334 + fmt.Fprintln(fileOutputArg(), string(ManifestJSON(manifest))) 334 335 335 336 case *getArchive != "": 336 337 webRoot := webRootArg(*getArchive) ··· 490 491 } 491 492 492 493 serve(ctx, listen(ctx, "audit", *auditServer), ObserveHTTPHandler(processor)) 494 + 495 + case *runMigration != "": 496 + if err = RunMigration(ctx, *runMigration); err != nil { 497 + logc.Fatalln(ctx, err) 498 + } 499 + 500 + case *traceGarbage: 501 + if err = TraceGarbage(ctx); err != nil { 502 + logc.Fatalln(ctx, err) 503 + } 493 504 494 505 default: 495 506 // Hook a signal (SIGHUP on *nix, nothing on Windows) for reloading the configuration