[mirror] Scalable static site server for Git forges (like GitHub Pages)

Add `EnumerateBlobs` API and `-list-blobs` option.

This also adds `Name` to blob metadata.

+12 -5
src/backend.go
··· 6 6 "fmt" 7 7 "io" 8 8 "iter" 9 - "slices" 10 9 "strings" 11 10 "time" 12 11 ) ··· 17 16 var ErrDomainFrozen = errors.New("domain administratively frozen") 18 17 19 18 func splitBlobName(name string) []string { 20 - algo, hash, found := strings.Cut(name, "-") 21 - if found { 22 - return slices.Concat([]string{algo}, splitBlobName(hash)) 19 + if algo, hash, found := strings.Cut(name, "-"); found { 20 + return []string{algo, hash[0:2], hash[2:4], hash[4:]} 23 21 } else { 24 - return []string{name[0:2], name[2:4], name[4:]} 22 + panic("malformed blob name") 25 23 } 24 + } 25 + 26 + func joinBlobName(parts []string) string { 27 + return fmt.Sprintf("%s-%s", parts[0], strings.Join(parts[1:], "")) 26 28 } 27 29 28 30 type BackendFeature string ··· 32 34 ) 33 35 34 36 type BlobMetadata struct { 37 + Name string 35 38 Size int64 36 39 LastModified time.Time 37 40 } ··· 92 95 93 96 // Delete a blob. This is an unconditional operation that can break integrity of manifests. 94 97 DeleteBlob(ctx context.Context, name string) error 98 + 99 + // Iterate through all blobs. Whether blobs that are newly added during iteration will appear 100 + // in the results is unspecified. 101 + EnumerateBlobs(ctx context.Context) iter.Seq2[BlobMetadata, error] 95 102 96 103 // Retrieve a manifest. 97 104 GetManifest(ctx context.Context, name string, opts GetManifestOptions) (
+27 -1
src/backend_fs.go
··· 133 133 err = fmt.Errorf("open: %w", err) 134 134 return 135 135 } 136 - return file, BlobMetadata{int64(stat.Size()), stat.ModTime()}, nil 136 + return file, BlobMetadata{name, int64(stat.Size()), stat.ModTime()}, nil 137 137 } 138 138 139 139 func (fs *FSBackend) PutBlob(ctx context.Context, name string, data []byte) error { ··· 179 179 func (fs *FSBackend) DeleteBlob(ctx context.Context, name string) error { 180 180 blobPath := filepath.Join(splitBlobName(name)...) 181 181 return fs.blobRoot.Remove(blobPath) 182 + } 183 + 184 + func (fs *FSBackend) EnumerateBlobs(ctx context.Context) iter.Seq2[BlobMetadata, error] { 185 + return func(yield func(BlobMetadata, error) bool) { 186 + iofs.WalkDir(fs.blobRoot.FS(), ".", 187 + func(path string, entry iofs.DirEntry, err error) error { 188 + var metadata BlobMetadata 189 + if err != nil { 190 + // report error 191 + } else if entry.IsDir() { 192 + // skip directory 193 + return nil 194 + } else if info, err := entry.Info(); err != nil { 195 + // report error 196 + } else { 197 + // report blob 198 + metadata.Name = joinBlobName(strings.Split(path, "/")) 199 + metadata.Size = info.Size() 200 + metadata.LastModified = info.ModTime() 201 + } 202 + if !yield(metadata, err) { 203 + return iofs.SkipAll 204 + } 205 + return nil 206 + }) 207 + } 182 208 } 183 209 184 210 func (fs *FSBackend) ListManifests(ctx context.Context) (manifests []string, err error) {
+32
src/backend_s3.go
··· 316 316 } 317 317 } else { 318 318 reader = bytes.NewReader(cached.blob) 319 + metadata.Name = name 319 320 metadata.Size = int64(len(cached.blob)) 320 321 metadata.LastModified = cached.mtime 321 322 } ··· 355 356 356 357 return s3.client.RemoveObject(ctx, s3.bucket, blobObjectName(name), 357 358 minio.RemoveObjectOptions{}) 359 + } 360 + 361 + func (s3 *S3Backend) EnumerateBlobs(ctx context.Context) iter.Seq2[BlobMetadata, error] { 362 + return func(yield func(BlobMetadata, error) bool) { 363 + logc.Print(ctx, "s3: enumerate blobs") 364 + 365 + ctx, cancel := context.WithCancel(ctx) 366 + defer cancel() 367 + 368 + prefix := "blob/" 369 + for object := range s3.client.ListObjectsIter(ctx, s3.bucket, minio.ListObjectsOptions{ 370 + Prefix: prefix, 371 + Recursive: true, 372 + }) { 373 + var metadata BlobMetadata 374 + var err error 375 + if err = object.Err; err == nil { 376 + key := strings.TrimPrefix(object.Key, prefix) 377 + if strings.HasSuffix(key, "/") { 378 + continue // directory; skip 379 + } else { 380 + metadata.Name = joinBlobName(strings.Split(key, "/")) 381 + metadata.Size = object.Size 382 + metadata.LastModified = object.LastModified 383 + } 384 + } 385 + if !yield(metadata, err) { 386 + break 387 + } 388 + } 389 + } 358 390 } 359 391 360 392 func manifestObjectName(name string) string {
+23 -35
src/main.go
··· 170 170 fmt.Fprintf(os.Stderr, "Usage:\n") 171 171 fmt.Fprintf(os.Stderr, "(server) "+ 172 172 "git-pages [-config <file>|-no-config]\n") 173 + fmt.Fprintf(os.Stderr, "(debug) "+ 174 + "git-pages {-list-blobs}\n") 175 + fmt.Fprintf(os.Stderr, "(debug) "+ 176 + "git-pages {-get-blob|-get-manifest|-get-archive|-update-site} <ref> [file]\n") 173 177 fmt.Fprintf(os.Stderr, "(admin) "+ 174 178 "git-pages {-run-migration <name>|-freeze-domain <domain>|-unfreeze-domain <domain>}\n") 175 179 fmt.Fprintf(os.Stderr, "(audit) "+ 176 180 "git-pages {-audit-log|-audit-read <id>|-audit-server <endpoint> <program> [args...]}\n") 177 181 fmt.Fprintf(os.Stderr, "(info) "+ 178 182 "git-pages {-print-config-env-vars|-print-config}\n") 179 - fmt.Fprintf(os.Stderr, "(cli) "+ 180 - "git-pages {-get-blob|-get-manifest|-get-archive|-update-site} <ref> [file]\n") 181 183 flag.PrintDefaults() 182 184 } 183 185 ··· 197 199 "run a store `migration` (one of: create-domain-markers)") 198 200 getBlob := flag.String("get-blob", "", 199 201 "write contents of `blob` ('sha256-xxxxxxx...xxx')") 202 + listBlobs := flag.Bool("list-blobs", false, 203 + "enumerate every blob with its metadata") 200 204 getManifest := flag.String("get-manifest", "", 201 205 "write manifest for `site` (either 'domain.tld' or 'domain.tld/dir') as ProtoJSON") 202 206 getArchive := flag.String("get-archive", "", ··· 219 223 for _, selected := range []bool{ 220 224 *runMigration != "", 221 225 *getBlob != "", 226 + *listBlobs, 222 227 *getManifest != "", 223 228 *getArchive != "", 224 229 *updateSite != "", ··· 272 277 logc.Fatalln(ctx, err) 273 278 } 274 279 275 - switch { 276 - case *runMigration != "": 280 + // The server has its own logic for creating the backend. 281 + if cliOperations > 0 { 277 282 if backend, err = CreateBackend(ctx, &config.Storage); err != nil { 278 283 logc.Fatalln(ctx, err) 279 284 } 285 + } 280 286 287 + switch { 288 + case *runMigration != "": 281 289 if err := RunMigration(ctx, *runMigration); err != nil { 282 290 logc.Fatalln(ctx, err) 283 291 } 284 292 285 293 case *getBlob != "": 286 - if backend, err = CreateBackend(ctx, &config.Storage); err != nil { 287 - logc.Fatalln(ctx, err) 288 - } 289 - 290 294 reader, _, err := backend.GetBlob(ctx, *getBlob) 291 295 if err != nil { 292 296 logc.Fatalln(ctx, err) 293 297 } 294 298 io.Copy(fileOutputArg(), reader) 295 299 296 - case *getManifest != "": 297 - if backend, err = CreateBackend(ctx, &config.Storage); err != nil { 298 - logc.Fatalln(ctx, err) 300 + case *listBlobs: 301 + for metadata, err := range backend.EnumerateBlobs(ctx) { 302 + if err != nil { 303 + logc.Fatalln(ctx, err) 304 + } 305 + fmt.Fprintf(color.Output, "%s %s %s\n", 306 + metadata.Name, 307 + color.HiWhiteString(metadata.LastModified.UTC().Format(time.RFC3339)), 308 + color.HiGreenString(fmt.Sprint(metadata.Size)), 309 + ) 299 310 } 300 311 312 + case *getManifest != "": 301 313 webRoot := webRootArg(*getManifest) 302 314 manifest, _, err := backend.GetManifest(ctx, webRoot, GetManifestOptions{}) 303 315 if err != nil { ··· 306 318 fmt.Fprintln(fileOutputArg(), string(ManifestJSON(manifest))) 307 319 308 320 case *getArchive != "": 309 - if backend, err = CreateBackend(ctx, &config.Storage); err != nil { 310 - logc.Fatalln(ctx, err) 311 - } 312 - 313 321 webRoot := webRootArg(*getArchive) 314 322 manifest, metadata, err := 315 323 backend.GetManifest(ctx, webRoot, GetManifestOptions{}) ··· 323 331 case *updateSite != "": 324 332 ctx = WithPrincipal(ctx) 325 333 GetPrincipal(ctx).CliAdmin = proto.Bool(true) 326 - 327 - if backend, err = CreateBackend(ctx, &config.Storage); err != nil { 328 - logc.Fatalln(ctx, err) 329 - } 330 334 331 335 if flag.NArg() != 1 { 332 336 logc.Fatalln(ctx, "update source must be provided as the argument") ··· 402 406 freeze = false 403 407 } 404 408 405 - if backend, err = CreateBackend(ctx, &config.Storage); err != nil { 406 - logc.Fatalln(ctx, err) 407 - } 408 - 409 409 if err = backend.FreezeDomain(ctx, domain, freeze); err != nil { 410 410 logc.Fatalln(ctx, err) 411 411 } ··· 416 416 } 417 417 418 418 case *auditLog: 419 - if backend, err = CreateBackend(ctx, &config.Storage); err != nil { 420 - logc.Fatalln(ctx, err) 421 - } 422 - 423 419 ch := make(chan *AuditRecord) 424 420 ids := []AuditID{} 425 421 for id, err := range backend.SearchAuditLog(ctx, SearchAuditLogOptions{}) { ··· 454 450 } 455 451 456 452 case *auditRead != "": 457 - if backend, err = CreateBackend(ctx, &config.Storage); err != nil { 458 - logc.Fatalln(ctx, err) 459 - } 460 - 461 453 id, err := ParseAuditID(*auditRead) 462 454 if err != nil { 463 455 logc.Fatalln(ctx, err) ··· 473 465 } 474 466 475 467 case *auditServer != "": 476 - if backend, err = CreateBackend(ctx, &config.Storage); err != nil { 477 - logc.Fatalln(ctx, err) 478 - } 479 - 480 468 if flag.NArg() < 1 { 481 469 logc.Fatalln(ctx, "handler path not provided") 482 470 }
+12
src/observe.go
··· 373 373 return 374 374 } 375 375 376 + func (backend *observedBackend) EnumerateBlobs(ctx context.Context) iter.Seq2[BlobMetadata, error] { 377 + return func(yield func(BlobMetadata, error) bool) { 378 + span, ctx := ObserveFunction(ctx, "EnumerateBlobs") 379 + for metadata, err := range backend.inner.EnumerateBlobs(ctx) { 380 + if !yield(metadata, err) { 381 + break 382 + } 383 + } 384 + span.Finish() 385 + } 386 + } 387 + 376 388 func (backend *observedBackend) ListManifests(ctx context.Context) (manifests []string, err error) { 377 389 span, ctx := ObserveFunction(ctx, "ListManifests") 378 390 manifests, err = backend.inner.ListManifests(ctx)