[mirror] Scalable static site server for Git forges (like GitHub Pages)

[breaking-change] Implement audit record retrieval.

This is only a breaking change if you've enabled the `audit` feature.
All existing audit records should be removed once this commit is deployed,
as both the Protobuf schema and the Snowflake epoch have changed.

+1 -1
flake.nix
··· 43 43 "-s -w" 44 44 ]; 45 45 46 - vendorHash = "sha256-opS3f4GDczDRp7mrBzvQtK13Qi4snanX4I64FHTh7Pw="; 46 + vendorHash = "sha256-LkHC/gFiSfYz9Z4bYMq1QNdapPYp8h1DSMRfFU9f7mw="; 47 47 }; 48 48 in 49 49 {
+1 -1
go.mod
··· 12 12 github.com/getsentry/sentry-go/slog v0.40.0 13 13 github.com/go-git/go-billy/v6 v6.0.0-20251126203821-7f9c95185ee0 14 14 github.com/go-git/go-git/v6 v6.0.0-20251128074608-48f817f57805 15 - github.com/influxdata/influxdb v1.12.2 16 15 github.com/jpillora/backoff v1.0.0 16 + github.com/kankanreno/go-snowflake v1.2.0 17 17 github.com/klauspost/compress v1.18.1 18 18 github.com/maypok86/otter/v2 v2.2.1 19 19 github.com/minio/minio-go/v7 v7.0.97
+2 -2
go.sum
··· 57 57 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= 58 58 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 59 59 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 60 - github.com/influxdata/influxdb v1.12.2 h1:Y0ZBu47gYVbDCRPMFOrlRRZ3grdqPGIJxerFysVSq+g= 61 - github.com/influxdata/influxdb v1.12.2/go.mod h1:EwqFMB6GKV0Huug82Msa5f8QfXhqETUmC4L9A0QZJQM= 62 60 github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= 63 61 github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= 62 + github.com/kankanreno/go-snowflake v1.2.0 h1:Zx2SctsH5pivIj9vyhwyDyQS23jcDJx4iT49Bjv81kk= 63 + github.com/kankanreno/go-snowflake v1.2.0/go.mod h1:6CZ+10PeVsFXKZUTYyJzPiRIjn1IXbInaWLCX/LDJ0g= 64 64 github.com/kevinburke/ssh_config v1.4.0 h1:6xxtP5bZ2E4NF5tuQulISpTO2z8XbtH8cg1PWkxoFkQ= 65 65 github.com/kevinburke/ssh_config v1.4.0/go.mod h1:q2RIzfka+BXARoNexmF9gkxEX7DmvbW9P4hIVx2Kg4M= 66 66 github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
+56 -11
src/audit.go
··· 1 1 package git_pages 2 2 3 3 import ( 4 + "cmp" 4 5 "context" 5 6 "fmt" 6 7 "net/http" 8 + "strconv" 7 9 "strings" 8 10 "time" 9 11 10 - "github.com/influxdata/influxdb/pkg/snowflake" 11 12 exponential "github.com/jpillora/backoff" 13 + "github.com/kankanreno/go-snowflake" 14 + "github.com/prometheus/client_golang/prometheus" 15 + "github.com/prometheus/client_golang/prometheus/promauto" 12 16 "google.golang.org/protobuf/proto" 13 17 timestamppb "google.golang.org/protobuf/types/known/timestamppb" 14 18 ) 15 19 20 + var ( 21 + auditNotifyOkCount = promauto.NewCounter(prometheus.CounterOpts{ 22 + Name: "git_pages_audit_notify_ok", 23 + Help: "Count of successful audit notifications", 24 + }) 25 + auditNotifyErrorCount = promauto.NewCounter(prometheus.CounterOpts{ 26 + Name: "git_pages_audit_notify_error", 27 + Help: "Count of failed audit notifications", 28 + }) 29 + ) 30 + 31 + type AuditID int64 32 + 33 + func GenerateAuditID() AuditID { 34 + inner, err := snowflake.NextID() 35 + if err != nil { 36 + panic(err) 37 + } 38 + return AuditID(inner) 39 + } 40 + 41 + func ParseAuditID(repr string) (AuditID, error) { 42 + inner, err := strconv.ParseInt(repr, 16, 64) 43 + if err != nil { 44 + return AuditID(0), err 45 + } 46 + return AuditID(inner), nil 47 + } 48 + 49 + func (id AuditID) String() string { 50 + return fmt.Sprintf("%016x", int64(id)) 51 + } 52 + 53 + func (id AuditID) CompareTime(when time.Time) int { 54 + idMillis := int64(id) >> (snowflake.MachineIDLength + snowflake.SequenceLength) 55 + whenMillis := when.UTC().UnixNano() / 1e6 56 + return cmp.Compare(idMillis, whenMillis) 57 + } 58 + 16 59 func EncodeAuditRecord(auditRecord *AuditRecord) (data []byte) { 17 60 data, err := proto.MarshalOptions{Deterministic: true}.Marshal(auditRecord) 18 61 if err != nil { ··· 29 72 30 73 type auditedBackend struct { 31 74 Backend 32 - ids *snowflake.Generator 33 75 } 34 76 35 77 var _ Backend = (*auditedBackend)(nil) 36 78 37 79 func NewAuditedBackend(backend 
Backend) Backend { 38 80 if config.Feature("audit") { 39 - ids := snowflake.New(config.Audit.NodeID) 40 - return &auditedBackend{backend, ids} 81 + return &auditedBackend{backend} 41 82 } else { 42 83 return backend 43 84 } ··· 50 91 // to be a 100% accurate reflection of performed actions. When in doubt, the audit records 51 92 // should be examined together with the application logs. 52 93 func (audited *auditedBackend) appendNewAuditRecord(ctx context.Context, record *AuditRecord) (err error) { 53 - record.Timestamp = timestamppb.Now() 94 + if config.Audit.Collect { 95 + id := GenerateAuditID() 96 + record.Id = proto.Int64(int64(id)) 97 + record.Timestamp = timestamppb.Now() 54 98 55 - if config.Audit.Collect { 56 - id := fmt.Sprintf("%016x", audited.ids.Next()) 57 - err = audited.Backend.AppendAuditRecord(ctx, id, record) 99 + err = audited.Backend.AppendAuditLog(ctx, id, record) 58 100 if err != nil { 59 101 err = fmt.Errorf("audit: %w", err) 60 102 } else { ··· 64 106 } else { 65 107 subject = fmt.Sprintf("%s/%s", *record.Domain, *record.Project) 66 108 } 67 - logc.Printf(ctx, "audit %s ok: %s %s\n", subject, record.Event.String(), id) 109 + logc.Printf(ctx, "audit %s ok: %s %s\n", subject, id, record.Event.String()) 68 110 69 111 // Send a notification to the audit server, if configured, and try to make sure 70 112 // it is delivered by retrying with exponential backoff on errors. 
··· 74 116 return 75 117 } 76 118 77 - func notifyAudit(ctx context.Context, id string) { 119 + func notifyAudit(ctx context.Context, id AuditID) { 78 120 if config.Audit.NotifyURL != nil { 79 121 notifyURL := config.Audit.NotifyURL.URL 80 - notifyURL.RawQuery = id 122 + notifyURL.RawQuery = id.String() 123 + 81 124 go func() { 82 125 backoff := exponential.Backoff{ 83 126 Jitter: true, ··· 89 132 if err != nil { 90 133 sleepFor := backoff.Duration() 91 134 logc.Printf(ctx, "audit notify %s err: %s (retry in %s)", id, err, sleepFor) 135 + auditNotifyErrorCount.Inc() 92 136 time.Sleep(sleepFor) 93 137 } else { 94 138 logc.Printf(ctx, "audit notify %s ok", id) 139 + auditNotifyOkCount.Inc() 95 140 break 96 141 } 97 142 }
+27 -2
src/backend.go
··· 5 5 "errors" 6 6 "fmt" 7 7 "io" 8 + "iter" 8 9 "slices" 9 10 "strings" 10 11 "time" ··· 29 30 ) 30 31 31 32 type GetManifestOptions struct { 33 + // If true and the manifest is past the cache `MaxAge`, `GetManifest` blocks and returns 34 + // a fresh object instead of revalidating in background and returning a stale object. 32 35 BypassCache bool 36 + } 37 + 38 + type QueryAuditLogOptions struct { 39 + // Inclusive lower bound on returned audit records, per their Snowflake ID (which may differ 40 + // slightly from the embedded timestamp). If zero, audit records are returned since beginning 41 + // of time. 42 + Since time.Time 43 + // Inclusive upper bound on returned audit records, per their Snowflake ID (which may differ 44 + // slightly from the embedded timestamp). If zero, audit records are returned until the end 45 + // of time. 46 + Until time.Time 47 + } 48 + 49 + type QueryAuditLogResult struct { 50 + ID AuditID 51 + Err error 33 52 } 34 53 35 54 type Backend interface { ··· 82 101 // is discovered serving abusive content. 83 102 FreezeDomain(ctx context.Context, domain string, freeze bool) error 84 103 85 - // Append an audit record to the log. 86 - AppendAuditRecord(ctx context.Context, id string, record *AuditRecord) error 104 + // Append a record to the audit log. 105 + AppendAuditLog(ctx context.Context, id AuditID, record *AuditRecord) error 106 + 107 + // Retrieve a single record from the audit log. 108 + QueryAuditLog(ctx context.Context, id AuditID) (record *AuditRecord, err error) 109 + 110 + // Retrieve records from the audit log by time range. 111 + SearchAuditLog(ctx context.Context, opts QueryAuditLogOptions) iter.Seq[QueryAuditLogResult] 87 112 } 88 113 89 114 func CreateBackend(config *StorageConfig) (backend Backend, err error) {
+57 -15
src/backend_fs.go
··· 6 6 "errors" 7 7 "fmt" 8 8 "io" 9 - "io/fs" 9 + iofs "io/fs" 10 + "iter" 10 11 "os" 11 12 "path/filepath" 12 13 "strings" ··· 154 155 return fs.blobRoot.Remove(blobPath) 155 156 } 156 157 157 - func (b *FSBackend) ListManifests(ctx context.Context) (manifests []string, err error) { 158 - err = fs.WalkDir(b.siteRoot.FS(), ".", func(path string, d fs.DirEntry, err error) error { 159 - if strings.Count(path, "/") > 1 { 160 - return fs.SkipDir 161 - } 162 - _, project, _ := strings.Cut(path, "/") 163 - if project == "" || strings.HasPrefix(project, ".") && project != ".index" { 158 + func (fs *FSBackend) ListManifests(ctx context.Context) (manifests []string, err error) { 159 + err = iofs.WalkDir(fs.siteRoot.FS(), ".", 160 + func(path string, entry iofs.DirEntry, err error) error { 161 + if strings.Count(path, "/") > 1 { 162 + return iofs.SkipDir 163 + } 164 + _, project, _ := strings.Cut(path, "/") 165 + if project == "" || strings.HasPrefix(project, ".") && project != ".index" { 166 + return nil 167 + } 168 + manifests = append(manifests, path) 164 169 return nil 165 - } 166 - manifests = append(manifests, path) 167 - return nil 168 - }) 170 + }) 169 171 return 170 172 } 171 173 ··· 293 295 } 294 296 } 295 297 296 - func (fs *FSBackend) AppendAuditRecord(ctx context.Context, id string, record *AuditRecord) error { 297 - if _, err := fs.auditRoot.Stat(id); err == nil { 298 + func (fs *FSBackend) AppendAuditLog(ctx context.Context, id AuditID, record *AuditRecord) error { 299 + if _, err := fs.auditRoot.Stat(id.String()); err == nil { 298 300 panic(fmt.Errorf("audit ID collision: %s", id)) 299 301 } 300 302 301 - return fs.auditRoot.WriteFile(id, EncodeAuditRecord(record), 0o644) 303 + return fs.auditRoot.WriteFile(id.String(), EncodeAuditRecord(record), 0o644) 304 + } 305 + 306 + func (fs *FSBackend) QueryAuditLog(ctx context.Context, id AuditID) (*AuditRecord, error) { 307 + if data, err := fs.auditRoot.ReadFile(id.String()); err != nil { 308 + return nil, 
fmt.Errorf("read: %w", err) 309 + } else if record, err := DecodeAuditRecord(data); err != nil { 310 + return nil, fmt.Errorf("decode: %w", err) 311 + } else { 312 + return record, nil 313 + } 314 + } 315 + 316 + func (fs *FSBackend) SearchAuditLog( 317 + ctx context.Context, opts QueryAuditLogOptions, 318 + ) iter.Seq[QueryAuditLogResult] { 319 + return func(yield func(QueryAuditLogResult) bool) { 320 + iofs.WalkDir(fs.auditRoot.FS(), ".", 321 + func(path string, entry iofs.DirEntry, err error) error { 322 + if path == "." { 323 + return nil 324 + } 325 + var result QueryAuditLogResult 326 + if err != nil { 327 + result.Err = err 328 + } else if id, err := ParseAuditID(path); err != nil { 329 + result.Err = err 330 + } else if !opts.Since.IsZero() && id.CompareTime(opts.Since) < 0 { 331 + return nil 332 + } else if !opts.Until.IsZero() && id.CompareTime(opts.Until) > 0 { 333 + return nil 334 + } else { 335 + result.ID = id 336 + } 337 + if !yield(result) { 338 + return iofs.SkipAll 339 + } else { 340 + return nil 341 + } 342 + }) 343 + } 302 344 }
+51 -2
src/backend_s3.go
··· 6 6 "crypto/sha256" 7 7 "fmt" 8 8 "io" 9 + "iter" 9 10 "net/http" 10 11 "path" 11 12 "strings" ··· 631 632 } 632 633 } 633 634 634 - func auditObjectName(id string) string { 635 + func auditObjectName(id AuditID) string { 635 636 return fmt.Sprintf("audit/%s", id) 636 637 } 637 638 638 - func (s3 *S3Backend) AppendAuditRecord(ctx context.Context, id string, record *AuditRecord) error { 639 + func (s3 *S3Backend) AppendAuditLog(ctx context.Context, id AuditID, record *AuditRecord) error { 640 + logc.Printf(ctx, "s3: append audit %s\n", id) 641 + 639 642 name := auditObjectName(id) 640 643 data := EncodeAuditRecord(record) 641 644 ··· 648 651 } 649 652 return err 650 653 } 654 + 655 + func (s3 *S3Backend) QueryAuditLog(ctx context.Context, id AuditID) (*AuditRecord, error) { 656 + logc.Printf(ctx, "s3: read audit %s\n", id) 657 + 658 + object, err := s3.client.GetObject(ctx, s3.bucket, auditObjectName(id), 659 + minio.GetObjectOptions{}) 660 + if err != nil { 661 + return nil, err 662 + } 663 + defer object.Close() 664 + 665 + data, err := io.ReadAll(object) 666 + if err != nil { 667 + return nil, err 668 + } 669 + 670 + return DecodeAuditRecord(data) 671 + } 672 + 673 + func (s3 *S3Backend) SearchAuditLog( 674 + ctx context.Context, opts QueryAuditLogOptions, 675 + ) iter.Seq[QueryAuditLogResult] { 676 + return func(yield func(QueryAuditLogResult) bool) { 677 + logc.Printf(ctx, "s3: query audit\n") 678 + 679 + ctx, cancel := context.WithCancel(ctx) 680 + defer cancel() 681 + 682 + prefix := "audit/" 683 + for object := range s3.client.ListObjectsIter(ctx, s3.bucket, minio.ListObjectsOptions{ 684 + Prefix: prefix, 685 + }) { 686 + var result QueryAuditLogResult 687 + if object.Err != nil { 688 + result.Err = object.Err 689 + } else if id, err := ParseAuditID(strings.TrimPrefix(object.Key, prefix)); err != nil { 690 + result.Err = err 691 + } else { 692 + result.ID = id 693 + } 694 + if !yield(result) { 695 + break 696 + } 697 + } 698 + } 699 + }
+1 -1
src/config.go
··· 148 148 } 149 149 150 150 type AuditConfig struct { 151 - // Globally unique node identifier (0 to 1023 inclusive). 151 + // Globally unique machine identifier (0 to 63 inclusive). 152 152 NodeID int `toml:"node-id"` 153 153 // Whether audit reports should be stored whenever an audit event occurs. 154 154 Collect bool `toml:"collect"`
+10
src/main.go
··· 16 16 "os" 17 17 "runtime/debug" 18 18 "strings" 19 + "time" 19 20 20 21 automemlimit "github.com/KimMachineGun/automemlimit/memlimit" 21 22 "github.com/c2h5oh/datasize" 23 + "github.com/kankanreno/go-snowflake" 22 24 "github.com/prometheus/client_golang/prometheus/promhttp" 23 25 ) 24 26 ··· 82 84 }, 83 85 } 84 86 } 87 + return 88 + } 89 + 90 + // Thread-unsafe, must be called only during initial configuration. 91 + func configureAudit(_ context.Context) (err error) { 92 + snowflake.SetStartTime(time.Date(2025, 12, 1, 0, 0, 0, 0, time.UTC)) 93 + snowflake.SetMachineID(config.Audit.NodeID) 85 94 return 86 95 } 87 96 ··· 256 265 configureMemLimit(ctx), 257 266 configureWildcards(ctx), 258 267 configureFallback(ctx), 268 + configureAudit(ctx), 259 269 ); err != nil { 260 270 logc.Fatalln(ctx, err) 261 271 }
+28 -3
src/observe.go
··· 5 5 "errors" 6 6 "fmt" 7 7 "io" 8 + "iter" 8 9 "log" 9 10 "log/slog" 10 11 "math/rand/v2" ··· 437 438 return 438 439 } 439 440 440 - func (backend *observedBackend) AppendAuditRecord(ctx context.Context, id string, record *AuditRecord) (err error) { 441 - span, ctx := ObserveFunction(ctx, "AppendAudit", "audit.id", id) 442 - err = backend.inner.AppendAuditRecord(ctx, id, record) 441 + func (backend *observedBackend) AppendAuditLog(ctx context.Context, id AuditID, record *AuditRecord) (err error) { 442 + span, ctx := ObserveFunction(ctx, "AppendAuditLog", "audit.id", id) 443 + err = backend.inner.AppendAuditLog(ctx, id, record) 444 + span.Finish() 445 + return 446 + } 447 + 448 + func (backend *observedBackend) QueryAuditLog(ctx context.Context, id AuditID) (record *AuditRecord, err error) { 449 + span, ctx := ObserveFunction(ctx, "QueryAuditLog", "audit.id", id) 450 + record, err = backend.inner.QueryAuditLog(ctx, id) 443 451 span.Finish() 444 452 return 445 453 } 454 + 455 + func (backend *observedBackend) SearchAuditLog( 456 + ctx context.Context, opts QueryAuditLogOptions, 457 + ) iter.Seq[QueryAuditLogResult] { 458 + return func(yield func(QueryAuditLogResult) bool) { 459 + span, ctx := ObserveFunction(ctx, "SearchAuditLog", 460 + "audit.search.since", opts.Since, 461 + "audit.search.until", opts.Until, 462 + ) 463 + for result := range backend.inner.SearchAuditLog(ctx, opts) { 464 + if !yield(result) { 465 + break 466 + } 467 + } 468 + span.Finish() 469 + } 470 + }
+20 -11
src/schema.pb.go
··· 654 654 type AuditRecord struct { 655 655 state protoimpl.MessageState `protogen:"open.v1"` 656 656 // Audit event metadata. 657 - Event *AuditEvent `protobuf:"varint,1,opt,name=event,enum=AuditEvent" json:"event,omitempty"` 657 + Id *int64 `protobuf:"varint,1,opt,name=id" json:"id,omitempty"` 658 658 Timestamp *timestamppb.Timestamp `protobuf:"bytes,2,opt,name=timestamp" json:"timestamp,omitempty"` 659 + Event *AuditEvent `protobuf:"varint,3,opt,name=event,enum=AuditEvent" json:"event,omitempty"` 659 660 // Affected resource. 660 661 Domain *string `protobuf:"bytes,10,opt,name=domain" json:"domain,omitempty"` 661 662 Project *string `protobuf:"bytes,11,opt,name=project" json:"project,omitempty"` // only for `*Manifest` events ··· 695 696 return file_schema_proto_rawDescGZIP(), []int{6} 696 697 } 697 698 698 - func (x *AuditRecord) GetEvent() AuditEvent { 699 - if x != nil && x.Event != nil { 700 - return *x.Event 699 + func (x *AuditRecord) GetId() int64 { 700 + if x != nil && x.Id != nil { 701 + return *x.Id 701 702 } 702 - return AuditEvent_InvalidEvent 703 + return 0 703 704 } 704 705 705 706 func (x *AuditRecord) GetTimestamp() *timestamppb.Timestamp { ··· 709 710 return nil 710 711 } 711 712 713 + func (x *AuditRecord) GetEvent() AuditEvent { 714 + if x != nil && x.Event != nil { 715 + return *x.Event 716 + } 717 + return AuditEvent_InvalidEvent 718 + } 719 + 712 720 func (x *AuditRecord) GetDomain() string { 713 721 if x != nil && x.Domain != nil { 714 722 return *x.Domain ··· 775 783 "\bproblems\x18\a \x03(\v2\b.ProblemR\bproblems\x1aC\n" + 776 784 "\rContentsEntry\x12\x10\n" + 777 785 "\x03key\x18\x01 \x01(\tR\x03key\x12\x1c\n" + 778 - "\x05value\x18\x02 \x01(\v2\x06.EntryR\x05value:\x028\x01\"\xc3\x01\n" + 779 - "\vAuditRecord\x12!\n" + 780 - "\x05event\x18\x01 \x01(\x0e2\v.AuditEventR\x05event\x128\n" + 781 - "\ttimestamp\x18\x02 \x01(\v2\x1a.google.protobuf.TimestampR\ttimestamp\x12\x16\n" + 786 + "\x05value\x18\x02 
\x01(\v2\x06.EntryR\x05value:\x028\x01\"\xd3\x01\n" + 787 + "\vAuditRecord\x12\x0e\n" + 788 + "\x02id\x18\x01 \x01(\x03R\x02id\x128\n" + 789 + "\ttimestamp\x18\x02 \x01(\v2\x1a.google.protobuf.TimestampR\ttimestamp\x12!\n" + 790 + "\x05event\x18\x03 \x01(\x0e2\v.AuditEventR\x05event\x12\x16\n" + 782 791 "\x06domain\x18\n" + 783 792 " \x01(\tR\x06domain\x12\x18\n" + 784 793 "\aproject\x18\v \x01(\tR\aproject\x12%\n" + ··· 837 846 4, // 4: Manifest.redirects:type_name -> RedirectRule 838 847 6, // 5: Manifest.headers:type_name -> HeaderRule 839 848 7, // 6: Manifest.problems:type_name -> Problem 840 - 2, // 7: AuditRecord.event:type_name -> AuditEvent 841 - 11, // 8: AuditRecord.timestamp:type_name -> google.protobuf.Timestamp 849 + 11, // 7: AuditRecord.timestamp:type_name -> google.protobuf.Timestamp 850 + 2, // 8: AuditRecord.event:type_name -> AuditEvent 842 851 8, // 9: AuditRecord.manifest:type_name -> Manifest 843 852 3, // 10: Manifest.ContentsEntry.value:type_name -> Entry 844 853 11, // [11:11] is the sub-list for method output_type
+2 -1
src/schema.proto
··· 116 116 117 117 message AuditRecord { 118 118 // Audit event metadata. 119 - AuditEvent event = 1; 119 + int64 id = 1; 120 120 google.protobuf.Timestamp timestamp = 2; 121 + AuditEvent event = 3; 121 122 122 123 // Affected resource. 123 124 string domain = 10;