[mirror] Scalable static site server for Git forges (like GitHub Pages)
1package git_pages
2
3import (
4 "context"
5 "crypto/sha256"
6 "errors"
7 "fmt"
8 "io"
9 iofs "io/fs"
10 "iter"
11 "os"
12 "path/filepath"
13 "strings"
14)
15
// FSBackend is a Backend implementation that keeps its data in three directories
// ("blob", "site" and "audit"), each accessed through its own os.Root.
type FSBackend struct {
	// blobRoot is the root of the "blob" directory, where blob contents are stored.
	blobRoot *os.Root
	// siteRoot is the root of the "site" directory, which holds manifests, staged
	// manifests, and per-domain marker files.
	siteRoot *os.Root
	// auditRoot is the root of the "audit" directory, with one file per audit record.
	auditRoot *os.Root
	// hasAtomicCAS records whether the file locking probe on siteRoot succeeded when
	// the backend was created; when set, manifest commits and deletions take a lock.
	hasAtomicCAS bool
}
22
// Compile-time check that *FSBackend satisfies the Backend interface.
var _ Backend = (*FSBackend)(nil)
24
// maybeCreateOpenRoot opens the subdirectory `name` of `dir` as an os.Root, first
// creating that subdirectory with mode 0o755 if needed. Only the final path
// component is created (os.Mkdir, not MkdirAll), so `dir` itself must already exist.
//
// Errors are wrapped with a "mkdir: " or "open: " prefix naming the failed step.
func maybeCreateOpenRoot(dir string, name string) (*os.Root, error) {
	target := filepath.Join(dir, name)

	// An "already exists" failure is tolerated; any other mkdir failure is fatal.
	switch mkdirErr := os.Mkdir(target, 0o755); {
	case mkdirErr == nil, errors.Is(mkdirErr, os.ErrExist):
	default:
		return nil, fmt.Errorf("mkdir: %w", mkdirErr)
	}

	opened, openErr := os.OpenRoot(target)
	if openErr != nil {
		return nil, fmt.Errorf("open: %w", openErr)
	}
	return opened, nil
}
39
// createTempInRoot writes `data` to a freshly created temporary file located directly
// in the directory of `root`, and returns the file's path relative to that directory.
//
// `name` is passed to os.CreateTemp as the file name pattern. On any failure after
// the file has been created, the file is removed again (best effort) so that failed
// calls do not accumulate stray temporary files. An error from closing the file is
// reported as a failure, since it can indicate that the data was not fully written.
func createTempInRoot(root *os.Root, name string, data []byte) (string, error) {
	tempFile, err := os.CreateTemp(root.Name(), name)
	if err != nil {
		return "", fmt.Errorf("mktemp: %w", err)
	}

	// fail discards the temporary file and returns the wrapped error.
	fail := func(step string, cause error) (string, error) {
		// Removal is best effort; the original failure is the one worth reporting.
		_ = os.Remove(tempFile.Name())
		return "", fmt.Errorf("%s: %w", step, cause)
	}

	// Always close the file, even when the write failed, to release the descriptor.
	_, writeErr := tempFile.Write(data)
	closeErr := tempFile.Close()
	if writeErr != nil {
		return fail("write", writeErr)
	}
	if closeErr != nil {
		return fail("close", closeErr)
	}

	tempPath, err := filepath.Rel(root.Name(), tempFile.Name())
	if err != nil {
		return fail("relpath", err)
	}
	return tempPath, nil
}
58
59func checkAtomicCAS(root *os.Root) bool {
60 fileName := ".hasAtomicCAS"
61 file, err := root.Create(fileName)
62 if err != nil {
63 panic(err)
64 }
65 root.Remove(fileName)
66 defer file.Close()
67
68 flockErr := FileLock(file)
69 funlockErr := FileUnlock(file)
70 return (flockErr == nil && funlockErr == nil)
71}
72
73func NewFSBackend(ctx context.Context, config *FSConfig) (*FSBackend, error) {
74 blobRoot, err := maybeCreateOpenRoot(config.Root, "blob")
75 if err != nil {
76 return nil, fmt.Errorf("blob: %w", err)
77 }
78 siteRoot, err := maybeCreateOpenRoot(config.Root, "site")
79 if err != nil {
80 return nil, fmt.Errorf("site: %w", err)
81 }
82 auditRoot, err := maybeCreateOpenRoot(config.Root, "audit")
83 if err != nil {
84 return nil, fmt.Errorf("audit: %w", err)
85 }
86 hasAtomicCAS := checkAtomicCAS(siteRoot)
87 if hasAtomicCAS {
88 logc.Println(ctx, "fs: has atomic CAS")
89 } else {
90 logc.Println(ctx, "fs: has best-effort CAS")
91 }
92 return &FSBackend{blobRoot, siteRoot, auditRoot, hasAtomicCAS}, nil
93}
94
// Backend returns the receiver itself as a Backend.
func (fs *FSBackend) Backend() Backend {
	return fs
}
98
99func (fs *FSBackend) HasFeature(ctx context.Context, feature BackendFeature) bool {
100 switch feature {
101 case FeatureCheckDomainMarker:
102 return true
103 default:
104 return false
105 }
106}
107
108func (fs *FSBackend) EnableFeature(ctx context.Context, feature BackendFeature) error {
109 switch feature {
110 case FeatureCheckDomainMarker:
111 return nil
112 default:
113 return fmt.Errorf("not implemented")
114 }
115}
116
117func (fs *FSBackend) GetBlob(
118 ctx context.Context, name string,
119) (
120 reader io.ReadSeeker, metadata BlobMetadata, err error,
121) {
122 blobPath := filepath.Join(splitBlobName(name)...)
123 stat, err := fs.blobRoot.Stat(blobPath)
124 if errors.Is(err, os.ErrNotExist) {
125 err = fmt.Errorf("%w: %s", ErrObjectNotFound, err.(*os.PathError).Path)
126 return
127 } else if err != nil {
128 err = fmt.Errorf("stat: %w", err)
129 return
130 }
131 file, err := fs.blobRoot.Open(blobPath)
132 if err != nil {
133 err = fmt.Errorf("open: %w", err)
134 return
135 }
136 return file, BlobMetadata{name, int64(stat.Size()), stat.ModTime()}, nil
137}
138
139func (fs *FSBackend) PutBlob(ctx context.Context, name string, data []byte) error {
140 blobPath := filepath.Join(splitBlobName(name)...)
141 blobDir := filepath.Dir(blobPath)
142
143 if _, err := fs.blobRoot.Stat(blobPath); err == nil {
144 // Blob already exists. While on Linux it would be benign to write and replace a blob
145 // that already exists, on Windows this is liable to cause access errors.
146 return nil
147 }
148
149 tempPath, err := createTempInRoot(fs.blobRoot, name, data)
150 if err != nil {
151 return err
152 }
153
154 if err := fs.blobRoot.Chmod(tempPath, 0o444); err != nil {
155 return fmt.Errorf("chmod: %w", err)
156 }
157
158again:
159 for {
160 if err := fs.blobRoot.MkdirAll(blobDir, 0o755); err != nil {
161 if errors.Is(err, os.ErrExist) {
162 // Handle the case where two `PutBlob()` calls race creating a common prefix
163 // of a blob directory. The `MkdirAll()` call that loses the TOCTTOU condition
164 // bails out, so we have to repeat it.
165 continue again
166 }
167 return fmt.Errorf("mkdir: %w", err)
168 }
169 break
170 }
171
172 if err := fs.blobRoot.Rename(tempPath, blobPath); err != nil {
173 return fmt.Errorf("rename: %w", err)
174 }
175
176 return nil
177}
178
179func (fs *FSBackend) DeleteBlob(ctx context.Context, name string) error {
180 blobPath := filepath.Join(splitBlobName(name)...)
181 return fs.blobRoot.Remove(blobPath)
182}
183
184func (fs *FSBackend) EnumerateBlobs(ctx context.Context) iter.Seq2[BlobMetadata, error] {
185 return func(yield func(BlobMetadata, error) bool) {
186 iofs.WalkDir(fs.blobRoot.FS(), ".",
187 func(path string, entry iofs.DirEntry, err error) error {
188 var metadata BlobMetadata
189 if err != nil {
190 // report error
191 } else if entry.IsDir() {
192 // skip directory
193 return nil
194 } else if info, err := entry.Info(); err != nil {
195 // report error
196 } else {
197 // report blob
198 metadata.Name = joinBlobName(strings.Split(path, "/"))
199 metadata.Size = info.Size()
200 metadata.LastModified = info.ModTime()
201 }
202 if !yield(metadata, err) {
203 return iofs.SkipAll
204 }
205 return nil
206 })
207 }
208}
209
210func (fs *FSBackend) GetManifest(
211 ctx context.Context, name string, opts GetManifestOptions,
212) (
213 manifest *Manifest, metadata ManifestMetadata, err error,
214) {
215 stat, err := fs.siteRoot.Stat(name)
216 if errors.Is(err, os.ErrNotExist) {
217 err = fmt.Errorf("%w: %s", ErrObjectNotFound, err.(*os.PathError).Path)
218 return
219 } else if err != nil {
220 err = fmt.Errorf("stat: %w", err)
221 return
222 }
223 data, err := fs.siteRoot.ReadFile(name)
224 if err != nil {
225 err = fmt.Errorf("read: %w", err)
226 return
227 }
228 manifest, err = DecodeManifest(data)
229 if err != nil {
230 return
231 }
232 return manifest, ManifestMetadata{
233 LastModified: stat.ModTime(),
234 ETag: fmt.Sprintf("%x", sha256.Sum256(data)),
235 }, nil
236}
237
// stagedManifestName derives the file name under which an encoded manifest is staged:
// a leading dot followed by the lowercase hex SHA-256 digest of `manifestData`.
func stagedManifestName(manifestData []byte) string {
	digest := sha256.Sum256(manifestData)
	return "." + fmt.Sprintf("%x", digest)
}
241
242func (fs *FSBackend) StageManifest(ctx context.Context, manifest *Manifest) error {
243 manifestData := EncodeManifest(manifest)
244
245 tempPath, err := createTempInRoot(fs.siteRoot, ".manifest", manifestData)
246 if err != nil {
247 return err
248 }
249
250 if err := fs.siteRoot.Rename(tempPath, stagedManifestName(manifestData)); err != nil {
251 return fmt.Errorf("rename: %w", err)
252 }
253
254 return nil
255}
256
// domainFrozenMarkerName returns the path of the ".frozen" marker file inside the
// directory of `domain`.
func domainFrozenMarkerName(domain string) string {
	const markerFile = ".frozen"
	return filepath.Join(domain, markerFile)
}
260
261func (fs *FSBackend) checkDomainFrozen(ctx context.Context, domain string) error {
262 if _, err := fs.siteRoot.Stat(domainFrozenMarkerName(domain)); err == nil {
263 return ErrDomainFrozen
264 } else if !errors.Is(err, os.ErrNotExist) {
265 return fmt.Errorf("stat: %w", err)
266 } else {
267 return nil
268 }
269}
270
// HasAtomicCAS reports the value recorded in hasAtomicCAS when the backend was
// created, i.e. whether manifest modifications are guarded by a file lock.
func (fs *FSBackend) HasAtomicCAS(ctx context.Context) bool {
	// On a suitable filesystem, POSIX advisory locks can be used to implement atomic CAS.
	// An implementation consists of two parts:
	// - Intra-process mutex set (one per manifest), to prevent races between goroutines;
	// - Inter-process POSIX advisory locks (one per manifest), to prevent races between
	// different git-pages instances.
	return fs.hasAtomicCAS
}
279
// manifestLockGuard represents a lock taken by lockManifest. It must be released
// with Unlock.
type manifestLockGuard struct {
	// file is the open, locked manifest file, or nil if the manifest did not exist
	// when the lock was requested (in which case nothing is locked).
	file *os.File
}
283
284func lockManifest(fs *os.Root, name string) (*manifestLockGuard, error) {
285 file, err := fs.Open(name)
286 if errors.Is(err, os.ErrNotExist) {
287 return &manifestLockGuard{nil}, nil
288 } else if err != nil {
289 return nil, fmt.Errorf("open: %w", err)
290 }
291 if err := FileLock(file); err != nil {
292 file.Close()
293 return nil, fmt.Errorf("flock(LOCK_EX): %w", err)
294 }
295 return &manifestLockGuard{file}, nil
296}
297
298func (guard *manifestLockGuard) Unlock() {
299 if guard.file != nil {
300 FileUnlock(guard.file)
301 guard.file.Close()
302 }
303}
304
305func (fs *FSBackend) checkManifestPrecondition(
306 ctx context.Context, name string, opts ModifyManifestOptions,
307) error {
308 if !opts.IfUnmodifiedSince.IsZero() {
309 stat, err := fs.siteRoot.Stat(name)
310 if err != nil {
311 return fmt.Errorf("stat: %w", err)
312 }
313
314 if stat.ModTime().Compare(opts.IfUnmodifiedSince) > 0 {
315 return fmt.Errorf("%w: If-Unmodified-Since", ErrPreconditionFailed)
316 }
317 }
318
319 if opts.IfMatch != "" {
320 data, err := fs.siteRoot.ReadFile(name)
321 if err != nil {
322 return fmt.Errorf("read: %w", err)
323 }
324
325 if fmt.Sprintf("%x", sha256.Sum256(data)) != opts.IfMatch {
326 return fmt.Errorf("%w: If-Match", ErrPreconditionFailed)
327 }
328 }
329
330 return nil
331}
332
333func (fs *FSBackend) CommitManifest(
334 ctx context.Context, name string, manifest *Manifest, opts ModifyManifestOptions,
335) error {
336 if fs.hasAtomicCAS {
337 if guard, err := lockManifest(fs.siteRoot, name); err != nil {
338 return err
339 } else {
340 defer guard.Unlock()
341 }
342 }
343
344 domain := filepath.Dir(name)
345 if err := fs.checkDomainFrozen(ctx, domain); err != nil {
346 return err
347 }
348
349 if err := fs.checkManifestPrecondition(ctx, name, opts); err != nil {
350 return err
351 }
352
353 manifestData := EncodeManifest(manifest)
354 manifestHashName := stagedManifestName(manifestData)
355
356 if _, err := fs.siteRoot.Stat(manifestHashName); err != nil {
357 return fmt.Errorf("manifest not staged")
358 }
359
360 if err := fs.siteRoot.MkdirAll(domain, 0o755); err != nil {
361 return fmt.Errorf("mkdir: %w", err)
362 }
363
364 if err := fs.siteRoot.Rename(manifestHashName, name); err != nil {
365 return fmt.Errorf("rename: %w", err)
366 }
367
368 return nil
369}
370
371func (fs *FSBackend) DeleteManifest(
372 ctx context.Context, name string, opts ModifyManifestOptions,
373) error {
374 if fs.hasAtomicCAS {
375 if guard, err := lockManifest(fs.siteRoot, name); err != nil {
376 return err
377 } else {
378 defer guard.Unlock()
379 }
380 }
381
382 domain := filepath.Dir(name)
383 if err := fs.checkDomainFrozen(ctx, domain); err != nil {
384 return err
385 }
386
387 if err := fs.checkManifestPrecondition(ctx, name, opts); err != nil {
388 return err
389 }
390
391 err := fs.siteRoot.Remove(name)
392 if errors.Is(err, os.ErrNotExist) {
393 return nil
394 } else {
395 return err
396 }
397}
398
399func (fs *FSBackend) EnumerateManifests(ctx context.Context) iter.Seq2[ManifestMetadata, error] {
400 return func(yield func(ManifestMetadata, error) bool) {
401 iofs.WalkDir(fs.siteRoot.FS(), ".",
402 func(path string, entry iofs.DirEntry, err error) error {
403 _, project, _ := strings.Cut(path, "/")
404 var metadata ManifestMetadata
405 if err != nil {
406 // report error
407 } else if entry.IsDir() {
408 // skip directory
409 return nil
410 } else if project == "" || strings.HasPrefix(project, ".") && project != ".index" {
411 // skip internal
412 return nil
413 } else if info, err := entry.Info(); err != nil {
414 // report error
415 } else {
416 // report blob
417 metadata.Name = path
418 metadata.Size = info.Size()
419 metadata.LastModified = info.ModTime()
420 // not setting metadata.ETag since it is too costly
421 }
422 if !yield(metadata, err) {
423 return iofs.SkipAll
424 }
425 return nil
426 })
427 }
428}
429
430func (fs *FSBackend) CheckDomain(ctx context.Context, domain string) (bool, error) {
431 _, err := fs.siteRoot.Stat(domain)
432 if errors.Is(err, os.ErrNotExist) {
433 return false, nil
434 } else if err == nil {
435 return true, nil
436 } else {
437 return false, err
438 }
439}
440
// CreateDomain does nothing and always returns nil for this backend.
func (fs *FSBackend) CreateDomain(ctx context.Context, domain string) error {
	return nil // no-op
}
444
445func (fs *FSBackend) FreezeDomain(ctx context.Context, domain string) error {
446 return fs.siteRoot.WriteFile(domainFrozenMarkerName(domain), []byte{}, 0o644)
447}
448
449func (fs *FSBackend) UnfreezeDomain(ctx context.Context, domain string) error {
450 err := fs.siteRoot.Remove(domainFrozenMarkerName(domain))
451 if errors.Is(err, os.ErrNotExist) {
452 return nil
453 } else {
454 return err
455 }
456}
457
458func (fs *FSBackend) AppendAuditLog(ctx context.Context, id AuditID, record *AuditRecord) error {
459 if _, err := fs.auditRoot.Stat(id.String()); err == nil {
460 panic(fmt.Errorf("audit ID collision: %s", id))
461 }
462
463 return fs.auditRoot.WriteFile(id.String(), EncodeAuditRecord(record), 0o644)
464}
465
466func (fs *FSBackend) QueryAuditLog(ctx context.Context, id AuditID) (*AuditRecord, error) {
467 if data, err := fs.auditRoot.ReadFile(id.String()); err != nil {
468 return nil, fmt.Errorf("read: %w", err)
469 } else if record, err := DecodeAuditRecord(data); err != nil {
470 return nil, fmt.Errorf("decode: %w", err)
471 } else {
472 return record, nil
473 }
474}
475
476func (fs *FSBackend) SearchAuditLog(
477 ctx context.Context, opts SearchAuditLogOptions,
478) iter.Seq2[AuditID, error] {
479 return func(yield func(AuditID, error) bool) {
480 iofs.WalkDir(fs.auditRoot.FS(), ".",
481 func(path string, entry iofs.DirEntry, err error) error {
482 if path == "." {
483 return nil // skip
484 }
485 var id AuditID
486 if err != nil {
487 // report error
488 } else if id, err = ParseAuditID(path); err != nil {
489 // report error
490 } else if !opts.Since.IsZero() && id.CompareTime(opts.Since) < 0 {
491 return nil // skip
492 } else if !opts.Until.IsZero() && id.CompareTime(opts.Until) > 0 {
493 return nil // skip
494 }
495 if !yield(id, err) {
496 return iofs.SkipAll // break
497 } else {
498 return nil // continue
499 }
500 })
501 }
502}