[mirror] Scalable static site server for Git forges (like GitHub Pages)
at v0.1.0 9.5 kB view raw
//go:generate protoc --go_out=. --go_opt=paths=source_relative schema.proto

package git_pages

import (
	"bytes"
	"context"
	"crypto/sha256"
	"errors"
	"fmt"
	"log"
	"mime"
	"net/http"
	"path"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/c2h5oh/datasize"
	"github.com/klauspost/compress/zstd"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"google.golang.org/protobuf/encoding/protojson"
	"google.golang.org/protobuf/proto"
)

var (
	// Histogram of per-site space saving achieved by CompressFiles, expressed as
	// (original - compressed) / original. Values above 1 are impossible for that
	// formula, so the upper buckets exist only for safety margin.
	siteCompressionSpaceSaving = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "git_pages_site_compression_space_saving",
		Help:    "Reduction in site size after compression relative to the uncompressed size",
		Buckets: []float64{.01, .025, .05, .1, .25, .5, .75, 1, 1.25, 1.5, 1.75, 2, 2.5, 5, 10},

		NativeHistogramBucketFactor:     1.1,
		NativeHistogramMaxBucketNumber:  100,
		NativeHistogramMinResetDuration: 10 * time.Minute,
	})
)

// IsManifestEmpty reports whether `manifest` contains nothing but the root
// directory entry (the entry with the empty name). It panics on any manifest
// that is neither empty-in-this-sense nor has more than one entry — e.g. a
// manifest with no contents at all, or a single non-root entry — treating
// such input as a programming error rather than a recoverable condition.
func IsManifestEmpty(manifest *Manifest) bool {
	if len(manifest.Contents) > 1 {
		return false
	}
	// At most one entry remains; it must be the root directory.
	for name, entry := range manifest.Contents {
		if name == "" && entry.GetType() == Type_Directory {
			return true
		}
	}
	panic(fmt.Errorf("malformed manifest %v", manifest))
}

// Returns `true` if `left` and `right` contain the same files with the same types and data.
53func CompareManifest(left *Manifest, right *Manifest) bool { 54 if len(left.Contents) != len(right.Contents) { 55 return false 56 } 57 for name, leftEntry := range left.Contents { 58 rightEntry := right.Contents[name] 59 if rightEntry == nil { 60 return false 61 } 62 if leftEntry.GetType() != rightEntry.GetType() { 63 return false 64 } 65 if !bytes.Equal(leftEntry.Data, rightEntry.Data) { 66 return false 67 } 68 } 69 return true 70} 71 72func EncodeManifest(manifest *Manifest) []byte { 73 result, err := proto.MarshalOptions{Deterministic: true}.Marshal(manifest) 74 if err != nil { 75 panic(err) 76 } 77 return result 78} 79 80func DecodeManifest(data []byte) (*Manifest, error) { 81 manifest := Manifest{} 82 err := proto.Unmarshal(data, &manifest) 83 return &manifest, err 84} 85 86func AddProblem(manifest *Manifest, path, format string, args ...any) error { 87 cause := fmt.Sprintf(format, args...) 88 manifest.Problems = append(manifest.Problems, &Problem{ 89 Path: proto.String(path), 90 Cause: proto.String(cause), 91 }) 92 return fmt.Errorf("%s: %s", path, cause) 93} 94 95func GetProblemReport(manifest *Manifest) []string { 96 var report []string 97 for _, problem := range manifest.Problems { 98 report = append(report, 99 fmt.Sprintf("%s: %s", problem.GetPath(), problem.GetCause())) 100 } 101 return report 102} 103 104func ManifestDebugJSON(manifest *Manifest) string { 105 result, err := protojson.MarshalOptions{ 106 Multiline: true, 107 EmitDefaultValues: true, 108 }.Marshal(manifest) 109 if err != nil { 110 panic(err) 111 } 112 return string(result) 113} 114 115var ErrSymlinkLoop = errors.New("symbolic link loop") 116 117func ExpandSymlinks(manifest *Manifest, inPath string) (string, error) { 118 var levels uint 119again: 120 for levels = 0; levels < config.Limits.MaxSymlinkDepth; levels += 1 { 121 parts := strings.Split(inPath, "/") 122 for i := 1; i <= len(parts); i++ { 123 linkPath := path.Join(parts[:i]...) 
124 entry := manifest.Contents[linkPath] 125 if entry != nil && entry.GetType() == Type_Symlink { 126 inPath = path.Join( 127 path.Dir(linkPath), 128 string(entry.Data), 129 path.Join(parts[i:]...), 130 ) 131 continue again 132 } 133 } 134 break 135 } 136 if levels < config.Limits.MaxSymlinkDepth { 137 return inPath, nil 138 } else { 139 return "", ErrSymlinkLoop 140 } 141} 142 143// Sniff content type using the same algorithm as `http.ServeContent`. 144func DetectContentType(manifest *Manifest) { 145 for path, entry := range manifest.Contents { 146 if entry.GetType() == Type_Directory || entry.GetType() == Type_Symlink { 147 // no Content-Type 148 } else if entry.GetType() == Type_InlineFile && entry.GetTransform() == Transform_None { 149 contentType := mime.TypeByExtension(filepath.Ext(path)) 150 if contentType == "" { 151 contentType = http.DetectContentType(entry.Data[:512]) 152 } 153 entry.ContentType = proto.String(contentType) 154 } else { 155 panic(fmt.Errorf("DetectContentType encountered invalid entry: %v, %v", 156 entry.GetType(), entry.GetTransform())) 157 } 158 } 159} 160 161// The `clauspost/compress/zstd` package recommends reusing a compressor to avoid repeated 162// allocations of internal buffers. 163var zstdEncoder, _ = zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression)) 164 165// Compress contents of inline files. 
166func CompressFiles(ctx context.Context, manifest *Manifest) { 167 span, _ := ObserveFunction(ctx, "CompressFiles") 168 defer span.Finish() 169 170 var originalSize, compressedSize int64 171 for _, entry := range manifest.Contents { 172 if entry.GetType() == Type_InlineFile && entry.GetTransform() == Transform_None { 173 mtype := getMediaType(entry.GetContentType()) 174 if strings.HasPrefix(mtype, "video/") || strings.HasPrefix(mtype, "audio/") { 175 continue 176 } 177 originalSize += entry.GetSize() 178 compressedData := zstdEncoder.EncodeAll(entry.GetData(), make([]byte, 0, entry.GetSize())) 179 if len(compressedData) < int(*entry.Size) { 180 entry.Data = compressedData 181 entry.Size = proto.Int64(int64(len(entry.Data))) 182 entry.Transform = Transform_Zstandard.Enum() 183 } 184 compressedSize += entry.GetSize() 185 } 186 } 187 manifest.OriginalSize = proto.Int64(originalSize) 188 manifest.CompressedSize = proto.Int64(compressedSize) 189 190 if originalSize != 0 { 191 spaceSaving := (float64(originalSize) - float64(compressedSize)) / float64(originalSize) 192 log.Printf("compress: saved %.2f percent (%s to %s)", 193 spaceSaving*100.0, 194 datasize.ByteSize(originalSize).HR(), 195 datasize.ByteSize(compressedSize).HR(), 196 ) 197 siteCompressionSpaceSaving. 198 Observe(spaceSaving) 199 } 200} 201 202// Apply post-processing steps to the manifest. 203// At the moment, there isn't a good way to report errors except to log them on the terminal. 204// (Perhaps in the future they could be exposed at `.git-pages/status.txt`?) 
205func PrepareManifest(ctx context.Context, manifest *Manifest) error { 206 // Parse Netlify-style `_redirects` 207 if err := ProcessRedirectsFile(manifest); err != nil { 208 log.Printf("redirects err: %s\n", err) 209 } else if len(manifest.Redirects) > 0 { 210 log.Printf("redirects ok: %d rules\n", len(manifest.Redirects)) 211 } 212 213 // Parse Netlify-style `_headers` 214 if err := ProcessHeadersFile(manifest); err != nil { 215 log.Printf("headers err: %s\n", err) 216 } else if len(manifest.Headers) > 0 { 217 log.Printf("headers ok: %d rules\n", len(manifest.Headers)) 218 } 219 220 // Sniff content type like `http.ServeContent` 221 DetectContentType(manifest) 222 223 // Opportunistically compress blobs (must be done last) 224 CompressFiles(ctx, manifest) 225 226 return nil 227} 228 229var ErrManifestTooLarge = errors.New("manifest too large") 230 231// Uploads inline file data over certain size to the storage backend. Returns a copy of 232// the manifest updated to refer to an external content-addressable store. 233func StoreManifest(ctx context.Context, name string, manifest *Manifest) (*Manifest, error) { 234 span, ctx := ObserveFunction(ctx, "StoreManifest", "manifest.name", name) 235 defer span.Finish() 236 237 // Replace inline files over certain size with references to external data. 
238 extManifest := Manifest{ 239 RepoUrl: manifest.RepoUrl, 240 Branch: manifest.Branch, 241 Commit: manifest.Commit, 242 Contents: make(map[string]*Entry), 243 Redirects: manifest.Redirects, 244 Headers: manifest.Headers, 245 Problems: manifest.Problems, 246 OriginalSize: manifest.OriginalSize, 247 CompressedSize: manifest.CompressedSize, 248 StoredSize: proto.Int64(0), 249 } 250 extObjectSizes := make(map[string]int64) 251 for name, entry := range manifest.Contents { 252 cannotBeInlined := entry.GetType() == Type_InlineFile && 253 entry.GetSize() > int64(config.Limits.MaxInlineFileSize.Bytes()) 254 if cannotBeInlined { 255 dataHash := sha256.Sum256(entry.Data) 256 extManifest.Contents[name] = &Entry{ 257 Type: Type_ExternalFile.Enum(), 258 Size: entry.Size, 259 Data: fmt.Appendf(nil, "sha256-%x", dataHash), 260 Transform: entry.Transform, 261 ContentType: entry.ContentType, 262 } 263 extObjectSizes[string(dataHash[:])] = entry.GetSize() 264 } else { 265 extManifest.Contents[name] = entry 266 } 267 } 268 // `extObjectMap` stores size once per object, deduplicating it 269 for _, storedSize := range extObjectSizes { 270 *extManifest.StoredSize += storedSize 271 } 272 273 // Upload the resulting manifest and the blob it references. 
274 extManifestData := EncodeManifest(&extManifest) 275 if uint64(len(extManifestData)) > config.Limits.MaxManifestSize.Bytes() { 276 return nil, fmt.Errorf("%w: manifest size %s exceeds %s limit", 277 ErrManifestTooLarge, 278 datasize.ByteSize(len(extManifestData)).HR(), 279 config.Limits.MaxManifestSize, 280 ) 281 } 282 283 if err := backend.StageManifest(ctx, &extManifest); err != nil { 284 return nil, fmt.Errorf("stage manifest: %w", err) 285 } 286 287 wg := sync.WaitGroup{} 288 ch := make(chan error, len(extManifest.Contents)) 289 for name, entry := range extManifest.Contents { 290 if entry.GetType() == Type_ExternalFile { 291 wg.Go(func() { 292 err := backend.PutBlob(ctx, string(entry.Data), manifest.Contents[name].Data) 293 if err != nil { 294 ch <- fmt.Errorf("put blob %s: %w", name, err) 295 } 296 }) 297 } 298 } 299 wg.Wait() 300 close(ch) 301 for err := range ch { 302 return nil, err // currently ignores all but 1st error 303 } 304 305 if err := backend.CommitManifest(ctx, name, &extManifest); err != nil { 306 return nil, fmt.Errorf("commit manifest: %w", err) 307 } 308 309 return &extManifest, nil 310}