fork of whitequark.org/git-pages with mods for tangled
at main 9.9 kB view raw
1//go:generate protoc --go_out=. --go_opt=paths=source_relative schema.proto 2 3package git_pages 4 5import ( 6 "bytes" 7 "context" 8 "crypto/sha256" 9 "errors" 10 "fmt" 11 "mime" 12 "net/http" 13 "path" 14 "path/filepath" 15 "strings" 16 "sync" 17 "time" 18 19 "github.com/c2h5oh/datasize" 20 "github.com/klauspost/compress/zstd" 21 "github.com/prometheus/client_golang/prometheus" 22 "github.com/prometheus/client_golang/prometheus/promauto" 23 "google.golang.org/protobuf/encoding/protojson" 24 "google.golang.org/protobuf/proto" 25) 26 27var ( 28 siteCompressionSpaceSaving = promauto.NewHistogram(prometheus.HistogramOpts{ 29 Name: "git_pages_site_compression_space_saving", 30 Help: "Reduction in site size after compression relative to the uncompressed size", 31 Buckets: []float64{.01, .025, .05, .1, .25, .5, .75, 1, 1.25, 1.5, 1.75, 2, 2.5, 5, 10}, 32 33 NativeHistogramBucketFactor: 1.1, 34 NativeHistogramMaxBucketNumber: 100, 35 NativeHistogramMinResetDuration: 10 * time.Minute, 36 }) 37) 38 39func IsManifestEmpty(manifest *Manifest) bool { 40 if len(manifest.Contents) > 1 { 41 return false 42 } 43 for name, entry := range manifest.Contents { 44 if name == "" && entry.GetType() == Type_Directory { 45 return true 46 } 47 } 48 panic(fmt.Errorf("malformed manifest %v", manifest)) 49} 50 51// Returns `true` if `left` and `right` contain the same files with the same types and data. 52func CompareManifest(left *Manifest, right *Manifest) bool { 53 if len(left.Contents) != len(right.Contents) { 54 return false 55 } 56 for name, leftEntry := range left.Contents { 57 rightEntry := right.Contents[name] 58 if rightEntry == nil { 59 return false 60 } 61 if leftEntry.GetType() != rightEntry.GetType() { 62 return false 63 } 64 if !bytes.Equal(leftEntry.Data, rightEntry.Data) { 65 return false 66 } 67 } 68 return true 69} 70 71func EncodeManifest(manifest *Manifest) []byte { 72 result, err := proto.MarshalOptions{Deterministic: true}.Marshal(manifest) 73 if err != nil { 74 panic(err) 75 } 76 return result 77} 78 79func DecodeManifest(data []byte) (*Manifest, error) { 80 manifest := Manifest{} 81 err := proto.Unmarshal(data, &manifest) 82 return &manifest, err 83} 84 85func AddProblem(manifest *Manifest, path, format string, args ...any) error { 86 cause := fmt.Sprintf(format, args...) 87 manifest.Problems = append(manifest.Problems, &Problem{ 88 Path: proto.String(path), 89 Cause: proto.String(cause), 90 }) 91 return fmt.Errorf("%s: %s", path, cause) 92} 93 94func GetProblemReport(manifest *Manifest) []string { 95 var report []string 96 for _, problem := range manifest.Problems { 97 report = append(report, 98 fmt.Sprintf("%s: %s", problem.GetPath(), problem.GetCause())) 99 } 100 return report 101} 102 103func ManifestDebugJSON(manifest *Manifest) string { 104 result, err := protojson.MarshalOptions{ 105 Multiline: true, 106 EmitDefaultValues: true, 107 }.Marshal(manifest) 108 if err != nil { 109 panic(err) 110 } 111 return string(result) 112} 113 114var ErrSymlinkLoop = errors.New("symbolic link loop") 115 116func ExpandSymlinks(manifest *Manifest, inPath string) (string, error) { 117 var levels uint 118again: 119 for levels = 0; levels < config.Limits.MaxSymlinkDepth; levels += 1 { 120 parts := strings.Split(inPath, "/") 121 for i := 1; i <= len(parts); i++ { 122 linkPath := path.Join(parts[:i]...) 123 entry := manifest.Contents[linkPath] 124 if entry != nil && entry.GetType() == Type_Symlink { 125 inPath = path.Join( 126 path.Dir(linkPath), 127 string(entry.Data), 128 path.Join(parts[i:]...), 129 ) 130 continue again 131 } 132 } 133 break 134 } 135 if levels < config.Limits.MaxSymlinkDepth { 136 return inPath, nil 137 } else { 138 return "", ErrSymlinkLoop 139 } 140} 141 142// Sniff content type using the same algorithm as `http.ServeContent`. 143func DetectContentType(manifest *Manifest) { 144 for path, entry := range manifest.Contents { 145 if entry.GetType() == Type_Directory || entry.GetType() == Type_Symlink { 146 // no Content-Type 147 } else if entry.GetType() == Type_InlineFile && entry.GetTransform() == Transform_Identity { 148 contentType := mime.TypeByExtension(filepath.Ext(path)) 149 if contentType == "" { 150 contentType = http.DetectContentType(entry.Data[:min(512, len(entry.Data))]) 151 } 152 entry.ContentType = proto.String(contentType) 153 } else if entry.GetContentType() == "" { 154 panic(fmt.Errorf("DetectContentType encountered invalid entry: %v, %v", 155 entry.GetType(), entry.GetTransform())) 156 } 157 } 158} 159 160// The `klauspost/compress/zstd` package recommends reusing a compressor to avoid repeated 161// allocations of internal buffers. 162var zstdEncoder, _ = zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression)) 163 164// Compress contents of inline files. 165func CompressFiles(ctx context.Context, manifest *Manifest) { 166 span, _ := ObserveFunction(ctx, "CompressFiles") 167 defer span.Finish() 168 169 var originalSize int64 170 var compressedSize int64 171 for _, entry := range manifest.Contents { 172 if entry.GetType() == Type_InlineFile && entry.GetTransform() == Transform_Identity { 173 mediaType := getMediaType(entry.GetContentType()) 174 if strings.HasPrefix(mediaType, "video/") || strings.HasPrefix(mediaType, "audio/") { 175 continue 176 } 177 compressedData := zstdEncoder.EncodeAll(entry.GetData(), 178 make([]byte, 0, entry.GetOriginalSize())) 179 if int64(len(compressedData)) < entry.GetOriginalSize() { 180 entry.Data = compressedData 181 entry.Transform = Transform_Zstd.Enum() 182 entry.CompressedSize = proto.Int64(int64(len(entry.Data))) 183 } 184 } 185 originalSize += entry.GetOriginalSize() 186 compressedSize += entry.GetCompressedSize() 187 } 188 manifest.OriginalSize = proto.Int64(originalSize) 189 manifest.CompressedSize = proto.Int64(compressedSize) 190 191 if originalSize != 0 { 192 spaceSaving := (float64(originalSize) - float64(compressedSize)) / float64(originalSize) 193 logc.Printf(ctx, "compress: saved %.2f percent (%s to %s)", 194 spaceSaving*100.0, 195 datasize.ByteSize(originalSize).HR(), 196 datasize.ByteSize(compressedSize).HR(), 197 ) 198 siteCompressionSpaceSaving. 199 Observe(spaceSaving) 200 } 201} 202 203// Apply post-processing steps to the manifest. 204// At the moment, there isn't a good way to report errors except to log them on the terminal. 205// (Perhaps in the future they could be exposed at `.git-pages/status.txt`?) 206func PrepareManifest(ctx context.Context, manifest *Manifest) error { 207 // Parse Netlify-style `_redirects` 208 if err := ProcessRedirectsFile(manifest); err != nil { 209 logc.Printf(ctx, "redirects err: %s\n", err) 210 } else if len(manifest.Redirects) > 0 { 211 logc.Printf(ctx, "redirects ok: %d rules\n", len(manifest.Redirects)) 212 } 213 214 // Parse Netlify-style `_headers` 215 if err := ProcessHeadersFile(manifest); err != nil { 216 logc.Printf(ctx, "headers err: %s\n", err) 217 } else if len(manifest.Headers) > 0 { 218 logc.Printf(ctx, "headers ok: %d rules\n", len(manifest.Headers)) 219 } 220 221 // Sniff content type like `http.ServeContent` 222 DetectContentType(manifest) 223 224 // Opportunistically compress blobs (must be done last) 225 CompressFiles(ctx, manifest) 226 227 return nil 228} 229 230var ErrManifestTooLarge = errors.New("manifest too large") 231 232// Uploads inline file data over certain size to the storage backend. Returns a copy of 233// the manifest updated to refer to an external content-addressable store. 234func StoreManifest(ctx context.Context, name string, manifest *Manifest) (*Manifest, error) { 235 span, ctx := ObserveFunction(ctx, "StoreManifest", "manifest.name", name) 236 defer span.Finish() 237 238 // Replace inline files over certain size with references to external data. 239 extManifest := Manifest{ 240 RepoUrl: manifest.RepoUrl, 241 Branch: manifest.Branch, 242 Commit: manifest.Commit, 243 Contents: make(map[string]*Entry), 244 Redirects: manifest.Redirects, 245 Headers: manifest.Headers, 246 Problems: manifest.Problems, 247 OriginalSize: manifest.OriginalSize, 248 CompressedSize: manifest.CompressedSize, 249 StoredSize: proto.Int64(0), 250 } 251 for name, entry := range manifest.Contents { 252 cannotBeInlined := entry.GetType() == Type_InlineFile && 253 entry.GetCompressedSize() > int64(config.Limits.MaxInlineFileSize.Bytes()) 254 if cannotBeInlined { 255 dataHash := sha256.Sum256(entry.Data) 256 extManifest.Contents[name] = &Entry{ 257 Type: Type_ExternalFile.Enum(), 258 OriginalSize: entry.OriginalSize, 259 CompressedSize: entry.CompressedSize, 260 Data: fmt.Appendf(nil, "sha256-%x", dataHash), 261 Transform: entry.Transform, 262 ContentType: entry.ContentType, 263 GitHash: entry.GitHash, 264 } 265 } else { 266 extManifest.Contents[name] = entry 267 } 268 } 269 270 // Compute the deduplicated storage size. 271 var blobSizes = make(map[string]int64) 272 for _, entry := range manifest.Contents { 273 if entry.GetType() == Type_ExternalFile { 274 blobSizes[string(entry.Data)] = entry.GetCompressedSize() 275 } 276 } 277 for _, blobSize := range blobSizes { 278 *extManifest.StoredSize += blobSize 279 } 280 281 // Upload the resulting manifest and the blob it references. 282 extManifestData := EncodeManifest(&extManifest) 283 if uint64(len(extManifestData)) > config.Limits.MaxManifestSize.Bytes() { 284 return nil, fmt.Errorf("%w: manifest size %s exceeds %s limit", 285 ErrManifestTooLarge, 286 datasize.ByteSize(len(extManifestData)).HR(), 287 config.Limits.MaxManifestSize, 288 ) 289 } 290 291 if err := backend.StageManifest(ctx, &extManifest); err != nil { 292 return nil, fmt.Errorf("stage manifest: %w", err) 293 } 294 295 wg := sync.WaitGroup{} 296 ch := make(chan error, len(extManifest.Contents)) 297 for name, entry := range extManifest.Contents { 298 if entry.GetType() == Type_ExternalFile { 299 wg.Go(func() { 300 err := backend.PutBlob(ctx, string(entry.Data), manifest.Contents[name].Data) 301 if err != nil { 302 ch <- fmt.Errorf("put blob %s: %w", name, err) 303 } 304 }) 305 } 306 } 307 wg.Wait() 308 close(ch) 309 for err := range ch { 310 return nil, err // currently ignores all but 1st error 311 } 312 313 if err := backend.CommitManifest(ctx, name, &extManifest); err != nil { 314 if errors.Is(err, ErrDomainFrozen) { 315 return nil, err 316 } else { 317 return nil, fmt.Errorf("commit manifest: %w", err) 318 } 319 } 320 321 return &extManifest, nil 322}