// Forked from whitequark.org/git-pages, with modifications for tangled.
1//go:generate protoc --go_out=. --go_opt=paths=source_relative schema.proto
2
3package git_pages
4
5import (
6 "bytes"
7 "context"
8 "crypto/sha256"
9 "errors"
10 "fmt"
11 "mime"
12 "net/http"
13 "path"
14 "path/filepath"
15 "strings"
16 "sync"
17 "time"
18
19 "github.com/c2h5oh/datasize"
20 "github.com/klauspost/compress/zstd"
21 "github.com/prometheus/client_golang/prometheus"
22 "github.com/prometheus/client_golang/prometheus/promauto"
23 "google.golang.org/protobuf/encoding/protojson"
24 "google.golang.org/protobuf/proto"
25)
26
27var (
28 siteCompressionSpaceSaving = promauto.NewHistogram(prometheus.HistogramOpts{
29 Name: "git_pages_site_compression_space_saving",
30 Help: "Reduction in site size after compression relative to the uncompressed size",
31 Buckets: []float64{.01, .025, .05, .1, .25, .5, .75, 1, 1.25, 1.5, 1.75, 2, 2.5, 5, 10},
32
33 NativeHistogramBucketFactor: 1.1,
34 NativeHistogramMaxBucketNumber: 100,
35 NativeHistogramMinResetDuration: 10 * time.Minute,
36 })
37)
38
39func IsManifestEmpty(manifest *Manifest) bool {
40 if len(manifest.Contents) > 1 {
41 return false
42 }
43 for name, entry := range manifest.Contents {
44 if name == "" && entry.GetType() == Type_Directory {
45 return true
46 }
47 }
48 panic(fmt.Errorf("malformed manifest %v", manifest))
49}
50
51// Returns `true` if `left` and `right` contain the same files with the same types and data.
52func CompareManifest(left *Manifest, right *Manifest) bool {
53 if len(left.Contents) != len(right.Contents) {
54 return false
55 }
56 for name, leftEntry := range left.Contents {
57 rightEntry := right.Contents[name]
58 if rightEntry == nil {
59 return false
60 }
61 if leftEntry.GetType() != rightEntry.GetType() {
62 return false
63 }
64 if !bytes.Equal(leftEntry.Data, rightEntry.Data) {
65 return false
66 }
67 }
68 return true
69}
70
71func EncodeManifest(manifest *Manifest) []byte {
72 result, err := proto.MarshalOptions{Deterministic: true}.Marshal(manifest)
73 if err != nil {
74 panic(err)
75 }
76 return result
77}
78
79func DecodeManifest(data []byte) (*Manifest, error) {
80 manifest := Manifest{}
81 err := proto.Unmarshal(data, &manifest)
82 return &manifest, err
83}
84
85func AddProblem(manifest *Manifest, path, format string, args ...any) error {
86 cause := fmt.Sprintf(format, args...)
87 manifest.Problems = append(manifest.Problems, &Problem{
88 Path: proto.String(path),
89 Cause: proto.String(cause),
90 })
91 return fmt.Errorf("%s: %s", path, cause)
92}
93
94func GetProblemReport(manifest *Manifest) []string {
95 var report []string
96 for _, problem := range manifest.Problems {
97 report = append(report,
98 fmt.Sprintf("%s: %s", problem.GetPath(), problem.GetCause()))
99 }
100 return report
101}
102
103func ManifestDebugJSON(manifest *Manifest) string {
104 result, err := protojson.MarshalOptions{
105 Multiline: true,
106 EmitDefaultValues: true,
107 }.Marshal(manifest)
108 if err != nil {
109 panic(err)
110 }
111 return string(result)
112}
113
114var ErrSymlinkLoop = errors.New("symbolic link loop")
115
116func ExpandSymlinks(manifest *Manifest, inPath string) (string, error) {
117 var levels uint
118again:
119 for levels = 0; levels < config.Limits.MaxSymlinkDepth; levels += 1 {
120 parts := strings.Split(inPath, "/")
121 for i := 1; i <= len(parts); i++ {
122 linkPath := path.Join(parts[:i]...)
123 entry := manifest.Contents[linkPath]
124 if entry != nil && entry.GetType() == Type_Symlink {
125 inPath = path.Join(
126 path.Dir(linkPath),
127 string(entry.Data),
128 path.Join(parts[i:]...),
129 )
130 continue again
131 }
132 }
133 break
134 }
135 if levels < config.Limits.MaxSymlinkDepth {
136 return inPath, nil
137 } else {
138 return "", ErrSymlinkLoop
139 }
140}
141
142// Sniff content type using the same algorithm as `http.ServeContent`.
143func DetectContentType(manifest *Manifest) {
144 for path, entry := range manifest.Contents {
145 if entry.GetType() == Type_Directory || entry.GetType() == Type_Symlink {
146 // no Content-Type
147 } else if entry.GetType() == Type_InlineFile && entry.GetTransform() == Transform_Identity {
148 contentType := mime.TypeByExtension(filepath.Ext(path))
149 if contentType == "" {
150 contentType = http.DetectContentType(entry.Data[:min(512, len(entry.Data))])
151 }
152 entry.ContentType = proto.String(contentType)
153 } else if entry.GetContentType() == "" {
154 panic(fmt.Errorf("DetectContentType encountered invalid entry: %v, %v",
155 entry.GetType(), entry.GetTransform()))
156 }
157 }
158}
159
160// The `klauspost/compress/zstd` package recommends reusing a compressor to avoid repeated
161// allocations of internal buffers.
162var zstdEncoder, _ = zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
163
164// Compress contents of inline files.
165func CompressFiles(ctx context.Context, manifest *Manifest) {
166 span, _ := ObserveFunction(ctx, "CompressFiles")
167 defer span.Finish()
168
169 var originalSize int64
170 var compressedSize int64
171 for _, entry := range manifest.Contents {
172 if entry.GetType() == Type_InlineFile && entry.GetTransform() == Transform_Identity {
173 mediaType := getMediaType(entry.GetContentType())
174 if strings.HasPrefix(mediaType, "video/") || strings.HasPrefix(mediaType, "audio/") {
175 continue
176 }
177 compressedData := zstdEncoder.EncodeAll(entry.GetData(),
178 make([]byte, 0, entry.GetOriginalSize()))
179 if int64(len(compressedData)) < entry.GetOriginalSize() {
180 entry.Data = compressedData
181 entry.Transform = Transform_Zstd.Enum()
182 entry.CompressedSize = proto.Int64(int64(len(entry.Data)))
183 }
184 }
185 originalSize += entry.GetOriginalSize()
186 compressedSize += entry.GetCompressedSize()
187 }
188 manifest.OriginalSize = proto.Int64(originalSize)
189 manifest.CompressedSize = proto.Int64(compressedSize)
190
191 if originalSize != 0 {
192 spaceSaving := (float64(originalSize) - float64(compressedSize)) / float64(originalSize)
193 logc.Printf(ctx, "compress: saved %.2f percent (%s to %s)",
194 spaceSaving*100.0,
195 datasize.ByteSize(originalSize).HR(),
196 datasize.ByteSize(compressedSize).HR(),
197 )
198 siteCompressionSpaceSaving.
199 Observe(spaceSaving)
200 }
201}
202
203// Apply post-processing steps to the manifest.
204// At the moment, there isn't a good way to report errors except to log them on the terminal.
205// (Perhaps in the future they could be exposed at `.git-pages/status.txt`?)
206func PrepareManifest(ctx context.Context, manifest *Manifest) error {
207 // Parse Netlify-style `_redirects`
208 if err := ProcessRedirectsFile(manifest); err != nil {
209 logc.Printf(ctx, "redirects err: %s\n", err)
210 } else if len(manifest.Redirects) > 0 {
211 logc.Printf(ctx, "redirects ok: %d rules\n", len(manifest.Redirects))
212 }
213
214 // Parse Netlify-style `_headers`
215 if err := ProcessHeadersFile(manifest); err != nil {
216 logc.Printf(ctx, "headers err: %s\n", err)
217 } else if len(manifest.Headers) > 0 {
218 logc.Printf(ctx, "headers ok: %d rules\n", len(manifest.Headers))
219 }
220
221 // Sniff content type like `http.ServeContent`
222 DetectContentType(manifest)
223
224 // Opportunistically compress blobs (must be done last)
225 CompressFiles(ctx, manifest)
226
227 return nil
228}
229
230var ErrManifestTooLarge = errors.New("manifest too large")
231
232// Uploads inline file data over certain size to the storage backend. Returns a copy of
233// the manifest updated to refer to an external content-addressable store.
234func StoreManifest(ctx context.Context, name string, manifest *Manifest) (*Manifest, error) {
235 span, ctx := ObserveFunction(ctx, "StoreManifest", "manifest.name", name)
236 defer span.Finish()
237
238 // Replace inline files over certain size with references to external data.
239 extManifest := Manifest{
240 RepoUrl: manifest.RepoUrl,
241 Branch: manifest.Branch,
242 Commit: manifest.Commit,
243 Contents: make(map[string]*Entry),
244 Redirects: manifest.Redirects,
245 Headers: manifest.Headers,
246 Problems: manifest.Problems,
247 OriginalSize: manifest.OriginalSize,
248 CompressedSize: manifest.CompressedSize,
249 StoredSize: proto.Int64(0),
250 }
251 for name, entry := range manifest.Contents {
252 cannotBeInlined := entry.GetType() == Type_InlineFile &&
253 entry.GetCompressedSize() > int64(config.Limits.MaxInlineFileSize.Bytes())
254 if cannotBeInlined {
255 dataHash := sha256.Sum256(entry.Data)
256 extManifest.Contents[name] = &Entry{
257 Type: Type_ExternalFile.Enum(),
258 OriginalSize: entry.OriginalSize,
259 CompressedSize: entry.CompressedSize,
260 Data: fmt.Appendf(nil, "sha256-%x", dataHash),
261 Transform: entry.Transform,
262 ContentType: entry.ContentType,
263 GitHash: entry.GitHash,
264 }
265 } else {
266 extManifest.Contents[name] = entry
267 }
268 }
269
270 // Compute the deduplicated storage size.
271 var blobSizes = make(map[string]int64)
272 for _, entry := range manifest.Contents {
273 if entry.GetType() == Type_ExternalFile {
274 blobSizes[string(entry.Data)] = entry.GetCompressedSize()
275 }
276 }
277 for _, blobSize := range blobSizes {
278 *extManifest.StoredSize += blobSize
279 }
280
281 // Upload the resulting manifest and the blob it references.
282 extManifestData := EncodeManifest(&extManifest)
283 if uint64(len(extManifestData)) > config.Limits.MaxManifestSize.Bytes() {
284 return nil, fmt.Errorf("%w: manifest size %s exceeds %s limit",
285 ErrManifestTooLarge,
286 datasize.ByteSize(len(extManifestData)).HR(),
287 config.Limits.MaxManifestSize,
288 )
289 }
290
291 if err := backend.StageManifest(ctx, &extManifest); err != nil {
292 return nil, fmt.Errorf("stage manifest: %w", err)
293 }
294
295 wg := sync.WaitGroup{}
296 ch := make(chan error, len(extManifest.Contents))
297 for name, entry := range extManifest.Contents {
298 if entry.GetType() == Type_ExternalFile {
299 wg.Go(func() {
300 err := backend.PutBlob(ctx, string(entry.Data), manifest.Contents[name].Data)
301 if err != nil {
302 ch <- fmt.Errorf("put blob %s: %w", name, err)
303 }
304 })
305 }
306 }
307 wg.Wait()
308 close(ch)
309 for err := range ch {
310 return nil, err // currently ignores all but 1st error
311 }
312
313 if err := backend.CommitManifest(ctx, name, &extManifest); err != nil {
314 if errors.Is(err, ErrDomainFrozen) {
315 return nil, err
316 } else {
317 return nil, fmt.Errorf("commit manifest: %w", err)
318 }
319 }
320
321 return &extManifest, nil
322}