Fork of whitequark.org/git-pages with modifications for tangled.

Use git filters for incremental updates from a git repository.

This commit changes the git fetch algorithm to retrieve only the blobs
that aren't already included in the previously deployed site manifest,
provided git filters are supported by the remote.
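
For illustration, here is a minimal sketch of that fallback (go-git v6; in-memory
storage stands in for the real temp-dir storer, and the helper name is hypothetical):
request a `blob:none` partial clone first, then retry with an unfiltered shallow
clone if the remote rejects filters.

    package sketch

    import (
        "context"
        "fmt"

        "github.com/go-git/go-git/v6"
        "github.com/go-git/go-git/v6/plumbing"
        "github.com/go-git/go-git/v6/plumbing/protocol/packp"
        "github.com/go-git/go-git/v6/storage/memory"
    )

    // cloneWithFilterFallback tries a blob-less partial clone first; if the
    // remote does not support filters, it falls back to a full shallow clone.
    func cloneWithFilterFallback(ctx context.Context, url, branch string) (*git.Repository, error) {
        var lastErr error
        for _, filter := range []packp.Filter{packp.FilterBlobNone(), packp.Filter("")} {
            repo, err := git.CloneContext(ctx, memory.NewStorage(), nil, &git.CloneOptions{
                Bare:          true,
                URL:           url,
                ReferenceName: plumbing.ReferenceName(branch),
                SingleBranch:  true,
                Depth:         1,
                Tags:          git.NoTags,
                Filter:        filter, // the empty filter requests a normal clone
            })
            if err == nil {
                return repo, nil
            }
            lastErr = err
        }
        return nil, fmt.Errorf("git clone: %w", lastErr)
    }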

It also changes how manifest entry sizes are represented: both the
decompressed and the compressed sizes are now stored. This enables
computing accurate (and repeatable) sizes even after incremental
updates.
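
Concretely, a sketch assuming the generated types from src/schema.pb.go, with
`data` and its zstd-compressed form `compressedData` given: an untransformed
entry carries the same value in both fields, and compression only updates the
compressed size, leaving the original size intact.

    // Untransformed entry: both sizes equal the raw data length.
    entry := &Entry{
        Type:           Type_InlineFile.Enum(),
        Transform:      Transform_Identity.Enum(),
        Data:           data,
        OriginalSize:   proto.Int64(int64(len(data))),
        CompressedSize: proto.Int64(int64(len(data))),
    }

    // After a successful zstd pass (see CompressFiles in src/manifest.go),
    // only the compressed size changes; the original size is preserved.
    entry.Data = compressedData
    entry.Transform = Transform_Zstd.Enum()
    entry.CompressedSize = proto.Int64(int64(len(compressedData)))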

Co-authored-by: David Leadbeater <dgl@dgl.cx>

+9 -3
src/extract.go
···
         }

         manifestEntry.Type = Type_InlineFile.Enum()
-        manifestEntry.Size = proto.Int64(header.Size)
         manifestEntry.Data = fileData
+        manifestEntry.Transform = Transform_Identity.Enum()
+        manifestEntry.OriginalSize = proto.Int64(header.Size)
+        manifestEntry.CompressedSize = proto.Int64(header.Size)

     case tar.TypeSymlink:
         manifestEntry.Type = Type_Symlink.Enum()
-        manifestEntry.Size = proto.Int64(header.Size)
         manifestEntry.Data = []byte(header.Linkname)
+        manifestEntry.Transform = Transform_Identity.Enum()
+        manifestEntry.OriginalSize = proto.Int64(header.Size)
+        manifestEntry.CompressedSize = proto.Int64(header.Size)

     case tar.TypeDir:
         manifestEntry.Type = Type_Directory.Enum()
···
         } else {
             manifestEntry.Type = Type_InlineFile.Enum()
         }
-        manifestEntry.Size = proto.Int64(int64(file.UncompressedSize64))
         manifestEntry.Data = fileData
+        manifestEntry.Transform = Transform_Identity.Enum()
+        manifestEntry.OriginalSize = proto.Int64(int64(file.UncompressedSize64))
+        manifestEntry.CompressedSize = proto.Int64(int64(file.UncompressedSize64))
     } else {
         manifestEntry.Type = Type_Directory.Enum()
     }

+175 -42
src/fetch.go
···

 import (
     "context"
+    "errors"
     "fmt"
     "io"
+    "maps"
+    "net/url"
     "os"
+    "slices"

+    "github.com/c2h5oh/datasize"
     "github.com/go-git/go-billy/v6/osfs"
     "github.com/go-git/go-git/v6"
     "github.com/go-git/go-git/v6/plumbing"
     "github.com/go-git/go-git/v6/plumbing/cache"
     "github.com/go-git/go-git/v6/plumbing/filemode"
     "github.com/go-git/go-git/v6/plumbing/object"
+    "github.com/go-git/go-git/v6/plumbing/protocol/packp"
+    "github.com/go-git/go-git/v6/plumbing/transport"
     "github.com/go-git/go-git/v6/storage/filesystem"
     "google.golang.org/protobuf/proto"
 )

-func FetchRepository(ctx context.Context, repoURL string, branch string) (*Manifest, error) {
+func FetchRepository(
+    ctx context.Context, repoURL string, branch string, oldManifest *Manifest,
+) (
+    *Manifest, error,
+) {
     span, ctx := ObserveFunction(ctx, "FetchRepository",
         "git.repository", repoURL, "git.branch", branch)
     defer span.Finish()

-    baseDir, err := os.MkdirTemp("", "fetchRepo")
+    parsedRepoURL, err := url.Parse(repoURL)
     if err != nil {
-        return nil, fmt.Errorf("mkdtemp: %w", err)
+        return nil, fmt.Errorf("URL parse: %w", err)
     }
-    defer os.RemoveAll(baseDir)
-
-    fs := osfs.New(baseDir, osfs.WithBoundOS())
-    cache := cache.NewObjectLRUDefault()
-    storer := filesystem.NewStorageWithOptions(fs, cache, filesystem.Options{
-        ExclusiveAccess:      true,
-        LargeObjectThreshold: int64(config.Limits.GitLargeObjectThreshold.Bytes()),
-    })
-    repo, err := git.CloneContext(ctx, storer, nil, &git.CloneOptions{
-        Bare:          true,
-        URL:           repoURL,
-        ReferenceName: plumbing.ReferenceName(branch),
-        SingleBranch:  true,
-        Depth:         1,
-        Tags:          git.NoTags,
-    })
+
+    var repo *git.Repository
+    var storer *filesystem.Storage
+    for _, filter := range []packp.Filter{packp.FilterBlobNone(), packp.Filter("")} {
+        var tempDir string
+        tempDir, err = os.MkdirTemp("", "fetchRepo")
+        if err != nil {
+            return nil, fmt.Errorf("mkdtemp: %w", err)
+        }
+        defer os.RemoveAll(tempDir)
+
+        storer = filesystem.NewStorageWithOptions(
+            osfs.New(tempDir, osfs.WithBoundOS()),
+            cache.NewObjectLRUDefault(),
+            filesystem.Options{
+                ExclusiveAccess:      true,
+                LargeObjectThreshold: int64(config.Limits.GitLargeObjectThreshold.Bytes()),
+            },
+        )
+        repo, err = git.CloneContext(ctx, storer, nil, &git.CloneOptions{
+            Bare:          true,
+            URL:           repoURL,
+            ReferenceName: plumbing.ReferenceName(branch),
+            SingleBranch:  true,
+            Depth:         1,
+            Tags:          git.NoTags,
+            Filter:        filter,
+        })
+        if err != nil {
+            logc.Printf(ctx, "clone err: %s %s filter=%q\n", repoURL, branch, filter)
+            continue
+        } else {
+            logc.Printf(ctx, "clone ok: %s %s filter=%q\n", repoURL, branch, filter)
+            break
+        }
+    }
     if err != nil {
         return nil, fmt.Errorf("git clone: %w", err)
     }
···
     walker := object.NewTreeWalker(tree, true, make(map[plumbing.Hash]bool))
     defer walker.Close()

-    manifest := Manifest{
+    // Create a manifest for the tree object corresponding to `branch`, but do not populate it
+    // with data yet; instead, record all the blobs we'll need.
+    manifest := &Manifest{
         RepoUrl: proto.String(repoURL),
         Branch:  proto.String(branch),
         Commit:  proto.String(ref.Hash().String()),
···
             "": {Type: Type_Directory.Enum()},
         },
     }
+    blobsNeeded := map[plumbing.Hash]*Entry{}
     for {
         name, entry, err := walker.Next()
         if err == io.EOF {
···
         } else if err != nil {
             return nil, fmt.Errorf("git walker: %w", err)
         } else {
-            manifestEntry := Entry{}
-            if entry.Mode.IsFile() {
-                blob, err := repo.BlobObject(entry.Hash)
-                if err != nil {
-                    return nil, fmt.Errorf("git blob %s: %w", name, err)
-                }
-
-                reader, err := blob.Reader()
-                if err != nil {
-                    return nil, fmt.Errorf("git blob open: %w", err)
-                }
-                defer reader.Close()
-
-                data, err := io.ReadAll(reader)
-                if err != nil {
-                    return nil, fmt.Errorf("git blob read: %w", err)
-                }
-
+            manifestEntry := &Entry{}
+            if existingManifestEntry, found := blobsNeeded[entry.Hash]; found {
+                // If the same blob is present twice, we only need to fetch it once (and both
+                // instances will alias the same `Entry` structure in the manifest).
+                manifestEntry = existingManifestEntry
+            } else if entry.Mode.IsFile() {
+                blobsNeeded[entry.Hash] = manifestEntry
                 if entry.Mode == filemode.Symlink {
                     manifestEntry.Type = Type_Symlink.Enum()
                 } else {
                     manifestEntry.Type = Type_InlineFile.Enum()
                 }
-                manifestEntry.Size = proto.Int64(blob.Size)
-                manifestEntry.Data = data
+                manifestEntry.GitHash = proto.String(entry.Hash.String())
             } else if entry.Mode == filemode.Dir {
                 manifestEntry.Type = Type_Directory.Enum()
             } else {
-                AddProblem(&manifest, name, "unsupported mode %#o", entry.Mode)
+                AddProblem(manifest, name, "unsupported mode %#o", entry.Mode)
                 continue
             }
-            manifest.Contents[name] = &manifestEntry
+            manifest.Contents[name] = manifestEntry
         }
     }
-    return &manifest, nil
+
+    // Collect checkout statistics.
+    var dataBytesFromOldManifest int64
+    var dataBytesFromGitCheckout int64
+    var dataBytesFromGitTransport int64
+
+    // First, see if we can extract the blobs from the old manifest. This is the preferred option
+    // because it avoids both network transfers and recompression. Note that we do not request
+    // blobs from the backend under any circumstances to avoid creating a blob existence oracle.
+    for _, oldManifestEntry := range oldManifest.GetContents() {
+        if hash, ok := plumbing.FromHex(oldManifestEntry.GetGitHash()); ok {
+            if manifestEntry, found := blobsNeeded[hash]; found {
+                CopyProtoMessage(manifestEntry, oldManifestEntry)
+                dataBytesFromOldManifest += oldManifestEntry.GetOriginalSize()
+                delete(blobsNeeded, hash)
+            }
+        }
+    }
+
+    // Second, fill the manifest entries with data from the git checkout we just made.
+    // This will only succeed if a `blob:none` filter isn't supported and we got a full
+    // clone despite asking for a partial clone.
+    for hash, manifestEntry := range blobsNeeded {
+        if err := readGitBlob(repo, hash, manifestEntry); err == nil {
+            dataBytesFromGitCheckout += manifestEntry.GetOriginalSize()
+            delete(blobsNeeded, hash)
+        }
+    }
+
+    // Third, if we still don't have data for some manifest entries, re-establish a git transport
+    // and request the missing blobs (only) from the server.
+    if len(blobsNeeded) > 0 {
+        client, err := transport.Get(parsedRepoURL.Scheme)
+        if err != nil {
+            return nil, fmt.Errorf("git transport: %w", err)
+        }
+
+        endpoint, err := transport.NewEndpoint(repoURL)
+        if err != nil {
+            return nil, fmt.Errorf("git endpoint: %w", err)
+        }
+
+        session, err := client.NewSession(storer, endpoint, nil)
+        if err != nil {
+            return nil, fmt.Errorf("git session: %w", err)
+        }
+
+        connection, err := session.Handshake(ctx, transport.UploadPackService)
+        if err != nil {
+            return nil, fmt.Errorf("git connection: %w", err)
+        }
+        defer connection.Close()
+
+        if err := connection.Fetch(ctx, &transport.FetchRequest{
+            Wants: slices.Collect(maps.Keys(blobsNeeded)),
+            Depth: 1,
+            // Git CLI behaves like this, even if the wants above are references to blobs.
+            Filter: "blob:none",
+        }); err != nil && !errors.Is(err, transport.ErrNoChange) {
+            return nil, fmt.Errorf("git blob fetch request: %w", err)
+        }
+
+        // All remaining blobs should now be available.
+        for hash, manifestEntry := range blobsNeeded {
+            if err := readGitBlob(repo, hash, manifestEntry); err != nil {
+                return nil, err
+            }
+            dataBytesFromGitTransport += manifestEntry.GetOriginalSize()
+            delete(blobsNeeded, hash)
+        }
+    }
+
+    logc.Printf(ctx,
+        "fetch: %s from old manifest, %s from git checkout, %s from git transport\n",
+        datasize.ByteSize(dataBytesFromOldManifest).HR(),
+        datasize.ByteSize(dataBytesFromGitCheckout).HR(),
+        datasize.ByteSize(dataBytesFromGitTransport).HR(),
+    )
+
+    return manifest, nil
+}
+
+func readGitBlob(repo *git.Repository, hash plumbing.Hash, entry *Entry) error {
+    blob, err := repo.BlobObject(hash)
+    if err != nil {
+        return fmt.Errorf("git blob %s: %w", hash, err)
+    }
+
+    reader, err := blob.Reader()
+    if err != nil {
+        return fmt.Errorf("git blob open: %w", err)
+    }
+    defer reader.Close()
+
+    data, err := io.ReadAll(reader)
+    if err != nil {
+        return fmt.Errorf("git blob read: %w", err)
+    }
+
+    switch entry.GetType() {
+    case Type_InlineFile, Type_Symlink:
+        // okay
+    default:
+        panic(fmt.Errorf("readGitBlob encountered invalid entry: %v, %v",
+            entry.GetType(), entry.GetTransform()))
+    }
+
+    entry.Data = data
+    entry.Transform = Transform_Identity.Enum()
+    entry.OriginalSize = proto.Int64(blob.Size)
+    entry.CompressedSize = proto.Int64(blob.Size)
+    return nil
 }

+30 -21
src/manifest.go
···
                 contentType = http.DetectContentType(entry.Data[:min(512, len(entry.Data))])
             }
             entry.ContentType = proto.String(contentType)
-        } else {
+        } else if entry.GetContentType() == "" {
             panic(fmt.Errorf("DetectContentType encountered invalid entry: %v, %v",
                 entry.GetType(), entry.GetTransform()))
         }
     }
 }

-// The `clauspost/compress/zstd` package recommends reusing a compressor to avoid repeated
+// The `klauspost/compress/zstd` package recommends reusing a compressor to avoid repeated
 // allocations of internal buffers.
 var zstdEncoder, _ = zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))

···
     span, _ := ObserveFunction(ctx, "CompressFiles")
     defer span.Finish()

-    var originalSize, compressedSize int64
+    var originalSize int64
+    var compressedSize int64
     for _, entry := range manifest.Contents {
         if entry.GetType() == Type_InlineFile && entry.GetTransform() == Transform_Identity {
-            mtype := getMediaType(entry.GetContentType())
-            if strings.HasPrefix(mtype, "video/") || strings.HasPrefix(mtype, "audio/") {
+            mediaType := getMediaType(entry.GetContentType())
+            if strings.HasPrefix(mediaType, "video/") || strings.HasPrefix(mediaType, "audio/") {
                 continue
             }
-            originalSize += entry.GetSize()
-            compressedData := zstdEncoder.EncodeAll(entry.GetData(), make([]byte, 0, entry.GetSize()))
-            if len(compressedData) < int(*entry.Size) {
+            compressedData := zstdEncoder.EncodeAll(entry.GetData(),
+                make([]byte, 0, entry.GetOriginalSize()))
+            if int64(len(compressedData)) < entry.GetOriginalSize() {
                 entry.Data = compressedData
-                entry.Size = proto.Int64(int64(len(entry.Data)))
                 entry.Transform = Transform_Zstd.Enum()
+                entry.CompressedSize = proto.Int64(int64(len(entry.Data)))
             }
-            compressedSize += entry.GetSize()
         }
+        originalSize += entry.GetOriginalSize()
+        compressedSize += entry.GetCompressedSize()
     }
     manifest.OriginalSize = proto.Int64(originalSize)
     manifest.CompressedSize = proto.Int64(compressedSize)
···
         CompressedSize: manifest.CompressedSize,
         StoredSize:     proto.Int64(0),
     }
-    extObjectSizes := make(map[string]int64)
     for name, entry := range manifest.Contents {
         cannotBeInlined := entry.GetType() == Type_InlineFile &&
-            entry.GetSize() > int64(config.Limits.MaxInlineFileSize.Bytes())
+            entry.GetCompressedSize() > int64(config.Limits.MaxInlineFileSize.Bytes())
         if cannotBeInlined {
             dataHash := sha256.Sum256(entry.Data)
             extManifest.Contents[name] = &Entry{
-                Type:        Type_ExternalFile.Enum(),
-                Size:        entry.Size,
-                Data:        fmt.Appendf(nil, "sha256-%x", dataHash),
-                Transform:   entry.Transform,
-                ContentType: entry.ContentType,
+                Type:           Type_ExternalFile.Enum(),
+                OriginalSize:   entry.OriginalSize,
+                CompressedSize: entry.CompressedSize,
+                Data:           fmt.Appendf(nil, "sha256-%x", dataHash),
+                Transform:      entry.Transform,
+                ContentType:    entry.ContentType,
+                GitHash:        entry.GitHash,
             }
-            extObjectSizes[string(dataHash[:])] = entry.GetSize()
         } else {
             extManifest.Contents[name] = entry
         }
     }
-    // `extObjectMap` stores size once per object, deduplicating it
-    for _, storedSize := range extObjectSizes {
-        *extManifest.StoredSize += storedSize
+
+    // Compute the deduplicated storage size.
+    var blobSizes = make(map[string]int64)
+    for _, entry := range manifest.Contents {
+        if entry.GetType() == Type_ExternalFile {
+            blobSizes[string(entry.Data)] = entry.GetCompressedSize()
+        }
+    }
+    for _, blobSize := range blobSizes {
+        *extManifest.StoredSize += blobSize
     }

     // Upload the resulting manifest and the blob it references.

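A small worked example of the deduplication pass in manifest.go above (blob
names and sizes are made up): two entries that reference the same external
blob contribute its compressed size to `StoredSize` only once, because the
map is keyed by blob name.

    blobSizes := make(map[string]int64)
    blobSizes["sha256-ab12"] = 4096 // assets/a.bin
    blobSizes["sha256-ab12"] = 4096 // assets/b.bin: same blob, same key
    blobSizes["sha256-cd34"] = 1024 // assets/c.bin

    var storedSize int64
    for _, size := range blobSizes {
        storedSize += size
    }
    // storedSize == 5120, not 9216: the shared blob is counted once.
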
+1 -1
src/pages.go
···
     case "zstd":
         // Set Content-Length ourselves since `http.ServeContent` only sets
         // it if Content-Encoding is unset or if it's a range request.
-        w.Header().Set("Content-Length", strconv.FormatInt(*entry.Size, 10))
+        w.Header().Set("Content-Length", strconv.FormatInt(entry.GetCompressedSize(), 10))
         w.Header().Set("Content-Encoding", "zstd")
         serveEncodingCount.
             With(prometheus.Labels{"transform": "zstd", "negotiated": "zstd"}).

+37 -10
src/schema.pb.go
···
     state protoimpl.MessageState `protogen:"open.v1"`
     Type  *Type                  `protobuf:"varint,1,opt,name=type,enum=Type" json:"type,omitempty"`
     // Only present for `type == InlineFile` and `type == ExternalFile`.
-    // For transformed entries, refers to the post-transformation (compressed) size.
-    Size *int64 `protobuf:"varint,2,opt,name=size" json:"size,omitempty"`
+    // For transformed entries, refers to the pre-transformation (decompressed) size; otherwise
+    // equal to `compressed_size`.
+    OriginalSize *int64 `protobuf:"varint,7,opt,name=original_size,json=originalSize" json:"original_size,omitempty"`
+    // Only present for `type == InlineFile` and `type == ExternalFile`.
+    // For transformed entries, refers to the post-transformation (compressed) size; otherwise
+    // equal to `original_size`.
+    CompressedSize *int64 `protobuf:"varint,2,opt,name=compressed_size,json=compressedSize" json:"compressed_size,omitempty"`
     // Meaning depends on `type`:
     // - If `type == InlineFile`, contains file data.
     // - If `type == ExternalFile`, contains blob name (an otherwise unspecified
···
     Transform *Transform `protobuf:"varint,4,opt,name=transform,enum=Transform" json:"transform,omitempty"`
     // Only present for `type == InlineFile` and `type == ExternalFile`.
     // Currently, optional (not present on certain legacy manifests).
     ContentType *string `protobuf:"bytes,5,opt,name=content_type,json=contentType" json:"content_type,omitempty"`
+    // May be present for `type == InlineFile` and `type == ExternalFile`.
+    // Used to reduce the amount of work being done during git checkouts.
+    // The type of hash used is determined by the length:
+    // - 40 bytes: SHA1DC (as hex)
+    // - 64 bytes: SHA256 (as hex)
+    GitHash       *string `protobuf:"bytes,6,opt,name=git_hash,json=gitHash" json:"git_hash,omitempty"`
     unknownFields protoimpl.UnknownFields
     sizeCache     protoimpl.SizeCache
 }
···
     return Type_Invalid
 }

-func (x *Entry) GetSize() int64 {
-    if x != nil && x.Size != nil {
-        return *x.Size
+func (x *Entry) GetOriginalSize() int64 {
+    if x != nil && x.OriginalSize != nil {
+        return *x.OriginalSize
+    }
+    return 0
+}
+
+func (x *Entry) GetCompressedSize() int64 {
+    if x != nil && x.CompressedSize != nil {
+        return *x.CompressedSize
     }
     return 0
 }
···
 func (x *Entry) GetContentType() string {
     if x != nil && x.ContentType != nil {
         return *x.ContentType
+    }
+    return ""
+}
+
+func (x *Entry) GetGitHash() string {
+    if x != nil && x.GitHash != nil {
+        return *x.GitHash
     }
     return ""
 }
···

 const file_schema_proto_rawDesc = "" +
     "\n" +
-    "\fschema.proto\"\x97\x01\n" +
+    "\fschema.proto\"\xec\x01\n" +
     "\x05Entry\x12\x19\n" +
-    "\x04type\x18\x01 \x01(\x0e2\x05.TypeR\x04type\x12\x12\n" +
-    "\x04size\x18\x02 \x01(\x03R\x04size\x12\x12\n" +
+    "\x04type\x18\x01 \x01(\x0e2\x05.TypeR\x04type\x12#\n" +
+    "\roriginal_size\x18\a \x01(\x03R\foriginalSize\x12'\n" +
+    "\x0fcompressed_size\x18\x02 \x01(\x03R\x0ecompressedSize\x12\x12\n" +
     "\x04data\x18\x03 \x01(\fR\x04data\x12(\n" +
     "\ttransform\x18\x04 \x01(\x0e2\n" +
     ".TransformR\ttransform\x12!\n" +
-    "\fcontent_type\x18\x05 \x01(\tR\vcontentType\"`\n" +
+    "\fcontent_type\x18\x05 \x01(\tR\vcontentType\x12\x19\n" +
+    "\bgit_hash\x18\x06 \x01(\tR\agitHash\"`\n" +
     "\fRedirectRule\x12\x12\n" +
     "\x04from\x18\x01 \x01(\tR\x04from\x12\x0e\n" +
     "\x02to\x18\x02 \x01(\tR\x02to\x12\x16\n" +

+16 -5
src/schema.proto
···
 message Entry {
     Type type = 1;
     // Only present for `type == InlineFile` and `type == ExternalFile`.
-    // For transformed entries, refers to the post-transformation (compressed) size.
-    int64 size = 2;
+    // For transformed entries, refers to the pre-transformation (decompressed) size; otherwise
+    // equal to `compressed_size`.
+    int64 original_size = 7;
+    // Only present for `type == InlineFile` and `type == ExternalFile`.
+    // For transformed entries, refers to the post-transformation (compressed) size; otherwise
+    // equal to `original_size`.
+    int64 compressed_size = 2;
     // Meaning depends on `type`:
     // * If `type == InlineFile`, contains file data.
     // * If `type == ExternalFile`, contains blob name (an otherwise unspecified
···
     // Only present for `type == InlineFile` and `type == ExternalFile`.
     // Currently, optional (not present on certain legacy manifests).
     string content_type = 5;
+    // May be present for `type == InlineFile` and `type == ExternalFile`.
+    // Used to reduce the amount of work being done during git checkouts.
+    // The type of hash used is determined by the length:
+    // * 40 bytes: SHA1DC (as hex)
+    // * 64 bytes: SHA256 (as hex)
+    string git_hash = 6;
 }

 // See https://docs.netlify.com/manage/routing/redirects/overview/ for details.
···
     // Contents
     map<string, Entry> contents = 4;
-    int64 original_size = 10;  // total size of entries before compression
-    int64 compressed_size = 5; // simple sum of each `entry.size`
-    int64 stored_size = 8;     // total size of (deduplicated) external objects
+    int64 original_size = 10;  // sum of each `entry.original_size`
+    int64 compressed_size = 5; // sum of each `entry.compressed_size`
+    int64 stored_size = 8;     // sum of deduplicated `entry.compressed_size` for external files only

     // Netlify-style `_redirects` and `_headers`
     repeated RedirectRule redirects = 6;

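As an illustration of the length-based convention described in the `git_hash`
comment (a hypothetical helper, not part of this commit):

    // gitHashKind distinguishes the hash algorithm by the length of the
    // hex-encoded string, per the `git_hash` schema comment.
    func gitHashKind(hash string) string {
        switch len(hash) {
        case 40:
            return "sha1dc"
        case 64:
            return "sha256"
        default:
            return "unknown"
        }
    }
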
+4 -1
src/update.go
···

     logc.Printf(ctx, "update %s: %s %s\n", webRoot, repoURL, branch)

-    manifest, err := FetchRepository(ctx, repoURL, branch)
+    oldManifest, _, _ := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
+    // Ignore errors; worst case we have to re-fetch all of the blobs.
+
+    manifest, err := FetchRepository(ctx, repoURL, branch, oldManifest)
     if errors.Is(err, context.DeadlineExceeded) {
         result = UpdateResult{UpdateTimeout, nil, fmt.Errorf("update timeout")}
     } else if err != nil {

+18
src/util.go
··· 4 "errors" 5 "io" 6 "strings" 7 ) 8 9 type BoundedReader struct { ··· 85 mediaType = strings.TrimSpace(strings.ToLower(mediaType)) 86 return 87 }
··· 4 "errors" 5 "io" 6 "strings" 7 + 8 + "google.golang.org/protobuf/proto" 9 ) 10 11 type BoundedReader struct { ··· 87 mediaType = strings.TrimSpace(strings.ToLower(mediaType)) 88 return 89 } 90 + 91 + // Copying Protobuf messages like `*dest = *src` causes a lock to be copied, which is unsound. 92 + // Copying Protobuf messages field-wise is fragile: adding a new field to the schema does not 93 + // cause a diagnostic to be emitted pointing to the copy site, making it easy to miss updates. 94 + // Serializing and deserializing is reliable and breaks referential links. 95 + func CopyProtoMessage(dest, src proto.Message) { 96 + data, err := proto.Marshal(src) 97 + if err != nil { 98 + panic(err) 99 + } 100 + 101 + err = proto.Unmarshal(data, dest) 102 + if err != nil { 103 + panic(err) 104 + } 105 + }