fork of whitequark.org/git-pages with mods for tangled

Use git filters for incremental updates from a git repository.

This commit changes the git fetch algorithm to retrieve only the blobs
that aren't already included in the previously deployed site manifest,
provided git filters are supported by the remote.
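
For context, the filter fallback at the heart of this change can be sketched on its own with go-git. The snippet below is a minimal, hypothetical example (the repository URL and branch are placeholders, and it uses an assumed in-memory storer instead of the on-disk storage the real code sets up); it asks for a blob:none partial clone first and retries without the filter when the remote rejects it:

package main

import (
	"context"
	"fmt"

	"github.com/go-git/go-git/v6"
	"github.com/go-git/go-git/v6/plumbing"
	"github.com/go-git/go-git/v6/plumbing/protocol/packp"
	"github.com/go-git/go-git/v6/storage/memory"
)

func main() {
	// Placeholder repository and branch; the real code receives these as arguments.
	repoURL, branch := "https://example.org/user/site.git", "refs/heads/main"

	// Try a partial clone first; not every server supports the blob:none
	// filter, so the second attempt drops it and falls back to a full
	// (but still shallow, single-branch) clone.
	for _, filter := range []packp.Filter{packp.FilterBlobNone(), packp.Filter("")} {
		_, err := git.CloneContext(context.Background(), memory.NewStorage(), nil, &git.CloneOptions{
			Bare:          true,
			URL:           repoURL,
			ReferenceName: plumbing.ReferenceName(branch),
			SingleBranch:  true,
			Depth:         1,
			Tags:          git.NoTags,
			Filter:        filter,
		})
		if err == nil {
			fmt.Printf("cloned with filter=%q\n", filter)
			break
		}
	}
}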

It also changes how manifest entry sizes are represented, such that
both decompressed and compressed sizes are stored. This enables
computing accurate (and repeatable) sizes even after incremental
updates.
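
To illustrate the second point, here is a small, hypothetical example (not part of the change itself) using the same klauspost/compress/zstd encoder settings that src/manifest.go uses: keeping both numbers means an entry reused from an earlier deployment, which already arrives compressed, still contributes its true pre-compression size to the site totals.

package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Stand-in for a file picked up from the repository.
	original := []byte("<html>... hypothetical page contents ...</html>")

	encoder, _ := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
	compressed := encoder.EncodeAll(original, nil)

	// With a single size field, the pre-compression size is lost once an entry
	// is stored compressed; tracking original and compressed sizes separately
	// keeps the totals repeatable when entries are reused rather than recompressed.
	fmt.Printf("original_size=%d compressed_size=%d\n", len(original), len(compressed))
}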

Co-authored-by: David Leadbeater <dgl@dgl.cx>

+9 -3
src/extract.go
···
 		}
 
 		manifestEntry.Type = Type_InlineFile.Enum()
-		manifestEntry.Size = proto.Int64(header.Size)
 		manifestEntry.Data = fileData
+		manifestEntry.Transform = Transform_Identity.Enum()
+		manifestEntry.OriginalSize = proto.Int64(header.Size)
+		manifestEntry.CompressedSize = proto.Int64(header.Size)
 
 	case tar.TypeSymlink:
 		manifestEntry.Type = Type_Symlink.Enum()
-		manifestEntry.Size = proto.Int64(header.Size)
 		manifestEntry.Data = []byte(header.Linkname)
+		manifestEntry.Transform = Transform_Identity.Enum()
+		manifestEntry.OriginalSize = proto.Int64(header.Size)
+		manifestEntry.CompressedSize = proto.Int64(header.Size)
 
 	case tar.TypeDir:
 		manifestEntry.Type = Type_Directory.Enum()
···
 			} else {
 				manifestEntry.Type = Type_InlineFile.Enum()
 			}
-			manifestEntry.Size = proto.Int64(int64(file.UncompressedSize64))
 			manifestEntry.Data = fileData
+			manifestEntry.Transform = Transform_Identity.Enum()
+			manifestEntry.OriginalSize = proto.Int64(int64(file.UncompressedSize64))
+			manifestEntry.CompressedSize = proto.Int64(int64(file.UncompressedSize64))
 		} else {
 			manifestEntry.Type = Type_Directory.Enum()
 		}
+175 -42
src/fetch.go
···
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"io"
+	"maps"
+	"net/url"
 	"os"
+	"slices"
 
+	"github.com/c2h5oh/datasize"
 	"github.com/go-git/go-billy/v6/osfs"
 	"github.com/go-git/go-git/v6"
 	"github.com/go-git/go-git/v6/plumbing"
 	"github.com/go-git/go-git/v6/plumbing/cache"
 	"github.com/go-git/go-git/v6/plumbing/filemode"
 	"github.com/go-git/go-git/v6/plumbing/object"
+	"github.com/go-git/go-git/v6/plumbing/protocol/packp"
+	"github.com/go-git/go-git/v6/plumbing/transport"
 	"github.com/go-git/go-git/v6/storage/filesystem"
 	"google.golang.org/protobuf/proto"
 )
 
-func FetchRepository(ctx context.Context, repoURL string, branch string) (*Manifest, error) {
+func FetchRepository(
+	ctx context.Context, repoURL string, branch string, oldManifest *Manifest,
+) (
+	*Manifest, error,
+) {
 	span, ctx := ObserveFunction(ctx, "FetchRepository",
 		"git.repository", repoURL, "git.branch", branch)
 	defer span.Finish()
 
-	baseDir, err := os.MkdirTemp("", "fetchRepo")
+	parsedRepoURL, err := url.Parse(repoURL)
 	if err != nil {
-		return nil, fmt.Errorf("mkdtemp: %w", err)
+		return nil, fmt.Errorf("URL parse: %w", err)
 	}
-	defer os.RemoveAll(baseDir)
+
+	var repo *git.Repository
+	var storer *filesystem.Storage
+	for _, filter := range []packp.Filter{packp.FilterBlobNone(), packp.Filter("")} {
+		var tempDir string
+		tempDir, err = os.MkdirTemp("", "fetchRepo")
+		if err != nil {
+			return nil, fmt.Errorf("mkdtemp: %w", err)
+		}
+		defer os.RemoveAll(tempDir)
 
-	fs := osfs.New(baseDir, osfs.WithBoundOS())
-	cache := cache.NewObjectLRUDefault()
-	storer := filesystem.NewStorageWithOptions(fs, cache, filesystem.Options{
-		ExclusiveAccess:      true,
-		LargeObjectThreshold: int64(config.Limits.GitLargeObjectThreshold.Bytes()),
-	})
-	repo, err := git.CloneContext(ctx, storer, nil, &git.CloneOptions{
-		Bare:          true,
-		URL:           repoURL,
-		ReferenceName: plumbing.ReferenceName(branch),
-		SingleBranch:  true,
-		Depth:         1,
-		Tags:          git.NoTags,
-	})
+		storer = filesystem.NewStorageWithOptions(
+			osfs.New(tempDir, osfs.WithBoundOS()),
+			cache.NewObjectLRUDefault(),
+			filesystem.Options{
+				ExclusiveAccess:      true,
+				LargeObjectThreshold: int64(config.Limits.GitLargeObjectThreshold.Bytes()),
+			},
+		)
+		repo, err = git.CloneContext(ctx, storer, nil, &git.CloneOptions{
+			Bare:          true,
+			URL:           repoURL,
+			ReferenceName: plumbing.ReferenceName(branch),
+			SingleBranch:  true,
+			Depth:         1,
+			Tags:          git.NoTags,
+			Filter:        filter,
+		})
+		if err != nil {
+			logc.Printf(ctx, "clone err: %s %s filter=%q\n", repoURL, branch, filter)
+			continue
+		} else {
+			logc.Printf(ctx, "clone ok: %s %s filter=%q\n", repoURL, branch, filter)
+			break
+		}
+	}
 	if err != nil {
 		return nil, fmt.Errorf("git clone: %w", err)
 	}
···
 	walker := object.NewTreeWalker(tree, true, make(map[plumbing.Hash]bool))
 	defer walker.Close()
 
-	manifest := Manifest{
+	// Create a manifest for the tree object corresponding to `branch`, but do not populate it
+	// with data yet; instead, record all the blobs we'll need.
+	manifest := &Manifest{
 		RepoUrl: proto.String(repoURL),
 		Branch:  proto.String(branch),
 		Commit:  proto.String(ref.Hash().String()),
···
 			"": {Type: Type_Directory.Enum()},
 		},
 	}
+	blobsNeeded := map[plumbing.Hash]*Entry{}
 	for {
 		name, entry, err := walker.Next()
 		if err == io.EOF {
···
 		} else if err != nil {
 			return nil, fmt.Errorf("git walker: %w", err)
 		} else {
-			manifestEntry := Entry{}
-			if entry.Mode.IsFile() {
-				blob, err := repo.BlobObject(entry.Hash)
-				if err != nil {
-					return nil, fmt.Errorf("git blob %s: %w", name, err)
-				}
-
-				reader, err := blob.Reader()
-				if err != nil {
-					return nil, fmt.Errorf("git blob open: %w", err)
-				}
-				defer reader.Close()
-
-				data, err := io.ReadAll(reader)
-				if err != nil {
-					return nil, fmt.Errorf("git blob read: %w", err)
-				}
-
+			manifestEntry := &Entry{}
+			if existingManifestEntry, found := blobsNeeded[entry.Hash]; found {
+				// If the same blob is present twice, we only need to fetch it once (and both
+				// instances will alias the same `Entry` structure in the manifest).
+				manifestEntry = existingManifestEntry
+			} else if entry.Mode.IsFile() {
+				blobsNeeded[entry.Hash] = manifestEntry
 				if entry.Mode == filemode.Symlink {
 					manifestEntry.Type = Type_Symlink.Enum()
 				} else {
 					manifestEntry.Type = Type_InlineFile.Enum()
 				}
-				manifestEntry.Size = proto.Int64(blob.Size)
-				manifestEntry.Data = data
+				manifestEntry.GitHash = proto.String(entry.Hash.String())
 			} else if entry.Mode == filemode.Dir {
 				manifestEntry.Type = Type_Directory.Enum()
 			} else {
-				AddProblem(&manifest, name, "unsupported mode %#o", entry.Mode)
+				AddProblem(manifest, name, "unsupported mode %#o", entry.Mode)
 				continue
 			}
-			manifest.Contents[name] = &manifestEntry
+			manifest.Contents[name] = manifestEntry
+		}
+	}
+
+	// Collect checkout statistics.
+	var dataBytesFromOldManifest int64
+	var dataBytesFromGitCheckout int64
+	var dataBytesFromGitTransport int64
+
+	// First, see if we can extract the blobs from the old manifest. This is the preferred option
+	// because it avoids both network transfers and recompression. Note that we do not request
+	// blobs from the backend under any circumstances to avoid creating a blob existence oracle.
+	for _, oldManifestEntry := range oldManifest.GetContents() {
+		if hash, ok := plumbing.FromHex(oldManifestEntry.GetGitHash()); ok {
+			if manifestEntry, found := blobsNeeded[hash]; found {
+				CopyProtoMessage(manifestEntry, oldManifestEntry)
+				dataBytesFromOldManifest += oldManifestEntry.GetOriginalSize()
+				delete(blobsNeeded, hash)
+			}
+		}
+	}
+
+	// Second, fill the manifest entries with data from the git checkout we just made.
+	// This will only succeed if a `blob:none` filter isn't supported and we got a full
+	// clone despite asking for a partial clone.
+	for hash, manifestEntry := range blobsNeeded {
+		if err := readGitBlob(repo, hash, manifestEntry); err == nil {
+			dataBytesFromGitCheckout += manifestEntry.GetOriginalSize()
+			delete(blobsNeeded, hash)
 		}
 	}
-	return &manifest, nil
+
+	// Third, if we still don't have data for some manifest entries, re-establish a git transport
+	// and request the missing blobs (only) from the server.
+	if len(blobsNeeded) > 0 {
+		client, err := transport.Get(parsedRepoURL.Scheme)
+		if err != nil {
+			return nil, fmt.Errorf("git transport: %w", err)
+		}
+
+		endpoint, err := transport.NewEndpoint(repoURL)
+		if err != nil {
+			return nil, fmt.Errorf("git endpoint: %w", err)
+		}
+
+		session, err := client.NewSession(storer, endpoint, nil)
+		if err != nil {
+			return nil, fmt.Errorf("git session: %w", err)
+		}
+
+		connection, err := session.Handshake(ctx, transport.UploadPackService)
+		if err != nil {
+			return nil, fmt.Errorf("git connection: %w", err)
+		}
+		defer connection.Close()
+
+		if err := connection.Fetch(ctx, &transport.FetchRequest{
+			Wants: slices.Collect(maps.Keys(blobsNeeded)),
+			Depth: 1,
+			// Git CLI behaves like this, even if the wants above are references to blobs.
+			Filter: "blob:none",
+		}); err != nil && !errors.Is(err, transport.ErrNoChange) {
+			return nil, fmt.Errorf("git blob fetch request: %w", err)
+		}
+
+		// All remaining blobs should now be available.
+		for hash, manifestEntry := range blobsNeeded {
+			if err := readGitBlob(repo, hash, manifestEntry); err != nil {
+				return nil, err
+			}
+			dataBytesFromGitTransport += manifestEntry.GetOriginalSize()
+			delete(blobsNeeded, hash)
+		}
+	}
+
+	logc.Printf(ctx,
+		"fetch: %s from old manifest, %s from git checkout, %s from git transport\n",
+		datasize.ByteSize(dataBytesFromOldManifest).HR(),
+		datasize.ByteSize(dataBytesFromGitCheckout).HR(),
+		datasize.ByteSize(dataBytesFromGitTransport).HR(),
+	)
+
+	return manifest, nil
+}
+
+func readGitBlob(repo *git.Repository, hash plumbing.Hash, entry *Entry) error {
+	blob, err := repo.BlobObject(hash)
+	if err != nil {
+		return fmt.Errorf("git blob %s: %w", hash, err)
+	}
+
+	reader, err := blob.Reader()
+	if err != nil {
+		return fmt.Errorf("git blob open: %w", err)
+	}
+	defer reader.Close()
+
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		return fmt.Errorf("git blob read: %w", err)
+	}
+
+	switch entry.GetType() {
+	case Type_InlineFile, Type_Symlink:
+		// okay
+	default:
+		panic(fmt.Errorf("readGitBlob encountered invalid entry: %v, %v",
+			entry.GetType(), entry.GetTransform()))
+	}
+
+	entry.Data = data
+	entry.Transform = Transform_Identity.Enum()
+	entry.OriginalSize = proto.Int64(blob.Size)
+	entry.CompressedSize = proto.Int64(blob.Size)
+	return nil
 }
+30 -21
src/manifest.go
···
 				contentType = http.DetectContentType(entry.Data[:min(512, len(entry.Data))])
 			}
 			entry.ContentType = proto.String(contentType)
-		} else {
+		} else if entry.GetContentType() == "" {
 			panic(fmt.Errorf("DetectContentType encountered invalid entry: %v, %v",
 				entry.GetType(), entry.GetTransform()))
 		}
 	}
 }
 
-// The `clauspost/compress/zstd` package recommends reusing a compressor to avoid repeated
+// The `klauspost/compress/zstd` package recommends reusing a compressor to avoid repeated
 // allocations of internal buffers.
 var zstdEncoder, _ = zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
 
···
 	span, _ := ObserveFunction(ctx, "CompressFiles")
 	defer span.Finish()
 
-	var originalSize, compressedSize int64
+	var originalSize int64
+	var compressedSize int64
 	for _, entry := range manifest.Contents {
 		if entry.GetType() == Type_InlineFile && entry.GetTransform() == Transform_Identity {
-			mtype := getMediaType(entry.GetContentType())
-			if strings.HasPrefix(mtype, "video/") || strings.HasPrefix(mtype, "audio/") {
+			mediaType := getMediaType(entry.GetContentType())
+			if strings.HasPrefix(mediaType, "video/") || strings.HasPrefix(mediaType, "audio/") {
 				continue
 			}
-			originalSize += entry.GetSize()
-			compressedData := zstdEncoder.EncodeAll(entry.GetData(), make([]byte, 0, entry.GetSize()))
-			if len(compressedData) < int(*entry.Size) {
+			compressedData := zstdEncoder.EncodeAll(entry.GetData(),
+				make([]byte, 0, entry.GetOriginalSize()))
+			if int64(len(compressedData)) < entry.GetOriginalSize() {
 				entry.Data = compressedData
-				entry.Size = proto.Int64(int64(len(entry.Data)))
 				entry.Transform = Transform_Zstd.Enum()
+				entry.CompressedSize = proto.Int64(int64(len(entry.Data)))
 			}
-			compressedSize += entry.GetSize()
 		}
+		originalSize += entry.GetOriginalSize()
+		compressedSize += entry.GetCompressedSize()
 	}
 	manifest.OriginalSize = proto.Int64(originalSize)
 	manifest.CompressedSize = proto.Int64(compressedSize)
···
 		CompressedSize: manifest.CompressedSize,
 		StoredSize:     proto.Int64(0),
 	}
-	extObjectSizes := make(map[string]int64)
 	for name, entry := range manifest.Contents {
 		cannotBeInlined := entry.GetType() == Type_InlineFile &&
-			entry.GetSize() > int64(config.Limits.MaxInlineFileSize.Bytes())
+			entry.GetCompressedSize() > int64(config.Limits.MaxInlineFileSize.Bytes())
 		if cannotBeInlined {
 			dataHash := sha256.Sum256(entry.Data)
 			extManifest.Contents[name] = &Entry{
-				Type:        Type_ExternalFile.Enum(),
-				Size:        entry.Size,
-				Data:        fmt.Appendf(nil, "sha256-%x", dataHash),
-				Transform:   entry.Transform,
-				ContentType: entry.ContentType,
+				Type:           Type_ExternalFile.Enum(),
+				OriginalSize:   entry.OriginalSize,
+				CompressedSize: entry.CompressedSize,
+				Data:           fmt.Appendf(nil, "sha256-%x", dataHash),
+				Transform:      entry.Transform,
+				ContentType:    entry.ContentType,
+				GitHash:        entry.GitHash,
 			}
-			extObjectSizes[string(dataHash[:])] = entry.GetSize()
 		} else {
 			extManifest.Contents[name] = entry
 		}
 	}
-	// `extObjectMap` stores size once per object, deduplicating it
-	for _, storedSize := range extObjectSizes {
-		*extManifest.StoredSize += storedSize
+
+	// Compute the deduplicated storage size.
+	var blobSizes = make(map[string]int64)
+	for _, entry := range manifest.Contents {
+		if entry.GetType() == Type_ExternalFile {
+			blobSizes[string(entry.Data)] = entry.GetCompressedSize()
+		}
+	}
+	for _, blobSize := range blobSizes {
+		*extManifest.StoredSize += blobSize
 	}
 
 	// Upload the resulting manifest and the blob it references.
+1 -1
src/pages.go
··· 328 328 case "zstd": 329 329 // Set Content-Length ourselves since `http.ServeContent` only sets 330 330 // it if Content-Encoding is unset or if it's a range request. 331 - w.Header().Set("Content-Length", strconv.FormatInt(*entry.Size, 10)) 331 + w.Header().Set("Content-Length", strconv.FormatInt(entry.GetCompressedSize(), 10)) 332 332 w.Header().Set("Content-Encoding", "zstd") 333 333 serveEncodingCount. 334 334 With(prometheus.Labels{"transform": "zstd", "negotiated": "zstd"}).
+37 -10
src/schema.pb.go
···
 	state protoimpl.MessageState `protogen:"open.v1"`
 	Type  *Type                  `protobuf:"varint,1,opt,name=type,enum=Type" json:"type,omitempty"`
 	// Only present for `type == InlineFile` and `type == ExternalFile`.
-	// For transformed entries, refers to the post-transformation (compressed) size.
-	Size *int64 `protobuf:"varint,2,opt,name=size" json:"size,omitempty"`
+	// For transformed entries, refers to the pre-transformation (decompressed) size; otherwise
+	// equal to `compressed_size`.
+	OriginalSize *int64 `protobuf:"varint,7,opt,name=original_size,json=originalSize" json:"original_size,omitempty"`
+	// Only present for `type == InlineFile` and `type == ExternalFile`.
+	// For transformed entries, refers to the post-transformation (compressed) size; otherwise
+	// equal to `original_size`.
+	CompressedSize *int64 `protobuf:"varint,2,opt,name=compressed_size,json=compressedSize" json:"compressed_size,omitempty"`
 	// Meaning depends on `type`:
 	// - If `type == InlineFile`, contains file data.
 	// - If `type == ExternalFile`, contains blob name (an otherwise unspecified
···
 	Transform *Transform `protobuf:"varint,4,opt,name=transform,enum=Transform" json:"transform,omitempty"`
 	// Only present for `type == InlineFile` and `type == ExternalFile`.
 	// Currently, optional (not present on certain legacy manifests).
-	ContentType *string `protobuf:"bytes,5,opt,name=content_type,json=contentType" json:"content_type,omitempty"`
+	ContentType *string `protobuf:"bytes,5,opt,name=content_type,json=contentType" json:"content_type,omitempty"`
+	// May be present for `type == InlineFile` and `type == ExternalFile`.
+	// Used to reduce the amount of work being done during git checkouts.
+	// The type of hash used is determined by the length:
+	// - 40 bytes: SHA1DC (as hex)
+	// - 64 bytes: SHA256 (as hex)
+	GitHash *string `protobuf:"bytes,6,opt,name=git_hash,json=gitHash" json:"git_hash,omitempty"`
 	unknownFields protoimpl.UnknownFields
 	sizeCache     protoimpl.SizeCache
 }
···
 	return Type_Invalid
 }
 
-func (x *Entry) GetSize() int64 {
-	if x != nil && x.Size != nil {
-		return *x.Size
+func (x *Entry) GetOriginalSize() int64 {
+	if x != nil && x.OriginalSize != nil {
+		return *x.OriginalSize
+	}
+	return 0
+}
+
+func (x *Entry) GetCompressedSize() int64 {
+	if x != nil && x.CompressedSize != nil {
+		return *x.CompressedSize
 	}
 	return 0
 }
···
 func (x *Entry) GetContentType() string {
 	if x != nil && x.ContentType != nil {
 		return *x.ContentType
+	}
+	return ""
+}
+
+func (x *Entry) GetGitHash() string {
+	if x != nil && x.GitHash != nil {
+		return *x.GitHash
 	}
 	return ""
 }
···
 
 const file_schema_proto_rawDesc = "" +
 	"\n" +
-	"\fschema.proto\"\x97\x01\n" +
+	"\fschema.proto\"\xec\x01\n" +
 	"\x05Entry\x12\x19\n" +
-	"\x04type\x18\x01 \x01(\x0e2\x05.TypeR\x04type\x12\x12\n" +
-	"\x04size\x18\x02 \x01(\x03R\x04size\x12\x12\n" +
+	"\x04type\x18\x01 \x01(\x0e2\x05.TypeR\x04type\x12#\n" +
+	"\roriginal_size\x18\a \x01(\x03R\foriginalSize\x12'\n" +
+	"\x0fcompressed_size\x18\x02 \x01(\x03R\x0ecompressedSize\x12\x12\n" +
 	"\x04data\x18\x03 \x01(\fR\x04data\x12(\n" +
 	"\ttransform\x18\x04 \x01(\x0e2\n" +
 	".TransformR\ttransform\x12!\n" +
-	"\fcontent_type\x18\x05 \x01(\tR\vcontentType\"`\n" +
+	"\fcontent_type\x18\x05 \x01(\tR\vcontentType\x12\x19\n" +
+	"\bgit_hash\x18\x06 \x01(\tR\agitHash\"`\n" +
 	"\fRedirectRule\x12\x12\n" +
 	"\x04from\x18\x01 \x01(\tR\x04from\x12\x0e\n" +
 	"\x02to\x18\x02 \x01(\tR\x02to\x12\x16\n" +
+16 -5
src/schema.proto
···
 message Entry {
   Type type = 1;
   // Only present for `type == InlineFile` and `type == ExternalFile`.
-  // For transformed entries, refers to the post-transformation (compressed) size.
-  int64 size = 2;
+  // For transformed entries, refers to the pre-transformation (decompressed) size; otherwise
+  // equal to `compressed_size`.
+  int64 original_size = 7;
+  // Only present for `type == InlineFile` and `type == ExternalFile`.
+  // For transformed entries, refers to the post-transformation (compressed) size; otherwise
+  // equal to `original_size`.
+  int64 compressed_size = 2;
   // Meaning depends on `type`:
   // * If `type == InlineFile`, contains file data.
   // * If `type == ExternalFile`, contains blob name (an otherwise unspecified
···
   // Only present for `type == InlineFile` and `type == ExternalFile`.
   // Currently, optional (not present on certain legacy manifests).
   string content_type = 5;
+  // May be present for `type == InlineFile` and `type == ExternalFile`.
+  // Used to reduce the amount of work being done during git checkouts.
+  // The type of hash used is determined by the length:
+  // * 40 bytes: SHA1DC (as hex)
+  // * 64 bytes: SHA256 (as hex)
+  string git_hash = 6;
 }
 
 // See https://docs.netlify.com/manage/routing/redirects/overview/ for details.
···
 
   // Contents
   map<string, Entry> contents = 4;
-  int64 original_size = 10;  // total size of entries before compression
-  int64 compressed_size = 5; // simple sum of each `entry.size`
-  int64 stored_size = 8;     // total size of (deduplicated) external objects
+  int64 original_size = 10;  // sum of each `entry.original_size`
+  int64 compressed_size = 5; // sum of each `entry.compressed_size`
+  int64 stored_size = 8;     // sum of deduplicated `entry.compressed_size` for external files only
 
   // Netlify-style `_redirects` and `_headers`
   repeated RedirectRule redirects = 6;
+4 -1
src/update.go
···
 
 	logc.Printf(ctx, "update %s: %s %s\n", webRoot, repoURL, branch)
 
-	manifest, err := FetchRepository(ctx, repoURL, branch)
+	oldManifest, _, _ := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
+	// Ignore errors; worst case we have to re-fetch all of the blobs.
+
+	manifest, err := FetchRepository(ctx, repoURL, branch, oldManifest)
 	if errors.Is(err, context.DeadlineExceeded) {
 		result = UpdateResult{UpdateTimeout, nil, fmt.Errorf("update timeout")}
 	} else if err != nil {
+18
src/util.go
··· 4 4 "errors" 5 5 "io" 6 6 "strings" 7 + 8 + "google.golang.org/protobuf/proto" 7 9 ) 8 10 9 11 type BoundedReader struct { ··· 85 87 mediaType = strings.TrimSpace(strings.ToLower(mediaType)) 86 88 return 87 89 } 90 + 91 + // Copying Protobuf messages like `*dest = *src` causes a lock to be copied, which is unsound. 92 + // Copying Protobuf messages field-wise is fragile: adding a new field to the schema does not 93 + // cause a diagnostic to be emitted pointing to the copy site, making it easy to miss updates. 94 + // Serializing and deserializing is reliable and breaks referential links. 95 + func CopyProtoMessage(dest, src proto.Message) { 96 + data, err := proto.Marshal(src) 97 + if err != nil { 98 + panic(err) 99 + } 100 + 101 + err = proto.Unmarshal(data, dest) 102 + if err != nil { 103 + panic(err) 104 + } 105 + }