src/extract.go (+9 -3)

@@ -59,13 +59,17 @@
 		}
 
 		manifestEntry.Type = Type_InlineFile.Enum()
-		manifestEntry.Size = proto.Int64(header.Size)
 		manifestEntry.Data = fileData
+		manifestEntry.Transform = Transform_Identity.Enum()
+		manifestEntry.OriginalSize = proto.Int64(header.Size)
+		manifestEntry.CompressedSize = proto.Int64(header.Size)
 
 	case tar.TypeSymlink:
 		manifestEntry.Type = Type_Symlink.Enum()
-		manifestEntry.Size = proto.Int64(header.Size)
 		manifestEntry.Data = []byte(header.Linkname)
+		manifestEntry.Transform = Transform_Identity.Enum()
+		manifestEntry.OriginalSize = proto.Int64(header.Size)
+		manifestEntry.CompressedSize = proto.Int64(header.Size)
 
 	case tar.TypeDir:
 		manifestEntry.Type = Type_Directory.Enum()
@@ -150,8 +154,10 @@
 		} else {
 			manifestEntry.Type = Type_InlineFile.Enum()
 		}
-		manifestEntry.Size = proto.Int64(int64(file.UncompressedSize64))
 		manifestEntry.Data = fileData
+		manifestEntry.Transform = Transform_Identity.Enum()
+		manifestEntry.OriginalSize = proto.Int64(int64(file.UncompressedSize64))
+		manifestEntry.CompressedSize = proto.Int64(int64(file.UncompressedSize64))
 	} else {
 		manifestEntry.Type = Type_Directory.Enum()
 	}
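Review note: the `size` split above establishes a simple invariant that `CompressFiles` (src/manifest.go) later preserves. A minimal sketch, not code from this change; `Entry` and the `Transform_*` constants are the generated types from src/schema.pb.go:

```go
// Illustrative only: identity-transform entries carry the same value in both
// fields, and a zstd transform is only recorded when it actually shrank the data.
func sizesConsistent(e *Entry) bool {
	if e.GetTransform() == Transform_Identity {
		return e.GetOriginalSize() == e.GetCompressedSize()
	}
	return e.GetCompressedSize() < e.GetOriginalSize()
}
```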
src/fetch.go (+175 -42)

@@ -2,45 +2,76 @@
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"io"
+	"maps"
+	"net/url"
 	"os"
+	"slices"
 
+	"github.com/c2h5oh/datasize"
 	"github.com/go-git/go-billy/v6/osfs"
 	"github.com/go-git/go-git/v6"
 	"github.com/go-git/go-git/v6/plumbing"
 	"github.com/go-git/go-git/v6/plumbing/cache"
 	"github.com/go-git/go-git/v6/plumbing/filemode"
 	"github.com/go-git/go-git/v6/plumbing/object"
+	"github.com/go-git/go-git/v6/plumbing/protocol/packp"
+	"github.com/go-git/go-git/v6/plumbing/transport"
 	"github.com/go-git/go-git/v6/storage/filesystem"
 	"google.golang.org/protobuf/proto"
 )
 
-func FetchRepository(ctx context.Context, repoURL string, branch string) (*Manifest, error) {
+func FetchRepository(
+	ctx context.Context, repoURL string, branch string, oldManifest *Manifest,
+) (
+	*Manifest, error,
+) {
 	span, ctx := ObserveFunction(ctx, "FetchRepository",
 		"git.repository", repoURL, "git.branch", branch)
 	defer span.Finish()
 
-	baseDir, err := os.MkdirTemp("", "fetchRepo")
+	parsedRepoURL, err := url.Parse(repoURL)
 	if err != nil {
-		return nil, fmt.Errorf("mkdtemp: %w", err)
+		return nil, fmt.Errorf("URL parse: %w", err)
 	}
-	defer os.RemoveAll(baseDir)
+
+	var repo *git.Repository
+	var storer *filesystem.Storage
+	for _, filter := range []packp.Filter{packp.FilterBlobNone(), packp.Filter("")} {
+		var tempDir string
+		tempDir, err = os.MkdirTemp("", "fetchRepo")
+		if err != nil {
+			return nil, fmt.Errorf("mkdtemp: %w", err)
+		}
+		defer os.RemoveAll(tempDir)
 
-	fs := osfs.New(baseDir, osfs.WithBoundOS())
-	cache := cache.NewObjectLRUDefault()
-	storer := filesystem.NewStorageWithOptions(fs, cache, filesystem.Options{
-		ExclusiveAccess:      true,
-		LargeObjectThreshold: int64(config.Limits.GitLargeObjectThreshold.Bytes()),
-	})
-	repo, err := git.CloneContext(ctx, storer, nil, &git.CloneOptions{
-		Bare:          true,
-		URL:           repoURL,
-		ReferenceName: plumbing.ReferenceName(branch),
-		SingleBranch:  true,
-		Depth:         1,
-		Tags:          git.NoTags,
-	})
+		storer = filesystem.NewStorageWithOptions(
+			osfs.New(tempDir, osfs.WithBoundOS()),
+			cache.NewObjectLRUDefault(),
+			filesystem.Options{
+				ExclusiveAccess:      true,
+				LargeObjectThreshold: int64(config.Limits.GitLargeObjectThreshold.Bytes()),
+			},
+		)
+		repo, err = git.CloneContext(ctx, storer, nil, &git.CloneOptions{
+			Bare:          true,
+			URL:           repoURL,
+			ReferenceName: plumbing.ReferenceName(branch),
+			SingleBranch:  true,
+			Depth:         1,
+			Tags:          git.NoTags,
+			Filter:        filter,
+		})
+		if err != nil {
+			logc.Printf(ctx, "clone err: %s %s filter=%q\n", repoURL, branch, filter)
+			continue
+		} else {
+			logc.Printf(ctx, "clone ok: %s %s filter=%q\n", repoURL, branch, filter)
+			break
+		}
+	}
 	if err != nil {
 		return nil, fmt.Errorf("git clone: %w", err)
 	}
@@ -63,7 +94,9 @@
 	walker := object.NewTreeWalker(tree, true, make(map[plumbing.Hash]bool))
 	defer walker.Close()
 
-	manifest := Manifest{
+	// Create a manifest for the tree object corresponding to `branch`, but do not populate it
+	// with data yet; instead, record all the blobs we'll need.
+	manifest := &Manifest{
 		RepoUrl: proto.String(repoURL),
 		Branch:  proto.String(branch),
 		Commit:  proto.String(ref.Hash().String()),
@@ -71,6 +104,7 @@
 			"": {Type: Type_Directory.Enum()},
 		},
 	}
+	blobsNeeded := map[plumbing.Hash]*Entry{}
 	for {
 		name, entry, err := walker.Next()
 		if err == io.EOF {
@@ -78,39 +112,138 @@
 		} else if err != nil {
 			return nil, fmt.Errorf("git walker: %w", err)
 		} else {
-			manifestEntry := Entry{}
-			if entry.Mode.IsFile() {
-				blob, err := repo.BlobObject(entry.Hash)
-				if err != nil {
-					return nil, fmt.Errorf("git blob %s: %w", name, err)
-				}
-
-				reader, err := blob.Reader()
-				if err != nil {
-					return nil, fmt.Errorf("git blob open: %w", err)
-				}
-				defer reader.Close()
-
-				data, err := io.ReadAll(reader)
-				if err != nil {
-					return nil, fmt.Errorf("git blob read: %w", err)
-				}
-
+			manifestEntry := &Entry{}
+			if existingManifestEntry, found := blobsNeeded[entry.Hash]; found {
+				// If the same blob is present twice, we only need to fetch it once (and both
+				// instances will alias the same `Entry` structure in the manifest).
+				manifestEntry = existingManifestEntry
+			} else if entry.Mode.IsFile() {
+				blobsNeeded[entry.Hash] = manifestEntry
 				if entry.Mode == filemode.Symlink {
 					manifestEntry.Type = Type_Symlink.Enum()
 				} else {
 					manifestEntry.Type = Type_InlineFile.Enum()
 				}
-				manifestEntry.Size = proto.Int64(blob.Size)
-				manifestEntry.Data = data
+				manifestEntry.GitHash = proto.String(entry.Hash.String())
 			} else if entry.Mode == filemode.Dir {
 				manifestEntry.Type = Type_Directory.Enum()
 			} else {
-				AddProblem(&manifest, name, "unsupported mode %#o", entry.Mode)
+				AddProblem(manifest, name, "unsupported mode %#o", entry.Mode)
 				continue
 			}
-			manifest.Contents[name] = &manifestEntry
+			manifest.Contents[name] = manifestEntry
+		}
+	}
+
+	// Collect checkout statistics.
+	var dataBytesFromOldManifest int64
+	var dataBytesFromGitCheckout int64
+	var dataBytesFromGitTransport int64
+
+	// First, see if we can extract the blobs from the old manifest. This is the preferred option
+	// because it avoids both network transfers and recompression. Note that we do not request
+	// blobs from the backend under any circumstances to avoid creating a blob existence oracle.
+	for _, oldManifestEntry := range oldManifest.GetContents() {
+		if hash, ok := plumbing.FromHex(oldManifestEntry.GetGitHash()); ok {
+			if manifestEntry, found := blobsNeeded[hash]; found {
+				CopyProtoMessage(manifestEntry, oldManifestEntry)
+				dataBytesFromOldManifest += oldManifestEntry.GetOriginalSize()
+				delete(blobsNeeded, hash)
+			}
+		}
+	}
+
+	// Second, fill the manifest entries with data from the git checkout we just made.
+	// This will only succeed if a `blob:none` filter isn't supported and we got a full
+	// clone despite asking for a partial clone.
+	for hash, manifestEntry := range blobsNeeded {
+		if err := readGitBlob(repo, hash, manifestEntry); err == nil {
+			dataBytesFromGitCheckout += manifestEntry.GetOriginalSize()
+			delete(blobsNeeded, hash)
 		}
 	}
-	return &manifest, nil
+
+	// Third, if we still don't have data for some manifest entries, re-establish a git transport
+	// and request the missing blobs (only) from the server.
+	if len(blobsNeeded) > 0 {
+		client, err := transport.Get(parsedRepoURL.Scheme)
+		if err != nil {
+			return nil, fmt.Errorf("git transport: %w", err)
+		}
+
+		endpoint, err := transport.NewEndpoint(repoURL)
+		if err != nil {
+			return nil, fmt.Errorf("git endpoint: %w", err)
+		}
+
+		session, err := client.NewSession(storer, endpoint, nil)
+		if err != nil {
+			return nil, fmt.Errorf("git session: %w", err)
+		}
+
+		connection, err := session.Handshake(ctx, transport.UploadPackService)
+		if err != nil {
+			return nil, fmt.Errorf("git connection: %w", err)
+		}
+		defer connection.Close()
+
+		if err := connection.Fetch(ctx, &transport.FetchRequest{
+			Wants: slices.Collect(maps.Keys(blobsNeeded)),
+			Depth: 1,
+			// Git CLI behaves like this, even if the wants above are references to blobs.
+			Filter: "blob:none",
+		}); err != nil && !errors.Is(err, transport.ErrNoChange) {
+			return nil, fmt.Errorf("git blob fetch request: %w", err)
+		}
+
+		// All remaining blobs should now be available.
+		for hash, manifestEntry := range blobsNeeded {
+			if err := readGitBlob(repo, hash, manifestEntry); err != nil {
+				return nil, err
+			}
+			dataBytesFromGitTransport += manifestEntry.GetOriginalSize()
+			delete(blobsNeeded, hash)
+		}
+	}
+
+	logc.Printf(ctx,
+		"fetch: %s from old manifest, %s from git checkout, %s from git transport\n",
+		datasize.ByteSize(dataBytesFromOldManifest).HR(),
+		datasize.ByteSize(dataBytesFromGitCheckout).HR(),
+		datasize.ByteSize(dataBytesFromGitTransport).HR(),
+	)
+
+	return manifest, nil
+}
+
+func readGitBlob(repo *git.Repository, hash plumbing.Hash, entry *Entry) error {
+	blob, err := repo.BlobObject(hash)
+	if err != nil {
+		return fmt.Errorf("git blob %s: %w", hash, err)
+	}
+
+	reader, err := blob.Reader()
+	if err != nil {
+		return fmt.Errorf("git blob open: %w", err)
+	}
+	defer reader.Close()
+
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		return fmt.Errorf("git blob read: %w", err)
+	}
+
+	switch entry.GetType() {
+	case Type_InlineFile, Type_Symlink:
+		// okay
+	default:
+		panic(fmt.Errorf("readGitBlob encountered invalid entry: %v, %v",
+			entry.GetType(), entry.GetTransform()))
+	}
+
+	entry.Data = data
+	entry.Transform = Transform_Identity.Enum()
+	entry.OriginalSize = proto.Int64(blob.Size)
+	entry.CompressedSize = proto.Int64(blob.Size)
+	return nil
 }
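Review note: `oldManifest` can be nil here (src/update.go below deliberately ignores `GetManifest` errors). That is safe because generated protobuf getters tolerate nil receivers; a minimal sketch of why the first pass degrades gracefully:

```go
// Illustrative only: on a first-ever fetch there is no previous manifest.
var oldManifest *Manifest
// GetContents() on a nil *Manifest returns a nil map, so this loop runs zero
// times and every blob falls through to the checkout/transport paths instead.
for _, oldManifestEntry := range oldManifest.GetContents() {
	_ = oldManifestEntry
}
```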
src/manifest.go (+30 -21)

@@ -150,14 +150,14 @@
 				contentType = http.DetectContentType(entry.Data[:min(512, len(entry.Data))])
 			}
 			entry.ContentType = proto.String(contentType)
-		} else {
+		} else if entry.GetContentType() == "" {
 			panic(fmt.Errorf("DetectContentType encountered invalid entry: %v, %v",
 				entry.GetType(), entry.GetTransform()))
 		}
 	}
 }
 
-// The `clauspost/compress/zstd` package recommends reusing a compressor to avoid repeated
+// The `klauspost/compress/zstd` package recommends reusing a compressor to avoid repeated
 // allocations of internal buffers.
 var zstdEncoder, _ = zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
 
@@ -166,22 +166,24 @@
 	span, _ := ObserveFunction(ctx, "CompressFiles")
 	defer span.Finish()
 
-	var originalSize, compressedSize int64
+	var originalSize int64
+	var compressedSize int64
 	for _, entry := range manifest.Contents {
 		if entry.GetType() == Type_InlineFile && entry.GetTransform() == Transform_Identity {
-			mtype := getMediaType(entry.GetContentType())
-			if strings.HasPrefix(mtype, "video/") || strings.HasPrefix(mtype, "audio/") {
+			mediaType := getMediaType(entry.GetContentType())
+			if strings.HasPrefix(mediaType, "video/") || strings.HasPrefix(mediaType, "audio/") {
 				continue
 			}
-			originalSize += entry.GetSize()
-			compressedData := zstdEncoder.EncodeAll(entry.GetData(), make([]byte, 0, entry.GetSize()))
-			if len(compressedData) < int(*entry.Size) {
+			compressedData := zstdEncoder.EncodeAll(entry.GetData(),
+				make([]byte, 0, entry.GetOriginalSize()))
+			if int64(len(compressedData)) < entry.GetOriginalSize() {
 				entry.Data = compressedData
-				entry.Size = proto.Int64(int64(len(entry.Data)))
 				entry.Transform = Transform_Zstd.Enum()
+				entry.CompressedSize = proto.Int64(int64(len(entry.Data)))
 			}
-			compressedSize += entry.GetSize()
 		}
+		originalSize += entry.GetOriginalSize()
+		compressedSize += entry.GetCompressedSize()
 	}
 	manifest.OriginalSize = proto.Int64(originalSize)
 	manifest.CompressedSize = proto.Int64(compressedSize)
@@ -246,27 +248,34 @@
 		CompressedSize: manifest.CompressedSize,
 		StoredSize:     proto.Int64(0),
 	}
-	extObjectSizes := make(map[string]int64)
 	for name, entry := range manifest.Contents {
 		cannotBeInlined := entry.GetType() == Type_InlineFile &&
-			entry.GetSize() > int64(config.Limits.MaxInlineFileSize.Bytes())
+			entry.GetCompressedSize() > int64(config.Limits.MaxInlineFileSize.Bytes())
 		if cannotBeInlined {
 			dataHash := sha256.Sum256(entry.Data)
 			extManifest.Contents[name] = &Entry{
-				Type:        Type_ExternalFile.Enum(),
-				Size:        entry.Size,
-				Data:        fmt.Appendf(nil, "sha256-%x", dataHash),
-				Transform:   entry.Transform,
-				ContentType: entry.ContentType,
+				Type:           Type_ExternalFile.Enum(),
+				OriginalSize:   entry.OriginalSize,
+				CompressedSize: entry.CompressedSize,
+				Data:           fmt.Appendf(nil, "sha256-%x", dataHash),
+				Transform:      entry.Transform,
+				ContentType:    entry.ContentType,
+				GitHash:        entry.GitHash,
 			}
-			extObjectSizes[string(dataHash[:])] = entry.GetSize()
 		} else {
 			extManifest.Contents[name] = entry
 		}
 	}
-	// `extObjectMap` stores size once per object, deduplicating it
-	for _, storedSize := range extObjectSizes {
-		*extManifest.StoredSize += storedSize
+
+	// Compute the deduplicated storage size.
+	var blobSizes = make(map[string]int64)
+	for _, entry := range extManifest.Contents {
+		if entry.GetType() == Type_ExternalFile {
+			blobSizes[string(entry.Data)] = entry.GetCompressedSize()
+		}
+	}
+	for _, blobSize := range blobSizes {
+		*extManifest.StoredSize += blobSize
 	}
 
 	// Upload the resulting manifest and the blob it references.
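Review note: the serving side (src/pages.go below) passes `Transform_Zstd` data through verbatim when the client negotiates zstd; for clients that don't, the inverse transform would look roughly like this, assuming the same `klauspost/compress/zstd` package and mirroring the shared-encoder pattern above:

```go
// A shared decoder, reused for the same reason as zstdEncoder; a sketch, not
// part of this change.
var zstdDecoder, _ = zstd.NewReader(nil)

// entryData is a hypothetical helper returning an entry's decoded bytes.
func entryData(entry *Entry) ([]byte, error) {
	if entry.GetTransform() == Transform_Zstd {
		// DecodeAll appends to the destination slice; the recorded original_size
		// lets us allocate the result in one step.
		return zstdDecoder.DecodeAll(entry.GetData(), make([]byte, 0, entry.GetOriginalSize()))
	}
	return entry.GetData(), nil
}
```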
src/pages.go (+1 -1)

@@ -328,7 +328,7 @@
 	case "zstd":
 		// Set Content-Length ourselves since `http.ServeContent` only sets
 		// it if Content-Encoding is unset or if it's a range request.
-		w.Header().Set("Content-Length", strconv.FormatInt(*entry.Size, 10))
+		w.Header().Set("Content-Length", strconv.FormatInt(entry.GetCompressedSize(), 10))
 		w.Header().Set("Content-Encoding", "zstd")
 		serveEncodingCount.
 			With(prometheus.Labels{"transform": "zstd", "negotiated": "zstd"}).
src/schema.pb.go (+37 -10)

@@ -134,8 +134,13 @@
 	state protoimpl.MessageState `protogen:"open.v1"`
 	Type *Type `protobuf:"varint,1,opt,name=type,enum=Type" json:"type,omitempty"`
 	// Only present for `type == InlineFile` and `type == ExternalFile`.
-	// For transformed entries, refers to the post-transformation (compressed) size.
-	Size *int64 `protobuf:"varint,2,opt,name=size" json:"size,omitempty"`
+	// For transformed entries, refers to the pre-transformation (decompressed) size; otherwise
+	// equal to `compressed_size`.
+	OriginalSize *int64 `protobuf:"varint,7,opt,name=original_size,json=originalSize" json:"original_size,omitempty"`
+	// Only present for `type == InlineFile` and `type == ExternalFile`.
+	// For transformed entries, refers to the post-transformation (compressed) size; otherwise
+	// equal to `original_size`.
+	CompressedSize *int64 `protobuf:"varint,2,opt,name=compressed_size,json=compressedSize" json:"compressed_size,omitempty"`
 	// Meaning depends on `type`:
 	// - If `type == InlineFile`, contains file data.
 	// - If `type == ExternalFile`, contains blob name (an otherwise unspecified
@@ -148,7 +153,13 @@
 	Transform *Transform `protobuf:"varint,4,opt,name=transform,enum=Transform" json:"transform,omitempty"`
 	// Only present for `type == InlineFile` and `type == ExternalFile`.
 	// Currently, optional (not present on certain legacy manifests).
-	ContentType *string `protobuf:"bytes,5,opt,name=content_type,json=contentType" json:"content_type,omitempty"`
+	ContentType *string `protobuf:"bytes,5,opt,name=content_type,json=contentType" json:"content_type,omitempty"`
+	// May be present for `type == InlineFile` and `type == ExternalFile`.
+	// Used to reduce the amount of work being done during git checkouts.
+	// The type of hash used is determined by the length:
+	// - 40 bytes: SHA1DC (as hex)
+	// - 64 bytes: SHA256 (as hex)
+	GitHash *string `protobuf:"bytes,6,opt,name=git_hash,json=gitHash" json:"git_hash,omitempty"`
 	unknownFields protoimpl.UnknownFields
 	sizeCache protoimpl.SizeCache
 }
@@ -190,9 +201,16 @@
 	return Type_Invalid
 }
 
-func (x *Entry) GetSize() int64 {
-	if x != nil && x.Size != nil {
-		return *x.Size
+func (x *Entry) GetOriginalSize() int64 {
+	if x != nil && x.OriginalSize != nil {
+		return *x.OriginalSize
+	}
+	return 0
+}
+
+func (x *Entry) GetCompressedSize() int64 {
+	if x != nil && x.CompressedSize != nil {
+		return *x.CompressedSize
 	}
 	return 0
 }
@@ -214,6 +232,13 @@
 func (x *Entry) GetContentType() string {
 	if x != nil && x.ContentType != nil {
 		return *x.ContentType
+	}
+	return ""
+}
+
+func (x *Entry) GetGitHash() string {
+	if x != nil && x.GitHash != nil {
+		return *x.GitHash
 	}
 	return ""
 }
@@ -569,14 +594,16 @@
 
 const file_schema_proto_rawDesc = "" +
 	"\n" +
-	"\fschema.proto\"\x97\x01\n" +
+	"\fschema.proto\"\xec\x01\n" +
 	"\x05Entry\x12\x19\n" +
-	"\x04type\x18\x01 \x01(\x0e2\x05.TypeR\x04type\x12\x12\n" +
-	"\x04size\x18\x02 \x01(\x03R\x04size\x12\x12\n" +
+	"\x04type\x18\x01 \x01(\x0e2\x05.TypeR\x04type\x12#\n" +
+	"\roriginal_size\x18\a \x01(\x03R\foriginalSize\x12'\n" +
+	"\x0fcompressed_size\x18\x02 \x01(\x03R\x0ecompressedSize\x12\x12\n" +
 	"\x04data\x18\x03 \x01(\fR\x04data\x12(\n" +
 	"\ttransform\x18\x04 \x01(\x0e2\n" +
 	".TransformR\ttransform\x12!\n" +
-	"\fcontent_type\x18\x05 \x01(\tR\vcontentType\"`\n" +
+	"\fcontent_type\x18\x05 \x01(\tR\vcontentType\x12\x19\n" +
+	"\bgit_hash\x18\x06 \x01(\tR\agitHash\"`\n" +
 	"\fRedirectRule\x12\x12\n" +
 	"\x04from\x18\x01 \x01(\tR\x04from\x12\x0e\n" +
 	"\x02to\x18\x02 \x01(\tR\x02to\x12\x16\n" +
src/schema.proto (+16 -5)

@@ -26,8 +26,13 @@
 message Entry {
   Type type = 1;
   // Only present for `type == InlineFile` and `type == ExternalFile`.
-  // For transformed entries, refers to the post-transformation (compressed) size.
-  int64 size = 2;
+  // For transformed entries, refers to the pre-transformation (decompressed) size; otherwise
+  // equal to `compressed_size`.
+  int64 original_size = 7;
+  // Only present for `type == InlineFile` and `type == ExternalFile`.
+  // For transformed entries, refers to the post-transformation (compressed) size; otherwise
+  // equal to `original_size`.
+  int64 compressed_size = 2;
   // Meaning depends on `type`:
   // * If `type == InlineFile`, contains file data.
   // * If `type == ExternalFile`, contains blob name (an otherwise unspecified
@@ -41,6 +46,12 @@
   // Only present for `type == InlineFile` and `type == ExternalFile`.
   // Currently, optional (not present on certain legacy manifests).
   string content_type = 5;
+  // May be present for `type == InlineFile` and `type == ExternalFile`.
+  // Used to reduce the amount of work being done during git checkouts.
+  // The type of hash used is determined by the length:
+  // * 40 bytes: SHA1DC (as hex)
+  // * 64 bytes: SHA256 (as hex)
+  string git_hash = 6;
 }
 
 // See https://docs.netlify.com/manage/routing/redirects/overview/ for details.
@@ -76,9 +87,9 @@
 
   // Contents
   map<string, Entry> contents = 4;
-  int64 original_size = 10;  // total size of entries before compression
-  int64 compressed_size = 5; // simple sum of each `entry.size`
-  int64 stored_size = 8;     // total size of (deduplicated) external objects
+  int64 original_size = 10;  // sum of each `entry.original_size`
+  int64 compressed_size = 5; // sum of each `entry.compressed_size`
+  int64 stored_size = 8;     // sum of deduplicated `entry.compressed_size` for external files only
 
   // Netlify-style `_redirects` and `_headers`
   repeated RedirectRule redirects = 6;
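Review note (my reading, not stated in the change): `compressed_size` inherits the old `size` field's tag 2, and protobuf wire compatibility depends only on tags, so entries written under the old schema still decode, with their former `size` landing in `compressed_size`; the new `original_size` (tag 7) reads as 0 on legacy manifests. A sketch that fakes a legacy writer using `google.golang.org/protobuf/encoding/protowire`:

```go
func legacyDecodeSketch() {
	// Encode what an old writer would have produced for `int64 size = 2;`.
	var legacy []byte
	legacy = protowire.AppendTag(legacy, 2, protowire.VarintType)
	legacy = protowire.AppendVarint(legacy, 1234)

	entry := &Entry{}
	if err := proto.Unmarshal(legacy, entry); err != nil {
		panic(err)
	}
	_ = entry.GetCompressedSize() // 1234: read back through the renamed field
	_ = entry.GetOriginalSize()   // 0: tag 7 is absent on legacy entries
}
```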
src/update.go (+4 -1)

@@ -92,7 +92,10 @@
 
 	logc.Printf(ctx, "update %s: %s %s\n", webRoot, repoURL, branch)
 
-	manifest, err := FetchRepository(ctx, repoURL, branch)
+	oldManifest, _, _ := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
+	// Ignore errors; worst case we have to re-fetch all of the blobs.
+
+	manifest, err := FetchRepository(ctx, repoURL, branch, oldManifest)
 	if errors.Is(err, context.DeadlineExceeded) {
 		result = UpdateResult{UpdateTimeout, nil, fmt.Errorf("update timeout")}
 	} else if err != nil {
src/util.go (+18)

@@ -4,6 +4,8 @@
 	"errors"
 	"io"
 	"strings"
+
+	"google.golang.org/protobuf/proto"
 )
 
 type BoundedReader struct {
@@ -85,3 +87,19 @@
 	mediaType = strings.TrimSpace(strings.ToLower(mediaType))
 	return
 }
+
+// Copying Protobuf messages like `*dest = *src` causes a lock to be copied, which is unsound.
+// Copying Protobuf messages field-wise is fragile: adding a new field to the schema does not
+// cause a diagnostic to be emitted pointing to the copy site, making it easy to miss updates.
+// Serializing and deserializing is reliable and breaks referential links.
+func CopyProtoMessage(dest, src proto.Message) {
+	data, err := proto.Marshal(src)
+	if err != nil {
+		panic(err)
+	}
+
+	err = proto.Unmarshal(data, dest)
+	if err != nil {
+		panic(err)
+	}
+}
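Review note on the design choice: the proto runtime offers the same lock-safe deep copy without the encode/decode pass, e.g.:

```go
// Equivalent sketch using the runtime's own helpers: Reset clears dest, then
// Merge deep-copies every populated field (unknown fields included).
func CopyProtoMessageAlt(dest, src proto.Message) {
	proto.Reset(dest)
	proto.Merge(dest, src)
}
```

One difference: `proto.Merge` panics when `dest` and `src` have different descriptors, whereas the wire round trip silently reinterprets fields by tag number, so the stricter failure mode may arguably be preferable.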