[mirror] Scalable static site server for Git forges (like GitHub Pages)
1package git_pages
2
3import (
4 "archive/tar"
5 "archive/zip"
6 "bytes"
7 "compress/gzip"
8 "context"
9 "errors"
10 "fmt"
11 "io"
12 "math"
13 "os"
14 "strings"
15
16 "github.com/c2h5oh/datasize"
17 "github.com/go-git/go-git/v6/plumbing"
18 "github.com/klauspost/compress/zstd"
19)
20
// ErrArchiveTooLarge is the sentinel error wrapped into the errors this file
// produces when an archive exceeds config.Limits.MaxSiteSize. Callers can
// detect it with errors.Is.
var ErrArchiveTooLarge = errors.New("archive too large")
22
23func boundArchiveStream(reader io.Reader) io.Reader {
24 return ReadAtMost(reader, int64(config.Limits.MaxSiteSize.Bytes()),
25 fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR()))
26}
27
28func ExtractGzip(
29 ctx context.Context, reader io.Reader,
30 next func(context.Context, io.Reader) (*Manifest, error),
31) (*Manifest, error) {
32 stream, err := gzip.NewReader(reader)
33 if err != nil {
34 return nil, err
35 }
36 defer stream.Close()
37
38 return next(ctx, boundArchiveStream(stream))
39}
40
41func ExtractZstd(
42 ctx context.Context, reader io.Reader,
43 next func(context.Context, io.Reader) (*Manifest, error),
44) (*Manifest, error) {
45 stream, err := zstd.NewReader(reader)
46 if err != nil {
47 return nil, err
48 }
49 defer stream.Close()
50
51 return next(ctx, boundArchiveStream(stream))
52}
53
// BlobReferencePrefix marks a symlink target as a blob reference rather than
// an ordinary symlink: the remainder of the target after this prefix is
// looked up as a git hash among previously known entries.
const BlobReferencePrefix = "/git/blobs/"
55
// UnresolvedRefError is returned when an archive contains blob references
// whose hashes could not be resolved.
type UnresolvedRefError struct {
	// missing lists the hashes that could not be resolved, in the order
	// they were encountered.
	missing []string
}

// Error implements the error interface. The message reports only how many
// references are unresolved, not the hashes themselves.
func (e UnresolvedRefError) Error() string {
	count := len(e.missing)
	return fmt.Sprintf("%d unresolved blob references", count)
}
63
64// Returns a map of git hash to entry. If `manifest` is nil, returns an empty map.
65func indexManifestByGitHash(manifest *Manifest) map[string]*Entry {
66 index := map[string]*Entry{}
67 for _, entry := range manifest.GetContents() {
68 if hash := entry.GetGitHash(); hash != "" {
69 if _, ok := plumbing.FromHex(hash); ok {
70 index[hash] = entry
71 } else {
72 panic(fmt.Errorf("index: malformed hash: %s", hash))
73 }
74 }
75 }
76 return index
77}
78
79func addSymlinkOrBlobReference(
80 manifest *Manifest, fileName string, target string,
81 index map[string]*Entry, missing *[]string,
82) *Entry {
83 if hash, found := strings.CutPrefix(target, BlobReferencePrefix); found {
84 if entry, found := index[hash]; found {
85 manifest.Contents[fileName] = entry
86 return entry
87 } else {
88 *missing = append(*missing, hash)
89 return nil
90 }
91 } else {
92 return AddSymlink(manifest, fileName, target)
93 }
94}
95
96func ExtractTar(ctx context.Context, reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
97 archive := tar.NewReader(reader)
98
99 var dataBytesRecycled int64
100 var dataBytesTransferred int64
101
102 index := indexManifestByGitHash(oldManifest)
103 missing := []string{}
104 manifest := NewManifest()
105 for {
106 header, err := archive.Next()
107 if err == io.EOF {
108 break
109 } else if err != nil {
110 return nil, err
111 }
112
113 // For some reason, GNU tar includes any leading `.` path segments in archive filenames,
114 // unless there is a `..` path segment anywhere in the input filenames.
115 fileName := header.Name
116 for {
117 if strippedName, found := strings.CutPrefix(fileName, "./"); found {
118 fileName = strippedName
119 } else {
120 break
121 }
122 }
123
124 switch header.Typeflag {
125 case tar.TypeReg:
126 fileData, err := io.ReadAll(archive)
127 if err != nil {
128 return nil, fmt.Errorf("tar: %s: %w", fileName, err)
129 }
130 AddFile(manifest, fileName, fileData)
131 dataBytesTransferred += int64(len(fileData))
132 case tar.TypeSymlink:
133 entry := addSymlinkOrBlobReference(
134 manifest, fileName, header.Linkname, index, &missing)
135 dataBytesRecycled += entry.GetOriginalSize()
136 case tar.TypeDir:
137 AddDirectory(manifest, fileName)
138 default:
139 AddProblem(manifest, fileName, "tar: unsupported type '%c'", header.Typeflag)
140 continue
141 }
142 }
143
144 if len(missing) > 0 {
145 return nil, UnresolvedRefError{missing}
146 }
147
148 // Ensure parent directories exist for all entries.
149 EnsureLeadingDirectories(manifest)
150
151 logc.Printf(ctx,
152 "reuse: %s recycled, %s transferred\n",
153 datasize.ByteSize(dataBytesRecycled).HR(),
154 datasize.ByteSize(dataBytesTransferred).HR(),
155 )
156
157 return manifest, nil
158}
159
160// Used for zstd decompression inside zip files, it is recommended to share this.
161var zstdDecomp = zstd.ZipDecompressor()
162
163func ExtractZip(ctx context.Context, reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
164 data, err := io.ReadAll(reader)
165 if err != nil {
166 return nil, err
167 }
168
169 archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
170 if err != nil {
171 return nil, err
172 }
173
174 // Support zstd compression inside zip files.
175 archive.RegisterDecompressor(zstd.ZipMethodWinZip, zstdDecomp)
176 archive.RegisterDecompressor(zstd.ZipMethodPKWare, zstdDecomp)
177
178 // Detect and defuse zipbombs.
179 var totalSize uint64
180 for _, file := range archive.File {
181 if totalSize+file.UncompressedSize64 < totalSize {
182 // Would overflow
183 totalSize = math.MaxUint64
184 break
185 }
186 totalSize += file.UncompressedSize64
187 }
188 if totalSize > config.Limits.MaxSiteSize.Bytes() {
189 return nil, fmt.Errorf("%w: decompressed size %s exceeds %s limit",
190 ErrArchiveTooLarge,
191 datasize.ByteSize(totalSize).HR(),
192 config.Limits.MaxSiteSize.HR(),
193 )
194 }
195
196 var dataBytesRecycled int64
197 var dataBytesTransferred int64
198
199 index := indexManifestByGitHash(oldManifest)
200 missing := []string{}
201 manifest := NewManifest()
202 for _, file := range archive.File {
203 if strings.HasSuffix(file.Name, "/") {
204 AddDirectory(manifest, file.Name)
205 } else {
206 fileReader, err := file.Open()
207 if err != nil {
208 return nil, err
209 }
210 defer fileReader.Close()
211
212 fileData, err := io.ReadAll(fileReader)
213 if err != nil {
214 return nil, fmt.Errorf("zip: %s: %w", file.Name, err)
215 }
216
217 if file.Mode()&os.ModeSymlink != 0 {
218 entry := addSymlinkOrBlobReference(
219 manifest, file.Name, string(fileData), index, &missing)
220 dataBytesRecycled += entry.GetOriginalSize()
221 } else {
222 AddFile(manifest, file.Name, fileData)
223 dataBytesTransferred += int64(len(fileData))
224 }
225 }
226 }
227
228 if len(missing) > 0 {
229 return nil, UnresolvedRefError{missing}
230 }
231
232 // Ensure parent directories exist for all entries.
233 EnsureLeadingDirectories(manifest)
234
235 logc.Printf(ctx,
236 "reuse: %s recycled, %s transferred\n",
237 datasize.ByteSize(dataBytesRecycled).HR(),
238 datasize.ByteSize(dataBytesTransferred).HR(),
239 )
240
241 return manifest, nil
242}
243