[mirror] Scalable static site server for Git forges (like GitHub Pages)
1package git_pages
2
3import (
4 "archive/tar"
5 "archive/zip"
6 "bytes"
7 "compress/gzip"
8 "errors"
9 "fmt"
10 "io"
11 "os"
12 "strings"
13
14 "github.com/c2h5oh/datasize"
15 "github.com/klauspost/compress/zstd"
16 "google.golang.org/protobuf/proto"
17)
18
// ErrArchiveTooLarge is the sentinel error wrapped by the archive extractors in this file
// when an archive exceeds the configured maximum site size. Test for it with errors.Is.
var ErrArchiveTooLarge = errors.New("archive too large")
20
21func ExtractTar(reader io.Reader) (*Manifest, error) {
22 // If the tar stream is itself compressed, both the outer and the inner bounds checks
23 // are load-bearing.
24 boundedReader := ReadAtMost(reader, int64(config.Limits.MaxSiteSize.Bytes()),
25 fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR()))
26
27 archive := tar.NewReader(boundedReader)
28
29 manifest := Manifest{
30 Contents: map[string]*Entry{
31 "": {Type: Type_Directory.Enum()},
32 },
33 }
34 for {
35 header, err := archive.Next()
36 if err == io.EOF {
37 break
38 } else if err != nil {
39 return nil, err
40 }
41
42 // For some reason, GNU tar includes any leading `.` path segments in archive filenames,
43 // unless there is a `..` path segment anywhere in the input filenames.
44 fileName := header.Name
45 for {
46 if strippedName, found := strings.CutPrefix(fileName, "./"); found {
47 fileName = strippedName
48 } else {
49 break
50 }
51 }
52
53 manifestEntry := Entry{}
54 switch header.Typeflag {
55 case tar.TypeReg:
56 fileData, err := io.ReadAll(archive)
57 if err != nil {
58 return nil, fmt.Errorf("tar: %s: %w", fileName, err)
59 }
60
61 manifestEntry.Type = Type_InlineFile.Enum()
62 manifestEntry.Size = proto.Int64(header.Size)
63 manifestEntry.Data = fileData
64
65 case tar.TypeSymlink:
66 manifestEntry.Type = Type_Symlink.Enum()
67 manifestEntry.Size = proto.Int64(header.Size)
68 manifestEntry.Data = []byte(header.Linkname)
69
70 case tar.TypeDir:
71 manifestEntry.Type = Type_Directory.Enum()
72 fileName = strings.TrimSuffix(fileName, "/")
73
74 default:
75 AddProblem(&manifest, fileName, "unsupported type '%c'", header.Typeflag)
76 continue
77 }
78 manifest.Contents[fileName] = &manifestEntry
79 }
80 return &manifest, nil
81}
82
83func ExtractTarGzip(reader io.Reader) (*Manifest, error) {
84 stream, err := gzip.NewReader(reader)
85 if err != nil {
86 return nil, err
87 }
88 defer stream.Close()
89
90 // stream length is limited in `ExtractTar`
91 return ExtractTar(stream)
92}
93
94func ExtractTarZstd(reader io.Reader) (*Manifest, error) {
95 stream, err := zstd.NewReader(reader)
96 if err != nil {
97 return nil, err
98 }
99 defer stream.Close()
100
101 // stream length is limited in `ExtractTar`
102 return ExtractTar(stream)
103}
104
105func ExtractZip(reader io.Reader) (*Manifest, error) {
106 data, err := io.ReadAll(reader)
107 if err != nil {
108 return nil, err
109 }
110
111 archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
112 if err != nil {
113 return nil, err
114 }
115
116 // Detect and defuse zipbombs.
117 var totalSize uint64
118 for _, file := range archive.File {
119 totalSize += file.UncompressedSize64
120 }
121 if totalSize > config.Limits.MaxSiteSize.Bytes() {
122 return nil, fmt.Errorf("%w: decompressed size %s exceeds %s limit",
123 ErrArchiveTooLarge,
124 datasize.ByteSize(totalSize).HR(),
125 config.Limits.MaxSiteSize.HR(),
126 )
127 }
128
129 manifest := Manifest{
130 Contents: map[string]*Entry{
131 "": {Type: Type_Directory.Enum()},
132 },
133 }
134 for _, file := range archive.File {
135 manifestEntry := Entry{}
136 if !strings.HasSuffix(file.Name, "/") {
137 fileReader, err := file.Open()
138 if err != nil {
139 return nil, err
140 }
141 defer fileReader.Close()
142
143 fileData, err := io.ReadAll(fileReader)
144 if err != nil {
145 return nil, fmt.Errorf("zip: %s: %w", file.Name, err)
146 }
147
148 if file.Mode()&os.ModeSymlink != 0 {
149 manifestEntry.Type = Type_Symlink.Enum()
150 } else {
151 manifestEntry.Type = Type_InlineFile.Enum()
152 }
153 manifestEntry.Size = proto.Int64(int64(file.UncompressedSize64))
154 manifestEntry.Data = fileData
155 } else {
156 manifestEntry.Type = Type_Directory.Enum()
157 }
158 manifest.Contents[strings.TrimSuffix(file.Name, "/")] = &manifestEntry
159 }
160 return &manifest, nil
161}