forked from
whitequark.org/git-pages
fork of whitequark.org/git-pages with mods for tangled
1package git_pages
2
3import (
4 "archive/tar"
5 "archive/zip"
6 "bytes"
7 "compress/gzip"
8 "errors"
9 "fmt"
10 "io"
11 "os"
12 "strings"
13
14 "github.com/c2h5oh/datasize"
15 "github.com/klauspost/compress/zstd"
16 "google.golang.org/protobuf/proto"
17)
18
// ErrArchiveTooLarge is the sentinel wrapped by the errors that the extractors in this
// file produce when an archive exceeds config.Limits.MaxSiteSize. Match it with
// errors.Is, since it is always returned wrapped with additional detail.
var ErrArchiveTooLarge = errors.New("archive too large")
20
21func ExtractTar(reader io.Reader) (*Manifest, error) {
22 // If the tar stream is itself compressed, both the outer and the inner bounds checks
23 // are load-bearing.
24 boundedReader := ReadAtMost(reader, int64(config.Limits.MaxSiteSize.Bytes()),
25 fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR()))
26
27 archive := tar.NewReader(boundedReader)
28
29 manifest := Manifest{
30 Contents: map[string]*Entry{
31 "": {Type: Type_Directory.Enum()},
32 },
33 }
34 for {
35 header, err := archive.Next()
36 if err == io.EOF {
37 break
38 } else if err != nil {
39 return nil, err
40 }
41
42 // For some reason, GNU tar includes any leading `.` path segments in archive filenames,
43 // unless there is a `..` path segment anywhere in the input filenames.
44 fileName := header.Name
45 for {
46 if strippedName, found := strings.CutPrefix(fileName, "./"); found {
47 fileName = strippedName
48 } else {
49 break
50 }
51 }
52
53 manifestEntry := Entry{}
54 switch header.Typeflag {
55 case tar.TypeReg:
56 fileData, err := io.ReadAll(archive)
57 if err != nil {
58 return nil, fmt.Errorf("tar: %s: %w", fileName, err)
59 }
60
61 manifestEntry.Type = Type_InlineFile.Enum()
62 manifestEntry.Data = fileData
63 manifestEntry.Transform = Transform_Identity.Enum()
64 manifestEntry.OriginalSize = proto.Int64(header.Size)
65 manifestEntry.CompressedSize = proto.Int64(header.Size)
66
67 case tar.TypeSymlink:
68 manifestEntry.Type = Type_Symlink.Enum()
69 manifestEntry.Data = []byte(header.Linkname)
70 manifestEntry.Transform = Transform_Identity.Enum()
71 manifestEntry.OriginalSize = proto.Int64(header.Size)
72 manifestEntry.CompressedSize = proto.Int64(header.Size)
73
74 case tar.TypeDir:
75 manifestEntry.Type = Type_Directory.Enum()
76 fileName = strings.TrimSuffix(fileName, "/")
77
78 default:
79 AddProblem(&manifest, fileName, "unsupported type '%c'", header.Typeflag)
80 continue
81 }
82 manifest.Contents[fileName] = &manifestEntry
83 }
84 return &manifest, nil
85}
86
87func ExtractTarGzip(reader io.Reader) (*Manifest, error) {
88 stream, err := gzip.NewReader(reader)
89 if err != nil {
90 return nil, err
91 }
92 defer stream.Close()
93
94 // stream length is limited in `ExtractTar`
95 return ExtractTar(stream)
96}
97
98func ExtractTarZstd(reader io.Reader) (*Manifest, error) {
99 stream, err := zstd.NewReader(reader)
100 if err != nil {
101 return nil, err
102 }
103 defer stream.Close()
104
105 // stream length is limited in `ExtractTar`
106 return ExtractTar(stream)
107}
108
109func ExtractZip(reader io.Reader) (*Manifest, error) {
110 data, err := io.ReadAll(reader)
111 if err != nil {
112 return nil, err
113 }
114
115 archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
116 if err != nil {
117 return nil, err
118 }
119
120 // Detect and defuse zipbombs.
121 var totalSize uint64
122 for _, file := range archive.File {
123 totalSize += file.UncompressedSize64
124 }
125 if totalSize > config.Limits.MaxSiteSize.Bytes() {
126 return nil, fmt.Errorf("%w: decompressed size %s exceeds %s limit",
127 ErrArchiveTooLarge,
128 datasize.ByteSize(totalSize).HR(),
129 config.Limits.MaxSiteSize.HR(),
130 )
131 }
132
133 manifest := Manifest{
134 Contents: map[string]*Entry{
135 "": {Type: Type_Directory.Enum()},
136 },
137 }
138 for _, file := range archive.File {
139 manifestEntry := Entry{}
140 if !strings.HasSuffix(file.Name, "/") {
141 fileReader, err := file.Open()
142 if err != nil {
143 return nil, err
144 }
145 defer fileReader.Close()
146
147 fileData, err := io.ReadAll(fileReader)
148 if err != nil {
149 return nil, fmt.Errorf("zip: %s: %w", file.Name, err)
150 }
151
152 if file.Mode()&os.ModeSymlink != 0 {
153 manifestEntry.Type = Type_Symlink.Enum()
154 } else {
155 manifestEntry.Type = Type_InlineFile.Enum()
156 }
157 manifestEntry.Data = fileData
158 manifestEntry.Transform = Transform_Identity.Enum()
159 manifestEntry.OriginalSize = proto.Int64(int64(file.UncompressedSize64))
160 manifestEntry.CompressedSize = proto.Int64(int64(file.UncompressedSize64))
161 } else {
162 manifestEntry.Type = Type_Directory.Enum()
163 }
164 manifest.Contents[strings.TrimSuffix(file.Name, "/")] = &manifestEntry
165 }
166 return &manifest, nil
167}