[mirror] Scalable static site server for Git forges (like GitHub Pages)
at v0.1.0 3.9 kB view raw
1package git_pages 2 3import ( 4 "archive/tar" 5 "archive/zip" 6 "bytes" 7 "compress/gzip" 8 "errors" 9 "fmt" 10 "io" 11 "os" 12 "strings" 13 14 "github.com/c2h5oh/datasize" 15 "github.com/klauspost/compress/zstd" 16 "google.golang.org/protobuf/proto" 17) 18 19var ErrArchiveTooLarge = errors.New("archive too large") 20 21func ExtractTar(reader io.Reader) (*Manifest, error) { 22 // If the tar stream is itself compressed, both the outer and the inner bounds checks 23 // are load-bearing. 24 boundedReader := ReadAtMost(reader, int64(config.Limits.MaxSiteSize.Bytes()), 25 fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR())) 26 27 archive := tar.NewReader(boundedReader) 28 29 manifest := Manifest{ 30 Contents: map[string]*Entry{ 31 "": {Type: Type_Directory.Enum()}, 32 }, 33 } 34 for { 35 header, err := archive.Next() 36 if err == io.EOF { 37 break 38 } else if err != nil { 39 return nil, err 40 } 41 42 // For some reason, GNU tar includes any leading `.` path segments in archive filenames, 43 // unless there is a `..` path segment anywhere in the input filenames. 44 fileName := header.Name 45 for { 46 if strippedName, found := strings.CutPrefix(fileName, "./"); found { 47 fileName = strippedName 48 } else { 49 break 50 } 51 } 52 53 manifestEntry := Entry{} 54 switch header.Typeflag { 55 case tar.TypeReg: 56 fileData, err := io.ReadAll(archive) 57 if err != nil { 58 return nil, fmt.Errorf("tar: %s: %w", fileName, err) 59 } 60 61 manifestEntry.Type = Type_InlineFile.Enum() 62 manifestEntry.Size = proto.Int64(header.Size) 63 manifestEntry.Data = fileData 64 65 case tar.TypeSymlink: 66 manifestEntry.Type = Type_Symlink.Enum() 67 manifestEntry.Size = proto.Int64(header.Size) 68 manifestEntry.Data = []byte(header.Linkname) 69 70 case tar.TypeDir: 71 manifestEntry.Type = Type_Directory.Enum() 72 fileName = strings.TrimSuffix(fileName, "/") 73 74 default: 75 AddProblem(&manifest, fileName, "unsupported type '%c'", header.Typeflag) 76 continue 77 } 78 manifest.Contents[fileName] = &manifestEntry 79 } 80 return &manifest, nil 81} 82 83func ExtractTarGzip(reader io.Reader) (*Manifest, error) { 84 stream, err := gzip.NewReader(reader) 85 if err != nil { 86 return nil, err 87 } 88 defer stream.Close() 89 90 // stream length is limited in `ExtractTar` 91 return ExtractTar(stream) 92} 93 94func ExtractTarZstd(reader io.Reader) (*Manifest, error) { 95 stream, err := zstd.NewReader(reader) 96 if err != nil { 97 return nil, err 98 } 99 defer stream.Close() 100 101 // stream length is limited in `ExtractTar` 102 return ExtractTar(stream) 103} 104 105func ExtractZip(reader io.Reader) (*Manifest, error) { 106 data, err := io.ReadAll(reader) 107 if err != nil { 108 return nil, err 109 } 110 111 archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) 112 if err != nil { 113 return nil, err 114 } 115 116 // Detect and defuse zipbombs. 117 var totalSize uint64 118 for _, file := range archive.File { 119 totalSize += file.UncompressedSize64 120 } 121 if totalSize > config.Limits.MaxSiteSize.Bytes() { 122 return nil, fmt.Errorf("%w: decompressed size %s exceeds %s limit", 123 ErrArchiveTooLarge, 124 datasize.ByteSize(totalSize).HR(), 125 config.Limits.MaxSiteSize.HR(), 126 ) 127 } 128 129 manifest := Manifest{ 130 Contents: map[string]*Entry{ 131 "": {Type: Type_Directory.Enum()}, 132 }, 133 } 134 for _, file := range archive.File { 135 manifestEntry := Entry{} 136 if !strings.HasSuffix(file.Name, "/") { 137 fileReader, err := file.Open() 138 if err != nil { 139 return nil, err 140 } 141 defer fileReader.Close() 142 143 fileData, err := io.ReadAll(fileReader) 144 if err != nil { 145 return nil, fmt.Errorf("zip: %s: %w", file.Name, err) 146 } 147 148 if file.Mode()&os.ModeSymlink != 0 { 149 manifestEntry.Type = Type_Symlink.Enum() 150 } else { 151 manifestEntry.Type = Type_InlineFile.Enum() 152 } 153 manifestEntry.Size = proto.Int64(int64(file.UncompressedSize64)) 154 manifestEntry.Data = fileData 155 } else { 156 manifestEntry.Type = Type_Directory.Enum() 157 } 158 manifest.Contents[strings.TrimSuffix(file.Name, "/")] = &manifestEntry 159 } 160 return &manifest, nil 161}