fork of whitequark.org/git-pages with mods for tangled
at main 4.3 kB view raw
1package git_pages 2 3import ( 4 "archive/tar" 5 "archive/zip" 6 "bytes" 7 "compress/gzip" 8 "errors" 9 "fmt" 10 "io" 11 "os" 12 "strings" 13 14 "github.com/c2h5oh/datasize" 15 "github.com/klauspost/compress/zstd" 16 "google.golang.org/protobuf/proto" 17) 18 19var ErrArchiveTooLarge = errors.New("archive too large") 20 21func ExtractTar(reader io.Reader) (*Manifest, error) { 22 // If the tar stream is itself compressed, both the outer and the inner bounds checks 23 // are load-bearing. 24 boundedReader := ReadAtMost(reader, int64(config.Limits.MaxSiteSize.Bytes()), 25 fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR())) 26 27 archive := tar.NewReader(boundedReader) 28 29 manifest := Manifest{ 30 Contents: map[string]*Entry{ 31 "": {Type: Type_Directory.Enum()}, 32 }, 33 } 34 for { 35 header, err := archive.Next() 36 if err == io.EOF { 37 break 38 } else if err != nil { 39 return nil, err 40 } 41 42 // For some reason, GNU tar includes any leading `.` path segments in archive filenames, 43 // unless there is a `..` path segment anywhere in the input filenames. 44 fileName := header.Name 45 for { 46 if strippedName, found := strings.CutPrefix(fileName, "./"); found { 47 fileName = strippedName 48 } else { 49 break 50 } 51 } 52 53 manifestEntry := Entry{} 54 switch header.Typeflag { 55 case tar.TypeReg: 56 fileData, err := io.ReadAll(archive) 57 if err != nil { 58 return nil, fmt.Errorf("tar: %s: %w", fileName, err) 59 } 60 61 manifestEntry.Type = Type_InlineFile.Enum() 62 manifestEntry.Data = fileData 63 manifestEntry.Transform = Transform_Identity.Enum() 64 manifestEntry.OriginalSize = proto.Int64(header.Size) 65 manifestEntry.CompressedSize = proto.Int64(header.Size) 66 67 case tar.TypeSymlink: 68 manifestEntry.Type = Type_Symlink.Enum() 69 manifestEntry.Data = []byte(header.Linkname) 70 manifestEntry.Transform = Transform_Identity.Enum() 71 manifestEntry.OriginalSize = proto.Int64(header.Size) 72 manifestEntry.CompressedSize = proto.Int64(header.Size) 73 74 case tar.TypeDir: 75 manifestEntry.Type = Type_Directory.Enum() 76 fileName = strings.TrimSuffix(fileName, "/") 77 78 default: 79 AddProblem(&manifest, fileName, "unsupported type '%c'", header.Typeflag) 80 continue 81 } 82 manifest.Contents[fileName] = &manifestEntry 83 } 84 return &manifest, nil 85} 86 87func ExtractTarGzip(reader io.Reader) (*Manifest, error) { 88 stream, err := gzip.NewReader(reader) 89 if err != nil { 90 return nil, err 91 } 92 defer stream.Close() 93 94 // stream length is limited in `ExtractTar` 95 return ExtractTar(stream) 96} 97 98func ExtractTarZstd(reader io.Reader) (*Manifest, error) { 99 stream, err := zstd.NewReader(reader) 100 if err != nil { 101 return nil, err 102 } 103 defer stream.Close() 104 105 // stream length is limited in `ExtractTar` 106 return ExtractTar(stream) 107} 108 109func ExtractZip(reader io.Reader) (*Manifest, error) { 110 data, err := io.ReadAll(reader) 111 if err != nil { 112 return nil, err 113 } 114 115 archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) 116 if err != nil { 117 return nil, err 118 } 119 120 // Detect and defuse zipbombs. 121 var totalSize uint64 122 for _, file := range archive.File { 123 totalSize += file.UncompressedSize64 124 } 125 if totalSize > config.Limits.MaxSiteSize.Bytes() { 126 return nil, fmt.Errorf("%w: decompressed size %s exceeds %s limit", 127 ErrArchiveTooLarge, 128 datasize.ByteSize(totalSize).HR(), 129 config.Limits.MaxSiteSize.HR(), 130 ) 131 } 132 133 manifest := Manifest{ 134 Contents: map[string]*Entry{ 135 "": {Type: Type_Directory.Enum()}, 136 }, 137 } 138 for _, file := range archive.File { 139 manifestEntry := Entry{} 140 if !strings.HasSuffix(file.Name, "/") { 141 fileReader, err := file.Open() 142 if err != nil { 143 return nil, err 144 } 145 defer fileReader.Close() 146 147 fileData, err := io.ReadAll(fileReader) 148 if err != nil { 149 return nil, fmt.Errorf("zip: %s: %w", file.Name, err) 150 } 151 152 if file.Mode()&os.ModeSymlink != 0 { 153 manifestEntry.Type = Type_Symlink.Enum() 154 } else { 155 manifestEntry.Type = Type_InlineFile.Enum() 156 } 157 manifestEntry.Data = fileData 158 manifestEntry.Transform = Transform_Identity.Enum() 159 manifestEntry.OriginalSize = proto.Int64(int64(file.UncompressedSize64)) 160 manifestEntry.CompressedSize = proto.Int64(int64(file.UncompressedSize64)) 161 } else { 162 manifestEntry.Type = Type_Directory.Enum() 163 } 164 manifest.Contents[strings.TrimSuffix(file.Name, "/")] = &manifestEntry 165 } 166 return &manifest, nil 167}