[mirror] Scalable static site server for Git forges (like GitHub Pages)
at main 6.1 kB view raw
1package git_pages 2 3import ( 4 "archive/tar" 5 "archive/zip" 6 "bytes" 7 "compress/gzip" 8 "context" 9 "errors" 10 "fmt" 11 "io" 12 "math" 13 "os" 14 "strings" 15 16 "github.com/c2h5oh/datasize" 17 "github.com/go-git/go-git/v6/plumbing" 18 "github.com/klauspost/compress/zstd" 19) 20 21var ErrArchiveTooLarge = errors.New("archive too large") 22 23func boundArchiveStream(reader io.Reader) io.Reader { 24 return ReadAtMost(reader, int64(config.Limits.MaxSiteSize.Bytes()), 25 fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR())) 26} 27 28func ExtractGzip( 29 ctx context.Context, reader io.Reader, 30 next func(context.Context, io.Reader) (*Manifest, error), 31) (*Manifest, error) { 32 stream, err := gzip.NewReader(reader) 33 if err != nil { 34 return nil, err 35 } 36 defer stream.Close() 37 38 return next(ctx, boundArchiveStream(stream)) 39} 40 41func ExtractZstd( 42 ctx context.Context, reader io.Reader, 43 next func(context.Context, io.Reader) (*Manifest, error), 44) (*Manifest, error) { 45 stream, err := zstd.NewReader(reader) 46 if err != nil { 47 return nil, err 48 } 49 defer stream.Close() 50 51 return next(ctx, boundArchiveStream(stream)) 52} 53 54const BlobReferencePrefix = "/git/blobs/" 55 56type UnresolvedRefError struct { 57 missing []string 58} 59 60func (err UnresolvedRefError) Error() string { 61 return fmt.Sprintf("%d unresolved blob references", len(err.missing)) 62} 63 64// Returns a map of git hash to entry. If `manifest` is nil, returns an empty map. 65func indexManifestByGitHash(manifest *Manifest) map[string]*Entry { 66 index := map[string]*Entry{} 67 for _, entry := range manifest.GetContents() { 68 if hash := entry.GetGitHash(); hash != "" { 69 if _, ok := plumbing.FromHex(hash); ok { 70 index[hash] = entry 71 } else { 72 panic(fmt.Errorf("index: malformed hash: %s", hash)) 73 } 74 } 75 } 76 return index 77} 78 79func addSymlinkOrBlobReference( 80 manifest *Manifest, fileName string, target string, 81 index map[string]*Entry, missing *[]string, 82) *Entry { 83 if hash, found := strings.CutPrefix(target, BlobReferencePrefix); found { 84 if entry, found := index[hash]; found { 85 manifest.Contents[fileName] = entry 86 return entry 87 } else { 88 *missing = append(*missing, hash) 89 return nil 90 } 91 } else { 92 return AddSymlink(manifest, fileName, target) 93 } 94} 95 96func ExtractTar(ctx context.Context, reader io.Reader, oldManifest *Manifest) (*Manifest, error) { 97 archive := tar.NewReader(reader) 98 99 var dataBytesRecycled int64 100 var dataBytesTransferred int64 101 102 index := indexManifestByGitHash(oldManifest) 103 missing := []string{} 104 manifest := NewManifest() 105 for { 106 header, err := archive.Next() 107 if err == io.EOF { 108 break 109 } else if err != nil { 110 return nil, err 111 } 112 113 // For some reason, GNU tar includes any leading `.` path segments in archive filenames, 114 // unless there is a `..` path segment anywhere in the input filenames. 115 fileName := header.Name 116 for { 117 if strippedName, found := strings.CutPrefix(fileName, "./"); found { 118 fileName = strippedName 119 } else { 120 break 121 } 122 } 123 124 switch header.Typeflag { 125 case tar.TypeReg: 126 fileData, err := io.ReadAll(archive) 127 if err != nil { 128 return nil, fmt.Errorf("tar: %s: %w", fileName, err) 129 } 130 AddFile(manifest, fileName, fileData) 131 dataBytesTransferred += int64(len(fileData)) 132 case tar.TypeSymlink: 133 entry := addSymlinkOrBlobReference( 134 manifest, fileName, header.Linkname, index, &missing) 135 dataBytesRecycled += entry.GetOriginalSize() 136 case tar.TypeDir: 137 AddDirectory(manifest, fileName) 138 default: 139 AddProblem(manifest, fileName, "tar: unsupported type '%c'", header.Typeflag) 140 continue 141 } 142 } 143 144 if len(missing) > 0 { 145 return nil, UnresolvedRefError{missing} 146 } 147 148 // Ensure parent directories exist for all entries. 149 EnsureLeadingDirectories(manifest) 150 151 logc.Printf(ctx, 152 "reuse: %s recycled, %s transferred\n", 153 datasize.ByteSize(dataBytesRecycled).HR(), 154 datasize.ByteSize(dataBytesTransferred).HR(), 155 ) 156 157 return manifest, nil 158} 159 160// Used for zstd decompression inside zip files, it is recommended to share this. 161var zstdDecomp = zstd.ZipDecompressor() 162 163func ExtractZip(ctx context.Context, reader io.Reader, oldManifest *Manifest) (*Manifest, error) { 164 data, err := io.ReadAll(reader) 165 if err != nil { 166 return nil, err 167 } 168 169 archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) 170 if err != nil { 171 return nil, err 172 } 173 174 // Support zstd compression inside zip files. 175 archive.RegisterDecompressor(zstd.ZipMethodWinZip, zstdDecomp) 176 archive.RegisterDecompressor(zstd.ZipMethodPKWare, zstdDecomp) 177 178 // Detect and defuse zipbombs. 179 var totalSize uint64 180 for _, file := range archive.File { 181 if totalSize+file.UncompressedSize64 < totalSize { 182 // Would overflow 183 totalSize = math.MaxUint64 184 break 185 } 186 totalSize += file.UncompressedSize64 187 } 188 if totalSize > config.Limits.MaxSiteSize.Bytes() { 189 return nil, fmt.Errorf("%w: decompressed size %s exceeds %s limit", 190 ErrArchiveTooLarge, 191 datasize.ByteSize(totalSize).HR(), 192 config.Limits.MaxSiteSize.HR(), 193 ) 194 } 195 196 var dataBytesRecycled int64 197 var dataBytesTransferred int64 198 199 index := indexManifestByGitHash(oldManifest) 200 missing := []string{} 201 manifest := NewManifest() 202 for _, file := range archive.File { 203 if strings.HasSuffix(file.Name, "/") { 204 AddDirectory(manifest, file.Name) 205 } else { 206 fileReader, err := file.Open() 207 if err != nil { 208 return nil, err 209 } 210 defer fileReader.Close() 211 212 fileData, err := io.ReadAll(fileReader) 213 if err != nil { 214 return nil, fmt.Errorf("zip: %s: %w", file.Name, err) 215 } 216 217 if file.Mode()&os.ModeSymlink != 0 { 218 entry := addSymlinkOrBlobReference( 219 manifest, file.Name, string(fileData), index, &missing) 220 dataBytesRecycled += entry.GetOriginalSize() 221 } else { 222 AddFile(manifest, file.Name, fileData) 223 dataBytesTransferred += int64(len(fileData)) 224 } 225 } 226 } 227 228 if len(missing) > 0 { 229 return nil, UnresolvedRefError{missing} 230 } 231 232 // Ensure parent directories exist for all entries. 233 EnsureLeadingDirectories(manifest) 234 235 logc.Printf(ctx, 236 "reuse: %s recycled, %s transferred\n", 237 datasize.ByteSize(dataBytesRecycled).HR(), 238 datasize.ByteSize(dataBytesTransferred).HR(), 239 ) 240 241 return manifest, nil 242} 243