[mirror] Scalable static site server for Git forges (like GitHub Pages)

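Resolve symlinks whose target starts with `/git/blobs/` as references to blobs in the old manifest, looked up by git hash; references that cannot be resolved are recorded as problems.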

This will be used for incremental archive uploads.

Changed files
+64 -21
src
+38 -4
src/extract.go
··· 12 12 "strings" 13 13 14 14 "github.com/c2h5oh/datasize" 15 + "github.com/go-git/go-git/v6/plumbing" 15 16 "github.com/klauspost/compress/zstd" 16 17 ) 17 18 18 19 var ErrArchiveTooLarge = errors.New("archive too large") 20 + 21 + const BlobReferencePrefix = "/git/blobs/" 19 22 20 23 func boundArchiveStream(reader io.Reader) io.Reader { 21 24 return ReadAtMost(reader, int64(config.Limits.MaxSiteSize.Bytes()), ··· 42 45 return next(boundArchiveStream(stream)) 43 46 } 44 47 45 - func ExtractTar(reader io.Reader) (*Manifest, error) { 48 + // Returns a map of git hash to entry. If `manifest` is nil, returns an empty map. 49 + func indexManifestByGitHash(manifest *Manifest) map[string]*Entry { 50 + index := map[string]*Entry{} 51 + for _, entry := range manifest.GetContents() { 52 + if hash := entry.GetGitHash(); hash != "" { 53 + if _, ok := plumbing.FromHex(hash); ok { 54 + index[hash] = entry 55 + } else { 56 + panic(fmt.Errorf("index: malformed hash: %s", hash)) 57 + } 58 + } 59 + } 60 + return index 61 + } 62 + 63 + func addSymlinkOrBlobReference( 64 + manifest *Manifest, fileName string, target string, index map[string]*Entry, 65 + ) { 66 + if hash, found := strings.CutPrefix(target, BlobReferencePrefix); found { 67 + if entry, found := index[hash]; found { 68 + manifest.Contents[fileName] = entry 69 + } else { 70 + AddProblem(manifest, fileName, "unresolved reference: %s", target) 71 + } 72 + } else { 73 + AddSymlink(manifest, fileName, target) 74 + } 75 + } 76 + 77 + func ExtractTar(reader io.Reader, oldManifest *Manifest) (*Manifest, error) { 46 78 archive := tar.NewReader(reader) 47 79 80 + index := indexManifestByGitHash(oldManifest) 48 81 manifest := NewManifest() 49 82 for { 50 83 header, err := archive.Next() ··· 73 106 } 74 107 AddFile(manifest, fileName, fileData) 75 108 case tar.TypeSymlink: 76 - AddSymlink(manifest, fileName, header.Linkname) 109 + addSymlinkOrBlobReference(manifest, fileName, header.Linkname, index) 77 110 case tar.TypeDir: 78 111 
AddDirectory(manifest, fileName) 79 112 default: ··· 84 117 return manifest, nil 85 118 } 86 119 87 - func ExtractZip(reader io.Reader) (*Manifest, error) { 120 + func ExtractZip(reader io.Reader, oldManifest *Manifest) (*Manifest, error) { 88 121 data, err := io.ReadAll(reader) 89 122 if err != nil { 90 123 return nil, err ··· 108 141 ) 109 142 } 110 143 144 + index := indexManifestByGitHash(oldManifest) 111 145 manifest := NewManifest() 112 146 for _, file := range archive.File { 113 147 if strings.HasSuffix(file.Name, "/") { ··· 125 159 } 126 160 127 161 if file.Mode()&os.ModeSymlink != 0 { 128 - AddSymlink(manifest, file.Name, string(fileData)) 162 + addSymlinkOrBlobReference(manifest, file.Name, string(fileData), index) 129 163 } else { 130 164 AddFile(manifest, file.Name, fileData) 131 165 }
+17 -12
src/manifest.go
··· 104 104 return entry 105 105 } 106 106 107 - func AddFile(manifest *Manifest, path string, data []byte) *Entry { 107 + func AddFile(manifest *Manifest, fileName string, data []byte) *Entry { 108 108 // Fill in `git_hash` even for files not originating from git using the SHA256 algorithm; 109 109 // we use this primarily for incremental archive uploads, but when support for git SHA256 110 110 // repositories is complete, archive uploads and git checkouts will have cross-support for ··· 113 113 hasher.Write(data) 114 114 entry := NewManifestEntry(Type_InlineFile, data) 115 115 entry.GitHash = proto.String(hasher.Sum().String()) 116 - manifest.Contents[path] = entry 116 + manifest.Contents[fileName] = entry 117 117 return entry 118 118 } 119 119 120 - func AddSymlink(manifest *Manifest, path string, target string) *Entry { 121 - entry := NewManifestEntry(Type_Symlink, []byte(target)) 122 - manifest.Contents[path] = entry 123 - return entry 120 + func AddSymlink(manifest *Manifest, fileName string, target string) *Entry { 121 + if path.IsAbs(target) { 122 + AddProblem(manifest, fileName, "absolute symlink: %s", target) 123 + return nil 124 + } else { 125 + entry := NewManifestEntry(Type_Symlink, []byte(target)) 126 + manifest.Contents[fileName] = entry 127 + return entry 128 + } 124 129 } 125 130 126 - func AddDirectory(manifest *Manifest, path string) *Entry { 127 - path = strings.TrimSuffix(path, "/") 131 + func AddDirectory(manifest *Manifest, dirName string) *Entry { 132 + dirName = strings.TrimSuffix(dirName, "/") 128 133 entry := NewManifestEntry(Type_Directory, nil) 129 - manifest.Contents[path] = entry 134 + manifest.Contents[dirName] = entry 130 135 return entry 131 136 } 132 137 133 - func AddProblem(manifest *Manifest, path, format string, args ...any) error { 138 + func AddProblem(manifest *Manifest, pathName, format string, args ...any) error { 134 139 cause := fmt.Sprintf(format, args...) 
135 140 manifest.Problems = append(manifest.Problems, &Problem{ 136 - Path: proto.String(path), 141 + Path: proto.String(pathName), 137 142 Cause: proto.String(cause), 138 143 }) 139 - return fmt.Errorf("%s: %s", path, cause) 144 + return fmt.Errorf("%s: %s", pathName, cause) 140 145 } 141 146 142 147 func GetProblemReport(manifest *Manifest) []string {
+9 -5
src/update.go
··· 122 122 ) (result UpdateResult) { 123 123 var err error 124 124 125 - // Ignore errors; here the old manifest is used only to determine the update outcome. 125 + // Ignore errors; worst case we have to re-fetch all of the blobs. 126 126 oldManifest, _, _ := backend.GetManifest(ctx, webRoot, GetManifestOptions{}) 127 127 128 + extractTar := func(reader io.Reader) (*Manifest, error) { 129 + return ExtractTar(reader, oldManifest) 130 + } 131 + 128 132 var newManifest *Manifest 129 133 switch contentType { 130 134 case "application/x-tar": 131 135 logc.Printf(ctx, "update %s: (tar)", webRoot) 132 - newManifest, err = ExtractTar(reader) // yellow? 136 + newManifest, err = extractTar(reader) // yellow? 133 137 case "application/x-tar+gzip": 134 138 logc.Printf(ctx, "update %s: (tar.gz)", webRoot) 135 - newManifest, err = ExtractGzip(reader, ExtractTar) // definitely yellow. 139 + newManifest, err = ExtractGzip(reader, extractTar) // definitely yellow. 136 140 case "application/x-tar+zstd": 137 141 logc.Printf(ctx, "update %s: (tar.zst)", webRoot) 138 - newManifest, err = ExtractZstd(reader, ExtractTar) 142 + newManifest, err = ExtractZstd(reader, extractTar) 139 143 case "application/zip": 140 144 logc.Printf(ctx, "update %s: (zip)", webRoot) 141 - newManifest, err = ExtractZip(reader) 145 + newManifest, err = ExtractZip(reader, oldManifest) 142 146 default: 143 147 err = errArchiveFormat 144 148 }