[mirror] Command-line application for uploading a site to a git-pages server

Add incremental directory upload support. With --incremental (which requires --upload-dir), regular files larger than 256 bytes are no longer sent inline; the archive instead carries a symlink entry pointing at /git/blobs/<hash>, where <hash> is the git-style SHA-256 blob id of the file contents. If the server cannot resolve some of those blobs, it replies 422 with Content-Type application/vnd.git-pages.unresolved listing the missing ids one per line, and the client resubmits the archive with those blobs included.
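A minimal sketch of the per-file decision follows, assuming a hypothetical ./site directory and a threshold mirroring the patch's gitBlobSHA256 and incrementalSizeThreshold; it is an illustration of the scheme, not part of main.go:

// Illustration only: decide, per regular file, whether the uploader would send
// the content inline or reference it by its git-style SHA-256 blob id.
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io/fs"
	"os"
	"strconv"
)

const threshold = 256 // same cut-off the patch uses for incremental uploads

// blobID computes the SHA-256 object id git assigns to blob content:
// sha256("blob " + decimal length + "\x00" + data).
func blobID(data []byte) string {
	h := sha256.New()
	h.Write([]byte("blob " + strconv.Itoa(len(data))))
	h.Write([]byte{0})
	h.Write(data)
	return hex.EncodeToString(h.Sum(nil))
}

func main() {
	root := os.DirFS("./site") // hypothetical directory to upload
	_ = fs.WalkDir(root, ".", func(name string, entry fs.DirEntry, err error) error {
		if err != nil || !entry.Type().IsRegular() {
			return err
		}
		data, err := fs.ReadFile(root, name)
		if err != nil {
			return err
		}
		if len(data) > threshold {
			// Large file: the uploader emits a symlink to /git/blobs/<id>;
			// the server asks for the blob only if it does not already have it.
			fmt.Printf("%s -> /git/blobs/%s\n", name, blobID(data))
		} else {
			fmt.Printf("%s -> inline (%d bytes)\n", name, len(data))
		}
		return nil
	})
}

For each file this prints either the inline choice or the /git/blobs/ reference the first request would contain; the second request only inlines the blobs the server reports as unresolved.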

Changed files
+113 -60
main.go
···
  import (
      "archive/tar"
+     "bufio"
      "bytes"
+     "crypto"
      "crypto/sha256"
+     "encoding/hex"
      "errors"
      "fmt"
      "io"
···
      "net/url"
      "os"
      "runtime/debug"
+     "strconv"
      "strings"

      "github.com/google/uuid"
···
  var challengeFlag = pflag.Bool("challenge", false, "compute DNS challenge entry from password (output zone file record)")
  var challengeBareFlag = pflag.Bool("challenge-bare", false, "compute DNS challenge entry from password (output bare TXT value)")
  var uploadGitFlag = pflag.String("upload-git", "", "replace site with contents of specified git repository")
- var uploadDirFlag = pflag.String("upload-dir", "", "replace whole site or a subdirectory with contents of specified directory")
- var deleteFlag = pflag.Bool("delete", false, "delete whole site or a subdirectory")
+ var uploadDirFlag = pflag.String("upload-dir", "", "replace whole site or a path with contents of specified directory")
+ var deleteFlag = pflag.Bool("delete", false, "delete whole site or a path")
  var debugManifestFlag = pflag.Bool("debug-manifest", false, "retrieve site manifest as ProtoJSON, for debugging")
  var serverFlag = pflag.String("server", "", "hostname of server to connect to")
  var pathFlag = pflag.String("path", "", "partially update site at specified path")
  var parentsFlag = pflag.Bool("parents", false, "create parent directories of --path")
  var atomicFlag = pflag.Bool("atomic", false, "require partial updates to be atomic")
+ var incrementalFlag = pflag.Bool("incremental", false, "only upload changed files")
  var verboseFlag = pflag.BoolP("verbose", "v", false, "display more information for debugging")
  var versionFlag = pflag.BoolP("version", "V", false, "display version information")
···
      return operations == 1
  }

+ func gitBlobSHA256(data []byte) string {
+     h := crypto.SHA256.New()
+     h.Write([]byte("blob "))
+     h.Write([]byte(strconv.FormatInt(int64(len(data)), 10)))
+     h.Write([]byte{0})
+     h.Write(data)
+     return hex.EncodeToString(h.Sum(nil))
+ }
+
  func displayFS(root fs.FS, prefix string) error {
      return fs.WalkDir(root, ".", func(name string, entry fs.DirEntry, err error) error {
          if err != nil {
···
      })
  }

- func archiveFS(writer io.Writer, root fs.FS, prefix string) (err error) {
+ // It doesn't make sense to use incremental updates for very small files since the cost of
+ // repeating a request to fill in a missing blob is likely to be higher than any savings gained.
+ const incrementalSizeThreshold = 256
+
+ func archiveFS(writer io.Writer, root fs.FS, prefix string, needBlobs []string) (err error) {
+     requestedSet := make(map[string]struct{})
+     for _, hash := range needBlobs {
+         requestedSet[hash] = struct{}{}
+     }
      zstdWriter, _ := zstd.NewWriter(writer)
      tarWriter := tar.NewWriter(zstdWriter)
      if err = fs.WalkDir(root, ".", func(name string, entry fs.DirEntry, err error) error {
          if err != nil {
              return err
          }
-         fileInfo, err := entry.Info()
-         if err != nil {
-             return err
-         }
-         var tarName string
+         header := &tar.Header{}
+         data := []byte{}
          if prefix == "" && name == "." {
              return nil
          } else if name == "." {
-             tarName = prefix
+             header.Name = prefix
          } else {
-             tarName = prefix + name
+             header.Name = prefix + name
          }
-         var file io.ReadCloser
-         var linkTarget string
          switch {
          case entry.Type().IsDir():
-             name += "/"
+             header.Typeflag = tar.TypeDir
+             header.Name += "/"
          case entry.Type().IsRegular():
-             if file, err = root.Open(name); err != nil {
+             header.Typeflag = tar.TypeReg
+             if data, err = fs.ReadFile(root, name); err != nil {
                  return err
              }
-             defer file.Close()
+             if *incrementalFlag && len(data) > incrementalSizeThreshold {
+                 hash := gitBlobSHA256(data)
+                 if _, requested := requestedSet[hash]; !requested {
+                     header.Typeflag = tar.TypeSymlink
+                     header.Linkname = "/git/blobs/" + hash
+                     data = nil
+                 }
+             }
          case entry.Type() == fs.ModeSymlink:
-             if linkTarget, err = fs.ReadLink(root, name); err != nil {
+             header.Typeflag = tar.TypeSymlink
+             if header.Linkname, err = fs.ReadLink(root, name); err != nil {
                  return err
              }
          default:
              return errors.New("tar: cannot add non-regular file")
          }
-         header, err := tar.FileInfoHeader(fileInfo, linkTarget)
-         if err != nil {
+         header.Size = int64(len(data))
+         if err = tarWriter.WriteHeader(header); err != nil {
              return err
          }
-         header.Name = tarName
-         if err = tarWriter.WriteHeader(header); err != nil {
+         if _, err = tarWriter.Write(data); err != nil {
              return err
          }
-         if file != nil {
-             _, err = io.Copy(tarWriter, file)
-         }
          return err
      }); err != nil {
          return
···
          return
      }
      return
+ }
+
+ // Stream archive data without ever loading the entire working set into RAM.
+ func streamArchiveFS(root fs.FS, prefix string, needBlobs []string) io.ReadCloser {
+     reader, writer := io.Pipe()
+     go func() {
+         err := archiveFS(writer, root, prefix, needBlobs)
+         if err != nil {
+             writer.CloseWithError(err)
+         } else {
+             writer.Close()
+         }
+     }()
+     return reader
  }

  func makeWhiteout(path string) (reader io.Reader) {
···
          }
      }

+     if *incrementalFlag && *uploadDirFlag == "" {
+         fmt.Fprintf(os.Stderr, "--incremental requires --upload-dir")
+         os.Exit(usageExitCode)
+     }
+
      var err error
      siteURL, err := url.Parse(pflag.Args()[0])
      if err != nil {
···
      }

      var request *http.Request
+     var uploadDir *os.Root
      switch {
      case *challengeFlag || *challengeBareFlag:
          if *passwordFlag == "" {
···
          request.Header.Add("Content-Type", "application/x-www-form-urlencoded")

      case *uploadDirFlag != "":
-         uploadDirFS, err := os.OpenRoot(*uploadDirFlag)
+         uploadDir, err = os.OpenRoot(*uploadDirFlag)
          if err != nil {
              fmt.Fprintf(os.Stderr, "error: invalid directory: %s\n", err)
              os.Exit(1)
          }

          if *verboseFlag {
-             err := displayFS(uploadDirFS.FS(), pathPrefix)
+             err := displayFS(uploadDir.FS(), pathPrefix)
              if err != nil {
                  fmt.Fprintf(os.Stderr, "error: %s\n", err)
                  os.Exit(1)
              }
          }

-         // Stream archive data without ever loading the entire working set into RAM.
-         reader, writer := io.Pipe()
-         go func() {
-             err = archiveFS(writer, uploadDirFS.FS(), pathPrefix)
-             if err != nil {
-                 fmt.Fprintf(os.Stderr, "error: %s\n", err)
-                 os.Exit(1)
-             }
-             writer.Close()
-         }()
-
          if *pathFlag == "" {
-             request, err = http.NewRequest("PUT", siteURL.String(), reader)
+             request, err = http.NewRequest("PUT", siteURL.String(), nil)
          } else {
-             request, err = http.NewRequest("PATCH", siteURL.String(), reader)
+             request, err = http.NewRequest("PATCH", siteURL.String(), nil)
          }
          if err != nil {
              fmt.Fprintf(os.Stderr, "error: %s\n", err)
              os.Exit(1)
          }
+         request.Body = streamArchiveFS(uploadDir.FS(), pathPrefix, []string{})
          request.ContentLength = -1
          request.Header.Add("Content-Type", "application/x-tar+zstd")
+         request.Header.Add("Accept", "application/vnd.git-pages.unresolved;q=1.0, text/plain;q=0.9")
          if *parentsFlag {
              request.Header.Add("Create-Parents", "yes")
          } else {
···
          request.Header.Set("Host", siteURL.Host)
      }

-     response, err := http.DefaultClient.Do(request)
-     if err != nil {
-         fmt.Fprintf(os.Stderr, "error: %s\n", err)
-         os.Exit(1)
-     }
-     if *verboseFlag {
-         fmt.Fprintf(os.Stderr, "server: %s\n", response.Header.Get("Server"))
-     }
-     if *debugManifestFlag {
-         if response.StatusCode == 200 {
-             io.Copy(os.Stdout, response.Body)
-             fmt.Fprintf(os.Stdout, "\n")
-         } else {
-             io.Copy(os.Stderr, response.Body)
+     displayServer := *verboseFlag
+     for {
+         response, err := http.DefaultClient.Do(request)
+         if err != nil {
+             fmt.Fprintf(os.Stderr, "error: %s\n", err)
              os.Exit(1)
          }
-     } else { // an update operation
-         if response.StatusCode == 200 {
-             fmt.Fprintf(os.Stdout, "result: %s\n", response.Header.Get("Update-Result"))
-             io.Copy(os.Stdout, response.Body)
-         } else {
-             fmt.Fprintf(os.Stderr, "result: error\n")
-             io.Copy(os.Stderr, response.Body)
-             os.Exit(1)
+         if displayServer {
+             fmt.Fprintf(os.Stderr, "server: %s\n", response.Header.Get("Server"))
+             displayServer = false
+         }
+         if *debugManifestFlag {
+             if response.StatusCode == http.StatusOK {
+                 io.Copy(os.Stdout, response.Body)
+                 fmt.Fprintf(os.Stdout, "\n")
+             } else {
+                 io.Copy(os.Stderr, response.Body)
+                 os.Exit(1)
+             }
+         } else { // an update operation
+             if *verboseFlag {
+                 fmt.Fprintf(os.Stderr, "response: %d %s\n",
+                     response.StatusCode, response.Header.Get("Content-Type"))
+             }
+             if response.StatusCode == http.StatusUnprocessableEntity &&
+                 response.Header.Get("Content-Type") == "application/vnd.git-pages.unresolved" {
+                 needBlobs := []string{}
+                 scanner := bufio.NewScanner(response.Body)
+                 for scanner.Scan() {
+                     needBlobs = append(needBlobs, scanner.Text())
+                 }
+                 response.Body.Close()
+                 request.Body = streamArchiveFS(uploadDir.FS(), pathPrefix, needBlobs)
+                 continue // resubmit
+             } else if response.StatusCode == http.StatusOK {
+                 fmt.Fprintf(os.Stdout, "result: %s\n", response.Header.Get("Update-Result"))
+                 io.Copy(os.Stdout, response.Body)
+             } else {
+                 fmt.Fprintf(os.Stderr, "result: error\n")
+                 io.Copy(os.Stderr, response.Body)
+                 os.Exit(1)
+             }
          }
+         break
      }
  }