🧱 Chunk is a download manager for slow and unstable servers
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge pull request #34 from cuducos/cli

Re-introducing chunk CLI with richer parameter options

authored by

Daniel Fireman and committed by
GitHub
50891bf8 53b8bf9b

+101 -51
+44 -8
cmd/chunk/main.go
··· 4 4 "fmt" 5 5 "log" 6 6 "os" 7 + "time" 7 8 8 9 "github.com/cuducos/chunk" 10 + "github.com/spf13/cobra" 9 11 ) 10 12 11 - func main() { 12 - chunk := chunk.DefaultDownloader() 13 - prog := newProgress() 14 - for status := range chunk.Download(os.Args[1:len(os.Args)]...) { 15 - if status.Error != nil { 16 - log.Fatal(status.Error) 13 + var rootCmd = &cobra.Command{ 14 + Use: "chunk", 15 + Short: "Download tool for slow and unstable servers", 16 + Long: "Download tool for slow and unstable servers using HTTP range requests, retries per HTTP request (not by file), prevents re-downloading the same content range and supports wait time to give servers time to recover.", 17 + Run: func(cmd *cobra.Command, args []string) { 18 + chunk := chunk.DefaultDownloader() 19 + chunk.Timeout = timeoutChunk 20 + chunk.ConcurrencyPerServer = concurrencyPerServer 21 + chunk.MaxRetries = maxRetriesChunk 22 + chunk.WaitRetry = waitBetweenRetries 23 + chunk.ChunkSize = chunkSize 24 + prog := newProgress() 25 + for status := range chunk.Download(os.Args[1:len(os.Args)]...) { 26 + if status.Error != nil { 27 + log.Fatal(status.Error) 28 + } 29 + prog.update(status) 17 30 } 18 - prog.update(status) 31 + fmt.Printf("\r%s\nDownloaded to: %s", prog.String(), os.TempDir()) 32 + }, 33 + } 34 + 35 + // Flags 36 + var ( 37 + timeoutChunk time.Duration 38 + concurrencyPerServer int 39 + maxRetriesChunk uint 40 + chunkSize int64 41 + waitBetweenRetries time.Duration 42 + ) 43 + 44 + func init() { 45 + rootCmd.Flags().DurationVarP(&timeoutChunk, "timeout", "t", chunk.DefaultTimeout, "timeout for the download of each chunk from each URL.") 46 + rootCmd.Flags().UintVarP(&maxRetriesChunk, "max-retries", "r", chunk.DefaultMaxRetries, "maximum number of retries for each chunk.") 47 + rootCmd.Flags().DurationVarP(&waitBetweenRetries, "wait-retry", "w", chunk.DefaultWaitRetry, "pause before retrying an HTTP request that has failed.") 48 + rootCmd.Flags().Int64VarP(&chunkSize, "chunk-size", "s", chunk.DefaultChunkSize, "maximum size of each HTTP request done using the content range header.") 49 + rootCmd.Flags().IntVarP(&concurrencyPerServer, "concurrency-per-server", "c", chunk.DefaultConcurrencyPerServer, "controls the max number of concurrent connections opened to the same server.") 50 + } 51 + 52 + func main() { 53 + if err := rootCmd.Execute(); err != nil { 54 + fmt.Println(err) 55 + os.Exit(1) 19 56 } 20 - fmt.Printf("\r%s\nDownloaded to: %s", prog.String(), os.TempDir()) 21 57 }
+25 -26
downloader.go
··· 15 15 ) 16 16 17 17 const ( 18 - DefaultTimeoutPerChunk = 90 * time.Second 19 - DefaultMaxParallelDownloadsPerServer = 8 20 - DefaultMaxRetriesPerChunk = 5 21 - DefaultChunkSize = 8192 22 - DefaultWaitBetweenRetries = 0 * time.Minute 18 + DefaultTimeout = 90 * time.Second 19 + DefaultConcurrencyPerServer = 8 20 + DefaultMaxRetries = 5 21 + DefaultChunkSize = 8192 22 + DefaultWaitRetry = 1 * time.Second 23 23 ) 24 24 25 25 // DownloadStatus is the data propagated via the channel sent back to the user ··· 62 62 // URL. A chunk is a part of a file requested using the content range HTTP 63 63 // header. Thus, this timeout is not the timeout for the each file or for 64 64 // the the download of every file). 65 - TimeoutPerChunk time.Duration 65 + Timeout time.Duration 66 66 67 - // MaxParallelDownloadsPerServer controls how many requests are sent in 68 - // parallel to the same server. If all the URLs are from the same server 69 - // this is the total of parallel requests. If the user is downloading files 70 - // from different servers (including different subdomains), this limit is 71 - // applied to each server idependently. 72 - MaxParallelDownloadsPerServer int 67 + // MaxParallelDownloadsPerServer controls the max number of concurrent 68 + // connections opened to the same server. If all the URLs are from the same 69 + // server this is the total of concurrent connections. If the user is downloading 70 + // files from different servers, this limit is applied to each server idependently. 71 + ConcurrencyPerServer int 73 72 74 73 // MaxRetriesPerChunk is the maximum amount of retries for each HTTP request 75 74 // using the content range header that fails. 76 - MaxRetriesPerChunk uint 75 + MaxRetries uint 77 76 78 77 // ChunkSize is the maximum size of each HTTP request done using the 79 78 // content range header. There is no way to specify how many chunks a ··· 83 82 84 83 // WaitBetweenRetries is an optional pause before retrying an HTTP request 85 84 // that has failed. 86 - WaitBetweenRetries time.Duration 85 + WaitRetry time.Duration 87 86 } 88 87 89 88 type chunk struct { ··· 117 116 } 118 117 119 118 func (d *Downloader) downloadChunkWithTimeout(userCtx context.Context, u string, c chunk) ([]byte, error) { 120 - ctx, cancel := context.WithTimeout(userCtx, d.TimeoutPerChunk) // need to propagate context, which might contain app-specific data. 119 + ctx, cancel := context.WithTimeout(userCtx, d.Timeout) // need to propagate context, which might contain app-specific data. 121 120 defer cancel() 122 121 ch := make(chan []byte) 123 122 errs := make(chan error) ··· 161 160 ch <- resp 162 161 return nil 163 162 }, 164 - retry.Attempts(d.MaxRetriesPerChunk), 165 - retry.MaxDelay(d.WaitBetweenRetries), 163 + retry.Attempts(d.MaxRetries), 164 + retry.MaxDelay(d.WaitRetry), 166 165 ) 167 166 if err != nil { 168 167 return 0, fmt.Errorf("error sending get http request to %s: %w", u, err) ··· 194 193 ch <- b 195 194 return nil 196 195 }, 197 - retry.Attempts(d.MaxRetriesPerChunk), 198 - retry.MaxDelay(d.WaitBetweenRetries), 196 + retry.Attempts(d.MaxRetries), 197 + retry.MaxDelay(d.WaitRetry), 199 198 ) 200 199 if err != nil { 201 200 return nil, fmt.Errorf("error downloading %s: %w", u, err) ··· 275 274 // context can be used to stop all downloads in progress. 276 275 func (d *Downloader) DownloadWithContext(ctx context.Context, urls ...string) <-chan DownloadStatus { 277 276 if d.client == nil { 278 - d.client = newClient(d.MaxParallelDownloadsPerServer, d.TimeoutPerChunk) 277 + d.client = newClient(d.ConcurrencyPerServer, d.Timeout) 279 278 } 280 279 ch := make(chan DownloadStatus, 2*len(urls)) // the first status will be the total file size (and or an error creating/trucating the file). 281 280 var wg sync.WaitGroup // this wait group is used to wait for all chunks (from all downloads) to finish. ··· 304 303 // the constants in this package for their values. 305 304 func DefaultDownloader() *Downloader { 306 305 return &Downloader{ 307 - TimeoutPerChunk: DefaultTimeoutPerChunk, 308 - MaxParallelDownloadsPerServer: DefaultMaxParallelDownloadsPerServer, 309 - MaxRetriesPerChunk: DefaultMaxRetriesPerChunk, 310 - ChunkSize: DefaultChunkSize, 311 - WaitBetweenRetries: DefaultWaitBetweenRetries, 312 - client: newClient(DefaultMaxRetriesPerChunk, DefaultTimeoutPerChunk), 306 + Timeout: DefaultTimeout, 307 + ConcurrencyPerServer: DefaultConcurrencyPerServer, 308 + MaxRetries: DefaultMaxRetries, 309 + ChunkSize: DefaultChunkSize, 310 + WaitRetry: DefaultWaitRetry, 311 + client: newClient(DefaultMaxRetries, DefaultTimeout), 313 312 } 314 313 } 315 314
+15 -15
downloader_test.go
··· 38 38 )) 39 39 defer s.Close() 40 40 d := Downloader{ 41 - TimeoutPerChunk: timeout, 42 - MaxRetriesPerChunk: 4, 43 - MaxParallelDownloadsPerServer: 1, 44 - ChunkSize: 1024, 45 - WaitBetweenRetries: 0 * time.Second, 41 + Timeout: timeout, 42 + MaxRetries: 4, 43 + ConcurrencyPerServer: 1, 44 + ChunkSize: 1024, 45 + WaitRetry: 0 * time.Second, 46 46 } 47 47 ch := d.Download(s.URL) 48 48 <-ch // discard the first got (just the file size) ··· 191 191 defer s.Close() 192 192 193 193 d := Downloader{ 194 - TimeoutPerChunk: timeout, 195 - MaxRetriesPerChunk: 4, 196 - MaxParallelDownloadsPerServer: 1, 197 - ChunkSize: 1024, 198 - WaitBetweenRetries: 0 * time.Second, 194 + Timeout: timeout, 195 + MaxRetries: 4, 196 + ConcurrencyPerServer: 1, 197 + ChunkSize: 1024, 198 + WaitRetry: 0 * time.Second, 199 199 } 200 200 ch := d.Download(s.URL) 201 201 <-ch // discard the first status (just the file size) ··· 243 243 )) 244 244 defer s.Close() 245 245 d := Downloader{ 246 - TimeoutPerChunk: timeout, 247 - MaxRetriesPerChunk: 4, 248 - MaxParallelDownloadsPerServer: 1, 249 - ChunkSize: 1024, 250 - WaitBetweenRetries: 0 * time.Second, 246 + Timeout: timeout, 247 + MaxRetries: 4, 248 + ConcurrencyPerServer: 1, 249 + ChunkSize: 1024, 250 + WaitRetry: 0 * time.Second, 251 251 } 252 252 userCtx, cancFunc := context.WithTimeout(context.Background(), userTimeout) 253 253 defer cancFunc()
+9 -2
go.mod
··· 2 2 3 3 go 1.19 4 4 5 - require github.com/avast/retry-go v3.0.0+incompatible 5 + require ( 6 + github.com/avast/retry-go v3.0.0+incompatible 7 + github.com/spf13/cobra v1.6.1 8 + ) 6 9 7 - require github.com/stretchr/testify v1.8.1 // indirect 10 + require ( 11 + github.com/inconshreveable/mousetrap v1.0.1 // indirect 12 + github.com/spf13/pflag v1.0.5 // indirect 13 + github.com/stretchr/testify v1.8.1 // indirect 14 + )
+8
go.sum
··· 1 1 github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0= 2 2 github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= 3 + github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 3 4 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 5 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 5 6 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 7 + github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7Pgzkat/bFNc= 8 + github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= 6 9 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 7 10 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 11 + github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 12 + github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= 13 + github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= 14 + github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 15 + github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 8 16 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 9 17 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 10 18 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=