···44 "fmt"
55 "log"
66 "os"
77+ "time"
7889 "github.com/cuducos/chunk"
1010+ "github.com/spf13/cobra"
911)
10121111-func main() {
1212- chunk := chunk.DefaultDownloader()
1313- prog := newProgress()
1414- for status := range chunk.Download(os.Args[1:len(os.Args)]...) {
1515- if status.Error != nil {
1616- log.Fatal(status.Error)
1313+var rootCmd = &cobra.Command{
1414+ Use: "chunk",
1515+ Short: "Download tool for slow and unstable servers",
1616+ Long: "Download tool for slow and unstable servers using HTTP range requests, retries per HTTP request (not by file), prevents re-downloading the same content range and supports wait time to give servers time to recover.",
1717+ Run: func(cmd *cobra.Command, args []string) {
1818+ chunk := chunk.DefaultDownloader()
1919+ chunk.Timeout = timeoutChunk
2020+ chunk.ConcurrencyPerServer = concurrencyPerServer
2121+ chunk.MaxRetries = maxRetriesChunk
2222+ chunk.WaitRetry = waitBetweenRetries
2323+ chunk.ChunkSize = chunkSize
2424+ prog := newProgress()
2525+ for status := range chunk.Download(os.Args[1:len(os.Args)]...) {
2626+ if status.Error != nil {
2727+ log.Fatal(status.Error)
2828+ }
2929+ prog.update(status)
1730 }
1818- prog.update(status)
3131+ fmt.Printf("\r%s\nDownloaded to: %s", prog.String(), os.TempDir())
3232+ },
3333+}
// Flags: command-line flag values, populated by init() below and read by
// rootCmd's Run function to configure the chunk.Downloader.
var (
	timeoutChunk         time.Duration // per-chunk download timeout
	concurrencyPerServer int           // max concurrent connections per server
	maxRetriesChunk      uint          // max retries per chunk
	chunkSize            int64         // max size of each ranged HTTP request
	waitBetweenRetries   time.Duration // pause before retrying a failed request
)
// init registers the downloader configuration flags on the root command,
// using the chunk package defaults as the flag default values.
func init() {
	rootCmd.Flags().DurationVarP(&timeoutChunk, "timeout", "t", chunk.DefaultTimeout, "timeout for the download of each chunk from each URL.")
	rootCmd.Flags().UintVarP(&maxRetriesChunk, "max-retries", "r", chunk.DefaultMaxRetries, "maximum number of retries for each chunk.")
	rootCmd.Flags().DurationVarP(&waitBetweenRetries, "wait-retry", "w", chunk.DefaultWaitRetry, "pause before retrying an HTTP request that has failed.")
	rootCmd.Flags().Int64VarP(&chunkSize, "chunk-size", "s", chunk.DefaultChunkSize, "maximum size of each HTTP request done using the content range header.")
	rootCmd.Flags().IntVarP(&concurrencyPerServer, "concurrency-per-server", "c", chunk.DefaultConcurrencyPerServer, "controls the max number of concurrent connections opened to the same server.")
}
5151+5252+func main() {
5353+ if err := rootCmd.Execute(); err != nil {
5454+ fmt.Println(err)
5555+ os.Exit(1)
1956 }
2020- fmt.Printf("\r%s\nDownloaded to: %s", prog.String(), os.TempDir())
2157}
+25-26
downloader.go
···1515)
const (
	// DefaultTimeout is the default timeout for the download of each chunk
	// from each URL (not the timeout for a whole file).
	DefaultTimeout = 90 * time.Second

	// DefaultConcurrencyPerServer is the default maximum number of
	// concurrent connections opened to the same server.
	DefaultConcurrencyPerServer = 8

	// DefaultMaxRetries is the default maximum number of retries for each
	// failed ranged HTTP request.
	DefaultMaxRetries = 5

	// DefaultChunkSize is the default maximum size, in bytes, of each HTTP
	// request done using the content range header.
	DefaultChunkSize = 8192

	// DefaultWaitRetry is the default pause before retrying a failed HTTP
	// request.
	DefaultWaitRetry = 1 * time.Second
)
24242525// DownloadStatus is the data propagated via the channel sent back to the user
···6262 // URL. A chunk is a part of a file requested using the content range HTTP
6363 // header. Thus, this timeout is not the timeout for each file or for
6464 // the download of every file).
6565- TimeoutPerChunk time.Duration
6565+ Timeout time.Duration
66666767- // MaxParallelDownloadsPerServer controls how many requests are sent in
6868- // parallel to the same server. If all the URLs are from the same server
6969- // this is the total of parallel requests. If the user is downloading files
7070- // from different servers (including different subdomains), this limit is
7171- // applied to each server idependently.
7272- MaxParallelDownloadsPerServer int
6767+ // ConcurrencyPerServer controls the max number of concurrent
6868+ // connections opened to the same server. If all the URLs are from the same
6969+ // server this is the total of concurrent connections. If the user is downloading
7070+ // files from different servers, this limit is applied to each server independently.
7171+ ConcurrencyPerServer int
73727473 // MaxRetries is the maximum number of retries for each HTTP request
7574 // using the content range header that fails.
7676- MaxRetriesPerChunk uint
7575+ MaxRetries uint
77767877 // ChunkSize is the maximum size of each HTTP request done using the
7978 // content range header. There is no way to specify how many chunks a
83828483 // WaitRetry is an optional pause before retrying an HTTP request
8584 // that has failed.
8686- WaitBetweenRetries time.Duration
8585+ WaitRetry time.Duration
8786}
88878988type chunk struct {
···117116}
118117119118func (d *Downloader) downloadChunkWithTimeout(userCtx context.Context, u string, c chunk) ([]byte, error) {
120120- ctx, cancel := context.WithTimeout(userCtx, d.TimeoutPerChunk) // need to propagate context, which might contain app-specific data.
119119+ ctx, cancel := context.WithTimeout(userCtx, d.Timeout) // need to propagate context, which might contain app-specific data.
121120 defer cancel()
122121 ch := make(chan []byte)
123122 errs := make(chan error)
···161160 ch <- resp
162161 return nil
163162 },
164164- retry.Attempts(d.MaxRetriesPerChunk),
165165- retry.MaxDelay(d.WaitBetweenRetries),
163163+ retry.Attempts(d.MaxRetries),
164164+ retry.MaxDelay(d.WaitRetry),
166165 )
167166 if err != nil {
168167 return 0, fmt.Errorf("error sending get http request to %s: %w", u, err)
···194193 ch <- b
195194 return nil
196195 },
197197- retry.Attempts(d.MaxRetriesPerChunk),
198198- retry.MaxDelay(d.WaitBetweenRetries),
196196+ retry.Attempts(d.MaxRetries),
197197+ retry.MaxDelay(d.WaitRetry),
199198 )
200199 if err != nil {
201200 return nil, fmt.Errorf("error downloading %s: %w", u, err)
···275274// context can be used to stop all downloads in progress.
276275func (d *Downloader) DownloadWithContext(ctx context.Context, urls ...string) <-chan DownloadStatus {
277276 if d.client == nil {
278278- d.client = newClient(d.MaxParallelDownloadsPerServer, d.TimeoutPerChunk)
277277+ d.client = newClient(d.ConcurrencyPerServer, d.Timeout)
279278 }
280279 ch := make(chan DownloadStatus, 2*len(urls)) // the first status will be the total file size (and or an error creating/trucating the file).
281280 var wg sync.WaitGroup // this wait group is used to wait for all chunks (from all downloads) to finish.
···304303// the constants in this package for their values.
305304func DefaultDownloader() *Downloader {
306305 return &Downloader{
307307- TimeoutPerChunk: DefaultTimeoutPerChunk,
308308- MaxParallelDownloadsPerServer: DefaultMaxParallelDownloadsPerServer,
309309- MaxRetriesPerChunk: DefaultMaxRetriesPerChunk,
310310- ChunkSize: DefaultChunkSize,
311311- WaitBetweenRetries: DefaultWaitBetweenRetries,
312312- client: newClient(DefaultMaxRetriesPerChunk, DefaultTimeoutPerChunk),
306306+ Timeout: DefaultTimeout,
307307+ ConcurrencyPerServer: DefaultConcurrencyPerServer,
308308+ MaxRetries: DefaultMaxRetries,
309309+ ChunkSize: DefaultChunkSize,
310310+ WaitRetry: DefaultWaitRetry,
311311+ client: newClient(DefaultMaxRetries, DefaultTimeout),
313312 }
314313}
315314