fork of indigo with slightly nicer lexgen

better handling of backfill errors on pds fetch (#964)

I have a fair amount of random missing data coming from my consumer. Some
of it, I've found, is because the initial repo fetch errored for some reason
and then we just drop all future events (which kinda makes sense, I
guess).

However, it's useful to distinguish why the fetch failed; distinguishing
between a rate limit and 'this repo does not exist' is important for
recovery situations.

authored by Whyrusleeping and committed by GitHub 50f1cdbe a4e0cc37

Changed files
+23 -7
backfill
+23 -7
backfill/backfill.go
··· 308 308 err error 309 309 } 310 310 311 + type FetchRepoError struct { 312 + StatusCode int 313 + Status string 314 + } 315 + 316 + func (e *FetchRepoError) Error() string { 317 + reason := "unknown error" 318 + if e.StatusCode == http.StatusBadRequest { 319 + reason = "repo not found" 320 + } else { 321 + reason = e.Status 322 + } 323 + return fmt.Sprintf("failed to get repo: %s (%d)", reason, e.StatusCode) 324 + } 325 + 311 326 // Fetches a repo CAR file over HTTP from the indicated host. If successful, parses the CAR and returns repo.Repo 312 327 func (b *Backfiller) fetchRepo(ctx context.Context, did, since, host string) (*repo.Repo, error) { 313 328 url := fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo?did=%s", host, did) ··· 340 355 } 341 356 342 357 if resp.StatusCode != http.StatusOK { 343 - reason := "unknown error" 344 - if resp.StatusCode == http.StatusBadRequest { 345 - reason = "repo not found" 346 - } else { 347 - reason = resp.Status 358 + return nil, &FetchRepoError{ 359 + StatusCode: resp.StatusCode, 360 + Status: resp.Status, 348 361 } 349 - return nil, fmt.Errorf("failed to get repo: %s", reason) 350 362 } 351 363 352 364 instrumentedReader := instrumentedReader{ ··· 401 413 r, err = b.fetchRepo(ctx, repoDID, job.Rev(), pdsHost) 402 414 if err != nil { 403 415 slog.Warn("repo CAR fetch from PDS failed", "did", repoDID, "since", job.Rev(), "pdsHost", pdsHost, "err", err) 404 - return "repo CAR fetch from PDS failed", err 416 + rfe, ok := err.(*FetchRepoError) 417 + if ok { 418 + return fmt.Sprintf("failed to fetch repo CAR from PDS (http %d:%s)", rfe.StatusCode, rfe.Status), err 419 + } 420 + return "failed to fetch repo CAR from PDS", err 405 421 } 406 422 } 407 423