fork of indigo with slightly nicer lexgen

backfill: add fallback to fetch CAR file from PDS (if relay errors) (#855)

The main motivation here is that the non-archival relay returns a 4xx
error when fetching repos, and we want to fall back to fetching those
repos from the actual PDS when that happens. This adds a new code
branch: when a relay CAR fetch fails, do an identity lookup to find the
account's PDS instance, and fetch the CAR from there.
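
As a rough sketch of the identity-lookup half of that branch (distilled from the `backfill/backfill.go` diff below, using indigo's `atproto/identity` and `atproto/syntax` packages; `resolvePDSHost` is a hypothetical helper name and the DID in `main` is only an example value):

```go
package main

import (
	"context"
	"fmt"

	"github.com/bluesky-social/indigo/atproto/identity"
	"github.com/bluesky-social/indigo/atproto/syntax"
)

// resolvePDSHost resolves an account DID through an identity.Directory and
// returns the PDS endpoint to fetch the repo CAR from when the relay refuses
// the request. (Hypothetical helper; the PR inlines this in BackfillRepo.)
func resolvePDSHost(ctx context.Context, dir identity.Directory, did string) (string, error) {
	ident, err := dir.LookupDID(ctx, syntax.DID(did))
	if err != nil {
		return "", fmt.Errorf("resolving DID for PDS repo fetch: %w", err)
	}
	pdsHost := ident.PDSEndpoint()
	if pdsHost == "" {
		return "", fmt.Errorf("no PDS endpoint for DID: %s", did)
	}
	return pdsHost, nil
}

func main() {
	dir := identity.DefaultDirectory()
	// Example DID; in the backfiller this comes from the job whose relay fetch failed.
	host, err := resolvePDSHost(context.Background(), dir, "did:plc:ewvi7nxzyoun6zhxrhs64oiz")
	if err != nil {
		panic(err)
	}
	fmt.Println("fetch CAR from:", host+"/xrpc/com.atproto.sync.getRepo")
}
```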

For the search code specifically, the backfiller re-uses the indexer's
existing identity directory, so the same DID is not resolved twice.
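
A minimal sketch of that wiring, assuming the exported `Directory` field this change adds to `Backfiller` (the empty struct literal stands in for the value `backfill.NewBackfiller` would return; its arguments are elided):

```go
package main

import (
	"github.com/bluesky-social/indigo/atproto/identity"
	"github.com/bluesky-social/indigo/backfill"
)

func main() {
	// One caching resolver shared by the whole process.
	dir := identity.DefaultDirectory()

	// In real code bf comes from backfill.NewBackfiller(...); the point is only
	// that Directory can be overridden after construction, so DIDs already
	// resolved during indexing are served from the shared cache during backfill.
	bf := &backfill.Backfiller{}
	bf.Directory = dir
}
```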

This also refactors fetch URL construction so that only hostnames are
configured, with the XRPC path appended when the request is built.
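
Concretely, callers now configure a bare origin (relay or PDS), and the `com.atproto.sync.getRepo` path and query parameters are appended at request time; a small sketch with example values, mirroring the new `fetchRepo` helper in the diff below:

```go
package main

import "fmt"

func main() {
	host := "https://bsky.network"            // relay or PDS origin, no path
	did := "did:plc:ewvi7nxzyoun6zhxrhs64oiz" // example account DID
	since := ""                               // optional rev for a partial fetch

	// The sync.getRepo path is appended when the request is built, instead of
	// being baked into the configured URL.
	url := fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo?did=%s", host, did)
	if since != "" {
		url += fmt.Sprintf("&since=%s", since)
	}
	fmt.Println(url)
}
```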

UPDATE: should probably *not* merge this to `main` until Jaz can review

Authored by bnewbold.net, committed by GitHub (7fd58873, 72b4acb7)

Changed files: +63 -34

backfill/backfill.go (+60 -33)
··· 12 "time" 13 14 "github.com/bluesky-social/indigo/api/atproto" 15 "github.com/bluesky-social/indigo/repo" 16 "github.com/bluesky-social/indigo/repomgr" 17 "github.com/ipfs/go-cid" 18 "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" 19 "go.opentelemetry.io/otel" ··· 71 ParallelRecordCreates int 72 // Prefix match for records to backfill i.e. app.bsky.feed.app/ 73 // If empty, all records will be backfilled 74 - NSIDFilter string 75 - CheckoutPath string 76 77 syncLimiter *rate.Limiter 78 ··· 80 magicHeaderVal string 81 82 stop chan chan struct{} 83 } 84 85 var ( ··· 110 ParallelRecordCreates int 111 NSIDFilter string 112 SyncRequestsPerSecond int 113 - CheckoutPath string 114 } 115 116 func DefaultBackfillOptions() *BackfillOptions { ··· 119 ParallelRecordCreates: 100, 120 NSIDFilter: "", 121 SyncRequestsPerSecond: 2, 122 - CheckoutPath: "https://bsky.network/xrpc/com.atproto.sync.getRepo", 123 } 124 } 125 ··· 145 ParallelRecordCreates: opts.ParallelRecordCreates, 146 NSIDFilter: opts.NSIDFilter, 147 syncLimiter: rate.NewLimiter(rate.Limit(opts.SyncRequestsPerSecond), 1), 148 - CheckoutPath: opts.CheckoutPath, 149 stop: make(chan chan struct{}, 1), 150 } 151 } 152 ··· 292 err error 293 } 294 295 - // BackfillRepo backfills a repo 296 - func (b *Backfiller) BackfillRepo(ctx context.Context, job Job) (string, error) { 297 - ctx, span := tracer.Start(ctx, "BackfillRepo") 298 - defer span.End() 299 300 - start := time.Now() 301 - 302 - repoDid := job.Repo() 303 - 304 - log := slog.With("source", "backfiller_backfill_repo", "repo", repoDid) 305 - if job.RetryCount() > 0 { 306 - log = log.With("retry_count", job.RetryCount()) 307 - } 308 - log.Info(fmt.Sprintf("processing backfill for %s", repoDid)) 309 - 310 - url := fmt.Sprintf("%s?did=%s", b.CheckoutPath, repoDid) 311 - 312 - if job.Rev() != "" { 313 - url = url + fmt.Sprintf("&since=%s", job.Rev()) 314 } 315 316 // GET and CAR decode the body ··· 320 } 321 req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 322 if err != nil { 323 - state := fmt.Sprintf("failed (create request: %s)", err.Error()) 324 - return state, fmt.Errorf("failed to create request: %w", err) 325 } 326 327 req.Header.Set("Accept", "application/vnd.ipld.car") ··· 334 335 resp, err := client.Do(req) 336 if err != nil { 337 - state := fmt.Sprintf("failed (do request: %s)", err.Error()) 338 - return state, fmt.Errorf("failed to send request: %w", err) 339 } 340 341 if resp.StatusCode != http.StatusOK { ··· 345 } else { 346 reason = resp.Status 347 } 348 - state := fmt.Sprintf("failed (%s)", reason) 349 - return state, fmt.Errorf("failed to get repo: %s", reason) 350 } 351 352 instrumentedReader := instrumentedReader{ ··· 356 357 defer instrumentedReader.Close() 358 359 - r, err := repo.ReadRepoFromCar(ctx, instrumentedReader) 360 if err != nil { 361 - state := "failed (couldn't read repo CAR from response body)" 362 - return state, fmt.Errorf("failed to read repo from car: %w", err) 363 } 364 365 numRecords := 0 ··· 396 397 raw := blk.RawData() 398 399 - err = b.HandleCreateRecord(ctx, repoDid, rev, item.recordPath, &raw, &item.nodeCid) 400 if err != nil { 401 recordResults <- recordResult{recordPath: item.recordPath, err: fmt.Errorf("failed to handle create record: %w", err)} 402 continue
··· 12 "time" 13 14 "github.com/bluesky-social/indigo/api/atproto" 15 + "github.com/bluesky-social/indigo/atproto/identity" 16 + "github.com/bluesky-social/indigo/atproto/syntax" 17 "github.com/bluesky-social/indigo/repo" 18 "github.com/bluesky-social/indigo/repomgr" 19 + 20 "github.com/ipfs/go-cid" 21 "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" 22 "go.opentelemetry.io/otel" ··· 74 ParallelRecordCreates int 75 // Prefix match for records to backfill i.e. app.bsky.feed.app/ 76 // If empty, all records will be backfilled 77 + NSIDFilter string 78 + RelayHost string 79 80 syncLimiter *rate.Limiter 81 ··· 83 magicHeaderVal string 84 85 stop chan chan struct{} 86 + 87 + Directory identity.Directory 88 } 89 90 var ( ··· 115 ParallelRecordCreates int 116 NSIDFilter string 117 SyncRequestsPerSecond int 118 + RelayHost string 119 } 120 121 func DefaultBackfillOptions() *BackfillOptions { ··· 124 ParallelRecordCreates: 100, 125 NSIDFilter: "", 126 SyncRequestsPerSecond: 2, 127 + RelayHost: "https://bsky.network", 128 } 129 } 130 ··· 150 ParallelRecordCreates: opts.ParallelRecordCreates, 151 NSIDFilter: opts.NSIDFilter, 152 syncLimiter: rate.NewLimiter(rate.Limit(opts.SyncRequestsPerSecond), 1), 153 + RelayHost: opts.RelayHost, 154 stop: make(chan chan struct{}, 1), 155 + Directory: identity.DefaultDirectory(), 156 } 157 } 158 ··· 298 err error 299 } 300 301 + // Fetches a repo CAR file over HTTP from the indicated host. If successful, parses the CAR and returns repo.Repo 302 + func (b *Backfiller) fetchRepo(ctx context.Context, did, since, host string) (*repo.Repo, error) { 303 + url := fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo?did=%s", host, did) 304 305 + if since != "" { 306 + url = url + fmt.Sprintf("&since=%s", since) 307 } 308 309 // GET and CAR decode the body ··· 313 } 314 req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 315 if err != nil { 316 + return nil, fmt.Errorf("failed to create request: %w", err) 317 } 318 319 req.Header.Set("Accept", "application/vnd.ipld.car") ··· 326 327 resp, err := client.Do(req) 328 if err != nil { 329 + return nil, fmt.Errorf("failed to send request: %w", err) 330 } 331 332 if resp.StatusCode != http.StatusOK { ··· 336 } else { 337 reason = resp.Status 338 } 339 + return nil, fmt.Errorf("failed to get repo: %s", reason) 340 } 341 342 instrumentedReader := instrumentedReader{ ··· 346 347 defer instrumentedReader.Close() 348 349 + repo, err := repo.ReadRepoFromCar(ctx, instrumentedReader) 350 if err != nil { 351 + return nil, fmt.Errorf("failed to parse repo from CAR file: %w", err) 352 + } 353 + return repo, nil 354 + } 355 + 356 + // BackfillRepo backfills a repo 357 + func (b *Backfiller) BackfillRepo(ctx context.Context, job Job) (string, error) { 358 + ctx, span := tracer.Start(ctx, "BackfillRepo") 359 + defer span.End() 360 + 361 + start := time.Now() 362 + 363 + repoDID := job.Repo() 364 + 365 + log := slog.With("source", "backfiller_backfill_repo", "repo", repoDID) 366 + if job.RetryCount() > 0 { 367 + log = log.With("retry_count", job.RetryCount()) 368 + } 369 + log.Info(fmt.Sprintf("processing backfill for %s", repoDID)) 370 + 371 + // first try with Relay endpoint 372 + r, err := b.fetchRepo(ctx, repoDID, job.Rev(), b.RelayHost) 373 + if err != nil { 374 + slog.Warn("repo CAR fetch from relay failed", "did", repoDID, "since", job.Rev(), "relayHost", b.RelayHost, "err", err) 375 + // fallback to direct PDS fetch 376 + ident, err := b.Directory.LookupDID(ctx, syntax.DID(repoDID)) 377 + if err != nil { 378 + return 
"failed resolving DID to PDS repo", fmt.Errorf("resolving DID for PDS repo fetch: %w", err) 379 + } 380 + pdsHost := ident.PDSEndpoint() 381 + if pdsHost == "" { 382 + return "DID document missing PDS endpoint", fmt.Errorf("no PDS endpoint for DID: %s", repoDID) 383 + } 384 + r, err = b.fetchRepo(ctx, repoDID, job.Rev(), pdsHost) 385 + if err != nil { 386 + slog.Warn("repo CAR fetch from PDS failed", "did", repoDID, "since", job.Rev(), "pdsHost", pdsHost, "err", err) 387 + return "repo CAR fetch from PDS failed", err 388 + } 389 + slog.Info("repo CAR fetch from PDS successful", "did", repoDID, "since", job.Rev(), "pdsHost", pdsHost, "err", err) 390 } 391 392 numRecords := 0 ··· 423 424 raw := blk.RawData() 425 426 + err = b.HandleCreateRecord(ctx, repoDID, rev, item.recordPath, &raw, &item.nodeCid) 427 if err != nil { 428 recordResults <- recordResult{recordPath: item.recordPath, err: fmt.Errorf("failed to handle create record: %w", err)} 429 continue

search/indexing.go (+3 -1)
···
        opts.SyncRequestsPerSecond = 8
    }

-   opts.CheckoutPath = fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo", relayHTTP)
+   opts.RelayHost = relayHTTP
    if config.IndexMaxConcurrency > 0 {
        opts.ParallelRecordCreates = config.IndexMaxConcurrency
    } else {
···
        idx.handleDelete,
        opts,
    )
+   // reuse identity directory (for efficient caching)
+   bf.Directory = dir

    idx.bfs = bfstore
    idx.bf = bf