fork of indigo with slightly nicer lexgen

scylla blockstore; also a toy sqlite blockstore

Authored and committed by Brian Olson (5cdb0fe8, e1d4639a)

+34 -7
bgs/admin.go
```diff
···
 	})
 }

+type PDSRates struct {
+	PerSecond int64 `json:"per_second,omitempty"`
+	PerHour   int64 `json:"per_hour,omitempty"`
+	PerDay    int64 `json:"per_day,omitempty"`
+	CrawlRate int64 `json:"crawl_rate,omitempty"`
+	RepoLimit int64 `json:"repo_limit,omitempty"`
+}
+
+func (pr *PDSRates) FromSlurper(s *Slurper) {
+	if pr.PerSecond == 0 {
+		pr.PerSecond = s.DefaultPerSecondLimit
+	}
+	if pr.PerHour == 0 {
+		pr.PerHour = s.DefaultPerHourLimit
+	}
+	if pr.PerDay == 0 {
+		pr.PerDay = s.DefaultPerDayLimit
+	}
+	if pr.CrawlRate == 0 {
+		pr.CrawlRate = int64(s.DefaultCrawlLimit)
+	}
+	if pr.RepoLimit == 0 {
+		pr.RepoLimit = s.DefaultRepoLimit
+	}
+}
+
 type RateLimitChangeRequest struct {
-	Host      string `json:"host"`
-	PerSecond int64  `json:"per_second"`
-	PerHour   int64  `json:"per_hour"`
-	PerDay    int64  `json:"per_day"`
-	CrawlRate int64  `json:"crawl_rate"`
-	RepoLimit int64  `json:"repo_limit"`
+	Host string `json:"host"`
+	PDSRates
 }

 func (bgs *BGS) handleAdminChangePDSRateLimits(e echo.Context) error {
···
 type AdminRequestCrawlRequest struct {
 	Hostname string `json:"hostname"`
+
+	// optional:
+	PDSRates
 }

 func (bgs *BGS) handleAdminRequestCrawl(e echo.Context) error {
···
 	}

 	// Skip checking if the server is online for now
+	rateOverrides := body.PDSRates
+	rateOverrides.FromSlurper(bgs.slurper)

-	return bgs.slurper.SubscribeToPds(ctx, host, true, true) // Override Trusted Domain Check
+	return bgs.slurper.SubscribeToPds(ctx, host, true, true, &rateOverrides) // Override Trusted Domain Check
 }
```
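Because `PDSRates` is embedded with `omitempty` JSON tags, an admin can override just the limits they care about and let `FromSlurper` backfill the rest from the slurper defaults. A sketch of a requestCrawl admin request body (the hostname and numbers are made-up examples; the route is wherever `handleAdminRequestCrawl` is mounted):

```json
{
  "hostname": "pds.example.com",
  "per_day": 5000000,
  "repo_limit": 200000
}
```

Here `per_second`, `per_hour`, and `crawl_rate` are zero-valued, so `FromSlurper` fills them from `DefaultPerSecondLimit`, `DefaultPerHourLimit`, and `DefaultCrawlLimit` before the host is subscribed.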
+8 -1
bgs/fedmgr.go
```diff
···
 	return !s.newSubsDisabled
 }

-func (s *Slurper) SubscribeToPds(ctx context.Context, host string, reg bool, adminOverride bool) error {
+func (s *Slurper) SubscribeToPds(ctx context.Context, host string, reg bool, adminOverride bool, rateOverrides *PDSRates) error {
 	// TODO: for performance, lock on the hostname instead of global
 	s.lk.Lock()
 	defer s.lk.Unlock()
···
 		DailyEventLimit:  s.DefaultPerDayLimit,
 		CrawlRateLimit:   float64(s.DefaultCrawlLimit),
 		RepoLimit:        s.DefaultRepoLimit,
+	}
+	if rateOverrides != nil {
+		npds.RateLimit = float64(rateOverrides.PerSecond)
+		npds.HourlyEventLimit = rateOverrides.PerHour
+		npds.DailyEventLimit = rateOverrides.PerDay
+		npds.CrawlRateLimit = float64(rateOverrides.CrawlRate)
+		npds.RepoLimit = rateOverrides.RepoLimit
 	}
 	if err := s.db.Create(&npds).Error; err != nil {
 		return err
```
+1 -1
bgs/handlers.go
```diff
···
 		}
 	}

-	return s.slurper.SubscribeToPds(ctx, host, true, false)
+	return s.slurper.SubscribeToPds(ctx, host, true, false, nil)
 }

 func (s *BGS) handleComAtprotoSyncNotifyOfUpdate(ctx context.Context, body *comatprototypes.SyncNotifyOfUpdate_Input) error {
```
+41
carstore/README.md
````markdown
# Carstore

Store a zillion users of PDS-like repos, with more limited operations (mainly: firehose in, firehose out).

## [ScyllaStore](scylla.go)

Blocks stored in ScyllaDB.
User and PDS metadata stored in gorm (PostgreSQL or sqlite3).

## [FileCarStore](bs.go)

Stores 'car slices' from PDS-source subscribeRepos firehose streams on the filesystem, with periodic compaction of car slices into fewer, larger car slices.
User and PDS metadata stored in gorm (PostgreSQL or sqlite3).
FileCarStore was the first production carstore and was used through at least 2024-11.

## [SQLiteStore](sqlite_store.go)

Experimental/demo.
Blocks stored in a trivial local sqlite3 schema.
Minimal reference implementation from which fancy scalable/performant implementations may be derived.

```sql
CREATE TABLE IF NOT EXISTS blocks (uid int, cid blob, rev varchar, root blob, block blob, PRIMARY KEY(uid,cid))
CREATE INDEX IF NOT EXISTS blocx_by_rev ON blocks (uid, rev DESC)

INSERT INTO blocks (uid, cid, rev, root, block) VALUES (?, ?, ?, ?, ?) ON CONFLICT (uid,cid) DO UPDATE SET rev=excluded.rev, root=excluded.root, block=excluded.block

SELECT rev, root FROM blocks WHERE uid = ? ORDER BY rev DESC LIMIT 1

SELECT cid,rev,root,block FROM blocks WHERE uid = ? AND rev > ? ORDER BY rev DESC

DELETE FROM blocks WHERE uid = ?

SELECT rev, root FROM blocks WHERE uid = ? AND cid = ? LIMIT 1

SELECT block FROM blocks WHERE uid = ? AND cid = ? LIMIT 1

SELECT length(block) FROM blocks WHERE uid = ? AND cid = ? LIMIT 1
```
````
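A minimal usage sketch (not part of the commit) of the SQLiteStore path, using the exported names added in this diff (`NewSqliteStore`, `ImportSlice`, `ReadUserCar`); the directory, uid, rev, and CAR slice are placeholders:

```go
package main

import (
	"bytes"
	"context"
	"log"

	"github.com/bluesky-social/indigo/carstore"
)

func main() {
	ctx := context.Background()
	// creates (or opens) /tmp/carstore-demo/db.sqlite3
	cs, err := carstore.NewSqliteStore("/tmp/carstore-demo")
	if err != nil {
		log.Fatal(err)
	}
	defer cs.Close()

	// carSlice would come from a subscribeRepos #commit event;
	// uid 1 and "placeholder-rev" stand in for real values
	var carSlice []byte
	if root, ds, err := cs.ImportSlice(ctx, 1, nil, carSlice); err == nil {
		if _, err := ds.CloseWithRoot(ctx, root, "placeholder-rev"); err != nil {
			log.Fatal(err)
		}
	}

	// stream the whole repo back out as a CAR file
	buf := new(bytes.Buffer)
	if err := cs.ReadUserCar(ctx, 1, "", true, buf); err != nil {
		log.Fatal(err)
	}
	log.Printf("car is %d bytes", buf.Len())
}
```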
+54 -81
carstore/bs.go
··· 10 10 "os" 11 11 "path/filepath" 12 12 "sort" 13 - "sync" 14 13 "sync/atomic" 15 14 "time" 16 15 ··· 20 19 21 20 blockformat "github.com/ipfs/go-block-format" 22 21 "github.com/ipfs/go-cid" 23 - "github.com/ipfs/go-datastore" 24 22 blockstore "github.com/ipfs/go-ipfs-blockstore" 25 23 cbor "github.com/ipfs/go-ipld-cbor" 26 24 ipld "github.com/ipfs/go-ipld-format" ··· 47 45 const BigShardThreshold = 2 << 20 48 46 49 47 type CarStore interface { 48 + // TODO: not really part of general interface 50 49 CompactUserShards(ctx context.Context, user models.Uid, skipBigShards bool) (*CompactionStats, error) 50 + // TODO: not really part of general interface 51 51 GetCompactionTargets(ctx context.Context, shardCount int) ([]CompactionTarget, error) 52 + 52 53 GetUserRepoHead(ctx context.Context, user models.Uid) (cid.Cid, error) 53 54 GetUserRepoRev(ctx context.Context, user models.Uid) (string, error) 54 55 ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, *DeltaSession, error) ··· 63 64 meta *CarStoreGormMeta 64 65 rootDirs []string 65 66 66 - lscLk sync.Mutex 67 - lastShardCache map[models.Uid]*CarShard 67 + lastShardCache lastShardCache 68 68 69 69 log *slog.Logger 70 70 } ··· 88 88 return nil, err 89 89 } 90 90 91 - return &FileCarStore{ 92 - meta: &CarStoreGormMeta{meta: meta}, 93 - rootDirs: roots, 94 - lastShardCache: make(map[models.Uid]*CarShard), 95 - log: slog.Default().With("system", "carstore"), 96 - }, nil 91 + gormMeta := &CarStoreGormMeta{meta: meta} 92 + out := &FileCarStore{ 93 + meta: gormMeta, 94 + rootDirs: roots, 95 + lastShardCache: lastShardCache{ 96 + source: gormMeta, 97 + }, 98 + log: slog.Default().With("system", "carstore"), 99 + } 100 + out.lastShardCache.Init() 101 + return out, nil 97 102 } 98 103 104 + // userView needs these things to get into the underlying block store 105 + // implemented by CarStoreGormMeta 106 + type userViewSource interface { 107 + HasUidCid(ctx context.Context, user models.Uid, k cid.Cid) (bool, error) 108 + LookupBlockRef(ctx context.Context, k cid.Cid) (path string, offset int64, user models.Uid, err error) 109 + } 110 + 111 + // wrapper into a block store that keeps track of which user we are working on behalf of 99 112 type userView struct { 100 - cs CarStore 113 + cs userViewSource 101 114 user models.Uid 102 115 103 116 cache map[cid.Cid]blockformat.Block ··· 115 128 if have { 116 129 return have, nil 117 130 } 118 - 119 - fcd, ok := uv.cs.(*FileCarStore) 120 - if !ok { 121 - return false, nil 122 - } 123 - 124 - return fcd.meta.HasUidCid(ctx, uv.user, k) 131 + return uv.cs.HasUidCid(ctx, uv.user, k) 125 132 } 126 133 127 134 var CacheHits int64 ··· 143 150 } 144 151 atomic.AddInt64(&CacheMiss, 1) 145 152 146 - fcd, ok := uv.cs.(*FileCarStore) 147 - if !ok { 148 - return nil, ipld.ErrNotFound{Cid: k} 149 - } 150 - 151 - path, offset, user, err := fcd.meta.LookupBlockRef(ctx, k) 153 + path, offset, user, err := uv.cs.LookupBlockRef(ctx, k) 152 154 if err != nil { 153 155 return nil, err 154 156 } ··· 279 281 return len(blk.RawData()), nil 280 282 } 281 283 284 + // subset of blockstore.Blockstore that we actually use here 285 + type minBlockstore interface { 286 + Get(ctx context.Context, bcid cid.Cid) (blockformat.Block, error) 287 + Has(ctx context.Context, bcid cid.Cid) (bool, error) 288 + GetSize(ctx context.Context, bcid cid.Cid) (int, error) 289 + } 290 + 282 291 type DeltaSession struct { 283 - fresh blockstore.Blockstore 284 292 blks map[cid.Cid]blockformat.Block 285 293 rmcids 
map[cid.Cid]bool 286 - base blockstore.Blockstore 294 + base minBlockstore 287 295 user models.Uid 288 296 baseCid cid.Cid 289 297 seq int 290 298 readonly bool 291 - cs CarStore 299 + cs shardWriter 292 300 lastRev string 293 301 } 294 302 295 303 func (cs *FileCarStore) checkLastShardCache(user models.Uid) *CarShard { 296 - cs.lscLk.Lock() 297 - defer cs.lscLk.Unlock() 298 - 299 - ls, ok := cs.lastShardCache[user] 300 - if ok { 301 - return ls 302 - } 303 - 304 - return nil 304 + return cs.lastShardCache.check(user) 305 305 } 306 306 307 307 func (cs *FileCarStore) removeLastShardCache(user models.Uid) { 308 - cs.lscLk.Lock() 309 - defer cs.lscLk.Unlock() 310 - 311 - delete(cs.lastShardCache, user) 308 + cs.lastShardCache.remove(user) 312 309 } 313 310 314 311 func (cs *FileCarStore) putLastShardCache(ls *CarShard) { 315 - cs.lscLk.Lock() 316 - defer cs.lscLk.Unlock() 317 - 318 - cs.lastShardCache[ls.Usr] = ls 312 + cs.lastShardCache.put(ls) 319 313 } 320 314 321 315 func (cs *FileCarStore) getLastShard(ctx context.Context, user models.Uid) (*CarShard, error) { 322 - ctx, span := otel.Tracer("carstore").Start(ctx, "getLastShard") 323 - defer span.End() 324 - 325 - maybeLs := cs.checkLastShardCache(user) 326 - if maybeLs != nil { 327 - return maybeLs, nil 328 - } 329 - 330 - lastShard, err := cs.meta.GetLastShard(ctx, user) 331 - if err != nil { 332 - return nil, err 333 - } 334 - 335 - cs.putLastShardCache(lastShard) 336 - return lastShard, nil 316 + return cs.lastShardCache.get(ctx, user) 337 317 } 338 318 339 319 var ErrRepoBaseMismatch = fmt.Errorf("attempted a delta session on top of the wrong previous head") ··· 354 334 } 355 335 356 336 return &DeltaSession{ 357 - fresh: blockstore.NewBlockstore(datastore.NewMapDatastore()), 358 - blks: make(map[cid.Cid]blockformat.Block), 337 + blks: make(map[cid.Cid]blockformat.Block), 359 338 base: &userView{ 360 339 user: user, 361 - cs: cs, 340 + cs: cs.meta, 362 341 prefetch: true, 363 342 cache: make(map[cid.Cid]blockformat.Block), 364 343 }, ··· 374 353 return &DeltaSession{ 375 354 base: &userView{ 376 355 user: user, 377 - cs: cs, 356 + cs: cs.meta, 378 357 prefetch: false, 379 358 cache: make(map[cid.Cid]blockformat.Block), 380 359 }, ··· 385 364 } 386 365 387 366 // TODO: incremental is only ever called true, remove the param 388 - func (cs *FileCarStore) ReadUserCar(ctx context.Context, user models.Uid, sinceRev string, incremental bool, w io.Writer) error { 367 + func (cs *FileCarStore) ReadUserCar(ctx context.Context, user models.Uid, sinceRev string, incremental bool, shardOut io.Writer) error { 389 368 ctx, span := otel.Tracer("carstore").Start(ctx, "ReadUserCar") 390 369 defer span.End() 391 370 ··· 398 377 } 399 378 } 400 379 401 - // TODO: Why does ReadUserCar want shards seq DESC but CompactUserShards wants seq ASC ? 
402 380 shards, err := cs.meta.GetUserShardsDesc(ctx, user, earlySeq) 403 381 if err != nil { 404 382 return err ··· 418 396 if err := car.WriteHeader(&car.CarHeader{ 419 397 Roots: []cid.Cid{shards[0].Root.CID}, 420 398 Version: 1, 421 - }, w); err != nil { 399 + }, shardOut); err != nil { 422 400 return err 423 401 } 424 402 425 403 for _, sh := range shards { 426 - if err := cs.writeShardBlocks(ctx, &sh, w); err != nil { 404 + if err := cs.writeShardBlocks(ctx, &sh, shardOut); err != nil { 427 405 return err 428 406 } 429 407 } ··· 433 411 434 412 // inner loop part of ReadUserCar 435 413 // copy shard blocks from disk to Writer 436 - func (cs *FileCarStore) writeShardBlocks(ctx context.Context, sh *CarShard, w io.Writer) error { 414 + func (cs *FileCarStore) writeShardBlocks(ctx context.Context, sh *CarShard, shardOut io.Writer) error { 437 415 ctx, span := otel.Tracer("carstore").Start(ctx, "writeShardBlocks") 438 416 defer span.End() 439 417 ··· 448 426 return err 449 427 } 450 428 451 - _, err = io.Copy(w, fi) 429 + _, err = io.Copy(shardOut, fi) 452 430 if err != nil { 453 431 return err 454 432 } ··· 603 581 return nil, fmt.Errorf("cannot write to readonly deltaSession") 604 582 } 605 583 606 - switch ocs := ds.cs.(type) { 607 - case *FileCarStore: 608 - return ocs.writeNewShard(ctx, root, rev, ds.user, ds.seq, ds.blks, ds.rmcids) 609 - case *NonArchivalCarstore: 610 - slice, err := blocksToCar(ctx, root, rev, ds.blks) 611 - if err != nil { 612 - return nil, err 613 - } 614 - return slice, ocs.updateLastCommit(ctx, ds.user, rev, root) 615 - default: 616 - return nil, fmt.Errorf("unsupported carstore type") 617 - } 584 + return ds.cs.writeNewShard(ctx, root, rev, ds.user, ds.seq, ds.blks, ds.rmcids) 618 585 } 619 586 620 587 func WriteCarHeader(w io.Writer, root cid.Cid) (int64, error) { ··· 633 600 } 634 601 635 602 return hnw, nil 603 + } 604 + 605 + // shardWriter.writeNewShard called from inside DeltaSession.CloseWithRoot 606 + type shardWriter interface { 607 + // writeNewShard stores blocks in `blks` arg and creates a new shard to propagate out to our firehose 608 + writeNewShard(ctx context.Context, root cid.Cid, rev string, user models.Uid, seq int, blks map[cid.Cid]blockformat.Block, rmcids map[cid.Cid]bool) ([]byte, error) 636 609 } 637 610 638 611 func blocksToCar(ctx context.Context, root cid.Cid, rev string, blks map[cid.Cid]blockformat.Block) ([]byte, error) {
+70
carstore/last_shard_cache.go
```go
package carstore

import (
	"context"
	"sync"

	"github.com/bluesky-social/indigo/models"
	"go.opentelemetry.io/otel"
)

type LastShardSource interface {
	GetLastShard(context.Context, models.Uid) (*CarShard, error)
}

type lastShardCache struct {
	source LastShardSource

	lscLk          sync.Mutex
	lastShardCache map[models.Uid]*CarShard
}

func (lsc *lastShardCache) Init() {
	lsc.lastShardCache = make(map[models.Uid]*CarShard)
}

func (lsc *lastShardCache) check(user models.Uid) *CarShard {
	lsc.lscLk.Lock()
	defer lsc.lscLk.Unlock()

	ls, ok := lsc.lastShardCache[user]
	if ok {
		return ls
	}

	return nil
}

func (lsc *lastShardCache) remove(user models.Uid) {
	lsc.lscLk.Lock()
	defer lsc.lscLk.Unlock()

	delete(lsc.lastShardCache, user)
}

func (lsc *lastShardCache) put(ls *CarShard) {
	if ls == nil {
		return
	}
	lsc.lscLk.Lock()
	defer lsc.lscLk.Unlock()

	lsc.lastShardCache[ls.Usr] = ls
}

func (lsc *lastShardCache) get(ctx context.Context, user models.Uid) (*CarShard, error) {
	ctx, span := otel.Tracer("carstore").Start(ctx, "getLastShard")
	defer span.End()

	maybeLs := lsc.check(user)
	if maybeLs != nil {
		return maybeLs, nil
	}

	lastShard, err := lsc.source.GetLastShard(ctx, user)
	if err != nil {
		return nil, err
	}

	lsc.put(lastShard)
	return lastShard, nil
}
```
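Any type with a `GetLastShard` method can back this read-through cache; a tiny hypothetical constructor makes the wiring pattern explicit (the commit itself just sets `source` and calls `Init()` inline, as in `NewCarStore` and the sqlite/scylla `Open` methods):

```go
// hypothetical helper, not in the commit: build a ready-to-use cache
// from any LastShardSource (CarStoreGormMeta, SQLiteStore, ScyllaStore, ...)
func newLastShardCache(source LastShardSource) *lastShardCache {
	lsc := &lastShardCache{source: source}
	lsc.Init()
	return lsc
}
```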
+18 -4
carstore/nonarchive.go
··· 4 4 "bytes" 5 5 "context" 6 6 "fmt" 7 + ipld "github.com/ipfs/go-ipld-format" 7 8 "io" 8 9 "log/slog" 9 10 "sync" ··· 11 12 "github.com/bluesky-social/indigo/models" 12 13 blockformat "github.com/ipfs/go-block-format" 13 14 "github.com/ipfs/go-cid" 14 - "github.com/ipfs/go-datastore" 15 - blockstore "github.com/ipfs/go-ipfs-blockstore" 16 15 car "github.com/ipld/go-car" 17 16 "go.opentelemetry.io/otel" 18 17 "gorm.io/gorm" ··· 135 134 } 136 135 137 136 return &DeltaSession{ 138 - fresh: blockstore.NewBlockstore(datastore.NewMapDatastore()), 139 - blks: make(map[cid.Cid]blockformat.Block), 137 + blks: make(map[cid.Cid]blockformat.Block), 140 138 base: &userView{ 141 139 user: user, 142 140 cs: cs, ··· 252 250 func (cs *NonArchivalCarstore) CompactUserShards(ctx context.Context, user models.Uid, skipBigShards bool) (*CompactionStats, error) { 253 251 return nil, fmt.Errorf("compaction not supported in non-archival") 254 252 } 253 + 254 + func (cs *NonArchivalCarstore) HasUidCid(ctx context.Context, user models.Uid, k cid.Cid) (bool, error) { 255 + return false, nil 256 + } 257 + 258 + func (cs *NonArchivalCarstore) LookupBlockRef(ctx context.Context, k cid.Cid) (path string, offset int64, user models.Uid, err error) { 259 + return "", 0, 0, ipld.ErrNotFound{Cid: k} 260 + } 261 + 262 + func (cs *NonArchivalCarstore) writeNewShard(ctx context.Context, root cid.Cid, rev string, user models.Uid, seq int, blks map[cid.Cid]blockformat.Block, rmcids map[cid.Cid]bool) ([]byte, error) { 263 + slice, err := blocksToCar(ctx, root, rev, blks) 264 + if err != nil { 265 + return nil, err 266 + } 267 + return slice, cs.updateLastCommit(ctx, user, rev, root) 268 + }
+216 -163
carstore/repo_test.go
··· 6 6 "errors" 7 7 "fmt" 8 8 "io" 9 + "log/slog" 9 10 "os" 10 11 "path/filepath" 11 12 "testing" ··· 24 25 "gorm.io/gorm" 25 26 ) 26 27 27 - func testCarStore() (CarStore, func(), error) { 28 + func testCarStore(t testing.TB) (CarStore, func(), error) { 28 29 tempdir, err := os.MkdirTemp("", "msttest-") 29 30 if err != nil { 30 31 return nil, nil, err ··· 60 61 }, nil 61 62 } 62 63 64 + func testSqliteCarStore(t testing.TB) (CarStore, func(), error) { 65 + sqs := &SQLiteStore{} 66 + sqs.log = slogForTest(t) 67 + err := sqs.Open(":memory:") 68 + if err != nil { 69 + return nil, nil, err 70 + } 71 + return sqs, func() {}, nil 72 + } 73 + 74 + type testFactory func(t testing.TB) (CarStore, func(), error) 75 + 76 + var backends = map[string]testFactory{ 77 + "cartore": testCarStore, 78 + "sqlite": testSqliteCarStore, 79 + } 80 + 63 81 func testFlatfsBs() (blockstore.Blockstore, func(), error) { 64 82 tempdir, err := os.MkdirTemp("", "msttest-") 65 83 if err != nil { ··· 78 96 }, nil 79 97 } 80 98 81 - func TestBasicOperation(t *testing.T) { 99 + func TestBasicOperation(ot *testing.T) { 82 100 ctx := context.TODO() 83 101 84 - cs, cleanup, err := testCarStore() 85 - if err != nil { 86 - t.Fatal(err) 87 - } 88 - defer cleanup() 102 + for fname, tf := range backends { 103 + ot.Run(fname, func(t *testing.T) { 89 104 90 - ds, err := cs.NewDeltaSession(ctx, 1, nil) 91 - if err != nil { 92 - t.Fatal(err) 93 - } 105 + cs, cleanup, err := tf(t) 106 + if err != nil { 107 + t.Fatal(err) 108 + } 109 + defer cleanup() 94 110 95 - ncid, rev, err := setupRepo(ctx, ds, false) 96 - if err != nil { 97 - t.Fatal(err) 98 - } 111 + ds, err := cs.NewDeltaSession(ctx, 1, nil) 112 + if err != nil { 113 + t.Fatal(err) 114 + } 99 115 100 - if _, err := ds.CloseWithRoot(ctx, ncid, rev); err != nil { 101 - t.Fatal(err) 102 - } 116 + ncid, rev, err := setupRepo(ctx, ds, false) 117 + if err != nil { 118 + t.Fatal(err) 119 + } 103 120 104 - var recs []cid.Cid 105 - head := ncid 106 - for i := 0; i < 10; i++ { 107 - ds, err := cs.NewDeltaSession(ctx, 1, &rev) 108 - if err != nil { 109 - t.Fatal(err) 110 - } 121 + if _, err := ds.CloseWithRoot(ctx, ncid, rev); err != nil { 122 + t.Fatal(err) 123 + } 111 124 112 - rr, err := repo.OpenRepo(ctx, ds, head) 113 - if err != nil { 114 - t.Fatal(err) 115 - } 125 + var recs []cid.Cid 126 + head := ncid 127 + for i := 0; i < 10; i++ { 128 + ds, err := cs.NewDeltaSession(ctx, 1, &rev) 129 + if err != nil { 130 + t.Fatal(err) 131 + } 116 132 117 - rc, _, err := rr.CreateRecord(ctx, "app.bsky.feed.post", &appbsky.FeedPost{ 118 - Text: fmt.Sprintf("hey look its a tweet %d", time.Now().UnixNano()), 119 - }) 120 - if err != nil { 121 - t.Fatal(err) 122 - } 133 + rr, err := repo.OpenRepo(ctx, ds, head) 134 + if err != nil { 135 + t.Fatal(err) 136 + } 123 137 124 - recs = append(recs, rc) 138 + rc, _, err := rr.CreateRecord(ctx, "app.bsky.feed.post", &appbsky.FeedPost{ 139 + Text: fmt.Sprintf("hey look its a tweet %d", time.Now().UnixNano()), 140 + }) 141 + if err != nil { 142 + t.Fatal(err) 143 + } 125 144 126 - kmgr := &util.FakeKeyManager{} 127 - nroot, nrev, err := rr.Commit(ctx, kmgr.SignForUser) 128 - if err != nil { 129 - t.Fatal(err) 130 - } 145 + recs = append(recs, rc) 131 146 132 - rev = nrev 147 + kmgr := &util.FakeKeyManager{} 148 + nroot, nrev, err := rr.Commit(ctx, kmgr.SignForUser) 149 + if err != nil { 150 + t.Fatal(err) 151 + } 133 152 134 - if err := ds.CalcDiff(ctx, nil); err != nil { 135 - t.Fatal(err) 136 - } 153 + rev = nrev 137 154 138 - if _, err := 
ds.CloseWithRoot(ctx, nroot, rev); err != nil { 139 - t.Fatal(err) 140 - } 155 + if err := ds.CalcDiff(ctx, nil); err != nil { 156 + t.Fatal(err) 157 + } 141 158 142 - head = nroot 143 - } 159 + if _, err := ds.CloseWithRoot(ctx, nroot, rev); err != nil { 160 + t.Fatal(err) 161 + } 144 162 145 - buf := new(bytes.Buffer) 146 - if err := cs.ReadUserCar(ctx, 1, "", true, buf); err != nil { 147 - t.Fatal(err) 148 - } 149 - checkRepo(t, cs, buf, recs) 163 + head = nroot 164 + } 150 165 151 - if _, err := cs.CompactUserShards(ctx, 1, false); err != nil { 152 - t.Fatal(err) 153 - } 166 + buf := new(bytes.Buffer) 167 + if err := cs.ReadUserCar(ctx, 1, "", true, buf); err != nil { 168 + t.Fatal(err) 169 + } 170 + checkRepo(t, cs, buf, recs) 154 171 155 - buf = new(bytes.Buffer) 156 - if err := cs.ReadUserCar(ctx, 1, "", true, buf); err != nil { 157 - t.Fatal(err) 172 + if _, err := cs.CompactUserShards(ctx, 1, false); err != nil { 173 + t.Fatal(err) 174 + } 175 + 176 + buf = new(bytes.Buffer) 177 + if err := cs.ReadUserCar(ctx, 1, "", true, buf); err != nil { 178 + t.Fatal(err) 179 + } 180 + checkRepo(t, cs, buf, recs) 181 + }) 158 182 } 159 - checkRepo(t, cs, buf, recs) 160 183 } 161 184 162 185 func TestRepeatedCompactions(t *testing.T) { 163 186 ctx := context.TODO() 164 187 165 - cs, cleanup, err := testCarStore() 188 + cs, cleanup, err := testCarStore(t) 166 189 if err != nil { 167 190 t.Fatal(err) 168 191 } ··· 323 346 func BenchmarkRepoWritesCarstore(b *testing.B) { 324 347 ctx := context.TODO() 325 348 326 - cs, cleanup, err := testCarStore() 349 + cs, cleanup, err := testCarStore(b) 350 + innerBenchmarkRepoWritesCarstore(b, ctx, cs, cleanup, err) 351 + } 352 + func BenchmarkRepoWritesSqliteCarstore(b *testing.B) { 353 + ctx := context.TODO() 354 + 355 + cs, cleanup, err := testSqliteCarStore(b) 356 + innerBenchmarkRepoWritesCarstore(b, ctx, cs, cleanup, err) 357 + } 358 + func innerBenchmarkRepoWritesCarstore(b *testing.B, ctx context.Context, cs CarStore, cleanup func(), err error) { 327 359 if err != nil { 328 360 b.Fatal(err) 329 361 } ··· 458 490 } 459 491 } 460 492 461 - func TestDuplicateBlockAcrossShards(t *testing.T) { 493 + func TestDuplicateBlockAcrossShards(ot *testing.T) { 462 494 ctx := context.TODO() 463 495 464 - cs, cleanup, err := testCarStore() 465 - if err != nil { 466 - t.Fatal(err) 467 - } 468 - defer cleanup() 496 + for fname, tf := range backends { 497 + ot.Run(fname, func(t *testing.T) { 498 + 499 + cs, cleanup, err := tf(t) 500 + if err != nil { 501 + t.Fatal(err) 502 + } 503 + defer cleanup() 504 + 505 + ds1, err := cs.NewDeltaSession(ctx, 1, nil) 506 + if err != nil { 507 + t.Fatal(err) 508 + } 469 509 470 - ds1, err := cs.NewDeltaSession(ctx, 1, nil) 471 - if err != nil { 472 - t.Fatal(err) 473 - } 510 + ds2, err := cs.NewDeltaSession(ctx, 2, nil) 511 + if err != nil { 512 + t.Fatal(err) 513 + } 474 514 475 - ds2, err := cs.NewDeltaSession(ctx, 2, nil) 476 - if err != nil { 477 - t.Fatal(err) 478 - } 515 + ds3, err := cs.NewDeltaSession(ctx, 3, nil) 516 + if err != nil { 517 + t.Fatal(err) 518 + } 479 519 480 - ds3, err := cs.NewDeltaSession(ctx, 3, nil) 481 - if err != nil { 482 - t.Fatal(err) 483 - } 520 + var cids []cid.Cid 521 + var revs []string 522 + for _, ds := range []*DeltaSession{ds1, ds2, ds3} { 523 + ncid, rev, err := setupRepo(ctx, ds, true) 524 + if err != nil { 525 + t.Fatal(err) 526 + } 484 527 485 - var cids []cid.Cid 486 - var revs []string 487 - for _, ds := range []*DeltaSession{ds1, ds2, ds3} { 488 - ncid, rev, err := setupRepo(ctx, ds, 
true) 489 - if err != nil { 490 - t.Fatal(err) 491 - } 528 + if _, err := ds.CloseWithRoot(ctx, ncid, rev); err != nil { 529 + t.Fatal(err) 530 + } 531 + cids = append(cids, ncid) 532 + revs = append(revs, rev) 533 + } 492 534 493 - if _, err := ds.CloseWithRoot(ctx, ncid, rev); err != nil { 494 - t.Fatal(err) 495 - } 496 - cids = append(cids, ncid) 497 - revs = append(revs, rev) 498 - } 535 + var recs []cid.Cid 536 + head := cids[1] 537 + rev := revs[1] 538 + for i := 0; i < 10; i++ { 539 + ds, err := cs.NewDeltaSession(ctx, 2, &rev) 540 + if err != nil { 541 + t.Fatal(err) 542 + } 499 543 500 - var recs []cid.Cid 501 - head := cids[1] 502 - rev := revs[1] 503 - for i := 0; i < 10; i++ { 504 - ds, err := cs.NewDeltaSession(ctx, 2, &rev) 505 - if err != nil { 506 - t.Fatal(err) 507 - } 544 + rr, err := repo.OpenRepo(ctx, ds, head) 545 + if err != nil { 546 + t.Fatal(err) 547 + } 508 548 509 - rr, err := repo.OpenRepo(ctx, ds, head) 510 - if err != nil { 511 - t.Fatal(err) 512 - } 549 + rc, _, err := rr.CreateRecord(ctx, "app.bsky.feed.post", &appbsky.FeedPost{ 550 + Text: fmt.Sprintf("hey look its a tweet %d", time.Now().UnixNano()), 551 + }) 552 + if err != nil { 553 + t.Fatal(err) 554 + } 513 555 514 - rc, _, err := rr.CreateRecord(ctx, "app.bsky.feed.post", &appbsky.FeedPost{ 515 - Text: fmt.Sprintf("hey look its a tweet %d", time.Now().UnixNano()), 516 - }) 517 - if err != nil { 518 - t.Fatal(err) 519 - } 556 + recs = append(recs, rc) 520 557 521 - recs = append(recs, rc) 558 + kmgr := &util.FakeKeyManager{} 559 + nroot, nrev, err := rr.Commit(ctx, kmgr.SignForUser) 560 + if err != nil { 561 + t.Fatal(err) 562 + } 522 563 523 - kmgr := &util.FakeKeyManager{} 524 - nroot, nrev, err := rr.Commit(ctx, kmgr.SignForUser) 525 - if err != nil { 526 - t.Fatal(err) 527 - } 564 + rev = nrev 528 565 529 - rev = nrev 566 + if err := ds.CalcDiff(ctx, nil); err != nil { 567 + t.Fatal(err) 568 + } 530 569 531 - if err := ds.CalcDiff(ctx, nil); err != nil { 532 - t.Fatal(err) 533 - } 570 + if _, err := ds.CloseWithRoot(ctx, nroot, rev); err != nil { 571 + t.Fatal(err) 572 + } 534 573 535 - if _, err := ds.CloseWithRoot(ctx, nroot, rev); err != nil { 536 - t.Fatal(err) 537 - } 574 + head = nroot 575 + } 538 576 539 - head = nroot 540 - } 577 + // explicitly update the profile object 578 + { 579 + ds, err := cs.NewDeltaSession(ctx, 2, &rev) 580 + if err != nil { 581 + t.Fatal(err) 582 + } 541 583 542 - // explicitly update the profile object 543 - { 544 - ds, err := cs.NewDeltaSession(ctx, 2, &rev) 545 - if err != nil { 546 - t.Fatal(err) 547 - } 584 + rr, err := repo.OpenRepo(ctx, ds, head) 585 + if err != nil { 586 + t.Fatal(err) 587 + } 548 588 549 - rr, err := repo.OpenRepo(ctx, ds, head) 550 - if err != nil { 551 - t.Fatal(err) 552 - } 589 + desc := "this is so unique" 590 + rc, err := rr.UpdateRecord(ctx, "app.bsky.actor.profile/self", &appbsky.ActorProfile{ 591 + Description: &desc, 592 + }) 593 + if err != nil { 594 + t.Fatal(err) 595 + } 553 596 554 - desc := "this is so unique" 555 - rc, err := rr.UpdateRecord(ctx, "app.bsky.actor.profile/self", &appbsky.ActorProfile{ 556 - Description: &desc, 557 - }) 558 - if err != nil { 559 - t.Fatal(err) 560 - } 597 + recs = append(recs, rc) 561 598 562 - recs = append(recs, rc) 599 + kmgr := &util.FakeKeyManager{} 600 + nroot, nrev, err := rr.Commit(ctx, kmgr.SignForUser) 601 + if err != nil { 602 + t.Fatal(err) 603 + } 563 604 564 - kmgr := &util.FakeKeyManager{} 565 - nroot, nrev, err := rr.Commit(ctx, kmgr.SignForUser) 566 - if err != nil { 567 - 
t.Fatal(err) 568 - } 605 + rev = nrev 569 606 570 - rev = nrev 607 + if err := ds.CalcDiff(ctx, nil); err != nil { 608 + t.Fatal(err) 609 + } 571 610 572 - if err := ds.CalcDiff(ctx, nil); err != nil { 573 - t.Fatal(err) 574 - } 611 + if _, err := ds.CloseWithRoot(ctx, nroot, rev); err != nil { 612 + t.Fatal(err) 613 + } 575 614 576 - if _, err := ds.CloseWithRoot(ctx, nroot, rev); err != nil { 577 - t.Fatal(err) 578 - } 615 + head = nroot 616 + } 579 617 580 - head = nroot 618 + buf := new(bytes.Buffer) 619 + if err := cs.ReadUserCar(ctx, 2, "", true, buf); err != nil { 620 + t.Fatal(err) 621 + } 622 + checkRepo(t, cs, buf, recs) 623 + }) 581 624 } 625 + } 582 626 583 - buf := new(bytes.Buffer) 584 - if err := cs.ReadUserCar(ctx, 2, "", true, buf); err != nil { 585 - t.Fatal(err) 627 + type testWriter struct { 628 + t testing.TB 629 + } 630 + 631 + func (tw testWriter) Write(p []byte) (n int, err error) { 632 + tw.t.Log(string(p)) 633 + return len(p), nil 634 + } 635 + 636 + func slogForTest(t testing.TB) *slog.Logger { 637 + hopts := slog.HandlerOptions{ 638 + Level: slog.LevelDebug, 586 639 } 587 - checkRepo(t, cs, buf, recs) 640 + return slog.New(slog.NewTextHandler(&testWriter{t}, &hopts)) 588 641 }
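Since the table-driven tests register each backend under its own subtest name via `ot.Run`, a single backend can be exercised in isolation with the standard `go test` filter syntax, e.g.:

```sh
go test ./carstore -run 'TestBasicOperation/sqlite'
```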
+636
carstore/scylla.go
```go
package carstore

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"math"
	"math/rand/v2"
	"time"

	"github.com/bluesky-social/indigo/models"
	"github.com/gocql/gocql"
	blockformat "github.com/ipfs/go-block-format"
	"github.com/ipfs/go-cid"
	"github.com/ipfs/go-libipfs/blocks"
	"github.com/ipld/go-car"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/attribute"
)

type ScyllaStore struct {
	WriteSession *gocql.Session
	ReadSession  *gocql.Session

	// scylla servers
	scyllaAddrs []string
	// scylla keyspace where we find our tables
	keyspace string

	log *slog.Logger

	lastShardCache lastShardCache
}

func NewScyllaStore(addrs []string, keyspace string) (*ScyllaStore, error) {
	out := new(ScyllaStore)
	out.scyllaAddrs = addrs
	out.keyspace = keyspace
	err := out.Open()
	if err != nil {
		return nil, err
	}
	return out, nil
}

func (sqs *ScyllaStore) Open() error {
	if sqs.log == nil {
		sqs.log = slog.Default()
	}
	sqs.log.Debug("scylla connect", "addrs", sqs.scyllaAddrs)
	var err error

	//
	// Write session
	//
	var writeSession *gocql.Session
	for retry := 0; ; retry++ {
		writeCluster := gocql.NewCluster(sqs.scyllaAddrs...)
		writeCluster.Keyspace = sqs.keyspace
		// Default port; the client should automatically upgrade to the shard-aware port
		writeCluster.Port = 9042
		writeCluster.Consistency = gocql.Quorum
		writeCluster.RetryPolicy = &ExponentialBackoffRetryPolicy{NumRetries: 10, Min: 100 * time.Millisecond, Max: 10 * time.Second}
		writeCluster.PoolConfig.HostSelectionPolicy = gocql.TokenAwareHostPolicy(gocql.RoundRobinHostPolicy())
		writeSession, err = writeCluster.CreateSession()
		if err != nil {
			if retry > 200 {
				return fmt.Errorf("failed to connect write session too many times: %w", err)
			}
			sqs.log.Error("failed to connect to ScyllaDB write session, retrying", "retry", retry, "err", err)
			time.Sleep(delayForAttempt(retry))
			continue
		}
		break
	}

	//
	// Read session
	//
	var readSession *gocql.Session
	for retry := 0; ; retry++ {
		readCluster := gocql.NewCluster(sqs.scyllaAddrs...)
		readCluster.Keyspace = sqs.keyspace
		// Default port; the client should automatically upgrade to the shard-aware port
		readCluster.Port = 9042
		readCluster.RetryPolicy = &ExponentialBackoffRetryPolicy{NumRetries: 5, Min: 10 * time.Millisecond, Max: 1 * time.Second}
		readCluster.Consistency = gocql.One
		readCluster.PoolConfig.HostSelectionPolicy = gocql.TokenAwareHostPolicy(gocql.RoundRobinHostPolicy())
		readSession, err = readCluster.CreateSession()
		if err != nil {
			if retry > 200 {
				return fmt.Errorf("failed to connect read session too many times: %w", err)
			}
			sqs.log.Error("failed to connect to ScyllaDB read session, retrying", "retry", retry, "err", err)
			time.Sleep(delayForAttempt(retry))
			continue
		}
		break
	}

	sqs.WriteSession = writeSession
	sqs.ReadSession = readSession

	err = sqs.createTables()
	if err != nil {
		return fmt.Errorf("scylla could not create tables, %w", err)
	}
	sqs.lastShardCache.source = sqs
	sqs.lastShardCache.Init()
	return nil
}

var createTableTexts = []string{
	`CREATE TABLE IF NOT EXISTS blocks (uid bigint, cid blob, rev varchar, root blob, block blob, PRIMARY KEY((uid,cid)))`,
	// This is the INDEX we wish we could use, but scylla can't do it, so we use a MATERIALIZED VIEW instead
	//`CREATE INDEX IF NOT EXISTS block_by_rev ON blocks (uid, rev)`,
	`CREATE MATERIALIZED VIEW IF NOT EXISTS blocks_by_uidrev
AS SELECT uid, rev, cid, root
FROM blocks
WHERE uid IS NOT NULL AND rev IS NOT NULL AND cid IS NOT NULL
PRIMARY KEY ((uid), rev, cid) WITH CLUSTERING ORDER BY (rev DESC)`,
}

func (sqs *ScyllaStore) createTables() error {
	for i, text := range createTableTexts {
		err := sqs.WriteSession.Query(text).Exec()
		if err != nil {
			return fmt.Errorf("scylla create table statement [%d] %v: %w", i, text, err)
		}
	}
	return nil
}

// writeNewShard needed for DeltaSession.CloseWithRoot
func (sqs *ScyllaStore) writeNewShard(ctx context.Context, root cid.Cid, rev string, user models.Uid, seq int, blks map[cid.Cid]blockformat.Block, rmcids map[cid.Cid]bool) ([]byte, error) {
	scWriteNewShard.Inc()
	sqs.log.Debug("write shard", "uid", user, "root", root, "rev", rev, "nblocks", len(blks))
	start := time.Now()
	ctx, span := otel.Tracer("carstore").Start(ctx, "writeNewShard")
	defer span.End()
	buf := new(bytes.Buffer)
	hnw, err := WriteCarHeader(buf, root)
	if err != nil {
		return nil, fmt.Errorf("failed to write car header: %w", err)
	}
	offset := hnw

	dbroot := root.Bytes()

	span.SetAttributes(attribute.Int("blocks", len(blks)))

	for bcid, block := range blks {
		// build shard for output firehose
		nw, err := LdWrite(buf, bcid.Bytes(), block.RawData())
		if err != nil {
			return nil, fmt.Errorf("failed to write block: %w", err)
		}
		offset += nw

		// TODO: scylla BATCH doesn't apply if the batch crosses partition keys; BUT, we may be able to send many blocks concurrently?
		dbcid := bcid.Bytes()
		blockbytes := block.RawData()
		// we're relying on cql auto-prepare, no 'PreparedStatement'
		err = sqs.WriteSession.Query(
			`INSERT INTO blocks (uid, cid, rev, root, block) VALUES (?, ?, ?, ?, ?)`,
			user, dbcid, rev, dbroot, blockbytes,
		).Idempotent(true).Exec()
		if err != nil {
			return nil, fmt.Errorf("(uid,cid) block store failed, %w", err)
		}
		sqs.log.Debug("put block", "uid", user, "cid", bcid, "size", len(blockbytes))
	}

	shard := CarShard{
		Root:      models.DbCID{CID: root},
		DataStart: hnw,
		Seq:       seq,
		Usr:       user,
		Rev:       rev,
	}

	sqs.lastShardCache.put(&shard)

	dt := time.Since(start).Seconds()
	scWriteTimes.Observe(dt)
	return buf.Bytes(), nil
}

// GetLastShard needed for NewDeltaSession indirectly through lastShardCache.
// What we actually need from it: the last {Rev, Root.CID}.
func (sqs *ScyllaStore) GetLastShard(ctx context.Context, uid models.Uid) (*CarShard, error) {
	scGetLastShard.Inc()
	var rev string
	var rootb []byte
	err := sqs.ReadSession.Query(`SELECT rev, root FROM blocks_by_uidrev WHERE uid = ? ORDER BY rev DESC LIMIT 1`, uid).Scan(&rev, &rootb)
	if errors.Is(err, gocql.ErrNotFound) {
		return nil, nil
	}
	if err != nil {
		return nil, fmt.Errorf("last shard err, %w", err)
	}
	xcid, cidErr := cid.Cast(rootb)
	if cidErr != nil {
		return nil, fmt.Errorf("last shard bad cid, %w", cidErr)
	}
	return &CarShard{
		Root: models.DbCID{CID: xcid},
		Rev:  rev,
	}, nil
}

func (sqs *ScyllaStore) CompactUserShards(ctx context.Context, user models.Uid, skipBigShards bool) (*CompactionStats, error) {
	sqs.log.Warn("TODO: don't call compaction")
	return nil, nil
}

func (sqs *ScyllaStore) GetCompactionTargets(ctx context.Context, shardCount int) ([]CompactionTarget, error) {
	sqs.log.Warn("TODO: don't call compaction targets")
	return nil, nil
}

func (sqs *ScyllaStore) GetUserRepoHead(ctx context.Context, user models.Uid) (cid.Cid, error) {
	// TODO: same as FileCarStore; re-unify
	lastShard, err := sqs.lastShardCache.get(ctx, user)
	if err != nil {
		return cid.Undef, err
	}
	if lastShard == nil {
		return cid.Undef, nil
	}
	if lastShard.ID == 0 {
		return cid.Undef, nil
	}

	return lastShard.Root.CID, nil
}

func (sqs *ScyllaStore) GetUserRepoRev(ctx context.Context, user models.Uid) (string, error) {
	// TODO: same as FileCarStore; re-unify
	lastShard, err := sqs.lastShardCache.get(ctx, user)
	if err != nil {
		return "", err
	}
	if lastShard == nil {
		return "", nil
	}
	if lastShard.ID == 0 {
		return "", nil
	}

	return lastShard.Rev, nil
}

func (sqs *ScyllaStore) ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, *DeltaSession, error) {
	// TODO: same as FileCarStore, re-unify
	ctx, span := otel.Tracer("carstore").Start(ctx, "ImportSlice")
	defer span.End()

	carr, err := car.NewCarReader(bytes.NewReader(carslice))
	if err != nil {
		return cid.Undef, nil, err
	}

	if len(carr.Header.Roots) != 1 {
		return cid.Undef, nil, fmt.Errorf("invalid car file, header must have a single root (has %d)", len(carr.Header.Roots))
	}

	ds, err := sqs.NewDeltaSession(ctx, uid, since)
	if err != nil {
		return cid.Undef, nil, fmt.Errorf("new delta session failed: %w", err)
	}

	var cids []cid.Cid
	for {
		blk, err := carr.Next()
		if err != nil {
			if err == io.EOF {
				break
			}
			return cid.Undef, nil, err
		}

		cids = append(cids, blk.Cid())

		if err := ds.Put(ctx, blk); err != nil {
			return cid.Undef, nil, err
		}
	}

	return carr.Header.Roots[0], ds, nil
}

func (sqs *ScyllaStore) NewDeltaSession(ctx context.Context, user models.Uid, since *string) (*DeltaSession, error) {
	ctx, span := otel.Tracer("carstore").Start(ctx, "NewSession")
	defer span.End()

	// TODO: ensure that we don't write updates on top of the wrong head
	// this needs to be a compare and swap type operation
	lastShard, err := sqs.lastShardCache.get(ctx, user)
	if err != nil {
		return nil, fmt.Errorf("NewDeltaSession, lsc, %w", err)
	}

	if lastShard == nil {
		lastShard = &zeroShard
	}

	if since != nil && *since != lastShard.Rev {
		return nil, fmt.Errorf("revision mismatch: %s != %s: %w", *since, lastShard.Rev, ErrRepoBaseMismatch)
	}

	return &DeltaSession{
		blks: make(map[cid.Cid]blockformat.Block),
		base: &sqliteUserView{
			uid: user,
			sqs: sqs,
		},
		user:    user,
		baseCid: lastShard.Root.CID,
		cs:      sqs,
		seq:     lastShard.Seq + 1,
		lastRev: lastShard.Rev,
	}, nil
}

func (sqs *ScyllaStore) ReadOnlySession(user models.Uid) (*DeltaSession, error) {
	return &DeltaSession{
		base: &sqliteUserView{
			uid: user,
			sqs: sqs,
		},
		readonly: true,
		user:     user,
		cs:       sqs,
	}, nil
}

// ReadUserCar
// incremental is only ever called true
func (sqs *ScyllaStore) ReadUserCar(ctx context.Context, user models.Uid, sinceRev string, incremental bool, shardOut io.Writer) error {
	scGetCar.Inc()
	ctx, span := otel.Tracer("carstore").Start(ctx, "ReadUserCar")
	defer span.End()
	start := time.Now()

	cidchan := make(chan cid.Cid, 100)

	go func() {
		defer close(cidchan)
		cids := sqs.ReadSession.Query(`SELECT cid FROM blocks_by_uidrev WHERE uid = ? AND rev > ? ORDER BY rev DESC`, user, sinceRev).Iter()
		defer cids.Close()
		for {
			var cidb []byte
			ok := cids.Scan(&cidb)
			if !ok {
				break
			}
			xcid, cidErr := cid.Cast(cidb)
			if cidErr != nil {
				sqs.log.Warn("ReadUserCar bad cid", "err", cidErr)
				continue
			}
			cidchan <- xcid
		}
	}()
	nblocks := 0
	first := true
	for xcid := range cidchan {
		var xrev string
		var xroot []byte
		var xblock []byte
		err := sqs.ReadSession.Query("SELECT rev, root, block FROM blocks WHERE uid = ? AND cid = ? LIMIT 1", user, xcid.Bytes()).Scan(&xrev, &xroot, &xblock)
		if err != nil {
			return fmt.Errorf("rcar bad read, %w", err)
		}
		if first {
			rootCid, cidErr := cid.Cast(xroot)
			if cidErr != nil {
				return fmt.Errorf("rcar bad rootcid, %w", cidErr)
			}
			if err := car.WriteHeader(&car.CarHeader{
				Roots:   []cid.Cid{rootCid},
				Version: 1,
			}, shardOut); err != nil {
				return fmt.Errorf("rcar bad header, %w", err)
			}
			first = false
		}
		nblocks++
		_, err = LdWrite(shardOut, xcid.Bytes(), xblock)
		if err != nil {
			return fmt.Errorf("rcar bad write, %w", err)
		}
	}
	span.SetAttributes(attribute.Int("blocks", nblocks))
	sqs.log.Debug("read car", "nblocks", nblocks, "since", sinceRev)
	scReadCarTimes.Observe(time.Since(start).Seconds())
	return nil
}

// Stat is only used in a debugging admin handler
// don't bother implementing it (for now?)
func (sqs *ScyllaStore) Stat(ctx context.Context, usr models.Uid) ([]UserStat, error) {
	sqs.log.Warn("Stat debugging method not implemented for scylla store")
	return nil, nil
}

func (sqs *ScyllaStore) WipeUserData(ctx context.Context, user models.Uid) error {
	ctx, span := otel.Tracer("carstore").Start(ctx, "WipeUserData")
	defer span.End()

	// LOL, can't do this when the primary key is (uid,cid), because that's hashed with no scan!
	//err := sqs.WriteSession.Query("DELETE FROM blocks WHERE uid = ?", user).Exec()

	cidchan := make(chan cid.Cid, 100)

	go func() {
		defer close(cidchan)
		cids := sqs.ReadSession.Query(`SELECT cid FROM blocks_by_uidrev WHERE uid = ?`, user).Iter()
		defer cids.Close()
		for {
			var cidb []byte
			ok := cids.Scan(&cidb)
			if !ok {
				break
			}
			xcid, cidErr := cid.Cast(cidb)
			if cidErr != nil {
				sqs.log.Warn("WipeUserData bad cid", "err", cidErr)
				continue
			}
			cidchan <- xcid
		}
	}()
	nblocks := 0
	errcount := 0
	for xcid := range cidchan {
		err := sqs.WriteSession.Query("DELETE FROM blocks WHERE uid = ? AND cid = ?", user, xcid.Bytes()).Exec()
		if err != nil {
			sqs.log.Warn("WipeUserData bad delete", "err", err)
			errcount++
			if errcount > 10 {
				return err
			}
		}
		nblocks++
	}
	scUsersWiped.Inc()
	scBlocksDeleted.Add(float64(nblocks))
	return nil
}

// HasUidCid needed for NewDeltaSession userView
func (sqs *ScyllaStore) HasUidCid(ctx context.Context, user models.Uid, bcid cid.Cid) (bool, error) {
	// TODO: this is pretty cacheable? invalidate (uid,*) on WipeUserData
	scHas.Inc()
	var rev string
	var rootb []byte
	err := sqs.ReadSession.Query(`SELECT rev, root FROM blocks WHERE uid = ? AND cid = ? LIMIT 1`, user, bcid.Bytes()).Scan(&rev, &rootb)
	if errors.Is(err, gocql.ErrNotFound) {
		return false, nil
	}
	if err != nil {
		return false, fmt.Errorf("hasUC bad scan, %w", err)
	}
	return true, nil
}

func (sqs *ScyllaStore) CarStore() CarStore {
	return sqs
}

func (sqs *ScyllaStore) Close() error {
	sqs.WriteSession.Close()
	sqs.ReadSession.Close()
	return nil
}

func (sqs *ScyllaStore) getBlock(ctx context.Context, user models.Uid, bcid cid.Cid) (blockformat.Block, error) {
	// TODO: this is pretty cacheable? invalidate (uid,*) on WipeUserData
	scGetBlock.Inc()
	start := time.Now()
	var blockb []byte
	err := sqs.ReadSession.Query("SELECT block FROM blocks WHERE uid = ? AND cid = ? LIMIT 1", user, bcid.Bytes()).Scan(&blockb)
	if err != nil {
		return nil, fmt.Errorf("getb err, %w", err)
	}
	dt := time.Since(start)
	scGetTimes.Observe(dt.Seconds())
	return blocks.NewBlock(blockb), nil
}

func (sqs *ScyllaStore) getBlockSize(ctx context.Context, user models.Uid, bcid cid.Cid) (int64, error) {
	// TODO: this is pretty cacheable? invalidate (uid,*) on WipeUserData
	scGetBlockSize.Inc()
	var out int64
	err := sqs.ReadSession.Query("SELECT length(block) FROM blocks WHERE uid = ? AND cid = ? LIMIT 1", user, bcid.Bytes()).Scan(&out)
	if err != nil {
		return 0, fmt.Errorf("getbs err, %w", err)
	}
	return out, nil
}

var scUsersWiped = promauto.NewCounter(prometheus.CounterOpts{
	Name: "bgs_sc_users_wiped",
	Help: "User rows deleted in scylla backend",
})

var scBlocksDeleted = promauto.NewCounter(prometheus.CounterOpts{
	Name: "bgs_sc_blocks_deleted",
	Help: "User blocks deleted in scylla backend",
})

var scGetBlock = promauto.NewCounter(prometheus.CounterOpts{
	Name: "bgs_sc_get_block",
	Help: "get block scylla backend",
})

var scGetBlockSize = promauto.NewCounter(prometheus.CounterOpts{
	Name: "bgs_sc_get_block_size",
	Help: "get block size scylla backend",
})

var scGetCar = promauto.NewCounter(prometheus.CounterOpts{
	Name: "bgs_sc_get_car",
	Help: "get car scylla backend",
})

var scHas = promauto.NewCounter(prometheus.CounterOpts{
	Name: "bgs_sc_has",
	Help: "check block presence scylla backend",
})

var scGetLastShard = promauto.NewCounter(prometheus.CounterOpts{
	Name: "bgs_sc_get_last_shard",
	Help: "get last shard scylla backend",
})

var scWriteNewShard = promauto.NewCounter(prometheus.CounterOpts{
	Name: "bgs_sc_write_shard",
	Help: "write shard blocks scylla backend",
})

var timeBuckets []float64
var scWriteTimes prometheus.Histogram
var scGetTimes prometheus.Histogram
var scReadCarTimes prometheus.Histogram

func init() {
	timeBuckets = make([]float64, 1, 20)
	timeBuckets[0] = 0.000_0100
	i := 0
	for timeBuckets[i] < 1 && len(timeBuckets) < 20 {
		timeBuckets = append(timeBuckets, timeBuckets[i]*2)
		i++
	}
	scWriteTimes = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "bgs_sc_write_times",
		Buckets: timeBuckets,
	})
	scGetTimes = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "bgs_sc_get_times",
		Buckets: timeBuckets,
	})
	scReadCarTimes = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "bgs_sc_readcar_times",
		Buckets: timeBuckets,
	})
}

// TODO: copied from tango, re-unify?
// ExponentialBackoffRetryPolicy sleeps between attempts
type ExponentialBackoffRetryPolicy struct {
	NumRetries int
	Min, Max   time.Duration
}

func (e *ExponentialBackoffRetryPolicy) napTime(attempts int) time.Duration {
	return getExponentialTime(e.Min, e.Max, attempts)
}

func (e *ExponentialBackoffRetryPolicy) Attempt(q gocql.RetryableQuery) bool {
	if q.Attempts() > e.NumRetries {
		return false
	}
	time.Sleep(e.napTime(q.Attempts()))
	return true
}

// getExponentialTime calculates an exponentially growing duration with jitter
func getExponentialTime(min time.Duration, max time.Duration, attempts int) time.Duration {
	if min <= 0 {
		min = 100 * time.Millisecond
	}
	if max <= 0 {
		max = 10 * time.Second
	}
	minFloat := float64(min)
	napDuration := minFloat * math.Pow(2, float64(attempts-1))
	// add some jitter
	napDuration += rand.Float64()*minFloat - (minFloat / 2)
	if napDuration > float64(max) {
		return time.Duration(max)
	}
	return time.Duration(napDuration)
}

// GetRetryType returns the retry type for the given error
func (e *ExponentialBackoffRetryPolicy) GetRetryType(err error) gocql.RetryType {
	// Retry timeouts and/or contention errors on the same host
	if errors.Is(err, gocql.ErrTimeoutNoResponse) ||
		errors.Is(err, gocql.ErrNoStreams) ||
		errors.Is(err, gocql.ErrTooManyTimeouts) {
		return gocql.Retry
	}

	// Retry next host on unavailable errors
	if errors.Is(err, gocql.ErrUnavailable) ||
		errors.Is(err, gocql.ErrConnectionClosed) ||
		errors.Is(err, gocql.ErrSessionClosed) {
		return gocql.RetryNextHost
	}

	// Otherwise don't retry
	return gocql.Rethrow
}

func delayForAttempt(attempt int) time.Duration {
	if attempt < 50 {
		return time.Millisecond * 5
	}

	return time.Second
}
```
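A minimal sketch (not part of the commit) of standing up the Scylla-backed store; the node addresses and keyspace name are placeholders, and the keyspace must already exist since `Open` only creates the `blocks` table and its materialized view:

```go
package main

import (
	"log"

	"github.com/bluesky-social/indigo/carstore"
)

func main() {
	// placeholder node list and keyspace; Open() retries with backoff
	// until both the write and read sessions connect
	store, err := carstore.NewScyllaStore([]string{"scylla-1", "scylla-2", "scylla-3"}, "bsky_carstore")
	if err != nil {
		log.Fatal(err)
	}
	defer store.Close()
}
```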
+576
carstore/sqlite_store.go
··· 1 + package carstore 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + "database/sql" 7 + "errors" 8 + "fmt" 9 + "go.opentelemetry.io/otel/attribute" 10 + "io" 11 + "log/slog" 12 + "os" 13 + "path/filepath" 14 + 15 + "github.com/bluesky-social/indigo/models" 16 + blockformat "github.com/ipfs/go-block-format" 17 + "github.com/ipfs/go-cid" 18 + "github.com/ipfs/go-libipfs/blocks" 19 + "github.com/ipld/go-car" 20 + _ "github.com/mattn/go-sqlite3" 21 + "github.com/prometheus/client_golang/prometheus" 22 + "github.com/prometheus/client_golang/prometheus/promauto" 23 + "go.opentelemetry.io/otel" 24 + ) 25 + 26 + // var log = logging.Logger("sqstore") 27 + 28 + type SQLiteStore struct { 29 + dbPath string 30 + db *sql.DB 31 + 32 + log *slog.Logger 33 + 34 + lastShardCache lastShardCache 35 + } 36 + 37 + func ensureDir(path string) error { 38 + fi, err := os.Stat(path) 39 + if err != nil { 40 + if os.IsNotExist(err) { 41 + return os.MkdirAll(path, 0755) 42 + } 43 + return err 44 + } 45 + if fi.IsDir() { 46 + return nil 47 + } 48 + return fmt.Errorf("%s exists but is not a directory", path) 49 + } 50 + 51 + func NewSqliteStore(csdir string) (*SQLiteStore, error) { 52 + if err := ensureDir(csdir); err != nil { 53 + return nil, err 54 + } 55 + dbpath := filepath.Join(csdir, "db.sqlite3") 56 + out := new(SQLiteStore) 57 + err := out.Open(dbpath) 58 + if err != nil { 59 + return nil, err 60 + } 61 + return out, nil 62 + } 63 + 64 + func (sqs *SQLiteStore) Open(path string) error { 65 + if sqs.log == nil { 66 + sqs.log = slog.Default() 67 + } 68 + sqs.log.Debug("open db", "path", path) 69 + db, err := sql.Open("sqlite3", path) 70 + if err != nil { 71 + return fmt.Errorf("%s: sqlite could not open, %w", path, err) 72 + } 73 + sqs.db = db 74 + sqs.dbPath = path 75 + err = sqs.createTables() 76 + if err != nil { 77 + return fmt.Errorf("%s: sqlite could not create tables, %w", path, err) 78 + } 79 + sqs.lastShardCache.source = sqs 80 + sqs.lastShardCache.Init() 81 + return nil 82 + } 83 + 84 + func (sqs *SQLiteStore) createTables() error { 85 + tx, err := sqs.db.Begin() 86 + if err != nil { 87 + return err 88 + } 89 + defer tx.Rollback() 90 + _, err = tx.Exec("CREATE TABLE IF NOT EXISTS blocks (uid int, cid blob, rev varchar, root blob, block blob, PRIMARY KEY(uid,cid));") 91 + if err != nil { 92 + return fmt.Errorf("%s: create table blocks..., %w", sqs.dbPath, err) 93 + } 94 + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS blocx_by_rev ON blocks (uid, rev DESC)") 95 + if err != nil { 96 + return fmt.Errorf("%s: create blocks by rev index, %w", sqs.dbPath, err) 97 + } 98 + return tx.Commit() 99 + } 100 + 101 + // writeNewShard needed for DeltaSession.CloseWithRoot 102 + func (sqs *SQLiteStore) writeNewShard(ctx context.Context, root cid.Cid, rev string, user models.Uid, seq int, blks map[cid.Cid]blockformat.Block, rmcids map[cid.Cid]bool) ([]byte, error) { 103 + sqWriteNewShard.Inc() 104 + sqs.log.Debug("write shard", "uid", user, "root", root, "rev", rev, "nblocks", len(blks)) 105 + ctx, span := otel.Tracer("carstore").Start(ctx, "writeNewShard") 106 + defer span.End() 107 + // this is "write many blocks", "write one block" is above in putBlock(). keep them in sync. 
108 + buf := new(bytes.Buffer) 109 + hnw, err := WriteCarHeader(buf, root) 110 + if err != nil { 111 + return nil, fmt.Errorf("failed to write car header: %w", err) 112 + } 113 + offset := hnw 114 + 115 + tx, err := sqs.db.BeginTx(ctx, nil) 116 + if err != nil { 117 + return nil, fmt.Errorf("bad block insert tx, %w", err) 118 + } 119 + defer tx.Rollback() 120 + insertStatement, err := tx.PrepareContext(ctx, "INSERT INTO blocks (uid, cid, rev, root, block) VALUES (?, ?, ?, ?, ?) ON CONFLICT (uid,cid) DO UPDATE SET rev=excluded.rev, root=excluded.root, block=excluded.block") 121 + if err != nil { 122 + return nil, fmt.Errorf("bad block insert sql, %w", err) 123 + } 124 + defer insertStatement.Close() 125 + 126 + dbroot := models.DbCID{CID: root} 127 + 128 + span.SetAttributes(attribute.Int("blocks", len(blks))) 129 + 130 + for bcid, block := range blks { 131 + // build shard for output firehose 132 + nw, err := LdWrite(buf, bcid.Bytes(), block.RawData()) 133 + if err != nil { 134 + return nil, fmt.Errorf("failed to write block: %w", err) 135 + } 136 + offset += nw 137 + 138 + // TODO: better databases have an insert-many option for a prepared statement 139 + dbcid := models.DbCID{CID: bcid} 140 + blockbytes := block.RawData() 141 + _, err = insertStatement.ExecContext(ctx, user, dbcid, rev, dbroot, blockbytes) 142 + if err != nil { 143 + return nil, fmt.Errorf("(uid,cid) block store failed, %w", err) 144 + } 145 + sqs.log.Debug("put block", "uid", user, "cid", bcid, "size", len(blockbytes)) 146 + } 147 + err = tx.Commit() 148 + if err != nil { 149 + return nil, fmt.Errorf("bad block insert commit, %w", err) 150 + } 151 + 152 + shard := CarShard{ 153 + Root: models.DbCID{CID: root}, 154 + DataStart: hnw, 155 + Seq: seq, 156 + Usr: user, 157 + Rev: rev, 158 + } 159 + 160 + sqs.lastShardCache.put(&shard) 161 + 162 + return buf.Bytes(), nil 163 + } 164 + 165 + var ErrNothingThere = errors.New("nothing to read)") 166 + 167 + // GetLastShard nedeed for NewDeltaSession indirectly through lastShardCache 168 + // What we actually seem to need from this: last {Rev, Root.CID} 169 + func (sqs *SQLiteStore) GetLastShard(ctx context.Context, uid models.Uid) (*CarShard, error) { 170 + sqGetLastShard.Inc() 171 + tx, err := sqs.db.BeginTx(ctx, &txReadOnly) 172 + if err != nil { 173 + return nil, fmt.Errorf("bad last shard tx, %w", err) 174 + } 175 + defer tx.Rollback() 176 + qstmt, err := tx.PrepareContext(ctx, "SELECT rev, root FROM blocks WHERE uid = ? 
ORDER BY rev DESC LIMIT 1") 177 + if err != nil { 178 + return nil, fmt.Errorf("bad last shard sql, %w", err) 179 + } 180 + rows, err := qstmt.QueryContext(ctx, uid) 181 + if err != nil { 182 + return nil, fmt.Errorf("last shard err, %w", err) 183 + } 184 + if rows.Next() { 185 + var rev string 186 + var rootb models.DbCID 187 + err = rows.Scan(&rev, &rootb) 188 + if err != nil { 189 + return nil, fmt.Errorf("last shard bad scan, %w", err) 190 + } 191 + return &CarShard{ 192 + Root: rootb, 193 + Rev: rev, 194 + }, nil 195 + } 196 + return nil, nil 197 + } 198 + 199 + func (sqs *SQLiteStore) CompactUserShards(ctx context.Context, user models.Uid, skipBigShards bool) (*CompactionStats, error) { 200 + sqs.log.Warn("TODO: don't call compaction") 201 + return nil, nil 202 + } 203 + 204 + func (sqs *SQLiteStore) GetCompactionTargets(ctx context.Context, shardCount int) ([]CompactionTarget, error) { 205 + sqs.log.Warn("TODO: don't call compaction targets") 206 + return nil, nil 207 + } 208 + 209 + func (sqs *SQLiteStore) GetUserRepoHead(ctx context.Context, user models.Uid) (cid.Cid, error) { 210 + // TODO: same as FileCarStore; re-unify 211 + lastShard, err := sqs.lastShardCache.get(ctx, user) 212 + if err != nil { 213 + return cid.Undef, err 214 + } 215 + if lastShard == nil { 216 + return cid.Undef, nil 217 + } 218 + if lastShard.ID == 0 { 219 + return cid.Undef, nil 220 + } 221 + 222 + return lastShard.Root.CID, nil 223 + } 224 + 225 + func (sqs *SQLiteStore) GetUserRepoRev(ctx context.Context, user models.Uid) (string, error) { 226 + // TODO: same as FileCarStore; re-unify 227 + lastShard, err := sqs.lastShardCache.get(ctx, user) 228 + if err != nil { 229 + return "", err 230 + } 231 + if lastShard == nil { 232 + return "", nil 233 + } 234 + if lastShard.ID == 0 { 235 + return "", nil 236 + } 237 + 238 + return lastShard.Rev, nil 239 + } 240 + 241 + func (sqs *SQLiteStore) ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, *DeltaSession, error) { 242 + // TODO: same as FileCarStore, re-unify 243 + ctx, span := otel.Tracer("carstore").Start(ctx, "ImportSlice") 244 + defer span.End() 245 + 246 + carr, err := car.NewCarReader(bytes.NewReader(carslice)) 247 + if err != nil { 248 + return cid.Undef, nil, err 249 + } 250 + 251 + if len(carr.Header.Roots) != 1 { 252 + return cid.Undef, nil, fmt.Errorf("invalid car file, header must have a single root (has %d)", len(carr.Header.Roots)) 253 + } 254 + 255 + ds, err := sqs.NewDeltaSession(ctx, uid, since) 256 + if err != nil { 257 + return cid.Undef, nil, fmt.Errorf("new delta session failed: %w", err) 258 + } 259 + 260 + var cids []cid.Cid 261 + for { 262 + blk, err := carr.Next() 263 + if err != nil { 264 + if err == io.EOF { 265 + break 266 + } 267 + return cid.Undef, nil, err 268 + } 269 + 270 + cids = append(cids, blk.Cid()) 271 + 272 + if err := ds.Put(ctx, blk); err != nil { 273 + return cid.Undef, nil, err 274 + } 275 + } 276 + 277 + return carr.Header.Roots[0], ds, nil 278 + } 279 + 280 + var zeroShard CarShard 281 + 282 + func (sqs *SQLiteStore) NewDeltaSession(ctx context.Context, user models.Uid, since *string) (*DeltaSession, error) { 283 + ctx, span := otel.Tracer("carstore").Start(ctx, "NewSession") 284 + defer span.End() 285 + 286 + // TODO: ensure that we don't write updates on top of the wrong head 287 + // this needs to be a compare and swap type operation 288 + lastShard, err := sqs.lastShardCache.get(ctx, user) 289 + if err != nil { 290 + return nil, fmt.Errorf("NewDeltaSession, lsc, %w", 
err) 291 + } 292 + 293 + if lastShard == nil { 294 + lastShard = &zeroShard 295 + } 296 + 297 + if since != nil && *since != lastShard.Rev { 298 + return nil, fmt.Errorf("revision mismatch: %s != %s: %w", *since, lastShard.Rev, ErrRepoBaseMismatch) 299 + } 300 + 301 + return &DeltaSession{ 302 + blks: make(map[cid.Cid]blockformat.Block), 303 + base: &sqliteUserView{ 304 + uid: user, 305 + sqs: sqs, 306 + }, 307 + user: user, 308 + baseCid: lastShard.Root.CID, 309 + cs: sqs, 310 + seq: lastShard.Seq + 1, 311 + lastRev: lastShard.Rev, 312 + }, nil 313 + } 314 + 315 + func (sqs *SQLiteStore) ReadOnlySession(user models.Uid) (*DeltaSession, error) { 316 + return &DeltaSession{ 317 + base: &sqliteUserView{ 318 + uid: user, 319 + sqs: sqs, 320 + }, 321 + readonly: true, 322 + user: user, 323 + cs: sqs, 324 + }, nil 325 + } 326 + 327 + type cartmp struct { 328 + xcid cid.Cid 329 + rev string 330 + root string 331 + block []byte 332 + } 333 + 334 + // ReadUserCar 335 + // incremental is only ever called true 336 + func (sqs *SQLiteStore) ReadUserCar(ctx context.Context, user models.Uid, sinceRev string, incremental bool, shardOut io.Writer) error { 337 + sqGetCar.Inc() 338 + ctx, span := otel.Tracer("carstore").Start(ctx, "ReadUserCar") 339 + defer span.End() 340 + 341 + tx, err := sqs.db.BeginTx(ctx, &txReadOnly) 342 + if err != nil { 343 + return fmt.Errorf("rcar tx, %w", err) 344 + } 345 + defer tx.Rollback() 346 + qstmt, err := tx.PrepareContext(ctx, "SELECT cid,rev,root,block FROM blocks WHERE uid = ? AND rev > ? ORDER BY rev DESC") 347 + if err != nil { 348 + return fmt.Errorf("rcar sql, %w", err) 349 + } 350 + defer qstmt.Close() 351 + rows, err := qstmt.QueryContext(ctx, user, sinceRev) 352 + if err != nil { 353 + return fmt.Errorf("rcar err, %w", err) 354 + } 355 + nblocks := 0 356 + first := true 357 + for rows.Next() { 358 + var xcid models.DbCID 359 + var xrev string 360 + var xroot models.DbCID 361 + var xblock []byte 362 + err = rows.Scan(&xcid, &xrev, &xroot, &xblock) 363 + if err != nil { 364 + return fmt.Errorf("rcar bad scan, %w", err) 365 + } 366 + if first { 367 + if err := car.WriteHeader(&car.CarHeader{ 368 + Roots: []cid.Cid{xroot.CID}, 369 + Version: 1, 370 + }, shardOut); err != nil { 371 + return fmt.Errorf("rcar bad header, %w", err) 372 + } 373 + first = false 374 + } 375 + nblocks++ 376 + _, err := LdWrite(shardOut, xcid.CID.Bytes(), xblock) 377 + if err != nil { 378 + return fmt.Errorf("rcar bad write, %w", err) 379 + } 380 + } 381 + sqs.log.Debug("read car", "nblocks", nblocks, "since", sinceRev) 382 + return nil 383 + } 384 + 385 + // Stat is only used in a debugging admin handler 386 + // don't bother implementing it (for now?) 
387 + func (sqs *SQLiteStore) Stat(ctx context.Context, usr models.Uid) ([]UserStat, error) {
388 + sqs.log.Warn("Stat debugging method not implemented for sqlite store")
389 + return nil, nil
390 + }
391 + 
392 + func (sqs *SQLiteStore) WipeUserData(ctx context.Context, user models.Uid) error {
393 + ctx, span := otel.Tracer("carstore").Start(ctx, "WipeUserData")
394 + defer span.End()
395 + tx, err := sqs.db.BeginTx(ctx, nil)
396 + if err != nil {
397 + return fmt.Errorf("wipe tx, %w", err)
398 + }
399 + defer tx.Rollback()
400 + deleteResult, err := tx.ExecContext(ctx, "DELETE FROM blocks WHERE uid = ?", user)
401 + if err != nil {
402 + return fmt.Errorf("wipe delete, %w", err)
403 + }
404 + nrows, err := deleteResult.RowsAffected()
405 + if err == nil {
406 + sqRowsDeleted.Add(float64(nrows))
407 + }
408 + if err == nil {
409 + err = tx.Commit()
410 + }
411 + return err
412 + }
413 + 
414 + var txReadOnly = sql.TxOptions{ReadOnly: true}
415 + 
416 + // HasUidCid needed for NewDeltaSession userView
417 + func (sqs *SQLiteStore) HasUidCid(ctx context.Context, user models.Uid, bcid cid.Cid) (bool, error) {
418 + // TODO: this is pretty cacheable? invalidate (uid,*) on WipeUserData
419 + sqHas.Inc()
420 + tx, err := sqs.db.BeginTx(ctx, &txReadOnly)
421 + if err != nil {
422 + return false, fmt.Errorf("hasUC tx, %w", err)
423 + }
424 + defer tx.Rollback()
425 + qstmt, err := tx.PrepareContext(ctx, "SELECT rev, root FROM blocks WHERE uid = ? AND cid = ? LIMIT 1")
426 + if err != nil {
427 + return false, fmt.Errorf("hasUC sql, %w", err)
428 + }
429 + defer qstmt.Close()
430 + rows, err := qstmt.QueryContext(ctx, user, models.DbCID{CID: bcid})
431 + if err != nil {
432 + return false, fmt.Errorf("hasUC err, %w", err)
433 + }
434 + if rows.Next() {
435 + var rev string
436 + var rootb models.DbCID
437 + err = rows.Scan(&rev, &rootb)
438 + if err != nil {
439 + return false, fmt.Errorf("hasUC bad scan, %w", err)
440 + }
441 + return true, nil
442 + }
443 + return false, nil
444 + }
445 + 
446 + func (sqs *SQLiteStore) CarStore() CarStore {
447 + return sqs
448 + }
449 + 
450 + func (sqs *SQLiteStore) Close() error {
451 + return sqs.db.Close()
452 + }
453 + 
454 + func (sqs *SQLiteStore) getBlock(ctx context.Context, user models.Uid, bcid cid.Cid) (blockformat.Block, error) {
455 + // TODO: this is pretty cacheable? invalidate (uid,*) on WipeUserData
456 + sqGetBlock.Inc()
457 + tx, err := sqs.db.BeginTx(ctx, &txReadOnly)
458 + if err != nil {
459 + return nil, fmt.Errorf("getb tx, %w", err)
460 + }
461 + defer tx.Rollback()
462 + qstmt, err := tx.PrepareContext(ctx, "SELECT block FROM blocks WHERE uid = ? AND cid = ? LIMIT 1")
463 + if err != nil {
464 + return nil, fmt.Errorf("getb sql, %w", err)
465 + }
466 + defer qstmt.Close()
467 + rows, err := qstmt.QueryContext(ctx, user, models.DbCID{CID: bcid})
468 + if err != nil {
469 + return nil, fmt.Errorf("getb err, %w", err)
470 + }
471 + if rows.Next() {
472 + //var rev string
473 + //var rootb models.DbCID
474 + var blockb []byte
475 + err = rows.Scan(&blockb)
476 + if err != nil {
477 + return nil, fmt.Errorf("getb bad scan, %w", err)
478 + }
479 + return blocks.NewBlock(blockb), nil
480 + }
481 + return nil, ErrNothingThere
482 + }
483 + 
484 + func (sqs *SQLiteStore) getBlockSize(ctx context.Context, user models.Uid, bcid cid.Cid) (int64, error) {
485 + // TODO: this is pretty cacheable? invalidate (uid,*) on WipeUserData
486 + sqGetBlockSize.Inc()
487 + tx, err := sqs.db.BeginTx(ctx, &txReadOnly)
488 + if err != nil {
489 + return 0, fmt.Errorf("getbs tx, %w", err)
490 + }
491 + defer tx.Rollback()
492 + qstmt, err := tx.PrepareContext(ctx, "SELECT length(block) FROM blocks WHERE uid = ? AND cid = ? LIMIT 1")
493 + if err != nil {
494 + return 0, fmt.Errorf("getbs sql, %w", err)
495 + }
496 + defer qstmt.Close()
497 + rows, err := qstmt.QueryContext(ctx, user, models.DbCID{CID: bcid})
498 + if err != nil {
499 + return 0, fmt.Errorf("getbs err, %w", err)
500 + }
501 + if rows.Next() {
502 + var out int64
503 + err = rows.Scan(&out)
504 + if err != nil {
505 + return 0, fmt.Errorf("getbs bad scan, %w", err)
506 + }
507 + return out, nil
508 + }
509 + return 0, nil
510 + }
511 + 
512 + type sqliteUserViewInner interface {
513 + HasUidCid(ctx context.Context, user models.Uid, bcid cid.Cid) (bool, error)
514 + getBlock(ctx context.Context, user models.Uid, bcid cid.Cid) (blockformat.Block, error)
515 + getBlockSize(ctx context.Context, user models.Uid, bcid cid.Cid) (int64, error)
516 + }
517 + 
518 + // TODO: rename, used by both sqlite and scylla
519 + type sqliteUserView struct {
520 + sqs sqliteUserViewInner
521 + uid models.Uid
522 + }
523 + 
524 + func (s sqliteUserView) Has(ctx context.Context, c cid.Cid) (bool, error) {
525 + // TODO: cache block metadata?
526 + return s.sqs.HasUidCid(ctx, s.uid, c)
527 + }
528 + 
529 + func (s sqliteUserView) Get(ctx context.Context, c cid.Cid) (blockformat.Block, error) {
530 + // TODO: cache blocks?
531 + return s.sqs.getBlock(ctx, s.uid, c)
532 + }
533 + 
534 + func (s sqliteUserView) GetSize(ctx context.Context, c cid.Cid) (int, error) {
535 + // TODO: cache block metadata?
536 + bigsize, err := s.sqs.getBlockSize(ctx, s.uid, c)
537 + return int(bigsize), err
538 + }
539 + 
540 + // ensure we implement the interface
541 + var _ minBlockstore = (*sqliteUserView)(nil)
542 + 
543 + var sqRowsDeleted = promauto.NewCounter(prometheus.CounterOpts{
544 + Name: "bgs_sq_rows_deleted",
545 + Help: "User rows deleted in sqlite backend",
546 + })
547 + 
548 + var sqGetBlock = promauto.NewCounter(prometheus.CounterOpts{
549 + Name: "bgs_sq_get_block",
550 + Help: "get block sqlite backend",
551 + })
552 + 
553 + var sqGetBlockSize = promauto.NewCounter(prometheus.CounterOpts{
554 + Name: "bgs_sq_get_block_size",
555 + Help: "get block size sqlite backend",
556 + })
557 + 
558 + var sqGetCar = promauto.NewCounter(prometheus.CounterOpts{
559 + Name: "bgs_sq_get_car",
560 + Help: "get car sqlite backend",
561 + })
562 + 
563 + var sqHas = promauto.NewCounter(prometheus.CounterOpts{
564 + Name: "bgs_sq_has",
565 + Help: "check block presence sqlite backend",
566 + })
567 + 
568 + var sqGetLastShard = promauto.NewCounter(prometheus.CounterOpts{
569 + Name: "bgs_sq_get_last_shard",
570 + Help: "get last shard sqlite backend",
571 + })
572 + 
573 + var sqWriteNewShard = promauto.NewCounter(prometheus.CounterOpts{
574 + Name: "bgs_sq_write_shard",
575 + Help: "write shard blocks sqlite backend",
576 + })
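
Taken together these methods cover the firehose-in/firehose-out loop the README describes: ImportSlice stages blocks in a DeltaSession, the insert statement at the top of this file persists them, and ReadUserCar streams them back out since a given rev. A minimal end-to-end sketch, not from this commit: it assumes the upstream indigo module paths and the package's existing DeltaSession.CloseWithRoot(ctx, root, rev) finalizer, and carslice and the rev string are illustrative placeholders.

```go
package main

import (
	"bytes"
	"context"

	"github.com/bluesky-social/indigo/carstore"
	"github.com/bluesky-social/indigo/models"
)

func main() {
	ctx := context.Background()
	cs, err := carstore.NewSqliteStore("/tmp/carstore-demo")
	if err != nil {
		panic(err)
	}
	defer cs.Close()

	uid := models.Uid(1)
	var carslice []byte // placeholder: a single-root CAR from a subscribeRepos #commit frame

	// firehose in: stage the slice, then finalize it at the commit's rev
	root, ds, err := cs.ImportSlice(ctx, uid, nil, carslice)
	if err != nil {
		panic(err)
	}
	if _, err := ds.CloseWithRoot(ctx, root, "3kexamplerev"); err != nil {
		panic(err)
	}

	// firehose out: everything after rev "" (i.e. all of it) as an incremental CAR
	var out bytes.Buffer
	if err := cs.ReadUserCar(ctx, uid, "", true, &out); err != nil {
		panic(err)
	}
}
```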
+54 -27
cmd/bigsky/main.go
··· 217 217 EnvVars: []string{"RELAY_NEXT_CRAWLER"},
218 218 },
219 219 &cli.BoolFlag{
220 + Name: "ex-sqlite-carstore",
221 + Usage: "enable experimental sqlite carstore",
222 + Value: false,
223 + },
224 + &cli.StringSliceFlag{
225 + Name: "scylla-carstore",
226 + Usage: "scylla server addresses for storage backend, comma separated",
227 + Value: &cli.StringSlice{},
228 + EnvVars: []string{"RELAY_SCYLLA_NODES"},
229 + },
230 + &cli.BoolFlag{
220 231 Name: "non-archival",
221 232 EnvVars: []string{"RELAY_NON_ARCHIVAL"},
222 233 Value: false,
··· 316 327 return err
317 328 }
318 329 
319 - slog.Info("setting up main database")
320 330 dburl := cctx.String("db-url")
331 + slog.Info("setting up main database", "url", dburl)
321 332 db, err := cliutil.SetupDatabase(dburl, cctx.Int("max-metadb-connections"))
322 333 if err != nil {
323 334 return err
324 335 }
325 - 
326 - slog.Info("setting up carstore database")
327 - csdburl := cctx.String("carstore-db-url")
328 - csdb, err := cliutil.SetupDatabase(csdburl, cctx.Int("max-carstore-connections"))
329 - if err != nil {
330 - return err
331 - }
332 - 
333 336 if cctx.Bool("db-tracing") {
334 337 if err := db.Use(tracing.NewPlugin()); err != nil {
335 338 return err
336 339 }
337 - if err := csdb.Use(tracing.NewPlugin()); err != nil {
338 - return err
339 - }
340 340 }
341 341 
342 - csdirs := []string{csdir}
343 - if paramDirs := cctx.StringSlice("carstore-shard-dirs"); len(paramDirs) > 0 {
344 - csdirs = paramDirs
345 - }
346 - 
347 - for _, csd := range csdirs {
348 - if err := os.MkdirAll(filepath.Dir(csd), os.ModePerm); err != nil {
342 + var cstore carstore.CarStore
343 + scyllaAddrs := cctx.StringSlice("scylla-carstore")
344 + sqliteStore := cctx.Bool("ex-sqlite-carstore")
345 + if len(scyllaAddrs) != 0 {
346 + slog.Info("starting scylla carstore", "addrs", scyllaAddrs)
347 + cstore, err = carstore.NewScyllaStore(scyllaAddrs, "cs")
348 + } else if sqliteStore {
349 + slog.Info("starting sqlite carstore", "dir", csdir)
350 + cstore, err = carstore.NewSqliteStore(csdir)
351 + } else if cctx.Bool("non-archival") {
352 + csdburl := cctx.String("carstore-db-url")
353 + slog.Info("setting up non-archival carstore database", "url", csdburl)
354 + csdb, err := cliutil.SetupDatabase(csdburl, cctx.Int("max-carstore-connections"))
355 + if err != nil {
349 356 return err
350 357 }
351 - }
352 - 
353 - var cstore carstore.CarStore
354 - 
355 - if cctx.Bool("non-archival") {
358 + if cctx.Bool("db-tracing") {
359 + if err := csdb.Use(tracing.NewPlugin()); err != nil {
360 + return err
361 + }
362 + }
356 363 cs, err := carstore.NewNonArchivalCarstore(csdb)
357 364 if err != nil {
358 365 return err
359 366 }
360 - 
361 367 cstore = cs
362 368 } else {
363 - cs, err := carstore.NewCarStore(csdb, csdirs)
369 + // make standard FileCarStore
370 + csdburl := cctx.String("carstore-db-url")
371 + slog.Info("setting up carstore database", "url", csdburl)
372 + csdb, err := cliutil.SetupDatabase(csdburl, cctx.Int("max-carstore-connections"))
364 373 if err != nil {
365 374 return err
366 375 }
376 + if cctx.Bool("db-tracing") {
377 + if err := csdb.Use(tracing.NewPlugin()); err != nil {
378 + return err
379 + }
380 + }
381 + csdirs := []string{csdir}
382 + if paramDirs := cctx.StringSlice("carstore-shard-dirs"); len(paramDirs) > 0 {
383 + csdirs = paramDirs
384 + }
367 385 
368 - cstore = cs
386 + for _, csd := range csdirs {
387 + if err := os.MkdirAll(filepath.Dir(csd), os.ModePerm); err != nil {
388 + return err
389 + }
390 + }
391 + if cstore, err = carstore.NewCarStore(csdb, csdirs); err != nil { return err } // nb: err is shadowed inside this block by the SetupDatabase short declaration, so check it before leaving the block
392 + }
393 + 
394 + if err != nil {
395 + return err
369 396 }
370 397 
371 398 // DID RESOLUTION
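
Backend selection is now strictly ordered: Scylla nodes if given, else the experimental sqlite store, else the non-archival store, else the classic FileCarStore. A hedged sketch of standing up the Scylla backend outside bigsky; NewScyllaStore's (nodes, keyspace) shape and the "cs" keyspace come from the call above, and RELAY_SCYLLA_NODES matches the flag's EnvVars entry:

```go
// openScyllaCarstore mirrors the flag handling in main(); sketch only.
func openScyllaCarstore() (carstore.CarStore, error) {
	// the env var holds a comma-separated node list, like the --scylla-carstore flag
	nodes := strings.Split(os.Getenv("RELAY_SCYLLA_NODES"), ",")
	return carstore.NewScyllaStore(nodes, "cs")
}
```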
+2 -2
cmd/gosky/debug.go
··· 885 885 886 886 rep1, err = repo.ReadRepoFromCar(ctx, bytes.NewReader(repo1bytes)) 887 887 if err != nil { 888 - logger.Error("reading repo", "err", err) 888 + logger.Error("reading repo", "err", err, "bytes", len(repo1bytes)) 889 889 os.Exit(1) 890 890 return 891 891 } ··· 904 904 905 905 rep2, err = repo.ReadRepoFromCar(ctx, bytes.NewReader(repo2bytes)) 906 906 if err != nil { 907 - logger.Error("reading repo", "err", err) 907 + logger.Error("reading repo", "err", err, "bytes", len(repo2bytes)) 908 908 os.Exit(1) 909 909 return 910 910 }
+6 -1
go.mod
··· 16 16 github.com/flosch/pongo2/v6 v6.0.0 17 17 github.com/go-redis/cache/v9 v9.0.0 18 18 github.com/goccy/go-json v0.10.2 19 + github.com/gocql/gocql v0.0.0-00010101000000-000000000000 19 20 github.com/golang-jwt/jwt v3.2.2+incompatible 20 21 github.com/gorilla/websocket v1.5.1 21 22 github.com/hashicorp/go-retryablehttp v0.7.5 ··· 90 91 github.com/getsentry/sentry-go v0.27.0 // indirect 91 92 github.com/go-redis/redis v6.15.9+incompatible // indirect 92 93 github.com/golang/snappy v0.0.4 // indirect 94 + github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed // indirect 93 95 github.com/hashicorp/golang-lru v1.0.2 // indirect 94 96 github.com/ipfs/go-log v1.0.5 // indirect 95 97 github.com/jackc/puddle/v2 v2.2.1 // indirect ··· 106 108 github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect 107 109 github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect 108 110 golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa // indirect 111 + gopkg.in/inf.v0 v0.9.1 // indirect 109 112 ) 110 113 111 114 require ( ··· 152 155 github.com/lestrrat-go/option v1.0.1 // indirect 153 156 github.com/mattn/go-colorable v0.1.13 // indirect 154 157 github.com/mattn/go-isatty v0.0.20 // indirect 155 - github.com/mattn/go-sqlite3 v1.14.22 // indirect 158 + github.com/mattn/go-sqlite3 v1.14.22 156 159 github.com/multiformats/go-base32 v0.1.0 // indirect 157 160 github.com/multiformats/go-base36 v0.2.0 // indirect 158 161 github.com/multiformats/go-multibase v0.2.0 // indirect ··· 188 191 gopkg.in/yaml.v3 v3.0.1 // indirect 189 192 lukechampine.com/blake3 v1.2.1 // indirect 190 193 ) 194 + 195 + replace github.com/gocql/gocql => github.com/scylladb/gocql v1.14.4
+14
go.sum
··· 73 73 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= 74 74 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 75 75 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 76 + github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932 h1:mXoPYz/Ul5HYEDvkta6I8/rnYM5gSdSV2tJ6XbZuEtY= 77 + github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k= 78 + github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= 79 + github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= 76 80 github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874 h1:N7oVaKyGp8bttX0bfZGmcGkjz7DLQXhAn3DNd3T0ous= 77 81 github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874/go.mod h1:r5xuitiExdLAJ09PR7vBVENGvp4ZuTBeWTGtxuX3K+c= 78 82 github.com/brianvoe/gofakeit/v6 v6.25.0 h1:ZpFjktOpLZUeF8q223o0rUuXtA+m5qW5srjvVi+JkXk= ··· 211 215 github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 212 216 github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 213 217 github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 218 + github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 214 219 github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= 215 220 github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 216 221 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= ··· 255 260 github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= 256 261 github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 h1:6UKoz5ujsI55KNpsJH3UwCq3T8kKbZwNZBNPuTTje8U= 257 262 github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1/go.mod h1:YvJ2f6MplWDhfxiUC3KpyTy76kYUZA4W3pTv/wdKQ9Y= 263 + github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8= 264 + github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= 258 265 github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= 259 266 github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= 260 267 github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI= ··· 592 599 github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= 593 600 github.com/samber/slog-echo v1.8.0 h1:DQQRtAliSvQw+ScEdu5gv3jbHu9cCTzvHuTD8GDv7zI= 594 601 github.com/samber/slog-echo v1.8.0/go.mod h1:0ab2AwcciQXNAXEcjkHwD9okOh9vEHEYn8xP97ocuhM= 602 + github.com/scylladb/gocql v1.14.4 h1:MhevwCfyAraQ6RvZYFO3pF4Lt0YhvQlfg8Eo2HEqVQA= 603 + github.com/scylladb/gocql v1.14.4/go.mod h1:ZLEJ0EVE5JhmtxIW2stgHq/v1P4fWap0qyyXSKyV8K0= 595 604 github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4/go.mod h1:C1a7PQSMz9NShzorzCiG2fk9+xuCgLkPeCvMHYR2OWg= 596 605 github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys= 597 606 github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs= ··· 808 817 golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= 809 818 
golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= 810 819 golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= 820 + golang.org/x/net v0.0.0-20220526153639-5463443f8c37/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 811 821 golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 812 822 golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= 813 823 golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= ··· 1092 1102 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 1093 1103 gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= 1094 1104 gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 1105 + gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= 1106 + gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 1095 1107 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= 1096 1108 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 1097 1109 gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= ··· 1126 1138 rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= 1127 1139 rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= 1128 1140 rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= 1141 + sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= 1142 + sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
+14 -6
indexer/crawler.go
··· 14 14 ) 15 15 16 16 type CrawlDispatcher struct { 17 + // from Crawl() 17 18 ingest chan *models.ActorInfo 18 19 20 + // from AddToCatchupQueue() 21 + catchup chan *crawlWork 22 + 23 + // from main loop to fetchWorker() 19 24 repoSync chan *crawlWork 20 25 21 - catchup chan *crawlWork 22 - 23 26 complete chan models.Uid 24 27 25 28 maplk sync.Mutex 26 29 todo map[models.Uid]*crawlWork 27 30 inProgress map[models.Uid]*crawlWork 28 31 29 - doRepoCrawl func(context.Context, *crawlWork) error 32 + repoFetcher CrawlRepoFetcher 30 33 31 34 concurrency int 32 35 ··· 35 38 done chan struct{} 36 39 } 37 40 38 - func NewCrawlDispatcher(repoFn func(context.Context, *crawlWork) error, concurrency int, log *slog.Logger) (*CrawlDispatcher, error) { 41 + // this is what we need of RepoFetcher 42 + type CrawlRepoFetcher interface { 43 + FetchAndIndexRepo(ctx context.Context, job *crawlWork) error 44 + } 45 + 46 + func NewCrawlDispatcher(repoFetcher CrawlRepoFetcher, concurrency int, log *slog.Logger) (*CrawlDispatcher, error) { 39 47 if concurrency < 1 { 40 48 return nil, fmt.Errorf("must specify a non-zero positive integer for crawl dispatcher concurrency") 41 49 } ··· 45 53 repoSync: make(chan *crawlWork), 46 54 complete: make(chan models.Uid), 47 55 catchup: make(chan *crawlWork), 48 - doRepoCrawl: repoFn, 56 + repoFetcher: repoFetcher, 49 57 concurrency: concurrency, 50 58 todo: make(map[models.Uid]*crawlWork), 51 59 inProgress: make(map[models.Uid]*crawlWork), ··· 221 229 for { 222 230 select { 223 231 case job := <-c.repoSync: 224 - if err := c.doRepoCrawl(context.TODO(), job); err != nil { 232 + if err := c.repoFetcher.FetchAndIndexRepo(context.TODO(), job); err != nil { 225 233 c.log.Error("failed to perform repo crawl", "did", job.act.Did, "err", err) 226 234 } 227 235
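
Depending on the narrow CrawlRepoFetcher interface instead of a bare function pointer makes the dispatcher easy to exercise without a live PDS. A hedged test sketch; it must live in package indexer because crawlWork is unexported:

```go
package indexer

import (
	"context"
	"log/slog"
	"testing"
)

// stubFetcher satisfies CrawlRepoFetcher without touching the network.
type stubFetcher struct{ crawls int }

func (s *stubFetcher) FetchAndIndexRepo(ctx context.Context, job *crawlWork) error {
	s.crawls++
	return nil
}

func TestNewCrawlDispatcherAcceptsStub(t *testing.T) {
	if _, err := NewCrawlDispatcher(&stubFetcher{}, 2, slog.Default()); err != nil {
		t.Fatal(err)
	}
	// a fuller test would enqueue work via Crawl / AddToCatchupQueue and run the workers
}
```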
+1 -1
indexer/indexer.go
··· 69 69 } 70 70 71 71 if crawl { 72 - c, err := NewCrawlDispatcher(fetcher.FetchAndIndexRepo, fetcher.MaxConcurrency, ix.log) 72 + c, err := NewCrawlDispatcher(fetcher, fetcher.MaxConcurrency, ix.log) 73 73 if err != nil { 74 74 return nil, err 75 75 }
+3 -1
indexer/repofetch.go
··· 141 141 } 142 142 } 143 143 144 + revp := &rev 144 145 if rev == "" { 145 146 span.SetAttributes(attribute.Bool("full", true)) 147 + revp = nil 146 148 } 147 149 148 150 c := models.ClientForPds(&pds) ··· 153 155 return err 154 156 } 155 157 156 - if err := rf.repoman.ImportNewRepo(ctx, ai.Uid, ai.Did, bytes.NewReader(repo), &rev); err != nil { 158 + if err := rf.repoman.ImportNewRepo(ctx, ai.Uid, ai.Did, bytes.NewReader(repo), revp); err != nil { 157 159 span.RecordError(err) 158 160 159 161 if ipld.IsNotFound(err) || errors.Is(err, io.EOF) || errors.Is(err, fs.ErrNotExist) {
+13
models/dbcid.go
··· 4 4 "database/sql/driver" 5 5 "encoding/json" 6 6 "fmt" 7 + "github.com/gocql/gocql" 7 8 8 9 "github.com/ipfs/go-cid" 9 10 ) ··· 62 63 func (dbc *DbCID) GormDataType() string { 63 64 return "bytes" 64 65 } 66 + 67 + func (dbc *DbCID) MarshalCQL(info gocql.TypeInfo) ([]byte, error) { 68 + return dbc.CID.Bytes(), nil 69 + } 70 + func (dbc *DbCID) UnmarshalCQL(info gocql.TypeInfo, data []byte) error { 71 + xcid, err := cid.Cast(data) 72 + if err != nil { 73 + return err 74 + } 75 + dbc.CID = xcid 76 + return nil 77 + }
+4 -4
repo/repo.go
··· 80 80 81 81 br, err := car.NewBlockReader(r) 82 82 if err != nil { 83 - return cid.Undef, err 83 + return cid.Undef, fmt.Errorf("IngestRepo:NewBlockReader: %w", err) 84 84 } 85 85 86 86 for { ··· 89 89 if err == io.EOF { 90 90 break 91 91 } 92 - return cid.Undef, err 92 + return cid.Undef, fmt.Errorf("IngestRepo:Next: %w", err) 93 93 } 94 94 95 95 if err := bs.Put(ctx, blk); err != nil { 96 - return cid.Undef, err 96 + return cid.Undef, fmt.Errorf("IngestRepo:Put: %w", err) 97 97 } 98 98 } 99 99 ··· 104 104 bs := blockstore.NewBlockstore(datastore.NewMapDatastore()) 105 105 root, err := IngestRepo(ctx, bs, r) 106 106 if err != nil { 107 - return nil, err 107 + return nil, fmt.Errorf("ReadRepoFromCar:IngestRepo: %w", err) 108 108 } 109 109 110 110 return OpenRepo(ctx, bs, root)
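
Since %w wrapping preserves the underlying cause, callers can still branch on sentinel errors through the new context strings. A hedged caller-side sketch; whether a truncated CAR surfaces as io.ErrUnexpectedEOF depends on the block reader, so treat the sentinel as illustrative:

```go
func loadRepo(ctx context.Context, data []byte) (*repo.Repo, error) {
	rep, err := repo.ReadRepoFromCar(ctx, bytes.NewReader(data))
	if err != nil {
		if errors.Is(err, io.ErrUnexpectedEOF) {
			return nil, fmt.Errorf("truncated repo car (%d bytes): %w", len(data), err)
		}
		return nil, err
	}
	return rep, nil
}
```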
+3
repomgr/repomgr.go
··· 912 912 return err 913 913 } 914 914 915 + if rev != nil && *rev == "" { 916 + rev = nil 917 + } 915 918 if rev == nil { 916 919 // if 'rev' is nil, this implies a fresh sync. 917 920 // in this case, ignore any existing blocks we have and treat this like a clean import.
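
This hunk and the repofetch.go change above are two halves of one convention: an unknown revision travels as nil, and an empty string is normalized to nil before the fresh-sync check. A hypothetical helper capturing the rule (both call sites inline it):

```go
// normalizeRev: nil and "" both mean "no known previous revision; do a clean import".
func normalizeRev(rev *string) *string {
	if rev != nil && *rev == "" {
		return nil
	}
	return rev
}
```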
+8 -6
testing/utils.go
··· 210 210 } 211 211 212 212 limReqBody := bgs.RateLimitChangeRequest{ 213 - Host: u.Host, 214 - PerSecond: 5_000, 215 - PerHour: 100_000, 216 - PerDay: 1_000_000, 217 - RepoLimit: 500_000, 218 - CrawlRate: 50_000, 213 + Host: u.Host, 214 + PDSRates: bgs.PDSRates{ 215 + PerSecond: 5_000, 216 + PerHour: 100_000, 217 + PerDay: 1_000_000, 218 + RepoLimit: 500_000, 219 + CrawlRate: 50_000, 220 + }, 219 221 } 220 222 221 223 // JSON encode the request body
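
Because PDSRates is embedded without a json tag, its fields are promoted into the enclosing object, so the admin API's wire format is identical to the old flat struct. A short demonstration (the host is made up, omitempty drops the unset limits, and the import path assumes the upstream indigo layout):

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/bluesky-social/indigo/bgs"
)

func main() {
	body, _ := json.Marshal(bgs.RateLimitChangeRequest{
		Host: "pds.example.com", // hypothetical host
		PDSRates: bgs.PDSRates{
			PerSecond: 5_000,
			PerHour:   100_000,
		},
	})
	fmt.Println(string(body))
	// {"host":"pds.example.com","per_second":5000,"per_hour":100000}
}
```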