Very fast AT Protocol indexer with flexible filtering, XRPC queries, a cursor-backed event stream, and more, built on fjall
rust fjall at-protocol atproto indexer

[db] tune keyspaces more

ptr.pet 9cfdd525 94a3ac85

verified
+13 -5
+13 -5
src/db/mod.rs
··· 149 149 let blocks = open_ks( 150 150 "blocks", 151 151 opts() 152 - // point reads are used a lot by stream 152 + // point reads are used a lot by stream, we know the blocks exist though 153 153 .expect_point_read_hits(true) 154 154 .max_memtable_size(mb(cfg.db_blocks_memtable_size_mb)) 155 - // 32 - 64 kb is probably fine, as the newer blocks will be in the first levels 155 + // 16 - 128 kb is probably fine, as the newer blocks will be in the first levels 156 156 // and any consumers will probably be streaming the newer events... 157 - .data_block_size_policy(BlockSizePolicy::new([kb(4), kb(8), kb(32), kb(64)])), 157 + // and blocks are pretty big-ish like around 5kb usually so this helps i think 158 + .data_block_size_policy(BlockSizePolicy::new([kb(8), kb(16), kb(64), kb(128)])), 158 159 )?; 159 160 let records = open_ks( 160 161 "records", ··· 182 183 .max_memtable_size(mb(cfg.db_pending_memtable_size_mb)) 183 184 .data_block_size_policy(BlockSizePolicy::all(kb(4))), 184 185 )?; 185 - // resync point reads often miss (because most repos aren't resyncing), so keeping the bloom filter helps avoid disk hits 186 186 let resync = open_ks( 187 187 "resync", 188 188 opts() 189 + // we only point read in backfill when we check for existing resync state 190 + // ...and also in repos api. so we can disable bloom filters 191 + .expect_point_read_hits(true) 189 192 .max_memtable_size(mb(cfg.db_pending_memtable_size_mb)) 190 193 .data_block_size_policy(BlockSizePolicy::all(kb(8))), 191 194 )?; ··· 203 206 // only iterators are used here, no point reads 204 207 .expect_point_read_hits(true) 205 208 .max_memtable_size(mb(cfg.db_events_memtable_size_mb)) 209 + // the compression here wont be good since events are quite random 210 + // eg. by many different repos and different records etc. 
211 + // since it's sequential we should still go with a bigger block size though