···330330 // LRU cache for hot shards
331331 shard_cache: Arc<RwLock<HashMap<u8, Arc<Shard>>>>,
332332 max_cache: usize,
333333- max_segments_per_shard: usize,
334333335334 config: Arc<RwLock<Config>>,
336335···398397 config_path,
399398 shard_cache: Arc::new(RwLock::new(HashMap::new())),
400399 max_cache: 5,
401401- max_segments_per_shard: 8,
402400 config: Arc::new(RwLock::new(config.clone())),
403401 cache_hits: AtomicI64::new(0),
404402 cache_misses: AtomicI64::new(0),
···13951393 }
1396139413971395 let compacted = if meta_opt.is_some() {
13981398- self.auto_compact_if_needed(shard_num)?
13961396+ self.compact_shard(shard_num)?;
13971397+ true
13991398 } else {
14001399 false
14011400 };
···16811680 }
1682168116831682 /// Compact pending delta segments. If `shards` is `None`, all shards are compacted.
16831683+ /// Uses parallel execution across shards for faster compaction.
16841684 pub fn compact_pending_segments(&self, shards: Option<Vec<u8>>) -> Result<()> {
16851685+ use rayon::prelude::*;
16861686+16851687 match shards {
16861688 Some(list) if !list.is_empty() => {
16871687- for shard in list {
16881688- self.compact_shard(shard)?;
16891689- }
16891689+ list.into_par_iter()
16901690+ .map(|shard| self.compact_shard(shard))
16911691+ .collect::<Result<Vec<_>>>()?;
16901692 }
16911693 _ => {
16921692- for shard in 0..DID_SHARD_COUNT {
16931693- self.compact_shard(shard as u8)?;
16941694- }
16941694+ (0..DID_SHARD_COUNT)
16951695+ .into_par_iter()
16961696+ .map(|shard| self.compact_shard(shard as u8))
16971697+ .collect::<Result<Vec<_>>>()?;
16951698 }
16961699 }
16971700···17611764 Ok(layers)
17621765 }
1763176617641764- fn auto_compact_if_needed(&self, shard_num: u8) -> Result<bool> {
17651765- let should_compact = {
17661766- let config = self.config.read().unwrap();
17671767- config
17681768- .shards
17691769- .get(shard_num as usize)
17701770- .map(|meta| meta.segments.len() >= self.max_segments_per_shard)
17711771- .unwrap_or(false)
17721772- };
17731773-17741774- if should_compact {
17751775- self.compact_shard(shard_num)?;
17761776- Ok(true)
17771777- } else {
17781778- Ok(false)
17791779- }
17801780- }
17671767+ // removed: auto_compact_if_needed (compaction now happens immediately after updates)
1781176817821769 fn compact_shard(&self, shard_num: u8) -> Result<()> {
17831770 use std::time::Instant;
···36223609 .get("delta_segments")
36233610 .and_then(|v| v.as_u64())
36243611 .unwrap_or(0);
36253625- assert!(
36263626- delta_segments > 0,
36273627- "expected pending delta segments, got {}",
36283628- delta_segments
36123612+ assert_eq!(
36133613+ delta_segments, 0,
36143614+ "expected no pending delta segments after immediate compaction"
36293615 );
36303616 }
36313617
+87-25
src/manager.rs
···16481648 ///
16491649 /// IMPORTANT: This method performs heavy blocking I/O and should be called from async
16501650 /// contexts using spawn_blocking to avoid freezing the async runtime (and HTTP server).
16511651- pub fn batch_update_did_index(&self, start_bundle: u32, end_bundle: u32) -> Result<()> {
16511651+ pub fn batch_update_did_index(
16521652+ &self,
16531653+ start_bundle: u32,
16541654+ end_bundle: u32,
16551655+ compact: bool,
16561656+ ) -> Result<()> {
16521657 use std::time::Instant;
1653165816541659 if start_bundle > end_bundle {
···1657166216581663 let total_start = Instant::now();
16591664 let bundle_count = end_bundle - start_bundle + 1;
16651665+ if bundle_count > 10 {
16661666+ use std::time::Instant;
16671667+ eprintln!(
16681668+ "[DID Index] Rebuild triggered for {} bundles ({} → {})",
16691669+ bundle_count, start_bundle, end_bundle
16701670+ );
16711671+ let rebuild_start = Instant::now();
16721672+ let _ = self.build_did_index(
16731673+ crate::constants::DID_INDEX_FLUSH_INTERVAL,
16741674+ Some(
16751675+ |current: u32, total: u32, bytes_processed: u64, total_bytes: u64| {
16761676+ let percent = if total_bytes > 0 {
16771677+ (bytes_processed as f64 / total_bytes as f64) * 100.0
16781678+ } else {
16791679+ 0.0
16801680+ };
16811681+ eprintln!(
16821682+ "[DID Index] Rebuild progress: {}/{} ({:.1}%)",
16831683+ current, total, percent
16841684+ );
16851685+ },
16861686+ ),
16871687+ None,
16881688+ None,
16891689+ )?;
16901690+ let dur = rebuild_start.elapsed();
16911691+ eprintln!("[DID Index] Rebuild complete in {:.1}s", dur.as_secs_f64());
16921692+ return Ok(());
16931693+ }
1660169416611695 if *self.verbose.lock().unwrap() {
16621696 log::info!(
···17101744 total_operations as f64 / update_duration.as_secs_f64()
17111745 );
1712174617471747+ // Optionally compact all shards immediately to avoid leaving delta segments
17481748+ if compact {
17491749+ let idx_guard = self.did_index.read().unwrap();
17501750+ if let Some(idx) = idx_guard.as_ref() {
17511751+ idx.compact_pending_segments(None)?;
17521752+ }
17531753+ }
17541754+17131755 let total_duration = total_start.elapsed();
1714175617151757 if *self.verbose.lock().unwrap() {
···17341776 &self,
17351777 start_bundle: u32,
17361778 end_bundle: u32,
17791779+ compact: bool,
17371780 ) -> Result<()> {
17381781 let manager = self.clone_for_arc();
1739178217401740- tokio::task::spawn_blocking(move || {
17411741- manager.batch_update_did_index(start_bundle, end_bundle)
17831783+ // First perform the batch update in a blocking task
17841784+ let _ = tokio::task::spawn_blocking(move || {
17851785+ manager.batch_update_did_index(start_bundle, end_bundle, compact)
17421786 })
17431787 .await
17441744- .map_err(|e| anyhow::anyhow!("Batch DID index update task failed: {}", e))?
17881788+ .map_err(|e| anyhow::anyhow!("Batch DID index update task failed: {}", e))?;
17891789+17901790+ Ok(())
17451791 }
1746179217471793 /// Fetch and save next bundle from PLC directory
···17501796 &self,
17511797 client: &crate::plc_client::PLCClient,
17521798 shutdown_rx: Option<tokio::sync::watch::Receiver<bool>>,
17991799+ update_did_index: bool,
17531800 ) -> Result<SyncResult> {
17541801 use crate::sync::{get_boundary_cids, strip_boundary_duplicates};
17551802 use std::time::Instant;
···18731920 }
1874192118751922 let fetch_op_start = Instant::now();
18761876- if let Some(ref rx) = shutdown_rx && *rx.borrow() {
19231923+ if let Some(ref rx) = shutdown_rx
19241924+ && *rx.borrow()
19251925+ {
18771926 anyhow::bail!("Shutdown requested");
18781927 }
18791928 let (plc_ops, wait_dur, http_dur) = if let Some(rx) = shutdown_rx.clone() {
···20642113 index_write_time,
20652114 did_index_compacted,
20662115 ) = self
20672067- .save_bundle_with_timing(next_bundle_num, operations)
21162116+ .save_bundle_with_timing(next_bundle_num, operations, update_did_index)
20682117 .await?;
20692118 let save_duration = save_start.elapsed();
20702119···21132162 let total_duration_ms = (fetch_total_duration + save_duration).as_millis() as u64;
21142163 let fetch_duration_ms = fetch_total_duration.as_millis() as u64;
2115216421162116- // Calculate separate timings: bundle save (serialize + compress + hash) vs index (did_index + index_write)
21172117- let bundle_save_ms = (serialize_time + compress_time + hash_time).as_millis() as u64;
21182118- let index_ms = (did_index_time + index_write_time).as_millis() as u64;
21652165+ // Calculate separate timings: bundle save vs index write/DID index
21662166+ let (bundle_save_ms, index_ms) = if update_did_index {
21672167+ (
21682168+ (serialize_time + compress_time + hash_time).as_millis() as u64,
21692169+ (did_index_time + index_write_time).as_millis() as u64,
21702170+ )
21712171+ } else {
21722172+ (
21732173+ (serialize_time + compress_time + hash_time + index_write_time).as_millis() as u64,
21742174+ 0,
21752175+ )
21762176+ };
2119217721202178 // Only log detailed info in verbose mode
21212179 if *self.verbose.lock().unwrap() {
···21642222 let mut synced = 0;
2165222321662224 loop {
21672167- match self.sync_next_bundle(client, None).await {
22252225+ match self.sync_next_bundle(client, None, true).await {
21682226 Ok(SyncResult::BundleCreated { .. }) => {
21692227 synced += 1;
21702228···21932251 &self,
21942252 bundle_num: u32,
21952253 operations: Vec<Operation>,
22542254+ update_did_index: bool,
21962255 ) -> Result<(
21972256 std::time::Duration,
21982257 std::time::Duration,
···24912550 );
24922551 }
2493255224942494- // Update DID index (now fast with delta segments)
24952495- let did_index_start = Instant::now();
24962496- let did_ops: Vec<(String, bool)> = operations
24972497- .iter()
24982498- .map(|op| (op.did.clone(), op.nullified))
24992499- .collect();
25532553+ let (did_index_time, did_index_compacted) = if update_did_index {
25542554+ let did_index_start = Instant::now();
25552555+ let did_ops: Vec<(String, bool)> = operations
25562556+ .iter()
25572557+ .map(|op| (op.did.clone(), op.nullified))
25582558+ .collect();
2500255925012501- self.ensure_did_index()?;
25022502- let did_index_compacted = self
25032503- .did_index
25042504- .write()
25052505- .unwrap()
25062506- .as_mut()
25072507- .unwrap()
25082508- .update_for_bundle(bundle_num, did_ops)?;
25092509- let did_index_time = did_index_start.elapsed();
25602560+ self.ensure_did_index()?;
25612561+ let compacted = self
25622562+ .did_index
25632563+ .write()
25642564+ .unwrap()
25652565+ .as_mut()
25662566+ .unwrap()
25672567+ .update_for_bundle(bundle_num, did_ops)?;
25682568+ (did_index_start.elapsed(), compacted)
25692569+ } else {
25702570+ (std::time::Duration::from_millis(0), false)
25712571+ };
2510257225112573 // Update main index
25122574 let index_write_start = Instant::now();
+10-8
src/plc_client.rs
···7373 after: &str,
7474 count: usize,
7575 ) -> Result<(Vec<PLCOperation>, Duration, Duration)> {
7676- self.fetch_operations_with_retry_cancelable(after, count, 5, None).await
7676+ self.fetch_operations_with_retry_cancelable(after, count, 5, None)
7777+ .await
7778 }
78797980 pub async fn fetch_operations_cancelable(
···8283 count: usize,
8384 shutdown_rx: Option<tokio::sync::watch::Receiver<bool>>,
8485 ) -> Result<(Vec<PLCOperation>, Duration, Duration)> {
8585- self.fetch_operations_with_retry_cancelable(after, count, 5, shutdown_rx).await
8686+ self.fetch_operations_with_retry_cancelable(after, count, 5, shutdown_rx)
8787+ .await
8688 }
87898890 async fn fetch_operations_with_retry_cancelable(
···98100 let mut total_http = Duration::from_secs(0);
99101100102 for attempt in 1..=max_retries {
101101- if let Some(ref rx) = shutdown_rx && *rx.borrow() {
103103+ if let Some(ref rx) = shutdown_rx
104104+ && *rx.borrow()
105105+ {
102106 anyhow::bail!("Shutdown requested");
103107 }
104104- let export_url = format!(
105105- "{}/export?after={}&count={}",
106106- self.base_url, after, count
107107- );
108108+ let export_url = format!("{}/export?after={}&count={}", self.base_url, after, count);
108109109110 let permits = self.rate_limiter.available_permits();
110111 let requests_in_period = self.count_requests_in_period();
···369370 let semaphore = std::sync::Arc::new(tokio::sync::Semaphore::new(0));
370371 let sem_clone = semaphore.clone();
371372372372- let refill_rate = Duration::from_secs_f64(period.as_secs_f64() / requests_per_period as f64);
373373+ let refill_rate =
374374+ Duration::from_secs_f64(period.as_secs_f64() / requests_per_period as f64);
373375374376 // Spawn background task to refill permits at steady rate
375377 // CRITICAL: Add first permit immediately, then refill at steady rate
+60-13
src/sync.rs
···289289 } else {
290290 format!("{:.0}KB", size_kb)
291291 };
292292-293293- eprintln!(
294294- "[INFO] → Bundle {:06} | {} | {} dids | {} | fetch: {:.2}s ({} reqs, {:.1}s wait) | save: {}ms | index: {}ms | {}",
292292+ let base = format!(
293293+ "[INFO] → Bundle {:06} | {} | {} dids | {} | fetch: {:.2}s ({} reqs, {:.1}s wait) | save: {}ms",
295294 bundle_num,
296295 hash,
297296 unique_dids,
···299298 fetch_secs,
300299 fetch_requests,
301300 wait_secs,
302302- bundle_save_ms,
303303- index_ms,
304304- age
301301+ bundle_save_ms
305302 );
303303+ if index_ms > 0 {
304304+ eprintln!("{} | index: {}ms | {}", base, index_ms, age);
305305+ } else {
306306+ eprintln!("{} | {}", base, age);
307307+ }
306308 }
307309308310 fn on_caught_up(
···504506505507 match self
506508 .manager
507507- .sync_next_bundle(&self.client, None)
509509+ .sync_next_bundle(&self.client, None, true)
508510 .await
509511 {
510512 Ok(crate::manager::SyncResult::BundleCreated {
···597599598600 let mut total_synced = 0u32;
599601 let mut is_initial_sync = true;
602602+ let mut did_index_batch_done = false;
603603+ let mut initial_sync_first_bundle: Option<u32> = None;
600604601605 // Notify logger that sync is starting
602606 if let Some(logger) = &self.logger {
···635639636640 let sync_result = self
637641 .manager
638638- .sync_next_bundle(&self.client, self.config.shutdown_rx.clone())
642642+ .sync_next_bundle(
643643+ &self.client,
644644+ self.config.shutdown_rx.clone(),
645645+ !is_initial_sync,
646646+ )
639647 .await;
640648641649 match sync_result {
···656664 fetch_http_ms,
657665 }) => {
658666 total_synced += 1;
667667+ if is_initial_sync && initial_sync_first_bundle.is_none() {
668668+ initial_sync_first_bundle = Some(bundle_num);
669669+ }
659670660671 // Reset error counter on successful sync
661672 use std::sync::atomic::{AtomicU32, Ordering};
···748759 }
749760750761 // Caught up to the end of the chain
751751- // Mark initial sync as complete ONLY if we actually synced at least one bundle.
752752- // This prevents premature "initial sync complete" when we just have a full
753753- // mempool from a previous run but still have thousands of bundles to sync.
754754- if is_initial_sync && total_synced > 0 {
755755- is_initial_sync = false;
762762+ // When initial sync finishes, perform a single batch DID index update if the index is empty
763763+ // or if we created bundles during initial sync with per-bundle updates disabled.
764764+ if is_initial_sync && !did_index_batch_done {
765765+ let stats = self.manager.get_did_index_stats();
766766+ let total_dids = stats
767767+ .get("total_dids")
768768+ .and_then(|v| v.as_u64())
769769+ .unwrap_or(0);
770770+ let total_entries = stats
771771+ .get("total_entries")
772772+ .and_then(|v| v.as_u64())
773773+ .unwrap_or(0);
756774775775+ let end_bundle = self.manager.get_last_bundle();
776776+ let start_bundle = initial_sync_first_bundle.unwrap_or(1);
777777+778778+ // Only run batch update if there are bundles to process and either index is empty
779779+ // or we created some bundles during this initial sync.
780780+ let created_bundles = total_synced > 0;
781781+ let index_is_empty = total_dids == 0 && total_entries == 0;
782782+ if end_bundle >= start_bundle && (index_is_empty || created_bundles) {
783783+ if self.config.verbose {
784784+ eprintln!(
785785+ "[Sync] Performing batch DID index update: {} → {} (index empty={}, created_bundles={})",
786786+ start_bundle, end_bundle, index_is_empty, created_bundles
787787+ );
788788+ }
789789+ if let Err(e) = self
790790+ .manager
791791+ .batch_update_did_index_async(start_bundle, end_bundle, true)
792792+ .await
793793+ {
794794+ eprintln!(
795795+ "[Sync] Batch DID index update failed after initial sync: {}",
796796+ e
797797+ );
798798+ } else {
799799+ did_index_batch_done = true;
800800+ }
801801+ }
802802+803803+ is_initial_sync = false;
757804 self.handle_event(&SyncEvent::InitialSyncComplete {
758805 total_bundles: total_synced,
759806 mempool_count,
+2-2
tests/manager.rs
···29293030 // Build DID index so DID lookups work
3131 manager
3232- .batch_update_did_index_async(1, manager.get_last_bundle())
3232+ .batch_update_did_index_async(1, manager.get_last_bundle(), false)
3333 .await?;
34343535 // Query DID operations and resolve DID
···127127 common::add_dummy_bundle(&dir2_path)?;
128128 let manager2 = plcbundle::BundleManager::new(dir2_path.clone(), ())?;
129129 manager2
130130- .batch_update_did_index_async(1, manager2.get_last_bundle())
130130+ .batch_update_did_index_async(1, manager2.get_last_bundle(), false)
131131 .await?;
132132133133 // Verify we can query DID operations from the newly built index
+2-2
tests/server.rs
···5858 let manager = Arc::new(manager);
5959 // Build DID index so the resolver can find operations in bundles
6060 manager
6161- .batch_update_did_index_async(1, manager.get_last_bundle())
6161+ .batch_update_did_index_async(1, manager.get_last_bundle(), false)
6262 .await?;
6363 let port = 3032;
6464 let server_handle = common::start_test_server(Arc::clone(&manager), port).await?;
···149149 let manager = Arc::new(manager);
150150 // Ensure DID index is available for data/op lookups
151151 manager
152152- .batch_update_did_index_async(1, manager.get_last_bundle())
152152+ .batch_update_did_index_async(1, manager.get_last_bundle(), false)
153153 .await?;
154154 let port = 3031;
155155 let server_handle = common::start_test_server(Arc::clone(&manager), port).await?;