just playing with tangled

index: omit operations that should be covered by the parent index

I'm not going to add an operation index anytime soon, but this might help if we
decide to. If we index "commit_id: originating_op_id" relations, we'll probably
want to store the op_ids in a separate sstable to save disk space. If that
table contains all operation ids, we can also reuse it to resolve short
operation ids.

Changed files
+35 -11
lib
src
default_index
+35 -11
lib/src/default_index/store.rs
··· 15 15 #![allow(missing_docs)] 16 16 17 17 use std::any::Any; 18 + use std::collections::HashMap; 18 19 use std::collections::HashSet; 19 20 use std::fs; 20 21 use std::io; ··· 193 194 let change_id_length = store.change_id_length(); 194 195 let mut visited_heads: HashSet<CommitId> = HashSet::new(); 195 196 let mut historical_heads: Vec<(CommitId, OperationId)> = Vec::new(); 196 - let mut parent_op_id: Option<OperationId> = None; 197 - for op in op_walk::walk_ancestors(slice::from_ref(operation)) { 198 - let op = op?; 199 - // Pick the latest existing ancestor operation as the parent 200 - // segment. 201 - if parent_op_id.is_none() && operations_dir.join(op.id().hex()).is_file() { 202 - parent_op_id = Some(op.id().clone()); 197 + let ops_to_visit: Vec<_> = 198 + op_walk::walk_ancestors(slice::from_ref(operation)).try_collect()?; 199 + // Pick the latest existing ancestor operation as the parent segment. 200 + let parent_op = ops_to_visit 201 + .iter() 202 + .find(|op| operations_dir.join(op.id().hex()).is_file()) 203 + .cloned(); 204 + // Remove ancestors of the latest existing operation, which should have 205 + // been indexed in the parent segment. This could be optimized for 206 + // linear history, but parent_op is often None. 
207 + let ops_to_visit = if let Some(op) = &parent_op { 208 + let mut wanted_ops: HashMap<&OperationId, &Operation> = 209 + ops_to_visit.iter().map(|op| (op.id(), op)).collect(); 210 + let mut work = vec![op.id()]; 211 + while let Some(id) = work.pop() { 212 + if let Some(op) = wanted_ops.remove(id) { 213 + work.extend(op.parent_ids()); 214 + } 203 215 } 204 - // TODO: no need to walk ancestors of the parent_op_id operation 216 + ops_to_visit 217 + .iter() 218 + .filter(|op| wanted_ops.contains_key(op.id())) 219 + .cloned() 220 + .collect() 221 + } else { 222 + ops_to_visit 223 + }; 224 + tracing::info!( 225 + ops_count = ops_to_visit.len(), 226 + "collecting head commits to index" 227 + ); 228 + for op in &ops_to_visit { 205 229 for commit_id in op.view()?.all_referenced_commit_ids() { 206 230 if visited_heads.insert(commit_id.clone()) { 207 231 historical_heads.push((commit_id.clone(), op.id().clone())); ··· 210 234 } 211 235 let maybe_parent_file; 212 236 let mut mutable_index; 213 - match parent_op_id { 237 + match &parent_op { 214 238 None => { 215 239 maybe_parent_file = None; 216 240 mutable_index = DefaultMutableIndex::full(commit_id_length, change_id_length); 217 241 } 218 - Some(parent_op_id) => { 242 + Some(op) => { 219 243 let parent_file = self.load_index_segments_at_operation( 220 - &parent_op_id, 244 + op.id(), 221 245 commit_id_length, 222 246 change_id_length, 223 247 )?;