just playing with tangled
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

index: omit operations that should be covered by the parent index

I'm not going to add an operation index anytime soon, but this might help if we
decide to. If we index "commit_id: originating_op_id" relations, we'll
probably want to store op_ids in a separate sstable to save disk space. If that
table contains all operation ids, we can reuse it to resolve short operation
ids.

+35 -11
+35 -11
lib/src/default_index/store.rs
··· 15 #![allow(missing_docs)] 16 17 use std::any::Any; 18 use std::collections::HashSet; 19 use std::fs; 20 use std::io; ··· 193 let change_id_length = store.change_id_length(); 194 let mut visited_heads: HashSet<CommitId> = HashSet::new(); 195 let mut historical_heads: Vec<(CommitId, OperationId)> = Vec::new(); 196 - let mut parent_op_id: Option<OperationId> = None; 197 - for op in op_walk::walk_ancestors(slice::from_ref(operation)) { 198 - let op = op?; 199 - // Pick the latest existing ancestor operation as the parent 200 - // segment. 201 - if parent_op_id.is_none() && operations_dir.join(op.id().hex()).is_file() { 202 - parent_op_id = Some(op.id().clone()); 203 } 204 - // TODO: no need to walk ancestors of the parent_op_id operation 205 for commit_id in op.view()?.all_referenced_commit_ids() { 206 if visited_heads.insert(commit_id.clone()) { 207 historical_heads.push((commit_id.clone(), op.id().clone())); ··· 210 } 211 let maybe_parent_file; 212 let mut mutable_index; 213 - match parent_op_id { 214 None => { 215 maybe_parent_file = None; 216 mutable_index = DefaultMutableIndex::full(commit_id_length, change_id_length); 217 } 218 - Some(parent_op_id) => { 219 let parent_file = self.load_index_segments_at_operation( 220 - &parent_op_id, 221 commit_id_length, 222 change_id_length, 223 )?;
··· 15 #![allow(missing_docs)] 16 17 use std::any::Any; 18 + use std::collections::HashMap; 19 use std::collections::HashSet; 20 use std::fs; 21 use std::io; ··· 194 let change_id_length = store.change_id_length(); 195 let mut visited_heads: HashSet<CommitId> = HashSet::new(); 196 let mut historical_heads: Vec<(CommitId, OperationId)> = Vec::new(); 197 + let ops_to_visit: Vec<_> = 198 + op_walk::walk_ancestors(slice::from_ref(operation)).try_collect()?; 199 + // Pick the latest existing ancestor operation as the parent segment. 200 + let parent_op = ops_to_visit 201 + .iter() 202 + .find(|op| operations_dir.join(op.id().hex()).is_file()) 203 + .cloned(); 204 + // Remove ancestors of the latest existing operation, which should have 205 + // been indexed in the parent segment. This could be optimized for 206 + // linear history, but parent_op is often None. 207 + let ops_to_visit = if let Some(op) = &parent_op { 208 + let mut wanted_ops: HashMap<&OperationId, &Operation> = 209 + ops_to_visit.iter().map(|op| (op.id(), op)).collect(); 210 + let mut work = vec![op.id()]; 211 + while let Some(id) = work.pop() { 212 + if let Some(op) = wanted_ops.remove(id) { 213 + work.extend(op.parent_ids()); 214 + } 215 } 216 + ops_to_visit 217 + .iter() 218 + .filter(|op| wanted_ops.contains_key(op.id())) 219 + .cloned() 220 + .collect() 221 + } else { 222 + ops_to_visit 223 + }; 224 + tracing::info!( 225 + ops_count = ops_to_visit.len(), 226 + "collecting head commits to index" 227 + ); 228 + for op in &ops_to_visit { 229 for commit_id in op.view()?.all_referenced_commit_ids() { 230 if visited_heads.insert(commit_id.clone()) { 231 historical_heads.push((commit_id.clone(), op.id().clone())); ··· 234 } 235 let maybe_parent_file; 236 let mut mutable_index; 237 + match &parent_op { 238 None => { 239 maybe_parent_file = None; 240 mutable_index = DefaultMutableIndex::full(commit_id_length, change_id_length); 241 } 242 + Some(op) => { 243 let parent_file = 
self.load_index_segments_at_operation( 244 + op.id(), 245 commit_id_length, 246 change_id_length, 247 )?;