just playing with tangled
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at gvimdiff 1209 lines 46 kB view raw
1// Copyright 2023 The Jujutsu Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15//! A lazily merged view of a set of trees. 16 17use std::borrow::Borrow; 18use std::cmp::max; 19use std::cmp::Ordering; 20use std::collections::BTreeMap; 21use std::collections::VecDeque; 22use std::iter; 23use std::iter::zip; 24use std::pin::Pin; 25use std::sync::Arc; 26use std::task::Context; 27use std::task::Poll; 28use std::vec; 29 30use either::Either; 31use futures::future::BoxFuture; 32use futures::stream::BoxStream; 33use futures::stream::StreamExt as _; 34use futures::Stream; 35use futures::TryStreamExt as _; 36use itertools::EitherOrBoth; 37use itertools::Itertools as _; 38use pollster::FutureExt as _; 39 40use crate::backend; 41use crate::backend::BackendResult; 42use crate::backend::MergedTreeId; 43use crate::backend::TreeId; 44use crate::backend::TreeValue; 45use crate::copies::CopiesTreeDiffEntry; 46use crate::copies::CopiesTreeDiffStream; 47use crate::copies::CopyRecords; 48use crate::matchers::EverythingMatcher; 49use crate::matchers::Matcher; 50use crate::merge::Merge; 51use crate::merge::MergeBuilder; 52use crate::merge::MergedTreeVal; 53use crate::merge::MergedTreeValue; 54use crate::repo_path::RepoPath; 55use crate::repo_path::RepoPathBuf; 56use crate::repo_path::RepoPathComponent; 57use crate::store::Store; 58use crate::tree::try_resolve_file_conflict; 59use crate::tree::Tree; 60use crate::tree_builder::TreeBuilder; 61 62/// Presents a view of a merged set of trees. 63#[derive(PartialEq, Eq, Clone, Debug)] 64pub struct MergedTree { 65 trees: Merge<Tree>, 66} 67 68impl MergedTree { 69 /// Creates a new `MergedTree` representing a single tree without conflicts. 70 pub fn resolved(tree: Tree) -> Self { 71 MergedTree::new(Merge::resolved(tree)) 72 } 73 74 /// Creates a new `MergedTree` representing a merge of a set of trees. The 75 /// individual trees must not have any conflicts. 76 pub fn new(trees: Merge<Tree>) -> Self { 77 debug_assert!(!trees.iter().any(|t| t.has_conflict())); 78 debug_assert!(trees.iter().map(|tree| tree.dir()).all_equal()); 79 debug_assert!(trees 80 .iter() 81 .map(|tree| Arc::as_ptr(tree.store())) 82 .all_equal()); 83 MergedTree { trees } 84 } 85 86 /// Takes a tree in the legacy format (with path-level conflicts in the 87 /// tree) and returns a `MergedTree` with any conflicts converted to 88 /// tree-level conflicts. 89 pub fn from_legacy_tree(tree: Tree) -> BackendResult<Self> { 90 let conflict_ids = tree.conflicts(); 91 if conflict_ids.is_empty() { 92 return Ok(MergedTree::resolved(tree)); 93 } 94 95 // Find the number of removes and adds in the most complex conflict. 96 let mut max_tree_count = 1; 97 let store = tree.store(); 98 let mut conflicts: Vec<(&RepoPath, MergedTreeValue)> = vec![]; 99 for (path, conflict_id) in &conflict_ids { 100 let conflict = store.read_conflict(path, conflict_id)?; 101 max_tree_count = max(max_tree_count, conflict.iter().len()); 102 conflicts.push((path, conflict)); 103 } 104 let mut tree_builders = Vec::new(); 105 tree_builders.resize_with(max_tree_count, || store.tree_builder(tree.id().clone())); 106 for (path, conflict) in conflicts { 107 // If there are fewer terms in this conflict than in some other conflict, we can 108 // add canceling removes and adds of any value. The simplest value is an absent 109 // value, so we use that. 110 let terms_padded = conflict.into_iter().chain(iter::repeat(None)); 111 for (builder, term) in zip(&mut tree_builders, terms_padded) { 112 builder.set_or_remove(path.to_owned(), term); 113 } 114 } 115 116 let new_trees: Vec<_> = tree_builders 117 .into_iter() 118 .map(|builder| { 119 let tree_id = builder.write_tree()?; 120 store.get_tree(RepoPathBuf::root(), &tree_id) 121 }) 122 .try_collect()?; 123 Ok(MergedTree { 124 trees: Merge::from_vec(new_trees), 125 }) 126 } 127 128 /// Returns the underlying `Merge<Tree>`. 129 pub fn as_merge(&self) -> &Merge<Tree> { 130 &self.trees 131 } 132 133 /// Extracts the underlying `Merge<Tree>`. 134 pub fn take(self) -> Merge<Tree> { 135 self.trees 136 } 137 138 /// This tree's directory 139 pub fn dir(&self) -> &RepoPath { 140 self.trees.first().dir() 141 } 142 143 /// The `Store` associated with this tree. 144 pub fn store(&self) -> &Arc<Store> { 145 self.trees.first().store() 146 } 147 148 /// Base names of entries in this directory. 149 pub fn names<'a>(&'a self) -> Box<dyn Iterator<Item = &'a RepoPathComponent> + 'a> { 150 Box::new(all_tree_basenames(&self.trees)) 151 } 152 153 /// The value at the given basename. The value can be `Resolved` even if 154 /// `self` is a `Merge`, which happens if the value at the path can be 155 /// trivially merged. Does not recurse, so if `basename` refers to a Tree, 156 /// then a `TreeValue::Tree` will be returned. 157 pub fn value(&self, basename: &RepoPathComponent) -> MergedTreeVal { 158 trees_value(&self.trees, basename) 159 } 160 161 /// Tries to resolve any conflicts, resolving any conflicts that can be 162 /// automatically resolved and leaving the rest unresolved. 163 pub fn resolve(&self) -> BackendResult<MergedTree> { 164 let merged = merge_trees(&self.trees)?; 165 // If the result can be resolved, then `merge_trees()` above would have returned 166 // a resolved merge. However, that function will always preserve the arity of 167 // conflicts it cannot resolve. So we simplify the conflict again 168 // here to possibly reduce a complex conflict to a simpler one. 169 let simplified = merged.simplify(); 170 // If debug assertions are enabled, check that the merge was idempotent. In 171 // particular, that this last simplification doesn't enable further automatic 172 // resolutions 173 if cfg!(debug_assertions) { 174 let re_merged = merge_trees(&simplified).unwrap(); 175 debug_assert_eq!(re_merged, simplified); 176 } 177 Ok(MergedTree { trees: simplified }) 178 } 179 180 /// An iterator over the conflicts in this tree, including subtrees. 181 /// Recurses into subtrees and yields conflicts in those, but only if 182 /// all sides are trees, so tree/file conflicts will be reported as a single 183 /// conflict, not one for each path in the tree. 184 // TODO: Restrict this by a matcher (or add a separate method for that). 185 pub fn conflicts( 186 &self, 187 ) -> impl Iterator<Item = (RepoPathBuf, BackendResult<MergedTreeValue>)> + use<> { 188 ConflictIterator::new(self) 189 } 190 191 /// Whether this tree has conflicts. 192 pub fn has_conflict(&self) -> bool { 193 !self.trees.is_resolved() 194 } 195 196 /// Gets the `MergeTree` in a subdirectory of the current tree. If the path 197 /// doesn't correspond to a tree in any of the inputs to the merge, then 198 /// that entry will be replace by an empty tree in the result. 199 pub fn sub_tree(&self, name: &RepoPathComponent) -> BackendResult<Option<MergedTree>> { 200 match self.value(name).into_resolved() { 201 Ok(Some(TreeValue::Tree(sub_tree_id))) => { 202 let subdir = self.dir().join(name); 203 Ok(Some(MergedTree::resolved( 204 self.store().get_tree(subdir, sub_tree_id)?, 205 ))) 206 } 207 Ok(_) => Ok(None), 208 Err(merge) => { 209 let trees = merge.try_map(|value| match value { 210 Some(TreeValue::Tree(sub_tree_id)) => { 211 let subdir = self.dir().join(name); 212 self.store().get_tree(subdir, sub_tree_id) 213 } 214 _ => { 215 let subdir = self.dir().join(name); 216 Ok(Tree::empty(self.store().clone(), subdir)) 217 } 218 })?; 219 Ok(Some(MergedTree { trees })) 220 } 221 } 222 } 223 224 /// The value at the given path. The value can be `Resolved` even if 225 /// `self` is a `Conflict`, which happens if the value at the path can be 226 /// trivially merged. 227 pub fn path_value(&self, path: &RepoPath) -> BackendResult<MergedTreeValue> { 228 assert_eq!(self.dir(), RepoPath::root()); 229 match path.split() { 230 Some((dir, basename)) => match self.sub_tree_recursive(dir)? { 231 None => Ok(Merge::absent()), 232 Some(tree) => Ok(tree.value(basename).cloned()), 233 }, 234 None => Ok(self 235 .trees 236 .map(|tree| Some(TreeValue::Tree(tree.id().clone())))), 237 } 238 } 239 240 /// The tree's id 241 pub fn id(&self) -> MergedTreeId { 242 MergedTreeId::Merge(self.trees.map(|tree| tree.id().clone())) 243 } 244 245 /// Look up the tree at the given path. 246 pub fn sub_tree_recursive(&self, path: &RepoPath) -> BackendResult<Option<MergedTree>> { 247 let mut current_tree = self.clone(); 248 for name in path.components() { 249 match current_tree.sub_tree(name)? { 250 None => { 251 return Ok(None); 252 } 253 Some(sub_tree) => { 254 current_tree = sub_tree; 255 } 256 } 257 } 258 Ok(Some(current_tree)) 259 } 260 261 /// Iterator over the entries matching the given matcher. Subtrees are 262 /// visited recursively. Subtrees that differ between the current 263 /// `MergedTree`'s terms are merged on the fly. Missing terms are treated as 264 /// empty directories. Subtrees that conflict with non-trees are not 265 /// visited. For example, if current tree is a merge of 3 trees, and the 266 /// entry for 'foo' is a conflict between a change subtree and a symlink 267 /// (i.e. the subdirectory was replaced by symlink in one side of the 268 /// conflict), then the entry for `foo` itself will be emitted, but no 269 /// entries from inside `foo/` from either of the trees will be. 270 pub fn entries(&self) -> TreeEntriesIterator<'static> { 271 self.entries_matching(&EverythingMatcher) 272 } 273 274 /// Like `entries()` but restricted by a matcher. 275 pub fn entries_matching<'matcher>( 276 &self, 277 matcher: &'matcher dyn Matcher, 278 ) -> TreeEntriesIterator<'matcher> { 279 TreeEntriesIterator::new(&self.trees, matcher) 280 } 281 282 /// Stream of the differences between this tree and another tree. 283 /// 284 /// The files in a removed tree will be returned before a file that replaces 285 /// it. 286 pub fn diff_stream<'matcher>( 287 &self, 288 other: &MergedTree, 289 matcher: &'matcher dyn Matcher, 290 ) -> TreeDiffStream<'matcher> { 291 let concurrency = self.store().concurrency(); 292 if concurrency <= 1 { 293 Box::pin(futures::stream::iter(TreeDiffIterator::new( 294 &self.trees, 295 &other.trees, 296 matcher, 297 ))) 298 } else { 299 Box::pin(TreeDiffStreamImpl::new( 300 &self.trees, 301 &other.trees, 302 matcher, 303 concurrency, 304 )) 305 } 306 } 307 308 /// Like `diff_stream()` but takes the given copy records into account. 309 pub fn diff_stream_with_copies<'a>( 310 &self, 311 other: &MergedTree, 312 matcher: &'a dyn Matcher, 313 copy_records: &'a CopyRecords, 314 ) -> BoxStream<'a, CopiesTreeDiffEntry> { 315 let stream = self.diff_stream(other, matcher); 316 Box::pin(CopiesTreeDiffStream::new( 317 stream, 318 self.clone(), 319 other.clone(), 320 copy_records, 321 )) 322 } 323 324 /// Merges this tree with `other`, using `base` as base. Any conflicts will 325 /// be resolved recursively if possible. 326 pub fn merge(&self, base: &MergedTree, other: &MergedTree) -> BackendResult<MergedTree> { 327 self.merge_no_resolve(base, other).resolve() 328 } 329 330 /// Merges this tree with `other`, using `base` as base, without attempting 331 /// to resolve file conflicts. 332 pub fn merge_no_resolve(&self, base: &MergedTree, other: &MergedTree) -> MergedTree { 333 let nested = Merge::from_vec(vec![ 334 self.trees.clone(), 335 base.trees.clone(), 336 other.trees.clone(), 337 ]); 338 MergedTree { 339 trees: nested.flatten().simplify(), 340 } 341 } 342} 343 344/// A single entry in a tree diff. 345pub struct TreeDiffEntry { 346 /// The path. 347 pub path: RepoPathBuf, 348 /// The resolved tree values if available. 349 pub values: BackendResult<(MergedTreeValue, MergedTreeValue)>, 350} 351 352/// Type alias for the result from `MergedTree::diff_stream()`. We use a 353/// `Stream` instead of an `Iterator` so high-latency backends (e.g. cloud-based 354/// ones) can fetch trees asynchronously. 355pub type TreeDiffStream<'matcher> = BoxStream<'matcher, TreeDiffEntry>; 356 357fn all_tree_basenames(trees: &Merge<Tree>) -> impl Iterator<Item = &RepoPathComponent> { 358 trees 359 .iter() 360 .map(|tree| tree.data().names()) 361 .kmerge() 362 .dedup() 363} 364 365fn all_tree_entries( 366 trees: &Merge<Tree>, 367) -> impl Iterator<Item = (&RepoPathComponent, MergedTreeVal<'_>)> { 368 if let Some(tree) = trees.as_resolved() { 369 let iter = tree 370 .entries_non_recursive() 371 .map(|entry| (entry.name(), Merge::normal(entry.value()))); 372 Either::Left(iter) 373 } else { 374 let iter = all_merged_tree_entries(trees).map(|(name, values)| { 375 // TODO: move resolve_trivial() to caller? 376 let values = match values.resolve_trivial() { 377 Some(resolved) => Merge::resolved(*resolved), 378 None => values, 379 }; 380 (name, values) 381 }); 382 Either::Right(iter) 383 } 384} 385 386/// Suppose the given `trees` aren't resolved, iterates `(name, values)` pairs 387/// non-recursively. This also works if `trees` are resolved, but is more costly 388/// than `tree.entries_non_recursive()`. 389fn all_merged_tree_entries( 390 trees: &Merge<Tree>, 391) -> impl Iterator<Item = (&RepoPathComponent, MergedTreeVal<'_>)> { 392 let mut entries_iters = trees 393 .iter() 394 .map(|tree| tree.entries_non_recursive().peekable()) 395 .collect_vec(); 396 iter::from_fn(move || { 397 let next_name = entries_iters 398 .iter_mut() 399 .filter_map(|iter| iter.peek()) 400 .map(|entry| entry.name()) 401 .min()?; 402 let values: MergeBuilder<_> = entries_iters 403 .iter_mut() 404 .map(|iter| { 405 let entry = iter.next_if(|entry| entry.name() == next_name)?; 406 Some(entry.value()) 407 }) 408 .collect(); 409 Some((next_name, values.build())) 410 }) 411} 412 413fn merged_tree_entry_diff<'a>( 414 trees1: &'a Merge<Tree>, 415 trees2: &'a Merge<Tree>, 416) -> impl Iterator<Item = (&'a RepoPathComponent, MergedTreeVal<'a>, MergedTreeVal<'a>)> { 417 itertools::merge_join_by( 418 all_tree_entries(trees1), 419 all_tree_entries(trees2), 420 |(name1, _), (name2, _)| name1.cmp(name2), 421 ) 422 .map(|entry| match entry { 423 EitherOrBoth::Both((name, value1), (_, value2)) => (name, value1, value2), 424 EitherOrBoth::Left((name, value1)) => (name, value1, Merge::absent()), 425 EitherOrBoth::Right((name, value2)) => (name, Merge::absent(), value2), 426 }) 427 .filter(|(_, value1, value2)| value1 != value2) 428} 429 430fn trees_value<'a>(trees: &'a Merge<Tree>, basename: &RepoPathComponent) -> MergedTreeVal<'a> { 431 if let Some(tree) = trees.as_resolved() { 432 return Merge::resolved(tree.value(basename)); 433 } 434 let value = trees.map(|tree| tree.value(basename)); 435 if let Some(resolved) = value.resolve_trivial() { 436 return Merge::resolved(*resolved); 437 } 438 value 439} 440 441/// The returned conflict will either be resolved or have the same number of 442/// sides as the input. 443fn merge_trees(merge: &Merge<Tree>) -> BackendResult<Merge<Tree>> { 444 if let Some(tree) = merge.resolve_trivial() { 445 return Ok(Merge::resolved(tree.clone())); 446 } 447 448 let base_tree = merge.first(); 449 let store = base_tree.store(); 450 let dir = base_tree.dir(); 451 // Keep resolved entries in `new_tree` and conflicted entries in `conflicts` to 452 // start with. Then we'll create the full trees later, and only if there are 453 // any conflicts. 454 let mut new_tree = backend::Tree::default(); 455 let mut conflicts = vec![]; 456 // TODO: Merge values concurrently 457 for (basename, path_merge) in all_merged_tree_entries(merge) { 458 let path = dir.join(basename); 459 let path_merge = merge_tree_values(store, &path, &path_merge).block_on()?; 460 match path_merge.into_resolved() { 461 Ok(value) => { 462 new_tree.set_or_remove(basename, value); 463 } 464 Err(path_merge) => { 465 conflicts.push((basename, path_merge.into_iter())); 466 } 467 }; 468 } 469 if conflicts.is_empty() { 470 let new_tree_id = store.write_tree(dir, new_tree).block_on()?; 471 Ok(Merge::resolved(new_tree_id)) 472 } else { 473 // For each side of the conflict, overwrite the entries in `new_tree` with the 474 // values from `conflicts`. Entries that are not in `conflicts` will remain 475 // unchanged and will be reused for each side. 476 let tree_count = merge.iter().len(); 477 let mut new_trees = Vec::with_capacity(tree_count); 478 for _ in 0..tree_count { 479 for (basename, path_conflict) in &mut conflicts { 480 new_tree.set_or_remove(basename, path_conflict.next().unwrap()); 481 } 482 let tree = store.write_tree(dir, new_tree.clone()).block_on()?; 483 new_trees.push(tree); 484 } 485 Ok(Merge::from_vec(new_trees)) 486 } 487} 488 489/// Tries to resolve a conflict between tree values. Returns 490/// Ok(Merge::normal(value)) if the conflict was resolved, and 491/// Ok(Merge::absent()) if the path should be removed. Returns the 492/// conflict unmodified if it cannot be resolved automatically. 493async fn merge_tree_values( 494 store: &Arc<Store>, 495 path: &RepoPath, 496 values: &MergedTreeVal<'_>, 497) -> BackendResult<MergedTreeValue> { 498 if let Some(resolved) = values.resolve_trivial() { 499 return Ok(Merge::resolved(resolved.cloned())); 500 } 501 502 if let Some(trees) = values.to_tree_merge(store, path)? { 503 // If all sides are trees or missing, merge the trees recursively, treating 504 // missing trees as empty. 505 let empty_tree_id = store.empty_tree_id(); 506 let merged_tree = merge_trees(&trees)?; 507 Ok(merged_tree 508 .map(|tree| (tree.id() != empty_tree_id).then(|| TreeValue::Tree(tree.id().clone())))) 509 } else { 510 let maybe_resolved = try_resolve_file_values(store, path, values).await?; 511 Ok(maybe_resolved.unwrap_or_else(|| values.cloned())) 512 } 513} 514 515/// Tries to resolve file conflicts by merging the file contents. Treats missing 516/// files as empty. If the file conflict cannot be resolved, returns the passed 517/// `values` unmodified. 518pub async fn resolve_file_values( 519 store: &Arc<Store>, 520 path: &RepoPath, 521 values: MergedTreeValue, 522) -> BackendResult<MergedTreeValue> { 523 if let Some(resolved) = values.resolve_trivial() { 524 return Ok(Merge::resolved(resolved.clone())); 525 } 526 527 let maybe_resolved = try_resolve_file_values(store, path, &values).await?; 528 Ok(maybe_resolved.unwrap_or(values)) 529} 530 531async fn try_resolve_file_values<T: Borrow<TreeValue>>( 532 store: &Arc<Store>, 533 path: &RepoPath, 534 values: &Merge<Option<T>>, 535) -> BackendResult<Option<MergedTreeValue>> { 536 // The values may contain trees canceling each other (notably padded absent 537 // trees), so we need to simplify them first. 538 let simplified = values 539 .map(|value| value.as_ref().map(Borrow::borrow)) 540 .simplify(); 541 // No fast path for simplified.is_resolved(). If it could be resolved, it would 542 // have been caught by values.resolve_trivial() above. 543 if let Some(resolved) = try_resolve_file_conflict(store, path, &simplified).await? { 544 Ok(Some(Merge::normal(resolved))) 545 } else { 546 // Failed to merge the files, or the paths are not files 547 Ok(None) 548 } 549} 550 551/// Recursive iterator over the entries in a tree. 552pub struct TreeEntriesIterator<'matcher> { 553 store: Arc<Store>, 554 stack: Vec<TreeEntriesDirItem>, 555 matcher: &'matcher dyn Matcher, 556} 557 558struct TreeEntriesDirItem { 559 entries: Vec<(RepoPathBuf, MergedTreeValue)>, 560} 561 562impl TreeEntriesDirItem { 563 fn new(trees: &Merge<Tree>, matcher: &dyn Matcher) -> Self { 564 let mut entries = vec![]; 565 let dir = trees.first().dir(); 566 for (name, value) in all_tree_entries(trees) { 567 let path = dir.join(name); 568 if value.is_tree() { 569 // TODO: Handle the other cases (specific files and trees) 570 if matcher.visit(&path).is_nothing() { 571 continue; 572 } 573 } else if !matcher.matches(&path) { 574 continue; 575 } 576 entries.push((path, value.cloned())); 577 } 578 entries.reverse(); 579 TreeEntriesDirItem { entries } 580 } 581} 582 583impl<'matcher> TreeEntriesIterator<'matcher> { 584 fn new(trees: &Merge<Tree>, matcher: &'matcher dyn Matcher) -> Self { 585 Self { 586 store: trees.first().store().clone(), 587 stack: vec![TreeEntriesDirItem::new(trees, matcher)], 588 matcher, 589 } 590 } 591} 592 593impl Iterator for TreeEntriesIterator<'_> { 594 type Item = (RepoPathBuf, BackendResult<MergedTreeValue>); 595 596 fn next(&mut self) -> Option<Self::Item> { 597 while let Some(top) = self.stack.last_mut() { 598 if let Some((path, value)) = top.entries.pop() { 599 let maybe_trees = match value.to_tree_merge(&self.store, &path) { 600 Ok(maybe_trees) => maybe_trees, 601 Err(err) => return Some((path, Err(err))), 602 }; 603 if let Some(trees) = maybe_trees { 604 self.stack 605 .push(TreeEntriesDirItem::new(&trees, self.matcher)); 606 } else { 607 return Some((path, Ok(value))); 608 } 609 } else { 610 self.stack.pop(); 611 } 612 } 613 None 614 } 615} 616 617/// The state for the non-recursive iteration over the conflicted entries in a 618/// single directory. 619struct ConflictsDirItem { 620 entries: Vec<(RepoPathBuf, MergedTreeValue)>, 621} 622 623impl From<&Merge<Tree>> for ConflictsDirItem { 624 fn from(trees: &Merge<Tree>) -> Self { 625 let dir = trees.first().dir(); 626 if trees.is_resolved() { 627 return ConflictsDirItem { entries: vec![] }; 628 } 629 630 let mut entries = vec![]; 631 for (basename, value) in all_tree_entries(trees) { 632 if !value.is_resolved() { 633 entries.push((dir.join(basename), value.cloned())); 634 } 635 } 636 entries.reverse(); 637 ConflictsDirItem { entries } 638 } 639} 640 641struct ConflictIterator { 642 store: Arc<Store>, 643 stack: Vec<ConflictsDirItem>, 644} 645 646impl ConflictIterator { 647 fn new(tree: &MergedTree) -> Self { 648 ConflictIterator { 649 store: tree.store().clone(), 650 stack: vec![ConflictsDirItem::from(&tree.trees)], 651 } 652 } 653} 654 655impl Iterator for ConflictIterator { 656 type Item = (RepoPathBuf, BackendResult<MergedTreeValue>); 657 658 fn next(&mut self) -> Option<Self::Item> { 659 while let Some(top) = self.stack.last_mut() { 660 if let Some((path, tree_values)) = top.entries.pop() { 661 match tree_values.to_tree_merge(&self.store, &path) { 662 Ok(Some(trees)) => { 663 // If all sides are trees or missing, descend into the merged tree 664 self.stack.push(ConflictsDirItem::from(&trees)); 665 } 666 Ok(None) => { 667 // Otherwise this is a conflict between files, trees, etc. If they could 668 // be automatically resolved, they should have been when the top-level 669 // tree conflict was written, so we assume that they can't be. 670 return Some((path, Ok(tree_values))); 671 } 672 Err(err) => { 673 return Some((path, Err(err))); 674 } 675 } 676 } else { 677 self.stack.pop(); 678 } 679 } 680 None 681 } 682} 683 684/// Iterator over the differences between two trees. 685pub struct TreeDiffIterator<'matcher> { 686 store: Arc<Store>, 687 stack: Vec<TreeDiffItem>, 688 matcher: &'matcher dyn Matcher, 689} 690 691struct TreeDiffDirItem { 692 entries: Vec<(RepoPathBuf, MergedTreeValue, MergedTreeValue)>, 693} 694 695enum TreeDiffItem { 696 Dir(TreeDiffDirItem), 697 // This is used for making sure that when a directory gets replaced by a file, we 698 // yield the value for the addition of the file after we yield the values 699 // for removing files in the directory. 700 File(RepoPathBuf, MergedTreeValue, MergedTreeValue), 701} 702 703impl<'matcher> TreeDiffIterator<'matcher> { 704 /// Creates a iterator over the differences between two trees. Generally 705 /// prefer `MergedTree::diff()` of calling this directly. 706 pub fn new(trees1: &Merge<Tree>, trees2: &Merge<Tree>, matcher: &'matcher dyn Matcher) -> Self { 707 assert!(Arc::ptr_eq(trees1.first().store(), trees2.first().store())); 708 let root_dir = RepoPath::root(); 709 let mut stack = Vec::new(); 710 if !matcher.visit(root_dir).is_nothing() { 711 stack.push(TreeDiffItem::Dir(TreeDiffDirItem::from_trees( 712 root_dir, trees1, trees2, matcher, 713 ))); 714 }; 715 Self { 716 store: trees1.first().store().clone(), 717 stack, 718 matcher, 719 } 720 } 721 722 /// Gets the given trees if `values` are trees, otherwise an empty tree. 723 fn trees( 724 store: &Arc<Store>, 725 dir: &RepoPath, 726 values: &MergedTreeValue, 727 ) -> BackendResult<Merge<Tree>> { 728 if let Some(trees) = values.to_tree_merge(store, dir)? { 729 Ok(trees) 730 } else { 731 Ok(Merge::resolved(Tree::empty(store.clone(), dir.to_owned()))) 732 } 733 } 734} 735 736impl TreeDiffDirItem { 737 fn from_trees( 738 dir: &RepoPath, 739 trees1: &Merge<Tree>, 740 trees2: &Merge<Tree>, 741 matcher: &dyn Matcher, 742 ) -> Self { 743 let mut entries = vec![]; 744 for (name, before, after) in merged_tree_entry_diff(trees1, trees2) { 745 let path = dir.join(name); 746 let tree_before = before.is_tree(); 747 let tree_after = after.is_tree(); 748 // Check if trees and files match, but only if either side is a tree or a file 749 // (don't query the matcher unnecessarily). 750 let tree_matches = (tree_before || tree_after) && !matcher.visit(&path).is_nothing(); 751 let file_matches = (!tree_before || !tree_after) && matcher.matches(&path); 752 753 // Replace trees or files that don't match by `Merge::absent()` 754 let before = if (tree_before && tree_matches) || (!tree_before && file_matches) { 755 before 756 } else { 757 Merge::absent() 758 }; 759 let after = if (tree_after && tree_matches) || (!tree_after && file_matches) { 760 after 761 } else { 762 Merge::absent() 763 }; 764 if before.is_absent() && after.is_absent() { 765 continue; 766 } 767 entries.push((path, before.cloned(), after.cloned())); 768 } 769 entries.reverse(); 770 Self { entries } 771 } 772} 773 774impl Iterator for TreeDiffIterator<'_> { 775 type Item = TreeDiffEntry; 776 777 fn next(&mut self) -> Option<Self::Item> { 778 while let Some(top) = self.stack.last_mut() { 779 let (path, before, after) = match top { 780 TreeDiffItem::Dir(dir) => match dir.entries.pop() { 781 Some(entry) => entry, 782 None => { 783 self.stack.pop().unwrap(); 784 continue; 785 } 786 }, 787 TreeDiffItem::File(..) => { 788 if let TreeDiffItem::File(path, before, after) = self.stack.pop().unwrap() { 789 return Some(TreeDiffEntry { 790 path, 791 values: Ok((before, after)), 792 }); 793 } else { 794 unreachable!(); 795 } 796 } 797 }; 798 799 let tree_before = before.is_tree(); 800 let tree_after = after.is_tree(); 801 let post_subdir = if tree_before || tree_after { 802 let (before_tree, after_tree) = match ( 803 Self::trees(&self.store, &path, &before), 804 Self::trees(&self.store, &path, &after), 805 ) { 806 (Ok(before_tree), Ok(after_tree)) => (before_tree, after_tree), 807 (Err(before_err), _) => { 808 return Some(TreeDiffEntry { 809 path, 810 values: Err(before_err), 811 }); 812 } 813 (_, Err(after_err)) => { 814 return Some(TreeDiffEntry { 815 path, 816 values: Err(after_err), 817 }); 818 } 819 }; 820 let subdir = 821 TreeDiffDirItem::from_trees(&path, &before_tree, &after_tree, self.matcher); 822 self.stack.push(TreeDiffItem::Dir(subdir)); 823 self.stack.len() - 1 824 } else { 825 self.stack.len() 826 }; 827 if !tree_before && tree_after { 828 if before.is_present() { 829 return Some(TreeDiffEntry { 830 path, 831 values: Ok((before, Merge::absent())), 832 }); 833 } 834 } else if tree_before && !tree_after { 835 if after.is_present() { 836 self.stack.insert( 837 post_subdir, 838 TreeDiffItem::File(path, Merge::absent(), after), 839 ); 840 } 841 } else if !tree_before && !tree_after { 842 return Some(TreeDiffEntry { 843 path, 844 values: Ok((before, after)), 845 }); 846 } 847 } 848 None 849 } 850} 851 852/// Stream of differences between two trees. 853pub struct TreeDiffStreamImpl<'matcher> { 854 store: Arc<Store>, 855 matcher: &'matcher dyn Matcher, 856 /// Pairs of tree values that may or may not be ready to emit, sorted in the 857 /// order we want to emit them. If either side is a tree, there will be 858 /// a corresponding entry in `pending_trees`. 859 items: BTreeMap<DiffStreamKey, BackendResult<(MergedTreeValue, MergedTreeValue)>>, 860 // TODO: Is it better to combine this and `items` into a single map? 861 #[expect(clippy::type_complexity)] 862 pending_trees: VecDeque<( 863 RepoPathBuf, 864 BoxFuture<'matcher, BackendResult<(Merge<Tree>, Merge<Tree>)>>, 865 )>, 866 /// The maximum number of trees to request concurrently. However, we do the 867 /// accounting per path, so for there will often be twice as many pending 868 /// `Backend::read_tree()` calls - for the "before" and "after" sides. For 869 /// conflicts, there will be even more. 870 max_concurrent_reads: usize, 871 /// The maximum number of items in `items`. However, we will always add the 872 /// full differences from a particular pair of trees, so it may temporarily 873 /// go over the limit (until we emit those items). It may also go over the 874 /// limit because we have a file item that's blocked by pending subdirectory 875 /// items. 876 max_queued_items: usize, 877} 878 879/// A wrapper around `RepoPath` that allows us to optionally sort files after 880/// directories that have the file as a prefix. 881#[derive(PartialEq, Eq, Clone, Debug)] 882struct DiffStreamKey { 883 path: RepoPathBuf, 884 file_after_dir: bool, 885} 886 887impl DiffStreamKey { 888 fn normal(path: RepoPathBuf) -> Self { 889 DiffStreamKey { 890 path, 891 file_after_dir: false, 892 } 893 } 894 895 fn file_after_dir(path: RepoPathBuf) -> Self { 896 DiffStreamKey { 897 path, 898 file_after_dir: true, 899 } 900 } 901} 902 903impl PartialOrd for DiffStreamKey { 904 fn partial_cmp(&self, other: &Self) -> Option<Ordering> { 905 Some(self.cmp(other)) 906 } 907} 908 909impl Ord for DiffStreamKey { 910 fn cmp(&self, other: &Self) -> Ordering { 911 if self == other { 912 Ordering::Equal 913 } else if self.file_after_dir && other.path.starts_with(&self.path) { 914 Ordering::Greater 915 } else if other.file_after_dir && self.path.starts_with(&other.path) { 916 Ordering::Less 917 } else { 918 self.path.cmp(&other.path) 919 } 920 } 921} 922 923impl<'matcher> TreeDiffStreamImpl<'matcher> { 924 /// Creates a iterator over the differences between two trees. Generally 925 /// prefer `MergedTree::diff_stream()` of calling this directly. 926 pub fn new( 927 trees1: &Merge<Tree>, 928 trees2: &Merge<Tree>, 929 matcher: &'matcher dyn Matcher, 930 max_concurrent_reads: usize, 931 ) -> Self { 932 assert!(Arc::ptr_eq(trees1.first().store(), trees2.first().store())); 933 let mut stream = Self { 934 store: trees1.first().store().clone(), 935 matcher, 936 items: BTreeMap::new(), 937 pending_trees: VecDeque::new(), 938 max_concurrent_reads, 939 max_queued_items: 10000, 940 }; 941 stream.add_dir_diff_items(RepoPath::root(), trees1, trees2); 942 stream 943 } 944 945 async fn single_tree( 946 store: &Arc<Store>, 947 dir: RepoPathBuf, 948 value: Option<&TreeValue>, 949 ) -> BackendResult<Tree> { 950 match value { 951 Some(TreeValue::Tree(tree_id)) => store.get_tree_async(dir, tree_id).await, 952 _ => Ok(Tree::empty(store.clone(), dir.clone())), 953 } 954 } 955 956 /// Gets the given trees if `values` are trees, otherwise an empty tree. 957 async fn trees( 958 store: Arc<Store>, 959 dir: RepoPathBuf, 960 values: MergedTreeValue, 961 ) -> BackendResult<Merge<Tree>> { 962 if values.is_tree() { 963 let builder: MergeBuilder<Tree> = futures::stream::iter(values.iter()) 964 .then(|value| Self::single_tree(&store, dir.clone(), value.as_ref())) 965 .try_collect() 966 .await?; 967 Ok(builder.build()) 968 } else { 969 Ok(Merge::resolved(Tree::empty(store, dir))) 970 } 971 } 972 973 fn add_dir_diff_items(&mut self, dir: &RepoPath, trees1: &Merge<Tree>, trees2: &Merge<Tree>) { 974 for (basename, before, after) in merged_tree_entry_diff(trees1, trees2) { 975 let path = dir.join(basename); 976 let tree_before = before.is_tree(); 977 let tree_after = after.is_tree(); 978 // Check if trees and files match, but only if either side is a tree or a file 979 // (don't query the matcher unnecessarily). 980 let tree_matches = 981 (tree_before || tree_after) && !self.matcher.visit(&path).is_nothing(); 982 let file_matches = (!tree_before || !tree_after) && self.matcher.matches(&path); 983 984 // Replace trees or files that don't match by `Merge::absent()` 985 let before = if (tree_before && tree_matches) || (!tree_before && file_matches) { 986 before 987 } else { 988 Merge::absent() 989 }; 990 let after = if (tree_after && tree_matches) || (!tree_after && file_matches) { 991 after 992 } else { 993 Merge::absent() 994 }; 995 if before.is_absent() && after.is_absent() { 996 continue; 997 } 998 999 // If the path was a tree on either side of the diff, read those trees. 1000 if tree_matches { 1001 let before_tree_future = 1002 Self::trees(self.store.clone(), path.clone(), before.cloned()); 1003 let after_tree_future = 1004 Self::trees(self.store.clone(), path.clone(), after.cloned()); 1005 let both_trees_future = 1006 async { futures::try_join!(before_tree_future, after_tree_future) }; 1007 self.pending_trees 1008 .push_back((path.clone(), Box::pin(both_trees_future))); 1009 } 1010 1011 self.items.insert( 1012 DiffStreamKey::normal(path), 1013 Ok((before.cloned(), after.cloned())), 1014 ); 1015 } 1016 } 1017 1018 fn poll_tree_futures(&mut self, cx: &mut Context<'_>) { 1019 let mut pending_index = 0; 1020 while pending_index < self.pending_trees.len() 1021 && (pending_index < self.max_concurrent_reads 1022 || self.items.len() < self.max_queued_items) 1023 { 1024 let (_, future) = &mut self.pending_trees[pending_index]; 1025 if let Poll::Ready(tree_diff) = future.as_mut().poll(cx) { 1026 let (dir, _) = self.pending_trees.remove(pending_index).unwrap(); 1027 let key = DiffStreamKey::normal(dir); 1028 // Whenever we add an entry to `self.pending_trees`, we also add an Ok() entry 1029 // to `self.items`. 1030 let (before, after) = self.items.remove(&key).unwrap().unwrap(); 1031 // If this was a transition from file to tree or vice versa, add back an item 1032 // for just the removal/addition of the file. 1033 if before.is_present() && !before.is_tree() { 1034 self.items 1035 .insert(key.clone(), Ok((before, Merge::absent()))); 1036 } else if after.is_present() && !after.is_tree() { 1037 self.items.insert( 1038 DiffStreamKey::file_after_dir(key.path.clone()), 1039 Ok((Merge::absent(), after)), 1040 ); 1041 } 1042 match tree_diff { 1043 Ok((trees1, trees2)) => { 1044 self.add_dir_diff_items(&key.path, &trees1, &trees2); 1045 } 1046 Err(err) => { 1047 self.items.insert(DiffStreamKey::normal(key.path), Err(err)); 1048 } 1049 } 1050 } else { 1051 pending_index += 1; 1052 } 1053 } 1054 } 1055} 1056 1057impl Stream for TreeDiffStreamImpl<'_> { 1058 type Item = TreeDiffEntry; 1059 1060 fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { 1061 // Go through all pending tree futures and poll them. 1062 self.poll_tree_futures(cx); 1063 1064 // Now emit the first file, or the first tree that completed with an error 1065 if let Some(entry) = self.items.first_entry() { 1066 match entry.get() { 1067 Err(_) => { 1068 // File or tree with error 1069 let (key, result) = entry.remove_entry(); 1070 Poll::Ready(Some(match result { 1071 Err(err) => TreeDiffEntry { 1072 path: key.path, 1073 values: Err(err), 1074 }, 1075 Ok((before, after)) => TreeDiffEntry { 1076 path: key.path, 1077 values: Ok((before, after)), 1078 }, 1079 })) 1080 } 1081 Ok((before, after)) if !before.is_tree() && !after.is_tree() => { 1082 // A diff with no trees involved 1083 let (key, result) = entry.remove_entry(); 1084 Poll::Ready(Some(match result { 1085 Err(err) => TreeDiffEntry { 1086 path: key.path, 1087 values: Err(err), 1088 }, 1089 Ok((before, after)) => TreeDiffEntry { 1090 path: key.path, 1091 values: Ok((before, after)), 1092 }, 1093 })) 1094 } 1095 _ => { 1096 // The first entry has a tree on at least one side (before or after). We need to 1097 // wait for that future to complete. 1098 assert!(!self.pending_trees.is_empty()); 1099 Poll::Pending 1100 } 1101 } 1102 } else { 1103 Poll::Ready(None) 1104 } 1105 } 1106} 1107 1108/// Helper for writing trees with conflicts. 1109/// 1110/// You start by creating an instance of this type with one or more 1111/// base trees. You then add overrides on top. The overrides may be 1112/// conflicts. Then you can write the result as a legacy tree 1113/// (allowing path-level conflicts) or as multiple conflict-free 1114/// trees. 1115pub struct MergedTreeBuilder { 1116 base_tree_id: MergedTreeId, 1117 overrides: BTreeMap<RepoPathBuf, MergedTreeValue>, 1118} 1119 1120impl MergedTreeBuilder { 1121 /// Create a new builder with the given trees as base. 1122 pub fn new(base_tree_id: MergedTreeId) -> Self { 1123 MergedTreeBuilder { 1124 base_tree_id, 1125 overrides: BTreeMap::new(), 1126 } 1127 } 1128 1129 /// Set an override compared to the base tree. The `values` merge must 1130 /// either be resolved (i.e. have 1 side) or have the same number of 1131 /// sides as the `base_tree_ids` used to construct this builder. Use 1132 /// `Merge::absent()` to remove a value from the tree. When the base tree is 1133 /// a legacy tree, conflicts can be written either as a multi-way `Merge` 1134 /// value or as a resolved `Merge` value using `TreeValue::Conflict`. 1135 pub fn set_or_remove(&mut self, path: RepoPathBuf, values: MergedTreeValue) { 1136 if let MergedTreeId::Merge(_) = &self.base_tree_id { 1137 assert!(!values 1138 .iter() 1139 .flatten() 1140 .any(|value| matches!(value, TreeValue::Conflict(_)))); 1141 } 1142 self.overrides.insert(path, values); 1143 } 1144 1145 /// Create new tree(s) from the base tree(s) and overrides. 1146 pub fn write_tree(self, store: &Arc<Store>) -> BackendResult<MergedTreeId> { 1147 let base_tree_ids = match self.base_tree_id.clone() { 1148 MergedTreeId::Legacy(base_tree_id) => { 1149 let legacy_base_tree = store.get_tree(RepoPathBuf::root(), &base_tree_id)?; 1150 let base_tree = MergedTree::from_legacy_tree(legacy_base_tree)?; 1151 base_tree.id().to_merge() 1152 } 1153 MergedTreeId::Merge(base_tree_ids) => base_tree_ids, 1154 }; 1155 let new_tree_ids = self.write_merged_trees(base_tree_ids, store)?; 1156 match new_tree_ids.simplify().into_resolved() { 1157 Ok(single_tree_id) => Ok(MergedTreeId::resolved(single_tree_id)), 1158 Err(tree_id) => { 1159 let tree = store.get_root_tree(&MergedTreeId::Merge(tree_id))?; 1160 let resolved = tree.resolve()?; 1161 Ok(resolved.id()) 1162 } 1163 } 1164 } 1165 1166 fn write_merged_trees( 1167 self, 1168 mut base_tree_ids: Merge<TreeId>, 1169 store: &Arc<Store>, 1170 ) -> BackendResult<Merge<TreeId>> { 1171 let num_sides = self 1172 .overrides 1173 .values() 1174 .map(|value| value.num_sides()) 1175 .max() 1176 .unwrap_or(0); 1177 base_tree_ids.pad_to(num_sides, store.empty_tree_id()); 1178 // Create a single-tree builder for each base tree 1179 let mut tree_builders = 1180 base_tree_ids.map(|base_tree_id| TreeBuilder::new(store.clone(), base_tree_id.clone())); 1181 for (path, values) in self.overrides { 1182 match values.into_resolved() { 1183 Ok(value) => { 1184 // This path was overridden with a resolved value. Apply that to all 1185 // builders. 1186 for builder in tree_builders.iter_mut() { 1187 builder.set_or_remove(path.clone(), value.clone()); 1188 } 1189 } 1190 Err(mut values) => { 1191 values.pad_to(num_sides, &None); 1192 // This path was overridden with a conflicted value. Apply each term to 1193 // its corresponding builder. 1194 for (builder, value) in zip(tree_builders.iter_mut(), values) { 1195 builder.set_or_remove(path.clone(), value); 1196 } 1197 } 1198 } 1199 } 1200 // TODO: This can be made more efficient. If there's a single resolved conflict 1201 // in `dir/file`, we shouldn't have to write the `dir/` and root trees more than 1202 // once. 1203 let merge_builder: MergeBuilder<TreeId> = tree_builders 1204 .into_iter() 1205 .map(|builder| builder.write_tree()) 1206 .try_collect()?; 1207 Ok(merge_builder.build()) 1208 } 1209}