just playing with tangled
1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::fmt::{Debug, Error, Formatter};
18use std::hash::{Hash, Hasher};
19use std::io::Read;
20use std::sync::Arc;
21
22use itertools::Itertools;
23use thiserror::Error;
24use tracing::instrument;
25
26use crate::backend::{
27 BackendError, ConflictId, FileId, ObjectId, TreeEntriesNonRecursiveIterator, TreeEntry, TreeId,
28 TreeValue,
29};
30use crate::conflicts::Conflict;
31use crate::files::MergeResult;
32use crate::matchers::{EverythingMatcher, Matcher};
33use crate::merge::trivial_merge;
34use crate::repo_path::{RepoPath, RepoPathComponent, RepoPathJoin};
35use crate::store::Store;
36use crate::{backend, files};
37
38#[derive(Debug, Error)]
39pub enum TreeMergeError {
40 #[error("Failed to read file with ID {} ", .file_id.hex())]
41 ReadError {
42 source: std::io::Error,
43 file_id: FileId,
44 },
45 #[error("Backend error: {0}")]
46 BackendError(#[from] BackendError),
47}
48
49#[derive(Clone)]
50pub struct Tree {
51 store: Arc<Store>,
52 dir: RepoPath,
53 id: TreeId,
54 data: Arc<backend::Tree>,
55}
56
57impl Debug for Tree {
58 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
59 f.debug_struct("Tree")
60 .field("dir", &self.dir)
61 .field("id", &self.id)
62 .finish()
63 }
64}
65
66impl PartialEq for Tree {
67 fn eq(&self, other: &Self) -> bool {
68 self.id == other.id && self.dir == other.dir
69 }
70}
71
72impl Eq for Tree {}
73
74impl Hash for Tree {
75 fn hash<H: Hasher>(&self, state: &mut H) {
76 self.dir.hash(state);
77 self.id.hash(state);
78 }
79}
80
81#[derive(Debug, PartialEq, Eq, Clone)]
82pub struct DiffSummary {
83 pub modified: Vec<RepoPath>,
84 pub added: Vec<RepoPath>,
85 pub removed: Vec<RepoPath>,
86}
87
88impl DiffSummary {
89 pub fn is_empty(&self) -> bool {
90 self.modified.is_empty() && self.added.is_empty() && self.removed.is_empty()
91 }
92}
93
94impl Tree {
95 pub fn new(store: Arc<Store>, dir: RepoPath, id: TreeId, data: Arc<backend::Tree>) -> Self {
96 Tree {
97 store,
98 dir,
99 id,
100 data,
101 }
102 }
103
104 pub fn null(store: Arc<Store>, dir: RepoPath) -> Self {
105 Tree {
106 store,
107 dir,
108 id: TreeId::new(vec![]),
109 data: Arc::new(backend::Tree::default()),
110 }
111 }
112
113 pub fn store(&self) -> &Arc<Store> {
114 &self.store
115 }
116
117 pub fn dir(&self) -> &RepoPath {
118 &self.dir
119 }
120
121 pub fn id(&self) -> &TreeId {
122 &self.id
123 }
124
125 pub fn data(&self) -> &backend::Tree {
126 &self.data
127 }
128
129 pub fn entries_non_recursive(&self) -> TreeEntriesNonRecursiveIterator {
130 self.data.entries()
131 }
132
133 pub fn entries(&self) -> TreeEntriesIterator<'static> {
134 TreeEntriesIterator::new(self.clone(), &EverythingMatcher)
135 }
136
137 pub fn entries_matching<'matcher>(
138 &self,
139 matcher: &'matcher dyn Matcher,
140 ) -> TreeEntriesIterator<'matcher> {
141 TreeEntriesIterator::new(self.clone(), matcher)
142 }
143
144 pub fn entry(&self, basename: &RepoPathComponent) -> Option<TreeEntry> {
145 self.data.entry(basename)
146 }
147
148 pub fn value(&self, basename: &RepoPathComponent) -> Option<&TreeValue> {
149 self.data.value(basename)
150 }
151
152 pub fn path_value(&self, path: &RepoPath) -> Option<TreeValue> {
153 assert_eq!(self.dir(), &RepoPath::root());
154 match path.split() {
155 Some((dir, basename)) => self
156 .sub_tree_recursive(dir.components())
157 .and_then(|tree| tree.data.value(basename).cloned()),
158 None => Some(TreeValue::Tree(self.id.clone())),
159 }
160 }
161
162 pub fn sub_tree(&self, name: &RepoPathComponent) -> Option<Tree> {
163 self.data.value(name).and_then(|sub_tree| match sub_tree {
164 TreeValue::Tree(sub_tree_id) => {
165 let subdir = self.dir.join(name);
166 Some(self.store.get_tree(&subdir, sub_tree_id).unwrap())
167 }
168 _ => None,
169 })
170 }
171
172 fn known_sub_tree(&self, subdir: &RepoPath, id: &TreeId) -> Tree {
173 self.store.get_tree(subdir, id).unwrap()
174 }
175
176 fn sub_tree_recursive(&self, components: &[RepoPathComponent]) -> Option<Tree> {
177 if let Some((first, tail)) = components.split_first() {
178 tail.iter()
179 .try_fold(self.sub_tree(first)?, |tree, name| tree.sub_tree(name))
180 } else {
181 // TODO: It would be nice to be able to return a reference here, but
182 // then we would have to figure out how to share Tree instances
183 // across threads.
184 Some(self.clone())
185 }
186 }
187
188 #[instrument(skip(matcher))]
189 pub fn diff<'matcher>(
190 &self,
191 other: &Tree,
192 matcher: &'matcher dyn Matcher,
193 ) -> TreeDiffIterator<'matcher> {
194 TreeDiffIterator::new(self.clone(), other.clone(), matcher)
195 }
196
197 pub fn diff_summary(&self, other: &Tree, matcher: &dyn Matcher) -> DiffSummary {
198 let mut modified = vec![];
199 let mut added = vec![];
200 let mut removed = vec![];
201 for (file, diff) in self.diff(other, matcher) {
202 match diff {
203 Diff::Modified(_, _) => modified.push(file.clone()),
204 Diff::Added(_) => added.push(file.clone()),
205 Diff::Removed(_) => removed.push(file.clone()),
206 }
207 }
208 modified.sort();
209 added.sort();
210 removed.sort();
211 DiffSummary {
212 modified,
213 added,
214 removed,
215 }
216 }
217
218 pub fn conflicts_matching(&self, matcher: &dyn Matcher) -> Vec<(RepoPath, ConflictId)> {
219 let mut conflicts = vec![];
220 for (name, value) in self.entries_matching(matcher) {
221 if let TreeValue::Conflict(id) = value {
222 conflicts.push((name.clone(), id.clone()));
223 }
224 }
225 conflicts
226 }
227
228 #[instrument]
229 pub fn conflicts(&self) -> Vec<(RepoPath, ConflictId)> {
230 self.conflicts_matching(&EverythingMatcher)
231 }
232
233 pub fn has_conflict(&self) -> bool {
234 !self.conflicts().is_empty()
235 }
236}
237
238pub struct TreeEntriesIterator<'matcher> {
239 stack: Vec<TreeEntriesDirItem>,
240 matcher: &'matcher dyn Matcher,
241}
242
243struct TreeEntriesDirItem {
244 entry_iterator: TreeEntriesNonRecursiveIterator<'static>,
245 // On drop, tree must outlive entry_iterator
246 tree: Box<Tree>,
247}
248
249impl TreeEntriesDirItem {
250 fn new(tree: Tree) -> Self {
251 let tree = Box::new(tree);
252 let entry_iterator = tree.entries_non_recursive();
253 let entry_iterator: TreeEntriesNonRecursiveIterator<'static> =
254 unsafe { std::mem::transmute(entry_iterator) };
255 Self {
256 entry_iterator,
257 tree,
258 }
259 }
260}
261
262impl<'matcher> TreeEntriesIterator<'matcher> {
263 fn new(tree: Tree, matcher: &'matcher dyn Matcher) -> Self {
264 // TODO: Restrict walk according to Matcher::visit()
265 Self {
266 stack: vec![TreeEntriesDirItem::new(tree)],
267 matcher,
268 }
269 }
270}
271
272impl Iterator for TreeEntriesIterator<'_> {
273 type Item = (RepoPath, TreeValue);
274
275 fn next(&mut self) -> Option<Self::Item> {
276 while let Some(top) = self.stack.last_mut() {
277 if let Some(entry) = top.entry_iterator.next() {
278 let path = top.tree.dir().join(entry.name());
279 match entry.value() {
280 TreeValue::Tree(id) => {
281 // TODO: Handle the other cases (specific files and trees)
282 if self.matcher.visit(&path).is_nothing() {
283 continue;
284 }
285 let subtree = top.tree.known_sub_tree(&path, id);
286 self.stack.push(TreeEntriesDirItem::new(subtree));
287 }
288 value => {
289 if self.matcher.matches(&path) {
290 return Some((path, value.clone()));
291 }
292 }
293 };
294 } else {
295 self.stack.pop();
296 }
297 }
298 None
299 }
300}
301
302#[derive(Debug, PartialEq, Eq, Clone)]
303pub enum Diff<T> {
304 Modified(T, T),
305 Added(T),
306 Removed(T),
307}
308
309impl<T> Diff<T> {
310 pub fn from_options(left: Option<T>, right: Option<T>) -> Self {
311 match (left, right) {
312 (Some(left), Some(right)) => Diff::Modified(left, right),
313 (None, Some(right)) => Diff::Added(right),
314 (Some(left), None) => Diff::Removed(left),
315 (None, None) => panic!("left and right cannot both be None"),
316 }
317 }
318
319 pub fn into_options(self) -> (Option<T>, Option<T>) {
320 match self {
321 Diff::Modified(left, right) => (Some(left), Some(right)),
322 Diff::Added(right) => (None, Some(right)),
323 Diff::Removed(left) => (Some(left), None),
324 }
325 }
326}
327
328struct TreeEntryDiffIterator<'trees> {
329 tree1: &'trees Tree,
330 tree2: &'trees Tree,
331 basename_iter: Box<dyn Iterator<Item = &'trees RepoPathComponent> + 'trees>,
332}
333
334impl<'trees> TreeEntryDiffIterator<'trees> {
335 fn new(tree1: &'trees Tree, tree2: &'trees Tree) -> Self {
336 let basename_iter = Box::new(tree1.data.names().merge(tree2.data.names()).dedup());
337 TreeEntryDiffIterator {
338 tree1,
339 tree2,
340 basename_iter,
341 }
342 }
343}
344
345impl<'trees> Iterator for TreeEntryDiffIterator<'trees> {
346 type Item = (
347 &'trees RepoPathComponent,
348 Option<&'trees TreeValue>,
349 Option<&'trees TreeValue>,
350 );
351
352 fn next(&mut self) -> Option<Self::Item> {
353 for basename in self.basename_iter.by_ref() {
354 let value1 = self.tree1.value(basename);
355 let value2 = self.tree2.value(basename);
356 if value1 != value2 {
357 return Some((basename, value1, value2));
358 }
359 }
360 None
361 }
362}
363
364pub struct TreeDiffIterator<'matcher> {
365 stack: Vec<TreeDiffItem>,
366 matcher: &'matcher dyn Matcher,
367}
368
369struct TreeDiffDirItem {
370 path: RepoPath,
371 // Iterator over the diffs between tree1 and tree2
372 entry_iterator: TreeEntryDiffIterator<'static>,
373 // On drop, tree1 and tree2 must outlive entry_iterator
374 tree1: Box<Tree>,
375 tree2: Box<Tree>,
376}
377
378enum TreeDiffItem {
379 Dir(TreeDiffDirItem),
380 // This is used for making sure that when a directory gets replaced by a file, we
381 // yield the value for the addition of the file after we yield the values
382 // for removing files in the directory.
383 File(RepoPath, Diff<TreeValue>),
384}
385
386impl<'matcher> TreeDiffIterator<'matcher> {
387 fn new(tree1: Tree, tree2: Tree, matcher: &'matcher dyn Matcher) -> Self {
388 let root_dir = RepoPath::root();
389 let mut stack = Vec::new();
390 if !matcher.visit(&root_dir).is_nothing() {
391 stack.push(TreeDiffItem::Dir(TreeDiffDirItem::new(
392 root_dir, tree1, tree2,
393 )));
394 };
395 Self { stack, matcher }
396 }
397}
398
399impl TreeDiffDirItem {
400 fn new(path: RepoPath, tree1: Tree, tree2: Tree) -> Self {
401 let tree1 = Box::new(tree1);
402 let tree2 = Box::new(tree2);
403 let iter: TreeEntryDiffIterator = TreeEntryDiffIterator::new(&tree1, &tree2);
404 let iter: TreeEntryDiffIterator<'static> = unsafe { std::mem::transmute(iter) };
405 Self {
406 path,
407 entry_iterator: iter,
408 tree1,
409 tree2,
410 }
411 }
412
413 fn subdir(
414 &self,
415 subdir_path: &RepoPath,
416 before: Option<&TreeValue>,
417 after: Option<&TreeValue>,
418 ) -> Self {
419 let before_tree = match before {
420 Some(TreeValue::Tree(id_before)) => self.tree1.known_sub_tree(subdir_path, id_before),
421 _ => Tree::null(self.tree1.store().clone(), subdir_path.clone()),
422 };
423 let after_tree = match after {
424 Some(TreeValue::Tree(id_after)) => self.tree2.known_sub_tree(subdir_path, id_after),
425 _ => Tree::null(self.tree2.store().clone(), subdir_path.clone()),
426 };
427 Self::new(subdir_path.clone(), before_tree, after_tree)
428 }
429}
430
431impl Iterator for TreeDiffIterator<'_> {
432 type Item = (RepoPath, Diff<TreeValue>);
433
434 fn next(&mut self) -> Option<Self::Item> {
435 while let Some(top) = self.stack.last_mut() {
436 let (dir, (name, before, after)) = match top {
437 TreeDiffItem::Dir(dir) => {
438 if let Some(entry) = dir.entry_iterator.next() {
439 (dir, entry)
440 } else {
441 self.stack.pop().unwrap();
442 continue;
443 }
444 }
445 TreeDiffItem::File(..) => {
446 if let TreeDiffItem::File(name, diff) = self.stack.pop().unwrap() {
447 return Some((name, diff));
448 } else {
449 unreachable!();
450 }
451 }
452 };
453
454 let path = dir.path.join(name);
455 let tree_before = matches!(before, Some(TreeValue::Tree(_)));
456 let tree_after = matches!(after, Some(TreeValue::Tree(_)));
457 let post_subdir =
458 if (tree_before || tree_after) && !self.matcher.visit(&path).is_nothing() {
459 let subdir = dir.subdir(&path, before, after);
460 self.stack.push(TreeDiffItem::Dir(subdir));
461 self.stack.len() - 1
462 } else {
463 self.stack.len()
464 };
465 if self.matcher.matches(&path) {
466 if !tree_before && tree_after {
467 if let Some(value_before) = before {
468 return Some((path, Diff::Removed(value_before.clone())));
469 }
470 } else if tree_before && !tree_after {
471 if let Some(value_after) = after {
472 self.stack.insert(
473 post_subdir,
474 TreeDiffItem::File(path, Diff::Added(value_after.clone())),
475 );
476 }
477 } else if !tree_before && !tree_after {
478 return Some((path, Diff::from_options(before.cloned(), after.cloned())));
479 }
480 }
481 }
482 None
483 }
484}
485
486pub fn merge_trees(
487 side1_tree: &Tree,
488 base_tree: &Tree,
489 side2_tree: &Tree,
490) -> Result<Tree, TreeMergeError> {
491 let store = base_tree.store();
492 let dir = base_tree.dir();
493 assert_eq!(side1_tree.dir(), dir);
494 assert_eq!(side2_tree.dir(), dir);
495
496 if let Some(resolved) = trivial_merge(&[base_tree], &[side1_tree, side2_tree]) {
497 return Ok((*resolved).clone());
498 }
499
500 // Start with a tree identical to side 1 and modify based on changes from base
501 // to side 2.
502 let mut new_tree = side1_tree.data().clone();
503 for (basename, maybe_base, maybe_side2) in TreeEntryDiffIterator::new(base_tree, side2_tree) {
504 let maybe_side1 = side1_tree.value(basename);
505 if maybe_side1 == maybe_base {
506 // side 1 is unchanged: use the value from side 2
507 new_tree.set_or_remove(basename, maybe_side2.cloned());
508 } else if maybe_side1 == maybe_side2 {
509 // Both sides changed in the same way: new_tree already has the
510 // value
511 } else {
512 // The two sides changed in different ways
513 let new_value =
514 merge_tree_value(store, dir, basename, maybe_base, maybe_side1, maybe_side2)?;
515 new_tree.set_or_remove(basename, new_value);
516 }
517 }
518 Ok(store.write_tree(dir, new_tree)?)
519}
520
521/// Returns `Some(TreeId)` if this is a directory or missing. If it's missing,
522/// we treat it as an empty tree.
523fn maybe_tree_id<'id>(
524 value: Option<&'id TreeValue>,
525 empty_tree_id: &'id TreeId,
526) -> Option<&'id TreeId> {
527 match value {
528 Some(TreeValue::Tree(id)) => Some(id),
529 None => Some(empty_tree_id),
530 _ => None,
531 }
532}
533
534fn merge_tree_value(
535 store: &Arc<Store>,
536 dir: &RepoPath,
537 basename: &RepoPathComponent,
538 maybe_base: Option<&TreeValue>,
539 maybe_side1: Option<&TreeValue>,
540 maybe_side2: Option<&TreeValue>,
541) -> Result<Option<TreeValue>, TreeMergeError> {
542 // Resolve non-trivial conflicts:
543 // * resolve tree conflicts by recursing
544 // * try to resolve file conflicts by merging the file contents
545 // * leave other conflicts (e.g. file/dir conflicts, remove/modify conflicts)
546 // unresolved
547
548 let empty_tree_id = store.empty_tree_id();
549 let base_tree_id = maybe_tree_id(maybe_base, empty_tree_id);
550 let side1_tree_id = maybe_tree_id(maybe_side1, empty_tree_id);
551 let side2_tree_id = maybe_tree_id(maybe_side2, empty_tree_id);
552 Ok(match (base_tree_id, side1_tree_id, side2_tree_id) {
553 (Some(base_id), Some(side1_id), Some(side2_id)) => {
554 let subdir = dir.join(basename);
555 let base_tree = store.get_tree(&subdir, base_id)?;
556 let side1_tree = store.get_tree(&subdir, side1_id)?;
557 let side2_tree = store.get_tree(&subdir, side2_id)?;
558 let merged_tree = merge_trees(&side1_tree, &base_tree, &side2_tree)?;
559 if merged_tree.id() == empty_tree_id {
560 None
561 } else {
562 Some(TreeValue::Tree(merged_tree.id().clone()))
563 }
564 }
565 _ => {
566 // Start by creating a Conflict object. Conflicts can cleanly represent a single
567 // resolved state, the absence of a state, or a conflicted state.
568 let conflict = Conflict::new(
569 vec![maybe_base.cloned()],
570 vec![maybe_side1.cloned(), maybe_side2.cloned()],
571 );
572 let filename = dir.join(basename);
573 let conflict = simplify_conflict(store, &filename, conflict)?;
574 if let Some(value) = conflict.as_resolved() {
575 return Ok(value.clone());
576 }
577 if let Some(tree_value) = try_resolve_file_conflict(store, &filename, &conflict)? {
578 Some(tree_value)
579 } else {
580 let conflict_id = store.write_conflict(&filename, &conflict)?;
581 Some(TreeValue::Conflict(conflict_id))
582 }
583 }
584 })
585}
586
587pub fn try_resolve_file_conflict(
588 store: &Store,
589 filename: &RepoPath,
590 conflict: &Conflict<Option<TreeValue>>,
591) -> Result<Option<TreeValue>, TreeMergeError> {
592 // If there are any non-file or any missing parts in the conflict, we can't
593 // merge it. We check early so we don't waste time reading file contents if
594 // we can't merge them anyway. At the same time we determine whether the
595 // resulting file should be executable.
596 let Some(file_id_conflict) = conflict.maybe_map(|term| match term {
597 Some(TreeValue::File { id, executable: _ }) => Some(id),
598 _ => None,
599 }) else {
600 return Ok(None);
601 };
602 let Some(executable_conflict) = conflict.maybe_map(|term| match term {
603 Some(TreeValue::File { id: _, executable }) => Some(executable),
604 _ => None,
605 }) else {
606 return Ok(None);
607 };
608 let Some(&&executable) = executable_conflict.resolve_trivial() else {
609 // We're unable to determine whether the result should be executable
610 return Ok(None);
611 };
612 if let Some(&resolved_file_id) = file_id_conflict.resolve_trivial() {
613 // Don't bother reading the file contents if the conflict can be trivially
614 // resolved.
615 return Ok(Some(TreeValue::File {
616 id: resolved_file_id.clone(),
617 executable,
618 }));
619 }
620 let mut removed_contents = vec![];
621 let mut added_contents = vec![];
622 for &file_id in file_id_conflict.removes() {
623 let mut content = vec![];
624 store
625 .read_file(filename, file_id)?
626 .read_to_end(&mut content)
627 .map_err(|err| TreeMergeError::ReadError {
628 source: err,
629 file_id: file_id.clone(),
630 })?;
631 removed_contents.push(content);
632 }
633 for &file_id in file_id_conflict.adds() {
634 let mut content = vec![];
635 store
636 .read_file(filename, file_id)?
637 .read_to_end(&mut content)
638 .map_err(|err| TreeMergeError::ReadError {
639 source: err,
640 file_id: file_id.clone(),
641 })?;
642 added_contents.push(content);
643 }
644 let merge_result = files::merge(
645 &removed_contents.iter().map(Vec::as_slice).collect_vec(),
646 &added_contents.iter().map(Vec::as_slice).collect_vec(),
647 );
648 match merge_result {
649 MergeResult::Resolved(merged_content) => {
650 let id = store.write_file(filename, &mut merged_content.0.as_slice())?;
651 Ok(Some(TreeValue::File { id, executable }))
652 }
653 MergeResult::Conflict(_) => Ok(None),
654 }
655}
656
657fn simplify_conflict(
658 store: &Store,
659 path: &RepoPath,
660 conflict: Conflict<Option<TreeValue>>,
661) -> Result<Conflict<Option<TreeValue>>, BackendError> {
662 // Important cases to simplify:
663 //
664 // D
665 // |
666 // B C
667 // |/
668 // A
669 //
670 // 1. rebase C to B, then back to A => there should be no conflict
671 // 2. rebase C to B, then to D => the conflict should not mention B
672 // 3. rebase B to C and D to B', then resolve the conflict in B' and rebase D'
673 // on top => the conflict should be between B'', B, and D; it should not
674 // mention the conflict in B'
675
676 // Case 1 above:
677 // After first rebase, the conflict is {+B-A+C}. After rebasing back,
678 // the unsimplified conflict is {+A-B+{+B-A+C}}. Since the
679 // inner conflict is positive, we can simply move it into the outer conflict. We
680 // thus get {+A-B+B-A+C}, which we can then simplify to just C (because {+C} ==
681 // C).
682 //
683 // Case 2 above:
684 // After first rebase, the conflict is {+B-A+C}. After rebasing to D,
685 // the unsimplified conflict is {+D-C+{+B-A+C}}. As in the
686 // previous case, the inner conflict can be moved into the outer one. We then
687 // get {+D-C+B-A+C}. That can be simplified to
688 // {+D+B-A}, which is the desired conflict.
689 //
690 // Case 3 above:
691 // TODO: describe this case
692
693 let expanded = conflict.try_map(|term| match term {
694 Some(TreeValue::Conflict(id)) => store.read_conflict(path, id),
695 _ => Ok(Conflict::resolved(term.clone())),
696 })?;
697 Ok(expanded.flatten().simplify())
698}