just playing with tangled
1// Copyright 2021 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::cmp::min;
18use std::collections::{BTreeMap, HashSet};
19
20use crate::backend::CommitId;
21use crate::default_index_store::{CompositeIndex, IndexEntry, IndexPosition};
22use crate::revset_graph::{RevsetGraphEdge, RevsetGraphEdgeType};
23
24/// Like `RevsetGraphEdge`, but stores `IndexPosition` instead.
25///
26/// This can be cheaply allocated and hashed compared to `CommitId`-based type.
27#[derive(Clone, Debug, Eq, Hash, PartialEq)]
28struct IndexGraphEdge {
29 target: IndexPosition,
30 edge_type: RevsetGraphEdgeType,
31}
32
33impl IndexGraphEdge {
34 fn missing(target: IndexPosition) -> Self {
35 let edge_type = RevsetGraphEdgeType::Missing;
36 IndexGraphEdge { target, edge_type }
37 }
38
39 fn direct(target: IndexPosition) -> Self {
40 let edge_type = RevsetGraphEdgeType::Direct;
41 IndexGraphEdge { target, edge_type }
42 }
43
44 fn indirect(target: IndexPosition) -> Self {
45 let edge_type = RevsetGraphEdgeType::Indirect;
46 IndexGraphEdge { target, edge_type }
47 }
48
49 fn to_revset_edge(&self, index: CompositeIndex<'_>) -> RevsetGraphEdge {
50 RevsetGraphEdge {
51 target: index.entry_by_pos(self.target).commit_id(),
52 edge_type: self.edge_type,
53 }
54 }
55}
56
57/// Given an iterator over some set of revisions, yields the same revisions with
58/// associated edge types.
59///
60/// If a revision's parent is in the input set, then the edge will be "direct".
61/// Otherwise, there will be one "indirect" edge for each closest ancestor in
62/// the set, and one "missing" edge for each edge leading outside the set.
63///
64/// Example (uppercase characters are in the input set):
65///
66/// A A
67/// |\ |\
68/// B c B :
69/// |\| => |\:
70/// d E ~ E
71/// |/ ~
72/// root
73///
74/// The implementation works by walking the input iterator one commit at a
75/// time. It then considers all parents of the commit. It looks ahead in the
76/// input iterator far enough that all the parents will have been consumed if
77/// they are in the input (and puts them away so we can emit them later). If a
78/// parent of the current commit is not in the input set (i.e. it was not
79/// in the look-ahead), we walk these external commits until we end up back back
80/// in the input set. That walk may result in consuming more elements from the
81/// input iterator. In the example above, when we consider "A", we will
82/// initially look ahead to "B" and "c". When we consider edges from the
83/// external commit "c", we will further consume the input iterator to "E".
84///
85/// Missing edges are those that don't lead back into the input set. If all
86/// edges from an external commit are missing, we consider the edge to that
87/// commit to also be missing. In the example above, that means that "B" will
88/// have a missing edge to "d" rather than to the root.
89///
90/// The iterator can be configured to skip transitive edges that it would
91/// otherwise return. In this mode (which is the default), the edge from "A" to
92/// "E" in the example above would be excluded because there's also a transitive
93/// path from "A" to "E" via "B". The implementation of that mode
94/// adds a filtering step just before yielding the edges for a commit. The
95/// filtering works by doing a DFS in the simplified graph. That may require
96/// even more look-ahead. Consider this example (uppercase characters are in the
97/// input set):
98///
99/// J
100/// /|
101/// | i
102/// | |\
103/// | | H
104/// G | |
105/// | e f
106/// | \|\
107/// | D |
108/// \ / c
109/// b /
110/// |/
111/// A
112/// |
113/// root
114///
115/// When walking from "J", we'll find indirect edges to "H", "G", and "D". This
116/// is our unfiltered set of edges, before removing transitive edges. In order
117/// to know that "D" is an ancestor of "H", we need to also walk from "H". We
118/// use the same search for finding edges from "H" as we used from "J". That
119/// results in looking ahead all the way to "A". We could reduce the amount of
120/// look-ahead by stopping at "c" since we're only interested in edges that
121/// could lead to "D", but that would require extra book-keeping to remember for
122/// later that the edges from "f" and "H" are only partially computed.
123pub struct RevsetGraphIterator<'revset, 'index> {
124 index: CompositeIndex<'index>,
125 input_set_iter: Box<dyn Iterator<Item = IndexEntry<'index>> + 'revset>,
126 /// Commits in the input set we had to take out of the iterator while
127 /// walking external edges. Does not necessarily include the commit
128 /// we're currently about to emit.
129 look_ahead: BTreeMap<IndexPosition, IndexEntry<'index>>,
130 /// The last consumed position. This is always the smallest key in the
131 /// look_ahead map, but it's faster to keep a separate field for it.
132 min_position: IndexPosition,
133 /// Edges for commits not in the input set.
134 // TODO: Remove unneeded entries here as we go (that's why it's an ordered map)?
135 edges: BTreeMap<IndexPosition, Vec<IndexGraphEdge>>,
136 skip_transitive_edges: bool,
137}
138
139impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> {
140 pub fn new(
141 index: CompositeIndex<'index>,
142 input_set_iter: Box<dyn Iterator<Item = IndexEntry<'index>> + 'revset>,
143 ) -> RevsetGraphIterator<'revset, 'index> {
144 RevsetGraphIterator {
145 index,
146 input_set_iter,
147 look_ahead: Default::default(),
148 min_position: IndexPosition::MAX,
149 edges: Default::default(),
150 skip_transitive_edges: true,
151 }
152 }
153
154 pub fn set_skip_transitive_edges(mut self, skip_transitive_edges: bool) -> Self {
155 self.skip_transitive_edges = skip_transitive_edges;
156 self
157 }
158
159 fn next_index_entry(&mut self) -> Option<IndexEntry<'index>> {
160 if let Some(index_entry) = self.look_ahead.last_entry().map(|x| x.remove()) {
161 return Some(index_entry);
162 }
163 self.input_set_iter.next()
164 }
165
166 fn edges_from_internal_commit(
167 &mut self,
168 index_entry: &IndexEntry<'index>,
169 ) -> Vec<IndexGraphEdge> {
170 if let Some(edges) = self.edges.get(&index_entry.position()) {
171 return edges.clone();
172 }
173 let mut edges = Vec::new();
174 let mut known_ancestors = HashSet::new();
175 for parent in index_entry.parents() {
176 let parent_position = parent.position();
177 self.consume_to(parent_position);
178 if self.look_ahead.contains_key(&parent_position) {
179 edges.push(IndexGraphEdge::direct(parent_position));
180 } else {
181 let parent_edges = self.edges_from_external_commit(parent);
182 if parent_edges
183 .iter()
184 .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing)
185 {
186 edges.push(IndexGraphEdge::missing(parent_position));
187 } else {
188 edges.extend(
189 parent_edges
190 .into_iter()
191 .filter(|edge| known_ancestors.insert(edge.target)),
192 )
193 }
194 }
195 }
196 self.edges.insert(index_entry.position(), edges.clone());
197 edges
198 }
199
200 fn edges_from_external_commit(
201 &mut self,
202 index_entry: IndexEntry<'index>,
203 ) -> Vec<IndexGraphEdge> {
204 let position = index_entry.position();
205 let mut stack = vec![index_entry];
206 while let Some(entry) = stack.last() {
207 let position = entry.position();
208 if self.edges.contains_key(&position) {
209 stack.pop().unwrap();
210 continue;
211 }
212 let mut edges = Vec::new();
213 let mut known_ancestors = HashSet::new();
214 let mut parents_complete = true;
215 for parent in entry.parents() {
216 let parent_position = parent.position();
217 self.consume_to(parent_position);
218 if self.look_ahead.contains_key(&parent_position) {
219 // We have found a path back into the input set
220 edges.push(IndexGraphEdge::indirect(parent_position));
221 } else if let Some(parent_edges) = self.edges.get(&parent_position) {
222 if parent_edges
223 .iter()
224 .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing)
225 {
226 edges.push(IndexGraphEdge::missing(parent_position));
227 } else {
228 edges.extend(
229 parent_edges
230 .iter()
231 .filter(|edge| known_ancestors.insert(edge.target))
232 .cloned(),
233 );
234 }
235 } else if parent_position < self.min_position {
236 // The parent is not in the input set
237 edges.push(IndexGraphEdge::missing(parent_position));
238 } else {
239 // The parent is not in the input set but it's somewhere in the range
240 // where we have commits in the input set, so continue searching.
241 stack.push(parent);
242 parents_complete = false;
243 }
244 }
245 if parents_complete {
246 stack.pop().unwrap();
247 self.edges.insert(position, edges);
248 }
249 }
250 self.edges.get(&position).unwrap().clone()
251 }
252
253 fn remove_transitive_edges(&mut self, edges: Vec<IndexGraphEdge>) -> Vec<IndexGraphEdge> {
254 if !edges
255 .iter()
256 .any(|edge| edge.edge_type == RevsetGraphEdgeType::Indirect)
257 {
258 return edges;
259 }
260 let mut min_generation = u32::MAX;
261 let mut initial_targets = HashSet::new();
262 let mut work = vec![];
263 // To start with, add the edges one step after the input edges.
264 for edge in &edges {
265 initial_targets.insert(edge.target);
266 if edge.edge_type != RevsetGraphEdgeType::Missing {
267 let entry = self.look_ahead.get(&edge.target).unwrap().clone();
268 min_generation = min(min_generation, entry.generation_number());
269 work.extend(self.edges_from_internal_commit(&entry));
270 }
271 }
272 // Find commits reachable transitively and add them to the `unwanted` set.
273 let mut unwanted = HashSet::new();
274 while let Some(edge) = work.pop() {
275 if edge.edge_type == RevsetGraphEdgeType::Missing || edge.target < self.min_position {
276 continue;
277 }
278 if !unwanted.insert(edge.target) {
279 // Already visited
280 continue;
281 }
282 if initial_targets.contains(&edge.target) {
283 // Already visited
284 continue;
285 }
286 let entry = self.look_ahead.get(&edge.target).unwrap().clone();
287 if entry.generation_number() < min_generation {
288 continue;
289 }
290 work.extend(self.edges_from_internal_commit(&entry));
291 }
292
293 edges
294 .into_iter()
295 .filter(|edge| !unwanted.contains(&edge.target))
296 .collect()
297 }
298
299 fn consume_to(&mut self, pos: IndexPosition) {
300 while pos < self.min_position {
301 if let Some(next_entry) = self.input_set_iter.next() {
302 let next_position = next_entry.position();
303 self.look_ahead.insert(next_position, next_entry);
304 self.min_position = next_position;
305 } else {
306 break;
307 }
308 }
309 }
310}
311
312impl<'revset, 'index> Iterator for RevsetGraphIterator<'revset, 'index> {
313 type Item = (CommitId, Vec<RevsetGraphEdge>);
314
315 fn next(&mut self) -> Option<Self::Item> {
316 let index_entry = self.next_index_entry()?;
317 let mut edges = self.edges_from_internal_commit(&index_entry);
318 if self.skip_transitive_edges {
319 edges = self.remove_transitive_edges(edges);
320 }
321 let edges = edges
322 .iter()
323 .map(|edge| edge.to_revset_edge(self.index))
324 .collect();
325 Some((index_entry.commit_id(), edges))
326 }
327}