just playing with tangled
at tmp-tutorial 327 lines 13 kB view raw
1// Copyright 2021 The Jujutsu Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#![allow(missing_docs)] 16 17use std::cmp::min; 18use std::collections::{BTreeMap, HashSet}; 19 20use crate::backend::CommitId; 21use crate::default_index_store::{CompositeIndex, IndexEntry, IndexPosition}; 22use crate::revset_graph::{RevsetGraphEdge, RevsetGraphEdgeType}; 23 24/// Like `RevsetGraphEdge`, but stores `IndexPosition` instead. 25/// 26/// This can be cheaply allocated and hashed compared to `CommitId`-based type. 27#[derive(Clone, Debug, Eq, Hash, PartialEq)] 28struct IndexGraphEdge { 29 target: IndexPosition, 30 edge_type: RevsetGraphEdgeType, 31} 32 33impl IndexGraphEdge { 34 fn missing(target: IndexPosition) -> Self { 35 let edge_type = RevsetGraphEdgeType::Missing; 36 IndexGraphEdge { target, edge_type } 37 } 38 39 fn direct(target: IndexPosition) -> Self { 40 let edge_type = RevsetGraphEdgeType::Direct; 41 IndexGraphEdge { target, edge_type } 42 } 43 44 fn indirect(target: IndexPosition) -> Self { 45 let edge_type = RevsetGraphEdgeType::Indirect; 46 IndexGraphEdge { target, edge_type } 47 } 48 49 fn to_revset_edge(&self, index: CompositeIndex<'_>) -> RevsetGraphEdge { 50 RevsetGraphEdge { 51 target: index.entry_by_pos(self.target).commit_id(), 52 edge_type: self.edge_type, 53 } 54 } 55} 56 57/// Given an iterator over some set of revisions, yields the same revisions with 58/// associated edge types. 59/// 60/// If a revision's parent is in the input set, then the edge will be "direct". 61/// Otherwise, there will be one "indirect" edge for each closest ancestor in 62/// the set, and one "missing" edge for each edge leading outside the set. 63/// 64/// Example (uppercase characters are in the input set): 65/// 66/// A A 67/// |\ |\ 68/// B c B : 69/// |\| => |\: 70/// d E ~ E 71/// |/ ~ 72/// root 73/// 74/// The implementation works by walking the input iterator one commit at a 75/// time. It then considers all parents of the commit. It looks ahead in the 76/// input iterator far enough that all the parents will have been consumed if 77/// they are in the input (and puts them away so we can emit them later). If a 78/// parent of the current commit is not in the input set (i.e. it was not 79/// in the look-ahead), we walk these external commits until we end up back back 80/// in the input set. That walk may result in consuming more elements from the 81/// input iterator. In the example above, when we consider "A", we will 82/// initially look ahead to "B" and "c". When we consider edges from the 83/// external commit "c", we will further consume the input iterator to "E". 84/// 85/// Missing edges are those that don't lead back into the input set. If all 86/// edges from an external commit are missing, we consider the edge to that 87/// commit to also be missing. In the example above, that means that "B" will 88/// have a missing edge to "d" rather than to the root. 89/// 90/// The iterator can be configured to skip transitive edges that it would 91/// otherwise return. In this mode (which is the default), the edge from "A" to 92/// "E" in the example above would be excluded because there's also a transitive 93/// path from "A" to "E" via "B". The implementation of that mode 94/// adds a filtering step just before yielding the edges for a commit. The 95/// filtering works by doing a DFS in the simplified graph. That may require 96/// even more look-ahead. Consider this example (uppercase characters are in the 97/// input set): 98/// 99/// J 100/// /| 101/// | i 102/// | |\ 103/// | | H 104/// G | | 105/// | e f 106/// | \|\ 107/// | D | 108/// \ / c 109/// b / 110/// |/ 111/// A 112/// | 113/// root 114/// 115/// When walking from "J", we'll find indirect edges to "H", "G", and "D". This 116/// is our unfiltered set of edges, before removing transitive edges. In order 117/// to know that "D" is an ancestor of "H", we need to also walk from "H". We 118/// use the same search for finding edges from "H" as we used from "J". That 119/// results in looking ahead all the way to "A". We could reduce the amount of 120/// look-ahead by stopping at "c" since we're only interested in edges that 121/// could lead to "D", but that would require extra book-keeping to remember for 122/// later that the edges from "f" and "H" are only partially computed. 123pub struct RevsetGraphIterator<'revset, 'index> { 124 index: CompositeIndex<'index>, 125 input_set_iter: Box<dyn Iterator<Item = IndexEntry<'index>> + 'revset>, 126 /// Commits in the input set we had to take out of the iterator while 127 /// walking external edges. Does not necessarily include the commit 128 /// we're currently about to emit. 129 look_ahead: BTreeMap<IndexPosition, IndexEntry<'index>>, 130 /// The last consumed position. This is always the smallest key in the 131 /// look_ahead map, but it's faster to keep a separate field for it. 132 min_position: IndexPosition, 133 /// Edges for commits not in the input set. 134 // TODO: Remove unneeded entries here as we go (that's why it's an ordered map)? 135 edges: BTreeMap<IndexPosition, Vec<IndexGraphEdge>>, 136 skip_transitive_edges: bool, 137} 138 139impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> { 140 pub fn new( 141 index: CompositeIndex<'index>, 142 input_set_iter: Box<dyn Iterator<Item = IndexEntry<'index>> + 'revset>, 143 ) -> RevsetGraphIterator<'revset, 'index> { 144 RevsetGraphIterator { 145 index, 146 input_set_iter, 147 look_ahead: Default::default(), 148 min_position: IndexPosition::MAX, 149 edges: Default::default(), 150 skip_transitive_edges: true, 151 } 152 } 153 154 pub fn set_skip_transitive_edges(mut self, skip_transitive_edges: bool) -> Self { 155 self.skip_transitive_edges = skip_transitive_edges; 156 self 157 } 158 159 fn next_index_entry(&mut self) -> Option<IndexEntry<'index>> { 160 if let Some(index_entry) = self.look_ahead.last_entry().map(|x| x.remove()) { 161 return Some(index_entry); 162 } 163 self.input_set_iter.next() 164 } 165 166 fn edges_from_internal_commit( 167 &mut self, 168 index_entry: &IndexEntry<'index>, 169 ) -> Vec<IndexGraphEdge> { 170 if let Some(edges) = self.edges.get(&index_entry.position()) { 171 return edges.clone(); 172 } 173 let mut edges = Vec::new(); 174 let mut known_ancestors = HashSet::new(); 175 for parent in index_entry.parents() { 176 let parent_position = parent.position(); 177 self.consume_to(parent_position); 178 if self.look_ahead.contains_key(&parent_position) { 179 edges.push(IndexGraphEdge::direct(parent_position)); 180 } else { 181 let parent_edges = self.edges_from_external_commit(parent); 182 if parent_edges 183 .iter() 184 .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing) 185 { 186 edges.push(IndexGraphEdge::missing(parent_position)); 187 } else { 188 edges.extend( 189 parent_edges 190 .into_iter() 191 .filter(|edge| known_ancestors.insert(edge.target)), 192 ) 193 } 194 } 195 } 196 self.edges.insert(index_entry.position(), edges.clone()); 197 edges 198 } 199 200 fn edges_from_external_commit( 201 &mut self, 202 index_entry: IndexEntry<'index>, 203 ) -> Vec<IndexGraphEdge> { 204 let position = index_entry.position(); 205 let mut stack = vec![index_entry]; 206 while let Some(entry) = stack.last() { 207 let position = entry.position(); 208 if self.edges.contains_key(&position) { 209 stack.pop().unwrap(); 210 continue; 211 } 212 let mut edges = Vec::new(); 213 let mut known_ancestors = HashSet::new(); 214 let mut parents_complete = true; 215 for parent in entry.parents() { 216 let parent_position = parent.position(); 217 self.consume_to(parent_position); 218 if self.look_ahead.contains_key(&parent_position) { 219 // We have found a path back into the input set 220 edges.push(IndexGraphEdge::indirect(parent_position)); 221 } else if let Some(parent_edges) = self.edges.get(&parent_position) { 222 if parent_edges 223 .iter() 224 .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing) 225 { 226 edges.push(IndexGraphEdge::missing(parent_position)); 227 } else { 228 edges.extend( 229 parent_edges 230 .iter() 231 .filter(|edge| known_ancestors.insert(edge.target)) 232 .cloned(), 233 ); 234 } 235 } else if parent_position < self.min_position { 236 // The parent is not in the input set 237 edges.push(IndexGraphEdge::missing(parent_position)); 238 } else { 239 // The parent is not in the input set but it's somewhere in the range 240 // where we have commits in the input set, so continue searching. 241 stack.push(parent); 242 parents_complete = false; 243 } 244 } 245 if parents_complete { 246 stack.pop().unwrap(); 247 self.edges.insert(position, edges); 248 } 249 } 250 self.edges.get(&position).unwrap().clone() 251 } 252 253 fn remove_transitive_edges(&mut self, edges: Vec<IndexGraphEdge>) -> Vec<IndexGraphEdge> { 254 if !edges 255 .iter() 256 .any(|edge| edge.edge_type == RevsetGraphEdgeType::Indirect) 257 { 258 return edges; 259 } 260 let mut min_generation = u32::MAX; 261 let mut initial_targets = HashSet::new(); 262 let mut work = vec![]; 263 // To start with, add the edges one step after the input edges. 264 for edge in &edges { 265 initial_targets.insert(edge.target); 266 if edge.edge_type != RevsetGraphEdgeType::Missing { 267 let entry = self.look_ahead.get(&edge.target).unwrap().clone(); 268 min_generation = min(min_generation, entry.generation_number()); 269 work.extend(self.edges_from_internal_commit(&entry)); 270 } 271 } 272 // Find commits reachable transitively and add them to the `unwanted` set. 273 let mut unwanted = HashSet::new(); 274 while let Some(edge) = work.pop() { 275 if edge.edge_type == RevsetGraphEdgeType::Missing || edge.target < self.min_position { 276 continue; 277 } 278 if !unwanted.insert(edge.target) { 279 // Already visited 280 continue; 281 } 282 if initial_targets.contains(&edge.target) { 283 // Already visited 284 continue; 285 } 286 let entry = self.look_ahead.get(&edge.target).unwrap().clone(); 287 if entry.generation_number() < min_generation { 288 continue; 289 } 290 work.extend(self.edges_from_internal_commit(&entry)); 291 } 292 293 edges 294 .into_iter() 295 .filter(|edge| !unwanted.contains(&edge.target)) 296 .collect() 297 } 298 299 fn consume_to(&mut self, pos: IndexPosition) { 300 while pos < self.min_position { 301 if let Some(next_entry) = self.input_set_iter.next() { 302 let next_position = next_entry.position(); 303 self.look_ahead.insert(next_position, next_entry); 304 self.min_position = next_position; 305 } else { 306 break; 307 } 308 } 309 } 310} 311 312impl<'revset, 'index> Iterator for RevsetGraphIterator<'revset, 'index> { 313 type Item = (CommitId, Vec<RevsetGraphEdge>); 314 315 fn next(&mut self) -> Option<Self::Item> { 316 let index_entry = self.next_index_entry()?; 317 let mut edges = self.edges_from_internal_commit(&index_entry); 318 if self.skip_transitive_edges { 319 edges = self.remove_transitive_edges(edges); 320 } 321 let edges = edges 322 .iter() 323 .map(|edge| edge.to_revset_edge(self.index)) 324 .collect(); 325 Some((index_entry.commit_id(), edges)) 326 } 327}