fork of https://github.com/tree-sitter/tree-sitter-graph

Merge pull request #133 from tree-sitter/easier-match-ops

authored by Hendrik van Antwerpen and committed by GitHub db752ab6 8e74625c

+8
CHANGELOG.md
··· 5 5 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 6 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 7 8 + ## v0.10.1 -- 2023-05-15 9 + 10 + ### Library 11 + 12 + #### Added 13 + 14 + - The `ast::File::try_visit_matches` method can be used to execute the stanza query matching without executing the stanza bodies. 15 + 8 16 ## v0.10.0 -- 2023-05-10 9 17 10 18 ### DSL
+1 -1
Cargo.toml
··· 1 1 [package] 2 2 name = "tree-sitter-graph" 3 - version = "0.10.0" 3 + version = "0.10.1" 4 4 description = "Construct graphs from parsed source code" 5 5 homepage = "https://github.com/tree-sitter/tree-sitter-graph/" 6 6 repository = "https://github.com/tree-sitter/tree-sitter-graph/"
+2 -1
src/ast.rs
··· 14 14 use tree_sitter::Language; 15 15 use tree_sitter::Query; 16 16 17 + use crate::parser::Range; 17 18 use crate::Identifier; 18 19 use crate::Location; 19 20 ··· 66 67 pub full_match_stanza_capture_index: usize, 67 68 /// Capture index of the full match in the file query 68 69 pub full_match_file_capture_index: usize, 69 - pub location: Location, 70 + pub range: Range, 70 71 } 71 72 72 73 /// A statement that can appear in a graph DSL stanza
+1 -1
src/checker.rs
··· 198 198 if !unused_captures.is_empty() { 199 199 return Err(CheckError::UnusedCaptures( 200 200 unused_captures.join(" "), 201 - self.location, 201 + self.range.start, 202 202 )); 203 203 } 204 204
+176 -29
src/execution.rs
··· 7 7 8 8 use thiserror::Error; 9 9 use tree_sitter::CaptureQuantifier; 10 + use tree_sitter::Node; 10 11 use tree_sitter::QueryMatch; 11 12 use tree_sitter::Tree; 12 13 13 14 use crate::ast::File; 15 + use crate::ast::Stanza; 14 16 use crate::execution::error::ExecutionError; 15 17 use crate::functions::Functions; 16 18 use crate::graph::Graph; 17 19 use crate::graph::Value; 18 20 use crate::variables::Globals; 19 21 use crate::Identifier; 22 + use crate::Location; 20 23 21 24 pub(crate) mod error; 22 25 mod lazy; ··· 93 96 94 97 Ok(()) 95 98 } 99 + 100 + pub fn try_visit_matches<'tree, E, F>( 101 + &self, 102 + tree: &'tree Tree, 103 + source: &'tree str, 104 + lazy: bool, 105 + mut visit: F, 106 + ) -> Result<(), E> 107 + where 108 + F: FnMut(Match<'_, 'tree>) -> Result<(), E>, 109 + { 110 + if lazy { 111 + let file_query = self.query.as_ref().expect("missing file query"); 112 + self.try_visit_matches_lazy(tree, source, |stanza, mat| { 113 + let named_captures = stanza 114 + .query 115 + .capture_names() 116 + .iter() 117 + .map(|name| { 118 + let index = file_query 119 + .capture_index_for_name(name) 120 + .expect("missing index for capture"); 121 + let quantifier = 122 + file_query.capture_quantifiers(mat.pattern_index)[index as usize]; 123 + (name, quantifier, index) 124 + }) 125 + .filter(|c| c.2 != stanza.full_match_file_capture_index as u32) 126 + .collect(); 127 + visit(Match { 128 + mat, 129 + full_capture_index: stanza.full_match_file_capture_index as u32, 130 + named_captures, 131 + query_location: stanza.range.start, 132 + }) 133 + }) 134 + } else { 135 + self.try_visit_matches_strict(tree, source, |stanza, mat| { 136 + let named_captures = stanza 137 + .query 138 + .capture_names() 139 + .iter() 140 + .map(|name| { 141 + let index = stanza 142 + .query 143 + .capture_index_for_name(name) 144 + .expect("missing index for capture"); 145 + let quantifier = stanza.query.capture_quantifiers(0)[index as usize]; 146 + (name, quantifier, index) 147 + }) 
148 + .filter(|c| c.2 != stanza.full_match_stanza_capture_index as u32) 149 + .collect(); 150 + visit(Match { 151 + mat, 152 + full_capture_index: stanza.full_match_stanza_capture_index as u32, 153 + named_captures, 154 + query_location: stanza.range.start, 155 + }) 156 + }) 157 + } 158 + } 159 + } 160 + 161 + impl Stanza { 162 + pub fn try_visit_matches<'tree, E, F>( 163 + &self, 164 + tree: &'tree Tree, 165 + source: &'tree str, 166 + mut visit: F, 167 + ) -> Result<(), E> 168 + where 169 + F: FnMut(Match<'_, 'tree>) -> Result<(), E>, 170 + { 171 + self.try_visit_matches_strict(tree, source, |mat| { 172 + let named_captures = self 173 + .query 174 + .capture_names() 175 + .iter() 176 + .map(|name| { 177 + let index = self 178 + .query 179 + .capture_index_for_name(name) 180 + .expect("missing index for capture"); 181 + let quantifier = self.query.capture_quantifiers(0)[index as usize]; 182 + (name, quantifier, index) 183 + }) 184 + .filter(|c| c.2 != self.full_match_stanza_capture_index as u32) 185 + .collect(); 186 + visit(Match { 187 + mat, 188 + full_capture_index: self.full_match_stanza_capture_index as u32, 189 + named_captures, 190 + query_location: self.range.start, 191 + }) 192 + }) 193 + } 194 + } 195 + 196 + pub struct Match<'a, 'tree> { 197 + mat: QueryMatch<'a, 'tree>, 198 + full_capture_index: u32, 199 + named_captures: Vec<(&'a String, CaptureQuantifier, u32)>, 200 + query_location: Location, 201 + } 202 + 203 + impl<'a, 'tree> Match<'a, 'tree> { 204 + /// Return the top-level matched node. 205 + pub fn full_capture(&self) -> Node<'tree> { 206 + self.mat 207 + .nodes_for_capture_index(self.full_capture_index) 208 + .next() 209 + .expect("missing full capture") 210 + } 211 + 212 + /// Return an iterator over all named captures, each with its name, quantifier, and matched nodes. 
213 + pub fn named_captures<'s: 'a + 'tree>( 214 + &'s self, 215 + ) -> impl Iterator< 216 + Item = ( 217 + &String, 218 + CaptureQuantifier, 219 + impl Iterator<Item = Node<'tree>> + 's, 220 + ), 221 + > { 222 + self.named_captures 223 + .iter() 224 + .map(move |c| (c.0, c.1, self.mat.nodes_for_capture_index(c.2))) 225 + } 226 + 227 + /// Return the matched nodes for a named capture. 228 + pub fn named_capture<'s: 'a + 'tree>( 229 + &'s self, 230 + name: &str, 231 + ) -> Option<(CaptureQuantifier, impl Iterator<Item = Node<'tree>> + 's)> { 232 + self.named_captures 233 + .iter() 234 + .find(|c| c.0 == name) 235 + .map(|c| (c.1, self.mat.nodes_for_capture_index(c.2))) 236 + } 237 + 238 + /// Return an iterator over all capture names. 239 + pub fn capture_names(&self) -> impl Iterator<Item = &String> { 240 + self.named_captures.iter().map(|c| c.0) 241 + } 242 + 243 + /// Return the query location. 244 + pub fn query_location(&self) -> &Location { 245 + &self.query_location 246 + } 96 247 } 97 248 98 249 /// Configuration for the execution of a File ··· 156 307 #[error("Cancelled at \"{0}\"")] 157 308 pub struct CancellationError(pub &'static str); 158 309 159 - /// Get the value for the given capture, considering the suffix 160 - pub(self) fn query_capture_value<'tree>( 161 - index: usize, 162 - quantifier: CaptureQuantifier, 163 - mat: &QueryMatch<'_, 'tree>, 164 - graph: &mut Graph<'tree>, 165 - ) -> Value { 166 - let mut nodes = mat 167 - .captures 168 - .iter() 169 - .filter(|c| c.index as usize == index) 170 - .map(|c| c.node); 171 - match quantifier { 172 - CaptureQuantifier::Zero => unreachable!(), 173 - CaptureQuantifier::One => { 174 - let syntax_node = graph.add_syntax_node(nodes.next().expect("missing capture")); 175 - syntax_node.into() 176 - } 177 - CaptureQuantifier::ZeroOrMore | CaptureQuantifier::OneOrMore => { 178 - let syntax_nodes = nodes 179 - .map(|n| graph.add_syntax_node(n.clone()).into()) 180 - .collect::<Vec<Value>>(); 181 - 
syntax_nodes.into() 182 - } 183 - CaptureQuantifier::ZeroOrOne => match nodes.next() { 184 - None => Value::Null.into(), 185 - Some(node) => { 186 - let syntax_node = graph.add_syntax_node(node); 310 + impl Value { 311 + pub fn from_nodes<'tree, NI: IntoIterator<Item = Node<'tree>>>( 312 + graph: &mut Graph<'tree>, 313 + nodes: NI, 314 + quantifier: CaptureQuantifier, 315 + ) -> Value { 316 + let mut nodes = nodes.into_iter(); 317 + match quantifier { 318 + CaptureQuantifier::Zero => unreachable!(), 319 + CaptureQuantifier::One => { 320 + let syntax_node = graph.add_syntax_node(nodes.next().expect("missing capture")); 187 321 syntax_node.into() 188 322 } 189 - }, 323 + CaptureQuantifier::ZeroOrMore | CaptureQuantifier::OneOrMore => { 324 + let syntax_nodes = nodes 325 + .map(|n| graph.add_syntax_node(n.clone()).into()) 326 + .collect::<Vec<Value>>(); 327 + syntax_nodes.into() 328 + } 329 + CaptureQuantifier::ZeroOrOne => match nodes.next() { 330 + None => Value::Null.into(), 331 + Some(node) => { 332 + let syntax_node = graph.add_syntax_node(node); 333 + syntax_node.into() 334 + } 335 + }, 336 + } 190 337 } 191 338 }
+1 -1
src/execution/error.rs
··· 95 95 Self { 96 96 statement: format!("{}", stmt), 97 97 statement_location: stmt.location(), 98 - stanza_location: stanza.location, 98 + stanza_location: stanza.range.start, 99 99 source_location: Location::from(source_node.range().start_point), 100 100 node_kind: source_node.kind().to_string(), 101 101 }
+33 -24
src/execution/lazy.rs
··· 13 13 14 14 use std::collections::HashMap; 15 15 16 - use tree_sitter::CaptureQuantifier::One; 17 16 use tree_sitter::QueryCursor; 18 17 use tree_sitter::QueryMatch; 19 18 use tree_sitter::Tree; ··· 22 21 use crate::execution::error::ExecutionError; 23 22 use crate::execution::error::ResultWithExecutionError; 24 23 use crate::execution::error::StatementContext; 25 - use crate::execution::query_capture_value; 26 24 use crate::execution::ExecutionConfig; 27 25 use crate::functions::Functions; 28 26 use crate::graph; 29 27 use crate::graph::Graph; 28 + use crate::graph::Value; 30 29 use crate::variables::Globals; 31 30 use crate::variables::MutVariables; 32 31 use crate::variables::VariableMap; ··· 62 61 }; 63 62 64 63 let mut locals = VariableMap::new(); 65 - let mut cursor = QueryCursor::new(); 66 64 let mut store = LazyStore::new(); 67 65 let mut scoped_store = LazyScopedVariables::new(); 68 66 let mut lazy_graph = Vec::new(); 69 67 let mut function_parameters = Vec::new(); 70 68 let mut prev_element_debug_info = HashMap::new(); 71 69 72 - let query = &self.query.as_ref().unwrap(); 73 - let matches = cursor.matches(query, tree.root_node(), source.as_bytes()); 74 - for mat in matches { 70 + self.try_visit_matches_lazy(tree, source, |stanza, mat| { 75 71 cancellation_flag.check("processing matches")?; 76 - let stanza = &self.stanzas[mat.pattern_index]; 77 72 stanza.execute_lazy( 78 73 source, 79 74 &mat, ··· 87 82 &mut prev_element_debug_info, 88 83 &self.shorthands, 89 84 cancellation_flag, 90 - )?; 91 - } 85 + ) 86 + })?; 92 87 93 88 let mut exec = EvaluationContext { 94 89 source, ··· 110 105 111 106 Ok(()) 112 107 } 108 + 109 + pub(super) fn try_visit_matches_lazy<'tree, E, F>( 110 + &self, 111 + tree: &'tree Tree, 112 + source: &'tree str, 113 + mut visit: F, 114 + ) -> Result<(), E> 115 + where 116 + F: FnMut(&ast::Stanza, QueryMatch<'_, 'tree>) -> Result<(), E>, 117 + { 118 + let mut cursor = QueryCursor::new(); 119 + let query = 
self.query.as_ref().unwrap(); 120 + let matches = cursor.matches(query, tree.root_node(), source.as_bytes()); 121 + for mat in matches { 122 + let stanza = &self.stanzas[mat.pattern_index]; 123 + visit(stanza, mat)?; 124 + } 125 + Ok(()) 126 + } 113 127 } 114 128 115 129 /// Context for execution, which executes stanzas to build the lazy graph ··· 167 181 ) -> Result<(), ExecutionError> { 168 182 let current_regex_captures = vec![]; 169 183 locals.clear(); 170 - let node = query_capture_value(self.full_match_file_capture_index, One, &mat, graph); 171 - debug!("match {} at {}", node, self.location); 184 + let node = mat 185 + .nodes_for_capture_index(self.full_match_file_capture_index as u32) 186 + .next() 187 + .expect("missing capture for full match"); 188 + debug!("match {:?} at {}", node, self.range.start); 172 189 trace!("{{"); 173 190 for statement in &self.statements { 174 - let error_context = { 175 - let node = mat 176 - .captures 177 - .iter() 178 - .find(|c| c.index as usize == self.full_match_file_capture_index) 179 - .expect("missing capture for full match") 180 - .node; 181 - StatementContext::new(&statement, &self, &node) 182 - }; 191 + let error_context = { StatementContext::new(&statement, &self, &node) }; 183 192 let mut exec = ExecutionContext { 184 193 source, 185 194 graph, ··· 613 622 614 623 impl ast::Capture { 615 624 fn evaluate_lazy(&self, exec: &mut ExecutionContext) -> Result<LazyValue, ExecutionError> { 616 - Ok(query_capture_value( 617 - self.file_capture_index, 618 - self.quantifier, 619 - exec.mat, 625 + Ok(Value::from_nodes( 620 626 exec.graph, 627 + exec.mat 628 + .nodes_for_capture_index(self.file_capture_index as u32), 629 + self.quantifier, 621 630 ) 622 631 .into()) 623 632 }
+68 -45
src/execution/strict.rs
··· 44 44 use crate::ast::Variable; 45 45 use crate::execution::error::ExecutionError; 46 46 use crate::execution::error::ResultWithExecutionError; 47 - use crate::execution::query_capture_value; 47 + use crate::execution::error::StatementContext; 48 48 use crate::execution::CancellationFlag; 49 49 use crate::execution::ExecutionConfig; 50 50 use crate::graph::Attributes; ··· 57 57 use crate::variables::Variables; 58 58 use crate::Identifier; 59 59 use crate::Location; 60 - 61 - use super::error::StatementContext; 62 60 63 61 impl File { 64 62 /// Executes this graph DSL file against a source file, saving the results into an existing ··· 88 86 let mut scoped = ScopedVariables::new(); 89 87 let current_regex_captures = Vec::new(); 90 88 let mut function_parameters = Vec::new(); 91 - let mut cursor = QueryCursor::new(); 92 - for stanza in &self.stanzas { 93 - cancellation_flag.check("executing stanza")?; 89 + 90 + self.try_visit_matches_strict(tree, source, |stanza, mat| { 94 91 stanza.execute( 95 - tree, 96 92 source, 93 + &mat, 97 94 graph, 98 95 &mut config, 99 96 &mut locals, 100 97 &mut scoped, 101 98 &current_regex_captures, 102 99 &mut function_parameters, 103 - &mut cursor, 104 100 &self.shorthands, 105 101 cancellation_flag, 106 - )?; 102 + ) 103 + })?; 104 + 105 + Ok(()) 106 + } 107 + 108 + pub(super) fn try_visit_matches_strict<'tree, E, F>( 109 + &self, 110 + tree: &'tree Tree, 111 + source: &'tree str, 112 + mut visit: F, 113 + ) -> Result<(), E> 114 + where 115 + F: FnMut(&Stanza, QueryMatch<'_, 'tree>) -> Result<(), E>, 116 + { 117 + for stanza in &self.stanzas { 118 + stanza.try_visit_matches_strict(tree, source, |mat| visit(stanza, mat))?; 107 119 } 108 120 Ok(()) 109 121 } ··· 143 155 impl Stanza { 144 156 fn execute<'a, 'g, 'l, 's, 'tree>( 145 157 &self, 146 - tree: &'tree Tree, 147 158 source: &'tree str, 159 + mat: &QueryMatch<'_, 'tree>, 148 160 graph: &mut Graph<'tree>, 149 161 config: &ExecutionConfig<'_, 'g>, 150 162 locals: &mut 
VariableMap<'l, Value>, 151 163 scoped: &mut ScopedVariables<'s>, 152 164 current_regex_captures: &Vec<String>, 153 165 function_parameters: &mut Vec<Value>, 154 - cursor: &mut QueryCursor, 155 166 shorthands: &AttributeShorthands, 156 167 cancellation_flag: &dyn CancellationFlag, 157 168 ) -> Result<(), ExecutionError> { 169 + locals.clear(); 170 + for statement in &self.statements { 171 + let error_context = { 172 + let node = mat 173 + .nodes_for_capture_index(self.full_match_stanza_capture_index as u32) 174 + .next() 175 + .expect("missing full capture"); 176 + StatementContext::new(&statement, &self, &node) 177 + }; 178 + let mut exec = ExecutionContext { 179 + source, 180 + graph, 181 + config, 182 + locals, 183 + scoped, 184 + current_regex_captures, 185 + function_parameters, 186 + mat: &mat, 187 + error_context, 188 + shorthands, 189 + cancellation_flag, 190 + }; 191 + statement 192 + .execute(&mut exec) 193 + .with_context(|| exec.error_context.into())?; 194 + } 195 + Ok(()) 196 + } 197 + 198 + pub(super) fn try_visit_matches_strict<'tree, E, F>( 199 + &self, 200 + tree: &'tree Tree, 201 + source: &'tree str, 202 + mut visit: F, 203 + ) -> Result<(), E> 204 + where 205 + F: FnMut(QueryMatch<'_, 'tree>) -> Result<(), E>, 206 + { 207 + let mut cursor = QueryCursor::new(); 158 208 let matches = cursor.matches(&self.query, tree.root_node(), source.as_bytes()); 159 209 for mat in matches { 160 - cancellation_flag.check("processing matches")?; 161 - locals.clear(); 162 - for statement in &self.statements { 163 - let error_context = { 164 - let node = mat 165 - .captures 166 - .iter() 167 - .find(|c| c.index as usize == self.full_match_stanza_capture_index) 168 - .expect("missing capture for full match") 169 - .node; 170 - StatementContext::new(&statement, &self, &node) 171 - }; 172 - let mut exec = ExecutionContext { 173 - source, 174 - graph, 175 - config, 176 - locals, 177 - scoped, 178 - current_regex_captures, 179 - function_parameters, 180 - mat: &mat, 181 
- error_context, 182 - shorthands, 183 - cancellation_flag, 184 - }; 185 - statement 186 - .execute(&mut exec) 187 - .with_context(|| exec.error_context.into())?; 188 - } 210 + visit(mat)?; 189 211 } 190 212 Ok(()) 191 213 } ··· 600 622 601 623 impl Capture { 602 624 fn evaluate(&self, exec: &mut ExecutionContext) -> Result<Value, ExecutionError> { 603 - Ok(query_capture_value( 604 - self.stanza_capture_index, 625 + Ok(Value::from_nodes( 626 + exec.graph, 627 + exec.mat 628 + .nodes_for_capture_index(self.stanza_capture_index as u32), 605 629 self.quantifier, 606 - exec.mat, 607 - exec.graph, 608 - )) 630 + ) 631 + .into()) 609 632 } 610 633 } 611 634
+1
src/lib.rs
··· 41 41 pub use execution::CancellationError; 42 42 pub use execution::CancellationFlag; 43 43 pub use execution::ExecutionConfig; 44 + pub use execution::Match; 44 45 pub use execution::NoCancellation; 45 46 pub use parser::Location; 46 47 pub use parser::ParseError;
+21 -4
src/parser.rs
··· 7 7 8 8 use std::fmt::Display; 9 9 use std::iter::Peekable; 10 - use std::ops::Range; 11 10 use std::path::Path; 12 11 use std::str::Chars; 13 12 ··· 161 160 } 162 161 } 163 162 164 - pub(crate) fn to_column_range(&self) -> Range<usize> { 163 + pub(crate) fn to_column_range(&self) -> std::ops::Range<usize> { 165 164 self.column..self.column + 1 166 165 } 167 166 } ··· 173 172 } 174 173 175 174 // ---------------------------------------------------------------------------- 175 + // Range 176 + 177 + /// The range of a graph DSL entity within its file 178 + #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] 179 + pub struct Range { 180 + pub start: Location, 181 + pub end: Location, 182 + } 183 + 184 + impl Display for Range { 185 + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 186 + write!(f, "{} - {}", self.start, self.end) 187 + } 188 + } 189 + 190 + // ---------------------------------------------------------------------------- 176 191 // Parser 177 192 178 193 struct Parser<'a> { ··· 350 365 } 351 366 352 367 fn parse_stanza(&mut self, language: Language) -> Result<ast::Stanza, ParseError> { 353 - let location = self.location; 368 + let start = self.location; 354 369 let (query, full_match_stanza_capture_index) = self.parse_query(language)?; 355 370 self.consume_whitespace(); 356 371 let statements = self.parse_statements()?; 372 + let end = self.location; 373 + let range = Range { start, end }; 357 374 Ok(ast::Stanza { 358 375 query, 359 376 statements, 360 377 full_match_stanza_capture_index, 361 378 full_match_file_capture_index: usize::MAX, // set in checker 362 - location, 379 + range, 363 380 }) 364 381 } 365 382