// Fork of https://github.com/tree-sitter/tree-sitter-graph
// (file as viewed raw on branch `main`, 35 kB).
1// -*- coding: utf-8 -*- 2// ------------------------------------------------------------------------------------------------ 3// Copyright © 2021, tree-sitter authors. 4// Licensed under either of Apache License, Version 2.0, or MIT license, at your option. 5// Please see the LICENSE-APACHE or LICENSE-MIT files in this distribution for license details. 6// ------------------------------------------------------------------------------------------------ 7 8use std::fmt::Display; 9use std::iter::Peekable; 10use std::path::Path; 11use std::str::Chars; 12 13use regex::Regex; 14use thiserror::Error; 15use tree_sitter::CaptureQuantifier; 16use tree_sitter::CaptureQuantifier::One; 17use tree_sitter::CaptureQuantifier::OneOrMore; 18use tree_sitter::CaptureQuantifier::Zero; 19use tree_sitter::CaptureQuantifier::ZeroOrMore; 20use tree_sitter::CaptureQuantifier::ZeroOrOne; 21use tree_sitter::Language; 22use tree_sitter::Query; 23use tree_sitter::QueryError; 24 25use crate::ast; 26use crate::parse_error::Excerpt; 27use crate::Identifier; 28 29pub const FULL_MATCH: &str = "__tsg__full_match"; 30 31impl ast::File { 32 /// Parses a graph DSL file, returning a new `File` instance. 33 pub fn from_str(language: Language, source: &str) -> Result<Self, ParseError> { 34 let mut file = ast::File::new(language); 35 #[allow(deprecated)] 36 file.parse(source)?; 37 file.check()?; 38 Ok(file) 39 } 40 41 /// Parses a graph DSL file, adding its content to an existing `File` instance. 42 #[deprecated( 43 note = "Parsing multiple times into the same `File` instance is unsound. Use `File::from_str` instead." 
44 )] 45 pub fn parse(&mut self, content: &str) -> Result<(), ParseError> { 46 Parser::new(content).parse_into_file(self) 47 } 48} 49 50// ---------------------------------------------------------------------------- 51// Parse errors 52 53/// An error that can occur while parsing a graph DSL file 54#[derive(Debug, Error)] 55pub enum ParseError { 56 #[error("Expected quantifier at {0}")] 57 ExpectedQuantifier(Location), 58 #[error("Expected '{0}' at {1}")] 59 ExpectedToken(&'static str, Location), 60 #[error("Expected variable name at {0}")] 61 ExpectedVariable(Location), 62 #[error("Expected unscoped variable at {0}")] 63 ExpectedUnscopedVariable(Location), 64 #[error("Invalid regular expression /{0}/ at {1}")] 65 InvalidRegex(String, Location), 66 #[error("Expected integer constant in regex capture at {0}")] 67 InvalidRegexCapture(Location), 68 #[error("Invalid query pattern: {}", _0.message)] 69 QueryError(#[from] QueryError), 70 #[error("Unexpected character '{0}' in {1} at {2}")] 71 UnexpectedCharacter(char, &'static str, Location), 72 #[error("Unexpected end of file at {0}")] 73 UnexpectedEOF(Location), 74 #[error("Unexpected keyword '{0}' at {1}")] 75 UnexpectedKeyword(String, Location), 76 #[error("Unexpected literal '#{0}' at {1}")] 77 UnexpectedLiteral(String, Location), 78 #[error("Query contains multiple patterns at {0}")] 79 UnexpectedQueryPatterns(Location), 80 #[error(transparent)] 81 Check(#[from] crate::checker::CheckError), 82} 83 84impl ParseError { 85 pub fn display_pretty<'a>( 86 &'a self, 87 path: &'a Path, 88 source: &'a str, 89 ) -> impl std::fmt::Display + 'a { 90 DisplayParseErrorPretty { 91 error: self, 92 path, 93 source, 94 } 95 } 96} 97 98struct DisplayParseErrorPretty<'a> { 99 error: &'a ParseError, 100 path: &'a Path, 101 source: &'a str, 102} 103 104impl std::fmt::Display for DisplayParseErrorPretty<'_> { 105 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 106 let location = match self.error { 107 
ParseError::ExpectedQuantifier(location) => *location, 108 ParseError::ExpectedToken(_, location) => *location, 109 ParseError::ExpectedVariable(location) => *location, 110 ParseError::ExpectedUnscopedVariable(location) => *location, 111 ParseError::InvalidRegex(_, location) => *location, 112 ParseError::InvalidRegexCapture(location) => *location, 113 ParseError::QueryError(err) => Location { 114 row: err.row, 115 column: err.column, 116 }, 117 ParseError::UnexpectedCharacter(_, _, location) => *location, 118 ParseError::UnexpectedEOF(location) => *location, 119 ParseError::UnexpectedKeyword(_, location) => *location, 120 ParseError::UnexpectedLiteral(_, location) => *location, 121 ParseError::UnexpectedQueryPatterns(location) => *location, 122 ParseError::Check(err) => { 123 write!(f, "{}", err.display_pretty(self.path, self.source))?; 124 return Ok(()); 125 } 126 }; 127 writeln!(f, "{}", self.error)?; 128 write!( 129 f, 130 "{}", 131 Excerpt::from_source( 132 self.path, 133 self.source, 134 location.row, 135 location.to_column_range(), 136 0 137 ) 138 )?; 139 Ok(()) 140 } 141} 142 143// ---------------------------------------------------------------------------- 144// Location 145 146/// The location of a graph DSL entity within its file 147#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] 148pub struct Location { 149 pub row: usize, 150 pub column: usize, 151} 152 153impl Location { 154 fn advance(&mut self, ch: char) { 155 if ch == '\n' { 156 self.row += 1; 157 self.column = 0; 158 } else { 159 self.column += 1; 160 } 161 } 162 163 pub(crate) fn to_column_range(&self) -> std::ops::Range<usize> { 164 self.column..self.column + 1 165 } 166} 167 168impl Display for Location { 169 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 170 write!(f, "({}, {})", self.row + 1, self.column + 1) 171 } 172} 173 174// ---------------------------------------------------------------------------- 175// Range 176 177/// The range of a graph DSL entity within its 
file 178#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] 179pub struct Range { 180 pub start: Location, 181 pub end: Location, 182} 183 184impl Display for Range { 185 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 186 write!(f, "{} - {}", self.start, self.end) 187 } 188} 189 190// ---------------------------------------------------------------------------- 191// Parser 192 193struct Parser<'a> { 194 source: &'a str, 195 chars: Peekable<Chars<'a>>, 196 offset: usize, 197 location: Location, 198 query_source: String, 199} 200 201fn is_ident_start(c: char) -> bool { 202 c == '_' || c.is_alphabetic() 203} 204 205fn is_ident(c: char) -> bool { 206 c == '_' || c == '-' || c.is_alphanumeric() 207} 208 209impl<'a> Parser<'a> { 210 fn new(source: &'a str) -> Parser<'a> { 211 let chars = source.chars().peekable(); 212 let query_source = String::with_capacity(source.len()); 213 Parser { 214 source, 215 chars, 216 offset: 0, 217 location: Location::default(), 218 query_source, 219 } 220 } 221} 222 223impl<'a> Parser<'a> { 224 fn peek(&mut self) -> Result<char, ParseError> { 225 self.chars 226 .peek() 227 .copied() 228 .ok_or_else(|| ParseError::UnexpectedEOF(self.location)) 229 } 230 231 fn try_peek(&mut self) -> Option<char> { 232 self.peek().ok() 233 } 234 235 fn next(&mut self) -> Result<char, ParseError> { 236 let ch = self 237 .chars 238 .next() 239 .ok_or_else(|| ParseError::UnexpectedEOF(self.location))?; 240 self.offset += ch.len_utf8(); 241 self.location.advance(ch); 242 Ok(ch) 243 } 244 245 fn skip(&mut self) -> Result<(), ParseError> { 246 self.next().map(|_| ()) 247 } 248 249 fn consume_whitespace(&mut self) { 250 let mut in_comment = false; 251 while let Some(ch) = self.try_peek() { 252 if in_comment { 253 if ch == '\n' { 254 in_comment = false; 255 } 256 } else { 257 if ch == ';' { 258 in_comment = true; 259 } else if !ch.is_whitespace() { 260 return; 261 } 262 } 263 self.skip().unwrap(); 264 } 265 } 266 267 fn consume_while(&mut self, mut 
f: impl FnMut(char) -> bool) { 268 while let Some(ch) = self.try_peek() { 269 if !f(ch) { 270 return; 271 } 272 self.skip().unwrap(); 273 } 274 } 275 276 fn consume_n(&mut self, count: usize) -> Result<(), ParseError> { 277 for _ in 0..count { 278 self.next()?; 279 } 280 Ok(()) 281 } 282 283 fn consume_token(&mut self, token: &'static str) -> Result<(), ParseError> { 284 if self.source[self.offset..].starts_with(token) { 285 self.consume_n(token.len()) 286 } else { 287 Err(ParseError::ExpectedToken(token, self.location)) 288 } 289 } 290 291 fn parse_into_file(&mut self, file: &mut ast::File) -> Result<(), ParseError> { 292 self.consume_whitespace(); 293 while self.try_peek().is_some() { 294 if let Ok(_) = self.consume_token("attribute") { 295 self.consume_whitespace(); 296 let shorthand = self.parse_shorthand()?; 297 file.shorthands.add(shorthand); 298 } else if let Ok(_) = self.consume_token("global") { 299 self.consume_whitespace(); 300 let global = self.parse_global()?; 301 file.globals.push(global); 302 } else if let Ok(_) = self.consume_token("inherit") { 303 self.consume_whitespace(); 304 self.consume_token(".")?; 305 let name = self.parse_identifier("inherit")?; 306 file.inherited_variables.insert(name); 307 } else { 308 let stanza = self.parse_stanza(&file.language)?; 309 file.stanzas.push(stanza); 310 } 311 self.consume_whitespace(); 312 } 313 // we can unwrap here because all queries have already been parsed before 314 file.query = Some(Query::new(&file.language, &self.query_source).unwrap()); 315 Ok(()) 316 } 317 318 fn parse_global(&mut self) -> Result<ast::Global, ParseError> { 319 let location = self.location; 320 let name = self.parse_identifier("global variable")?; 321 let quantifier = self.parse_quantifier()?; 322 let mut default = None; 323 self.consume_whitespace(); 324 if let Ok(_) = self.consume_token("=") { 325 self.consume_whitespace(); 326 default = Some(self.parse_string()?); 327 } 328 Ok(ast::Global { 329 name, 330 quantifier, 331 default, 
332 location, 333 }) 334 } 335 336 fn parse_shorthand(&mut self) -> Result<ast::AttributeShorthand, ParseError> { 337 let location = self.location; 338 let name = self.parse_identifier("shorthand name")?; 339 self.consume_whitespace(); 340 self.consume_token("=")?; 341 self.consume_whitespace(); 342 let variable = self.parse_unscoped_variable()?; 343 self.consume_whitespace(); 344 self.consume_token("=>")?; 345 self.consume_whitespace(); 346 let attributes = self.parse_attributes()?; 347 Ok(ast::AttributeShorthand { 348 name, 349 variable, 350 attributes, 351 location, 352 }) 353 } 354 355 fn parse_quantifier(&mut self) -> Result<CaptureQuantifier, ParseError> { 356 let mut quantifier = One; 357 if let Some(c) = self.try_peek() { 358 self.skip().unwrap(); 359 if c == '?' { 360 quantifier = ZeroOrOne; 361 } else if c == '*' { 362 quantifier = ZeroOrMore; 363 } else if c == '+' { 364 quantifier = OneOrMore; 365 } else if !c.is_whitespace() { 366 return Err(ParseError::ExpectedQuantifier(self.location)); 367 } 368 } 369 Ok(quantifier) 370 } 371 372 fn parse_stanza(&mut self, language: &Language) -> Result<ast::Stanza, ParseError> { 373 let start = self.location; 374 let (query, full_match_stanza_capture_index) = self.parse_query(language)?; 375 self.consume_whitespace(); 376 let statements = self.parse_statements()?; 377 let end = self.location; 378 let range = Range { start, end }; 379 Ok(ast::Stanza { 380 query, 381 statements, 382 full_match_stanza_capture_index, 383 full_match_file_capture_index: usize::MAX, // set in checker 384 range, 385 }) 386 } 387 388 fn parse_query(&mut self, language: &Language) -> Result<(Query, usize), ParseError> { 389 let location = self.location; 390 let query_start = self.offset; 391 self.skip_query()?; 392 let query_end = self.offset; 393 let query_source = self.source[query_start..query_end].to_owned() + "@" + FULL_MATCH; 394 // If tree-sitter allowed us to incrementally add patterns to a query, we wouldn't need 395 // the global 
query_source. 396 self.query_source += &query_source; 397 self.query_source += "\n"; 398 let query = Query::new(language, &query_source).map_err(|mut e| { 399 // the column of the first row of a query pattern must be shifted by the whitespace 400 // that was already consumed 401 if e.row == 0 { 402 // must come before we update e.row! 403 e.column += location.column; 404 } 405 e.row += location.row; 406 e.offset += query_start; 407 e 408 })?; 409 if query.pattern_count() > 1 { 410 return Err(ParseError::UnexpectedQueryPatterns(location)); 411 } 412 let full_match_capture_index = query 413 .capture_index_for_name(FULL_MATCH) 414 .expect("missing capture index for full match") 415 as usize; 416 Ok((query, full_match_capture_index)) 417 } 418 419 fn skip_query(&mut self) -> Result<(), ParseError> { 420 let mut paren_depth = 0; 421 let mut in_string = false; 422 let mut in_escape = false; 423 let mut in_comment = false; 424 loop { 425 let ch = self.peek()?; 426 if in_escape { 427 in_escape = false; 428 } else if in_string { 429 match ch { 430 '\\' => { 431 in_escape = true; 432 } 433 '"' | '\n' => { 434 in_string = false; 435 } 436 _ => {} 437 } 438 } else if in_comment { 439 if ch == '\n' { 440 in_comment = false; 441 } 442 } else { 443 match ch { 444 '"' => in_string = true, 445 '(' => paren_depth += 1, 446 ')' => { 447 if paren_depth > 0 { 448 paren_depth -= 1; 449 } 450 } 451 '{' => return Ok(()), 452 ';' => in_comment = true, 453 _ => {} 454 } 455 } 456 self.skip().unwrap(); 457 } 458 } 459 460 fn parse_statements(&mut self) -> Result<Vec<ast::Statement>, ParseError> { 461 self.consume_token("{")?; 462 let mut statements = Vec::new(); 463 self.consume_whitespace(); 464 while self.peek()? 
!= '}' { 465 let statement = self.parse_statement()?; 466 statements.push(statement); 467 self.consume_whitespace(); 468 } 469 self.consume_token("}")?; 470 Ok(statements) 471 } 472 473 fn parse_name(&mut self, within: &'static str) -> Result<&'a str, ParseError> { 474 let start = self.offset; 475 let ch = self.next()?; 476 if !is_ident_start(ch) { 477 return Err(ParseError::UnexpectedCharacter(ch, within, self.location)); 478 } 479 self.consume_while(is_ident); 480 let end = self.offset; 481 Ok(&self.source[start..end]) 482 } 483 484 fn parse_statement(&mut self) -> Result<ast::Statement, ParseError> { 485 if matches!(self.peek(), Ok( '#' | '"' | '@' | '$' | '(' | '[' | '{')) { 486 if let Ok(value) = self.parse_expression() { 487 return Ok(ast::Statement::Expr(ast::ExpressionStatement { 488 location: self.location, 489 value, 490 })); 491 } 492 } 493 let keyword_location = self.location; 494 let keyword = self.parse_name("keyword")?; 495 self.consume_whitespace(); 496 if keyword == "let" { 497 let variable = self.parse_variable()?; 498 self.consume_whitespace(); 499 self.consume_token("=")?; 500 self.consume_whitespace(); 501 let value = self.parse_expression()?; 502 Ok(ast::DeclareImmutable { 503 variable, 504 value, 505 location: keyword_location, 506 } 507 .into()) 508 } else if keyword == "var" { 509 let variable = self.parse_variable()?; 510 self.consume_whitespace(); 511 self.consume_token("=")?; 512 self.consume_whitespace(); 513 let value = self.parse_expression()?; 514 Ok(ast::DeclareMutable { 515 variable, 516 value, 517 location: keyword_location, 518 } 519 .into()) 520 } else if keyword == "set" { 521 let variable = self.parse_variable()?; 522 self.consume_whitespace(); 523 self.consume_token("=")?; 524 self.consume_whitespace(); 525 let value = self.parse_expression()?; 526 Ok(ast::Assign { 527 variable, 528 value, 529 location: keyword_location, 530 } 531 .into()) 532 } else if keyword == "node" { 533 let node = self.parse_variable()?; 534 
Ok(ast::CreateGraphNode { 535 node, 536 location: keyword_location, 537 } 538 .into()) 539 } else if keyword == "edge" { 540 let source = self.parse_expression()?; 541 self.consume_whitespace(); 542 self.consume_token("->")?; 543 self.consume_whitespace(); 544 let sink = self.parse_expression()?; 545 Ok(ast::CreateEdge { 546 source, 547 sink, 548 location: keyword_location, 549 } 550 .into()) 551 } else if keyword == "attr" { 552 self.consume_token("(")?; 553 self.consume_whitespace(); 554 let node_or_source = self.parse_expression()?; 555 self.consume_whitespace(); 556 557 if self.peek()? == '-' { 558 let source = node_or_source; 559 self.consume_token("->")?; 560 self.consume_whitespace(); 561 let sink = self.parse_expression()?; 562 self.consume_whitespace(); 563 self.consume_token(")")?; 564 self.consume_whitespace(); 565 let attributes = self.parse_attributes()?; 566 Ok(ast::AddEdgeAttribute { 567 source, 568 sink, 569 attributes, 570 location: keyword_location, 571 } 572 .into()) 573 } else { 574 let node = node_or_source; 575 self.consume_whitespace(); 576 self.consume_token(")")?; 577 self.consume_whitespace(); 578 let attributes = self.parse_attributes()?; 579 Ok(ast::AddGraphNodeAttribute { 580 node, 581 attributes, 582 location: keyword_location, 583 } 584 .into()) 585 } 586 } else if keyword == "print" { 587 let mut values = vec![self.parse_expression()?]; 588 self.consume_whitespace(); 589 while self.try_peek() == Some(',') { 590 self.consume_token(",")?; 591 self.consume_whitespace(); 592 values.push(self.parse_expression()?); 593 self.consume_whitespace(); 594 } 595 self.consume_whitespace(); 596 Ok(ast::Print { 597 values, 598 location: keyword_location, 599 } 600 .into()) 601 } else if keyword == "scan" { 602 let value = self.parse_expression()?; 603 self.consume_whitespace(); 604 self.consume_token("{")?; 605 self.consume_whitespace(); 606 let mut arms = Vec::new(); 607 while self.peek()? 
!= '}' { 608 let pattern_location = self.location; 609 let pattern = self.parse_string()?; 610 let regex = Regex::new(&pattern) 611 .map_err(|_| ParseError::InvalidRegex(pattern.into(), pattern_location))?; 612 self.consume_whitespace(); 613 let statements = self.parse_statements()?; 614 arms.push(ast::ScanArm { 615 regex, 616 statements, 617 location: keyword_location, 618 }); 619 self.consume_whitespace(); 620 } 621 self.consume_token("}")?; 622 Ok(ast::Scan { 623 value, 624 arms, 625 location: keyword_location, 626 } 627 .into()) 628 } else if keyword == "if" { 629 let mut arms = Vec::new(); 630 631 // if 632 let location = keyword_location; 633 self.consume_whitespace(); 634 let conditions = self.parse_conditions()?; 635 self.consume_whitespace(); 636 let statements = self.parse_statements()?; 637 self.consume_whitespace(); 638 arms.push(ast::IfArm { 639 conditions, 640 statements, 641 location, 642 }); 643 644 // elif 645 let mut location = self.location; 646 while let Ok(_) = self.consume_token("elif") { 647 self.consume_whitespace(); 648 let conditions = self.parse_conditions()?; 649 self.consume_whitespace(); 650 let statements = self.parse_statements()?; 651 self.consume_whitespace(); 652 arms.push(ast::IfArm { 653 conditions, 654 statements, 655 location, 656 }); 657 self.consume_whitespace(); 658 location = self.location; 659 } 660 661 // else 662 let location = self.location; 663 if let Ok(_) = self.consume_token("else") { 664 let conditions = vec![]; 665 self.consume_whitespace(); 666 let statements = self.parse_statements()?; 667 self.consume_whitespace(); 668 arms.push(ast::IfArm { 669 conditions, 670 statements, 671 location, 672 }); 673 self.consume_whitespace(); 674 } 675 676 Ok(ast::If { 677 arms, 678 location: keyword_location, 679 } 680 .into()) 681 } else if keyword == "for" { 682 self.consume_whitespace(); 683 let variable = self.parse_unscoped_variable()?; 684 self.consume_whitespace(); 685 self.consume_token("in")?; 686 
self.consume_whitespace(); 687 let value = self.parse_expression()?; 688 self.consume_whitespace(); 689 let statements = self.parse_statements()?; 690 Ok(ast::ForIn { 691 variable, 692 value, 693 statements, 694 location: keyword_location, 695 } 696 .into()) 697 } else { 698 Err(ParseError::UnexpectedKeyword( 699 keyword.into(), 700 keyword_location, 701 )) 702 } 703 } 704 705 fn parse_conditions(&mut self) -> Result<Vec<ast::Condition>, ParseError> { 706 let mut conditions = Vec::new(); 707 let mut has_next = true; 708 while has_next { 709 conditions.push(self.parse_condition()?); 710 self.consume_whitespace(); 711 if let Some(',') = self.try_peek() { 712 self.consume_token(",")?; 713 self.consume_whitespace(); 714 has_next = true; 715 } else { 716 has_next = false; 717 } 718 } 719 Ok(conditions) 720 } 721 722 fn parse_condition(&mut self) -> Result<ast::Condition, ParseError> { 723 let location = self.location; 724 let condition = if let Ok(_) = self.consume_token("some") { 725 self.consume_whitespace(); 726 let value = self.parse_expression()?; 727 ast::Condition::Some { value, location } 728 } else if let Ok(_) = self.consume_token("none") { 729 self.consume_whitespace(); 730 let value = self.parse_expression()?; 731 ast::Condition::None { value, location } 732 } else if let Ok(value) = self.parse_expression() { 733 self.consume_whitespace(); 734 ast::Condition::Bool { value, location } 735 } else { 736 return Err(ParseError::ExpectedToken( 737 "(some|none)? 
EXPRESSION", 738 location, 739 )); 740 }; 741 self.consume_whitespace(); 742 Ok(condition) 743 } 744 745 fn parse_identifier(&mut self, within: &'static str) -> Result<Identifier, ParseError> { 746 let content = self.parse_name(within)?; 747 Ok(Identifier::from(content)) 748 } 749 750 fn parse_string(&mut self) -> Result<String, ParseError> { 751 self.consume_token("\"")?; 752 let mut escape = false; 753 let mut value = String::new(); 754 loop { 755 let ch = self.next()?; 756 if escape { 757 escape = false; 758 value.push(match ch { 759 '0' => '\0', 760 'n' => '\n', 761 'r' => '\r', 762 't' => '\t', 763 _ => ch, 764 }); 765 } else { 766 match ch { 767 '"' => return Ok(value), 768 '\\' => escape = true, 769 _ => value.push(ch), 770 } 771 } 772 } 773 } 774 775 fn parse_expression(&mut self) -> Result<ast::Expression, ParseError> { 776 let mut expression = match self.peek()? { 777 '#' => self.parse_literal()?, 778 '"' => self.parse_string()?.into(), 779 '@' => self.parse_capture()?.into(), 780 '$' => self.parse_regex_capture()?.into(), 781 '(' => self.parse_call()?, 782 '[' => self.parse_list()?, 783 '{' => self.parse_set()?, 784 ch if ch.is_ascii_digit() => self.parse_integer_constant()?, 785 ch if is_ident_start(ch) => { 786 let location = self.location; 787 let name = self.parse_identifier("variable name")?; 788 ast::UnscopedVariable { name, location }.into() 789 } 790 ch => { 791 return Err(ParseError::UnexpectedCharacter( 792 ch, 793 "expression", 794 self.location, 795 )) 796 } 797 }; 798 self.consume_whitespace(); 799 while self.try_peek() == Some('.') { 800 self.skip().unwrap(); 801 self.consume_whitespace(); 802 let location = self.location; 803 let scope = Box::new(expression); 804 let name = self.parse_identifier("scoped variable name")?; 805 self.consume_whitespace(); 806 expression = ast::ScopedVariable { 807 scope, 808 name, 809 location, 810 } 811 .into(); 812 } 813 Ok(expression) 814 } 815 816 fn parse_call(&mut self) -> Result<ast::Expression, 
ParseError> { 817 self.consume_token("(")?; 818 self.consume_whitespace(); 819 let function = self.parse_identifier("function name")?; 820 self.consume_whitespace(); 821 let mut parameters = Vec::new(); 822 while self.peek()? != ')' { 823 parameters.push(self.parse_expression()?); 824 self.consume_whitespace(); 825 } 826 self.consume_token(")")?; 827 Ok(ast::Call { 828 function, 829 parameters, 830 } 831 .into()) 832 } 833 834 fn parse_sequence(&mut self, end_marker: char) -> Result<Vec<ast::Expression>, ParseError> { 835 let mut elements = Vec::new(); 836 while self.peek()? != end_marker { 837 elements.push(self.parse_expression()?); 838 self.consume_whitespace(); 839 if self.peek()? != end_marker { 840 self.consume_token(",")?; 841 self.consume_whitespace(); 842 } 843 } 844 Ok(elements) 845 } 846 847 fn parse_list(&mut self) -> Result<ast::Expression, ParseError> { 848 let location = self.location; 849 self.consume_token("[")?; 850 self.consume_whitespace(); 851 if let Ok(_) = self.consume_token("]") { 852 return Ok(ast::ListLiteral { elements: vec![] }.into()); 853 } 854 let first_element = self.parse_expression()?; 855 self.consume_whitespace(); 856 if let Ok(_) = self.consume_token("]") { 857 let elements = vec![first_element]; 858 Ok(ast::ListLiteral { elements }.into()) 859 } else if let Ok(_) = self.consume_token(",") { 860 self.consume_whitespace(); 861 let mut elements = self.parse_sequence(']')?; 862 self.consume_whitespace(); 863 self.consume_token("]")?; 864 elements.insert(0, first_element); 865 Ok(ast::ListLiteral { elements }.into()) 866 } else { 867 self.consume_token("for")?; 868 self.consume_whitespace(); 869 let variable = self.parse_unscoped_variable()?; 870 self.consume_whitespace(); 871 self.consume_token("in")?; 872 self.consume_whitespace(); 873 let value = self.parse_expression()?; 874 self.consume_whitespace(); 875 self.consume_token("]")?; 876 Ok(ast::ListComprehension { 877 element: first_element.into(), 878 variable, 879 value: 
value.into(), 880 location, 881 } 882 .into()) 883 } 884 } 885 886 fn parse_set(&mut self) -> Result<ast::Expression, ParseError> { 887 let location = self.location; 888 self.consume_token("{")?; 889 self.consume_whitespace(); 890 if let Ok(_) = self.consume_token("}") { 891 return Ok(ast::SetLiteral { elements: vec![] }.into()); 892 } 893 let first_element = self.parse_expression()?; 894 self.consume_whitespace(); 895 if let Ok(_) = self.consume_token("}") { 896 let elements = vec![first_element]; 897 Ok(ast::SetLiteral { elements }.into()) 898 } else if let Ok(_) = self.consume_token(",") { 899 self.consume_whitespace(); 900 let mut elements = self.parse_sequence('}')?; 901 self.consume_whitespace(); 902 self.consume_token("}")?; 903 elements.insert(0, first_element); 904 Ok(ast::SetLiteral { elements }.into()) 905 } else { 906 self.consume_token("for")?; 907 self.consume_whitespace(); 908 let variable = self.parse_unscoped_variable()?; 909 self.consume_whitespace(); 910 self.consume_token("in")?; 911 self.consume_whitespace(); 912 let value = self.parse_expression()?; 913 self.consume_whitespace(); 914 self.consume_token("}")?; 915 Ok(ast::SetComprehension { 916 element: first_element.into(), 917 variable, 918 value: value.into(), 919 location, 920 } 921 .into()) 922 } 923 } 924 925 fn parse_capture(&mut self) -> Result<ast::Capture, ParseError> { 926 let location = self.location; 927 let start = self.offset; 928 self.consume_token("@")?; 929 let ch = self.next()?; 930 if !is_ident_start(ch) { 931 return Err(ParseError::UnexpectedCharacter( 932 ch, 933 "query capture", 934 self.location, 935 )); 936 } 937 self.consume_while(is_ident); 938 let end = self.offset; 939 let name = Identifier::from(&self.source[start + 1..end]); 940 Ok(ast::Capture { 941 name, 942 quantifier: Zero, // set in checker 943 file_capture_index: usize::MAX, // set in checker 944 stanza_capture_index: usize::MAX, // set in checker 945 location, 946 } 947 .into()) 948 } 949 950 fn 
parse_integer_constant(&mut self) -> Result<ast::Expression, ParseError> { 951 // We'll have already verified that the next digit is an integer. 952 let start = self.offset; 953 self.consume_while(|ch| ch.is_ascii_digit()); 954 let end = self.offset; 955 let value = u32::from_str_radix(&self.source[start..end], 10).unwrap(); 956 Ok(ast::IntegerConstant { value }.into()) 957 } 958 959 fn parse_literal(&mut self) -> Result<ast::Expression, ParseError> { 960 let literal_location = self.location; 961 self.consume_token("#")?; 962 let literal = self.parse_name("literal")?; 963 if literal == "false" { 964 return Ok(ast::Expression::FalseLiteral); 965 } else if literal == "null" { 966 return Ok(ast::Expression::NullLiteral); 967 } else if literal == "true" { 968 return Ok(ast::Expression::TrueLiteral); 969 } else { 970 Err(ParseError::UnexpectedLiteral( 971 literal.into(), 972 literal_location, 973 )) 974 } 975 } 976 977 fn parse_regex_capture(&mut self) -> Result<ast::RegexCapture, ParseError> { 978 let regex_capture_location = self.location; 979 self.consume_token("$")?; 980 let start = self.offset; 981 self.consume_while(|ch| ch.is_ascii_digit()); 982 let end = self.offset; 983 if start == end { 984 return Err(ParseError::InvalidRegexCapture(regex_capture_location)); 985 } 986 let match_index = usize::from_str_radix(&self.source[start..end], 10).unwrap(); 987 Ok(ast::RegexCapture { match_index }.into()) 988 } 989 990 fn parse_attributes(&mut self) -> Result<Vec<ast::Attribute>, ParseError> { 991 let mut attributes = vec![self.parse_attribute()?]; 992 self.consume_whitespace(); 993 while self.try_peek() == Some(',') { 994 self.skip().unwrap(); 995 self.consume_whitespace(); 996 attributes.push(self.parse_attribute()?); 997 self.consume_whitespace(); 998 } 999 Ok(attributes) 1000 } 1001 1002 fn parse_attribute(&mut self) -> Result<ast::Attribute, ParseError> { 1003 let name = self.parse_identifier("attribute name")?; 1004 self.consume_whitespace(); 1005 let value = if 
self.try_peek() == Some('=') { 1006 self.consume_token("=")?; 1007 self.consume_whitespace(); 1008 self.parse_expression()? 1009 } else { 1010 ast::Expression::TrueLiteral 1011 }; 1012 Ok(ast::Attribute { name, value }) 1013 } 1014 1015 fn parse_variable(&mut self) -> Result<ast::Variable, ParseError> { 1016 let expression_location = self.location; 1017 match self.parse_expression()? { 1018 ast::Expression::Variable(variable) => Ok(variable), 1019 _ => Err(ParseError::ExpectedVariable(expression_location)), 1020 } 1021 } 1022 1023 fn parse_unscoped_variable(&mut self) -> Result<ast::UnscopedVariable, ParseError> { 1024 match self.parse_variable()? { 1025 ast::Variable::Unscoped(variable) => Ok(variable), 1026 ast::Variable::Scoped(variable) => { 1027 Err(ParseError::ExpectedUnscopedVariable(variable.location)) 1028 } 1029 } 1030 } 1031}