// Fork of https://github.com/tree-sitter/tree-sitter-graph
1// -*- coding: utf-8 -*- 2// ------------------------------------------------------------------------------------------------ 3// Copyright © 2021, tree-sitter authors. 4// Licensed under either of Apache License, Version 2.0, or MIT license, at your option. 5// Please see the LICENSE-APACHE or LICENSE-MIT files in this distribution for license details. 6// ------------------------------------------------------------------------------------------------ 7 8use std::fmt::Display; 9use std::iter::Peekable; 10use std::ops::Range; 11use std::path::Path; 12use std::str::Chars; 13 14use regex::Regex; 15use thiserror::Error; 16use tree_sitter::CaptureQuantifier; 17use tree_sitter::CaptureQuantifier::One; 18use tree_sitter::CaptureQuantifier::OneOrMore; 19use tree_sitter::CaptureQuantifier::Zero; 20use tree_sitter::CaptureQuantifier::ZeroOrMore; 21use tree_sitter::CaptureQuantifier::ZeroOrOne; 22use tree_sitter::Language; 23use tree_sitter::Query; 24use tree_sitter::QueryError; 25 26use crate::ast; 27use crate::parse_error::Excerpt; 28use crate::Identifier; 29 30pub const FULL_MATCH: &str = "__tsg__full_match"; 31 32impl ast::File { 33 /// Parses a graph DSL file, returning a new `File` instance. 34 pub fn from_str(language: Language, source: &str) -> Result<Self, ParseError> { 35 let mut file = ast::File::new(language); 36 #[allow(deprecated)] 37 file.parse(source)?; 38 file.check()?; 39 Ok(file) 40 } 41 42 /// Parses a graph DSL file, adding its content to an existing `File` instance. 43 #[deprecated( 44 note = "Parsing multiple times into the same `File` instance is unsound. Use `File::from_str` instead." 
45 )] 46 pub fn parse(&mut self, content: &str) -> Result<(), ParseError> { 47 Parser::new(content).parse_into_file(self) 48 } 49} 50 51// ---------------------------------------------------------------------------- 52// Parse errors 53 54/// An error that can occur while parsing a graph DSL file 55#[derive(Debug, Error)] 56pub enum ParseError { 57 #[error("Expected quantifier at {0}")] 58 ExpectedQuantifier(Location), 59 #[error("Expected '{0}' at {1}")] 60 ExpectedToken(&'static str, Location), 61 #[error("Expected variable name at {0}")] 62 ExpectedVariable(Location), 63 #[error("Expected unscoped variable at {0}")] 64 ExpectedUnscopedVariable(Location), 65 #[error("Invalid regular expression /{0}/ at {1}")] 66 InvalidRegex(String, Location), 67 #[error("Expected integer constant in regex capture at {0}")] 68 InvalidRegexCapture(Location), 69 #[error("Invalid query pattern: {}", _0.message)] 70 QueryError(#[from] QueryError), 71 #[error("Unexpected character '{0}' in {1} at {2}")] 72 UnexpectedCharacter(char, &'static str, Location), 73 #[error("Unexpected end of file at {0}")] 74 UnexpectedEOF(Location), 75 #[error("Unexpected keyword '{0}' at {1}")] 76 UnexpectedKeyword(String, Location), 77 #[error("Unexpected literal '#{0}' at {1}")] 78 UnexpectedLiteral(String, Location), 79 #[error("Query contains multiple patterns at {0}")] 80 UnexpectedQueryPatterns(Location), 81 #[error(transparent)] 82 Check(#[from] crate::checker::CheckError), 83} 84 85impl ParseError { 86 pub fn display_pretty<'a>( 87 &'a self, 88 path: &'a Path, 89 source: &'a str, 90 ) -> impl std::fmt::Display + 'a { 91 DisplayParseErrorPretty { 92 error: self, 93 path, 94 source, 95 } 96 } 97} 98 99struct DisplayParseErrorPretty<'a> { 100 error: &'a ParseError, 101 path: &'a Path, 102 source: &'a str, 103} 104 105impl std::fmt::Display for DisplayParseErrorPretty<'_> { 106 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 107 let location = match self.error { 108 
ParseError::ExpectedQuantifier(location) => *location, 109 ParseError::ExpectedToken(_, location) => *location, 110 ParseError::ExpectedVariable(location) => *location, 111 ParseError::ExpectedUnscopedVariable(location) => *location, 112 ParseError::InvalidRegex(_, location) => *location, 113 ParseError::InvalidRegexCapture(location) => *location, 114 ParseError::QueryError(err) => Location { 115 row: err.row, 116 column: err.column, 117 }, 118 ParseError::UnexpectedCharacter(_, _, location) => *location, 119 ParseError::UnexpectedEOF(location) => *location, 120 ParseError::UnexpectedKeyword(_, location) => *location, 121 ParseError::UnexpectedLiteral(_, location) => *location, 122 ParseError::UnexpectedQueryPatterns(location) => *location, 123 ParseError::Check(err) => { 124 write!(f, "{}", err.display_pretty(self.path, self.source))?; 125 return Ok(()); 126 } 127 }; 128 writeln!(f, "{}", self.error)?; 129 write!( 130 f, 131 "{}", 132 Excerpt::from_source( 133 self.path, 134 self.source, 135 location.row, 136 location.to_column_range(), 137 0 138 ) 139 )?; 140 Ok(()) 141 } 142} 143 144// ---------------------------------------------------------------------------- 145// Location 146 147/// The location of a graph DSL entity within its file 148#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] 149pub struct Location { 150 pub row: usize, 151 pub column: usize, 152} 153 154impl Location { 155 fn advance(&mut self, ch: char) { 156 if ch == '\n' { 157 self.row += 1; 158 self.column = 0; 159 } else { 160 self.column += 1; 161 } 162 } 163 164 pub(crate) fn to_column_range(&self) -> Range<usize> { 165 self.column..self.column + 1 166 } 167} 168 169impl Display for Location { 170 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 171 write!(f, "({}, {})", self.row + 1, self.column + 1) 172 } 173} 174 175// ---------------------------------------------------------------------------- 176// Parser 177 178struct Parser<'a> { 179 source: &'a str, 180 chars: 
Peekable<Chars<'a>>, 181 offset: usize, 182 location: Location, 183 query_source: String, 184} 185 186fn is_ident_start(c: char) -> bool { 187 c == '_' || c.is_alphabetic() 188} 189 190fn is_ident(c: char) -> bool { 191 c == '_' || c == '-' || c.is_alphanumeric() 192} 193 194impl<'a> Parser<'a> { 195 fn new(source: &'a str) -> Parser<'a> { 196 let chars = source.chars().peekable(); 197 let query_source = String::with_capacity(source.len()); 198 Parser { 199 source, 200 chars, 201 offset: 0, 202 location: Location::default(), 203 query_source, 204 } 205 } 206} 207 208impl<'a> Parser<'a> { 209 fn peek(&mut self) -> Result<char, ParseError> { 210 self.chars 211 .peek() 212 .copied() 213 .ok_or_else(|| ParseError::UnexpectedEOF(self.location)) 214 } 215 216 fn try_peek(&mut self) -> Option<char> { 217 self.peek().ok() 218 } 219 220 fn next(&mut self) -> Result<char, ParseError> { 221 let ch = self 222 .chars 223 .next() 224 .ok_or_else(|| ParseError::UnexpectedEOF(self.location))?; 225 self.offset += ch.len_utf8(); 226 self.location.advance(ch); 227 Ok(ch) 228 } 229 230 fn skip(&mut self) -> Result<(), ParseError> { 231 self.next().map(|_| ()) 232 } 233 234 fn consume_whitespace(&mut self) { 235 let mut in_comment = false; 236 while let Some(ch) = self.try_peek() { 237 if in_comment { 238 if ch == '\n' { 239 in_comment = false; 240 } 241 } else { 242 if ch == ';' { 243 in_comment = true; 244 } else if !ch.is_whitespace() { 245 return; 246 } 247 } 248 self.skip().unwrap(); 249 } 250 } 251 252 fn consume_while(&mut self, mut f: impl FnMut(char) -> bool) { 253 while let Some(ch) = self.try_peek() { 254 if !f(ch) { 255 return; 256 } 257 self.skip().unwrap(); 258 } 259 } 260 261 fn consume_n(&mut self, count: usize) -> Result<(), ParseError> { 262 for _ in 0..count { 263 self.next()?; 264 } 265 Ok(()) 266 } 267 268 fn consume_token(&mut self, token: &'static str) -> Result<(), ParseError> { 269 if self.source[self.offset..].starts_with(token) { 270 
self.consume_n(token.len()) 271 } else { 272 Err(ParseError::ExpectedToken(token, self.location)) 273 } 274 } 275 276 fn parse_into_file(&mut self, file: &mut ast::File) -> Result<(), ParseError> { 277 self.consume_whitespace(); 278 while self.try_peek().is_some() { 279 if let Ok(_) = self.consume_token("global") { 280 self.consume_whitespace(); 281 let global = self.parse_global()?; 282 file.globals.push(global); 283 } else if let Ok(_) = self.consume_token("attribute") { 284 self.consume_whitespace(); 285 let shorthand = self.parse_shorthand()?; 286 file.shorthands.add(shorthand); 287 } else { 288 let stanza = self.parse_stanza(file.language)?; 289 file.stanzas.push(stanza); 290 } 291 self.consume_whitespace(); 292 } 293 // we can unwrap here because all queries have already been parsed before 294 file.query = Some(Query::new(file.language, &self.query_source).unwrap()); 295 Ok(()) 296 } 297 298 fn parse_global(&mut self) -> Result<ast::Global, ParseError> { 299 let location = self.location; 300 let name = self.parse_identifier("global variable")?; 301 let quantifier = self.parse_quantifier()?; 302 let mut default = None; 303 self.consume_whitespace(); 304 if let Ok(_) = self.consume_token("=") { 305 self.consume_whitespace(); 306 default = Some(self.parse_string()?); 307 } 308 Ok(ast::Global { 309 name, 310 quantifier, 311 default, 312 location, 313 }) 314 } 315 316 fn parse_shorthand(&mut self) -> Result<ast::AttributeShorthand, ParseError> { 317 let location = self.location; 318 let name = self.parse_identifier("shorthand name")?; 319 self.consume_whitespace(); 320 self.consume_token("=")?; 321 self.consume_whitespace(); 322 let variable = self.parse_unscoped_variable()?; 323 self.consume_whitespace(); 324 self.consume_token("=>")?; 325 self.consume_whitespace(); 326 let attributes = self.parse_attributes()?; 327 Ok(ast::AttributeShorthand { 328 name, 329 variable, 330 attributes, 331 location, 332 }) 333 } 334 335 fn parse_quantifier(&mut self) -> 
Result<CaptureQuantifier, ParseError> { 336 let mut quantifier = One; 337 if let Some(c) = self.try_peek() { 338 self.skip().unwrap(); 339 if c == '?' { 340 quantifier = ZeroOrOne; 341 } else if c == '*' { 342 quantifier = ZeroOrMore; 343 } else if c == '+' { 344 quantifier = OneOrMore; 345 } else if !c.is_whitespace() { 346 return Err(ParseError::ExpectedQuantifier(self.location)); 347 } 348 } 349 Ok(quantifier) 350 } 351 352 fn parse_stanza(&mut self, language: Language) -> Result<ast::Stanza, ParseError> { 353 let location = self.location; 354 let (query, full_match_stanza_capture_index) = self.parse_query(language)?; 355 self.consume_whitespace(); 356 let statements = self.parse_statements()?; 357 Ok(ast::Stanza { 358 query, 359 statements, 360 full_match_stanza_capture_index, 361 full_match_file_capture_index: usize::MAX, // set in checker 362 location, 363 }) 364 } 365 366 fn parse_query(&mut self, language: Language) -> Result<(Query, usize), ParseError> { 367 let location = self.location; 368 let query_start = self.offset; 369 self.skip_query()?; 370 let query_end = self.offset; 371 let query_source = self.source[query_start..query_end].to_owned() + "@" + FULL_MATCH; 372 // If tree-sitter allowed us to incrementally add patterns to a query, we wouldn't need 373 // the global query_source. 374 self.query_source += &query_source; 375 self.query_source += "\n"; 376 let query = Query::new(language, &query_source).map_err(|mut e| { 377 // the column of the first row of a query pattern must be shifted by the whitespace 378 // that was already consumed 379 if e.row == 0 { 380 // must come before we update e.row! 
381 e.column += location.column; 382 } 383 e.row += location.row; 384 e.offset += query_start; 385 e 386 })?; 387 if query.pattern_count() > 1 { 388 return Err(ParseError::UnexpectedQueryPatterns(location)); 389 } 390 let full_match_capture_index = query 391 .capture_index_for_name(FULL_MATCH) 392 .expect("missing capture index for full match") 393 as usize; 394 Ok((query, full_match_capture_index)) 395 } 396 397 fn skip_query(&mut self) -> Result<(), ParseError> { 398 let mut paren_depth = 0; 399 let mut in_string = false; 400 let mut in_escape = false; 401 let mut in_comment = false; 402 loop { 403 let ch = self.peek()?; 404 if in_escape { 405 in_escape = false; 406 } else if in_string { 407 match ch { 408 '\\' => { 409 in_escape = true; 410 } 411 '"' | '\n' => { 412 in_string = false; 413 } 414 _ => {} 415 } 416 } else if in_comment { 417 if ch == '\n' { 418 in_comment = false; 419 } 420 } else { 421 match ch { 422 '"' => in_string = true, 423 '(' => paren_depth += 1, 424 ')' => { 425 if paren_depth > 0 { 426 paren_depth -= 1; 427 } 428 } 429 '{' => return Ok(()), 430 ';' => in_comment = true, 431 _ => {} 432 } 433 } 434 self.skip().unwrap(); 435 } 436 } 437 438 fn parse_statements(&mut self) -> Result<Vec<ast::Statement>, ParseError> { 439 self.consume_token("{")?; 440 let mut statements = Vec::new(); 441 self.consume_whitespace(); 442 while self.peek()? 
!= '}' { 443 let statement = self.parse_statement()?; 444 statements.push(statement); 445 self.consume_whitespace(); 446 } 447 self.consume_token("}")?; 448 Ok(statements) 449 } 450 451 fn parse_name(&mut self, within: &'static str) -> Result<&'a str, ParseError> { 452 let start = self.offset; 453 let ch = self.next()?; 454 if !is_ident_start(ch) { 455 return Err(ParseError::UnexpectedCharacter(ch, within, self.location)); 456 } 457 self.consume_while(is_ident); 458 let end = self.offset; 459 Ok(&self.source[start..end]) 460 } 461 462 fn parse_statement(&mut self) -> Result<ast::Statement, ParseError> { 463 let keyword_location = self.location; 464 let keyword = self.parse_name("keyword")?; 465 self.consume_whitespace(); 466 if keyword == "let" { 467 let variable = self.parse_variable()?; 468 self.consume_whitespace(); 469 self.consume_token("=")?; 470 self.consume_whitespace(); 471 let value = self.parse_expression()?; 472 Ok(ast::DeclareImmutable { 473 variable, 474 value, 475 location: keyword_location, 476 } 477 .into()) 478 } else if keyword == "var" { 479 let variable = self.parse_variable()?; 480 self.consume_whitespace(); 481 self.consume_token("=")?; 482 self.consume_whitespace(); 483 let value = self.parse_expression()?; 484 Ok(ast::DeclareMutable { 485 variable, 486 value, 487 location: keyword_location, 488 } 489 .into()) 490 } else if keyword == "set" { 491 let variable = self.parse_variable()?; 492 self.consume_whitespace(); 493 self.consume_token("=")?; 494 self.consume_whitespace(); 495 let value = self.parse_expression()?; 496 Ok(ast::Assign { 497 variable, 498 value, 499 location: keyword_location, 500 } 501 .into()) 502 } else if keyword == "node" { 503 let node = self.parse_variable()?; 504 Ok(ast::CreateGraphNode { 505 node, 506 location: keyword_location, 507 } 508 .into()) 509 } else if keyword == "edge" { 510 let source = self.parse_expression()?; 511 self.consume_whitespace(); 512 self.consume_token("->")?; 513 self.consume_whitespace(); 
514 let sink = self.parse_expression()?; 515 Ok(ast::CreateEdge { 516 source, 517 sink, 518 location: keyword_location, 519 } 520 .into()) 521 } else if keyword == "attr" { 522 self.consume_token("(")?; 523 self.consume_whitespace(); 524 let node_or_source = self.parse_expression()?; 525 self.consume_whitespace(); 526 527 if self.peek()? == '-' { 528 let source = node_or_source; 529 self.consume_token("->")?; 530 self.consume_whitespace(); 531 let sink = self.parse_expression()?; 532 self.consume_whitespace(); 533 self.consume_token(")")?; 534 self.consume_whitespace(); 535 let attributes = self.parse_attributes()?; 536 Ok(ast::AddEdgeAttribute { 537 source, 538 sink, 539 attributes, 540 location: keyword_location, 541 } 542 .into()) 543 } else { 544 let node = node_or_source; 545 self.consume_whitespace(); 546 self.consume_token(")")?; 547 self.consume_whitespace(); 548 let attributes = self.parse_attributes()?; 549 Ok(ast::AddGraphNodeAttribute { 550 node, 551 attributes, 552 location: keyword_location, 553 } 554 .into()) 555 } 556 } else if keyword == "print" { 557 let mut values = vec![self.parse_expression()?]; 558 self.consume_whitespace(); 559 while self.try_peek() == Some(',') { 560 self.consume_token(",")?; 561 self.consume_whitespace(); 562 values.push(self.parse_expression()?); 563 self.consume_whitespace(); 564 } 565 self.consume_whitespace(); 566 Ok(ast::Print { 567 values, 568 location: keyword_location, 569 } 570 .into()) 571 } else if keyword == "scan" { 572 let value = self.parse_expression()?; 573 self.consume_whitespace(); 574 self.consume_token("{")?; 575 self.consume_whitespace(); 576 let mut arms = Vec::new(); 577 while self.peek()? 
!= '}' { 578 let pattern_location = self.location; 579 let pattern = self.parse_string()?; 580 let regex = Regex::new(&pattern) 581 .map_err(|_| ParseError::InvalidRegex(pattern.into(), pattern_location))?; 582 self.consume_whitespace(); 583 let statements = self.parse_statements()?; 584 arms.push(ast::ScanArm { 585 regex, 586 statements, 587 location: keyword_location, 588 }); 589 self.consume_whitespace(); 590 } 591 self.consume_token("}")?; 592 Ok(ast::Scan { 593 value, 594 arms, 595 location: keyword_location, 596 } 597 .into()) 598 } else if keyword == "if" { 599 let mut arms = Vec::new(); 600 601 // if 602 let location = keyword_location; 603 self.consume_whitespace(); 604 let conditions = self.parse_conditions()?; 605 self.consume_whitespace(); 606 let statements = self.parse_statements()?; 607 self.consume_whitespace(); 608 arms.push(ast::IfArm { 609 conditions, 610 statements, 611 location, 612 }); 613 614 // elif 615 let mut location = self.location; 616 while let Ok(_) = self.consume_token("elif") { 617 self.consume_whitespace(); 618 let conditions = self.parse_conditions()?; 619 self.consume_whitespace(); 620 let statements = self.parse_statements()?; 621 self.consume_whitespace(); 622 arms.push(ast::IfArm { 623 conditions, 624 statements, 625 location, 626 }); 627 self.consume_whitespace(); 628 location = self.location; 629 } 630 631 // else 632 let location = self.location; 633 if let Ok(_) = self.consume_token("else") { 634 let conditions = vec![]; 635 self.consume_whitespace(); 636 let statements = self.parse_statements()?; 637 self.consume_whitespace(); 638 arms.push(ast::IfArm { 639 conditions, 640 statements, 641 location, 642 }); 643 self.consume_whitespace(); 644 } 645 646 Ok(ast::If { 647 arms, 648 location: keyword_location, 649 } 650 .into()) 651 } else if keyword == "for" { 652 self.consume_whitespace(); 653 let variable = self.parse_unscoped_variable()?; 654 self.consume_whitespace(); 655 self.consume_token("in")?; 656 
self.consume_whitespace(); 657 let value = self.parse_expression()?; 658 self.consume_whitespace(); 659 let statements = self.parse_statements()?; 660 Ok(ast::ForIn { 661 variable, 662 value, 663 statements, 664 location: keyword_location, 665 } 666 .into()) 667 } else { 668 Err(ParseError::UnexpectedKeyword( 669 keyword.into(), 670 keyword_location, 671 )) 672 } 673 } 674 675 fn parse_conditions(&mut self) -> Result<Vec<ast::Condition>, ParseError> { 676 let mut conditions = Vec::new(); 677 let mut has_next = true; 678 while has_next { 679 conditions.push(self.parse_condition()?); 680 self.consume_whitespace(); 681 if let Some(',') = self.try_peek() { 682 self.consume_token(",")?; 683 self.consume_whitespace(); 684 has_next = true; 685 } else { 686 has_next = false; 687 } 688 } 689 Ok(conditions) 690 } 691 692 fn parse_condition(&mut self) -> Result<ast::Condition, ParseError> { 693 let location = self.location; 694 let condition = if let Ok(_) = self.consume_token("some") { 695 self.consume_whitespace(); 696 let value = self.parse_expression()?; 697 ast::Condition::Some { value, location } 698 } else if let Ok(_) = self.consume_token("none") { 699 self.consume_whitespace(); 700 let value = self.parse_expression()?; 701 ast::Condition::None { value, location } 702 } else if let Ok(value) = self.parse_expression() { 703 self.consume_whitespace(); 704 ast::Condition::Bool { value, location } 705 } else { 706 return Err(ParseError::ExpectedToken( 707 "(some|none)? 
EXPRESSION", 708 location, 709 )); 710 }; 711 self.consume_whitespace(); 712 Ok(condition) 713 } 714 715 fn parse_identifier(&mut self, within: &'static str) -> Result<Identifier, ParseError> { 716 let content = self.parse_name(within)?; 717 Ok(Identifier::from(content)) 718 } 719 720 fn parse_string(&mut self) -> Result<String, ParseError> { 721 self.consume_token("\"")?; 722 let mut escape = false; 723 let mut value = String::new(); 724 loop { 725 let ch = self.next()?; 726 if escape { 727 escape = false; 728 value.push(match ch { 729 '0' => '\0', 730 'n' => '\n', 731 'r' => '\r', 732 't' => '\t', 733 _ => ch, 734 }); 735 } else { 736 match ch { 737 '"' => return Ok(value), 738 '\\' => escape = true, 739 _ => value.push(ch), 740 } 741 } 742 } 743 } 744 745 fn parse_expression(&mut self) -> Result<ast::Expression, ParseError> { 746 let mut expression = match self.peek()? { 747 '#' => self.parse_literal()?, 748 '"' => self.parse_string()?.into(), 749 '@' => self.parse_capture()?.into(), 750 '$' => self.parse_regex_capture()?.into(), 751 '(' => self.parse_call()?, 752 '[' => self.parse_list()?, 753 '{' => self.parse_set()?, 754 ch if ch.is_ascii_digit() => self.parse_integer_constant()?, 755 ch if is_ident_start(ch) => { 756 let location = self.location; 757 let name = self.parse_identifier("variable name")?; 758 ast::UnscopedVariable { name, location }.into() 759 } 760 ch => { 761 return Err(ParseError::UnexpectedCharacter( 762 ch, 763 "expression", 764 self.location, 765 )) 766 } 767 }; 768 self.consume_whitespace(); 769 while self.try_peek() == Some('.') { 770 self.skip().unwrap(); 771 self.consume_whitespace(); 772 let location = self.location; 773 let scope = Box::new(expression); 774 let name = self.parse_identifier("scoped variable name")?; 775 self.consume_whitespace(); 776 expression = ast::ScopedVariable { 777 scope, 778 name, 779 location, 780 } 781 .into(); 782 } 783 Ok(expression) 784 } 785 786 fn parse_call(&mut self) -> Result<ast::Expression, 
ParseError> { 787 self.consume_token("(")?; 788 self.consume_whitespace(); 789 let function = self.parse_identifier("function name")?; 790 self.consume_whitespace(); 791 let mut parameters = Vec::new(); 792 while self.peek()? != ')' { 793 parameters.push(self.parse_expression()?); 794 self.consume_whitespace(); 795 } 796 self.consume_token(")")?; 797 Ok(ast::Call { 798 function, 799 parameters, 800 } 801 .into()) 802 } 803 804 fn parse_sequence(&mut self, end_marker: char) -> Result<Vec<ast::Expression>, ParseError> { 805 let mut elements = Vec::new(); 806 while self.peek()? != end_marker { 807 elements.push(self.parse_expression()?); 808 self.consume_whitespace(); 809 if self.peek()? != end_marker { 810 self.consume_token(",")?; 811 self.consume_whitespace(); 812 } 813 } 814 Ok(elements) 815 } 816 817 fn parse_list(&mut self) -> Result<ast::Expression, ParseError> { 818 let location = self.location; 819 self.consume_token("[")?; 820 self.consume_whitespace(); 821 if let Ok(_) = self.consume_token("]") { 822 return Ok(ast::ListLiteral { elements: vec![] }.into()); 823 } 824 let first_element = self.parse_expression()?; 825 self.consume_whitespace(); 826 if let Ok(_) = self.consume_token("]") { 827 let elements = vec![first_element]; 828 Ok(ast::ListLiteral { elements }.into()) 829 } else if let Ok(_) = self.consume_token(",") { 830 self.consume_whitespace(); 831 let mut elements = self.parse_sequence(']')?; 832 self.consume_whitespace(); 833 self.consume_token("]")?; 834 elements.insert(0, first_element); 835 Ok(ast::ListLiteral { elements }.into()) 836 } else { 837 self.consume_token("for")?; 838 self.consume_whitespace(); 839 let variable = self.parse_unscoped_variable()?; 840 self.consume_whitespace(); 841 self.consume_token("in")?; 842 self.consume_whitespace(); 843 let value = self.parse_expression()?; 844 self.consume_whitespace(); 845 self.consume_token("]")?; 846 Ok(ast::ListComprehension { 847 element: first_element.into(), 848 variable, 849 value: 
value.into(), 850 location, 851 } 852 .into()) 853 } 854 } 855 856 fn parse_set(&mut self) -> Result<ast::Expression, ParseError> { 857 let location = self.location; 858 self.consume_token("{")?; 859 self.consume_whitespace(); 860 if let Ok(_) = self.consume_token("}") { 861 return Ok(ast::SetLiteral { elements: vec![] }.into()); 862 } 863 let first_element = self.parse_expression()?; 864 self.consume_whitespace(); 865 if let Ok(_) = self.consume_token("}") { 866 let elements = vec![first_element]; 867 Ok(ast::SetLiteral { elements }.into()) 868 } else if let Ok(_) = self.consume_token(",") { 869 self.consume_whitespace(); 870 let mut elements = self.parse_sequence('}')?; 871 self.consume_whitespace(); 872 self.consume_token("}")?; 873 elements.insert(0, first_element); 874 Ok(ast::SetLiteral { elements }.into()) 875 } else { 876 self.consume_token("for")?; 877 self.consume_whitespace(); 878 let variable = self.parse_unscoped_variable()?; 879 self.consume_whitespace(); 880 self.consume_token("in")?; 881 self.consume_whitespace(); 882 let value = self.parse_expression()?; 883 self.consume_whitespace(); 884 self.consume_token("}")?; 885 Ok(ast::SetComprehension { 886 element: first_element.into(), 887 variable, 888 value: value.into(), 889 location, 890 } 891 .into()) 892 } 893 } 894 895 fn parse_capture(&mut self) -> Result<ast::Capture, ParseError> { 896 let location = self.location; 897 let start = self.offset; 898 self.consume_token("@")?; 899 let ch = self.next()?; 900 if !is_ident_start(ch) { 901 return Err(ParseError::UnexpectedCharacter( 902 ch, 903 "query capture", 904 self.location, 905 )); 906 } 907 self.consume_while(is_ident); 908 let end = self.offset; 909 let name = Identifier::from(&self.source[start + 1..end]); 910 Ok(ast::Capture { 911 name, 912 quantifier: Zero, // set in checker 913 file_capture_index: usize::MAX, // set in checker 914 stanza_capture_index: usize::MAX, // set in checker 915 location, 916 } 917 .into()) 918 } 919 920 fn 
parse_integer_constant(&mut self) -> Result<ast::Expression, ParseError> { 921 // We'll have already verified that the next digit is an integer. 922 let start = self.offset; 923 self.consume_while(|ch| ch.is_ascii_digit()); 924 let end = self.offset; 925 let value = u32::from_str_radix(&self.source[start..end], 10).unwrap(); 926 Ok(ast::IntegerConstant { value }.into()) 927 } 928 929 fn parse_literal(&mut self) -> Result<ast::Expression, ParseError> { 930 let literal_location = self.location; 931 self.consume_token("#")?; 932 let literal = self.parse_name("literal")?; 933 if literal == "false" { 934 return Ok(ast::Expression::FalseLiteral); 935 } else if literal == "null" { 936 return Ok(ast::Expression::NullLiteral); 937 } else if literal == "true" { 938 return Ok(ast::Expression::TrueLiteral); 939 } else { 940 Err(ParseError::UnexpectedLiteral( 941 literal.into(), 942 literal_location, 943 )) 944 } 945 } 946 947 fn parse_regex_capture(&mut self) -> Result<ast::RegexCapture, ParseError> { 948 let regex_capture_location = self.location; 949 self.consume_token("$")?; 950 let start = self.offset; 951 self.consume_while(|ch| ch.is_ascii_digit()); 952 let end = self.offset; 953 if start == end { 954 return Err(ParseError::InvalidRegexCapture(regex_capture_location)); 955 } 956 let match_index = usize::from_str_radix(&self.source[start..end], 10).unwrap(); 957 Ok(ast::RegexCapture { match_index }.into()) 958 } 959 960 fn parse_attributes(&mut self) -> Result<Vec<ast::Attribute>, ParseError> { 961 let mut attributes = vec![self.parse_attribute()?]; 962 self.consume_whitespace(); 963 while self.try_peek() == Some(',') { 964 self.skip().unwrap(); 965 self.consume_whitespace(); 966 attributes.push(self.parse_attribute()?); 967 self.consume_whitespace(); 968 } 969 Ok(attributes) 970 } 971 972 fn parse_attribute(&mut self) -> Result<ast::Attribute, ParseError> { 973 let name = self.parse_identifier("attribute name")?; 974 self.consume_whitespace(); 975 let value = if 
self.try_peek() == Some('=') { 976 self.consume_token("=")?; 977 self.consume_whitespace(); 978 self.parse_expression()? 979 } else { 980 ast::Expression::TrueLiteral 981 }; 982 Ok(ast::Attribute { name, value }) 983 } 984 985 fn parse_variable(&mut self) -> Result<ast::Variable, ParseError> { 986 let expression_location = self.location; 987 match self.parse_expression()? { 988 ast::Expression::Variable(variable) => Ok(variable), 989 _ => Err(ParseError::ExpectedVariable(expression_location)), 990 } 991 } 992 993 fn parse_unscoped_variable(&mut self) -> Result<ast::UnscopedVariable, ParseError> { 994 match self.parse_variable()? { 995 ast::Variable::Unscoped(variable) => Ok(variable), 996 ast::Variable::Scoped(variable) => { 997 Err(ParseError::ExpectedUnscopedVariable(variable.location)) 998 } 999 } 1000 } 1001}