wast parser module (src/parser.rs) — traits for parsing the WebAssembly Text format
1460 lines · 52 kB · view raw
//! Traits for parsing the WebAssembly Text format
//!
//! This module contains the traits, abstractions, and utilities needed to
//! define custom parsers for WebAssembly text format items. This module exposes
//! a recursive descent parsing strategy and centers around the [`Parse`] trait
//! for defining new fragments of WebAssembly text syntax.
//!
//! The top-level [`parse`] function can be used to fully parse AST fragments:
//!
//! ```
//! use wast::Wat;
//! use wast::parser::{self, ParseBuffer};
//!
//! # fn foo() -> Result<(), wast::Error> {
//! let wat = "(module (func))";
//! let buf = ParseBuffer::new(wat)?;
//! let module = parser::parse::<Wat>(&buf)?;
//! # Ok(())
//! # }
//! ```
//!
//! and you can also define your own new syntax with the [`Parse`] trait:
//!
//! ```
//! use wast::kw;
//! use wast::core::{Import, Func};
//! use wast::parser::{Parser, Parse, Result};
//!
//! // Fields of a WebAssembly module which only allow imports and functions,
//! // and all imports must come before all the functions
//! struct OnlyImportsAndFunctions<'a> {
//!     imports: Vec<Import<'a>>,
//!     functions: Vec<Func<'a>>,
//! }
//!
//! impl<'a> Parse<'a> for OnlyImportsAndFunctions<'a> {
//!     fn parse(parser: Parser<'a>) -> Result<Self> {
//!         // While the second token is `import` (the first is `(`, so we care
//!         // about the second) we parse an `ast::ModuleImport` inside of
//!         // parentheses. The `parens` function here ensures that what we
//!         // parse inside of it is surrounded by `(` and `)`.
//!         let mut imports = Vec::new();
//!         while parser.peek2::<kw::import>()? {
//!             let import = parser.parens(|p| p.parse())?;
//!             imports.push(import);
//!         }
//!
//!         // Afterwards we assume everything else is a function. Note that
//!         // `parse` here is a generic function and type inference figures out
//!         // that we're parsing functions here and imports above.
//!         let mut functions = Vec::new();
//!         while !parser.is_empty() {
//!             let func = parser.parens(|p| p.parse())?;
//!             functions.push(func);
//!         }
//!
//!         Ok(OnlyImportsAndFunctions { imports, functions })
//!     }
//! }
//! ```
//!
//! This module is heavily inspired by [`syn`](https://docs.rs/syn) so you can
//! likely also draw inspiration from the excellent examples in the `syn` crate.

use crate::Error;
use crate::lexer::{Float, Integer, Lexer, Token, TokenKind};
use crate::token::Span;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use alloc::format;
use alloc::string::String;
use alloc::string::ToString;
use alloc::vec::Vec;
use bumpalo::Bump;
use core::cell::{Cell, RefCell};
use core::fmt;
use hashbrown::HashMap;

/// The maximum recursive depth of parens to parse.
///
/// This is sort of a fundamental limitation of the way this crate is
/// designed. Everything is done through recursive descent parsing which
/// means, well, that we're recursively going down the stack as we parse
/// nested data structures. While we can handle this for wasm expressions
/// since that's a pretty local decision, handling this for nested
/// modules/components would be far trickier. For now we just say that when
/// the parser goes too deep we return an error saying there's too many
/// nested items. It would be great to not return an error here, though!
#[cfg(feature = "wasm-module")]
pub(crate) const MAX_PARENS_DEPTH: usize = 100;

/// A top-level convenience parsing function that parses a `T` from `buf` and
/// requires that all tokens in `buf` are consumed.
///
/// This generic parsing function can be used to parse any `T` implementing the
/// [`Parse`] trait. It is not used from [`Parse`] trait implementations.
///
/// # Examples
///
/// ```
/// use wast::Wat;
/// use wast::parser::{self, ParseBuffer};
///
/// # fn foo() -> Result<(), wast::Error> {
/// let wat = "(module (func))";
/// let buf = ParseBuffer::new(wat)?;
/// let module = parser::parse::<Wat>(&buf)?;
/// # Ok(())
/// # }
/// ```
///
/// or parsing simply a fragment
///
/// ```
/// use wast::parser::{self, ParseBuffer};
///
/// # fn foo() -> Result<(), wast::Error> {
/// let wat = "12";
/// let buf = ParseBuffer::new(wat)?;
/// let val = parser::parse::<u32>(&buf)?;
/// assert_eq!(val, 12);
/// # Ok(())
/// # }
/// ```
pub fn parse<'a, T: Parse<'a>>(buf: &'a ParseBuffer<'a>) -> Result<T> {
    let parser = buf.parser();
    let result = parser.parse()?;
    // Succeed only if the parser consumed the entire token stream.
    if parser.cursor().token()?.is_none() {
        Ok(result)
    } else {
        Err(parser.error("extra tokens remaining after parse"))
    }
}

/// A trait for parsing a fragment of syntax in a recursive descent fashion.
///
/// The [`Parse`] trait is the main abstraction you'll be working with when
/// defining custom parsers or custom syntax for your WebAssembly text format
/// (or when using the official format items). Almost all items in the
/// [`core`](crate::core) module implement the [`Parse`] trait, and you'll
/// commonly use this with:
///
/// * The top-level [`parse`] function to parse an entire input.
/// * The intermediate [`Parser::parse`] function to parse an item out of an
///   input stream and then parse remaining items.
///
/// Implementations of [`Parse`] take a [`Parser`] as input and will mutate the
/// parser as they parse syntax. Once a token is consumed it cannot be
/// "un-consumed". Utilities such as [`Parser::peek`] and [`Parser::lookahead1`]
/// can be used to determine what to parse next.
///
/// ## When to parse `(` and `)`?
///
/// Conventionally types are not responsible for parsing their own `(` and `)`
/// tokens which surround the type. For example WebAssembly imports look like:
///
/// ```text
/// (import "foo" "bar" (func (type 0)))
/// ```
///
/// but the [`Import`](crate::core::Import) type parser looks like:
///
/// ```
/// # use wast::kw;
/// # use wast::parser::{Parser, Parse, Result};
/// # struct Import<'a>(&'a str);
/// impl<'a> Parse<'a> for Import<'a> {
///     fn parse(parser: Parser<'a>) -> Result<Self> {
///         parser.parse::<kw::import>()?;
///         // ...
/// #       panic!()
///     }
/// }
/// ```
///
/// It is assumed here that the `(` and `)` tokens which surround an `import`
/// statement in the WebAssembly text format are parsed by the parent item
/// parsing `Import`.
///
/// Note that this is just a convention, so it's not necessarily required for
/// all types. It's recommended that your types stick to this convention where
/// possible to avoid nested calls to [`Parser::parens`] or accidentally trying
/// to parse too many parentheses.
///
/// # Examples
///
/// Let's say you want to define your own WebAssembly text format which only
/// contains imports and functions. You also require all imports to be listed
/// before all functions.
/// An example [`Parse`] implementation might look like:
///
/// ```
/// use wast::core::{Import, Func};
/// use wast::kw;
/// use wast::parser::{Parser, Parse, Result};
///
/// // Fields of a WebAssembly module which only allow imports and functions,
/// // and all imports must come before all the functions
/// struct OnlyImportsAndFunctions<'a> {
///     imports: Vec<Import<'a>>,
///     functions: Vec<Func<'a>>,
/// }
///
/// impl<'a> Parse<'a> for OnlyImportsAndFunctions<'a> {
///     fn parse(parser: Parser<'a>) -> Result<Self> {
///         // While the second token is `import` (the first is `(`, so we care
///         // about the second) we parse an `ast::ModuleImport` inside of
///         // parentheses. The `parens` function here ensures that what we
///         // parse inside of it is surrounded by `(` and `)`.
///         let mut imports = Vec::new();
///         while parser.peek2::<kw::import>()? {
///             let import = parser.parens(|p| p.parse())?;
///             imports.push(import);
///         }
///
///         // Afterwards we assume everything else is a function. Note that
///         // `parse` here is a generic function and type inference figures out
///         // that we're parsing functions here and imports above.
///         let mut functions = Vec::new();
///         while !parser.is_empty() {
///             let func = parser.parens(|p| p.parse())?;
///             functions.push(func);
///         }
///
///         Ok(OnlyImportsAndFunctions { imports, functions })
///     }
/// }
/// ```
pub trait Parse<'a>: Sized {
    /// Attempts to parse `Self` from `parser`, returning an error if it could
    /// not be parsed.
    ///
    /// This method will mutate the state of `parser` after attempting to parse
    /// an instance of `Self`. If an error happens then it is likely fatal and
    /// there is no guarantee of how many tokens have been consumed from
    /// `parser`.
    ///
    /// As recommended in the documentation of [`Parse`], implementations of
    /// this function should not start out by parsing `(` and `)` tokens, but
    /// rather parents calling recursive parsers should parse the `(` and `)`
    /// tokens for their child item that's being parsed.
    ///
    /// # Errors
    ///
    /// This function will return an error if `Self` could not be parsed. Note
    /// that creating an [`Error`] is not exactly a cheap operation, so
    /// [`Error`] is typically fatal and propagated all the way back to the top
    /// parse call site.
    fn parse(parser: Parser<'a>) -> Result<Self>;
}

impl<'a, T> Parse<'a> for Box<T>
where
    T: Parse<'a>,
{
    fn parse(parser: Parser<'a>) -> Result<Self> {
        Ok(Box::new(parser.parse()?))
    }
}

/// A trait for types which can be used to "peek" to see if they're the next
/// token in an input stream of [`Parser`].
///
/// Often when implementing [`Parse`] you'll need to query what the next token
/// in the stream is to figure out what to parse next. This [`Peek`] trait
/// defines the set of types that can be tested whether they're the next token
/// in the input stream.
///
/// Implementations of [`Peek`] should only be present on types that consume
/// exactly one token (not zero, not more, exactly one). Types implementing
/// [`Peek`] should also typically implement [`Parse`].
///
/// See the documentation of [`Parser::peek`] for example usage.
pub trait Peek {
    /// Tests to see whether this token is the first token within the [`Cursor`]
    /// specified.
    ///
    /// Returns `true` if [`Parse`] for this type is highly likely to succeed
    /// failing no other error conditions happening (like an integer literal
    /// being too big).
    fn peek(cursor: Cursor<'_>) -> Result<bool>;

    /// The same as `peek`, except it checks the token immediately following
    /// the current token.
    fn peek2(mut cursor: Cursor<'_>) -> Result<bool> {
        // Advance past the current token (if any), then peek at the next one.
        match cursor.token()? {
            Some(token) => cursor.advance_past(&token),
            None => return Ok(false),
        }
        Self::peek(cursor)
    }

    /// Returns a human-readable name of this token to display when generating
    /// errors about this token missing.
    fn display() -> &'static str;
}

/// A convenience type definition for `Result` where the error is hardwired to
/// [`Error`].
pub type Result<T, E = Error> = core::result::Result<T, E>;

/// A low-level buffer of tokens which represents a completely lexed file.
///
/// A `ParseBuffer` will immediately lex an entire file and then store all
/// tokens internally. A `ParseBuffer` is only used to pass to the top-level
/// [`parse`] function.
pub struct ParseBuffer<'a> {
    lexer: Lexer<'a>,
    // Current position in the token stream; interior mutability lets the
    // `Copy`-able `Parser` handles advance it.
    cur: Cell<Position>,
    // Annotation name -> number of live registrations for it.
    known_annotations: RefCell<HashMap<String, usize>>,
    track_instr_spans: bool,
    // Current depth of `parens` nesting.
    depth: Cell<usize>,
    // Arena rooting owned byte strings with the buffer's lifetime.
    strings: Bump,
}

/// The current position within a `Lexer` that we're at. This simultaneously
/// stores the byte position that the lexer was last positioned at as well as
/// the next significant token.
///
/// Note that "significant" here does not mean that `token` is the next token
/// to be lexed at `offset`. Instead it's the next non-whitespace,
/// non-annotation, non-comment token. This simple cache-of-sorts avoids
/// re-parsing tokens the majority of the time, or at least that's the
/// intention.
///
/// If `token` is set to `None` then it means that either it hasn't been
/// calculated yet or the lexer is at EOF. Basically it means go talk to the
/// lexer.
#[derive(Copy, Clone)]
struct Position {
    offset: usize,
    token: Option<Token>,
}

/// An in-progress parser for the tokens of a WebAssembly text file.
///
/// A `Parser` is the argument to the [`Parse`] trait and is how the input
/// stream is interacted with to parse new items. Cloning [`Parser`] or copying
/// a parser refers to the same stream of tokens to parse; you cannot clone a
/// [`Parser`] to get two independent streams of items.
///
/// For more information about a [`Parser`] see its methods.
#[derive(Copy, Clone)]
pub struct Parser<'a> {
    buf: &'a ParseBuffer<'a>,
}

/// A helpful structure to perform a lookahead of one token to determine what to
/// parse.
///
/// For more information see the [`Parser::lookahead1`] method.
pub struct Lookahead1<'a> {
    parser: Parser<'a>,
    // Token names peeked so far, used to build the "expected ..." error.
    attempts: Vec<&'static str>,
}

/// An immutable cursor into a list of tokens.
///
/// This cursor cannot be mutated but can be used to parse more tokens in a list
/// of tokens. Cursors are created from the [`Parser::step`] method. This is a
/// very low-level parsing structure and you likely won't use it much.
#[derive(Copy, Clone)]
pub struct Cursor<'a> {
    parser: Parser<'a>,
    pos: Position,
}

impl ParseBuffer<'_> {
    /// Creates a new [`ParseBuffer`] by lexing the given `input` completely.
    ///
    /// # Errors
    ///
    /// Returns an error if `input` fails to lex.
    pub fn new(input: &str) -> Result<ParseBuffer<'_>> {
        ParseBuffer::new_with_lexer(Lexer::new(input))
    }

    /// Creates a new [`ParseBuffer`] by lexing the given `input` completely.
    ///
    /// # Errors
    ///
    /// Returns an error if `input` fails to lex.
    pub fn new_with_lexer(lexer: Lexer<'_>) -> Result<ParseBuffer<'_>> {
        Ok(ParseBuffer {
            lexer,
            depth: Cell::new(0),
            cur: Cell::new(Position {
                offset: 0,
                token: None,
            }),
            known_annotations: Default::default(),
            strings: Default::default(),
            track_instr_spans: false,
        })
    }

    /// Indicates whether the [`Expression::instr_spans`] field will be filled
    /// in.
    ///
    /// This is useful when enabling DWARF debugging information via
    /// [`EncodeOptions::dwarf`], for example.
    ///
    /// [`Expression::instr_spans`]: crate::core::Expression::instr_spans
    /// [`EncodeOptions::dwarf`]: crate::core::EncodeOptions::dwarf
    pub fn track_instr_spans(&mut self, track: bool) -> &mut Self {
        self.track_instr_spans = track;
        self
    }

    fn parser(&self) -> Parser<'_> {
        Parser { buf: self }
    }

    /// Stores an owned allocation in this `Parser` to attach the lifetime of
    /// the vector to `self`.
    ///
    /// This will return a reference to `s`, but one that's safely rooted in the
    /// `Parser`.
    fn push_str(&self, s: Vec<u8>) -> &[u8] {
        self.strings.alloc_slice_copy(&s)
    }

    /// Lexes the next "significant" token from the `pos` specified.
    ///
    /// This will skip irrelevant tokens such as whitespace, comments, and
    /// unknown annotations.
    fn advance_token(&self, mut pos: usize) -> Result<Option<Token>> {
        let token = loop {
            let token = match self.lexer.parse(&mut pos)? {
                Some(token) => token,
                None => return Ok(None),
            };
            match token.kind {
                // Always skip whitespace and comments.
                TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment => {
                    continue;
                }

                // If an lparen is seen then this may be skipped if it's an
                // annotation of the form `(@foo ...)`. In this situation
                // everything up to and including the closing rparen is skipped.
                //
                // Note that the annotation is only skipped if it's an unknown
                // annotation as known annotations are specifically registered
                // as "someone's gonna parse this".
                TokenKind::LParen => {
                    if let Some(annotation) = self.lexer.annotation(pos)? {
                        let text = annotation.annotation(self.lexer.input())?;
                        match self.known_annotations.borrow().get(&text[..]) {
                            // Zero live registrations (or never registered):
                            // treat as unknown and skip the whole annotation.
                            Some(0) | None => {
                                self.skip_annotation(&mut pos)?;
                                continue;
                            }
                            Some(_) => {}
                        }
                    }
                    break token;
                }
                _ => break token,
            }
        };
        Ok(Some(token))
    }

    /// Skips a parenthesized annotation whose opening `(` has already been
    /// lexed, advancing `pos` past the matching `)`.
    ///
    /// Tracks paren depth so nested s-expressions inside the annotation are
    /// skipped too; errors if EOF is reached before the annotation closes.
    fn skip_annotation(&self, pos: &mut usize) -> Result<()> {
        let mut depth = 1;
        let span = Span { offset: *pos };
        loop {
            let token = match self.lexer.parse(pos)? {
                Some(token) => token,
                None => {
                    break Err(Error::new(span, "unclosed annotation".to_string()));
                }
            };
            match token.kind {
                TokenKind::LParen => depth += 1,
                TokenKind::RParen => {
                    depth -= 1;
                    if depth == 0 {
                        break Ok(());
                    }
                }
                _ => {}
            }
        }
    }
}

impl<'a> Parser<'a> {
    /// Returns whether there are no more `Token` tokens to parse from this
    /// [`Parser`].
    ///
    /// This indicates that either we've reached the end of the input, or we're
    /// a sub-[`Parser`] inside of a parenthesized expression and we've hit the
    /// `)` token.
    ///
    /// Note that if `false` is returned there *may* be more comments. Comments
    /// and whitespace are not considered for whether this parser is empty.
    pub fn is_empty(self) -> bool {
        match self.cursor().token() {
            // A closing paren terminates this sub-parser's input.
            Ok(Some(token)) => matches!(token.kind, TokenKind::RParen),
            Ok(None) => true,
            // A lex error means there is still "something" left to parse.
            Err(_) => false,
        }
    }

    #[cfg(feature = "wasm-module")]
    pub(crate) fn has_meaningful_tokens(self) -> bool {
        self.buf.lexer.iter(0).any(|t| match t {
            Ok(token) => !matches!(
                token.kind,
                TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment
            ),
            Err(_) => true,
        })
    }

    /// Parses a `T` from this [`Parser`].
    ///
    /// This method has a trivial definition (it simply calls
    /// [`T::parse`](Parse::parse)) but is here for syntactic purposes. This is
    /// what you'll call 99% of the time in a [`Parse`] implementation in order
    /// to parse sub-items.
    ///
    /// Typically you always want to use `?` with the result of this method, you
    /// should not handle errors and decide what else to parse. To handle
    /// branches in parsing, use [`Parser::peek`].
    ///
    /// # Examples
    ///
    /// A good example of using `parse` is to see how the [`TableType`] type is
    /// parsed in this crate. A [`TableType`] is defined in the official
    /// specification as [`tabletype`][spec] and is defined as:
    ///
    /// [spec]: https://webassembly.github.io/spec/core/text/types.html#table-types
    ///
    /// ```text
    /// tabletype ::= lim:limits et:reftype
    /// ```
    ///
    /// so to parse a [`TableType`] we recursively need to parse a [`Limits`]
    /// and a [`RefType`]
    ///
    /// ```
    /// # use wast::core::*;
    /// # use wast::parser::*;
    /// struct TableType<'a> {
    ///     limits: Limits,
    ///     elem: RefType<'a>,
    /// }
    ///
    /// impl<'a> Parse<'a> for TableType<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // parse the `lim` then `et` in sequence
    ///         Ok(TableType {
    ///             limits: parser.parse()?,
    ///             elem: parser.parse()?,
    ///         })
    ///     }
    /// }
    /// ```
    ///
    /// [`Limits`]: crate::core::Limits
    /// [`TableType`]: crate::core::TableType
    /// [`RefType`]: crate::core::RefType
    pub fn parse<T: Parse<'a>>(self) -> Result<T> {
        T::parse(self)
    }

    /// Performs a cheap test to see whether the current token in this stream is
    /// `T`.
    ///
    /// This method can be used to efficiently determine what next to parse. The
    /// [`Peek`] trait is defined for types which can be used to test if they're
    /// the next item in the input stream.
    ///
    /// Nothing is actually parsed in this method, nor does this mutate the
    /// state of this [`Parser`]. Instead, this simply performs a check.
    ///
    /// This method is frequently combined with the [`Parser::lookahead1`]
    /// method to automatically produce nice error messages if some tokens
    /// aren't found.
    ///
    /// # Examples
    ///
    /// For an example of using the `peek` method let's take a look at parsing
    /// the [`Limits`] type. This is [defined in the official spec][spec] as:
    ///
    /// ```text
    /// limits ::= n:u32
    ///          | n:u32 m:u32
    /// ```
    ///
    /// which means that it's either one `u32` token or two, so we need to know
    /// whether to consume two tokens or one:
    ///
    /// ```
    /// # use wast::parser::*;
    /// struct Limits {
    ///     min: u32,
    ///     max: Option<u32>,
    /// }
    ///
    /// impl<'a> Parse<'a> for Limits {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Always parse the first number...
    ///         let min = parser.parse()?;
    ///
    ///         // ... and then test if there's a second number before parsing
    ///         let max = if parser.peek::<u32>()? {
    ///             Some(parser.parse()?)
    ///         } else {
    ///             None
    ///         };
    ///
    ///         Ok(Limits { min, max })
    ///     }
    /// }
    /// ```
    ///
    /// [spec]: https://webassembly.github.io/spec/core/text/types.html#limits
    /// [`Limits`]: crate::core::Limits
    pub fn peek<T: Peek>(self) -> Result<bool> {
        T::peek(self.cursor())
    }

    /// Same as the [`Parser::peek`] method, except checks the next token, not
    /// the current token.
    pub fn peek2<T: Peek>(self) -> Result<bool> {
        T::peek2(self.cursor())
    }

    /// Same as the [`Parser::peek2`] method, except checks the next next token,
    /// not the next token.
    pub fn peek3<T: Peek>(self) -> Result<bool> {
        // Advance past two tokens, then peek at the third.
        let mut cursor = self.cursor();
        match cursor.token()? {
            Some(token) => cursor.advance_past(&token),
            None => return Ok(false),
        }
        match cursor.token()? {
            Some(token) => cursor.advance_past(&token),
            None => return Ok(false),
        }
        T::peek(cursor)
    }

    /// A helper structure to perform a sequence of `peek` operations and if
    /// they all fail produce a nice error message.
    ///
    /// This method purely exists for conveniently producing error messages and
    /// provides no functionality that [`Parser::peek`] doesn't already give.
    /// The [`Lookahead1`] structure has one main method [`Lookahead1::peek`],
    /// which is the same method as [`Parser::peek`]. The difference is that the
    /// [`Lookahead1::error`] method needs no arguments.
    ///
    /// # Examples
    ///
    /// Let's look at the parsing of [`Index`]. This type is either a `u32` or
    /// an [`Id`] and is used in name resolution primarily. The [official
    /// grammar for an index][spec] is:
    ///
    /// ```text
    /// idx ::= x:u32
    ///       | v:id
    /// ```
    ///
    /// Which is to say that an index is either a `u32` or an [`Id`]. When
    /// parsing an [`Index`] we can do:
    ///
    /// ```
    /// # use wast::token::*;
    /// # use wast::parser::*;
    /// enum Index<'a> {
    ///     Num(u32),
    ///     Id(Id<'a>),
    /// }
    ///
    /// impl<'a> Parse<'a> for Index<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         let mut l = parser.lookahead1();
    ///         if l.peek::<Id>()? {
    ///             Ok(Index::Id(parser.parse()?))
    ///         } else if l.peek::<u32>()? {
    ///             Ok(Index::Num(parser.parse()?))
    ///         } else {
    ///             // produces error message of `expected identifier or u32`
    ///             Err(l.error())
    ///         }
    ///     }
    /// }
    /// ```
    ///
    /// [spec]: https://webassembly.github.io/spec/core/text/modules.html#indices
    /// [`Index`]: crate::token::Index
    /// [`Id`]: crate::token::Id
    pub fn lookahead1(self) -> Lookahead1<'a> {
        Lookahead1 {
            attempts: Vec::new(),
            parser: self,
        }
    }

    /// Parse an item surrounded by parentheses.
    ///
    /// WebAssembly's text format is all based on s-expressions, so naturally
    /// you're going to want to parse a lot of parenthesized things! As noted in
    /// the documentation of [`Parse`] you typically don't parse your own
    /// surrounding `(` and `)` tokens, but the parser above you parsed them for
    /// you. This is the method the parser above you uses.
    ///
    /// This method will parse a `(` token, and then call `f` on a sub-parser
    /// which when finished asserts that a `)` token is the next token. This
    /// requires that `f` consumes all tokens leading up to the paired `)`.
    ///
    /// Usage will often simply be `parser.parens(|p| p.parse())?` to
    /// automatically parse a type within parentheses, but you can, as always,
    /// go crazy and do whatever you'd like too.
    ///
    /// # Examples
    ///
    /// A good example of this is to see how a `Module` is parsed. This isn't
    /// the exact definition, but it's close enough!
    ///
    /// ```
    /// # use wast::kw;
    /// # use wast::core::*;
    /// # use wast::parser::*;
    /// struct Module<'a> {
    ///     fields: Vec<ModuleField<'a>>,
    /// }
    ///
    /// impl<'a> Parse<'a> for Module<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Modules start out with a `module` keyword
    ///         parser.parse::<kw::module>()?;
    ///
    ///         // And then everything else is `(field ...)`, so while we've got
    ///         // items left we continuously parse parenthesized items.
    ///         let mut fields = Vec::new();
    ///         while !parser.is_empty() {
    ///             fields.push(parser.parens(|p| p.parse())?);
    ///         }
    ///         Ok(Module { fields })
    ///     }
    /// }
    /// ```
    pub fn parens<T>(self, f: impl FnOnce(Parser<'a>) -> Result<T>) -> Result<T> {
        self.buf.depth.set(self.buf.depth.get() + 1);
        // Remember where we started so a failed parse can rewind.
        let before = self.buf.cur.get();
        let res = self.step(|cursor| {
            let mut cursor = match cursor.lparen()? {
                Some(rest) => rest,
                None => return Err(cursor.error("expected `(`")),
            };
            cursor.parser.buf.cur.set(cursor.pos);
            let result = f(cursor.parser)?;

            // Reset our cursor's state to whatever the current state of the
            // parser is.
            cursor.pos = cursor.parser.buf.cur.get();

            match cursor.rparen()? {
                Some(rest) => Ok((result, rest)),
                None => Err(cursor.error("expected `)`")),
            }
        });
        self.buf.depth.set(self.buf.depth.get() - 1);
        // On failure restore the position saved before the `(` was consumed.
        if res.is_err() {
            self.buf.cur.set(before);
        }
        res
    }

    /// Return the depth of nested parens we've parsed so far.
    ///
    /// This is a low-level method that is only useful for implementing
    /// recursion limits in custom parsers.
    pub fn parens_depth(&self) -> usize {
        self.buf.depth.get()
    }

    /// Checks that the parser parens depth hasn't exceeded the maximum depth.
    #[cfg(feature = "wasm-module")]
    pub(crate) fn depth_check(&self) -> Result<()> {
        if self.parens_depth() > MAX_PARENS_DEPTH {
            Err(self.error("item nesting too deep"))
        } else {
            Ok(())
        }
    }

    fn cursor(self) -> Cursor<'a> {
        Cursor {
            parser: self,
            pos: self.buf.cur.get(),
        }
    }

    /// A low-level parsing method you probably won't use.
    ///
    /// This is used to implement parsing of the most primitive types in the
    /// [`core`](crate::core) module. You probably don't want to use this, but
    /// probably want to use something like [`Parser::parse`] or
    /// [`Parser::parens`].
    pub fn step<F, T>(self, f: F) -> Result<T>
    where
        F: FnOnce(Cursor<'a>) -> Result<(T, Cursor<'a>)>,
    {
        let (result, cursor) = f(self.cursor())?;
        self.buf.cur.set(cursor.pos);
        Ok(result)
    }

    /// Creates an error whose line/column information is pointing at the
    /// current token.
    ///
    /// This is used to produce human-readable error messages which point to the
    /// right location in the input stream, and the `msg` here is arbitrary text
    /// used to associate with the error and indicate why it was generated.
    pub fn error(self, msg: impl fmt::Display) -> Error {
        self.error_at(self.cursor().cur_span(), msg)
    }

    /// Creates an error whose line/column information is pointing at the
    /// given span.
    pub fn error_at(self, span: Span, msg: impl fmt::Display) -> Error {
        Error::parse(span, self.buf.lexer.input(), msg.to_string())
    }

    /// Returns the span of the current token
    pub fn cur_span(&self) -> Span {
        self.cursor().cur_span()
    }

    /// Returns the span of the previous token
    pub fn prev_span(&self) -> Span {
        self.cursor()
            .prev_span()
            .unwrap_or_else(|| Span::from_offset(0))
    }

    /// Registers a new known annotation with this parser to allow parsing
    /// annotations with this name.
    ///
    /// [WebAssembly annotations][annotation] are a proposal for the text format
    /// which allows decorating the text format with custom structured
    /// information. By default all annotations are ignored when parsing, but
    /// the whole purpose of them is to sometimes parse them!
    ///
    /// To support parsing text annotations this method is used to allow
    /// annotations and their tokens to *not* be skipped. Once an annotation is
    /// registered with this method, then while the return value has not been
    /// dropped (e.g. the scope of where this function is called) annotations
    /// with the name `annotation` will be part of the token stream and not
    /// implicitly skipped.
    ///
    /// # Skipping annotations
    ///
    /// The behavior of skipping unknown/unregistered annotations can be
    /// somewhat subtle and surprising, so if you're interested in parsing
    /// annotations it's important to point out the importance of this method
    /// and where to call it.
    ///
    /// Generally when parsing tokens you'll be bottoming out in various
    /// `Cursor` methods. These are all documented as advancing the stream as
    /// much as possible to the next token, skipping "irrelevant stuff" like
    /// comments, whitespace, etc. The `Cursor` methods will also skip unknown
    /// annotations. This means that if you parse *any* token, it will skip over
    /// any number of annotations that are unknown at all times.
    ///
    /// To parse an annotation you must, before parsing any token of the
    /// annotation, register the annotation via this method. This includes the
    /// beginning `(` token, which is otherwise skipped if the annotation isn't
    /// marked as registered. Typically parsers parse the *contents* of an
    /// s-expression, so this means that the outer parser of an s-expression
    /// must register the custom annotation name, rather than the inner parser.
    ///
    /// # Return
    ///
    /// This function returns an RAII guard which, when dropped, will unregister
    /// the `annotation` given. Parsing `annotation` is only supported while the
    /// returned value is still alive, and once dropped the parser will go back
    /// to skipping annotations with the name `annotation`.
    ///
    /// # Example
    ///
    /// Let's see an example of how the `@name` annotation is parsed for modules
    /// to get an idea of how this works:
    ///
    /// ```
    /// # use wast::kw;
    /// # use wast::token::NameAnnotation;
    /// # use wast::parser::*;
    /// struct Module<'a> {
    ///     name: Option<NameAnnotation<'a>>,
    /// }
    ///
    /// impl<'a> Parse<'a> for Module<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Modules start out with a `module` keyword
    ///         parser.parse::<kw::module>()?;
    ///
    ///         // Next may be `(@name "foo")`. Typically this annotation would
    ///         // be skipped, but we don't want it skipped, so we register it.
    ///         // Note that the parse implementation of
    ///         // `Option<NameAnnotation>` is the one that consumes the
    ///         // parentheses here.
    ///         let _r = parser.register_annotation("name");
    ///         let name = parser.parse()?;
    ///
    ///         // ... and normally you'd otherwise parse module fields here ...
    ///
    ///         Ok(Module { name })
    ///     }
    /// }
    /// ```
    ///
    /// Another example is how we parse the `@custom` annotation. Note that this
    /// is parsed as part of `ModuleField`, so note how the annotation is
    /// registered *before* we parse the parentheses of the annotation.
    ///
    /// ```
    /// # use wast::{kw, annotation};
    /// # use wast::core::Custom;
    /// # use wast::parser::*;
    /// struct Module<'a> {
    ///     fields: Vec<ModuleField<'a>>,
    /// }
    ///
    /// impl<'a> Parse<'a> for Module<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Modules start out with a `module` keyword
    ///         parser.parse::<kw::module>()?;
    ///
    ///         // register the `@custom` annotation *first* before we start
    ///         // parsing fields, because each field is contained in
    ///         // parentheses and to parse the parentheses of an annotation we
    ///         // have to know to not skip it.
    ///         let _r = parser.register_annotation("custom");
    ///
    ///         let mut fields = Vec::new();
    ///         while !parser.is_empty() {
    ///             fields.push(parser.parens(|p| p.parse())?);
    ///         }
    ///         Ok(Module { fields })
    ///     }
    /// }
    ///
    /// enum ModuleField<'a> {
    ///     Custom(Custom<'a>),
    ///     // ...
    /// }
    ///
    /// impl<'a> Parse<'a> for ModuleField<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Note that because we have previously registered the `@custom`
    ///         // annotation with the parser we know that `peek` methods like
    ///         // this, working on the annotation token, are enabled to ever
    ///         // return `true`.
    ///         if parser.peek::<annotation::custom>()? {
    ///             return Ok(ModuleField::Custom(parser.parse()?));
    ///         }
    ///
    ///         // .. typically we'd parse other module fields here...
    ///
    ///         Err(parser.error("unknown module field"))
    ///     }
    /// }
    /// ```
    ///
    /// [annotation]: https://github.com/WebAssembly/annotations
    pub fn register_annotation<'b>(self, annotation: &'b str) -> impl Drop + 'b
    where
        'a: 'b,
    {
        // Bump the live-registration count for this annotation name,
        // inserting a zeroed slot on first registration.
        let mut annotations = self.buf.known_annotations.borrow_mut();
        if !annotations.contains_key(annotation) {
            annotations.insert(annotation.to_string(), 0);
        }
        *annotations.get_mut(annotation).unwrap() += 1;

        return RemoveOnDrop(self, annotation);

        // RAII guard which decrements the registration count on drop.
        struct RemoveOnDrop<'a>(Parser<'a>, &'a str);

        impl Drop for RemoveOnDrop<'_> {
            fn drop(&mut self) {
                let mut annotations = self.0.buf.known_annotations.borrow_mut();
                let slot = annotations.get_mut(self.1).unwrap();
                *slot -= 1;
            }
        }
    }

    #[cfg(feature = "wasm-module")]
    pub(crate) fn track_instr_spans(&self) -> bool {
        self.buf.track_instr_spans
    }

    #[cfg(feature = "wasm-module")]
    pub(crate) fn with_standard_annotations_registered<R>(
        self,
        f: impl FnOnce(Self) -> Result<R>,
    ) -> Result<R> {
        let _r = self.register_annotation("custom");
        let _r = self.register_annotation("producers");
        let _r = self.register_annotation("name");
        let _r = self.register_annotation("dylink.0");
        let _r = self.register_annotation("metadata.code.branch_hint");
        f(self)
    }
}

impl<'a> Cursor<'a> {
    /// Returns the span of the next `Token` token.
    ///
    /// Does not take into account whitespace or comments.
    pub fn cur_span(&self) -> Span {
        let offset = match self.token() {
            Ok(Some(t)) => t.offset,
            // At EOF, point at the end of the input.
            Ok(None) => self.parser.buf.lexer.input().len(),
            Err(_) => self.pos.offset,
        };
        Span { offset }
    }

    /// Returns the span of the previous `Token` token.
    ///
    /// Does not take into account whitespace or comments.
    pub(crate) fn prev_span(&self) -> Option<Span> {
        // TODO: this currently returns the cursor's *current* offset rather
        // than the start of the previous token; the token-buffer-based
        // implementation below is disabled.
        Some(Span {
            offset: self.pos.offset,
        })
        // let (token, _) = self.parser.buf.tokens.get(self.cur.checked_sub(1)?)?;
        // Some(Span {
        //     offset: token.offset,
        // })
    }

    /// Same as [`Parser::error`], but works with the current token in this
    /// [`Cursor`] instead.
    pub fn error(&self, msg: impl fmt::Display) -> Error {
        self.parser.error_at(self.cur_span(), msg)
    }

    /// Tests whether the next token is an lparen
    pub fn peek_lparen(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::LParen,
                ..
            })
        ))
    }

    /// Tests whether the next token is an rparen
    pub fn peek_rparen(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::RParen,
                ..
            })
        ))
    }

    /// Tests whether the next token is an id
    pub fn peek_id(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Id,
                ..
            })
        ))
    }

    /// Tests whether the next token is reserved
    pub fn peek_reserved(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Reserved,
                ..
            })
        ))
    }

    /// Tests whether the next token is a keyword
    pub fn peek_keyword(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Keyword,
                ..
            })
        ))
    }

    /// Tests whether the next token is an integer
    pub fn peek_integer(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Integer(_),
                ..
            })
        ))
    }

    /// Tests whether the next token is a float
    pub fn peek_float(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Float(_),
                ..
            })
        ))
    }

    /// Tests whether the next token is a string
    pub fn peek_string(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::String,
                ..
            })
        ))
    }

    /// Attempts to advance this cursor if the current token is a `(`.
    ///
    /// If the current token is `(`, returns a new [`Cursor`] pointing at the
    /// rest of the tokens in the stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn lparen(mut self) -> Result<Option<Self>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::LParen => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some(self))
    }

    /// Attempts to advance this cursor if the current token is a `)`.
    ///
    /// If the current token is `)`, returns a new [`Cursor`] pointing at the
    /// rest of the tokens in the stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn rparen(mut self) -> Result<Option<Self>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::RParen => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some(self))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Id`](crate::lexer::Token)
    ///
    /// If the current token is `Id`, returns the identifier minus the leading
    /// `$` character as well as a new [`Cursor`] pointing at the rest of the
    /// tokens in the stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn id(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Id => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        let id = match token.id(self.parser.buf.lexer.input())? {
            Cow::Borrowed(id) => id,
            // Our `self.parser.buf` only retains `Vec<u8>` so briefly convert
            // this owned string to `Vec<u8>` and then convert it back to `&str`
            // out the other end.
            Cow::Owned(s) => {
                core::str::from_utf8(self.parser.buf.push_str(s.into_bytes())).unwrap()
            }
        };
        Ok(Some((id, self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Keyword`](crate::lexer::Token)
    ///
    /// If the current token is `Keyword`, returns the keyword as well as a new
    /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise
    /// returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn keyword(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Keyword => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some((token.keyword(self.parser.buf.lexer.input()), self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Annotation`](crate::lexer::Token)
    ///
    /// If the current token is `Annotation`, returns the annotation token as well
    /// as a new [`Cursor`] pointing at the rest of the tokens in the stream.
    /// Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn annotation(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Annotation => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        let annotation = match token.annotation(self.parser.buf.lexer.input())? {
            Cow::Borrowed(id) => id,
            // Our `self.parser.buf` only retains `Vec<u8>` so briefly convert
            // this owned string to `Vec<u8>` and then convert it back to `&str`
            // out the other end.
            Cow::Owned(s) => {
                core::str::from_utf8(self.parser.buf.push_str(s.into_bytes())).unwrap()
            }
        };
        Ok(Some((annotation, self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Reserved`](crate::lexer::Token)
    ///
    /// If the current token is `Reserved`, returns the reserved token as well
    /// as a new [`Cursor`] pointing at the rest of the tokens in the stream.
    /// Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn reserved(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Reserved => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some((token.reserved(self.parser.buf.lexer.input()), self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Integer`](crate::lexer::Token)
    ///
    /// If the current token is `Integer`, returns the integer as well as a new
    /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise
    /// returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn integer(mut self) -> Result<Option<(Integer<'a>, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        let i = match token.kind {
            TokenKind::Integer(i) => i,
            _ => return Ok(None),
        };
        self.advance_past(&token);
        Ok(Some((
            token.integer(self.parser.buf.lexer.input(), i),
            self,
        )))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Float`](crate::lexer::Token)
    ///
    /// If the current token is `Float`, returns the float as well as a new
    /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise
    /// returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn float(mut self) -> Result<Option<(Float<'a>, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        let f = match token.kind {
            TokenKind::Float(f) => f,
            _ => return Ok(None),
        };
        self.advance_past(&token);
        Ok(Some((token.float(self.parser.buf.lexer.input(), f), self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::String`](crate::lexer::Token)
    ///
    /// If the current token is `String`, returns the byte value of the string
    /// as well as a new [`Cursor`] pointing at the rest of the tokens in the
    /// stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn string(mut self) -> Result<Option<(&'a [u8], Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::String => {}
            _ => return Ok(None),
        }
        // Intern an owned (escaped) string into the parse buffer so the
        // returned slice can borrow from `'a`.
        let string = match token.string(self.parser.buf.lexer.input()) {
            Cow::Borrowed(s) => s,
            Cow::Owned(s) => self.parser.buf.push_str(s),
        };
        self.advance_past(&token);
        Ok(Some((string, self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::LineComment`](crate::lexer::Token) or a
    /// [`Token::BlockComment`](crate::lexer::Token)
    ///
    /// This function will only skip whitespace, no other tokens.
    pub fn comment(mut self) -> Result<Option<(&'a str, Self)>> {
        let start = self.pos.offset;
        // Comments are normally skipped by `token()`, so bypass the cached
        // token and lex raw tokens directly, skipping only whitespace.
        self.pos.token = None;
        let comment = loop {
            let token = match self.parser.buf.lexer.parse(&mut self.pos.offset)? {
                Some(token) => token,
                None => return Ok(None),
            };
            match token.kind {
                TokenKind::LineComment | TokenKind::BlockComment => {
                    break token.src(self.parser.buf.lexer.input());
                }
                TokenKind::Whitespace => {}
                _ => {
                    // Not a comment: rewind to where we started and bail.
                    self.pos.offset = start;
                    return Ok(None);
                }
            }
        };
        Ok(Some((comment, self)))
    }

    // Returns the significant token at the cursor, using the cached token
    // when present and otherwise advancing past irrelevant tokens from
    // `self.pos.offset`.
    fn token(&self) -> Result<Option<Token>> {
        match self.pos.token {
            Some(token) => Ok(Some(token)),
            None => self.parser.buf.advance_token(self.pos.offset),
        }
    }

    // Moves the cursor just past `token` and eagerly caches the next
    // significant token; lex errors here are deferred (treated as "no
    // token") until the next `token()` call surfaces them.
    fn advance_past(&mut self, token: &Token) {
        self.pos.offset = token.offset + (token.len as usize);
        self.pos.token = self
            .parser
            .buf
            .advance_token(self.pos.offset)
            .unwrap_or(None);
    }
}

impl<'a> Lookahead1<'a> {
    /// Attempts to see if `T` is the next token in the [`Parser`] this
    /// [`Lookahead1`] references.
    ///
    /// For more information see [`Parser::lookahead1`] and [`Parser::peek`]
    pub fn peek<T: Peek>(&mut self) -> Result<bool> {
        // On failure, remember what was expected so `error()` can report it.
        Ok(if self.parser.peek::<T>()? {
            true
        } else {
            self.attempts.push(T::display());
            false
        })
    }

    /// Returns the underlying parser that this lookahead is looking at.
    pub fn parser(&self) -> Parser<'a> {
        self.parser
    }

    /// Generates an error message saying that one of the tokens passed to
    /// the [`Lookahead1::peek`] method was expected.
    ///
    /// Before calling this method you should call [`Lookahead1::peek`] for all
    /// possible tokens you'd like to parse.
1423 pub fn error(self) -> Error { 1424 match self.attempts.len() { 1425 0 => { 1426 if self.parser.is_empty() { 1427 self.parser.error("unexpected end of input") 1428 } else { 1429 self.parser.error("unexpected token") 1430 } 1431 } 1432 1 => { 1433 let message = format!("unexpected token, expected {}", self.attempts[0]); 1434 self.parser.error(&message) 1435 } 1436 2 => { 1437 let message = format!( 1438 "unexpected token, expected {} or {}", 1439 self.attempts[0], self.attempts[1] 1440 ); 1441 self.parser.error(&message) 1442 } 1443 _ => { 1444 let join = self.attempts.join(", "); 1445 let message = format!("unexpected token, expected one of: {}", join); 1446 self.parser.error(&message) 1447 } 1448 } 1449 } 1450} 1451 1452impl<'a, T: Peek + Parse<'a>> Parse<'a> for Option<T> { 1453 fn parse(parser: Parser<'a>) -> Result<Option<T>> { 1454 if parser.peek::<T>()? { 1455 Ok(Some(parser.parse()?)) 1456 } else { 1457 Ok(None) 1458 } 1459 } 1460}