wast parser module (src/parser.rs) — traits for parsing the WebAssembly Text format
1460 lines · 52 kB · view raw
//! Traits for parsing the WebAssembly Text format
//!
//! This module contains the traits, abstractions, and utilities needed to
//! define custom parsers for WebAssembly text format items. This module exposes
//! a recursive descent parsing strategy and centers around the [`Parse`] trait
//! for defining new fragments of WebAssembly text syntax.
//!
//! The top-level [`parse`] function can be used to fully parse AST fragments:
//!
//! ```
//! use wast::Wat;
//! use wast::parser::{self, ParseBuffer};
//!
//! # fn foo() -> Result<(), wast::Error> {
//! let wat = "(module (func))";
//! let buf = ParseBuffer::new(wat)?;
//! let module = parser::parse::<Wat>(&buf)?;
//! # Ok(())
//! # }
//! ```
//!
//! and you can also define your own new syntax with the [`Parse`] trait:
//!
//! ```
//! use wast::kw;
//! use wast::core::{Import, Func};
//! use wast::parser::{Parser, Parse, Result};
//!
//! // Fields of a WebAssembly module which only allow imports and functions,
//! // and all imports must come before all the functions
//! struct OnlyImportsAndFunctions<'a> {
//!     imports: Vec<Import<'a>>,
//!     functions: Vec<Func<'a>>,
//! }
//!
//! impl<'a> Parse<'a> for OnlyImportsAndFunctions<'a> {
//!     fn parse(parser: Parser<'a>) -> Result<Self> {
//!         // While the second token is `import` (the first is `(`, so we care
//!         // about the second) we parse an `ast::ModuleImport` inside of
//!         // parentheses. The `parens` function here ensures that what we
//!         // parse inside of it is surrounded by `(` and `)`.
//!         let mut imports = Vec::new();
//!         while parser.peek2::<kw::import>()? {
//!             let import = parser.parens(|p| p.parse())?;
//!             imports.push(import);
//!         }
//!
//!         // Afterwards we assume everything else is a function. Note that
//!         // `parse` here is a generic function and type inference figures out
//!         // that we're parsing functions here and imports above.
//!         let mut functions = Vec::new();
//!         while !parser.is_empty() {
//!             let func = parser.parens(|p| p.parse())?;
//!             functions.push(func);
//!         }
//!
//!         Ok(OnlyImportsAndFunctions { imports, functions })
//!     }
//! }
//! ```
//!
//! This module is heavily inspired by [`syn`](https://docs.rs/syn) so you can
//! likely also draw inspiration from the excellent examples in the `syn` crate.

use crate::Error;
use crate::lexer::{Float, Integer, Lexer, Token, TokenKind};
use crate::token::Span;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use alloc::format;
use alloc::string::String;
use alloc::string::ToString;
use alloc::vec::Vec;
use bumpalo::Bump;
use core::cell::{Cell, RefCell};
use core::fmt;
use hashbrown::HashMap;

/// The maximum recursive depth of parens to parse.
///
/// This is sort of a fundamental limitation of the way this crate is
/// designed. Everything is done through recursive descent parsing which
/// means, well, that we're recursively going down the stack as we parse
/// nested data structures. While we can handle this for wasm expressions
/// since that's a pretty local decision, handling this for nested
/// modules/components would be far trickier. For now we just say that when
/// the parser goes too deep we return an error saying there's too many
/// nested items. It would be great to not return an error here, though!
#[cfg(feature = "wasm-module")]
pub(crate) const MAX_PARENS_DEPTH: usize = 100;

/// A top-level convenience parsing function that parses a `T` from `buf` and
/// requires that all tokens in `buf` are consumed.
///
/// This generic parsing function can be used to parse any `T` implementing the
/// [`Parse`] trait. It is not used from [`Parse`] trait implementations.
///
/// # Examples
///
/// ```
/// use wast::Wat;
/// use wast::parser::{self, ParseBuffer};
///
/// # fn foo() -> Result<(), wast::Error> {
/// let wat = "(module (func))";
/// let buf = ParseBuffer::new(wat)?;
/// let module = parser::parse::<Wat>(&buf)?;
/// # Ok(())
/// # }
/// ```
///
/// or parsing simply a fragment
///
/// ```
/// use wast::parser::{self, ParseBuffer};
///
/// # fn foo() -> Result<(), wast::Error> {
/// let wat = "12";
/// let buf = ParseBuffer::new(wat)?;
/// let val = parser::parse::<u32>(&buf)?;
/// assert_eq!(val, 12);
/// # Ok(())
/// # }
/// ```
pub fn parse<'a, T: Parse<'a>>(buf: &'a ParseBuffer<'a>) -> Result<T> {
    let parser = buf.parser();
    let result = parser.parse()?;
    // Succeed only if the parser consumed the entire token stream.
    if parser.cursor().token()?.is_none() {
        Ok(result)
    } else {
        Err(parser.error("extra tokens remaining after parse"))
    }
}

/// A trait for parsing a fragment of syntax in a recursive descent fashion.
///
/// The [`Parse`] trait is the main abstraction you'll be working with when
/// defining custom parsers or custom syntax for your WebAssembly text format
/// (or when using the official format items). Almost all items in the
/// [`core`](crate::core) module implement the [`Parse`] trait, and you'll
/// commonly use this with:
///
/// * The top-level [`parse`] function to parse an entire input.
/// * The intermediate [`Parser::parse`] function to parse an item out of an
///   input stream and then parse remaining items.
///
/// Implementations of [`Parse`] take a [`Parser`] as input and will mutate the
/// parser as they parse syntax. Once a token is consumed it cannot be
/// "un-consumed". Utilities such as [`Parser::peek`] and [`Parser::lookahead1`]
/// can be used to determine what to parse next.
///
/// ## When to parse `(` and `)`?
///
/// Conventionally types are not responsible for parsing their own `(` and `)`
/// tokens which surround the type. For example WebAssembly imports look like:
///
/// ```text
/// (import "foo" "bar" (func (type 0)))
/// ```
///
/// but the [`Import`](crate::core::Import) type parser looks like:
///
/// ```
/// # use wast::kw;
/// # use wast::parser::{Parser, Parse, Result};
/// # struct Import<'a>(&'a str);
/// impl<'a> Parse<'a> for Import<'a> {
///     fn parse(parser: Parser<'a>) -> Result<Self> {
///         parser.parse::<kw::import>()?;
///         // ...
/// #       panic!()
///     }
/// }
/// ```
///
/// It is assumed here that the `(` and `)` tokens which surround an `import`
/// statement in the WebAssembly text format are parsed by the parent item
/// parsing `Import`.
///
/// Note that this is just a convention, so it's not necessarily required for
/// all types. It's recommended that your types stick to this convention where
/// possible to avoid nested calls to [`Parser::parens`] or accidentally trying
/// to parse too many parentheses.
///
/// # Examples
///
/// Let's say you want to define your own WebAssembly text format which only
/// contains imports and functions. You also require all imports to be listed
/// before all functions.
/// An example [`Parse`] implementation might look like:
///
/// ```
/// use wast::core::{Import, Func};
/// use wast::kw;
/// use wast::parser::{Parser, Parse, Result};
///
/// // Fields of a WebAssembly module which only allow imports and functions,
/// // and all imports must come before all the functions
/// struct OnlyImportsAndFunctions<'a> {
///     imports: Vec<Import<'a>>,
///     functions: Vec<Func<'a>>,
/// }
///
/// impl<'a> Parse<'a> for OnlyImportsAndFunctions<'a> {
///     fn parse(parser: Parser<'a>) -> Result<Self> {
///         // While the second token is `import` (the first is `(`, so we care
///         // about the second) we parse an `ast::ModuleImport` inside of
///         // parentheses. The `parens` function here ensures that what we
///         // parse inside of it is surrounded by `(` and `)`.
///         let mut imports = Vec::new();
///         while parser.peek2::<kw::import>()? {
///             let import = parser.parens(|p| p.parse())?;
///             imports.push(import);
///         }
///
///         // Afterwards we assume everything else is a function. Note that
///         // `parse` here is a generic function and type inference figures out
///         // that we're parsing functions here and imports above.
///         let mut functions = Vec::new();
///         while !parser.is_empty() {
///             let func = parser.parens(|p| p.parse())?;
///             functions.push(func);
///         }
///
///         Ok(OnlyImportsAndFunctions { imports, functions })
///     }
/// }
/// ```
pub trait Parse<'a>: Sized {
    /// Attempts to parse `Self` from `parser`, returning an error if it could
    /// not be parsed.
    ///
    /// This method will mutate the state of `parser` after attempting to parse
    /// an instance of `Self`. If an error happens then it is likely fatal and
    /// there is no guarantee of how many tokens have been consumed from
    /// `parser`.
    ///
    /// As recommended in the documentation of [`Parse`], implementations of
    /// this function should not start out by parsing `(` and `)` tokens, but
    /// rather parents calling recursive parsers should parse the `(` and `)`
    /// tokens for their child item that's being parsed.
    ///
    /// # Errors
    ///
    /// This function will return an error if `Self` could not be parsed. Note
    /// that creating an [`Error`] is not exactly a cheap operation, so
    /// [`Error`] is typically fatal and propagated all the way back to the top
    /// parse call site.
    fn parse(parser: Parser<'a>) -> Result<Self>;
}

impl<'a, T> Parse<'a> for Box<T>
where
    T: Parse<'a>,
{
    fn parse(parser: Parser<'a>) -> Result<Self> {
        Ok(Box::new(parser.parse()?))
    }
}

/// A trait for types which can be used to "peek" to see if they're the next
/// token in an input stream of [`Parser`].
///
/// Often when implementing [`Parse`] you'll need to query what the next token
/// in the stream is to figure out what to parse next. This [`Peek`] trait
/// defines the set of types that can be tested whether they're the next token
/// in the input stream.
///
/// Implementations of [`Peek`] should only be present on types that consume
/// exactly one token (not zero, not more, exactly one). Types implementing
/// [`Peek`] should also typically implement [`Parse`].
///
/// See the documentation of [`Parser::peek`] for example usage.
pub trait Peek {
    /// Tests to see whether this token is the first token within the [`Cursor`]
    /// specified.
    ///
    /// Returns `true` if [`Parse`] for this type is highly likely to succeed
    /// failing no other error conditions happening (like an integer literal
    /// being too big).
    fn peek(cursor: Cursor<'_>) -> Result<bool>;

    /// The same as `peek`, except it checks the token immediately following
    /// the current token.
    fn peek2(mut cursor: Cursor<'_>) -> Result<bool> {
        // Advance past the current token (if any), then peek at the next one.
        match cursor.token()? {
            Some(token) => cursor.advance_past(&token),
            None => return Ok(false),
        }
        Self::peek(cursor)
    }

    /// Returns a human-readable name of this token to display when generating
    /// errors about this token missing.
    fn display() -> &'static str;
}

/// A convenience type definition for `Result` where the error is hardwired to
/// [`Error`].
pub type Result<T, E = Error> = core::result::Result<T, E>;

/// A low-level buffer of tokens which represents a completely lexed file.
///
/// A `ParseBuffer` will immediately lex an entire file and then store all
/// tokens internally. A `ParseBuffer` is only used to pass to the top-level
/// [`parse`] function.
pub struct ParseBuffer<'a> {
    lexer: Lexer<'a>,
    // Current position in the token stream; interior mutability lets the
    // `Copy`-able `Parser` handles advance it.
    cur: Cell<Position>,
    // Annotation name -> number of live registrations for it.
    known_annotations: RefCell<HashMap<String, usize>>,
    track_instr_spans: bool,
    // Current depth of `parens` nesting.
    depth: Cell<usize>,
    // Arena rooting owned byte strings with the buffer's lifetime.
    strings: Bump,
}

/// The current position within a `Lexer` that we're at. This simultaneously
/// stores the byte position that the lexer was last positioned at as well as
/// the next significant token.
///
/// Note that "significant" here does not mean that `token` is the next token
/// to be lexed at `offset`. Instead it's the next non-whitespace,
/// non-annotation, non-comment token. This simple cache-of-sorts avoids
/// re-parsing tokens the majority of the time, or at least that's the
/// intention.
///
/// If `token` is set to `None` then it means that either it hasn't been
/// calculated yet or the lexer is at EOF. Basically it means go talk to the
/// lexer.
#[derive(Copy, Clone)]
struct Position {
    offset: usize,
    token: Option<Token>,
}

/// An in-progress parser for the tokens of a WebAssembly text file.
///
/// A `Parser` is the argument to the [`Parse`] trait and is how the input
/// stream is interacted with to parse new items. Cloning [`Parser`] or copying
/// a parser refers to the same stream of tokens to parse; you cannot clone a
/// [`Parser`] to get two independent streams of items.
///
/// For more information about a [`Parser`] see its methods.
#[derive(Copy, Clone)]
pub struct Parser<'a> {
    buf: &'a ParseBuffer<'a>,
}

/// A helpful structure to perform a lookahead of one token to determine what to
/// parse.
///
/// For more information see the [`Parser::lookahead1`] method.
pub struct Lookahead1<'a> {
    parser: Parser<'a>,
    // Token names peeked so far, used to build the "expected ..." error.
    attempts: Vec<&'static str>,
}

/// An immutable cursor into a list of tokens.
///
/// This cursor cannot be mutated but can be used to parse more tokens in a list
/// of tokens. Cursors are created from the [`Parser::step`] method. This is a
/// very low-level parsing structure and you likely won't use it much.
#[derive(Copy, Clone)]
pub struct Cursor<'a> {
    parser: Parser<'a>,
    pos: Position,
}

impl ParseBuffer<'_> {
    /// Creates a new [`ParseBuffer`] by lexing the given `input` completely.
    ///
    /// # Errors
    ///
    /// Returns an error if `input` fails to lex.
    pub fn new(input: &str) -> Result<ParseBuffer<'_>> {
        ParseBuffer::new_with_lexer(Lexer::new(input))
    }

    /// Creates a new [`ParseBuffer`] by lexing the given `input` completely.
    ///
    /// # Errors
    ///
    /// Returns an error if `input` fails to lex.
    pub fn new_with_lexer(lexer: Lexer<'_>) -> Result<ParseBuffer<'_>> {
        Ok(ParseBuffer {
            lexer,
            depth: Cell::new(0),
            cur: Cell::new(Position {
                offset: 0,
                token: None,
            }),
            known_annotations: Default::default(),
            strings: Default::default(),
            track_instr_spans: false,
        })
    }

    /// Indicates whether the [`Expression::instr_spans`] field will be filled
    /// in.
    ///
    /// This is useful when enabling DWARF debugging information via
    /// [`EncodeOptions::dwarf`], for example.
    ///
    /// [`Expression::instr_spans`]: crate::core::Expression::instr_spans
    /// [`EncodeOptions::dwarf`]: crate::core::EncodeOptions::dwarf
    pub fn track_instr_spans(&mut self, track: bool) -> &mut Self {
        self.track_instr_spans = track;
        self
    }

    fn parser(&self) -> Parser<'_> {
        Parser { buf: self }
    }

    /// Stores an owned allocation in this `Parser` to attach the lifetime of
    /// the vector to `self`.
    ///
    /// This will return a reference to `s`, but one that's safely rooted in the
    /// `Parser`.
    fn push_str(&self, s: Vec<u8>) -> &[u8] {
        self.strings.alloc_slice_copy(&s)
    }

    /// Lexes the next "significant" token from the `pos` specified.
    ///
    /// This will skip irrelevant tokens such as whitespace, comments, and
    /// unknown annotations.
    fn advance_token(&self, mut pos: usize) -> Result<Option<Token>> {
        let token = loop {
            let token = match self.lexer.parse(&mut pos)? {
                Some(token) => token,
                None => return Ok(None),
            };
            match token.kind {
                // Always skip whitespace and comments.
                TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment => {
                    continue;
                }

                // If an lparen is seen then this may be skipped if it's an
                // annotation of the form `(@foo ...)`. In this situation
                // everything up to and including the closing rparen is skipped.
                //
                // Note that the annotation is only skipped if it's an unknown
                // annotation as known annotations are specifically registered
                // as "someone's gonna parse this".
                TokenKind::LParen => {
                    if let Some(annotation) = self.lexer.annotation(pos)? {
                        let text = annotation.annotation(self.lexer.input())?;
                        match self.known_annotations.borrow().get(&text[..]) {
                            // Zero live registrations (or never registered):
                            // treat as unknown and skip the whole annotation.
                            Some(0) | None => {
                                self.skip_annotation(&mut pos)?;
                                continue;
                            }
                            Some(_) => {}
                        }
                    }
                    break token;
                }
                _ => break token,
            }
        };
        Ok(Some(token))
    }

    /// Skips a parenthesized annotation whose opening `(` has already been
    /// lexed, advancing `pos` past the matching `)`.
    ///
    /// Tracks paren depth so nested s-expressions inside the annotation are
    /// skipped too; errors if EOF is reached before the annotation closes.
    fn skip_annotation(&self, pos: &mut usize) -> Result<()> {
        let mut depth = 1;
        let span = Span { offset: *pos };
        loop {
            let token = match self.lexer.parse(pos)? {
                Some(token) => token,
                None => {
                    break Err(Error::new(span, "unclosed annotation".to_string()));
                }
            };
            match token.kind {
                TokenKind::LParen => depth += 1,
                TokenKind::RParen => {
                    depth -= 1;
                    if depth == 0 {
                        break Ok(());
                    }
                }
                _ => {}
            }
        }
    }
}

impl<'a> Parser<'a> {
    /// Returns whether there are no more `Token` tokens to parse from this
    /// [`Parser`].
    ///
    /// This indicates that either we've reached the end of the input, or we're
    /// a sub-[`Parser`] inside of a parenthesized expression and we've hit the
    /// `)` token.
    ///
    /// Note that if `false` is returned there *may* be more comments. Comments
    /// and whitespace are not considered for whether this parser is empty.
    pub fn is_empty(self) -> bool {
        match self.cursor().token() {
            // A closing paren terminates this sub-parser's input.
            Ok(Some(token)) => matches!(token.kind, TokenKind::RParen),
            Ok(None) => true,
            // A lex error means there is still "something" left to parse.
            Err(_) => false,
        }
    }

    #[cfg(feature = "wasm-module")]
    pub(crate) fn has_meaningful_tokens(self) -> bool {
        self.buf.lexer.iter(0).any(|t| match t {
            Ok(token) => !matches!(
                token.kind,
                TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment
            ),
            Err(_) => true,
        })
    }

    /// Parses a `T` from this [`Parser`].
    ///
    /// This method has a trivial definition (it simply calls
    /// [`T::parse`](Parse::parse)) but is here for syntactic purposes. This is
    /// what you'll call 99% of the time in a [`Parse`] implementation in order
    /// to parse sub-items.
    ///
    /// Typically you always want to use `?` with the result of this method, you
    /// should not handle errors and decide what else to parse. To handle
    /// branches in parsing, use [`Parser::peek`].
    ///
    /// # Examples
    ///
    /// A good example of using `parse` is to see how the [`TableType`] type is
    /// parsed in this crate. A [`TableType`] is defined in the official
    /// specification as [`tabletype`][spec] and is defined as:
    ///
    /// [spec]: https://webassembly.github.io/spec/core/text/types.html#table-types
    ///
    /// ```text
    /// tabletype ::= lim:limits et:reftype
    /// ```
    ///
    /// so to parse a [`TableType`] we recursively need to parse a [`Limits`]
    /// and a [`RefType`]
    ///
    /// ```
    /// # use wast::core::*;
    /// # use wast::parser::*;
    /// struct TableType<'a> {
    ///     limits: Limits,
    ///     elem: RefType<'a>,
    /// }
    ///
    /// impl<'a> Parse<'a> for TableType<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // parse the `lim` then `et` in sequence
    ///         Ok(TableType {
    ///             limits: parser.parse()?,
    ///             elem: parser.parse()?,
    ///         })
    ///     }
    /// }
    /// ```
    ///
    /// [`Limits`]: crate::core::Limits
    /// [`TableType`]: crate::core::TableType
    /// [`RefType`]: crate::core::RefType
    pub fn parse<T: Parse<'a>>(self) -> Result<T> {
        T::parse(self)
    }

    /// Performs a cheap test to see whether the current token in this stream is
    /// `T`.
    ///
    /// This method can be used to efficiently determine what next to parse. The
    /// [`Peek`] trait is defined for types which can be used to test if they're
    /// the next item in the input stream.
    ///
    /// Nothing is actually parsed in this method, nor does this mutate the
    /// state of this [`Parser`]. Instead, this simply performs a check.
    ///
    /// This method is frequently combined with the [`Parser::lookahead1`]
    /// method to automatically produce nice error messages if some tokens
    /// aren't found.
    ///
    /// # Examples
    ///
    /// For an example of using the `peek` method let's take a look at parsing
    /// the [`Limits`] type. This is [defined in the official spec][spec] as:
    ///
    /// ```text
    /// limits ::= n:u32
    ///          | n:u32 m:u32
    /// ```
    ///
    /// which means that it's either one `u32` token or two, so we need to know
    /// whether to consume two tokens or one:
    ///
    /// ```
    /// # use wast::parser::*;
    /// struct Limits {
    ///     min: u32,
    ///     max: Option<u32>,
    /// }
    ///
    /// impl<'a> Parse<'a> for Limits {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Always parse the first number...
    ///         let min = parser.parse()?;
    ///
    ///         // ... and then test if there's a second number before parsing
    ///         let max = if parser.peek::<u32>()? {
    ///             Some(parser.parse()?)
    ///         } else {
    ///             None
    ///         };
    ///
    ///         Ok(Limits { min, max })
    ///     }
    /// }
    /// ```
    ///
    /// [spec]: https://webassembly.github.io/spec/core/text/types.html#limits
    /// [`Limits`]: crate::core::Limits
    pub fn peek<T: Peek>(self) -> Result<bool> {
        T::peek(self.cursor())
    }

    /// Same as the [`Parser::peek`] method, except checks the next token, not
    /// the current token.
    pub fn peek2<T: Peek>(self) -> Result<bool> {
        T::peek2(self.cursor())
    }

    /// Same as the [`Parser::peek2`] method, except checks the next next token,
    /// not the next token.
    pub fn peek3<T: Peek>(self) -> Result<bool> {
        // Advance past two tokens, then peek at the third.
        let mut cursor = self.cursor();
        match cursor.token()? {
            Some(token) => cursor.advance_past(&token),
            None => return Ok(false),
        }
        match cursor.token()? {
            Some(token) => cursor.advance_past(&token),
            None => return Ok(false),
        }
        T::peek(cursor)
    }

    /// A helper structure to perform a sequence of `peek` operations and if
    /// they all fail produce a nice error message.
    ///
    /// This method purely exists for conveniently producing error messages and
    /// provides no functionality that [`Parser::peek`] doesn't already give.
    /// The [`Lookahead1`] structure has one main method [`Lookahead1::peek`],
    /// which is the same method as [`Parser::peek`]. The difference is that the
    /// [`Lookahead1::error`] method needs no arguments.
    ///
    /// # Examples
    ///
    /// Let's look at the parsing of [`Index`]. This type is either a `u32` or
    /// an [`Id`] and is used in name resolution primarily. The [official
    /// grammar for an index][spec] is:
    ///
    /// ```text
    /// idx ::= x:u32
    ///       | v:id
    /// ```
    ///
    /// Which is to say that an index is either a `u32` or an [`Id`]. When
    /// parsing an [`Index`] we can do:
    ///
    /// ```
    /// # use wast::token::*;
    /// # use wast::parser::*;
    /// enum Index<'a> {
    ///     Num(u32),
    ///     Id(Id<'a>),
    /// }
    ///
    /// impl<'a> Parse<'a> for Index<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         let mut l = parser.lookahead1();
    ///         if l.peek::<Id>()? {
    ///             Ok(Index::Id(parser.parse()?))
    ///         } else if l.peek::<u32>()? {
    ///             Ok(Index::Num(parser.parse()?))
    ///         } else {
    ///             // produces error message of `expected identifier or u32`
    ///             Err(l.error())
    ///         }
    ///     }
    /// }
    /// ```
    ///
    /// [spec]: https://webassembly.github.io/spec/core/text/modules.html#indices
    /// [`Index`]: crate::token::Index
    /// [`Id`]: crate::token::Id
    pub fn lookahead1(self) -> Lookahead1<'a> {
        Lookahead1 {
            attempts: Vec::new(),
            parser: self,
        }
    }

    /// Parse an item surrounded by parentheses.
    ///
    /// WebAssembly's text format is all based on s-expressions, so naturally
    /// you're going to want to parse a lot of parenthesized things! As noted in
    /// the documentation of [`Parse`] you typically don't parse your own
    /// surrounding `(` and `)` tokens, but the parser above you parsed them for
    /// you. This is the method the parser above you uses.
    ///
    /// This method will parse a `(` token, and then call `f` on a sub-parser
    /// which when finished asserts that a `)` token is the next token. This
    /// requires that `f` consumes all tokens leading up to the paired `)`.
    ///
    /// Usage will often simply be `parser.parens(|p| p.parse())?` to
    /// automatically parse a type within parentheses, but you can, as always,
    /// go crazy and do whatever you'd like too.
    ///
    /// # Examples
    ///
    /// A good example of this is to see how a `Module` is parsed. This isn't
    /// the exact definition, but it's close enough!
    ///
    /// ```
    /// # use wast::kw;
    /// # use wast::core::*;
    /// # use wast::parser::*;
    /// struct Module<'a> {
    ///     fields: Vec<ModuleField<'a>>,
    /// }
    ///
    /// impl<'a> Parse<'a> for Module<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Modules start out with a `module` keyword
    ///         parser.parse::<kw::module>()?;
    ///
    ///         // And then everything else is `(field ...)`, so while we've got
    ///         // items left we continuously parse parenthesized items.
    ///         let mut fields = Vec::new();
    ///         while !parser.is_empty() {
    ///             fields.push(parser.parens(|p| p.parse())?);
    ///         }
    ///         Ok(Module { fields })
    ///     }
    /// }
    /// ```
    pub fn parens<T>(self, f: impl FnOnce(Parser<'a>) -> Result<T>) -> Result<T> {
        self.buf.depth.set(self.buf.depth.get() + 1);
        // Remember where we started so a failed parse can rewind.
        let before = self.buf.cur.get();
        let res = self.step(|cursor| {
            let mut cursor = match cursor.lparen()? {
                Some(rest) => rest,
                None => return Err(cursor.error("expected `(`")),
            };
            cursor.parser.buf.cur.set(cursor.pos);
            let result = f(cursor.parser)?;

            // Reset our cursor's state to whatever the current state of the
            // parser is.
            cursor.pos = cursor.parser.buf.cur.get();

            match cursor.rparen()? {
                Some(rest) => Ok((result, rest)),
                None => Err(cursor.error("expected `)`")),
            }
        });
        self.buf.depth.set(self.buf.depth.get() - 1);
        // On failure restore the position saved before the `(` was consumed.
        if res.is_err() {
            self.buf.cur.set(before);
        }
        res
    }

    /// Return the depth of nested parens we've parsed so far.
    ///
    /// This is a low-level method that is only useful for implementing
    /// recursion limits in custom parsers.
    pub fn parens_depth(&self) -> usize {
        self.buf.depth.get()
    }

    /// Checks that the parser parens depth hasn't exceeded the maximum depth.
    #[cfg(feature = "wasm-module")]
    pub(crate) fn depth_check(&self) -> Result<()> {
        if self.parens_depth() > MAX_PARENS_DEPTH {
            Err(self.error("item nesting too deep"))
        } else {
            Ok(())
        }
    }

    fn cursor(self) -> Cursor<'a> {
        Cursor {
            parser: self,
            pos: self.buf.cur.get(),
        }
    }

    /// A low-level parsing method you probably won't use.
    ///
    /// This is used to implement parsing of the most primitive types in the
    /// [`core`](crate::core) module. You probably don't want to use this, but
    /// probably want to use something like [`Parser::parse`] or
    /// [`Parser::parens`].
    pub fn step<F, T>(self, f: F) -> Result<T>
    where
        F: FnOnce(Cursor<'a>) -> Result<(T, Cursor<'a>)>,
    {
        let (result, cursor) = f(self.cursor())?;
        self.buf.cur.set(cursor.pos);
        Ok(result)
    }

    /// Creates an error whose line/column information is pointing at the
    /// current token.
    ///
    /// This is used to produce human-readable error messages which point to the
    /// right location in the input stream, and the `msg` here is arbitrary text
    /// used to associate with the error and indicate why it was generated.
    pub fn error(self, msg: impl fmt::Display) -> Error {
        self.error_at(self.cursor().cur_span(), msg)
    }

    /// Creates an error whose line/column information is pointing at the
    /// given span.
    pub fn error_at(self, span: Span, msg: impl fmt::Display) -> Error {
        Error::parse(span, self.buf.lexer.input(), msg.to_string())
    }

    /// Returns the span of the current token
    pub fn cur_span(&self) -> Span {
        self.cursor().cur_span()
    }

    /// Returns the span of the previous token
    pub fn prev_span(&self) -> Span {
        self.cursor()
            .prev_span()
            .unwrap_or_else(|| Span::from_offset(0))
    }

    /// Registers a new known annotation with this parser to allow parsing
    /// annotations with this name.
    ///
    /// [WebAssembly annotations][annotation] are a proposal for the text format
    /// which allows decorating the text format with custom structured
    /// information. By default all annotations are ignored when parsing, but
    /// the whole purpose of them is to sometimes parse them!
    ///
    /// To support parsing text annotations this method is used to allow
    /// annotations and their tokens to *not* be skipped. Once an annotation is
    /// registered with this method, then while the return value has not been
    /// dropped (e.g. the scope of where this function is called) annotations
    /// with the name `annotation` will be part of the token stream and not
    /// implicitly skipped.
    ///
    /// # Skipping annotations
    ///
    /// The behavior of skipping unknown/unregistered annotations can be
    /// somewhat subtle and surprising, so if you're interested in parsing
    /// annotations it's important to point out the importance of this method
    /// and where to call it.
    ///
    /// Generally when parsing tokens you'll be bottoming out in various
    /// `Cursor` methods. These are all documented as advancing the stream as
    /// much as possible to the next token, skipping "irrelevant stuff" like
    /// comments, whitespace, etc. The `Cursor` methods will also skip unknown
    /// annotations. This means that if you parse *any* token, it will skip over
    /// any number of annotations that are unknown at all times.
    ///
    /// To parse an annotation you must, before parsing any token of the
    /// annotation, register the annotation via this method. This includes the
    /// beginning `(` token, which is otherwise skipped if the annotation isn't
    /// marked as registered. Typically parsers parse the *contents* of an
    /// s-expression, so this means that the outer parser of an s-expression
    /// must register the custom annotation name, rather than the inner parser.
    ///
    /// # Return
    ///
    /// This function returns an RAII guard which, when dropped, will unregister
    /// the `annotation` given. Parsing `annotation` is only supported while the
    /// returned value is still alive, and once dropped the parser will go back
    /// to skipping annotations with the name `annotation`.
    ///
    /// # Example
    ///
    /// Let's see an example of how the `@name` annotation is parsed for modules
    /// to get an idea of how this works:
    ///
    /// ```
    /// # use wast::kw;
    /// # use wast::token::NameAnnotation;
    /// # use wast::parser::*;
    /// struct Module<'a> {
    ///     name: Option<NameAnnotation<'a>>,
    /// }
    ///
    /// impl<'a> Parse<'a> for Module<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Modules start out with a `module` keyword
    ///         parser.parse::<kw::module>()?;
    ///
    ///         // Next may be `(@name "foo")`. Typically this annotation would
    ///         // be skipped, but we don't want it skipped, so we register it.
    ///         // Note that the parse implementation of
    ///         // `Option<NameAnnotation>` is the one that consumes the
    ///         // parentheses here.
    ///         let _r = parser.register_annotation("name");
    ///         let name = parser.parse()?;
    ///
    ///         // ... and normally you'd otherwise parse module fields here ...
    ///
    ///         Ok(Module { name })
    ///     }
    /// }
    /// ```
    ///
    /// Another example is how we parse the `@custom` annotation. Note that this
    /// is parsed as part of `ModuleField`, so note how the annotation is
    /// registered *before* we parse the parentheses of the annotation.
    ///
    /// ```
    /// # use wast::{kw, annotation};
    /// # use wast::core::Custom;
    /// # use wast::parser::*;
    /// struct Module<'a> {
    ///     fields: Vec<ModuleField<'a>>,
    /// }
    ///
    /// impl<'a> Parse<'a> for Module<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Modules start out with a `module` keyword
    ///         parser.parse::<kw::module>()?;
    ///
    ///         // register the `@custom` annotation *first* before we start
    ///         // parsing fields, because each field is contained in
    ///         // parentheses and to parse the parentheses of an annotation we
    ///         // have to know to not skip it.
    ///         let _r = parser.register_annotation("custom");
    ///
    ///         let mut fields = Vec::new();
    ///         while !parser.is_empty() {
    ///             fields.push(parser.parens(|p| p.parse())?);
    ///         }
    ///         Ok(Module { fields })
    ///     }
    /// }
    ///
    /// enum ModuleField<'a> {
    ///     Custom(Custom<'a>),
    ///     // ...
    /// }
    ///
    /// impl<'a> Parse<'a> for ModuleField<'a> {
    ///     fn parse(parser: Parser<'a>) -> Result<Self> {
    ///         // Note that because we have previously registered the `@custom`
    ///         // annotation with the parser we know that `peek` methods like
    ///         // this, working on the annotation token, are enabled to ever
    ///         // return `true`.
    ///         if parser.peek::<annotation::custom>()? {
    ///             return Ok(ModuleField::Custom(parser.parse()?));
    ///         }
    ///
    ///         // .. typically we'd parse other module fields here...
    ///
    ///         Err(parser.error("unknown module field"))
    ///     }
    /// }
    /// ```
    ///
    /// [annotation]: https://github.com/WebAssembly/annotations
    pub fn register_annotation<'b>(self, annotation: &'b str) -> impl Drop + 'b
    where
        'a: 'b,
    {
        // Bump the live-registration count for this annotation name,
        // inserting a zeroed slot on first registration.
        let mut annotations = self.buf.known_annotations.borrow_mut();
        if !annotations.contains_key(annotation) {
            annotations.insert(annotation.to_string(), 0);
        }
        *annotations.get_mut(annotation).unwrap() += 1;

        return RemoveOnDrop(self, annotation);

        // RAII guard which decrements the registration count on drop.
        struct RemoveOnDrop<'a>(Parser<'a>, &'a str);

        impl Drop for RemoveOnDrop<'_> {
            fn drop(&mut self) {
                let mut annotations = self.0.buf.known_annotations.borrow_mut();
                let slot = annotations.get_mut(self.1).unwrap();
                *slot -= 1;
            }
        }
    }

    #[cfg(feature = "wasm-module")]
    pub(crate) fn track_instr_spans(&self) -> bool {
        self.buf.track_instr_spans
    }

    #[cfg(feature = "wasm-module")]
    pub(crate) fn with_standard_annotations_registered<R>(
        self,
        f: impl FnOnce(Self) -> Result<R>,
    ) -> Result<R> {
        let _r = self.register_annotation("custom");
        let _r = self.register_annotation("producers");
        let _r = self.register_annotation("name");
        let _r = self.register_annotation("dylink.0");
        let _r = self.register_annotation("metadata.code.branch_hint");
        f(self)
    }
}

impl<'a> Cursor<'a> {
    /// Returns the span of the next `Token` token.
    ///
    /// Does not take into account whitespace or comments.
    pub fn cur_span(&self) -> Span {
        let offset = match self.token() {
            Ok(Some(t)) => t.offset,
            // At EOF, point at the end of the input.
            Ok(None) => self.parser.buf.lexer.input().len(),
            Err(_) => self.pos.offset,
        };
        Span { offset }
    }

    /// Returns the span of the previous `Token` token.
    ///
    /// Does not take into account whitespace or comments.
    pub(crate) fn prev_span(&self) -> Option<Span> {
        // TODO: this currently returns the cursor's *current* offset rather
        // than the start of the previous token; the token-buffer-based
        // implementation below is disabled.
        Some(Span {
            offset: self.pos.offset,
        })
        // let (token, _) = self.parser.buf.tokens.get(self.cur.checked_sub(1)?)?;
        // Some(Span {
        //     offset: token.offset,
        // })
    }

    /// Same as [`Parser::error`], but works with the current token in this
    /// [`Cursor`] instead.
    pub fn error(&self, msg: impl fmt::Display) -> Error {
        self.parser.error_at(self.cur_span(), msg)
    }

    /// Tests whether the next token is an lparen
    pub fn peek_lparen(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::LParen,
                ..
            })
        ))
    }

    /// Tests whether the next token is an rparen
    pub fn peek_rparen(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::RParen,
                ..
            })
        ))
    }

    /// Tests whether the next token is an id
    pub fn peek_id(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Id,
                ..
            })
        ))
    }

    /// Tests whether the next token is reserved
    pub fn peek_reserved(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Reserved,
                ..
            })
        ))
    }

    /// Tests whether the next token is a keyword
    pub fn peek_keyword(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Keyword,
                ..
            })
        ))
    }

    /// Tests whether the next token is an integer
    pub fn peek_integer(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Integer(_),
                ..
            })
        ))
    }

    /// Tests whether the next token is a float
    pub fn peek_float(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Float(_),
                ..
            })
        ))
    }

    /// Tests whether the next token is a string
    pub fn peek_string(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::String,
                ..
            })
        ))
    }

    /// Attempts to advance this cursor if the current token is a `(`.
    ///
    /// If the current token is `(`, returns a new [`Cursor`] pointing at the
    /// rest of the tokens in the stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn lparen(mut self) -> Result<Option<Self>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::LParen => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some(self))
    }

    /// Attempts to advance this cursor if the current token is a `)`.
    ///
    /// If the current token is `)`, returns a new [`Cursor`] pointing at the
    /// rest of the tokens in the stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn rparen(mut self) -> Result<Option<Self>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::RParen => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some(self))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Id`](crate::lexer::Token)
    ///
    /// If the current token is `Id`, returns the identifier minus the leading
    /// `$` character as well as a new [`Cursor`] pointing at the rest of the
    /// tokens in the stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn id(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Id => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        let id = match token.id(self.parser.buf.lexer.input())? {
            Cow::Borrowed(id) => id,
            // Our `self.parser.buf` only retains `Vec<u8>` so briefly convert
            // this owned string to `Vec<u8>` and then convert it back to `&str`
            // out the other end.
            Cow::Owned(s) => {
                core::str::from_utf8(self.parser.buf.push_str(s.into_bytes())).unwrap()
            }
        };
        Ok(Some((id, self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Keyword`](crate::lexer::Token)
    ///
    /// If the current token is `Keyword`, returns the keyword as well as a new
    /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise
    /// returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn keyword(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Keyword => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some((token.keyword(self.parser.buf.lexer.input()), self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Annotation`](crate::lexer::Token)
    ///
    /// If the current token is `Annotation`, returns the annotation token as well
    /// as a new [`Cursor`] pointing at the rest of the tokens in the stream.
    /// Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn annotation(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Annotation => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        let annotation = match token.annotation(self.parser.buf.lexer.input())? {
            Cow::Borrowed(id) => id,
            // Our `self.parser.buf` only retains `Vec<u8>` so briefly convert
            // this owned string to `Vec<u8>` and then convert it back to `&str`
            // out the other end.
            Cow::Owned(s) => {
                core::str::from_utf8(self.parser.buf.push_str(s.into_bytes())).unwrap()
            }
        };
        Ok(Some((annotation, self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Reserved`](crate::lexer::Token)
    ///
    /// If the current token is `Reserved`, returns the reserved token as well
    /// as a new [`Cursor`] pointing at the rest of the tokens in the stream.
    /// Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn reserved(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Reserved => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some((token.reserved(self.parser.buf.lexer.input()), self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Integer`](crate::lexer::Token)
    ///
    /// If the current token is `Integer`, returns the integer as well as a new
    /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise
    /// returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn integer(mut self) -> Result<Option<(Integer<'a>, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        let i = match token.kind {
            TokenKind::Integer(i) => i,
            _ => return Ok(None),
        };
        self.advance_past(&token);
        Ok(Some((
            token.integer(self.parser.buf.lexer.input(), i),
            self,
        )))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Float`](crate::lexer::Token)
    ///
    /// If the current token is `Float`, returns the float as well as a new
    /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise
    /// returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn float(mut self) -> Result<Option<(Float<'a>, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        let f = match token.kind {
            TokenKind::Float(f) => f,
            _ => return Ok(None),
        };
        self.advance_past(&token);
        Ok(Some((token.float(self.parser.buf.lexer.input(), f), self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::String`](crate::lexer::Token)
    ///
    /// If the current token is `String`, returns the byte value of the string
    /// as well as a new [`Cursor`] pointing at the rest of the tokens in the
    /// stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn string(mut self) -> Result<Option<(&'a [u8], Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::String => {}
            _ => return Ok(None),
        }
        // Intern an owned (escaped) string into the parse buffer so the
        // returned slice can borrow from `'a`.
        let string = match token.string(self.parser.buf.lexer.input()) {
            Cow::Borrowed(s) => s,
            Cow::Owned(s) => self.parser.buf.push_str(s),
        };
        self.advance_past(&token);
        Ok(Some((string, self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::LineComment`](crate::lexer::Token) or a
    /// [`Token::BlockComment`](crate::lexer::Token)
    ///
    /// This function will only skip whitespace, no other tokens.
    pub fn comment(mut self) -> Result<Option<(&'a str, Self)>> {
        let start = self.pos.offset;
        // Comments are normally skipped by `token()`, so bypass the cached
        // token and lex raw tokens directly, skipping only whitespace.
        self.pos.token = None;
        let comment = loop {
            let token = match self.parser.buf.lexer.parse(&mut self.pos.offset)? {
                Some(token) => token,
                None => return Ok(None),
            };
            match token.kind {
                TokenKind::LineComment | TokenKind::BlockComment => {
                    break token.src(self.parser.buf.lexer.input());
                }
                TokenKind::Whitespace => {}
                _ => {
                    // Not a comment: rewind to where we started and bail.
                    self.pos.offset = start;
                    return Ok(None);
                }
            }
        };
        Ok(Some((comment, self)))
    }

    // Returns the significant token at the cursor, using the cached token
    // when present and otherwise advancing past irrelevant tokens from
    // `self.pos.offset`.
    fn token(&self) -> Result<Option<Token>> {
        match self.pos.token {
            Some(token) => Ok(Some(token)),
            None => self.parser.buf.advance_token(self.pos.offset),
        }
    }

    // Moves the cursor just past `token` and eagerly caches the next
    // significant token; lex errors here are deferred (treated as "no
    // token") until the next `token()` call surfaces them.
    fn advance_past(&mut self, token: &Token) {
        self.pos.offset = token.offset + (token.len as usize);
        self.pos.token = self
            .parser
            .buf
            .advance_token(self.pos.offset)
            .unwrap_or(None);
    }
}

impl<'a> Lookahead1<'a> {
    /// Attempts to see if `T` is the next token in the [`Parser`] this
    /// [`Lookahead1`] references.
    ///
    /// For more information see [`Parser::lookahead1`] and [`Parser::peek`]
    pub fn peek<T: Peek>(&mut self) -> Result<bool> {
        // On failure, remember what was expected so `error()` can report it.
        Ok(if self.parser.peek::<T>()? {
            true
        } else {
            self.attempts.push(T::display());
            false
        })
    }

    /// Returns the underlying parser that this lookahead is looking at.
    pub fn parser(&self) -> Parser<'a> {
        self.parser
    }

    /// Generates an error message saying that one of the tokens passed to
    /// the [`Lookahead1::peek`] method was expected.
    ///
    /// Before calling this method you should call [`Lookahead1::peek`] for all
    /// possible tokens you'd like to parse.
1423 pub fn error(self) -> Error { 1424 match self.attempts.len() { 1425 0 => { 1426 if self.parser.is_empty() { 1427 self.parser.error("unexpected end of input") 1428 } else { 1429 self.parser.error("unexpected token") 1430 } 1431 } 1432 1 => { 1433 let message = format!("unexpected token, expected {}", self.attempts[0]); 1434 self.parser.error(&message) 1435 } 1436 2 => { 1437 let message = format!( 1438 "unexpected token, expected {} or {}", 1439 self.attempts[0], self.attempts[1] 1440 ); 1441 self.parser.error(&message) 1442 } 1443 _ => { 1444 let join = self.attempts.join(", "); 1445 let message = format!("unexpected token, expected one of: {}", join); 1446 self.parser.error(&message) 1447 } 1448 } 1449 } 1450} 1451 1452impl<'a, T: Peek + Parse<'a>> Parse<'a> for Option<T> { 1453 fn parse(parser: Parser<'a>) -> Result<Option<T>> { 1454 if parser.peek::<T>()? { 1455 Ok(Some(parser.parse()?)) 1456 } else { 1457 Ok(None) 1458 } 1459 } 1460}