// Next Generation WASM Microkernel Operating System
1//! Traits for parsing the WebAssembly Text format
2//!
3//! This module contains the traits, abstractions, and utilities needed to
4//! define custom parsers for WebAssembly text format items. This module exposes
5//! a recursive descent parsing strategy and centers around the [`Parse`] trait
6//! for defining new fragments of WebAssembly text syntax.
7//!
8//! The top-level [`parse`] function can be used to fully parse AST fragments:
9//!
10//! ```
11//! use wast::Wat;
12//! use wast::parser::{self, ParseBuffer};
13//!
14//! # fn foo() -> Result<(), wast::Error> {
15//! let wat = "(module (func))";
16//! let buf = ParseBuffer::new(wat)?;
17//! let module = parser::parse::<Wat>(&buf)?;
18//! # Ok(())
19//! # }
20//! ```
21//!
22//! and you can also define your own new syntax with the [`Parse`] trait:
23//!
24//! ```
25//! use wast::kw;
26//! use wast::core::{Import, Func};
27//! use wast::parser::{Parser, Parse, Result};
28//!
29//! // Fields of a WebAssembly which only allow imports and functions, and all
30//! // imports must come before all the functions
31//! struct OnlyImportsAndFunctions<'a> {
32//! imports: Vec<Import<'a>>,
33//! functions: Vec<Func<'a>>,
34//! }
35//!
36//! impl<'a> Parse<'a> for OnlyImportsAndFunctions<'a> {
37//! fn parse(parser: Parser<'a>) -> Result<Self> {
38//! // While the second token is `import` (the first is `(`, so we care
39//! // about the second) we parse an `ast::ModuleImport` inside of
40//! // parentheses. The `parens` function here ensures that what we
41//! // parse inside of it is surrounded by `(` and `)`.
42//! let mut imports = Vec::new();
43//! while parser.peek2::<kw::import>()? {
44//! let import = parser.parens(|p| p.parse())?;
45//! imports.push(import);
46//! }
47//!
48//! // Afterwards we assume everything else is a function. Note that
49//! // `parse` here is a generic function and type inference figures out
50//! // that we're parsing functions here and imports above.
51//! let mut functions = Vec::new();
52//! while !parser.is_empty() {
53//! let func = parser.parens(|p| p.parse())?;
54//! functions.push(func);
55//! }
56//!
57//! Ok(OnlyImportsAndFunctions { imports, functions })
58//! }
59//! }
60//! ```
61//!
62//! This module is heavily inspired by [`syn`](https://docs.rs/syn) so you can
63//! likely also draw inspiration from the excellent examples in the `syn` crate.
64
65use crate::Error;
66use crate::lexer::{Float, Integer, Lexer, Token, TokenKind};
67use crate::token::Span;
68use alloc::borrow::Cow;
69use alloc::boxed::Box;
70use alloc::format;
71use alloc::string::String;
72use alloc::string::ToString;
73use alloc::vec::Vec;
74use bumpalo::Bump;
75use core::cell::{Cell, RefCell};
76use core::fmt;
77use hashbrown::HashMap;
78
79/// The maximum recursive depth of parens to parse.
80///
81/// This is sort of a fundamental limitation of the way this crate is
82/// designed. Everything is done through recursive descent parsing which
83/// means, well, that we're recursively going down the stack as we parse
84/// nested data structures. While we can handle this for wasm expressions
85/// since that's a pretty local decision, handling this for nested
/// modules/components would be far trickier. For now we just say that when
87/// the parser goes too deep we return an error saying there's too many
88/// nested items. It would be great to not return an error here, though!
89#[cfg(feature = "wasm-module")]
90pub(crate) const MAX_PARENS_DEPTH: usize = 100;
91
92/// A top-level convenience parsing function that parses a `T` from `buf` and
/// requires that all tokens in `buf` are consumed.
94///
95/// This generic parsing function can be used to parse any `T` implementing the
96/// [`Parse`] trait. It is not used from [`Parse`] trait implementations.
97///
98/// # Examples
99///
100/// ```
101/// use wast::Wat;
102/// use wast::parser::{self, ParseBuffer};
103///
104/// # fn foo() -> Result<(), wast::Error> {
105/// let wat = "(module (func))";
106/// let buf = ParseBuffer::new(wat)?;
107/// let module = parser::parse::<Wat>(&buf)?;
108/// # Ok(())
109/// # }
110/// ```
111///
112/// or parsing simply a fragment
113///
114/// ```
115/// use wast::parser::{self, ParseBuffer};
116///
117/// # fn foo() -> Result<(), wast::Error> {
118/// let wat = "12";
119/// let buf = ParseBuffer::new(wat)?;
120/// let val = parser::parse::<u32>(&buf)?;
121/// assert_eq!(val, 12);
122/// # Ok(())
123/// # }
124/// ```
125pub fn parse<'a, T: Parse<'a>>(buf: &'a ParseBuffer<'a>) -> Result<T> {
126 let parser = buf.parser();
127 let result = parser.parse()?;
128 if parser.cursor().token()?.is_none() {
129 Ok(result)
130 } else {
131 Err(parser.error("extra tokens remaining after parse"))
132 }
133}
134
135/// A trait for parsing a fragment of syntax in a recursive descent fashion.
136///
/// The [`Parse`] trait is the main abstraction you'll be working with when defining
138/// custom parser or custom syntax for your WebAssembly text format (or when
139/// using the official format items). Almost all items in the
140/// [`core`](crate::core) module implement the [`Parse`] trait, and you'll
141/// commonly use this with:
142///
143/// * The top-level [`parse`] function to parse an entire input.
144/// * The intermediate [`Parser::parse`] function to parse an item out of an
145/// input stream and then parse remaining items.
146///
/// Implementations of [`Parse`] take a [`Parser`] as input and will mutate the
/// parser as they parse syntax. Once a token is consumed it cannot be
149/// "un-consumed". Utilities such as [`Parser::peek`] and [`Parser::lookahead1`]
150/// can be used to determine what to parse next.
151///
152/// ## When to parse `(` and `)`?
153///
154/// Conventionally types are not responsible for parsing their own `(` and `)`
155/// tokens which surround the type. For example WebAssembly imports look like:
156///
157/// ```text
158/// (import "foo" "bar" (func (type 0)))
159/// ```
160///
161/// but the [`Import`](crate::core::Import) type parser looks like:
162///
163/// ```
164/// # use wast::kw;
165/// # use wast::parser::{Parser, Parse, Result};
166/// # struct Import<'a>(&'a str);
167/// impl<'a> Parse<'a> for Import<'a> {
168/// fn parse(parser: Parser<'a>) -> Result<Self> {
169/// parser.parse::<kw::import>()?;
170/// // ...
171/// # panic!()
172/// }
173/// }
174/// ```
175///
176/// It is assumed here that the `(` and `)` tokens which surround an `import`
177/// statement in the WebAssembly text format are parsed by the parent item
178/// parsing `Import`.
179///
180/// Note that this is just a convention, so it's not necessarily required for
181/// all types. It's recommended that your types stick to this convention where
182/// possible to avoid nested calls to [`Parser::parens`] or accidentally trying
183/// to parse too many parenthesis.
184///
185/// # Examples
186///
187/// Let's say you want to define your own WebAssembly text format which only
188/// contains imports and functions. You also require all imports to be listed
189/// before all functions. An example [`Parse`] implementation might look like:
190///
191/// ```
192/// use wast::core::{Import, Func};
193/// use wast::kw;
194/// use wast::parser::{Parser, Parse, Result};
195///
196/// // Fields of a WebAssembly which only allow imports and functions, and all
197/// // imports must come before all the functions
198/// struct OnlyImportsAndFunctions<'a> {
199/// imports: Vec<Import<'a>>,
200/// functions: Vec<Func<'a>>,
201/// }
202///
203/// impl<'a> Parse<'a> for OnlyImportsAndFunctions<'a> {
204/// fn parse(parser: Parser<'a>) -> Result<Self> {
205/// // While the second token is `import` (the first is `(`, so we care
206/// // about the second) we parse an `ast::ModuleImport` inside of
207/// // parentheses. The `parens` function here ensures that what we
208/// // parse inside of it is surrounded by `(` and `)`.
209/// let mut imports = Vec::new();
210/// while parser.peek2::<kw::import>()? {
211/// let import = parser.parens(|p| p.parse())?;
212/// imports.push(import);
213/// }
214///
215/// // Afterwards we assume everything else is a function. Note that
216/// // `parse` here is a generic function and type inference figures out
217/// // that we're parsing functions here and imports above.
218/// let mut functions = Vec::new();
219/// while !parser.is_empty() {
220/// let func = parser.parens(|p| p.parse())?;
221/// functions.push(func);
222/// }
223///
224/// Ok(OnlyImportsAndFunctions { imports, functions })
225/// }
226/// }
227/// ```
pub trait Parse<'a>: Sized {
    /// Attempts to parse `Self` from `parser`, returning an error if it could
    /// not be parsed.
    ///
    /// This method will mutate the state of `parser` after attempting to parse
    /// an instance of `Self`. If an error happens then it is likely fatal and
    /// there is no guarantee of how many tokens have been consumed from
    /// `parser`.
    ///
    /// As recommended in the documentation of [`Parse`], implementations of
    /// this function should not start out by parsing `(` and `)` tokens, but
    /// rather parents calling recursive parsers should parse the `(` and `)`
    /// tokens for their child item that's being parsed.
    ///
    /// # Errors
    ///
    /// This function will return an error if `Self` could not be parsed. Note
    /// that creating an [`Error`] is not exactly a cheap operation, so
    /// [`Error`] is typically fatal and propagated all the way back to the top
    /// parse call site. Implementations are encouraged to create errors via
    /// [`Parser::error`] so that the reported span points at the offending
    /// token.
    fn parse(parser: Parser<'a>) -> Result<Self>;
}
250
251impl<'a, T> Parse<'a> for Box<T>
252where
253 T: Parse<'a>,
254{
255 fn parse(parser: Parser<'a>) -> Result<Self> {
256 Ok(Box::new(parser.parse()?))
257 }
258}
259
/// A trait for types which can be used to "peek" to see if they're the next token
261/// in an input stream of [`Parser`].
262///
263/// Often when implementing [`Parse`] you'll need to query what the next token
264/// in the stream is to figure out what to parse next. This [`Peek`] trait
265/// defines the set of types that can be tested whether they're the next token
266/// in the input stream.
267///
268/// Implementations of [`Peek`] should only be present on types that consume
/// exactly one token (not zero, not more, exactly one). Types implementing
/// [`Peek`] should also typically implement [`Parse`].
272///
273/// See the documentation of [`Parser::peek`] for example usage.
274pub trait Peek {
275 /// Tests to see whether this token is the first token within the [`Cursor`]
276 /// specified.
277 ///
278 /// Returns `true` if [`Parse`] for this type is highly likely to succeed
279 /// failing no other error conditions happening (like an integer literal
280 /// being too big).
281 fn peek(cursor: Cursor<'_>) -> Result<bool>;
282
283 /// The same as `peek`, except it checks the token immediately following
284 /// the current token.
285 fn peek2(mut cursor: Cursor<'_>) -> Result<bool> {
286 match cursor.token()? {
287 Some(token) => cursor.advance_past(&token),
288 None => return Ok(false),
289 }
290 Self::peek(cursor)
291 }
292
293 /// Returns a human-readable name of this token to display when generating
294 /// errors about this token missing.
295 fn display() -> &'static str;
296}
297
/// A convenience type definition for `Result` where the error is hardwired to
/// [`Error`].
///
/// The error type defaults to this crate's [`Error`] but can still be
/// overridden with an explicit second type parameter.
pub type Result<T, E = Error> = core::result::Result<T, E>;
301
/// A low-level buffer of tokens which represents a completely lexed file.
///
/// A `ParseBuffer` will immediately lex an entire file and then store all
/// tokens internally. A `ParseBuffer` is only used to pass to the top-level
/// [`parse`] function.
pub struct ParseBuffer<'a> {
    // Lexer over the original input string.
    lexer: Lexer<'a>,
    // Current position plus a cache of the next significant token (see
    // `Position` for what "significant" means here).
    cur: Cell<Position>,
    // Count of active registrations per annotation name; annotations with a
    // positive count are kept in the token stream instead of being skipped.
    known_annotations: RefCell<HashMap<String, usize>>,
    // Whether instruction spans should be recorded while parsing
    // (see `track_instr_spans`).
    track_instr_spans: bool,
    // Current depth of `Parser::parens` nesting.
    depth: Cell<usize>,
    // Arena holding owned byte strings whose lifetime is tied to this buffer
    // (see `push_str`).
    strings: Bump,
}
315
/// The current position within a `Lexer` that we're at. This simultaneously
/// stores the byte position that the lexer was last positioned at as well as
/// the next significant token.
///
/// Note that "significant" here does not mean that `token` is the next token
/// to be lexed at `offset`. Instead it's the next non-whitespace,
/// non-annotation, non-comment token. This simple cache-of-sorts avoids
/// re-parsing tokens the majority of the time, or at least that's the
/// intention.
///
/// If `token` is set to `None` then it means that either it hasn't been
/// calculated yet or the lexer is at EOF. Basically it means go talk to the
/// lexer.
#[derive(Copy, Clone)]
struct Position {
    // Byte offset into the input that the lexer was last positioned at.
    offset: usize,
    // Cached next significant token at `offset`, if already computed.
    token: Option<Token>,
}
334
/// An in-progress parser for the tokens of a WebAssembly text file.
///
/// A `Parser` is the argument to the [`Parse`] trait and is how the input
/// stream is interacted with to parse new items. Cloning or copying a
/// [`Parser`] refers to the same underlying stream of tokens to parse; you
/// cannot clone a [`Parser`] to obtain two independent streams of tokens.
///
/// For more information about a [`Parser`] see its methods.
#[derive(Copy, Clone)]
pub struct Parser<'a> {
    // Shared parse state; the current position lives in `buf.cur`, which is
    // why all copies of this parser advance together.
    buf: &'a ParseBuffer<'a>,
}
347
/// A helpful structure to perform a lookahead of one token to determine what to
/// parse.
///
/// For more information see the [`Parser::lookahead1`] method.
pub struct Lookahead1<'a> {
    // The parser whose current token is being tested.
    parser: Parser<'a>,
    // Display names of the tokens peeked so far, collected for use in the
    // `Lookahead1::error` message.
    attempts: Vec<&'static str>,
}
356
/// An immutable cursor into a list of tokens.
///
/// This cursor cannot be mutated but can be used to parse more tokens in a list
/// of tokens. Cursors are created from the [`Parser::step`] method. This is a
/// very low-level parsing structure and you likely won't use it much.
#[derive(Copy, Clone)]
pub struct Cursor<'a> {
    // The parser this cursor was created from.
    parser: Parser<'a>,
    // The position this cursor points at; only written back to the parser via
    // `Parser::step` (or manually, as in `Parser::parens`).
    pos: Position,
}
367
368impl ParseBuffer<'_> {
369 /// Creates a new [`ParseBuffer`] by lexing the given `input` completely.
370 ///
371 /// # Errors
372 ///
373 /// Returns an error if `input` fails to lex.
374 pub fn new(input: &str) -> Result<ParseBuffer<'_>> {
375 ParseBuffer::new_with_lexer(Lexer::new(input))
376 }
377
378 /// Creates a new [`ParseBuffer`] by lexing the given `input` completely.
379 ///
380 /// # Errors
381 ///
382 /// Returns an error if `input` fails to lex.
383 pub fn new_with_lexer(lexer: Lexer<'_>) -> Result<ParseBuffer<'_>> {
384 Ok(ParseBuffer {
385 lexer,
386 depth: Cell::new(0),
387 cur: Cell::new(Position {
388 offset: 0,
389 token: None,
390 }),
391 known_annotations: Default::default(),
392 strings: Default::default(),
393 track_instr_spans: false,
394 })
395 }
396
397 /// Indicates whether the [`Expression::instr_spans`] field will be filled
398 /// in.
399 ///
400 /// This is useful when enabling DWARF debugging information via
401 /// [`EncodeOptions::dwarf`], for example.
402 ///
403 /// [`Expression::instr_spans`]: crate::core::Expression::instr_spans
404 /// [`EncodeOptions::dwarf`]: crate::core::EncodeOptions::dwarf
405 pub fn track_instr_spans(&mut self, track: bool) -> &mut Self {
406 self.track_instr_spans = track;
407 self
408 }
409
410 fn parser(&self) -> Parser<'_> {
411 Parser { buf: self }
412 }
413
414 /// Stores an owned allocation in this `Parser` to attach the lifetime of
415 /// the vector to `self`.
416 ///
417 /// This will return a reference to `s`, but one that's safely rooted in the
418 /// `Parser`.
419 fn push_str(&self, s: Vec<u8>) -> &[u8] {
420 self.strings.alloc_slice_copy(&s)
421 }
422
423 /// Lexes the next "significant" token from the `pos` specified.
424 ///
425 /// This will skip irrelevant tokens such as whitespace, comments, and
426 /// unknown annotations.
427 fn advance_token(&self, mut pos: usize) -> Result<Option<Token>> {
428 let token = loop {
429 let token = match self.lexer.parse(&mut pos)? {
430 Some(token) => token,
431 None => return Ok(None),
432 };
433 match token.kind {
434 // Always skip whitespace and comments.
435 TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment => {
436 continue;
437 }
438
439 // If an lparen is seen then this may be skipped if it's an
440 // annotation of the form `(@foo ...)`. In this situation
441 // everything up to and including the closing rparen is skipped.
442 //
443 // Note that the annotation is only skipped if it's an unknown
444 // annotation as known annotations are specifically registered
445 // as "someone's gonna parse this".
446 TokenKind::LParen => {
447 if let Some(annotation) = self.lexer.annotation(pos)? {
448 let text = annotation.annotation(self.lexer.input())?;
449 match self.known_annotations.borrow().get(&text[..]) {
450 Some(0) | None => {
451 self.skip_annotation(&mut pos)?;
452 continue;
453 }
454 Some(_) => {}
455 }
456 }
457 break token;
458 }
459 _ => break token,
460 }
461 };
462 Ok(Some(token))
463 }
464
465 fn skip_annotation(&self, pos: &mut usize) -> Result<()> {
466 let mut depth = 1;
467 let span = Span { offset: *pos };
468 loop {
469 let token = match self.lexer.parse(pos)? {
470 Some(token) => token,
471 None => {
472 break Err(Error::new(span, "unclosed annotation".to_string()));
473 }
474 };
475 match token.kind {
476 TokenKind::LParen => depth += 1,
477 TokenKind::RParen => {
478 depth -= 1;
479 if depth == 0 {
480 break Ok(());
481 }
482 }
483 _ => {}
484 }
485 }
486 }
487}
488
489impl<'a> Parser<'a> {
490 /// Returns whether there are no more `Token` tokens to parse from this
491 /// [`Parser`].
492 ///
493 /// This indicates that either we've reached the end of the input, or we're
494 /// a sub-[`Parser`] inside of a parenthesized expression and we've hit the
495 /// `)` token.
496 ///
497 /// Note that if `false` is returned there *may* be more comments. Comments
498 /// and whitespace are not considered for whether this parser is empty.
499 pub fn is_empty(self) -> bool {
500 match self.cursor().token() {
501 Ok(Some(token)) => matches!(token.kind, TokenKind::RParen),
502 Ok(None) => true,
503 Err(_) => false,
504 }
505 }
506
507 #[cfg(feature = "wasm-module")]
508 pub(crate) fn has_meaningful_tokens(self) -> bool {
509 self.buf.lexer.iter(0).any(|t| match t {
510 Ok(token) => !matches!(
511 token.kind,
512 TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment
513 ),
514 Err(_) => true,
515 })
516 }
517
518 /// Parses a `T` from this [`Parser`].
519 ///
520 /// This method has a trivial definition (it simply calls
521 /// [`T::parse`](Parse::parse)) but is here for syntactic purposes. This is
522 /// what you'll call 99% of the time in a [`Parse`] implementation in order
523 /// to parse sub-items.
524 ///
525 /// Typically you always want to use `?` with the result of this method, you
526 /// should not handle errors and decide what else to parse. To handle
527 /// branches in parsing, use [`Parser::peek`].
528 ///
529 /// # Examples
530 ///
531 /// A good example of using `parse` is to see how the [`TableType`] type is
532 /// parsed in this crate. A [`TableType`] is defined in the official
533 /// specification as [`tabletype`][spec] and is defined as:
534 ///
535 /// [spec]: https://webassembly.github.io/spec/core/text/types.html#table-types
536 ///
537 /// ```text
538 /// tabletype ::= lim:limits et:reftype
539 /// ```
540 ///
541 /// so to parse a [`TableType`] we recursively need to parse a [`Limits`]
542 /// and a [`RefType`]
543 ///
544 /// ```
545 /// # use wast::core::*;
546 /// # use wast::parser::*;
547 /// struct TableType<'a> {
548 /// limits: Limits,
549 /// elem: RefType<'a>,
550 /// }
551 ///
552 /// impl<'a> Parse<'a> for TableType<'a> {
553 /// fn parse(parser: Parser<'a>) -> Result<Self> {
554 /// // parse the `lim` then `et` in sequence
555 /// Ok(TableType {
556 /// limits: parser.parse()?,
557 /// elem: parser.parse()?,
558 /// })
559 /// }
560 /// }
561 /// ```
562 ///
563 /// [`Limits`]: crate::core::Limits
564 /// [`TableType`]: crate::core::TableType
565 /// [`RefType`]: crate::core::RefType
566 pub fn parse<T: Parse<'a>>(self) -> Result<T> {
567 T::parse(self)
568 }
569
570 /// Performs a cheap test to see whether the current token in this stream is
571 /// `T`.
572 ///
573 /// This method can be used to efficiently determine what next to parse. The
574 /// [`Peek`] trait is defined for types which can be used to test if they're
575 /// the next item in the input stream.
576 ///
577 /// Nothing is actually parsed in this method, nor does this mutate the
578 /// state of this [`Parser`]. Instead, this simply performs a check.
579 ///
580 /// This method is frequently combined with the [`Parser::lookahead1`]
581 /// method to automatically produce nice error messages if some tokens
582 /// aren't found.
583 ///
584 /// # Examples
585 ///
586 /// For an example of using the `peek` method let's take a look at parsing
587 /// the [`Limits`] type. This is [defined in the official spec][spec] as:
588 ///
589 /// ```text
590 /// limits ::= n:u32
591 /// | n:u32 m:u32
592 /// ```
593 ///
594 /// which means that it's either one `u32` token or two, so we need to know
595 /// whether to consume two tokens or one:
596 ///
597 /// ```
598 /// # use wast::parser::*;
599 /// struct Limits {
600 /// min: u32,
601 /// max: Option<u32>,
602 /// }
603 ///
604 /// impl<'a> Parse<'a> for Limits {
605 /// fn parse(parser: Parser<'a>) -> Result<Self> {
606 /// // Always parse the first number...
607 /// let min = parser.parse()?;
608 ///
609 /// // ... and then test if there's a second number before parsing
610 /// let max = if parser.peek::<u32>()? {
611 /// Some(parser.parse()?)
612 /// } else {
613 /// None
614 /// };
615 ///
616 /// Ok(Limits { min, max })
617 /// }
618 /// }
619 /// ```
620 ///
621 /// [spec]: https://webassembly.github.io/spec/core/text/types.html#limits
622 /// [`Limits`]: crate::core::Limits
623 pub fn peek<T: Peek>(self) -> Result<bool> {
624 T::peek(self.cursor())
625 }
626
627 /// Same as the [`Parser::peek`] method, except checks the next token, not
628 /// the current token.
629 pub fn peek2<T: Peek>(self) -> Result<bool> {
630 T::peek2(self.cursor())
631 }
632
633 /// Same as the [`Parser::peek2`] method, except checks the next next token,
634 /// not the next token.
635 pub fn peek3<T: Peek>(self) -> Result<bool> {
636 let mut cursor = self.cursor();
637 match cursor.token()? {
638 Some(token) => cursor.advance_past(&token),
639 None => return Ok(false),
640 }
641 match cursor.token()? {
642 Some(token) => cursor.advance_past(&token),
643 None => return Ok(false),
644 }
645 T::peek(cursor)
646 }
647
648 /// A helper structure to perform a sequence of `peek` operations and if
649 /// they all fail produce a nice error message.
650 ///
651 /// This method purely exists for conveniently producing error messages and
652 /// provides no functionality that [`Parser::peek`] doesn't already give.
653 /// The [`Lookahead1`] structure has one main method [`Lookahead1::peek`],
654 /// which is the same method as [`Parser::peek`]. The difference is that the
655 /// [`Lookahead1::error`] method needs no arguments.
656 ///
657 /// # Examples
658 ///
659 /// Let's look at the parsing of [`Index`]. This type is either a `u32` or
660 /// an [`Id`] and is used in name resolution primarily. The [official
661 /// grammar for an index][spec] is:
662 ///
663 /// ```text
664 /// idx ::= x:u32
665 /// | v:id
666 /// ```
667 ///
668 /// Which is to say that an index is either a `u32` or an [`Id`]. When
669 /// parsing an [`Index`] we can do:
670 ///
671 /// ```
672 /// # use wast::token::*;
673 /// # use wast::parser::*;
674 /// enum Index<'a> {
675 /// Num(u32),
676 /// Id(Id<'a>),
677 /// }
678 ///
679 /// impl<'a> Parse<'a> for Index<'a> {
680 /// fn parse(parser: Parser<'a>) -> Result<Self> {
681 /// let mut l = parser.lookahead1();
682 /// if l.peek::<Id>()? {
683 /// Ok(Index::Id(parser.parse()?))
684 /// } else if l.peek::<u32>()? {
685 /// Ok(Index::Num(parser.parse()?))
686 /// } else {
687 /// // produces error message of `expected identifier or u32`
688 /// Err(l.error())
689 /// }
690 /// }
691 /// }
692 /// ```
693 ///
694 /// [spec]: https://webassembly.github.io/spec/core/text/modules.html#indices
695 /// [`Index`]: crate::token::Index
696 /// [`Id`]: crate::token::Id
697 pub fn lookahead1(self) -> Lookahead1<'a> {
698 Lookahead1 {
699 attempts: Vec::new(),
700 parser: self,
701 }
702 }
703
704 /// Parse an item surrounded by parentheses.
705 ///
706 /// WebAssembly's text format is all based on s-expressions, so naturally
707 /// you're going to want to parse a lot of parenthesized things! As noted in
708 /// the documentation of [`Parse`] you typically don't parse your own
709 /// surrounding `(` and `)` tokens, but the parser above you parsed them for
    /// you. This is the method the parser above you uses.
711 ///
712 /// This method will parse a `(` token, and then call `f` on a sub-parser
713 /// which when finished asserts that a `)` token is the next token. This
714 /// requires that `f` consumes all tokens leading up to the paired `)`.
715 ///
716 /// Usage will often simply be `parser.parens(|p| p.parse())?` to
717 /// automatically parse a type within parentheses, but you can, as always,
718 /// go crazy and do whatever you'd like too.
719 ///
720 /// # Examples
721 ///
722 /// A good example of this is to see how a `Module` is parsed. This isn't
723 /// the exact definition, but it's close enough!
724 ///
725 /// ```
726 /// # use wast::kw;
727 /// # use wast::core::*;
728 /// # use wast::parser::*;
729 /// struct Module<'a> {
730 /// fields: Vec<ModuleField<'a>>,
731 /// }
732 ///
733 /// impl<'a> Parse<'a> for Module<'a> {
734 /// fn parse(parser: Parser<'a>) -> Result<Self> {
735 /// // Modules start out with a `module` keyword
736 /// parser.parse::<kw::module>()?;
737 ///
738 /// // And then everything else is `(field ...)`, so while we've got
739 /// // items left we continuously parse parenthesized items.
740 /// let mut fields = Vec::new();
741 /// while !parser.is_empty() {
742 /// fields.push(parser.parens(|p| p.parse())?);
743 /// }
744 /// Ok(Module { fields })
745 /// }
746 /// }
747 /// ```
748 pub fn parens<T>(self, f: impl FnOnce(Parser<'a>) -> Result<T>) -> Result<T> {
749 self.buf.depth.set(self.buf.depth.get() + 1);
750 let before = self.buf.cur.get();
751 let res = self.step(|cursor| {
752 let mut cursor = match cursor.lparen()? {
753 Some(rest) => rest,
754 None => return Err(cursor.error("expected `(`")),
755 };
756 cursor.parser.buf.cur.set(cursor.pos);
757 let result = f(cursor.parser)?;
758
759 // Reset our cursor's state to whatever the current state of the
760 // parser is.
761 cursor.pos = cursor.parser.buf.cur.get();
762
763 match cursor.rparen()? {
764 Some(rest) => Ok((result, rest)),
765 None => Err(cursor.error("expected `)`")),
766 }
767 });
768 self.buf.depth.set(self.buf.depth.get() - 1);
769 if res.is_err() {
770 self.buf.cur.set(before);
771 }
772 res
773 }
774
775 /// Return the depth of nested parens we've parsed so far.
776 ///
777 /// This is a low-level method that is only useful for implementing
778 /// recursion limits in custom parsers.
779 pub fn parens_depth(&self) -> usize {
780 self.buf.depth.get()
781 }
782
783 /// Checks that the parser parens depth hasn't exceeded the maximum depth.
784 #[cfg(feature = "wasm-module")]
785 pub(crate) fn depth_check(&self) -> Result<()> {
786 if self.parens_depth() > MAX_PARENS_DEPTH {
787 Err(self.error("item nesting too deep"))
788 } else {
789 Ok(())
790 }
791 }
792
793 fn cursor(self) -> Cursor<'a> {
794 Cursor {
795 parser: self,
796 pos: self.buf.cur.get(),
797 }
798 }
799
800 /// A low-level parsing method you probably won't use.
801 ///
802 /// This is used to implement parsing of the most primitive types in the
803 /// [`core`](crate::core) module. You probably don't want to use this, but
804 /// probably want to use something like [`Parser::parse`] or
805 /// [`Parser::parens`].
806 pub fn step<F, T>(self, f: F) -> Result<T>
807 where
808 F: FnOnce(Cursor<'a>) -> Result<(T, Cursor<'a>)>,
809 {
810 let (result, cursor) = f(self.cursor())?;
811 self.buf.cur.set(cursor.pos);
812 Ok(result)
813 }
814
815 /// Creates an error whose line/column information is pointing at the
816 /// current token.
817 ///
818 /// This is used to produce human-readable error messages which point to the
819 /// right location in the input stream, and the `msg` here is arbitrary text
820 /// used to associate with the error and indicate why it was generated.
821 pub fn error(self, msg: impl fmt::Display) -> Error {
822 self.error_at(self.cursor().cur_span(), msg)
823 }
824
825 /// Creates an error whose line/column information is pointing at the
826 /// given span.
827 pub fn error_at(self, span: Span, msg: impl fmt::Display) -> Error {
828 Error::parse(span, self.buf.lexer.input(), msg.to_string())
829 }
830
831 /// Returns the span of the current token
832 pub fn cur_span(&self) -> Span {
833 self.cursor().cur_span()
834 }
835
836 /// Returns the span of the previous token
837 pub fn prev_span(&self) -> Span {
838 self.cursor()
839 .prev_span()
840 .unwrap_or_else(|| Span::from_offset(0))
841 }
842
843 /// Registers a new known annotation with this parser to allow parsing
844 /// annotations with this name.
845 ///
846 /// [WebAssembly annotations][annotation] are a proposal for the text format
847 /// which allows decorating the text format with custom structured
848 /// information. By default all annotations are ignored when parsing, but
849 /// the whole purpose of them is to sometimes parse them!
850 ///
851 /// To support parsing text annotations this method is used to allow
852 /// annotations and their tokens to *not* be skipped. Once an annotation is
853 /// registered with this method, then while the return value has not been
854 /// dropped (e.g. the scope of where this function is called) annotations
    /// with the name `annotation` will be part of the token stream and not
856 /// implicitly skipped.
857 ///
858 /// # Skipping annotations
859 ///
860 /// The behavior of skipping unknown/unregistered annotations can be
861 /// somewhat subtle and surprising, so if you're interested in parsing
862 /// annotations it's important to point out the importance of this method
863 /// and where to call it.
864 ///
865 /// Generally when parsing tokens you'll be bottoming out in various
866 /// `Cursor` methods. These are all documented as advancing the stream as
867 /// much as possible to the next token, skipping "irrelevant stuff" like
868 /// comments, whitespace, etc. The `Cursor` methods will also skip unknown
869 /// annotations. This means that if you parse *any* token, it will skip over
870 /// any number of annotations that are unknown at all times.
871 ///
872 /// To parse an annotation you must, before parsing any token of the
873 /// annotation, register the annotation via this method. This includes the
874 /// beginning `(` token, which is otherwise skipped if the annotation isn't
    /// marked as registered. Typically parsers parse the *contents* of an
876 /// s-expression, so this means that the outer parser of an s-expression
877 /// must register the custom annotation name, rather than the inner parser.
878 ///
879 /// # Return
880 ///
881 /// This function returns an RAII guard which, when dropped, will unregister
882 /// the `annotation` given. Parsing `annotation` is only supported while the
883 /// returned value is still alive, and once dropped the parser will go back
884 /// to skipping annotations with the name `annotation`.
885 ///
886 /// # Example
887 ///
888 /// Let's see an example of how the `@name` annotation is parsed for modules
889 /// to get an idea of how this works:
890 ///
891 /// ```
892 /// # use wast::kw;
893 /// # use wast::token::NameAnnotation;
894 /// # use wast::parser::*;
895 /// struct Module<'a> {
896 /// name: Option<NameAnnotation<'a>>,
897 /// }
898 ///
899 /// impl<'a> Parse<'a> for Module<'a> {
900 /// fn parse(parser: Parser<'a>) -> Result<Self> {
901 /// // Modules start out with a `module` keyword
902 /// parser.parse::<kw::module>()?;
903 ///
904 /// // Next may be `(@name "foo")`. Typically this annotation would
    ///         // be skipped, but we don't want it skipped, so we register it.
906 /// // Note that the parse implementation of
907 /// // `Option<NameAnnotation>` is the one that consumes the
908 /// // parentheses here.
909 /// let _r = parser.register_annotation("name");
910 /// let name = parser.parse()?;
911 ///
912 /// // ... and normally you'd otherwise parse module fields here ...
913 ///
914 /// Ok(Module { name })
915 /// }
916 /// }
917 /// ```
918 ///
919 /// Another example is how we parse the `@custom` annotation. Note that this
920 /// is parsed as part of `ModuleField`, so note how the annotation is
921 /// registered *before* we parse the parentheses of the annotation.
922 ///
923 /// ```
924 /// # use wast::{kw, annotation};
925 /// # use wast::core::Custom;
926 /// # use wast::parser::*;
927 /// struct Module<'a> {
928 /// fields: Vec<ModuleField<'a>>,
929 /// }
930 ///
931 /// impl<'a> Parse<'a> for Module<'a> {
932 /// fn parse(parser: Parser<'a>) -> Result<Self> {
933 /// // Modules start out with a `module` keyword
934 /// parser.parse::<kw::module>()?;
935 ///
936 /// // register the `@custom` annotation *first* before we start
937 /// // parsing fields, because each field is contained in
938 /// // parentheses and to parse the parentheses of an annotation we
    ///         // have to know not to skip it.
940 /// let _r = parser.register_annotation("custom");
941 ///
942 /// let mut fields = Vec::new();
943 /// while !parser.is_empty() {
944 /// fields.push(parser.parens(|p| p.parse())?);
945 /// }
946 /// Ok(Module { fields })
947 /// }
948 /// }
949 ///
950 /// enum ModuleField<'a> {
951 /// Custom(Custom<'a>),
952 /// // ...
953 /// }
954 ///
955 /// impl<'a> Parse<'a> for ModuleField<'a> {
956 /// fn parse(parser: Parser<'a>) -> Result<Self> {
957 /// // Note that because we have previously registered the `@custom`
    ///         // annotation with the parser we know that `peek` methods like
    ///         // this, working on the annotation token, are able to
    ///         // return `true`.
961 /// if parser.peek::<annotation::custom>()? {
962 /// return Ok(ModuleField::Custom(parser.parse()?));
963 /// }
964 ///
965 /// // .. typically we'd parse other module fields here...
966 ///
967 /// Err(parser.error("unknown module field"))
968 /// }
969 /// }
970 /// ```
971 ///
972 /// [annotation]: https://github.com/WebAssembly/annotations
973 pub fn register_annotation<'b>(self, annotation: &'b str) -> impl Drop + 'b
974 where
975 'a: 'b,
976 {
977 let mut annotations = self.buf.known_annotations.borrow_mut();
978 if !annotations.contains_key(annotation) {
979 annotations.insert(annotation.to_string(), 0);
980 }
981 *annotations.get_mut(annotation).unwrap() += 1;
982
983 return RemoveOnDrop(self, annotation);
984
985 struct RemoveOnDrop<'a>(Parser<'a>, &'a str);
986
987 impl Drop for RemoveOnDrop<'_> {
988 fn drop(&mut self) {
989 let mut annotations = self.0.buf.known_annotations.borrow_mut();
990 let slot = annotations.get_mut(self.1).unwrap();
991 *slot -= 1;
992 }
993 }
994 }
995
    /// Returns the `track_instr_spans` configuration flag stored on the
    /// underlying parse buffer.
    #[cfg(feature = "wasm-module")]
    pub(crate) fn track_instr_spans(&self) -> bool {
        self.buf.track_instr_spans
    }
1000
1001 #[cfg(feature = "wasm-module")]
1002 pub(crate) fn with_standard_annotations_registered<R>(
1003 self,
1004 f: impl FnOnce(Self) -> Result<R>,
1005 ) -> Result<R> {
1006 let _r = self.register_annotation("custom");
1007 let _r = self.register_annotation("producers");
1008 let _r = self.register_annotation("name");
1009 let _r = self.register_annotation("dylink.0");
1010 let _r = self.register_annotation("metadata.code.branch_hint");
1011 f(self)
1012 }
1013}
1014
impl<'a> Cursor<'a> {
    /// Returns the span of the next `Token` token.
    ///
    /// Does not take into account whitespace or comments.
    pub fn cur_span(&self) -> Span {
        let offset = match self.token() {
            Ok(Some(t)) => t.offset,
            // No more tokens left: point at the very end of the input.
            Ok(None) => self.parser.buf.lexer.input().len(),
            // Lex error: fall back to the cursor's current byte offset.
            Err(_) => self.pos.offset,
        };
        Span { offset }
    }

    /// Returns the span of the previous `Token` token.
    ///
    /// Does not take into account whitespace or comments.
    pub(crate) fn prev_span(&self) -> Option<Span> {
        // TODO: this currently returns the *current* offset rather than the
        // offset of the previous token (see the commented-out sketch below).
        Some(Span {
            offset: self.pos.offset,
        })
        // let (token, _) = self.parser.buf.tokens.get(self.cur.checked_sub(1)?)?;
        // Some(Span {
        //     offset: token.offset,
        // })
    }

    /// Same as [`Parser::error`], but works with the current token in this
    /// [`Cursor`] instead.
    pub fn error(&self, msg: impl fmt::Display) -> Error {
        self.parser.error_at(self.cur_span(), msg)
    }

    /// Tests whether the next token is an lparen
    pub fn peek_lparen(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::LParen,
                ..
            })
        ))
    }

    /// Tests whether the next token is an rparen
    pub fn peek_rparen(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::RParen,
                ..
            })
        ))
    }

    /// Tests whether the next token is an id
    pub fn peek_id(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Id,
                ..
            })
        ))
    }

    /// Tests whether the next token is reserved
    pub fn peek_reserved(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Reserved,
                ..
            })
        ))
    }

    /// Tests whether the next token is a keyword
    pub fn peek_keyword(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Keyword,
                ..
            })
        ))
    }

    /// Tests whether the next token is an integer
    pub fn peek_integer(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Integer(_),
                ..
            })
        ))
    }

    /// Tests whether the next token is a float
    pub fn peek_float(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::Float(_),
                ..
            })
        ))
    }

    /// Tests whether the next token is a string
    pub fn peek_string(self) -> Result<bool> {
        Ok(matches!(
            self.token()?,
            Some(Token {
                kind: TokenKind::String,
                ..
            })
        ))
    }

    /// Attempts to advance this cursor if the current token is a `(`.
    ///
    /// If the current token is `(`, returns a new [`Cursor`] pointing at the
    /// rest of the tokens in the stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn lparen(mut self) -> Result<Option<Self>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::LParen => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some(self))
    }

    /// Attempts to advance this cursor if the current token is a `)`.
    ///
    /// If the current token is `)`, returns a new [`Cursor`] pointing at the
    /// rest of the tokens in the stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn rparen(mut self) -> Result<Option<Self>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::RParen => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some(self))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Id`](crate::lexer::Token)
    ///
    /// If the current token is `Id`, returns the identifier minus the leading
    /// `$` character as well as a new [`Cursor`] pointing at the rest of the
    /// tokens in the stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn id(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Id => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        let id = match token.id(self.parser.buf.lexer.input())? {
            Cow::Borrowed(id) => id,
            // Our `self.parser.buf` only retains `Vec<u8>` so briefly convert
            // this owned string to `Vec<u8>` and then convert it back to `&str`
            // out the other end.
            Cow::Owned(s) => {
                core::str::from_utf8(self.parser.buf.push_str(s.into_bytes())).unwrap()
            }
        };
        Ok(Some((id, self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Keyword`](crate::lexer::Token)
    ///
    /// If the current token is `Keyword`, returns the keyword as well as a new
    /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise
    /// returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn keyword(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Keyword => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some((token.keyword(self.parser.buf.lexer.input()), self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Annotation`](crate::lexer::Token)
    ///
    /// If the current token is `Annotation`, returns the annotation token as well
    /// as a new [`Cursor`] pointing at the rest of the tokens in the stream.
    /// Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn annotation(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Annotation => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        let annotation = match token.annotation(self.parser.buf.lexer.input())? {
            Cow::Borrowed(id) => id,
            // Our `self.parser.buf` only retains `Vec<u8>` so briefly convert
            // this owned string to `Vec<u8>` and then convert it back to `&str`
            // out the other end.
            Cow::Owned(s) => {
                core::str::from_utf8(self.parser.buf.push_str(s.into_bytes())).unwrap()
            }
        };
        Ok(Some((annotation, self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Reserved`](crate::lexer::Token)
    ///
    /// If the current token is `Reserved`, returns the reserved token as well
    /// as a new [`Cursor`] pointing at the rest of the tokens in the stream.
    /// Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn reserved(mut self) -> Result<Option<(&'a str, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::Reserved => {}
            _ => return Ok(None),
        }
        self.advance_past(&token);
        Ok(Some((token.reserved(self.parser.buf.lexer.input()), self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Integer`](crate::lexer::Token)
    ///
    /// If the current token is `Integer`, returns the integer as well as a new
    /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise
    /// returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn integer(mut self) -> Result<Option<(Integer<'a>, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        let i = match token.kind {
            TokenKind::Integer(i) => i,
            _ => return Ok(None),
        };
        self.advance_past(&token);
        Ok(Some((
            token.integer(self.parser.buf.lexer.input(), i),
            self,
        )))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::Float`](crate::lexer::Token)
    ///
    /// If the current token is `Float`, returns the float as well as a new
    /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise
    /// returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn float(mut self) -> Result<Option<(Float<'a>, Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        let f = match token.kind {
            TokenKind::Float(f) => f,
            _ => return Ok(None),
        };
        self.advance_past(&token);
        Ok(Some((token.float(self.parser.buf.lexer.input(), f), self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::String`](crate::lexer::Token)
    ///
    /// If the current token is `String`, returns the byte value of the string
    /// as well as a new [`Cursor`] pointing at the rest of the tokens in the
    /// stream. Otherwise returns `None`.
    ///
    /// This function will automatically skip over any comments, whitespace, or
    /// unknown annotations.
    pub fn string(mut self) -> Result<Option<(&'a [u8], Self)>> {
        let token = match self.token()? {
            Some(token) => token,
            None => return Ok(None),
        };
        match token.kind {
            TokenKind::String => {}
            _ => return Ok(None),
        }
        let string = match token.string(self.parser.buf.lexer.input()) {
            Cow::Borrowed(s) => s,
            // Escaped strings are decoded into an owned buffer; stash the
            // bytes in the parse buffer so we can hand out a `'a` slice.
            Cow::Owned(s) => self.parser.buf.push_str(s),
        };
        self.advance_past(&token);
        Ok(Some((string, self)))
    }

    /// Attempts to advance this cursor if the current token is a
    /// [`Token::LineComment`](crate::lexer::Token) or a
    /// [`Token::BlockComment`](crate::lexer::Token)
    ///
    /// This function will only skip whitespace, no other tokens.
    pub fn comment(mut self) -> Result<Option<(&'a str, Self)>> {
        let start = self.pos.offset;
        // Invalidate the cached token: below we lex raw tokens (including
        // whitespace and comments) directly from the lexer, bypassing the
        // normal `token()`/`advance_past` path.
        self.pos.token = None;
        let comment = loop {
            let token = match self.parser.buf.lexer.parse(&mut self.pos.offset)? {
                Some(token) => token,
                None => return Ok(None),
            };
            match token.kind {
                TokenKind::LineComment | TokenKind::BlockComment => {
                    break token.src(self.parser.buf.lexer.input());
                }
                // Whitespace is the only thing skipped; keep scanning.
                TokenKind::Whitespace => {}
                // Anything else means no comment is next: rewind to where we
                // started so the cursor is unchanged.
                _ => {
                    self.pos.offset = start;
                    return Ok(None);
                }
            }
        };
        Ok(Some((comment, self)))
    }

    // Returns the next significant token, preferring the token cached by
    // `advance_past` over re-lexing from the current offset.
    fn token(&self) -> Result<Option<Token>> {
        match self.pos.token {
            Some(token) => Ok(Some(token)),
            None => self.parser.buf.advance_token(self.pos.offset),
        }
    }

    // Moves this cursor just past `token` and eagerly lexes/caches the next
    // token. A lex error here is swallowed (cache left empty) so it will be
    // reported by the next `token()` call instead.
    fn advance_past(&mut self, token: &Token) {
        self.pos.offset = token.offset + (token.len as usize);
        self.pos.token = self
            .parser
            .buf
            .advance_token(self.pos.offset)
            .unwrap_or(None);
    }
}
1398
1399impl<'a> Lookahead1<'a> {
1400 /// Attempts to see if `T` is the next token in the [`Parser`] this
1401 /// [`Lookahead1`] references.
1402 ///
1403 /// For more information see [`Parser::lookahead1`] and [`Parser::peek`]
1404 pub fn peek<T: Peek>(&mut self) -> Result<bool> {
1405 Ok(if self.parser.peek::<T>()? {
1406 true
1407 } else {
1408 self.attempts.push(T::display());
1409 false
1410 })
1411 }
1412
1413 /// Returns the underlying parser that this lookahead is looking at.
1414 pub fn parser(&self) -> Parser<'a> {
1415 self.parser
1416 }
1417
1418 /// Generates an error message saying that one of the tokens passed to
1419 /// [`Lookahead1::peek`] method was expected.
1420 ///
1421 /// Before calling this method you should call [`Lookahead1::peek`] for all
1422 /// possible tokens you'd like to parse.
1423 pub fn error(self) -> Error {
1424 match self.attempts.len() {
1425 0 => {
1426 if self.parser.is_empty() {
1427 self.parser.error("unexpected end of input")
1428 } else {
1429 self.parser.error("unexpected token")
1430 }
1431 }
1432 1 => {
1433 let message = format!("unexpected token, expected {}", self.attempts[0]);
1434 self.parser.error(&message)
1435 }
1436 2 => {
1437 let message = format!(
1438 "unexpected token, expected {} or {}",
1439 self.attempts[0], self.attempts[1]
1440 );
1441 self.parser.error(&message)
1442 }
1443 _ => {
1444 let join = self.attempts.join(", ");
1445 let message = format!("unexpected token, expected one of: {}", join);
1446 self.parser.error(&message)
1447 }
1448 }
1449 }
1450}
1451
1452impl<'a, T: Peek + Parse<'a>> Parse<'a> for Option<T> {
1453 fn parse(parser: Parser<'a>) -> Result<Option<T>> {
1454 if parser.peek::<T>()? {
1455 Ok(Some(parser.parse()?))
1456 } else {
1457 Ok(None)
1458 }
1459 }
1460}