//! JavaScript lexer/tokenizer conforming to ECMAScript 2024. //! //! Converts JavaScript source text into a stream of [`Token`]s, each annotated //! with its [`Span`] (byte offset, line, column). use std::fmt; /// A position in the source text. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct SourcePos { /// 1-based line number. pub line: u32, /// 1-based column (in bytes from the start of the line). pub col: u32, } /// A span covering a range of source text. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Span { pub start: SourcePos, pub end: SourcePos, } /// A token produced by the lexer. #[derive(Debug, Clone, PartialEq)] pub struct Token { pub kind: TokenKind, pub span: Span, /// Whether at least one newline preceded this token (for ASI). pub preceded_by_newline: bool, } /// Every distinct token kind the lexer can produce. #[derive(Debug, Clone, PartialEq)] pub enum TokenKind { // ── Literals ────────────────────────────────────────────── /// Numeric literal (the parsed `f64` value). Number(f64), /// String literal (the decoded content, without quotes). String(std::string::String), /// Regular expression literal: pattern and flags. RegExp { pattern: std::string::String, flags: std::string::String, }, /// Template literal with no substitutions (full string content). TemplateFull(std::string::String), /// Opening part of a template literal (before the first `${`). TemplateHead(std::string::String), /// Middle part of a template literal (between `}` and next `${`). TemplateMiddle(std::string::String), /// Closing part of a template literal (after the last `}`). 
TemplateTail(std::string::String), // ── Identifiers & Keywords ─────────────────────────────── Identifier(std::string::String), // Keywords Await, Break, Case, Catch, Class, Const, Continue, Debugger, Default, Delete, Do, Else, Export, Extends, Finally, For, Function, If, Import, In, Instanceof, Let, New, Of, Return, Static, Super, Switch, This, Throw, Try, Typeof, Var, Void, While, With, Yield, Async, // Literal keywords True, False, Null, // ── Punctuators ────────────────────────────────────────── // Grouping LParen, // ( RParen, // ) LBracket, // [ RBracket, // ] LBrace, // { RBrace, // } // Delimiters Semicolon, // ; Comma, // , Colon, // : Dot, // . Ellipsis, // ... // Arrow Arrow, // => // Optional chaining QuestionDot, // ?. // Ternary Question, // ? // Assignment Assign, // = PlusAssign, // += MinusAssign, // -= StarAssign, // *= SlashAssign, // /= PercentAssign, // %= ExpAssign, // **= AmpAssign, // &= PipeAssign, // |= CaretAssign, // ^= ShlAssign, // <<= ShrAssign, // >>= UshrAssign, // >>>= AndAssign, // &&= OrAssign, // ||= NullishAssign, // ??= // Comparison Eq, // == Ne, // != StrictEq, // === StrictNe, // !== Lt, // < Gt, // > Le, // <= Ge, // >= // Arithmetic Plus, // + Minus, // - Star, // * Slash, // / Percent, // % Exp, // ** // Increment / Decrement PlusPlus, // ++ MinusMinus, // -- // Bitwise Amp, // & Pipe, // | Caret, // ^ Tilde, // ~ Shl, // << Shr, // >> Ushr, // >>> // Logical And, // && Or, // || Not, // ! Nullish, // ?? // ── Special ────────────────────────────────────────────── /// End of input. 
Eof, } impl fmt::Display for TokenKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { TokenKind::Number(n) => write!(f, "{}", n), TokenKind::String(s) => write!(f, "\"{}\"", s), TokenKind::RegExp { pattern, flags } => write!(f, "/{}/{}", pattern, flags), TokenKind::TemplateFull(s) => write!(f, "`{}`", s), TokenKind::TemplateHead(s) => write!(f, "`{}${{", s), TokenKind::TemplateMiddle(s) => write!(f, "}}{}${{", s), TokenKind::TemplateTail(s) => write!(f, "}}{}`", s), TokenKind::Identifier(s) => write!(f, "{}", s), TokenKind::Await => write!(f, "await"), TokenKind::Break => write!(f, "break"), TokenKind::Case => write!(f, "case"), TokenKind::Catch => write!(f, "catch"), TokenKind::Class => write!(f, "class"), TokenKind::Const => write!(f, "const"), TokenKind::Continue => write!(f, "continue"), TokenKind::Debugger => write!(f, "debugger"), TokenKind::Default => write!(f, "default"), TokenKind::Delete => write!(f, "delete"), TokenKind::Do => write!(f, "do"), TokenKind::Else => write!(f, "else"), TokenKind::Export => write!(f, "export"), TokenKind::Extends => write!(f, "extends"), TokenKind::Finally => write!(f, "finally"), TokenKind::For => write!(f, "for"), TokenKind::Function => write!(f, "function"), TokenKind::If => write!(f, "if"), TokenKind::Import => write!(f, "import"), TokenKind::In => write!(f, "in"), TokenKind::Instanceof => write!(f, "instanceof"), TokenKind::Let => write!(f, "let"), TokenKind::New => write!(f, "new"), TokenKind::Of => write!(f, "of"), TokenKind::Return => write!(f, "return"), TokenKind::Static => write!(f, "static"), TokenKind::Super => write!(f, "super"), TokenKind::Switch => write!(f, "switch"), TokenKind::This => write!(f, "this"), TokenKind::Throw => write!(f, "throw"), TokenKind::Try => write!(f, "try"), TokenKind::Typeof => write!(f, "typeof"), TokenKind::Var => write!(f, "var"), TokenKind::Void => write!(f, "void"), TokenKind::While => write!(f, "while"), TokenKind::With => write!(f, "with"), 
TokenKind::Yield => write!(f, "yield"), TokenKind::Async => write!(f, "async"), TokenKind::True => write!(f, "true"), TokenKind::False => write!(f, "false"), TokenKind::Null => write!(f, "null"), TokenKind::LParen => write!(f, "("), TokenKind::RParen => write!(f, ")"), TokenKind::LBracket => write!(f, "["), TokenKind::RBracket => write!(f, "]"), TokenKind::LBrace => write!(f, "{{"), TokenKind::RBrace => write!(f, "}}"), TokenKind::Semicolon => write!(f, ";"), TokenKind::Comma => write!(f, ","), TokenKind::Colon => write!(f, ":"), TokenKind::Dot => write!(f, "."), TokenKind::Ellipsis => write!(f, "..."), TokenKind::Arrow => write!(f, "=>"), TokenKind::QuestionDot => write!(f, "?."), TokenKind::Question => write!(f, "?"), TokenKind::Assign => write!(f, "="), TokenKind::PlusAssign => write!(f, "+="), TokenKind::MinusAssign => write!(f, "-="), TokenKind::StarAssign => write!(f, "*="), TokenKind::SlashAssign => write!(f, "/="), TokenKind::PercentAssign => write!(f, "%="), TokenKind::ExpAssign => write!(f, "**="), TokenKind::AmpAssign => write!(f, "&="), TokenKind::PipeAssign => write!(f, "|="), TokenKind::CaretAssign => write!(f, "^="), TokenKind::ShlAssign => write!(f, "<<="), TokenKind::ShrAssign => write!(f, ">>="), TokenKind::UshrAssign => write!(f, ">>>="), TokenKind::AndAssign => write!(f, "&&="), TokenKind::OrAssign => write!(f, "||="), TokenKind::NullishAssign => write!(f, "??="), TokenKind::Eq => write!(f, "=="), TokenKind::Ne => write!(f, "!="), TokenKind::StrictEq => write!(f, "==="), TokenKind::StrictNe => write!(f, "!=="), TokenKind::Lt => write!(f, "<"), TokenKind::Gt => write!(f, ">"), TokenKind::Le => write!(f, "<="), TokenKind::Ge => write!(f, ">="), TokenKind::Plus => write!(f, "+"), TokenKind::Minus => write!(f, "-"), TokenKind::Star => write!(f, "*"), TokenKind::Slash => write!(f, "/"), TokenKind::Percent => write!(f, "%"), TokenKind::Exp => write!(f, "**"), TokenKind::PlusPlus => write!(f, "++"), TokenKind::MinusMinus => write!(f, "--"), 
TokenKind::Amp => write!(f, "&"), TokenKind::Pipe => write!(f, "|"), TokenKind::Caret => write!(f, "^"), TokenKind::Tilde => write!(f, "~"), TokenKind::Shl => write!(f, "<<"), TokenKind::Shr => write!(f, ">>"), TokenKind::Ushr => write!(f, ">>>"), TokenKind::And => write!(f, "&&"), TokenKind::Or => write!(f, "||"), TokenKind::Not => write!(f, "!"), TokenKind::Nullish => write!(f, "??"), TokenKind::Eof => write!(f, ""), } } } /// The lexer converts JavaScript source text into tokens. pub struct Lexer<'a> { source: &'a [u8], /// Current byte offset into `source`. pos: usize, /// Current 1-based line number. line: u32, /// Current 1-based column (byte offset from line start). col: u32, /// Whether we have crossed at least one newline since the last token. saw_newline: bool, /// Nesting depth for template literal `${...}` expressions. /// When > 0, a `}` at the matching depth resumes template scanning. template_depth: u32, /// Stack tracking brace depth at each template nesting level. /// When we enter `${`, we push the current brace depth. template_brace_stack: Vec, /// Current brace depth (incremented on `{`, decremented on `}`). brace_depth: u32, /// Tracks whether the previous token could end an expression. /// Used to disambiguate `/` as division vs RegExp. prev_token_is_expr_end: bool, } impl<'a> Lexer<'a> { /// Create a new lexer for the given source text. pub fn new(source: &'a str) -> Self { Self { source: source.as_bytes(), pos: 0, line: 1, col: 1, saw_newline: false, template_depth: 0, template_brace_stack: Vec::new(), brace_depth: 0, prev_token_is_expr_end: false, } } /// Tokenize the entire source and return all tokens (including final `Eof`). 
pub fn tokenize(source: &str) -> Result, LexError> { let mut lexer = Lexer::new(source); let mut tokens = Vec::new(); loop { let tok = lexer.next_token()?; let is_eof = tok.kind == TokenKind::Eof; tokens.push(tok); if is_eof { break; } } Ok(tokens) } // ── Helpers ────────────────────────────────────────────── fn current_pos(&self) -> SourcePos { SourcePos { line: self.line, col: self.col, } } fn peek(&self) -> Option { self.source.get(self.pos).copied() } fn peek_at(&self, offset: usize) -> Option { self.source.get(self.pos + offset).copied() } fn advance(&mut self) -> Option { let b = self.source.get(self.pos).copied()?; self.pos += 1; if b == b'\n' { self.line += 1; self.col = 1; self.saw_newline = true; } else { self.col += 1; } Some(b) } fn advance_if(&mut self, expected: u8) -> bool { if self.peek() == Some(expected) { self.advance(); true } else { false } } fn slice(&self, start: usize, end: usize) -> &'a str { // Safety: we only slice at positions we've already walked over, // and we trust the input to be valid UTF-8 at identifier/keyword // boundaries. In practice this is safe because the lexer only // slices ASCII-compatible byte sequences. 
std::str::from_utf8(&self.source[start..end]).unwrap_or("") } // ── Whitespace & Comments ──────────────────────────────── fn skip_whitespace_and_comments(&mut self) -> Result<(), LexError> { loop { match self.peek() { Some(b' ' | b'\t' | b'\r' | b'\n') => { self.advance(); } // Unicode BOM / non-breaking spaces Some(0xC2) if self.peek_at(1) == Some(0xA0) => { // U+00A0 non-breaking space (2-byte UTF-8) self.advance(); self.advance(); } Some(0xEF) if self.peek_at(1) == Some(0xBB) && self.peek_at(2) == Some(0xBF) => { // BOM U+FEFF self.advance(); self.advance(); self.advance(); } Some(b'/') => { match self.peek_at(1) { Some(b'/') => { // single-line comment self.advance(); // / self.advance(); // / while let Some(b) = self.peek() { if b == b'\n' { break; } self.advance(); } } Some(b'*') => { // multi-line comment let start = self.current_pos(); self.advance(); // / self.advance(); // * let mut closed = false; while let Some(b) = self.advance() { if b == b'*' && self.peek() == Some(b'/') { self.advance(); // / closed = true; break; } } if !closed { return Err(LexError { message: "unterminated block comment".into(), pos: start, }); } } _ => break, } } _ => break, } } Ok(()) } // ── Main dispatch ──────────────────────────────────────── /// Produce the next token. pub fn next_token(&mut self) -> Result { self.saw_newline = false; self.skip_whitespace_and_comments()?; let start = self.current_pos(); let Some(b) = self.peek() else { return Ok(Token { kind: TokenKind::Eof, span: Span { start, end: self.current_pos(), }, preceded_by_newline: self.saw_newline, }); }; // If we're inside a template `${...}` and hit the matching `}`, // resume template scanning. 
if b == b'}' && !self.template_brace_stack.is_empty() && self.brace_depth == *self.template_brace_stack.last().unwrap() { self.template_brace_stack.pop(); self.template_depth -= 1; self.advance(); // consume } return self.scan_template_continuation(start); } let kind = match b { b'`' => { self.advance(); return self.scan_template_start(start); } b'0'..=b'9' => self.scan_number()?, b'.' if matches!(self.peek_at(1), Some(b'0'..=b'9')) => self.scan_number()?, b'"' | b'\'' => self.scan_string()?, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$' => self.scan_identifier_or_keyword(), // UTF-8 multi-byte identifier start 0xC0..=0xF7 if is_unicode_id_start(self.source, self.pos) => { self.scan_identifier_or_keyword() } b'/' if !self.prev_token_is_expr_end => self.scan_regexp()?, _ => self.scan_punctuator()?, }; let end = self.current_pos(); let preceded_by_newline = self.saw_newline; // Track whether this token ends an expression (for `/` disambiguation). self.prev_token_is_expr_end = token_is_expr_end(&kind); Ok(Token { kind, span: Span { start, end }, preceded_by_newline, }) } // ── Numbers ────────────────────────────────────────────── fn scan_number(&mut self) -> Result { let start = self.pos; if self.peek() == Some(b'0') { match self.peek_at(1) { Some(b'x' | b'X') => return self.scan_hex_number(), Some(b'o' | b'O') => return self.scan_octal_number(), Some(b'b' | b'B') => return self.scan_binary_number(), _ => {} } } // Decimal integer or float self.eat_decimal_digits(); if self.peek() == Some(b'.') { // Could be `1..toString()` — only consume `.` if followed by a digit // or if this is a leading dot (start already has a digit, so peek is safe). // Actually, `1.` is a valid numeric literal (= 1.0), and `1.e2` = 100. // We consume the dot always unless it's `..` (spread). if self.peek_at(1) != Some(b'.') { self.advance(); // . 
self.eat_decimal_digits(); } } // Exponent if matches!(self.peek(), Some(b'e' | b'E')) { self.advance(); if matches!(self.peek(), Some(b'+' | b'-')) { self.advance(); } if !matches!(self.peek(), Some(b'0'..=b'9')) { return Err(LexError { message: "expected digit after exponent".into(), pos: self.current_pos(), }); } self.eat_decimal_digits(); } // BigInt suffix `n` — we tokenize it but store as f64 (for now) self.advance_if(b'n'); let text = self.slice(start, self.pos); let value = parse_decimal(text); Ok(TokenKind::Number(value)) } fn scan_hex_number(&mut self) -> Result { self.advance(); // 0 self.advance(); // x/X let digit_start = self.pos; self.eat_hex_digits(); if self.pos == digit_start { return Err(LexError { message: "expected hex digit after 0x".into(), pos: self.current_pos(), }); } self.advance_if(b'n'); let text = self.slice(digit_start, self.pos); let text = text.trim_end_matches('n'); let value = u64_from_hex(text) as f64; Ok(TokenKind::Number(value)) } fn scan_octal_number(&mut self) -> Result { self.advance(); // 0 self.advance(); // o/O let digit_start = self.pos; while matches!(self.peek(), Some(b'0'..=b'7' | b'_')) { self.advance(); } if self.pos == digit_start { return Err(LexError { message: "expected octal digit after 0o".into(), pos: self.current_pos(), }); } self.advance_if(b'n'); let text = self.slice(digit_start, self.pos).trim_end_matches('n'); let value = u64_from_octal(text) as f64; Ok(TokenKind::Number(value)) } fn scan_binary_number(&mut self) -> Result { self.advance(); // 0 self.advance(); // b/B let digit_start = self.pos; while matches!(self.peek(), Some(b'0' | b'1' | b'_')) { self.advance(); } if self.pos == digit_start { return Err(LexError { message: "expected binary digit after 0b".into(), pos: self.current_pos(), }); } self.advance_if(b'n'); let text = self.slice(digit_start, self.pos).trim_end_matches('n'); let value = u64_from_binary(text) as f64; Ok(TokenKind::Number(value)) } fn eat_decimal_digits(&mut self) { while 
matches!(self.peek(), Some(b'0'..=b'9' | b'_')) { self.advance(); } } fn eat_hex_digits(&mut self) { while matches!( self.peek(), Some(b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_') ) { self.advance(); } } // ── Strings ────────────────────────────────────────────── fn scan_string(&mut self) -> Result { let quote = self.advance().unwrap(); // opening quote let start_pos = self.current_pos(); let mut value = std::string::String::new(); loop { match self.peek() { None | Some(b'\n') => { return Err(LexError { message: "unterminated string literal".into(), pos: start_pos, }); } Some(b) if b == quote => { self.advance(); break; } Some(b'\\') => { self.advance(); // backslash if let Some(ch) = self.scan_escape_sequence()? { value.push(ch); } } Some(_) => { let ch = self.advance_char(); value.push(ch); } } } Ok(TokenKind::String(value)) } /// Scan an escape sequence after the backslash has been consumed. /// Returns `None` for line continuations (`\`), which produce no character. fn scan_escape_sequence(&mut self) -> Result, LexError> { let pos = self.current_pos(); match self.advance() { Some(b'n') => Ok(Some('\n')), Some(b'r') => Ok(Some('\r')), Some(b't') => Ok(Some('\t')), Some(b'b') => Ok(Some('\u{0008}')), Some(b'f') => Ok(Some('\u{000C}')), Some(b'v') => Ok(Some('\u{000B}')), Some(b'0') if !matches!(self.peek(), Some(b'0'..=b'9')) => Ok(Some('\0')), Some(b'\\') => Ok(Some('\\')), Some(b'\'') => Ok(Some('\'')), Some(b'"') => Ok(Some('"')), Some(b'`') => Ok(Some('`')), // Line continuation: \ produces no character Some(b'\n') => Ok(None), Some(b'\r') => { self.advance_if(b'\n'); Ok(None) } Some(b'x') => { let hi = self.advance().and_then(hex_digit_val).ok_or(LexError { message: "invalid hex escape".into(), pos, })?; let lo = self.advance().and_then(hex_digit_val).ok_or(LexError { message: "invalid hex escape".into(), pos, })?; let code = (hi << 4) | lo; Ok(Some(code as char)) } Some(b'u') => self.scan_unicode_escape(pos).map(Some), Some(b) => { // identity escape 
Ok(Some(b as char)) } None => Err(LexError { message: "unexpected end of input in escape sequence".into(), pos, }), } } fn scan_unicode_escape(&mut self, pos: SourcePos) -> Result { if self.advance_if(b'{') { // \u{XXXXX} let mut code: u32 = 0; let mut count = 0; while let Some(b) = self.peek() { if b == b'}' { break; } let d = hex_digit_val(self.advance().unwrap()).ok_or(LexError { message: "invalid unicode escape".into(), pos, })?; code = code * 16 + d as u32; count += 1; if code > 0x10FFFF { return Err(LexError { message: "unicode escape out of range".into(), pos, }); } } if count == 0 || !self.advance_if(b'}') { return Err(LexError { message: "invalid unicode escape".into(), pos, }); } char::from_u32(code).ok_or(LexError { message: "invalid unicode code point".into(), pos, }) } else { // \uXXXX let mut code: u32 = 0; for _ in 0..4 { let d = self.advance().and_then(hex_digit_val).ok_or(LexError { message: "invalid unicode escape".into(), pos, })?; code = code * 16 + d as u32; } char::from_u32(code).ok_or(LexError { message: "invalid unicode code point".into(), pos, }) } } /// Advance one full UTF-8 character and return it. 
fn advance_char(&mut self) -> char {
        let start = self.pos;
        let b = self.advance().unwrap();
        if b < 0x80 {
            return b as char;
        }
        // Multi-byte: the lead byte determines the sequence length.
        let len = if b >= 0xF0 {
            4
        } else if b >= 0xE0 {
            3
        } else {
            2
        };
        for _ in 1..len {
            self.advance();
        }
        // Invalid UTF-8 degrades to U+FFFD rather than panicking.
        let s = std::str::from_utf8(&self.source[start..self.pos]).unwrap_or("\u{FFFD}");
        s.chars().next().unwrap_or('\u{FFFD}')
    }

    // ── Template Literals ────────────────────────────────────

    /// Shared scanner for one cooked segment of a template literal.
    ///
    /// Reads until a closing backtick or a `${` substitution opener.
    /// `is_start` selects the token kinds: the first segment produces
    /// `TemplateFull`/`TemplateHead`, a continuation produces
    /// `TemplateTail`/`TemplateMiddle`.
    fn scan_template_part(&mut self, start: SourcePos, is_start: bool) -> Result<Token, LexError> {
        let mut value = String::new();
        loop {
            match self.peek() {
                None => {
                    return Err(LexError {
                        message: "unterminated template literal".into(),
                        pos: start,
                    });
                }
                Some(b'`') => {
                    self.advance();
                    let end = self.current_pos();
                    let kind = if is_start {
                        TokenKind::TemplateFull(value)
                    } else {
                        TokenKind::TemplateTail(value)
                    };
                    // A completed template can end an expression.
                    self.prev_token_is_expr_end = true;
                    return Ok(Token {
                        kind,
                        span: Span { start, end },
                        preceded_by_newline: self.saw_newline,
                    });
                }
                Some(b'$') if self.peek_at(1) == Some(b'{') => {
                    self.advance(); // $
                    self.advance(); // {
                    // Record the brace depth so next_token can recognise
                    // the `}` that closes this substitution.
                    self.template_depth += 1;
                    self.template_brace_stack.push(self.brace_depth);
                    let end = self.current_pos();
                    let kind = if is_start {
                        TokenKind::TemplateHead(value)
                    } else {
                        TokenKind::TemplateMiddle(value)
                    };
                    self.prev_token_is_expr_end = false;
                    return Ok(Token {
                        kind,
                        span: Span { start, end },
                        preceded_by_newline: self.saw_newline,
                    });
                }
                Some(b'\\') => {
                    self.advance();
                    if let Some(ch) = self.scan_escape_sequence()? {
                        value.push(ch);
                    }
                }
                Some(_) => {
                    let ch = self.advance_char();
                    value.push(ch);
                }
            }
        }
    }

    /// Scan the first segment of a template literal (after the opening
    /// backtick has been consumed).
    fn scan_template_start(&mut self, start: SourcePos) -> Result<Token, LexError> {
        self.scan_template_part(start, true)
    }

    /// Resume scanning a template literal after a `${...}` substitution
    /// (the closing `}` has been consumed).
    fn scan_template_continuation(&mut self, start: SourcePos) -> Result<Token, LexError> {
        self.scan_template_part(start, false)
    }

    // ── Identifiers & Keywords ───────────────────────────────

    /// Scan an identifier or keyword. The caller has already validated
    /// that the current character is a legal identifier start.
    fn scan_identifier_or_keyword(&mut self) -> TokenKind {
        let start = self.pos;
        self.advance_char(); // first character (pre-validated)
        while self.pos < self.source.len() {
            match self.source[self.pos] {
                b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'$' => {
                    self.advance();
                }
                0xC0..=0xF7 if is_unicode_id_continue(self.source, self.pos) => {
                    self.advance_char();
                }
                _ => break,
            }
        }
        keyword_or_ident(self.slice(start, self.pos))
    }

    // ── Regular Expressions ──────────────────────────────────

    /// Scan a RegExp literal; the leading `/` has not been consumed yet.
    ///
    /// # Errors
    /// A newline or EOF before the closing `/` is an unterminated literal.
    fn scan_regexp(&mut self) -> Result<TokenKind, LexError> {
        let start_pos = self.current_pos();
        self.advance(); // opening /
        let mut pattern = String::new();
        let mut in_class = false; // inside [...] a `/` is literal
        loop {
            match self.peek() {
                None | Some(b'\n') => {
                    return Err(LexError {
                        message: "unterminated regexp literal".into(),
                        pos: start_pos,
                    });
                }
                Some(b'/') if !in_class => {
                    self.advance();
                    break;
                }
                Some(b'[') => {
                    in_class = true;
                    pattern.push('[');
                    self.advance();
                }
                Some(b']') if in_class => {
                    in_class = false;
                    pattern.push(']');
                    self.advance();
                }
                Some(b'\\') => {
                    self.advance();
                    pattern.push('\\');
                    match self.peek() {
                        // A bare `\` before newline/EOF: let the next
                        // iteration report the unterminated literal.
                        None | Some(b'\n') => {}
                        // Decode escaped non-ASCII characters fully.
                        Some(b2) if b2 >= 0x80 => pattern.push(self.advance_char()),
                        Some(b2) => {
                            pattern.push(b2 as char);
                            self.advance();
                        }
                    }
                }
                // Non-ASCII: decode the whole UTF-8 character rather than
                // pushing a mangled first byte.
                Some(b) if b >= 0x80 => pattern.push(self.advance_char()),
                Some(b) => {
                    pattern.push(b as char);
                    self.advance();
                }
            }
        }
        // Flags, including `d` (ES2022 indices) and `v` (ES2024 unicodeSets).
        let mut flags = String::new();
        while matches!(
            self.peek(),
            Some(b'd' | b'g' | b'i' | b'm' | b's' | b'u' | b'v' | b'y')
        ) {
            flags.push(self.advance().unwrap() as char);
        }
        Ok(TokenKind::RegExp { pattern, flags })
    }

    // ── Punctuators ──────────────────────────────────────────

    /// Scan a punctuator, greedily matching the longest operator.
    ///
    /// # Errors
    /// Returns an error for any byte that starts no known punctuator.
    fn scan_punctuator(&mut self) -> Result<TokenKind, LexError> {
        let pos = self.current_pos();
        let b = self.advance().unwrap();
        let kind = match b {
            b'(' => TokenKind::LParen,
            b')' => TokenKind::RParen,
            b'[' => TokenKind::LBracket,
            b']' => TokenKind::RBracket,
            b'{' => {
                self.brace_depth += 1;
                TokenKind::LBrace
            }
            b'}' => {
                self.brace_depth = self.brace_depth.saturating_sub(1);
                TokenKind::RBrace
            }
            b';' => TokenKind::Semicolon,
            b',' => TokenKind::Comma,
            b':' => TokenKind::Colon,
            b'~' => TokenKind::Tilde,
            b'.' => {
                if self.peek() == Some(b'.') && self.peek_at(1) == Some(b'.') {
                    self.advance();
                    self.advance();
                    TokenKind::Ellipsis
                } else {
                    TokenKind::Dot
                }
            }
            b'?' => {
                if self.advance_if(b'?') {
                    if self.advance_if(b'=') {
                        TokenKind::NullishAssign
                    } else {
                        TokenKind::Nullish
                    }
                } else if self.peek() == Some(b'.')
                    && !matches!(self.peek_at(1), Some(b'0'..=b'9'))
                {
                    // `?.5` is ternary-then-number, not optional chaining.
                    self.advance();
                    TokenKind::QuestionDot
                } else {
                    TokenKind::Question
                }
            }
            b'+' => {
                if self.advance_if(b'+') {
                    TokenKind::PlusPlus
                } else if self.advance_if(b'=') {
                    TokenKind::PlusAssign
                } else {
                    TokenKind::Plus
                }
            }
            b'-' => {
                if self.advance_if(b'-') {
                    TokenKind::MinusMinus
                } else if self.advance_if(b'=') {
                    TokenKind::MinusAssign
                } else {
                    TokenKind::Minus
                }
            }
            b'*' => {
                if self.advance_if(b'*') {
                    if self.advance_if(b'=') {
                        TokenKind::ExpAssign
                    } else {
                        TokenKind::Exp
                    }
                } else if self.advance_if(b'=') {
                    TokenKind::StarAssign
                } else {
                    TokenKind::Star
                }
            }
            b'/' => {
                // Only reached for division (RegExp was handled earlier).
                if self.advance_if(b'=') {
                    TokenKind::SlashAssign
                } else {
                    TokenKind::Slash
                }
            }
            b'%' => {
                if self.advance_if(b'=') {
                    TokenKind::PercentAssign
                } else {
                    TokenKind::Percent
                }
            }
            b'=' => {
                if self.advance_if(b'=') {
                    if self.advance_if(b'=') {
                        TokenKind::StrictEq
                    } else {
                        TokenKind::Eq
                    }
                } else if self.advance_if(b'>') {
                    TokenKind::Arrow
                } else {
                    TokenKind::Assign
                }
            }
            b'!' => {
                if self.advance_if(b'=') {
                    if self.advance_if(b'=') {
                        TokenKind::StrictNe
                    } else {
                        TokenKind::Ne
                    }
                } else {
                    TokenKind::Not
                }
            }
            b'<' => {
                if self.advance_if(b'<') {
                    if self.advance_if(b'=') {
                        TokenKind::ShlAssign
                    } else {
                        TokenKind::Shl
                    }
                } else if self.advance_if(b'=') {
                    TokenKind::Le
                } else {
                    TokenKind::Lt
                }
            }
            b'>' => {
                if self.advance_if(b'>') {
                    if self.advance_if(b'>') {
                        if self.advance_if(b'=') {
                            TokenKind::UshrAssign
                        } else {
                            TokenKind::Ushr
                        }
                    } else if self.advance_if(b'=') {
                        TokenKind::ShrAssign
                    } else {
                        TokenKind::Shr
                    }
                } else if self.advance_if(b'=') {
                    TokenKind::Ge
                } else {
                    TokenKind::Gt
                }
            }
            b'&' => {
                if self.advance_if(b'&') {
                    if self.advance_if(b'=') {
                        TokenKind::AndAssign
                    } else {
                        TokenKind::And
                    }
                } else if self.advance_if(b'=') {
                    TokenKind::AmpAssign
                } else {
                    TokenKind::Amp
                }
            }
            b'|' => {
                if self.advance_if(b'|') {
                    if self.advance_if(b'=') {
                        TokenKind::OrAssign
                    } else {
                        TokenKind::Or
                    }
                } else if self.advance_if(b'=') {
                    TokenKind::PipeAssign
                } else {
                    TokenKind::Pipe
                }
            }
            b'^' => {
                if self.advance_if(b'=') {
                    TokenKind::CaretAssign
                } else {
                    TokenKind::Caret
                }
            }
            _ => {
                return Err(LexError {
                    message: format!("unexpected character: {:?}", b as char),
                    pos,
                });
            }
        };
        Ok(kind)
    }
}

// ── Keyword lookup ───────────────────────────────────────────

/// Map identifier text to its keyword token, or wrap it as `Identifier`.
fn keyword_or_ident(s: &str) -> TokenKind {
    match s {
        "await" => TokenKind::Await,
        "break" => TokenKind::Break,
        "case" => TokenKind::Case,
        "catch" => TokenKind::Catch,
        "class" => TokenKind::Class,
        "const" => TokenKind::Const,
        "continue" => TokenKind::Continue,
        "debugger" => TokenKind::Debugger,
        "default" => TokenKind::Default,
        "delete" => TokenKind::Delete,
        "do" => TokenKind::Do,
        "else" => TokenKind::Else,
        "export" => TokenKind::Export,
        "extends" => TokenKind::Extends,
        "finally" => TokenKind::Finally,
        "for" => TokenKind::For,
        "function" => TokenKind::Function,
        "if" => TokenKind::If,
        "import" => TokenKind::Import,
        "in" => TokenKind::In,
        "instanceof" => TokenKind::Instanceof,
        "let" => TokenKind::Let,
        "new" => TokenKind::New,
        "of" => TokenKind::Of,
        "return" => TokenKind::Return,
        "static" => TokenKind::Static,
        "super" => TokenKind::Super,
        "switch" => TokenKind::Switch,
        "this" => TokenKind::This,
        "throw" => TokenKind::Throw,
        "try" => TokenKind::Try,
        "typeof" => TokenKind::Typeof,
        "var" => TokenKind::Var,
        "void" => TokenKind::Void,
        "while" => TokenKind::While,
        "with" => TokenKind::With,
        "yield" => TokenKind::Yield,
        "async" => TokenKind::Async,
        "true" => TokenKind::True,
        "false" => TokenKind::False,
        "null" => TokenKind::Null,
        _ => TokenKind::Identifier(s.to_owned()),
    }
}

// ── Expression-end tracking ──────────────────────────────────

/// Returns `true` if a token of this kind could end an expression.
/// Used to decide whether a following `/` is division or a RegExp literal.
/// NOTE(review): `RBrace` is inherently ambiguous (block vs object
/// literal); treating it as expression-ending is a heuristic.
fn token_is_expr_end(kind: &TokenKind) -> bool {
    matches!(
        kind,
        TokenKind::Identifier(_)
            | TokenKind::Number(_)
            | TokenKind::String(_)
            | TokenKind::TemplateFull(_)
            | TokenKind::TemplateTail(_)
            | TokenKind::True
            | TokenKind::False
            | TokenKind::Null
            | TokenKind::This
            | TokenKind::Super
            | TokenKind::RParen
            | TokenKind::RBracket
            | TokenKind::RBrace
            | TokenKind::PlusPlus
            | TokenKind::MinusMinus
            | TokenKind::RegExp { .. }
    )
}

// ── Unicode helpers ──────────────────────────────────────────

/// Check if the byte sequence at `pos` starts a valid Unicode identifier start character.
/// NOTE(review): `is_alphabetic` approximates the ID_Start property;
/// full conformance requires the Unicode ID_Start tables.
fn is_unicode_id_start(source: &[u8], pos: usize) -> bool {
    let s = std::str::from_utf8(&source[pos..]).unwrap_or("");
    if let Some(ch) = s.chars().next() {
        ch.is_alphabetic() || ch == '_' || ch == '$'
    } else {
        false
    }
}

/// Check if the byte sequence at `pos` starts a valid Unicode identifier continue character.
fn is_unicode_id_continue(source: &[u8], pos: usize) -> bool { let s = std::str::from_utf8(&source[pos..]).unwrap_or(""); if let Some(ch) = s.chars().next() { ch.is_alphanumeric() || ch == '_' || ch == '$' || ch == '\u{200C}' || ch == '\u{200D}' } else { false } } // ── Numeric parsing helpers ────────────────────────────────── fn hex_digit_val(b: u8) -> Option { match b { b'0'..=b'9' => Some(b - b'0'), b'a'..=b'f' => Some(b - b'a' + 10), b'A'..=b'F' => Some(b - b'A' + 10), _ => None, } } fn parse_decimal(s: &str) -> f64 { let s = s.replace('_', ""); let s = s.trim_end_matches('n'); // Use manual parsing for basic decimal and float if let Ok(v) = s.parse::() { return v; } 0.0 } fn u64_from_hex(s: &str) -> u64 { let mut result: u64 = 0; for b in s.bytes() { if b == b'_' { continue; } let d = hex_digit_val(b).unwrap_or(0) as u64; result = result.wrapping_mul(16).wrapping_add(d); } result } fn u64_from_octal(s: &str) -> u64 { let mut result: u64 = 0; for b in s.bytes() { if b == b'_' { continue; } let d = (b - b'0') as u64; result = result.wrapping_mul(8).wrapping_add(d); } result } fn u64_from_binary(s: &str) -> u64 { let mut result: u64 = 0; for b in s.bytes() { if b == b'_' { continue; } let d = (b - b'0') as u64; result = result.wrapping_mul(2).wrapping_add(d); } result } // ── Error type ─────────────────────────────────────────────── /// An error produced during lexing. 
#[derive(Debug, Clone, PartialEq, Eq)] pub struct LexError { pub message: std::string::String, pub pos: SourcePos, } impl fmt::Display for LexError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "LexError at {}:{}: {}", self.pos.line, self.pos.col, self.message ) } } // ── Tests ──────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; fn kinds(src: &str) -> Vec { Lexer::tokenize(src) .unwrap() .into_iter() .map(|t| t.kind) .collect() } fn kind(src: &str) -> TokenKind { let tokens = Lexer::tokenize(src).unwrap(); assert!(tokens.len() >= 2, "expected at least one token + Eof"); tokens[0].kind.clone() } // ── Keywords ────────────────────────────────────────── #[test] fn test_keywords() { assert_eq!(kind("var"), TokenKind::Var); assert_eq!(kind("let"), TokenKind::Let); assert_eq!(kind("const"), TokenKind::Const); assert_eq!(kind("function"), TokenKind::Function); assert_eq!(kind("class"), TokenKind::Class); assert_eq!(kind("if"), TokenKind::If); assert_eq!(kind("else"), TokenKind::Else); assert_eq!(kind("for"), TokenKind::For); assert_eq!(kind("while"), TokenKind::While); assert_eq!(kind("do"), TokenKind::Do); assert_eq!(kind("switch"), TokenKind::Switch); assert_eq!(kind("case"), TokenKind::Case); assert_eq!(kind("break"), TokenKind::Break); assert_eq!(kind("continue"), TokenKind::Continue); assert_eq!(kind("return"), TokenKind::Return); assert_eq!(kind("throw"), TokenKind::Throw); assert_eq!(kind("try"), TokenKind::Try); assert_eq!(kind("catch"), TokenKind::Catch); assert_eq!(kind("finally"), TokenKind::Finally); assert_eq!(kind("new"), TokenKind::New); assert_eq!(kind("delete"), TokenKind::Delete); assert_eq!(kind("typeof"), TokenKind::Typeof); assert_eq!(kind("instanceof"), TokenKind::Instanceof); assert_eq!(kind("void"), TokenKind::Void); assert_eq!(kind("in"), TokenKind::In); assert_eq!(kind("of"), TokenKind::Of); assert_eq!(kind("import"), TokenKind::Import); assert_eq!(kind("export"), 
TokenKind::Export); assert_eq!(kind("default"), TokenKind::Default); assert_eq!(kind("async"), TokenKind::Async); assert_eq!(kind("await"), TokenKind::Await); assert_eq!(kind("yield"), TokenKind::Yield); assert_eq!(kind("this"), TokenKind::This); assert_eq!(kind("super"), TokenKind::Super); assert_eq!(kind("extends"), TokenKind::Extends); assert_eq!(kind("static"), TokenKind::Static); assert_eq!(kind("debugger"), TokenKind::Debugger); assert_eq!(kind("with"), TokenKind::With); } #[test] fn test_literal_keywords() { assert_eq!(kind("true"), TokenKind::True); assert_eq!(kind("false"), TokenKind::False); assert_eq!(kind("null"), TokenKind::Null); } // ── Identifiers ────────────────────────────────────── #[test] fn test_identifiers() { assert_eq!(kind("foo"), TokenKind::Identifier("foo".into())); assert_eq!(kind("_bar"), TokenKind::Identifier("_bar".into())); assert_eq!(kind("$baz"), TokenKind::Identifier("$baz".into())); assert_eq!(kind("abc123"), TokenKind::Identifier("abc123".into())); assert_eq!(kind("camelCase"), TokenKind::Identifier("camelCase".into())); } #[test] fn test_unicode_identifiers() { assert_eq!(kind("café"), TokenKind::Identifier("café".into())); } // ── Numbers ────────────────────────────────────────── #[test] fn test_integers() { assert_eq!(kind("0"), TokenKind::Number(0.0)); assert_eq!(kind("42"), TokenKind::Number(42.0)); assert_eq!(kind("123456"), TokenKind::Number(123456.0)); } #[test] fn test_floats() { assert_eq!(kind("3.14"), TokenKind::Number(3.14)); assert_eq!(kind("0.5"), TokenKind::Number(0.5)); assert_eq!(kind(".5"), TokenKind::Number(0.5)); assert_eq!(kind("1."), TokenKind::Number(1.0)); } #[test] fn test_exponents() { assert_eq!(kind("1e2"), TokenKind::Number(100.0)); assert_eq!(kind("1E2"), TokenKind::Number(100.0)); assert_eq!(kind("1e+2"), TokenKind::Number(100.0)); assert_eq!(kind("1e-2"), TokenKind::Number(0.01)); assert_eq!(kind("2.5e3"), TokenKind::Number(2500.0)); } #[test] fn test_hex() { assert_eq!(kind("0xFF"), 
TokenKind::Number(255.0)); assert_eq!(kind("0x0"), TokenKind::Number(0.0)); assert_eq!(kind("0xDEAD"), TokenKind::Number(0xDEAD as f64)); } #[test] fn test_octal() { assert_eq!(kind("0o77"), TokenKind::Number(63.0)); assert_eq!(kind("0O10"), TokenKind::Number(8.0)); } #[test] fn test_binary() { assert_eq!(kind("0b1010"), TokenKind::Number(10.0)); assert_eq!(kind("0B11"), TokenKind::Number(3.0)); } #[test] fn test_numeric_separators() { assert_eq!(kind("1_000"), TokenKind::Number(1000.0)); assert_eq!(kind("0xFF_FF"), TokenKind::Number(65535.0)); assert_eq!(kind("0b1010_0101"), TokenKind::Number(165.0)); } // ── Strings ────────────────────────────────────────── #[test] fn test_double_quoted_string() { assert_eq!(kind(r#""hello""#), TokenKind::String("hello".into())); } #[test] fn test_single_quoted_string() { assert_eq!(kind("'world'"), TokenKind::String("world".into())); } #[test] fn test_string_escapes() { assert_eq!(kind(r#""\n\t\r""#), TokenKind::String("\n\t\r".into())); assert_eq!(kind(r#""\\""#), TokenKind::String("\\".into())); assert_eq!(kind(r#""\"""#), TokenKind::String("\"".into())); } #[test] fn test_string_hex_escape() { assert_eq!(kind(r#""\x41""#), TokenKind::String("A".into())); } #[test] fn test_string_unicode_escape() { assert_eq!(kind(r#""\u0041""#), TokenKind::String("A".into())); assert_eq!( kind(r#""\u{1F600}""#), TokenKind::String("\u{1F600}".into()) ); } #[test] fn test_string_line_continuation() { // \ is a line continuation producing no character assert_eq!( kind("\"line1\\\nline2\""), TokenKind::String("line1line2".into()) ); } #[test] fn test_empty_string() { assert_eq!(kind(r#""""#), TokenKind::String("".into())); assert_eq!(kind("''"), TokenKind::String("".into())); } // ── Template Literals ──────────────────────────────── #[test] fn test_template_no_substitution() { assert_eq!(kind("`hello`"), TokenKind::TemplateFull("hello".into())); } #[test] fn test_template_with_substitution() { let tokens = Lexer::tokenize("`hello 
${name}!`").unwrap(); let k: Vec<_> = tokens.iter().map(|t| &t.kind).collect(); assert_eq!(k[0], &TokenKind::TemplateHead("hello ".into())); assert_eq!(k[1], &TokenKind::Identifier("name".into())); assert_eq!(k[2], &TokenKind::TemplateTail("!".into())); } #[test] fn test_template_multiple_substitutions() { let tokens = Lexer::tokenize("`a${1}b${2}c`").unwrap(); let k: Vec<_> = tokens.iter().map(|t| &t.kind).collect(); assert_eq!(k[0], &TokenKind::TemplateHead("a".into())); assert_eq!(k[1], &TokenKind::Number(1.0)); assert_eq!(k[2], &TokenKind::TemplateMiddle("b".into())); assert_eq!(k[3], &TokenKind::Number(2.0)); assert_eq!(k[4], &TokenKind::TemplateTail("c".into())); } #[test] fn test_template_with_nested_braces() { // `${({a:1})}` — the object literal inside ${ } has its own braces let tokens = Lexer::tokenize("`${({a:1})}`").unwrap(); let k: Vec<_> = tokens.iter().map(|t| &t.kind).collect(); assert_eq!(k[0], &TokenKind::TemplateHead("".into())); assert_eq!(k[1], &TokenKind::LParen); assert_eq!(k[2], &TokenKind::LBrace); assert_eq!(k[3], &TokenKind::Identifier("a".into())); assert_eq!(k[4], &TokenKind::Colon); assert_eq!(k[5], &TokenKind::Number(1.0)); assert_eq!(k[6], &TokenKind::RBrace); assert_eq!(k[7], &TokenKind::RParen); assert_eq!(k[8], &TokenKind::TemplateTail("".into())); } // ── Regular Expressions ────────────────────────────── #[test] fn test_regexp_basic() { let tokens = Lexer::tokenize("x = /foo/gi").unwrap(); let k: Vec<_> = tokens.iter().map(|t| &t.kind).collect(); assert_eq!( k[2], &TokenKind::RegExp { pattern: "foo".into(), flags: "gi".into() } ); } #[test] fn test_regexp_with_class() { // /[a-z]/ — the `/` inside the character class is not the end let tokens = Lexer::tokenize("x = /[a/b]/").unwrap(); let k: Vec<_> = tokens.iter().map(|t| &t.kind).collect(); assert_eq!( k[2], &TokenKind::RegExp { pattern: "[a/b]".into(), flags: "".into() } ); } #[test] fn test_regexp_vs_division() { // After an identifier, `/` is division let tokens = 
Lexer::tokenize("a / b").unwrap(); let k: Vec<_> = tokens.iter().map(|t| &t.kind).collect(); assert_eq!(k[1], &TokenKind::Slash); } // ── Punctuators ────────────────────────────────────── #[test] fn test_simple_punctuators() { assert_eq!(kind("("), TokenKind::LParen); assert_eq!(kind(")"), TokenKind::RParen); assert_eq!(kind("["), TokenKind::LBracket); assert_eq!(kind("]"), TokenKind::RBracket); assert_eq!(kind("{"), TokenKind::LBrace); assert_eq!(kind("}"), TokenKind::RBrace); assert_eq!(kind(";"), TokenKind::Semicolon); assert_eq!(kind(","), TokenKind::Comma); assert_eq!(kind(":"), TokenKind::Colon); assert_eq!(kind("~"), TokenKind::Tilde); } #[test] fn test_dot_and_ellipsis() { assert_eq!(kind("."), TokenKind::Dot); assert_eq!(kind("..."), TokenKind::Ellipsis); } #[test] fn test_arrow() { assert_eq!(kind("=>"), TokenKind::Arrow); } #[test] fn test_optional_chaining() { assert_eq!(kind("?."), TokenKind::QuestionDot); } #[test] fn test_comparison_operators() { assert_eq!(kind("=="), TokenKind::Eq); assert_eq!(kind("!="), TokenKind::Ne); assert_eq!(kind("==="), TokenKind::StrictEq); assert_eq!(kind("!=="), TokenKind::StrictNe); assert_eq!(kind("<"), TokenKind::Lt); assert_eq!(kind(">"), TokenKind::Gt); assert_eq!(kind("<="), TokenKind::Le); assert_eq!(kind(">="), TokenKind::Ge); } #[test] fn test_arithmetic_operators() { assert_eq!(kind("+"), TokenKind::Plus); assert_eq!(kind("-"), TokenKind::Minus); assert_eq!(kind("*"), TokenKind::Star); assert_eq!(kind("%"), TokenKind::Percent); assert_eq!(kind("**"), TokenKind::Exp); assert_eq!(kind("++"), TokenKind::PlusPlus); assert_eq!(kind("--"), TokenKind::MinusMinus); } #[test] fn test_bitwise_operators() { assert_eq!(kind("&"), TokenKind::Amp); assert_eq!(kind("|"), TokenKind::Pipe); assert_eq!(kind("^"), TokenKind::Caret); assert_eq!(kind("<<"), TokenKind::Shl); assert_eq!(kind(">>"), TokenKind::Shr); assert_eq!(kind(">>>"), TokenKind::Ushr); } #[test] fn test_logical_operators() { assert_eq!(kind("&&"), 
TokenKind::And); assert_eq!(kind("||"), TokenKind::Or); assert_eq!(kind("!"), TokenKind::Not); assert_eq!(kind("??"), TokenKind::Nullish); } #[test] fn test_assignment_operators() { assert_eq!(kind("="), TokenKind::Assign); assert_eq!(kind("+="), TokenKind::PlusAssign); assert_eq!(kind("-="), TokenKind::MinusAssign); assert_eq!(kind("*="), TokenKind::StarAssign); assert_eq!(kind("%="), TokenKind::PercentAssign); assert_eq!(kind("**="), TokenKind::ExpAssign); assert_eq!(kind("&="), TokenKind::AmpAssign); assert_eq!(kind("|="), TokenKind::PipeAssign); assert_eq!(kind("^="), TokenKind::CaretAssign); assert_eq!(kind("<<="), TokenKind::ShlAssign); assert_eq!(kind(">>="), TokenKind::ShrAssign); assert_eq!(kind(">>>="), TokenKind::UshrAssign); assert_eq!(kind("&&="), TokenKind::AndAssign); assert_eq!(kind("||="), TokenKind::OrAssign); assert_eq!(kind("??="), TokenKind::NullishAssign); } // ── Comments ───────────────────────────────────────── #[test] fn test_single_line_comment() { let tokens = kinds("a // comment\nb"); assert_eq!(tokens.len(), 3); // a, b, Eof assert_eq!(tokens[0], TokenKind::Identifier("a".into())); assert_eq!(tokens[1], TokenKind::Identifier("b".into())); } #[test] fn test_multi_line_comment() { let tokens = kinds("a /* comment */ b"); assert_eq!(tokens.len(), 3); assert_eq!(tokens[0], TokenKind::Identifier("a".into())); assert_eq!(tokens[1], TokenKind::Identifier("b".into())); } // ── Source positions ───────────────────────────────── #[test] fn test_source_positions() { let tokens = Lexer::tokenize("let x = 42").unwrap(); // `let` at line 1, col 1 assert_eq!(tokens[0].span.start, SourcePos { line: 1, col: 1 }); // `x` at line 1, col 5 assert_eq!(tokens[1].span.start, SourcePos { line: 1, col: 5 }); // `=` at line 1, col 7 assert_eq!(tokens[2].span.start, SourcePos { line: 1, col: 7 }); // `42` at line 1, col 9 assert_eq!(tokens[3].span.start, SourcePos { line: 1, col: 9 }); } #[test] fn test_multiline_positions() { let tokens = 
Lexer::tokenize("a\nb\nc").unwrap(); assert_eq!(tokens[0].span.start, SourcePos { line: 1, col: 1 }); assert_eq!(tokens[1].span.start, SourcePos { line: 2, col: 1 }); assert_eq!(tokens[2].span.start, SourcePos { line: 3, col: 1 }); } // ── Newline tracking (ASI) ─────────────────────────── #[test] fn test_preceded_by_newline() { let tokens = Lexer::tokenize("a\nb").unwrap(); assert!(!tokens[0].preceded_by_newline); // `a` assert!(tokens[1].preceded_by_newline); // `b` } // ── Error cases ────────────────────────────────────── #[test] fn test_unterminated_string() { assert!(Lexer::tokenize("\"hello").is_err()); } #[test] fn test_unterminated_block_comment() { assert!(Lexer::tokenize("/* oops").is_err()); } #[test] fn test_unterminated_template() { assert!(Lexer::tokenize("`hello").is_err()); } #[test] fn test_bad_hex_literal() { assert!(Lexer::tokenize("0x").is_err()); } // ── Full statement tokenization ────────────────────── #[test] fn test_full_statement() { let tokens = kinds("const x = 42 + y;"); assert_eq!( tokens, vec![ TokenKind::Const, TokenKind::Identifier("x".into()), TokenKind::Assign, TokenKind::Number(42.0), TokenKind::Plus, TokenKind::Identifier("y".into()), TokenKind::Semicolon, TokenKind::Eof, ] ); } #[test] fn test_arrow_function() { let tokens = kinds("(x) => x + 1"); assert_eq!( tokens, vec![ TokenKind::LParen, TokenKind::Identifier("x".into()), TokenKind::RParen, TokenKind::Arrow, TokenKind::Identifier("x".into()), TokenKind::Plus, TokenKind::Number(1.0), TokenKind::Eof, ] ); } #[test] fn test_complex_expression() { let tokens = kinds("a?.b ?? 
c !== d"); assert_eq!( tokens, vec![ TokenKind::Identifier("a".into()), TokenKind::QuestionDot, TokenKind::Identifier("b".into()), TokenKind::Nullish, TokenKind::Identifier("c".into()), TokenKind::StrictNe, TokenKind::Identifier("d".into()), TokenKind::Eof, ] ); } #[test] fn test_division_after_paren() { // `(a) / b` — the `/` after `)` should be division, not regexp let tokens = kinds("(a) / b"); assert_eq!( tokens, vec![ TokenKind::LParen, TokenKind::Identifier("a".into()), TokenKind::RParen, TokenKind::Slash, TokenKind::Identifier("b".into()), TokenKind::Eof, ] ); } #[test] fn test_slash_assign() { let tokens = kinds("a /= b"); assert_eq!( tokens, vec![ TokenKind::Identifier("a".into()), TokenKind::SlashAssign, TokenKind::Identifier("b".into()), TokenKind::Eof, ] ); } #[test] fn test_regexp_after_assign() { let tokens = kinds("x = /test/g"); assert_eq!( tokens, vec![ TokenKind::Identifier("x".into()), TokenKind::Assign, TokenKind::RegExp { pattern: "test".into(), flags: "g".into() }, TokenKind::Eof, ] ); } }