Serenity Operating System
at master 259 lines 12 kB view raw
1/* 2 * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#pragma once 8 9#include <AK/DeprecatedFlyString.h> 10#include <AK/String.h> 11#include <AK/StringView.h> 12#include <AK/Variant.h> 13 14namespace JS { 15 16// U+2028 LINE SEPARATOR 17constexpr char const line_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa8, 0 }; 18constexpr const StringView LINE_SEPARATOR_STRING { line_separator_chars, sizeof(line_separator_chars) - 1 }; 19constexpr const u32 LINE_SEPARATOR { 0x2028 }; 20 21// U+2029 PARAGRAPH SEPARATOR 22constexpr char const paragraph_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa9, 0 }; 23constexpr const StringView PARAGRAPH_SEPARATOR_STRING { paragraph_separator_chars, sizeof(paragraph_separator_chars) - 1 }; 24constexpr const u32 PARAGRAPH_SEPARATOR { 0x2029 }; 25 26// U+00A0 NO BREAK SPACE 27constexpr const u32 NO_BREAK_SPACE { 0x00A0 }; 28 29// U+200C ZERO WIDTH NON-JOINER 30constexpr const u32 ZERO_WIDTH_NON_JOINER { 0x200C }; 31 32// U+FEFF ZERO WIDTH NO-BREAK SPACE 33constexpr const u32 ZERO_WIDTH_NO_BREAK_SPACE { 0xFEFF }; 34 35// U+200D ZERO WIDTH JOINER 36constexpr const u32 ZERO_WIDTH_JOINER { 0x200D }; 37 38#define ENUMERATE_JS_TOKENS \ 39 __ENUMERATE_JS_TOKEN(Ampersand, Operator) \ 40 __ENUMERATE_JS_TOKEN(AmpersandEquals, Operator) \ 41 __ENUMERATE_JS_TOKEN(Arrow, Operator) \ 42 __ENUMERATE_JS_TOKEN(Asterisk, Operator) \ 43 __ENUMERATE_JS_TOKEN(AsteriskEquals, Operator) \ 44 __ENUMERATE_JS_TOKEN(Async, Keyword) \ 45 __ENUMERATE_JS_TOKEN(Await, Keyword) \ 46 __ENUMERATE_JS_TOKEN(BigIntLiteral, Number) \ 47 __ENUMERATE_JS_TOKEN(BoolLiteral, Keyword) \ 48 __ENUMERATE_JS_TOKEN(BracketClose, Punctuation) \ 49 __ENUMERATE_JS_TOKEN(BracketOpen, Punctuation) \ 50 __ENUMERATE_JS_TOKEN(Break, Keyword) \ 51 __ENUMERATE_JS_TOKEN(Caret, Operator) \ 52 __ENUMERATE_JS_TOKEN(CaretEquals, Operator) \ 53 __ENUMERATE_JS_TOKEN(Case, ControlKeyword) \ 54 __ENUMERATE_JS_TOKEN(Catch, ControlKeyword) \ 55 __ENUMERATE_JS_TOKEN(Class, Keyword) \ 56 __ENUMERATE_JS_TOKEN(Colon, Punctuation) \ 57 __ENUMERATE_JS_TOKEN(Comma, Punctuation) \ 58 __ENUMERATE_JS_TOKEN(Const, Keyword) \ 59 __ENUMERATE_JS_TOKEN(Continue, ControlKeyword) \ 60 __ENUMERATE_JS_TOKEN(CurlyClose, Punctuation) \ 61 __ENUMERATE_JS_TOKEN(CurlyOpen, Punctuation) \ 62 __ENUMERATE_JS_TOKEN(Debugger, Keyword) \ 63 __ENUMERATE_JS_TOKEN(Default, ControlKeyword) \ 64 __ENUMERATE_JS_TOKEN(Delete, Keyword) \ 65 __ENUMERATE_JS_TOKEN(Do, ControlKeyword) \ 66 __ENUMERATE_JS_TOKEN(DoubleAmpersand, Operator) \ 67 __ENUMERATE_JS_TOKEN(DoubleAmpersandEquals, Operator) \ 68 __ENUMERATE_JS_TOKEN(DoubleAsterisk, Operator) \ 69 __ENUMERATE_JS_TOKEN(DoubleAsteriskEquals, Operator) \ 70 __ENUMERATE_JS_TOKEN(DoublePipe, Operator) \ 71 __ENUMERATE_JS_TOKEN(DoublePipeEquals, Operator) \ 72 __ENUMERATE_JS_TOKEN(DoubleQuestionMark, Operator) \ 73 __ENUMERATE_JS_TOKEN(DoubleQuestionMarkEquals, Operator) \ 74 __ENUMERATE_JS_TOKEN(Else, ControlKeyword) \ 75 __ENUMERATE_JS_TOKEN(Enum, Keyword) \ 76 __ENUMERATE_JS_TOKEN(Eof, Invalid) \ 77 __ENUMERATE_JS_TOKEN(Equals, Operator) \ 78 __ENUMERATE_JS_TOKEN(EqualsEquals, Operator) \ 79 __ENUMERATE_JS_TOKEN(EqualsEqualsEquals, Operator) \ 80 __ENUMERATE_JS_TOKEN(EscapedKeyword, Identifier) \ 81 __ENUMERATE_JS_TOKEN(ExclamationMark, Operator) \ 82 __ENUMERATE_JS_TOKEN(ExclamationMarkEquals, Operator) \ 83 __ENUMERATE_JS_TOKEN(ExclamationMarkEqualsEquals, Operator) \ 84 __ENUMERATE_JS_TOKEN(Export, Keyword) \ 85 __ENUMERATE_JS_TOKEN(Extends, Keyword) \ 86 __ENUMERATE_JS_TOKEN(Finally, ControlKeyword) \ 87 __ENUMERATE_JS_TOKEN(For, ControlKeyword) \ 88 __ENUMERATE_JS_TOKEN(Function, Keyword) \ 89 __ENUMERATE_JS_TOKEN(GreaterThan, Operator) \ 90 __ENUMERATE_JS_TOKEN(GreaterThanEquals, Operator) \ 91 __ENUMERATE_JS_TOKEN(Identifier, Identifier) \ 92 __ENUMERATE_JS_TOKEN(If, ControlKeyword) \ 93 __ENUMERATE_JS_TOKEN(Implements, Keyword) \ 94 __ENUMERATE_JS_TOKEN(Import, Keyword) \ 95 __ENUMERATE_JS_TOKEN(In, Keyword) \ 96 __ENUMERATE_JS_TOKEN(Instanceof, Keyword) \ 97 __ENUMERATE_JS_TOKEN(Interface, Keyword) \ 98 __ENUMERATE_JS_TOKEN(Invalid, Invalid) \ 99 __ENUMERATE_JS_TOKEN(LessThan, Operator) \ 100 __ENUMERATE_JS_TOKEN(LessThanEquals, Operator) \ 101 __ENUMERATE_JS_TOKEN(Let, Keyword) \ 102 __ENUMERATE_JS_TOKEN(Minus, Operator) \ 103 __ENUMERATE_JS_TOKEN(MinusEquals, Operator) \ 104 __ENUMERATE_JS_TOKEN(MinusMinus, Operator) \ 105 __ENUMERATE_JS_TOKEN(New, Keyword) \ 106 __ENUMERATE_JS_TOKEN(NullLiteral, Keyword) \ 107 __ENUMERATE_JS_TOKEN(NumericLiteral, Number) \ 108 __ENUMERATE_JS_TOKEN(Package, Keyword) \ 109 __ENUMERATE_JS_TOKEN(ParenClose, Punctuation) \ 110 __ENUMERATE_JS_TOKEN(ParenOpen, Punctuation) \ 111 __ENUMERATE_JS_TOKEN(Percent, Operator) \ 112 __ENUMERATE_JS_TOKEN(PercentEquals, Operator) \ 113 __ENUMERATE_JS_TOKEN(Period, Operator) \ 114 __ENUMERATE_JS_TOKEN(Pipe, Operator) \ 115 __ENUMERATE_JS_TOKEN(PipeEquals, Operator) \ 116 __ENUMERATE_JS_TOKEN(Plus, Operator) \ 117 __ENUMERATE_JS_TOKEN(PlusEquals, Operator) \ 118 __ENUMERATE_JS_TOKEN(PlusPlus, Operator) \ 119 __ENUMERATE_JS_TOKEN(Private, Keyword) \ 120 __ENUMERATE_JS_TOKEN(PrivateIdentifier, Identifier) \ 121 __ENUMERATE_JS_TOKEN(Protected, Keyword) \ 122 __ENUMERATE_JS_TOKEN(Public, Keyword) \ 123 __ENUMERATE_JS_TOKEN(QuestionMark, Operator) \ 124 __ENUMERATE_JS_TOKEN(QuestionMarkPeriod, Operator) \ 125 __ENUMERATE_JS_TOKEN(RegexFlags, String) \ 126 __ENUMERATE_JS_TOKEN(RegexLiteral, String) \ 127 __ENUMERATE_JS_TOKEN(Return, ControlKeyword) \ 128 __ENUMERATE_JS_TOKEN(Semicolon, Punctuation) \ 129 __ENUMERATE_JS_TOKEN(ShiftLeft, Operator) \ 130 __ENUMERATE_JS_TOKEN(ShiftLeftEquals, Operator) \ 131 __ENUMERATE_JS_TOKEN(ShiftRight, Operator) \ 132 __ENUMERATE_JS_TOKEN(ShiftRightEquals, Operator) \ 133 __ENUMERATE_JS_TOKEN(Slash, Operator) \ 134 __ENUMERATE_JS_TOKEN(SlashEquals, Operator) \ 135 __ENUMERATE_JS_TOKEN(Static, Keyword) \ 136 __ENUMERATE_JS_TOKEN(StringLiteral, String) \ 137 __ENUMERATE_JS_TOKEN(Super, Keyword) \ 138 __ENUMERATE_JS_TOKEN(Switch, ControlKeyword) \ 139 __ENUMERATE_JS_TOKEN(TemplateLiteralEnd, String) \ 140 __ENUMERATE_JS_TOKEN(TemplateLiteralExprEnd, Punctuation) \ 141 __ENUMERATE_JS_TOKEN(TemplateLiteralExprStart, Punctuation) \ 142 __ENUMERATE_JS_TOKEN(TemplateLiteralStart, String) \ 143 __ENUMERATE_JS_TOKEN(TemplateLiteralString, String) \ 144 __ENUMERATE_JS_TOKEN(This, Keyword) \ 145 __ENUMERATE_JS_TOKEN(Throw, ControlKeyword) \ 146 __ENUMERATE_JS_TOKEN(Tilde, Operator) \ 147 __ENUMERATE_JS_TOKEN(TripleDot, Operator) \ 148 __ENUMERATE_JS_TOKEN(Try, ControlKeyword) \ 149 __ENUMERATE_JS_TOKEN(Typeof, Keyword) \ 150 __ENUMERATE_JS_TOKEN(UnsignedShiftRight, Operator) \ 151 __ENUMERATE_JS_TOKEN(UnsignedShiftRightEquals, Operator) \ 152 __ENUMERATE_JS_TOKEN(UnterminatedRegexLiteral, String) \ 153 __ENUMERATE_JS_TOKEN(UnterminatedStringLiteral, String) \ 154 __ENUMERATE_JS_TOKEN(UnterminatedTemplateLiteral, String) \ 155 __ENUMERATE_JS_TOKEN(Var, Keyword) \ 156 __ENUMERATE_JS_TOKEN(Void, Keyword) \ 157 __ENUMERATE_JS_TOKEN(While, ControlKeyword) \ 158 __ENUMERATE_JS_TOKEN(With, ControlKeyword) \ 159 __ENUMERATE_JS_TOKEN(Yield, ControlKeyword) 160 161enum class TokenType { 162#define __ENUMERATE_JS_TOKEN(type, category) type, 163 ENUMERATE_JS_TOKENS 164#undef __ENUMERATE_JS_TOKEN 165 _COUNT_OF_TOKENS 166}; 167constexpr size_t cs_num_of_js_tokens = static_cast<size_t>(TokenType::_COUNT_OF_TOKENS); 168 169enum class TokenCategory { 170 Invalid, 171 Number, 172 String, 173 Punctuation, 174 Operator, 175 Keyword, 176 ControlKeyword, 177 Identifier 178}; 179 180class Token { 181public: 182 Token() = default; 183 184 Token(TokenType type, String message, StringView trivia, StringView value, StringView filename, size_t line_number, size_t line_column, size_t offset) 185 : m_type(type) 186 , m_message(move(message)) 187 , m_trivia(trivia) 188 , m_original_value(value) 189 , m_value(value) 190 , m_filename(filename) 191 , m_line_number(line_number) 192 , m_line_column(line_column) 193 , m_offset(offset) 194 { 195 } 196 197 TokenType type() const { return m_type; } 198 TokenCategory category() const; 199 static TokenCategory category(TokenType); 200 char const* name() const; 201 static char const* name(TokenType); 202 203 String const& message() const { return m_message; } 204 StringView trivia() const { return m_trivia; } 205 StringView original_value() const { return m_original_value; } 206 StringView value() const 207 { 208 return m_value.visit( 209 [](StringView view) { return view; }, 210 [](DeprecatedFlyString const& identifier) { return identifier.view(); }, 211 [](Empty) -> StringView { VERIFY_NOT_REACHED(); }); 212 } 213 214 DeprecatedFlyString DeprecatedFlyString_value() const 215 { 216 return m_value.visit( 217 [](StringView view) -> DeprecatedFlyString { return view; }, 218 [](DeprecatedFlyString const& identifier) -> DeprecatedFlyString { return identifier; }, 219 [](Empty) -> DeprecatedFlyString { VERIFY_NOT_REACHED(); }); 220 } 221 222 StringView filename() const { return m_filename; } 223 size_t line_number() const { return m_line_number; } 224 size_t line_column() const { return m_line_column; } 225 size_t offset() const { return m_offset; } 226 double double_value() const; 227 bool bool_value() const; 228 229 enum class StringValueStatus { 230 Ok, 231 MalformedHexEscape, 232 MalformedUnicodeEscape, 233 UnicodeEscapeOverflow, 234 LegacyOctalEscapeSequence, 235 }; 236 DeprecatedString string_value(StringValueStatus& status) const; 237 DeprecatedString raw_template_value() const; 238 239 void set_identifier_value(DeprecatedFlyString value) 240 { 241 m_value = move(value); 242 } 243 244 bool is_identifier_name() const; 245 bool trivia_contains_line_terminator() const; 246 247private: 248 TokenType m_type { TokenType::Invalid }; 249 String m_message; 250 StringView m_trivia; 251 StringView m_original_value; 252 Variant<Empty, StringView, DeprecatedFlyString> m_value {}; 253 StringView m_filename; 254 size_t m_line_number { 0 }; 255 size_t m_line_column { 0 }; 256 size_t m_offset { 0 }; 257}; 258 259}