Serenity Operating System
1/*
2 * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#pragma once
8
9#include <AK/DeprecatedFlyString.h>
10#include <AK/String.h>
11#include <AK/StringView.h>
12#include <AK/Variant.h>
13
14namespace JS {
15
16// U+2028 LINE SEPARATOR
17constexpr char const line_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa8, 0 };
18constexpr const StringView LINE_SEPARATOR_STRING { line_separator_chars, sizeof(line_separator_chars) - 1 };
19constexpr const u32 LINE_SEPARATOR { 0x2028 };
20
21// U+2029 PARAGRAPH SEPARATOR
22constexpr char const paragraph_separator_chars[] { (char)0xe2, (char)0x80, (char)0xa9, 0 };
23constexpr const StringView PARAGRAPH_SEPARATOR_STRING { paragraph_separator_chars, sizeof(paragraph_separator_chars) - 1 };
24constexpr const u32 PARAGRAPH_SEPARATOR { 0x2029 };
25
26// U+00A0 NO BREAK SPACE
27constexpr const u32 NO_BREAK_SPACE { 0x00A0 };
28
29// U+200C ZERO WIDTH NON-JOINER
30constexpr const u32 ZERO_WIDTH_NON_JOINER { 0x200C };
31
32// U+FEFF ZERO WIDTH NO-BREAK SPACE
33constexpr const u32 ZERO_WIDTH_NO_BREAK_SPACE { 0xFEFF };
34
35// U+200D ZERO WIDTH JOINER
36constexpr const u32 ZERO_WIDTH_JOINER { 0x200D };
37
// X-macro table of every JavaScript token type together with its category.
//
// Usage: define __ENUMERATE_JS_TOKEN(type, category), expand
// ENUMERATE_JS_TOKENS, then #undef __ENUMERATE_JS_TOKEN again. The first
// argument is the TokenType enumerator name, the second names a
// TokenCategory enumerator.
//
// Entries are kept in alphabetical order. The order is significant: the
// TokenType enum is generated from this list, so inserting or reordering
// entries changes the numeric value of the enumerators that follow.
//
// NOTE(review): Break is listed as Keyword while Continue, Return and Throw
// are ControlKeyword — confirm this difference is intentional.
#define ENUMERATE_JS_TOKENS \
    __ENUMERATE_JS_TOKEN(Ampersand, Operator) \
    __ENUMERATE_JS_TOKEN(AmpersandEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Arrow, Operator) \
    __ENUMERATE_JS_TOKEN(Asterisk, Operator) \
    __ENUMERATE_JS_TOKEN(AsteriskEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Async, Keyword) \
    __ENUMERATE_JS_TOKEN(Await, Keyword) \
    __ENUMERATE_JS_TOKEN(BigIntLiteral, Number) \
    __ENUMERATE_JS_TOKEN(BoolLiteral, Keyword) \
    __ENUMERATE_JS_TOKEN(BracketClose, Punctuation) \
    __ENUMERATE_JS_TOKEN(BracketOpen, Punctuation) \
    __ENUMERATE_JS_TOKEN(Break, Keyword) \
    __ENUMERATE_JS_TOKEN(Caret, Operator) \
    __ENUMERATE_JS_TOKEN(CaretEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Case, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Catch, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Class, Keyword) \
    __ENUMERATE_JS_TOKEN(Colon, Punctuation) \
    __ENUMERATE_JS_TOKEN(Comma, Punctuation) \
    __ENUMERATE_JS_TOKEN(Const, Keyword) \
    __ENUMERATE_JS_TOKEN(Continue, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(CurlyClose, Punctuation) \
    __ENUMERATE_JS_TOKEN(CurlyOpen, Punctuation) \
    __ENUMERATE_JS_TOKEN(Debugger, Keyword) \
    __ENUMERATE_JS_TOKEN(Default, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Delete, Keyword) \
    __ENUMERATE_JS_TOKEN(Do, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(DoubleAmpersand, Operator) \
    __ENUMERATE_JS_TOKEN(DoubleAmpersandEquals, Operator) \
    __ENUMERATE_JS_TOKEN(DoubleAsterisk, Operator) \
    __ENUMERATE_JS_TOKEN(DoubleAsteriskEquals, Operator) \
    __ENUMERATE_JS_TOKEN(DoublePipe, Operator) \
    __ENUMERATE_JS_TOKEN(DoublePipeEquals, Operator) \
    __ENUMERATE_JS_TOKEN(DoubleQuestionMark, Operator) \
    __ENUMERATE_JS_TOKEN(DoubleQuestionMarkEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Else, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Enum, Keyword) \
    __ENUMERATE_JS_TOKEN(Eof, Invalid) \
    __ENUMERATE_JS_TOKEN(Equals, Operator) \
    __ENUMERATE_JS_TOKEN(EqualsEquals, Operator) \
    __ENUMERATE_JS_TOKEN(EqualsEqualsEquals, Operator) \
    __ENUMERATE_JS_TOKEN(EscapedKeyword, Identifier) \
    __ENUMERATE_JS_TOKEN(ExclamationMark, Operator) \
    __ENUMERATE_JS_TOKEN(ExclamationMarkEquals, Operator) \
    __ENUMERATE_JS_TOKEN(ExclamationMarkEqualsEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Export, Keyword) \
    __ENUMERATE_JS_TOKEN(Extends, Keyword) \
    __ENUMERATE_JS_TOKEN(Finally, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(For, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Function, Keyword) \
    __ENUMERATE_JS_TOKEN(GreaterThan, Operator) \
    __ENUMERATE_JS_TOKEN(GreaterThanEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Identifier, Identifier) \
    __ENUMERATE_JS_TOKEN(If, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Implements, Keyword) \
    __ENUMERATE_JS_TOKEN(Import, Keyword) \
    __ENUMERATE_JS_TOKEN(In, Keyword) \
    __ENUMERATE_JS_TOKEN(Instanceof, Keyword) \
    __ENUMERATE_JS_TOKEN(Interface, Keyword) \
    __ENUMERATE_JS_TOKEN(Invalid, Invalid) \
    __ENUMERATE_JS_TOKEN(LessThan, Operator) \
    __ENUMERATE_JS_TOKEN(LessThanEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Let, Keyword) \
    __ENUMERATE_JS_TOKEN(Minus, Operator) \
    __ENUMERATE_JS_TOKEN(MinusEquals, Operator) \
    __ENUMERATE_JS_TOKEN(MinusMinus, Operator) \
    __ENUMERATE_JS_TOKEN(New, Keyword) \
    __ENUMERATE_JS_TOKEN(NullLiteral, Keyword) \
    __ENUMERATE_JS_TOKEN(NumericLiteral, Number) \
    __ENUMERATE_JS_TOKEN(Package, Keyword) \
    __ENUMERATE_JS_TOKEN(ParenClose, Punctuation) \
    __ENUMERATE_JS_TOKEN(ParenOpen, Punctuation) \
    __ENUMERATE_JS_TOKEN(Percent, Operator) \
    __ENUMERATE_JS_TOKEN(PercentEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Period, Operator) \
    __ENUMERATE_JS_TOKEN(Pipe, Operator) \
    __ENUMERATE_JS_TOKEN(PipeEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Plus, Operator) \
    __ENUMERATE_JS_TOKEN(PlusEquals, Operator) \
    __ENUMERATE_JS_TOKEN(PlusPlus, Operator) \
    __ENUMERATE_JS_TOKEN(Private, Keyword) \
    __ENUMERATE_JS_TOKEN(PrivateIdentifier, Identifier) \
    __ENUMERATE_JS_TOKEN(Protected, Keyword) \
    __ENUMERATE_JS_TOKEN(Public, Keyword) \
    __ENUMERATE_JS_TOKEN(QuestionMark, Operator) \
    __ENUMERATE_JS_TOKEN(QuestionMarkPeriod, Operator) \
    __ENUMERATE_JS_TOKEN(RegexFlags, String) \
    __ENUMERATE_JS_TOKEN(RegexLiteral, String) \
    __ENUMERATE_JS_TOKEN(Return, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Semicolon, Punctuation) \
    __ENUMERATE_JS_TOKEN(ShiftLeft, Operator) \
    __ENUMERATE_JS_TOKEN(ShiftLeftEquals, Operator) \
    __ENUMERATE_JS_TOKEN(ShiftRight, Operator) \
    __ENUMERATE_JS_TOKEN(ShiftRightEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Slash, Operator) \
    __ENUMERATE_JS_TOKEN(SlashEquals, Operator) \
    __ENUMERATE_JS_TOKEN(Static, Keyword) \
    __ENUMERATE_JS_TOKEN(StringLiteral, String) \
    __ENUMERATE_JS_TOKEN(Super, Keyword) \
    __ENUMERATE_JS_TOKEN(Switch, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(TemplateLiteralEnd, String) \
    __ENUMERATE_JS_TOKEN(TemplateLiteralExprEnd, Punctuation) \
    __ENUMERATE_JS_TOKEN(TemplateLiteralExprStart, Punctuation) \
    __ENUMERATE_JS_TOKEN(TemplateLiteralStart, String) \
    __ENUMERATE_JS_TOKEN(TemplateLiteralString, String) \
    __ENUMERATE_JS_TOKEN(This, Keyword) \
    __ENUMERATE_JS_TOKEN(Throw, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Tilde, Operator) \
    __ENUMERATE_JS_TOKEN(TripleDot, Operator) \
    __ENUMERATE_JS_TOKEN(Try, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Typeof, Keyword) \
    __ENUMERATE_JS_TOKEN(UnsignedShiftRight, Operator) \
    __ENUMERATE_JS_TOKEN(UnsignedShiftRightEquals, Operator) \
    __ENUMERATE_JS_TOKEN(UnterminatedRegexLiteral, String) \
    __ENUMERATE_JS_TOKEN(UnterminatedStringLiteral, String) \
    __ENUMERATE_JS_TOKEN(UnterminatedTemplateLiteral, String) \
    __ENUMERATE_JS_TOKEN(Var, Keyword) \
    __ENUMERATE_JS_TOKEN(Void, Keyword) \
    __ENUMERATE_JS_TOKEN(While, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(With, ControlKeyword) \
    __ENUMERATE_JS_TOKEN(Yield, ControlKeyword)
160
161enum class TokenType {
162#define __ENUMERATE_JS_TOKEN(type, category) type,
163 ENUMERATE_JS_TOKENS
164#undef __ENUMERATE_JS_TOKEN
165 _COUNT_OF_TOKENS
166};
167constexpr size_t cs_num_of_js_tokens = static_cast<size_t>(TokenType::_COUNT_OF_TOKENS);
168
// Coarse grouping of token types. Each ENUMERATE_JS_TOKENS entry names one
// of these enumerators as its second argument; the examples below are taken
// from that table.
enum class TokenCategory {
    // Eof and Invalid.
    Invalid,
    // NumericLiteral and BigIntLiteral.
    Number,
    // String, regex and template literal pieces, including unterminated ones.
    String,
    // Brackets, parentheses, curly braces, colon, comma, semicolon and the
    // template literal expression delimiters.
    Punctuation,
    // Arithmetic, logical, comparison and assignment operators, plus Period,
    // Arrow and TripleDot.
    Operator,
    // Keywords such as Class, Const, Function, Let, New, This and Var.
    Keyword,
    // Keywords such as If, Else, For, While, Return, Throw and Try.
    ControlKeyword,
    // Identifier, PrivateIdentifier and EscapedKeyword.
    Identifier,
};
179
180class Token {
181public:
182 Token() = default;
183
184 Token(TokenType type, String message, StringView trivia, StringView value, StringView filename, size_t line_number, size_t line_column, size_t offset)
185 : m_type(type)
186 , m_message(move(message))
187 , m_trivia(trivia)
188 , m_original_value(value)
189 , m_value(value)
190 , m_filename(filename)
191 , m_line_number(line_number)
192 , m_line_column(line_column)
193 , m_offset(offset)
194 {
195 }
196
197 TokenType type() const { return m_type; }
198 TokenCategory category() const;
199 static TokenCategory category(TokenType);
200 char const* name() const;
201 static char const* name(TokenType);
202
203 String const& message() const { return m_message; }
204 StringView trivia() const { return m_trivia; }
205 StringView original_value() const { return m_original_value; }
206 StringView value() const
207 {
208 return m_value.visit(
209 [](StringView view) { return view; },
210 [](DeprecatedFlyString const& identifier) { return identifier.view(); },
211 [](Empty) -> StringView { VERIFY_NOT_REACHED(); });
212 }
213
214 DeprecatedFlyString DeprecatedFlyString_value() const
215 {
216 return m_value.visit(
217 [](StringView view) -> DeprecatedFlyString { return view; },
218 [](DeprecatedFlyString const& identifier) -> DeprecatedFlyString { return identifier; },
219 [](Empty) -> DeprecatedFlyString { VERIFY_NOT_REACHED(); });
220 }
221
222 StringView filename() const { return m_filename; }
223 size_t line_number() const { return m_line_number; }
224 size_t line_column() const { return m_line_column; }
225 size_t offset() const { return m_offset; }
226 double double_value() const;
227 bool bool_value() const;
228
229 enum class StringValueStatus {
230 Ok,
231 MalformedHexEscape,
232 MalformedUnicodeEscape,
233 UnicodeEscapeOverflow,
234 LegacyOctalEscapeSequence,
235 };
236 DeprecatedString string_value(StringValueStatus& status) const;
237 DeprecatedString raw_template_value() const;
238
239 void set_identifier_value(DeprecatedFlyString value)
240 {
241 m_value = move(value);
242 }
243
244 bool is_identifier_name() const;
245 bool trivia_contains_line_terminator() const;
246
247private:
248 TokenType m_type { TokenType::Invalid };
249 String m_message;
250 StringView m_trivia;
251 StringView m_original_value;
252 Variant<Empty, StringView, DeprecatedFlyString> m_value {};
253 StringView m_filename;
254 size_t m_line_number { 0 };
255 size_t m_line_column { 0 };
256 size_t m_offset { 0 };
257};
258
259}