Serenity Operating System
1/*
2 * Copyright (c) 2021, David Tuin <davidot@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <LibJS/Lexer.h>
8#include <LibTest/TestCase.h>
9
10static bool produces_eof_tokens(JS::Lexer& lexer)
11{
12 for (auto i = 0; i < 10; i++) {
13 auto eof_token = lexer.next();
14 if (eof_token.type() != JS::TokenType::Eof)
15 return false;
16 }
17 return true;
18}
19
20static bool triggers_immediate_unicode_fault(StringView code)
21{
22 auto lexer = JS::Lexer(code);
23 auto first_token = lexer.next();
24
25 if (first_token.type() != JS::TokenType::Invalid)
26 return false;
27
28 return produces_eof_tokens(lexer);
29}
// Every non-leading (continuation) byte must have the form 0b10xxxxxx.
// Thus all these options are invalid:
// \x0y = 0000 y (or \x1y, \x2y and \x3y)
// \x4y = 0100 y (or \x5y, \x6y and \x7y)
// \xCy = 1100 y (or \xDy, \xEy and \xFy)
// And the only valid option is:
// \x8y = 1000 y (or \x9y, \xAy and \xBy)
37
38TEST_CASE(no_input_only_gives_eof)
39{
40 auto code = ""sv;
41 auto lexer = JS::Lexer(code);
42 EXPECT(produces_eof_tokens(lexer));
43}
44
45TEST_CASE(invalid_start_code_point)
46{
47 EXPECT(triggers_immediate_unicode_fault("\x80"sv));
48 EXPECT(triggers_immediate_unicode_fault("\x90"sv));
49 EXPECT(triggers_immediate_unicode_fault("\xA0"sv));
50 EXPECT(triggers_immediate_unicode_fault("\xB0"sv));
51 EXPECT(triggers_immediate_unicode_fault("\xF8"sv));
52 EXPECT(triggers_immediate_unicode_fault("\xFF"sv));
53}
54
55TEST_CASE(code_points_of_length_2)
56{
57 // Initial 110xxxxx -> \xCy or \xDy
58 EXPECT(triggers_immediate_unicode_fault("\xC5"sv));
59 EXPECT(triggers_immediate_unicode_fault("\xC5\x02"sv));
60 EXPECT(triggers_immediate_unicode_fault("\xC5\x52"sv));
61 EXPECT(triggers_immediate_unicode_fault("\xC5\xD2"sv));
62
63 EXPECT(triggers_immediate_unicode_fault("\xD5"sv));
64 EXPECT(triggers_immediate_unicode_fault("\xD5\x23"sv));
65 EXPECT(triggers_immediate_unicode_fault("\xD5\x74"sv));
66 EXPECT(triggers_immediate_unicode_fault("\xD5\xF5"sv));
67}
68
69TEST_CASE(code_points_of_length_3)
70{
71 // Initial 1110xxxx -> \xEy
72 EXPECT(triggers_immediate_unicode_fault("\xE5"sv));
73 EXPECT(triggers_immediate_unicode_fault("\xE5\x02"sv));
74 EXPECT(triggers_immediate_unicode_fault("\xE5\x52"sv));
75 EXPECT(triggers_immediate_unicode_fault("\xE5\xD2"sv));
76
77 EXPECT(triggers_immediate_unicode_fault("\xEA\x80"sv));
78 EXPECT(triggers_immediate_unicode_fault("\xEA\x81\x07"sv));
79 EXPECT(triggers_immediate_unicode_fault("\xEA\x82\x57"sv));
80 EXPECT(triggers_immediate_unicode_fault("\xEA\x83\xD7"sv));
81}
82
83TEST_CASE(code_points_of_length_4)
84{
85 // Initial 11110xxx -> \xF{0..7}
86 EXPECT(triggers_immediate_unicode_fault("\xF0"sv));
87 EXPECT(triggers_immediate_unicode_fault("\xF1\x02"sv));
88 EXPECT(triggers_immediate_unicode_fault("\xF2\x52"sv));
89 EXPECT(triggers_immediate_unicode_fault("\xF3\xD2"sv));
90
91 EXPECT(triggers_immediate_unicode_fault("\xF4\x80"sv));
92 EXPECT(triggers_immediate_unicode_fault("\xF5\x81\x07"sv));
93 EXPECT(triggers_immediate_unicode_fault("\xF6\x82\x57"sv));
94 EXPECT(triggers_immediate_unicode_fault("\xF7\x83\xD7"sv));
95
96 EXPECT(triggers_immediate_unicode_fault("\xF4\x80\x80"sv));
97 EXPECT(triggers_immediate_unicode_fault("\xF5\x91\x80\x07"sv));
98 EXPECT(triggers_immediate_unicode_fault("\xF6\xA2\x80\x57"sv));
99 EXPECT(triggers_immediate_unicode_fault("\xF7\xB3\x80\xD7"sv));
100}
101
102TEST_CASE(gives_valid_part_until_fault)
103{
104 auto code = "abc\xF5\x81\x80\x07; abc\xF5\x81\x80\x07 += 4"sv;
105 JS::Lexer lexer(code);
106 auto first_token = lexer.next();
107 EXPECT_EQ(first_token.type(), JS::TokenType::Identifier);
108 EXPECT_EQ(first_token.value(), "abc"sv);
109 auto second_token = lexer.next();
110 EXPECT_EQ(second_token.type(), JS::TokenType::Invalid);
111 EXPECT(produces_eof_tokens(lexer));
112}
113
114TEST_CASE(gives_fully_parsed_tokens_even_if_invalid_unicode_follows)
115{
116 auto code = "let \xE5\xD2"sv;
117 JS::Lexer lexer(code);
118 auto first_token = lexer.next();
119 EXPECT_EQ(first_token.type(), JS::TokenType::Let);
120 auto second_token = lexer.next();
121 EXPECT_EQ(second_token.type(), JS::TokenType::Invalid);
122 EXPECT(produces_eof_tokens(lexer));
123}
124
125TEST_CASE(invalid_unicode_and_valid_code)
126{
127 EXPECT(triggers_immediate_unicode_fault("\xEA\xFDthrow 1;"sv));
128}
129
130TEST_CASE(long_invalid_unicode_and_valid_code)
131{
132 EXPECT(triggers_immediate_unicode_fault("\xF7throw 1;"sv));
133}
134
135TEST_CASE(invalid_unicode_after_valid_code_and_before_eof)
136{
137 auto code = "let \xEA\xFD;"sv;
138 auto lexer = JS::Lexer(code);
139 auto let_token = lexer.next();
140 EXPECT_EQ(let_token.type(), JS::TokenType::Let);
141 auto invalid_token = lexer.next();
142 EXPECT_EQ(invalid_token.type(), JS::TokenType::Invalid);
143 EXPECT(produces_eof_tokens(lexer));
144}