Serenity Operating System
at master 144 lines 4.8 kB view raw
1/* 2 * Copyright (c) 2021, David Tuin <davidot@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include <LibJS/Lexer.h> 8#include <LibTest/TestCase.h> 9 10static bool produces_eof_tokens(JS::Lexer& lexer) 11{ 12 for (auto i = 0; i < 10; i++) { 13 auto eof_token = lexer.next(); 14 if (eof_token.type() != JS::TokenType::Eof) 15 return false; 16 } 17 return true; 18} 19 20static bool triggers_immediate_unicode_fault(StringView code) 21{ 22 auto lexer = JS::Lexer(code); 23 auto first_token = lexer.next(); 24 25 if (first_token.type() != JS::TokenType::Invalid) 26 return false; 27 28 return produces_eof_tokens(lexer); 29} 30// In the not leading character it must start with 0b10xxxxxx 31// Thus all these options are invalid: 32// \x0y = 0000 y (or \x1y, \x2y and \x3y) 33// \x4y = 0100 y (or \x5y, \x6y and \x7y) 34// \xCy = 1100 y (or \xDy, \xEy and \xFy) 35// And the only valid option is: 36// \x8y = 1000 y (or \x9y, \xAy 37 38TEST_CASE(no_input_only_gives_eof) 39{ 40 auto code = ""sv; 41 auto lexer = JS::Lexer(code); 42 EXPECT(produces_eof_tokens(lexer)); 43} 44 45TEST_CASE(invalid_start_code_point) 46{ 47 EXPECT(triggers_immediate_unicode_fault("\x80"sv)); 48 EXPECT(triggers_immediate_unicode_fault("\x90"sv)); 49 EXPECT(triggers_immediate_unicode_fault("\xA0"sv)); 50 EXPECT(triggers_immediate_unicode_fault("\xB0"sv)); 51 EXPECT(triggers_immediate_unicode_fault("\xF8"sv)); 52 EXPECT(triggers_immediate_unicode_fault("\xFF"sv)); 53} 54 55TEST_CASE(code_points_of_length_2) 56{ 57 // Initial 110xxxxx -> \xCy or \xDy 58 EXPECT(triggers_immediate_unicode_fault("\xC5"sv)); 59 EXPECT(triggers_immediate_unicode_fault("\xC5\x02"sv)); 60 EXPECT(triggers_immediate_unicode_fault("\xC5\x52"sv)); 61 EXPECT(triggers_immediate_unicode_fault("\xC5\xD2"sv)); 62 63 EXPECT(triggers_immediate_unicode_fault("\xD5"sv)); 64 EXPECT(triggers_immediate_unicode_fault("\xD5\x23"sv)); 65 EXPECT(triggers_immediate_unicode_fault("\xD5\x74"sv)); 66 EXPECT(triggers_immediate_unicode_fault("\xD5\xF5"sv)); 67} 68 69TEST_CASE(code_points_of_length_3) 70{ 71 // Initial 1110xxxx -> \xEy 72 EXPECT(triggers_immediate_unicode_fault("\xE5"sv)); 73 EXPECT(triggers_immediate_unicode_fault("\xE5\x02"sv)); 74 EXPECT(triggers_immediate_unicode_fault("\xE5\x52"sv)); 75 EXPECT(triggers_immediate_unicode_fault("\xE5\xD2"sv)); 76 77 EXPECT(triggers_immediate_unicode_fault("\xEA\x80"sv)); 78 EXPECT(triggers_immediate_unicode_fault("\xEA\x81\x07"sv)); 79 EXPECT(triggers_immediate_unicode_fault("\xEA\x82\x57"sv)); 80 EXPECT(triggers_immediate_unicode_fault("\xEA\x83\xD7"sv)); 81} 82 83TEST_CASE(code_points_of_length_4) 84{ 85 // Initial 11110xxx -> \xF{0..7} 86 EXPECT(triggers_immediate_unicode_fault("\xF0"sv)); 87 EXPECT(triggers_immediate_unicode_fault("\xF1\x02"sv)); 88 EXPECT(triggers_immediate_unicode_fault("\xF2\x52"sv)); 89 EXPECT(triggers_immediate_unicode_fault("\xF3\xD2"sv)); 90 91 EXPECT(triggers_immediate_unicode_fault("\xF4\x80"sv)); 92 EXPECT(triggers_immediate_unicode_fault("\xF5\x81\x07"sv)); 93 EXPECT(triggers_immediate_unicode_fault("\xF6\x82\x57"sv)); 94 EXPECT(triggers_immediate_unicode_fault("\xF7\x83\xD7"sv)); 95 96 EXPECT(triggers_immediate_unicode_fault("\xF4\x80\x80"sv)); 97 EXPECT(triggers_immediate_unicode_fault("\xF5\x91\x80\x07"sv)); 98 EXPECT(triggers_immediate_unicode_fault("\xF6\xA2\x80\x57"sv)); 99 EXPECT(triggers_immediate_unicode_fault("\xF7\xB3\x80\xD7"sv)); 100} 101 102TEST_CASE(gives_valid_part_until_fault) 103{ 104 auto code = "abc\xF5\x81\x80\x07; abc\xF5\x81\x80\x07 += 4"sv; 105 JS::Lexer lexer(code); 106 auto first_token = lexer.next(); 107 EXPECT_EQ(first_token.type(), JS::TokenType::Identifier); 108 EXPECT_EQ(first_token.value(), "abc"sv); 109 auto second_token = lexer.next(); 110 EXPECT_EQ(second_token.type(), JS::TokenType::Invalid); 111 EXPECT(produces_eof_tokens(lexer)); 112} 113 114TEST_CASE(gives_fully_parsed_tokens_even_if_invalid_unicode_follows) 115{ 116 auto code = "let \xE5\xD2"sv; 117 JS::Lexer lexer(code); 118 auto first_token = lexer.next(); 119 EXPECT_EQ(first_token.type(), JS::TokenType::Let); 120 auto second_token = lexer.next(); 121 EXPECT_EQ(second_token.type(), JS::TokenType::Invalid); 122 EXPECT(produces_eof_tokens(lexer)); 123} 124 125TEST_CASE(invalid_unicode_and_valid_code) 126{ 127 EXPECT(triggers_immediate_unicode_fault("\xEA\xFDthrow 1;"sv)); 128} 129 130TEST_CASE(long_invalid_unicode_and_valid_code) 131{ 132 EXPECT(triggers_immediate_unicode_fault("\xF7throw 1;"sv)); 133} 134 135TEST_CASE(invalid_unicode_after_valid_code_and_before_eof) 136{ 137 auto code = "let \xEA\xFD;"sv; 138 auto lexer = JS::Lexer(code); 139 auto let_token = lexer.next(); 140 EXPECT_EQ(let_token.type(), JS::TokenType::Let); 141 auto invalid_token = lexer.next(); 142 EXPECT_EQ(invalid_token.type(), JS::TokenType::Invalid); 143 EXPECT(produces_eof_tokens(lexer)); 144}