Serenity Operating System
1/*
2 * Copyright (c) 2020-2021, the SerenityOS developers.
3 * Copyright (c) 2021-2023, Sam Atkins <atkinssj@serenityos.org>
4 *
5 * SPDX-License-Identifier: BSD-2-Clause
6 */
7
8#pragma once
9
10#include <AK/Optional.h>
11#include <AK/StringView.h>
12#include <AK/Types.h>
13#include <AK/Utf8View.h>
14#include <LibWeb/CSS/Parser/Token.h>
15#include <LibWeb/Forward.h>
16
17namespace Web::CSS::Parser {
18
// Two u32 values that can be written by position.
// Used as the return type of Tokenizer::peek_twin() and as the argument of
// Tokenizer::is_valid_escape_sequence().
class U32Twin {
public:
    // Writes `value` into the slot chosen by `index`:
    //   0 -> first, 1 -> second.
    // Any other index is ignored and leaves both slots unchanged.
    void set(size_t index, u32 value)
    {
        switch (index) {
        case 0:
            first = value;
            break;
        case 1:
            second = value;
            break;
        default:
            break;
        }
    }

    // Both slots are value-initialized (zero) until set.
    u32 first {};
    u32 second {};
};
32
33class U32Triplet {
34public:
35 void set(size_t index, u32 value)
36 {
37 if (index == 0)
38 first = value;
39 if (index == 1)
40 second = value;
41 if (index == 2)
42 third = value;
43 }
44
45 U32Twin to_twin_12()
46 {
47 return { first, second };
48 }
49
50 U32Twin to_twin_23()
51 {
52 return { second, third };
53 }
54
55 u32 first {};
56 u32 second {};
57 u32 third {};
58};
59
60class Tokenizer {
61public:
62 static ErrorOr<Vector<Token>> tokenize(StringView input, StringView encoding);
63
64 [[nodiscard]] static Token create_eof_token();
65
66private:
67 explicit Tokenizer(String decoded_input);
68
69 [[nodiscard]] ErrorOr<Vector<Token>> tokenize();
70
71 [[nodiscard]] u32 next_code_point();
72 [[nodiscard]] u32 peek_code_point(size_t offset = 0) const;
73 [[nodiscard]] U32Twin peek_twin() const;
74 [[nodiscard]] U32Triplet peek_triplet() const;
75
76 [[nodiscard]] U32Twin start_of_input_stream_twin();
77 [[nodiscard]] U32Triplet start_of_input_stream_triplet();
78
79 [[nodiscard]] static Token create_new_token(Token::Type);
80 [[nodiscard]] static Token create_value_token(Token::Type, FlyString&& value);
81 [[nodiscard]] static Token create_value_token(Token::Type, u32 value);
82 [[nodiscard]] ErrorOr<Token> consume_a_token();
83 [[nodiscard]] ErrorOr<Token> consume_string_token(u32 ending_code_point);
84 [[nodiscard]] ErrorOr<Token> consume_a_numeric_token();
85 [[nodiscard]] ErrorOr<Token> consume_an_ident_like_token();
86 [[nodiscard]] Number consume_a_number();
87 [[nodiscard]] float convert_a_string_to_a_number(StringView);
88 [[nodiscard]] ErrorOr<FlyString> consume_an_ident_sequence();
89 [[nodiscard]] u32 consume_escaped_code_point();
90 [[nodiscard]] ErrorOr<Token> consume_a_url_token();
91 void consume_the_remnants_of_a_bad_url();
92 void consume_comments();
93 void consume_as_much_whitespace_as_possible();
94 void reconsume_current_input_code_point();
95 [[nodiscard]] static bool is_valid_escape_sequence(U32Twin);
96 [[nodiscard]] static bool would_start_an_ident_sequence(U32Triplet);
97 [[nodiscard]] static bool would_start_a_number(U32Triplet);
98
99 String m_decoded_input;
100 Utf8View m_utf8_view;
101 AK::Utf8CodePointIterator m_utf8_iterator;
102 AK::Utf8CodePointIterator m_prev_utf8_iterator;
103 Token::Position m_position;
104 Token::Position m_prev_position;
105};
106}