Serenity Operating System
1/*
2 * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
3 * Copyright (c) 2021, Max Wipfli <max.wipfli@serenityos.org>
4 *
5 * SPDX-License-Identifier: BSD-2-Clause
6 */
7
8#pragma once
9
10#include <AK/DeprecatedFlyString.h>
11#include <AK/DeprecatedString.h>
12#include <AK/Function.h>
13#include <AK/OwnPtr.h>
14#include <AK/Types.h>
15#include <AK/Variant.h>
16#include <AK/Vector.h>
17
18namespace Web::HTML {
19
20class HTMLTokenizer;
21
22class HTMLToken {
23 AK_MAKE_NONCOPYABLE(HTMLToken);
24
25public:
26 enum class Type : u8 {
27 Invalid,
28 DOCTYPE,
29 StartTag,
30 EndTag,
31 Comment,
32 Character,
33 EndOfFile,
34 };
35
36 struct Position {
37 size_t line { 0 };
38 size_t column { 0 };
39 };
40
41 struct Attribute {
42 DeprecatedString prefix;
43 DeprecatedString local_name { "" };
44 DeprecatedString namespace_;
45 DeprecatedString value { "" };
46 Position name_start_position;
47 Position value_start_position;
48 Position name_end_position;
49 Position value_end_position;
50 };
51
52 struct DoctypeData {
53 // NOTE: "Missing" is a distinct state from the empty string.
54 DeprecatedString name;
55 DeprecatedString public_identifier;
56 DeprecatedString system_identifier;
57 bool missing_name { true };
58 bool missing_public_identifier { true };
59 bool missing_system_identifier { true };
60 bool force_quirks { false };
61 };
62
63 static HTMLToken make_character(u32 code_point)
64 {
65 HTMLToken token { Type::Character };
66 token.set_code_point(code_point);
67 return token;
68 }
69
70 static HTMLToken make_start_tag(DeprecatedFlyString const& tag_name)
71 {
72 HTMLToken token { Type::StartTag };
73 token.set_tag_name(tag_name);
74 return token;
75 }
76
77 HTMLToken() = default;
78
79 HTMLToken(Type type)
80 : m_type(type)
81 {
82 switch (m_type) {
83 case Type::Character:
84 m_data.set(0u);
85 break;
86 case Type::DOCTYPE:
87 m_data.set(OwnPtr<DoctypeData> {});
88 break;
89 case Type::StartTag:
90 case Type::EndTag:
91 m_data.set(OwnPtr<Vector<Attribute>>());
92 break;
93 default:
94 break;
95 }
96 }
97
98 HTMLToken(HTMLToken&&) = default;
99 HTMLToken& operator=(HTMLToken&&) = default;
100
101 bool is_doctype() const { return m_type == Type::DOCTYPE; }
102 bool is_start_tag() const { return m_type == Type::StartTag; }
103 bool is_end_tag() const { return m_type == Type::EndTag; }
104 bool is_comment() const { return m_type == Type::Comment; }
105 bool is_character() const { return m_type == Type::Character; }
106 bool is_end_of_file() const { return m_type == Type::EndOfFile; }
107
108 u32 code_point() const
109 {
110 VERIFY(is_character());
111 return m_data.get<u32>();
112 }
113
114 bool is_parser_whitespace() const
115 {
116 // NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
117 if (!is_character())
118 return false;
119 switch (code_point()) {
120 case '\t':
121 case '\n':
122 case '\f':
123 case '\r':
124 case ' ':
125 return true;
126 default:
127 return false;
128 }
129 }
130
131 void set_code_point(u32 code_point)
132 {
133 VERIFY(is_character());
134 m_data.get<u32>() = code_point;
135 }
136
137 DeprecatedFlyString const& comment() const
138 {
139 VERIFY(is_comment());
140 return m_string_data;
141 }
142
143 void set_comment(DeprecatedString comment)
144 {
145 VERIFY(is_comment());
146 m_string_data = move(comment);
147 }
148
149 DeprecatedFlyString const& tag_name() const
150 {
151 VERIFY(is_start_tag() || is_end_tag());
152 return m_string_data;
153 }
154
155 void set_tag_name(DeprecatedString name)
156 {
157 VERIFY(is_start_tag() || is_end_tag());
158 m_string_data = move(name);
159 }
160
161 bool is_self_closing() const
162 {
163 VERIFY(is_start_tag() || is_end_tag());
164 return m_tag_self_closing;
165 }
166
167 void set_self_closing(bool self_closing)
168 {
169 VERIFY(is_start_tag() || is_end_tag());
170 m_tag_self_closing = self_closing;
171 }
172
173 bool has_acknowledged_self_closing_flag() const
174 {
175 VERIFY(is_self_closing());
176 return m_tag_self_closing_acknowledged;
177 }
178
179 void acknowledge_self_closing_flag_if_set()
180 {
181 if (is_self_closing())
182 m_tag_self_closing_acknowledged = true;
183 }
184
185 bool has_attributes() const
186 {
187 VERIFY(is_start_tag() || is_end_tag());
188 auto* ptr = tag_attributes();
189 return ptr && !ptr->is_empty();
190 }
191
192 size_t attribute_count() const
193 {
194 VERIFY(is_start_tag() || is_end_tag());
195 if (auto* ptr = tag_attributes())
196 return ptr->size();
197 return 0;
198 }
199
200 void add_attribute(Attribute attribute)
201 {
202 VERIFY(is_start_tag() || is_end_tag());
203 ensure_tag_attributes().append(move(attribute));
204 }
205
206 Attribute const& last_attribute() const
207 {
208 VERIFY(is_start_tag() || is_end_tag());
209 VERIFY(has_attributes());
210 return tag_attributes()->last();
211 }
212
213 Attribute& last_attribute()
214 {
215 VERIFY(is_start_tag() || is_end_tag());
216 VERIFY(has_attributes());
217 return tag_attributes()->last();
218 }
219
220 void drop_attributes()
221 {
222 VERIFY(is_start_tag() || is_end_tag());
223 m_data.get<OwnPtr<Vector<Attribute>>>().clear();
224 }
225
226 void for_each_attribute(Function<IterationDecision(Attribute const&)> callback) const
227 {
228 VERIFY(is_start_tag() || is_end_tag());
229 auto* ptr = tag_attributes();
230 if (!ptr)
231 return;
232 for (auto& attribute : *ptr) {
233 if (callback(attribute) == IterationDecision::Break)
234 break;
235 }
236 }
237
238 void for_each_attribute(Function<IterationDecision(Attribute&)> callback)
239 {
240 VERIFY(is_start_tag() || is_end_tag());
241 auto* ptr = tag_attributes();
242 if (!ptr)
243 return;
244 for (auto& attribute : *ptr) {
245 if (callback(attribute) == IterationDecision::Break)
246 break;
247 }
248 }
249
250 StringView attribute(DeprecatedFlyString const& attribute_name)
251 {
252 VERIFY(is_start_tag() || is_end_tag());
253
254 auto* ptr = tag_attributes();
255 if (!ptr)
256 return {};
257 for (auto& attribute : *ptr) {
258 if (attribute_name == attribute.local_name)
259 return attribute.value;
260 }
261 return {};
262 }
263
264 bool has_attribute(DeprecatedFlyString const& attribute_name)
265 {
266 return !attribute(attribute_name).is_null();
267 }
268
269 void adjust_tag_name(DeprecatedFlyString const& old_name, DeprecatedFlyString const& new_name)
270 {
271 VERIFY(is_start_tag() || is_end_tag());
272 if (old_name == tag_name())
273 set_tag_name(new_name);
274 }
275
276 void adjust_attribute_name(DeprecatedFlyString const& old_name, DeprecatedFlyString const& new_name)
277 {
278 VERIFY(is_start_tag() || is_end_tag());
279 for_each_attribute([&](Attribute& attribute) {
280 if (old_name == attribute.local_name)
281 attribute.local_name = new_name;
282 return IterationDecision::Continue;
283 });
284 }
285
286 void adjust_foreign_attribute(DeprecatedFlyString const& old_name, DeprecatedFlyString const& prefix, DeprecatedFlyString const& local_name, DeprecatedFlyString const& namespace_)
287 {
288 VERIFY(is_start_tag() || is_end_tag());
289 for_each_attribute([&](Attribute& attribute) {
290 if (old_name == attribute.local_name) {
291 attribute.prefix = prefix;
292 attribute.local_name = local_name;
293 attribute.namespace_ = namespace_;
294 }
295 return IterationDecision::Continue;
296 });
297 }
298
299 DoctypeData const& doctype_data() const
300 {
301 VERIFY(is_doctype());
302 auto* ptr = m_data.get<OwnPtr<DoctypeData>>().ptr();
303 VERIFY(ptr);
304 return *ptr;
305 }
306
307 DoctypeData& ensure_doctype_data()
308 {
309 VERIFY(is_doctype());
310 auto& ptr = m_data.get<OwnPtr<DoctypeData>>();
311 if (!ptr)
312 ptr = make<DoctypeData>();
313 return *ptr;
314 }
315
316 Type type() const { return m_type; }
317
318 DeprecatedString to_deprecated_string() const;
319
320 Position const& start_position() const { return m_start_position; }
321 Position const& end_position() const { return m_end_position; }
322
323 void set_start_position(Badge<HTMLTokenizer>, Position start_position) { m_start_position = start_position; }
324 void set_end_position(Badge<HTMLTokenizer>, Position end_position) { m_end_position = end_position; }
325
326private:
327 Vector<Attribute> const* tag_attributes() const
328 {
329 return m_data.get<OwnPtr<Vector<Attribute>>>().ptr();
330 }
331
332 Vector<Attribute>* tag_attributes()
333 {
334 return m_data.get<OwnPtr<Vector<Attribute>>>().ptr();
335 }
336
337 Vector<Attribute>& ensure_tag_attributes()
338 {
339 VERIFY(is_start_tag() || is_end_tag());
340 auto& ptr = m_data.get<OwnPtr<Vector<Attribute>>>();
341 if (!ptr)
342 ptr = make<Vector<Attribute>>();
343 return *ptr;
344 }
345
346 Type m_type { Type::Invalid };
347
348 // Type::StartTag and Type::EndTag
349 bool m_tag_self_closing { false };
350 bool m_tag_self_closing_acknowledged { false };
351
352 // Type::Comment (comment data), Type::StartTag and Type::EndTag (tag name)
353 DeprecatedFlyString m_string_data;
354
355 Variant<Empty, u32, OwnPtr<DoctypeData>, OwnPtr<Vector<Attribute>>> m_data {};
356
357 Position m_start_position;
358 Position m_end_position;
359};
360
361}