// Source: Serenity Operating System repository, master branch (361 lines, 9.7 kB).
1/* 2 * Copyright (c) 2020, Andreas Kling <kling@serenityos.org> 3 * Copyright (c) 2021, Max Wipfli <max.wipfli@serenityos.org> 4 * 5 * SPDX-License-Identifier: BSD-2-Clause 6 */ 7 8#pragma once 9 10#include <AK/DeprecatedFlyString.h> 11#include <AK/DeprecatedString.h> 12#include <AK/Function.h> 13#include <AK/OwnPtr.h> 14#include <AK/Types.h> 15#include <AK/Variant.h> 16#include <AK/Vector.h> 17 18namespace Web::HTML { 19 20class HTMLTokenizer; 21 22class HTMLToken { 23 AK_MAKE_NONCOPYABLE(HTMLToken); 24 25public: 26 enum class Type : u8 { 27 Invalid, 28 DOCTYPE, 29 StartTag, 30 EndTag, 31 Comment, 32 Character, 33 EndOfFile, 34 }; 35 36 struct Position { 37 size_t line { 0 }; 38 size_t column { 0 }; 39 }; 40 41 struct Attribute { 42 DeprecatedString prefix; 43 DeprecatedString local_name { "" }; 44 DeprecatedString namespace_; 45 DeprecatedString value { "" }; 46 Position name_start_position; 47 Position value_start_position; 48 Position name_end_position; 49 Position value_end_position; 50 }; 51 52 struct DoctypeData { 53 // NOTE: "Missing" is a distinct state from the empty string. 
54 DeprecatedString name; 55 DeprecatedString public_identifier; 56 DeprecatedString system_identifier; 57 bool missing_name { true }; 58 bool missing_public_identifier { true }; 59 bool missing_system_identifier { true }; 60 bool force_quirks { false }; 61 }; 62 63 static HTMLToken make_character(u32 code_point) 64 { 65 HTMLToken token { Type::Character }; 66 token.set_code_point(code_point); 67 return token; 68 } 69 70 static HTMLToken make_start_tag(DeprecatedFlyString const& tag_name) 71 { 72 HTMLToken token { Type::StartTag }; 73 token.set_tag_name(tag_name); 74 return token; 75 } 76 77 HTMLToken() = default; 78 79 HTMLToken(Type type) 80 : m_type(type) 81 { 82 switch (m_type) { 83 case Type::Character: 84 m_data.set(0u); 85 break; 86 case Type::DOCTYPE: 87 m_data.set(OwnPtr<DoctypeData> {}); 88 break; 89 case Type::StartTag: 90 case Type::EndTag: 91 m_data.set(OwnPtr<Vector<Attribute>>()); 92 break; 93 default: 94 break; 95 } 96 } 97 98 HTMLToken(HTMLToken&&) = default; 99 HTMLToken& operator=(HTMLToken&&) = default; 100 101 bool is_doctype() const { return m_type == Type::DOCTYPE; } 102 bool is_start_tag() const { return m_type == Type::StartTag; } 103 bool is_end_tag() const { return m_type == Type::EndTag; } 104 bool is_comment() const { return m_type == Type::Comment; } 105 bool is_character() const { return m_type == Type::Character; } 106 bool is_end_of_file() const { return m_type == Type::EndOfFile; } 107 108 u32 code_point() const 109 { 110 VERIFY(is_character()); 111 return m_data.get<u32>(); 112 } 113 114 bool is_parser_whitespace() const 115 { 116 // NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not. 
117 if (!is_character()) 118 return false; 119 switch (code_point()) { 120 case '\t': 121 case '\n': 122 case '\f': 123 case '\r': 124 case ' ': 125 return true; 126 default: 127 return false; 128 } 129 } 130 131 void set_code_point(u32 code_point) 132 { 133 VERIFY(is_character()); 134 m_data.get<u32>() = code_point; 135 } 136 137 DeprecatedFlyString const& comment() const 138 { 139 VERIFY(is_comment()); 140 return m_string_data; 141 } 142 143 void set_comment(DeprecatedString comment) 144 { 145 VERIFY(is_comment()); 146 m_string_data = move(comment); 147 } 148 149 DeprecatedFlyString const& tag_name() const 150 { 151 VERIFY(is_start_tag() || is_end_tag()); 152 return m_string_data; 153 } 154 155 void set_tag_name(DeprecatedString name) 156 { 157 VERIFY(is_start_tag() || is_end_tag()); 158 m_string_data = move(name); 159 } 160 161 bool is_self_closing() const 162 { 163 VERIFY(is_start_tag() || is_end_tag()); 164 return m_tag_self_closing; 165 } 166 167 void set_self_closing(bool self_closing) 168 { 169 VERIFY(is_start_tag() || is_end_tag()); 170 m_tag_self_closing = self_closing; 171 } 172 173 bool has_acknowledged_self_closing_flag() const 174 { 175 VERIFY(is_self_closing()); 176 return m_tag_self_closing_acknowledged; 177 } 178 179 void acknowledge_self_closing_flag_if_set() 180 { 181 if (is_self_closing()) 182 m_tag_self_closing_acknowledged = true; 183 } 184 185 bool has_attributes() const 186 { 187 VERIFY(is_start_tag() || is_end_tag()); 188 auto* ptr = tag_attributes(); 189 return ptr && !ptr->is_empty(); 190 } 191 192 size_t attribute_count() const 193 { 194 VERIFY(is_start_tag() || is_end_tag()); 195 if (auto* ptr = tag_attributes()) 196 return ptr->size(); 197 return 0; 198 } 199 200 void add_attribute(Attribute attribute) 201 { 202 VERIFY(is_start_tag() || is_end_tag()); 203 ensure_tag_attributes().append(move(attribute)); 204 } 205 206 Attribute const& last_attribute() const 207 { 208 VERIFY(is_start_tag() || is_end_tag()); 209 VERIFY(has_attributes()); 
210 return tag_attributes()->last(); 211 } 212 213 Attribute& last_attribute() 214 { 215 VERIFY(is_start_tag() || is_end_tag()); 216 VERIFY(has_attributes()); 217 return tag_attributes()->last(); 218 } 219 220 void drop_attributes() 221 { 222 VERIFY(is_start_tag() || is_end_tag()); 223 m_data.get<OwnPtr<Vector<Attribute>>>().clear(); 224 } 225 226 void for_each_attribute(Function<IterationDecision(Attribute const&)> callback) const 227 { 228 VERIFY(is_start_tag() || is_end_tag()); 229 auto* ptr = tag_attributes(); 230 if (!ptr) 231 return; 232 for (auto& attribute : *ptr) { 233 if (callback(attribute) == IterationDecision::Break) 234 break; 235 } 236 } 237 238 void for_each_attribute(Function<IterationDecision(Attribute&)> callback) 239 { 240 VERIFY(is_start_tag() || is_end_tag()); 241 auto* ptr = tag_attributes(); 242 if (!ptr) 243 return; 244 for (auto& attribute : *ptr) { 245 if (callback(attribute) == IterationDecision::Break) 246 break; 247 } 248 } 249 250 StringView attribute(DeprecatedFlyString const& attribute_name) 251 { 252 VERIFY(is_start_tag() || is_end_tag()); 253 254 auto* ptr = tag_attributes(); 255 if (!ptr) 256 return {}; 257 for (auto& attribute : *ptr) { 258 if (attribute_name == attribute.local_name) 259 return attribute.value; 260 } 261 return {}; 262 } 263 264 bool has_attribute(DeprecatedFlyString const& attribute_name) 265 { 266 return !attribute(attribute_name).is_null(); 267 } 268 269 void adjust_tag_name(DeprecatedFlyString const& old_name, DeprecatedFlyString const& new_name) 270 { 271 VERIFY(is_start_tag() || is_end_tag()); 272 if (old_name == tag_name()) 273 set_tag_name(new_name); 274 } 275 276 void adjust_attribute_name(DeprecatedFlyString const& old_name, DeprecatedFlyString const& new_name) 277 { 278 VERIFY(is_start_tag() || is_end_tag()); 279 for_each_attribute([&](Attribute& attribute) { 280 if (old_name == attribute.local_name) 281 attribute.local_name = new_name; 282 return IterationDecision::Continue; 283 }); 284 } 285 286 
void adjust_foreign_attribute(DeprecatedFlyString const& old_name, DeprecatedFlyString const& prefix, DeprecatedFlyString const& local_name, DeprecatedFlyString const& namespace_) 287 { 288 VERIFY(is_start_tag() || is_end_tag()); 289 for_each_attribute([&](Attribute& attribute) { 290 if (old_name == attribute.local_name) { 291 attribute.prefix = prefix; 292 attribute.local_name = local_name; 293 attribute.namespace_ = namespace_; 294 } 295 return IterationDecision::Continue; 296 }); 297 } 298 299 DoctypeData const& doctype_data() const 300 { 301 VERIFY(is_doctype()); 302 auto* ptr = m_data.get<OwnPtr<DoctypeData>>().ptr(); 303 VERIFY(ptr); 304 return *ptr; 305 } 306 307 DoctypeData& ensure_doctype_data() 308 { 309 VERIFY(is_doctype()); 310 auto& ptr = m_data.get<OwnPtr<DoctypeData>>(); 311 if (!ptr) 312 ptr = make<DoctypeData>(); 313 return *ptr; 314 } 315 316 Type type() const { return m_type; } 317 318 DeprecatedString to_deprecated_string() const; 319 320 Position const& start_position() const { return m_start_position; } 321 Position const& end_position() const { return m_end_position; } 322 323 void set_start_position(Badge<HTMLTokenizer>, Position start_position) { m_start_position = start_position; } 324 void set_end_position(Badge<HTMLTokenizer>, Position end_position) { m_end_position = end_position; } 325 326private: 327 Vector<Attribute> const* tag_attributes() const 328 { 329 return m_data.get<OwnPtr<Vector<Attribute>>>().ptr(); 330 } 331 332 Vector<Attribute>* tag_attributes() 333 { 334 return m_data.get<OwnPtr<Vector<Attribute>>>().ptr(); 335 } 336 337 Vector<Attribute>& ensure_tag_attributes() 338 { 339 VERIFY(is_start_tag() || is_end_tag()); 340 auto& ptr = m_data.get<OwnPtr<Vector<Attribute>>>(); 341 if (!ptr) 342 ptr = make<Vector<Attribute>>(); 343 return *ptr; 344 } 345 346 Type m_type { Type::Invalid }; 347 348 // Type::StartTag and Type::EndTag 349 bool m_tag_self_closing { false }; 350 bool m_tag_self_closing_acknowledged { false }; 351 352 
// Type::Comment (comment data), Type::StartTag and Type::EndTag (tag name) 353 DeprecatedFlyString m_string_data; 354 355 Variant<Empty, u32, OwnPtr<DoctypeData>, OwnPtr<Vector<Attribute>>> m_data {}; 356 357 Position m_start_position; 358 Position m_end_position; 359}; 360 361}