Serenity Operating System
at master 3895 lines 170 kB view raw
1/* 2 * Copyright (c) 2020-2022, Andreas Kling <kling@serenityos.org> 3 * Copyright (c) 2021, Luke Wilde <lukew@serenityos.org> 4 * 5 * SPDX-License-Identifier: BSD-2-Clause 6 */ 7 8#include <AK/Debug.h> 9#include <AK/SourceLocation.h> 10#include <AK/Utf32View.h> 11#include <LibTextCodec/Decoder.h> 12#include <LibWeb/Bindings/MainThreadVM.h> 13#include <LibWeb/DOM/Comment.h> 14#include <LibWeb/DOM/Document.h> 15#include <LibWeb/DOM/DocumentType.h> 16#include <LibWeb/DOM/ElementFactory.h> 17#include <LibWeb/DOM/Event.h> 18#include <LibWeb/DOM/ProcessingInstruction.h> 19#include <LibWeb/DOM/Text.h> 20#include <LibWeb/HTML/EventLoop/EventLoop.h> 21#include <LibWeb/HTML/EventNames.h> 22#include <LibWeb/HTML/HTMLFormElement.h> 23#include <LibWeb/HTML/HTMLHeadElement.h> 24#include <LibWeb/HTML/HTMLScriptElement.h> 25#include <LibWeb/HTML/HTMLTableElement.h> 26#include <LibWeb/HTML/HTMLTemplateElement.h> 27#include <LibWeb/HTML/Parser/HTMLEncodingDetection.h> 28#include <LibWeb/HTML/Parser/HTMLParser.h> 29#include <LibWeb/HTML/Parser/HTMLToken.h> 30#include <LibWeb/HTML/Window.h> 31#include <LibWeb/HighResolutionTime/TimeOrigin.h> 32#include <LibWeb/Infra/CharacterTypes.h> 33#include <LibWeb/Namespace.h> 34#include <LibWeb/SVG/TagNames.h> 35 36namespace Web::HTML { 37 38static inline void log_parse_error(SourceLocation const& location = SourceLocation::current()) 39{ 40 dbgln_if(HTML_PARSER_DEBUG, "Parse error! {}", location); 41} 42 43static Vector<DeprecatedFlyString> s_quirks_public_ids = { 44 "+//Silmaril//dtd html Pro v0r11 19970101//", 45 "-//AS//DTD HTML 3.0 asWedit + extensions//", 46 "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", 47 "-//IETF//DTD HTML 2.0 Level 1//", 48 "-//IETF//DTD HTML 2.0 Level 2//", 49 "-//IETF//DTD HTML 2.0 Strict Level 1//", 50 "-//IETF//DTD HTML 2.0 Strict Level 2//", 51 "-//IETF//DTD HTML 2.0 Strict//", 52 "-//IETF//DTD HTML 2.0//", 53 "-//IETF//DTD HTML 2.1E//", 54 "-//IETF//DTD HTML 3.0//", 55 "-//IETF//DTD HTML 3.2 Final//", 56 "-//IETF//DTD HTML 3.2//", 57 "-//IETF//DTD HTML 3//", 58 "-//IETF//DTD HTML Level 0//", 59 "-//IETF//DTD HTML Level 1//", 60 "-//IETF//DTD HTML Level 2//", 61 "-//IETF//DTD HTML Level 3//", 62 "-//IETF//DTD HTML Strict Level 0//", 63 "-//IETF//DTD HTML Strict Level 1//", 64 "-//IETF//DTD HTML Strict Level 2//", 65 "-//IETF//DTD HTML Strict Level 3//", 66 "-//IETF//DTD HTML Strict//", 67 "-//IETF//DTD HTML//", 68 "-//Metrius//DTD Metrius Presentational//", 69 "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", 70 "-//Microsoft//DTD Internet Explorer 2.0 HTML//", 71 "-//Microsoft//DTD Internet Explorer 2.0 Tables//", 72 "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", 73 "-//Microsoft//DTD Internet Explorer 3.0 HTML//", 74 "-//Microsoft//DTD Internet Explorer 3.0 Tables//", 75 "-//Netscape Comm. Corp.//DTD HTML//", 76 "-//Netscape Comm. Corp.//DTD Strict HTML//", 77 "-//O'Reilly and Associates//DTD HTML 2.0//", 78 "-//O'Reilly and Associates//DTD HTML Extended 1.0//", 79 "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", 80 "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", 81 "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", 82 "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", 83 "-//Spyglass//DTD HTML 2.0 Extended//", 84 "-//Sun Microsystems Corp.//DTD HotJava HTML//", 85 "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", 86 "-//W3C//DTD HTML 3 1995-03-24//", 87 "-//W3C//DTD HTML 3.2 Draft//", 88 "-//W3C//DTD HTML 3.2 Final//", 89 "-//W3C//DTD HTML 3.2//", 90 "-//W3C//DTD HTML 3.2S Draft//", 91 "-//W3C//DTD HTML 4.0 Frameset//", 92 "-//W3C//DTD HTML 4.0 Transitional//", 93 "-//W3C//DTD HTML Experimental 19960712//", 94 "-//W3C//DTD HTML Experimental 970421//", 95 "-//W3C//DTD W3 HTML//", 96 "-//W3O//DTD W3 HTML 3.0//", 97 "-//WebTechs//DTD Mozilla HTML 2.0//", 98 "-//WebTechs//DTD Mozilla HTML//" 99}; 100 101// https://html.spec.whatwg.org/multipage/parsing.html#mathml-text-integration-point 102static bool is_mathml_text_integration_point(DOM::Element const&) 103{ 104 // FIXME: Implement. 105 return false; 106} 107 108// https://html.spec.whatwg.org/multipage/parsing.html#html-integration-point 109static bool is_html_integration_point(DOM::Element const& element) 110{ 111 // A node is an HTML integration point if it is one of the following elements: 112 // FIXME: A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "text/html" 113 // FIXME: A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "application/xhtml+xml" 114 115 // An SVG foreignObject element 116 // An SVG desc element 117 // An SVG title element 118 if (element.tag_name().is_one_of(SVG::TagNames::foreignObject, SVG::TagNames::desc, SVG::TagNames::title)) 119 return true; 120 121 return false; 122} 123 124HTMLParser::HTMLParser(DOM::Document& document, StringView input, DeprecatedString const& encoding) 125 : m_tokenizer(input, encoding) 126 , m_scripting_enabled(document.is_scripting_enabled()) 127 , m_document(JS::make_handle(document)) 128{ 129 m_tokenizer.set_parser({}, *this); 130 m_document->set_parser({}, *this); 131 auto standardized_encoding = TextCodec::get_standardized_encoding(encoding); 132 VERIFY(standardized_encoding.has_value()); 133 m_document->set_encoding(standardized_encoding.value()); 134} 135 136HTMLParser::HTMLParser(DOM::Document& document) 137 : m_scripting_enabled(document.is_scripting_enabled()) 138 , m_document(JS::make_handle(document)) 139{ 140 m_document->set_parser({}, *this); 141 m_tokenizer.set_parser({}, *this); 142} 143 144HTMLParser::~HTMLParser() 145{ 146} 147 148void HTMLParser::visit_edges(Cell::Visitor& visitor) 149{ 150 Base::visit_edges(visitor); 151 visitor.visit(m_document); 152 visitor.visit(m_head_element); 153 visitor.visit(m_form_element); 154 visitor.visit(m_context_element); 155 visitor.visit(m_character_insertion_node); 156 157 m_stack_of_open_elements.visit_edges(visitor); 158 m_list_of_active_formatting_elements.visit_edges(visitor); 159} 160 161void HTMLParser::run() 162{ 163 for (;;) { 164 // FIXME: Find a better way to say that we come from Document::close() and want to process EOF. 165 if (!m_tokenizer.is_eof_inserted() && m_tokenizer.is_insertion_point_reached()) 166 return; 167 168 auto optional_token = m_tokenizer.next_token(); 169 if (!optional_token.has_value()) 170 break; 171 auto& token = optional_token.value(); 172 173 dbgln_if(HTML_PARSER_DEBUG, "[{}] {}", insertion_mode_name(), token.to_deprecated_string()); 174 175 // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction-dispatcher 176 // As each token is emitted from the tokenizer, the user agent must follow the appropriate steps from the following list, known as the tree construction dispatcher: 177 if (m_stack_of_open_elements.is_empty() 178 || adjusted_current_node().namespace_() == Namespace::HTML 179 || (is_html_integration_point(adjusted_current_node()) && (token.is_start_tag() || token.is_character())) 180 || token.is_end_of_file()) { 181 // -> If the stack of open elements is empty 182 // -> If the adjusted current node is an element in the HTML namespace 183 // FIXME: -> If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark" 184 // FIXME: -> If the adjusted current node is a MathML text integration point and the token is a character token 185 // FIXME: -> If the adjusted current node is a MathML annotation-xml element and the token is a start tag whose tag name is "svg" 186 // -> If the adjusted current node is an HTML integration point and the token is a start tag 187 // -> If the adjusted current node is an HTML integration point and the token is a character token 188 // -> If the token is an end-of-file token 189 190 // Process the token according to the rules given in the section corresponding to the current insertion mode in HTML content. 191 process_using_the_rules_for(m_insertion_mode, token); 192 } else { 193 // -> Otherwise 194 195 // Process the token according to the rules given in the section for parsing tokens in foreign content. 196 process_using_the_rules_for_foreign_content(token); 197 } 198 199 if (m_stop_parsing) { 200 dbgln_if(HTML_PARSER_DEBUG, "Stop parsing{}! :^)", m_parsing_fragment ? " fragment" : ""); 201 break; 202 } 203 } 204 205 flush_character_insertions(); 206} 207 208void HTMLParser::run(const AK::URL& url) 209{ 210 m_document->set_url(url); 211 m_document->set_source(m_tokenizer.source()); 212 run(); 213 the_end(); 214 m_document->detach_parser({}); 215} 216 217// https://html.spec.whatwg.org/multipage/parsing.html#the-end 218void HTMLParser::the_end() 219{ 220 // Once the user agent stops parsing the document, the user agent must run the following steps: 221 222 // FIXME: 1. If the active speculative HTML parser is not null, then stop the speculative HTML parser and return. 223 224 // 2. Set the insertion point to undefined. 225 m_tokenizer.undefine_insertion_point(); 226 227 // 3. Update the current document readiness to "interactive". 228 m_document->update_readiness(HTML::DocumentReadyState::Interactive); 229 230 // 4. Pop all the nodes off the stack of open elements. 231 while (!m_stack_of_open_elements.is_empty()) 232 (void)m_stack_of_open_elements.pop(); 233 234 // 5. While the list of scripts that will execute when the document has finished parsing is not empty: 235 while (!m_document->scripts_to_execute_when_parsing_has_finished().is_empty()) { 236 // 1. Spin the event loop until the first script in the list of scripts that will execute when the document has finished parsing 237 // has its "ready to be parser-executed" flag set and the parser's Document has no style sheet that is blocking scripts. 238 main_thread_event_loop().spin_until([&] { 239 return m_document->scripts_to_execute_when_parsing_has_finished().first()->is_ready_to_be_parser_executed() 240 && !m_document->has_a_style_sheet_that_is_blocking_scripts(); 241 }); 242 243 // 2. Execute the first script in the list of scripts that will execute when the document has finished parsing. 244 m_document->scripts_to_execute_when_parsing_has_finished().first()->execute_script(); 245 246 // 3. Remove the first script element from the list of scripts that will execute when the document has finished parsing (i.e. shift out the first entry in the list). 247 (void)m_document->scripts_to_execute_when_parsing_has_finished().take_first(); 248 } 249 250 // 6. Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following substeps: 251 old_queue_global_task_with_document(HTML::Task::Source::DOMManipulation, *m_document, [document = m_document] { 252 // 1. Set the Document's load timing info's DOM content loaded event start time to the current high resolution time given the Document's relevant global object. 253 document->load_timing_info().dom_content_loaded_event_start_time = HighResolutionTime::unsafe_shared_current_time(); 254 255 // 2. Fire an event named DOMContentLoaded at the Document object, with its bubbles attribute initialized to true. 256 auto content_loaded_event = DOM::Event::create(document->realm(), HTML::EventNames::DOMContentLoaded).release_value_but_fixme_should_propagate_errors(); 257 content_loaded_event->set_bubbles(true); 258 document->dispatch_event(content_loaded_event); 259 260 // 3. Set the Document's load timing info's DOM content loaded event end time to the current high resolution time given the Document's relevant global object. 261 document->load_timing_info().dom_content_loaded_event_end_time = HighResolutionTime::unsafe_shared_current_time(); 262 263 // FIXME: 4. Enable the client message queue of the ServiceWorkerContainer object whose associated service worker client is the Document object's relevant settings object. 264 265 // FIXME: 5. Invoke WebDriver BiDi DOM content loaded with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "pending", and url is the Document object's URL. 266 }); 267 268 // 7. Spin the event loop until the set of scripts that will execute as soon as possible and the list of scripts that will execute in order as soon as possible are empty. 269 main_thread_event_loop().spin_until([&] { 270 return m_document->scripts_to_execute_as_soon_as_possible().is_empty(); 271 }); 272 273 // 8. Spin the event loop until there is nothing that delays the load event in the Document. 274 // FIXME: Track down all the things that are supposed to delay the load event. 275 main_thread_event_loop().spin_until([&] { 276 return m_document->number_of_things_delaying_the_load_event() == 0; 277 }); 278 279 // 9. Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following steps: 280 old_queue_global_task_with_document(HTML::Task::Source::DOMManipulation, *m_document, [document = m_document] { 281 // 1. Update the current document readiness to "complete". 282 document->update_readiness(HTML::DocumentReadyState::Complete); 283 284 // 2. If the Document object's browsing context is null, then abort these steps. 285 if (!document->browsing_context()) 286 return; 287 288 // 3. Let window be the Document's relevant global object. 289 JS::NonnullGCPtr<Window> window = document->window(); 290 291 // 4. Set the Document's load timing info's load event start time to the current high resolution time given window. 292 document->load_timing_info().load_event_start_time = HighResolutionTime::unsafe_shared_current_time(); 293 294 // 5. Fire an event named load at window, with legacy target override flag set. 295 // FIXME: The legacy target override flag is currently set by a virtual override of dispatch_event() 296 // We should reorganize this so that the flag appears explicitly here instead. 297 window->dispatch_event(DOM::Event::create(document->realm(), HTML::EventNames::load).release_value_but_fixme_should_propagate_errors()); 298 299 // FIXME: 6. Invoke WebDriver BiDi load complete with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "complete", and url is the Document object's URL. 300 301 // FIXME: 7. Set the Document object's navigation id to null. 302 303 // 8. Set the Document's load timing info's load event end time to the current high resolution time given window. 304 document->load_timing_info().load_event_end_time = HighResolutionTime::unsafe_shared_current_time(); 305 306 // 9. Assert: Document's page showing is false. 307 VERIFY(!document->page_showing()); 308 309 // 10. Set the Document's page showing flag to true. 310 document->set_page_showing(true); 311 312 // 11. Fire a page transition event named pageshow at window with false. 313 window->fire_a_page_transition_event(HTML::EventNames::pageshow, false); 314 315 // 12. Completely finish loading the Document. 316 document->completely_finish_loading(); 317 318 // FIXME: 13. Queue the navigation timing entry for the Document. 319 }); 320 321 // FIXME: 10. If the Document's print when loaded flag is set, then run the printing steps. 322 323 // 11. The Document is now ready for post-load tasks. 324 m_document->set_ready_for_post_load_tasks(true); 325} 326 327void HTMLParser::process_using_the_rules_for(InsertionMode mode, HTMLToken& token) 328{ 329 switch (mode) { 330 case InsertionMode::Initial: 331 handle_initial(token); 332 break; 333 case InsertionMode::BeforeHTML: 334 handle_before_html(token); 335 break; 336 case InsertionMode::BeforeHead: 337 handle_before_head(token); 338 break; 339 case InsertionMode::InHead: 340 handle_in_head(token); 341 break; 342 case InsertionMode::InHeadNoscript: 343 handle_in_head_noscript(token); 344 break; 345 case InsertionMode::AfterHead: 346 handle_after_head(token); 347 break; 348 case InsertionMode::InBody: 349 handle_in_body(token); 350 break; 351 case InsertionMode::AfterBody: 352 handle_after_body(token); 353 break; 354 case InsertionMode::AfterAfterBody: 355 handle_after_after_body(token); 356 break; 357 case InsertionMode::Text: 358 handle_text(token); 359 break; 360 case InsertionMode::InTable: 361 handle_in_table(token); 362 break; 363 case InsertionMode::InTableBody: 364 handle_in_table_body(token); 365 break; 366 case InsertionMode::InRow: 367 handle_in_row(token); 368 break; 369 case InsertionMode::InCell: 370 handle_in_cell(token); 371 break; 372 case InsertionMode::InTableText: 373 handle_in_table_text(token); 374 break; 375 case InsertionMode::InSelectInTable: 376 handle_in_select_in_table(token); 377 break; 378 case InsertionMode::InSelect: 379 handle_in_select(token); 380 break; 381 case InsertionMode::InCaption: 382 handle_in_caption(token); 383 break; 384 case InsertionMode::InColumnGroup: 385 handle_in_column_group(token); 386 break; 387 case InsertionMode::InTemplate: 388 handle_in_template(token); 389 break; 390 case InsertionMode::InFrameset: 391 handle_in_frameset(token); 392 break; 393 case InsertionMode::AfterFrameset: 394 handle_after_frameset(token); 395 break; 396 case InsertionMode::AfterAfterFrameset: 397 handle_after_after_frameset(token); 398 break; 399 default: 400 VERIFY_NOT_REACHED(); 401 } 402} 403 404DOM::QuirksMode HTMLParser::which_quirks_mode(HTMLToken const& doctype_token) const 405{ 406 if (doctype_token.doctype_data().force_quirks) 407 return DOM::QuirksMode::Yes; 408 409 // NOTE: The tokenizer puts the name into lower case for us. 410 if (doctype_token.doctype_data().name != "html") 411 return DOM::QuirksMode::Yes; 412 413 auto const& public_identifier = doctype_token.doctype_data().public_identifier; 414 auto const& system_identifier = doctype_token.doctype_data().system_identifier; 415 416 if (public_identifier.equals_ignoring_ascii_case("-//W3O//DTD W3 HTML Strict 3.0//EN//"sv)) 417 return DOM::QuirksMode::Yes; 418 419 if (public_identifier.equals_ignoring_ascii_case("-/W3C/DTD HTML 4.0 Transitional/EN"sv)) 420 return DOM::QuirksMode::Yes; 421 422 if (public_identifier.equals_ignoring_ascii_case("HTML"sv)) 423 return DOM::QuirksMode::Yes; 424 425 if (system_identifier.equals_ignoring_ascii_case("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"sv)) 426 return DOM::QuirksMode::Yes; 427 428 for (auto& public_id : s_quirks_public_ids) { 429 if (public_identifier.starts_with(public_id, CaseSensitivity::CaseInsensitive)) 430 return DOM::QuirksMode::Yes; 431 } 432 433 if (doctype_token.doctype_data().missing_system_identifier) { 434 if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive)) 435 return DOM::QuirksMode::Yes; 436 437 if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive)) 438 return DOM::QuirksMode::Yes; 439 } 440 441 if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Frameset//"sv, CaseSensitivity::CaseInsensitive)) 442 return DOM::QuirksMode::Limited; 443 444 if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Transitional//"sv, CaseSensitivity::CaseInsensitive)) 445 return DOM::QuirksMode::Limited; 446 447 if (!doctype_token.doctype_data().missing_system_identifier) { 448 if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive)) 449 return DOM::QuirksMode::Limited; 450 451 if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive)) 452 return DOM::QuirksMode::Limited; 453 } 454 455 return DOM::QuirksMode::No; 456} 457 458void HTMLParser::handle_initial(HTMLToken& token) 459{ 460 if (token.is_character() && token.is_parser_whitespace()) { 461 return; 462 } 463 464 if (token.is_comment()) { 465 auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors(); 466 MUST(document().append_child(*comment)); 467 return; 468 } 469 470 if (token.is_doctype()) { 471 auto doctype = realm().heap().allocate<DOM::DocumentType>(realm(), document()).release_allocated_value_but_fixme_should_propagate_errors(); 472 doctype->set_name(token.doctype_data().name); 473 doctype->set_public_id(token.doctype_data().public_identifier); 474 doctype->set_system_id(token.doctype_data().system_identifier); 475 MUST(document().append_child(*doctype)); 476 document().set_quirks_mode(which_quirks_mode(token)); 477 m_insertion_mode = InsertionMode::BeforeHTML; 478 return; 479 } 480 481 log_parse_error(); 482 document().set_quirks_mode(DOM::QuirksMode::Yes); 483 m_insertion_mode = InsertionMode::BeforeHTML; 484 process_using_the_rules_for(InsertionMode::BeforeHTML, token); 485} 486 487// https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode 488void HTMLParser::handle_before_html(HTMLToken& token) 489{ 490 // -> A DOCTYPE token 491 if (token.is_doctype()) { 492 // Parse error. Ignore the token. 493 log_parse_error(); 494 return; 495 } 496 497 // -> A comment token 498 if (token.is_comment()) { 499 // Insert a comment as the last child of the Document object. 500 auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors(); 501 MUST(document().append_child(*comment)); 502 return; 503 } 504 505 // -> A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE 506 if (token.is_character() && token.is_parser_whitespace()) { 507 // Ignore the token. 508 return; 509 } 510 511 // -> A start tag whose tag name is "html" 512 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { 513 // Create an element for the token in the HTML namespace, with the Document as the intended parent. Append it to the Document object. Put this element in the stack of open elements. 514 auto element = create_element_for(token, Namespace::HTML, document()); 515 MUST(document().append_child(*element)); 516 m_stack_of_open_elements.push(move(element)); 517 518 // Switch the insertion mode to "before head". 519 m_insertion_mode = InsertionMode::BeforeHead; 520 return; 521 } 522 523 // -> An end tag whose tag name is one of: "head", "body", "html", "br" 524 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) { 525 // Act as described in the "anything else" entry below. 526 goto AnythingElse; 527 } 528 529 // -> Any other end tag 530 if (token.is_end_tag()) { 531 // Parse error. Ignore the token. 532 log_parse_error(); 533 return; 534 } 535 536 // -> Anything else 537AnythingElse: 538 // Create an html element whose node document is the Document object. Append it to the Document object. Put this element in the stack of open elements. 539 auto element = create_element(document(), HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); 540 MUST(document().append_child(element)); 541 m_stack_of_open_elements.push(element); 542 543 // Switch the insertion mode to "before head", then reprocess the token. 544 m_insertion_mode = InsertionMode::BeforeHead; 545 process_using_the_rules_for(InsertionMode::BeforeHead, token); 546 return; 547} 548 549DOM::Element& HTMLParser::current_node() 550{ 551 return m_stack_of_open_elements.current_node(); 552} 553 554DOM::Element& HTMLParser::adjusted_current_node() 555{ 556 if (m_parsing_fragment && m_stack_of_open_elements.elements().size() == 1) 557 return *m_context_element; 558 559 return current_node(); 560} 561 562DOM::Element& HTMLParser::node_before_current_node() 563{ 564 return *m_stack_of_open_elements.elements().at(m_stack_of_open_elements.elements().size() - 2); 565} 566 567// https://html.spec.whatwg.org/multipage/parsing.html#appropriate-place-for-inserting-a-node 568HTMLParser::AdjustedInsertionLocation HTMLParser::find_appropriate_place_for_inserting_node(JS::GCPtr<DOM::Element> override_target) 569{ 570 auto& target = override_target ? *override_target.ptr() : current_node(); 571 HTMLParser::AdjustedInsertionLocation adjusted_insertion_location; 572 573 // 2. Determine the adjusted insertion location using the first matching steps from the following list: 574 575 // `-> If foster parenting is enabled and target is a table, tbody, tfoot, thead, or tr element 576 if (m_foster_parenting && target.local_name().is_one_of(HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr)) { 577 // 1. Let last template be the last template element in the stack of open elements, if any. 578 auto last_template = m_stack_of_open_elements.last_element_with_tag_name(HTML::TagNames::template_); 579 // 2. Let last table be the last table element in the stack of open elements, if any. 580 auto last_table = m_stack_of_open_elements.last_element_with_tag_name(HTML::TagNames::table); 581 // 3. If there is a last template and either there is no last table, 582 // or there is one, but last template is lower (more recently added) than last table in the stack of open elements, 583 if (last_template.element && (!last_table.element || last_template.index > last_table.index)) { 584 // then: let adjusted insertion location be inside last template's template contents, after its last child (if any), and abort these steps. 585 586 // NOTE: This returns the template content, so no need to check the parent is a template. 587 return { verify_cast<HTMLTemplateElement>(*last_template.element).content().ptr(), nullptr }; 588 } 589 // 4. If there is no last table, then let adjusted insertion location be inside the first element in the stack of open elements (the html element), 590 // after its last child (if any), and abort these steps. (fragment case) 591 if (!last_table.element) { 592 VERIFY(m_parsing_fragment); 593 // Guaranteed not to be a template element (it will be the html element), 594 // so no need to check the parent is a template. 595 return { *m_stack_of_open_elements.elements().first(), nullptr }; 596 } 597 // 5. If last table has a parent node, then let adjusted insertion location be inside last table's parent node, immediately before last table, and abort these steps. 598 if (last_table.element->parent_node()) { 599 adjusted_insertion_location = { last_table.element->parent_node(), last_table.element.ptr() }; 600 } else { 601 // 6. Let previous element be the element immediately above last table in the stack of open elements. 602 auto previous_element = m_stack_of_open_elements.element_immediately_above(*last_table.element); 603 604 // 7. Let adjusted insertion location be inside previous element, after its last child (if any). 605 adjusted_insertion_location = { previous_element.ptr(), nullptr }; 606 } 607 } else { 608 // `-> Otherwise 609 // Let adjusted insertion location be inside target, after its last child (if any). 610 adjusted_insertion_location = { target, nullptr }; 611 } 612 613 if (is<HTMLTemplateElement>(*adjusted_insertion_location.parent)) 614 return { verify_cast<HTMLTemplateElement>(*adjusted_insertion_location.parent).content().ptr(), nullptr }; 615 616 return adjusted_insertion_location; 617} 618 619JS::NonnullGCPtr<DOM::Element> HTMLParser::create_element_for(HTMLToken const& token, DeprecatedFlyString const& namespace_, DOM::Node& intended_parent) 620{ 621 // FIXME: 1. If the active speculative HTML parser is not null, then return the result of creating a speculative mock element given given namespace, the tag name of the given token, and the attributes of the given token. 622 // FIXME: 2. Otherwise, optionally create a speculative mock element given given namespace, the tag name of the given token, and the attributes of the given token. 623 624 // 3. Let document be intended parent's node document. 625 JS::NonnullGCPtr<DOM::Document> document = intended_parent.document(); 626 627 // 4. Let local name be the tag name of the token. 628 auto local_name = token.tag_name(); 629 630 // FIXME: 5. Let is be the value of the "is" attribute in the given token, if such an attribute exists, or null otherwise. 631 // FIXME: 6. Let definition be the result of looking up a custom element definition given document, given namespace, local name, and is. 632 // FIXME: 7. If definition is non-null and the parser was not created as part of the HTML fragment parsing algorithm, then let will execute script be true. Otherwise, let it be false. 633 // FIXME: 8. If will execute script is true, then: 634 // FIXME: 1. Increment document's throw-on-dynamic-markup-insertion counter. 635 // FIXME: 2. If the JavaScript execution context stack is empty, then perform a microtask checkpoint. 636 // FIXME: 3. Push a new element queue onto document's relevant agent's custom element reactions stack. 637 638 // 9. Let element be the result of creating an element given document, localName, given namespace, null, and is. 639 // FIXME: If will execute script is true, set the synchronous custom elements flag; otherwise, leave it unset. 640 // FIXME: Pass in `null` and `is`. 641 auto element = create_element(*document, local_name, namespace_).release_value_but_fixme_should_propagate_errors(); 642 643 // 10. Append each attribute in the given token to element. 644 // FIXME: This isn't the exact `append` the spec is talking about. 645 token.for_each_attribute([&](auto& attribute) { 646 MUST(element->set_attribute(attribute.local_name, attribute.value)); 647 return IterationDecision::Continue; 648 }); 649 650 // FIXME: 11. If will execute script is true, then: 651 // FIXME: 1. Let queue be the result of popping from document's relevant agent's custom element reactions stack. (This will be the same element queue as was pushed above.) 652 // FIXME: 2. Invoke custom element reactions in queue. 653 // FIXME: 3. Decrement document's throw-on-dynamic-markup-insertion counter. 654 655 // FIXME: 12. If element has an xmlns attribute in the XMLNS namespace whose value is not exactly the same as the element's namespace, that is a parse error. 656 // Similarly, if element has an xmlns:xlink attribute in the XMLNS namespace whose value is not the XLink Namespace, that is a parse error. 657 658 // FIXME: 13. If element is a resettable element, invoke its reset algorithm. (This initializes the element's value and checkedness based on the element's attributes.) 659 660 // 14. If element is a form-associated element and not a form-associated custom element, the form element pointer is not null, there is no template element on the stack of open elements, 661 // element is either not listed or doesn't have a form attribute, and the intended parent is in the same tree as the element pointed to by the form element pointer, 662 // then associate element with the form element pointed to by the form element pointer and set element's parser inserted flag. 663 // FIXME: Check if the element is not a form-associated custom element. 664 if (is<FormAssociatedElement>(*element)) { 665 auto* form_associated_element = dynamic_cast<FormAssociatedElement*>(element.ptr()); 666 VERIFY(form_associated_element); 667 668 auto& html_element = form_associated_element->form_associated_element_to_html_element(); 669 670 if (m_form_element.ptr() 671 && !m_stack_of_open_elements.contains(HTML::TagNames::template_) 672 && (!form_associated_element->is_listed() || !html_element.has_attribute(HTML::AttributeNames::form)) 673 && &intended_parent.root() == &m_form_element->root()) { 674 form_associated_element->set_form(m_form_element.ptr()); 675 form_associated_element->set_parser_inserted({}); 676 } 677 } 678 679 // 15. Return element. 680 return element; 681} 682 683// https://html.spec.whatwg.org/multipage/parsing.html#insert-a-foreign-element 684JS::NonnullGCPtr<DOM::Element> HTMLParser::insert_foreign_element(HTMLToken const& token, DeprecatedFlyString const& namespace_) 685{ 686 auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); 687 688 // NOTE: adjusted_insertion_location.parent will be non-null, however, it uses RP to be able to default-initialize HTMLParser::AdjustedInsertionLocation. 689 auto element = create_element_for(token, namespace_, *adjusted_insertion_location.parent); 690 691 auto pre_insertion_validity = adjusted_insertion_location.parent->ensure_pre_insertion_validity(*element, adjusted_insertion_location.insert_before_sibling); 692 693 // NOTE: If it's not possible to insert the element at the adjusted insertion location, the element is simply dropped. 694 if (!pre_insertion_validity.is_exception()) { 695 if (!m_parsing_fragment) { 696 // FIXME: push a new element queue onto element's relevant agent's custom element reactions stack. 697 } 698 699 adjusted_insertion_location.parent->insert_before(*element, adjusted_insertion_location.insert_before_sibling); 700 701 if (!m_parsing_fragment) { 702 // FIXME: pop the element queue from element's relevant agent's custom element reactions stack, and invoke custom element reactions in that queue. 703 } 704 } 705 706 m_stack_of_open_elements.push(element); 707 return element; 708} 709 710JS::NonnullGCPtr<DOM::Element> HTMLParser::insert_html_element(HTMLToken const& token) 711{ 712 return insert_foreign_element(token, Namespace::HTML); 713} 714 715void HTMLParser::handle_before_head(HTMLToken& token) 716{ 717 if (token.is_character() && token.is_parser_whitespace()) { 718 return; 719 } 720 721 if (token.is_comment()) { 722 insert_comment(token); 723 return; 724 } 725 726 if (token.is_doctype()) { 727 log_parse_error(); 728 return; 729 } 730 731 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { 732 process_using_the_rules_for(InsertionMode::InBody, token); 733 return; 734 } 735 736 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::head) { 737 auto element = insert_html_element(token); 738 m_head_element = JS::make_handle(verify_cast<HTMLHeadElement>(*element)); 739 m_insertion_mode = InsertionMode::InHead; 740 return; 741 } 742 743 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) { 744 goto AnythingElse; 745 } 746 747 if (token.is_end_tag()) { 748 log_parse_error(); 749 return; 750 } 751 752AnythingElse: 753 m_head_element = JS::make_handle(verify_cast<HTMLHeadElement>(*insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::head)))); 754 m_insertion_mode = InsertionMode::InHead; 755 process_using_the_rules_for(InsertionMode::InHead, token); 756 return; 757} 758 759void HTMLParser::insert_comment(HTMLToken& token) 760{ 761 auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); 762 adjusted_insertion_location.parent->insert_before(realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors(), adjusted_insertion_location.insert_before_sibling); 763} 764 765void HTMLParser::handle_in_head(HTMLToken& token) 766{ 767 if (token.is_parser_whitespace()) { 768 insert_character(token.code_point()); 769 return; 770 } 771 772 if (token.is_comment()) { 773 insert_comment(token); 774 return; 775 } 776 777 if (token.is_doctype()) { 778 log_parse_error(); 779 return; 780 } 781 782 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { 783 process_using_the_rules_for(InsertionMode::InBody, token); 784 return; 785 } 786 787 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link)) { 788 (void)insert_html_element(token); 789 (void)m_stack_of_open_elements.pop(); 790 token.acknowledge_self_closing_flag_if_set(); 791 return; 792 } 793 794 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::meta) { 795 (void)insert_html_element(token); 796 (void)m_stack_of_open_elements.pop(); 797 token.acknowledge_self_closing_flag_if_set(); 798 return; 799 } 800 801 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::title) { 802 (void)insert_html_element(token); 803 m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA); 804 m_original_insertion_mode = m_insertion_mode; 805 m_insertion_mode = InsertionMode::Text; 806 return; 807 } 808 809 if (token.is_start_tag() && ((token.tag_name() == HTML::TagNames::noscript && m_scripting_enabled) || token.tag_name() == HTML::TagNames::noframes || token.tag_name() == HTML::TagNames::style)) { 810 parse_generic_raw_text_element(token); 811 return; 812 } 813 814 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noscript && !m_scripting_enabled) { 815 (void)insert_html_element(token); 816 m_insertion_mode = InsertionMode::InHeadNoscript; 817 return; 818 } 819 820 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::script) { 821 auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); 822 auto element = create_element_for(token, Namespace::HTML, *adjusted_insertion_location.parent); 823 auto& script_element = verify_cast<HTMLScriptElement>(*element); 824 script_element.set_parser_document(Badge<HTMLParser> {}, document()); 825 script_element.set_force_async(Badge<HTMLParser> {}, false); 826 script_element.set_source_line_number({}, token.start_position().line + 1); // FIXME: This +1 is incorrect for script tags whose script does not start on a new line 827 828 if (m_parsing_fragment) { 829 script_element.set_already_started(Badge<HTMLParser> {}, true); 830 } 831 832 if (m_invoked_via_document_write) { 833 TODO(); 834 } 835 836 adjusted_insertion_location.parent->insert_before(*element, adjusted_insertion_location.insert_before_sibling, false); 837 m_stack_of_open_elements.push(element); 838 m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData); 839 m_original_insertion_mode = m_insertion_mode; 840 m_insertion_mode = InsertionMode::Text; 841 return; 842 } 843 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::head) { 844 (void)m_stack_of_open_elements.pop(); 845 m_insertion_mode = InsertionMode::AfterHead; 846 return; 847 } 848 849 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) { 850 goto AnythingElse; 851 } 852 853 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::template_) { 854 (void)insert_html_element(token); 855 m_list_of_active_formatting_elements.add_marker(); 856 m_frameset_ok = false; 857 m_insertion_mode = InsertionMode::InTemplate; 858 m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate); 859 return; 860 } 861 862 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) { 863 if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) { 864 log_parse_error(); 865 return; 866 } 867 868 generate_all_implied_end_tags_thoroughly(); 869 870 if (current_node().local_name() != HTML::TagNames::template_) 871 log_parse_error(); 872 873 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::template_); 874 m_list_of_active_formatting_elements.clear_up_to_the_last_marker(); 875 m_stack_of_template_insertion_modes.take_last(); 876 reset_the_insertion_mode_appropriately(); 877 return; 878 } 879 880 if ((token.is_start_tag() && token.tag_name() == HTML::TagNames::head) || token.is_end_tag()) { 881 log_parse_error(); 882 return; 883 } 884 885AnythingElse: 886 (void)m_stack_of_open_elements.pop(); 887 m_insertion_mode = InsertionMode::AfterHead; 888 process_using_the_rules_for(m_insertion_mode, token); 889} 890 891void HTMLParser::handle_in_head_noscript(HTMLToken& token) 892{ 893 if (token.is_doctype()) { 894 log_parse_error(); 895 return; 896 } 897 898 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { 899 process_using_the_rules_for(InsertionMode::InBody, token); 900 return; 901 } 902 903 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::noscript) { 904 (void)m_stack_of_open_elements.pop(); 905 m_insertion_mode = InsertionMode::InHead; 906 return; 907 } 908 909 if (token.is_parser_whitespace() || token.is_comment() || (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::style))) { 910 process_using_the_rules_for(InsertionMode::InHead, token); 911 return; 912 } 913 914 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::br) { 915 goto AnythingElse; 916 } 917 918 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::noscript)) { 919 log_parse_error(); 920 return; 921 } 922 923AnythingElse: 924 log_parse_error(); 925 (void)m_stack_of_open_elements.pop(); 926 m_insertion_mode = InsertionMode::InHead; 927 process_using_the_rules_for(m_insertion_mode, token); 928} 929 930void HTMLParser::parse_generic_raw_text_element(HTMLToken& token) 931{ 932 (void)insert_html_element(token); 933 m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT); 934 m_original_insertion_mode = m_insertion_mode; 935 m_insertion_mode = InsertionMode::Text; 936} 937 938DOM::Text* HTMLParser::find_character_insertion_node() 939{ 940 auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); 941 if (adjusted_insertion_location.insert_before_sibling) { 942 TODO(); 943 } 944 if (adjusted_insertion_location.parent->is_document()) 945 return nullptr; 946 if (adjusted_insertion_location.parent->last_child() && adjusted_insertion_location.parent->last_child()->is_text()) 947 return verify_cast<DOM::Text>(adjusted_insertion_location.parent->last_child()); 948 auto new_text_node = realm().heap().allocate<DOM::Text>(realm(), document(), "").release_allocated_value_but_fixme_should_propagate_errors(); 949 MUST(adjusted_insertion_location.parent->append_child(*new_text_node)); 950 return new_text_node; 951} 952 953void HTMLParser::flush_character_insertions() 954{ 955 if (m_character_insertion_builder.is_empty()) 956 return; 957 m_character_insertion_node->set_data(m_character_insertion_builder.to_deprecated_string()); 958 m_character_insertion_node->parent()->children_changed(); 959 m_character_insertion_builder.clear(); 960} 961 962void HTMLParser::insert_character(u32 data) 963{ 964 auto node = find_character_insertion_node(); 965 if (node == m_character_insertion_node.ptr()) { 966 m_character_insertion_builder.append(Utf32View { &data, 1 }); 967 return; 968 } 969 if (!m_character_insertion_node.ptr()) { 970 m_character_insertion_node = JS::make_handle(node); 971 m_character_insertion_builder.append(Utf32View { &data, 1 }); 972 return; 973 } 974 flush_character_insertions(); 975 m_character_insertion_node = JS::make_handle(node); 976 m_character_insertion_builder.append(Utf32View { &data, 1 }); 977} 978 979void HTMLParser::handle_after_head(HTMLToken& token) 980{ 981 if (token.is_character() && token.is_parser_whitespace()) { 982 insert_character(token.code_point()); 983 return; 984 } 985 986 if (token.is_comment()) { 987 insert_comment(token); 988 return; 989 } 990 991 if (token.is_doctype()) { 992 log_parse_error(); 993 return; 994 } 995 996 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { 997 process_using_the_rules_for(InsertionMode::InBody, token); 998 return; 999 } 1000 1001 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::body) { 1002 (void)insert_html_element(token); 1003 m_frameset_ok = false; 1004 m_insertion_mode = InsertionMode::InBody; 1005 return; 1006 } 1007 1008 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) { 1009 (void)insert_html_element(token); 1010 m_insertion_mode = InsertionMode::InFrameset; 1011 return; 1012 } 1013 1014 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) { 1015 log_parse_error(); 1016 m_stack_of_open_elements.push(*m_head_element); 1017 process_using_the_rules_for(InsertionMode::InHead, token); 1018 m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) { 1019 return entry.ptr() == m_head_element.ptr(); 1020 }); 1021 return; 1022 } 1023 1024 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) { 1025 process_using_the_rules_for(InsertionMode::InHead, token); 1026 return; 1027 } 1028 1029 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) { 1030 goto AnythingElse; 1031 } 1032 1033 if ((token.is_start_tag() && token.tag_name() == HTML::TagNames::head) || token.is_end_tag()) { 1034 log_parse_error(); 1035 return; 1036 } 1037 1038AnythingElse: 1039 (void)insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::body)); 1040 m_insertion_mode = InsertionMode::InBody; 1041 process_using_the_rules_for(m_insertion_mode, token); 1042} 1043 1044void HTMLParser::generate_implied_end_tags(DeprecatedFlyString const& exception) 1045{ 1046 while (current_node().local_name() != exception && current_node().local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc)) 1047 (void)m_stack_of_open_elements.pop(); 1048} 1049 1050void HTMLParser::generate_all_implied_end_tags_thoroughly() 1051{ 1052 while (current_node().local_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::colgroup, HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) 1053 (void)m_stack_of_open_elements.pop(); 1054} 1055 1056void HTMLParser::close_a_p_element() 1057{ 1058 generate_implied_end_tags(HTML::TagNames::p); 1059 if (current_node().local_name() != HTML::TagNames::p) { 1060 log_parse_error(); 1061 } 1062 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::p); 1063} 1064 1065void HTMLParser::handle_after_body(HTMLToken& token) 1066{ 1067 if (token.is_character() && token.is_parser_whitespace()) { 1068 process_using_the_rules_for(InsertionMode::InBody, token); 1069 return; 1070 } 1071 1072 if (token.is_comment()) { 1073 auto& insertion_location = m_stack_of_open_elements.first(); 1074 MUST(insertion_location.append_child(realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors())); 1075 return; 1076 } 1077 1078 if (token.is_doctype()) { 1079 log_parse_error(); 1080 return; 1081 } 1082 1083 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { 1084 process_using_the_rules_for(InsertionMode::InBody, token); 1085 return; 1086 } 1087 1088 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) { 1089 if (m_parsing_fragment) { 1090 log_parse_error(); 1091 return; 1092 } 1093 m_insertion_mode = InsertionMode::AfterAfterBody; 1094 return; 1095 } 1096 1097 if (token.is_end_of_file()) { 1098 stop_parsing(); 1099 return; 1100 } 1101 1102 log_parse_error(); 1103 m_insertion_mode = InsertionMode::InBody; 1104 process_using_the_rules_for(InsertionMode::InBody, token); 1105} 1106 1107void HTMLParser::handle_after_after_body(HTMLToken& token) 1108{ 1109 if (token.is_comment()) { 1110 auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors(); 1111 MUST(document().append_child(*comment)); 1112 return; 1113 } 1114 1115 if (token.is_doctype() || token.is_parser_whitespace() || (token.is_start_tag() && token.tag_name() == HTML::TagNames::html)) { 1116 process_using_the_rules_for(InsertionMode::InBody, token); 1117 return; 1118 } 1119 1120 if (token.is_end_of_file()) { 1121 stop_parsing(); 1122 return; 1123 } 1124 1125 log_parse_error(); 1126 m_insertion_mode = InsertionMode::InBody; 1127 process_using_the_rules_for(m_insertion_mode, token); 1128} 1129 1130// https://html.spec.whatwg.org/multipage/parsing.html#reconstruct-the-active-formatting-elements 1131void HTMLParser::reconstruct_the_active_formatting_elements() 1132{ 1133 // 1. If there are no entries in the list of active formatting elements, then there is nothing to reconstruct; stop this algorithm. 1134 if (m_list_of_active_formatting_elements.is_empty()) 1135 return; 1136 1137 // 2. If the last (most recently added) entry in the list of active formatting elements is a marker, or if it is an element that is in the stack of open elements, 1138 // then there is nothing to reconstruct; stop this algorithm. 1139 if (m_list_of_active_formatting_elements.entries().last().is_marker()) 1140 return; 1141 1142 if (m_stack_of_open_elements.contains(*m_list_of_active_formatting_elements.entries().last().element)) 1143 return; 1144 1145 // 3. Let entry be the last (most recently added) element in the list of active formatting elements. 1146 size_t index = m_list_of_active_formatting_elements.entries().size() - 1; 1147 1148 // NOTE: Entry will never be null, but must be a pointer instead of a reference to allow rebinding. 1149 auto* entry = &m_list_of_active_formatting_elements.entries().at(index); 1150 1151Rewind: 1152 // 4. Rewind: If there are no entries before entry in the list of active formatting elements, then jump to the step labeled create. 1153 if (index == 0) 1154 goto Create; 1155 1156 // 5. Let entry be the entry one earlier than entry in the list of active formatting elements. 1157 --index; 1158 entry = &m_list_of_active_formatting_elements.entries().at(index); 1159 1160 // 6. If entry is neither a marker nor an element that is also in the stack of open elements, go to the step labeled rewind. 1161 if (!entry->is_marker() && !m_stack_of_open_elements.contains(*entry->element)) 1162 goto Rewind; 1163 1164Advance: 1165 // 7. Advance: Let entry be the element one later than entry in the list of active formatting elements. 1166 ++index; 1167 entry = &m_list_of_active_formatting_elements.entries().at(index); 1168 1169Create: 1170 // 8. Create: Insert an HTML element for the token for which the element entry was created, to obtain new element. 1171 VERIFY(!entry->is_marker()); 1172 1173 // FIXME: Hold on to the real token! 1174 auto new_element = insert_html_element(HTMLToken::make_start_tag(entry->element->local_name())); 1175 1176 // 9. Replace the entry for entry in the list with an entry for new element. 1177 m_list_of_active_formatting_elements.entries().at(index).element = JS::make_handle(new_element); 1178 1179 // 10. If the entry for new element in the list of active formatting elements is not the last entry in the list, return to the step labeled advance. 1180 if (index != m_list_of_active_formatting_elements.entries().size() - 1) 1181 goto Advance; 1182} 1183 1184// https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm 1185HTMLParser::AdoptionAgencyAlgorithmOutcome HTMLParser::run_the_adoption_agency_algorithm(HTMLToken& token) 1186{ 1187 // 1. Let subject be token's tag name. 1188 auto& subject = token.tag_name(); 1189 1190 // 2. If the current node is an HTML element whose tag name is subject, 1191 // and the current node is not in the list of active formatting elements, 1192 // then pop the current node off the stack of open elements, and return. 1193 if (current_node().local_name() == subject && !m_list_of_active_formatting_elements.contains(current_node())) { 1194 (void)m_stack_of_open_elements.pop(); 1195 return AdoptionAgencyAlgorithmOutcome::DoNothing; 1196 } 1197 1198 // 3. Let outer loop counter be 0. 1199 size_t outer_loop_counter = 0; 1200 1201 // 4. While true: 1202 while (true) { 1203 // 1. If outer loop counter is greater than or equal to 8, then return. 1204 if (outer_loop_counter >= 8) 1205 return AdoptionAgencyAlgorithmOutcome::DoNothing; 1206 1207 // 2. Increment outer loop counter by 1. 1208 outer_loop_counter++; 1209 1210 // 3. Let formatting element be the last element in the list of active formatting elements that: 1211 // - is between the end of the list and the last marker in the list, if any, or the start of the list otherwise, and 1212 // - has the tag name subject. 1213 auto* formatting_element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(subject); 1214 1215 // If there is no such element, then return and instead act as described in the "any other end tag" entry above. 1216 if (!formatting_element) 1217 return AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps; 1218 1219 // 4. If formatting element is not in the stack of open elements, 1220 if (!m_stack_of_open_elements.contains(*formatting_element)) { 1221 // then this is a parse error; 1222 log_parse_error(); 1223 // remove the element from the list, 1224 m_list_of_active_formatting_elements.remove(*formatting_element); 1225 // and return. 1226 return AdoptionAgencyAlgorithmOutcome::DoNothing; 1227 } 1228 1229 // 5. If formatting element is in the stack of open elements, but the element is not in scope, 1230 if (!m_stack_of_open_elements.has_in_scope(*formatting_element)) { 1231 // then this is a parse error; 1232 log_parse_error(); 1233 // return. 1234 return AdoptionAgencyAlgorithmOutcome::DoNothing; 1235 } 1236 1237 // 6. If formatting element is not the current node, 1238 if (formatting_element != &current_node()) { 1239 // this is a parse error. (But do not return.) 1240 log_parse_error(); 1241 } 1242 1243 // 7. Let furthest block be the topmost node in the stack of open elements that is lower in the stack than formatting element, 1244 // and is an element in the special category. There might not be one. 1245 JS::GCPtr<DOM::Element> furthest_block = m_stack_of_open_elements.topmost_special_node_below(*formatting_element); 1246 1247 // 8. If there is no furthest block 1248 if (!furthest_block) { 1249 // then the UA must first pop all the nodes from the bottom of the stack of open elements, 1250 // from the current node up to and including formatting element, 1251 while (&current_node() != formatting_element) 1252 (void)m_stack_of_open_elements.pop(); 1253 (void)m_stack_of_open_elements.pop(); 1254 1255 // then remove formatting element from the list of active formatting elements, 1256 m_list_of_active_formatting_elements.remove(*formatting_element); 1257 // and finally return. 1258 return AdoptionAgencyAlgorithmOutcome::DoNothing; 1259 } 1260 1261 // 9. Let common ancestor be the element immediately above formatting element in the stack of open elements. 1262 auto common_ancestor = m_stack_of_open_elements.element_immediately_above(*formatting_element); 1263 1264 // 10. Let a bookmark note the position of formatting element in the list of active formatting elements 1265 // relative to the elements on either side of it in the list. 1266 auto bookmark = m_list_of_active_formatting_elements.find_index(*formatting_element).value(); 1267 1268 // 11. Let node and last node be furthest block. 1269 auto node = furthest_block; 1270 auto last_node = furthest_block; 1271 1272 // Keep track of this for later 1273 auto node_above_node = m_stack_of_open_elements.element_immediately_above(*node); 1274 1275 // 12. Let inner loop counter be 0. 1276 size_t inner_loop_counter = 0; 1277 1278 // 13. While true: 1279 while (true) { 1280 // 1. Increment inner loop counter by 1. 1281 inner_loop_counter++; 1282 1283 // 2. Let node be the element immediately above node in the stack of open elements, 1284 // or if node is no longer in the stack of open elements (e.g. because it got removed by this algorithm), 1285 // the element that was immediately above node in the stack of open elements before node was removed. 1286 node = node_above_node; 1287 VERIFY(node); 1288 1289 // Keep track of this for later 1290 node_above_node = m_stack_of_open_elements.element_immediately_above(*node); 1291 1292 // 3. If node is formatting element, then break. 1293 if (node.ptr() == formatting_element) 1294 break; 1295 1296 // 4. If inner loop counter is greater than 3 and node is in the list of active formatting elements, 1297 if (inner_loop_counter > 3 && m_list_of_active_formatting_elements.contains(*node)) { 1298 auto node_index = m_list_of_active_formatting_elements.find_index(*node); 1299 if (node_index.has_value() && node_index.value() < bookmark) 1300 bookmark--; 1301 // then remove node from the list of active formatting elements. 1302 m_list_of_active_formatting_elements.remove(*node); 1303 } 1304 1305 // 5. If node is not in the list of active formatting elements 1306 if (!m_list_of_active_formatting_elements.contains(*node)) { 1307 // then remove node from the stack of open elements and continue. 1308 m_stack_of_open_elements.remove(*node); 1309 continue; 1310 } 1311 1312 // 6. Create an element for the token for which the element node was created, 1313 // in the HTML namespace, with common ancestor as the intended parent; 1314 // FIXME: hold onto the real token 1315 auto element = create_element_for(HTMLToken::make_start_tag(node->local_name()), Namespace::HTML, *common_ancestor); 1316 // replace the entry for node in the list of active formatting elements with an entry for the new element, 1317 m_list_of_active_formatting_elements.replace(*node, *element); 1318 // replace the entry for node in the stack of open elements with an entry for the new element, 1319 m_stack_of_open_elements.replace(*node, element); 1320 // and let node be the new element. 1321 node = element; 1322 1323 // 7. If last node is furthest block, 1324 if (last_node == furthest_block) { 1325 // then move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements. 1326 bookmark = m_list_of_active_formatting_elements.find_index(*node).value() + 1; 1327 } 1328 1329 // 8. Append last node to node. 1330 MUST(node->append_child(*last_node)); 1331 1332 // 9. Set last node to node. 1333 last_node = node; 1334 } 1335 1336 // 14. Insert whatever last node ended up being in the previous step at the appropriate place for inserting a node, 1337 // but using common ancestor as the override target. 1338 auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(common_ancestor); 1339 adjusted_insertion_location.parent->insert_before(*last_node, adjusted_insertion_location.insert_before_sibling, false); 1340 1341 // 15. Create an element for the token for which formatting element was created, 1342 // in the HTML namespace, with furthest block as the intended parent. 1343 // FIXME: hold onto the real token 1344 auto element = create_element_for(HTMLToken::make_start_tag(formatting_element->local_name()), Namespace::HTML, *furthest_block); 1345 1346 // 16. Take all of the child nodes of furthest block and append them to the element created in the last step. 1347 for (auto& child : furthest_block->children_as_vector()) 1348 MUST(element->append_child(furthest_block->remove_child(*child).release_value())); 1349 1350 // 17. Append that new element to furthest block. 1351 MUST(furthest_block->append_child(*element)); 1352 1353 // 18. Remove formatting element from the list of active formatting elements, 1354 // and insert the new element into the list of active formatting elements at the position of the aforementioned bookmark. 1355 auto formatting_element_index = m_list_of_active_formatting_elements.find_index(*formatting_element); 1356 if (formatting_element_index.has_value() && formatting_element_index.value() < bookmark) 1357 bookmark--; 1358 m_list_of_active_formatting_elements.remove(*formatting_element); 1359 m_list_of_active_formatting_elements.insert_at(bookmark, *element); 1360 1361 // 19. Remove formatting element from the stack of open elements, and insert the new element 1362 // into the stack of open elements immediately below the position of furthest block in that stack. 1363 m_stack_of_open_elements.remove(*formatting_element); 1364 m_stack_of_open_elements.insert_immediately_below(*element, *furthest_block); 1365 } 1366} 1367 1368bool HTMLParser::is_special_tag(DeprecatedFlyString const& tag_name, DeprecatedFlyString const& namespace_) 1369{ 1370 if (namespace_ == Namespace::HTML) { 1371 return tag_name.is_one_of( 1372 HTML::TagNames::address, 1373 HTML::TagNames::applet, 1374 HTML::TagNames::area, 1375 HTML::TagNames::article, 1376 HTML::TagNames::aside, 1377 HTML::TagNames::base, 1378 HTML::TagNames::basefont, 1379 HTML::TagNames::bgsound, 1380 HTML::TagNames::blockquote, 1381 HTML::TagNames::body, 1382 HTML::TagNames::br, 1383 HTML::TagNames::button, 1384 HTML::TagNames::caption, 1385 HTML::TagNames::center, 1386 HTML::TagNames::col, 1387 HTML::TagNames::colgroup, 1388 HTML::TagNames::dd, 1389 HTML::TagNames::details, 1390 HTML::TagNames::dir, 1391 HTML::TagNames::div, 1392 HTML::TagNames::dl, 1393 HTML::TagNames::dt, 1394 HTML::TagNames::embed, 1395 HTML::TagNames::fieldset, 1396 HTML::TagNames::figcaption, 1397 HTML::TagNames::figure, 1398 HTML::TagNames::footer, 1399 HTML::TagNames::form, 1400 HTML::TagNames::frame, 1401 HTML::TagNames::frameset, 1402 HTML::TagNames::h1, 1403 HTML::TagNames::h2, 1404 HTML::TagNames::h3, 1405 HTML::TagNames::h4, 1406 HTML::TagNames::h5, 1407 HTML::TagNames::h6, 1408 HTML::TagNames::head, 1409 HTML::TagNames::header, 1410 HTML::TagNames::hgroup, 1411 HTML::TagNames::hr, 1412 HTML::TagNames::html, 1413 HTML::TagNames::iframe, 1414 HTML::TagNames::img, 1415 HTML::TagNames::input, 1416 HTML::TagNames::keygen, 1417 HTML::TagNames::li, 1418 HTML::TagNames::link, 1419 HTML::TagNames::listing, 1420 HTML::TagNames::main, 1421 HTML::TagNames::marquee, 1422 HTML::TagNames::menu, 1423 HTML::TagNames::meta, 1424 HTML::TagNames::nav, 1425 HTML::TagNames::noembed, 1426 HTML::TagNames::noframes, 1427 HTML::TagNames::noscript, 1428 HTML::TagNames::object, 1429 HTML::TagNames::ol, 1430 HTML::TagNames::p, 1431 HTML::TagNames::param, 1432 HTML::TagNames::plaintext, 1433 HTML::TagNames::pre, 1434 HTML::TagNames::script, 1435 HTML::TagNames::section, 1436 HTML::TagNames::select, 1437 HTML::TagNames::source, 1438 HTML::TagNames::style, 1439 HTML::TagNames::summary, 1440 HTML::TagNames::table, 1441 HTML::TagNames::tbody, 1442 HTML::TagNames::td, 1443 HTML::TagNames::template_, 1444 HTML::TagNames::textarea, 1445 HTML::TagNames::tfoot, 1446 HTML::TagNames::th, 1447 HTML::TagNames::thead, 1448 HTML::TagNames::title, 1449 HTML::TagNames::tr, 1450 HTML::TagNames::track, 1451 HTML::TagNames::ul, 1452 HTML::TagNames::wbr, 1453 HTML::TagNames::xmp); 1454 } else if (namespace_ == Namespace::SVG) { 1455 return tag_name.is_one_of( 1456 SVG::TagNames::desc, 1457 SVG::TagNames::foreignObject, 1458 SVG::TagNames::title); 1459 } else if (namespace_ == Namespace::MathML) { 1460 TODO(); 1461 } 1462 1463 return false; 1464} 1465 1466void HTMLParser::handle_in_body(HTMLToken& token) 1467{ 1468 if (token.is_character()) { 1469 if (token.code_point() == 0) { 1470 log_parse_error(); 1471 return; 1472 } 1473 if (token.is_parser_whitespace()) { 1474 reconstruct_the_active_formatting_elements(); 1475 insert_character(token.code_point()); 1476 return; 1477 } 1478 reconstruct_the_active_formatting_elements(); 1479 insert_character(token.code_point()); 1480 m_frameset_ok = false; 1481 return; 1482 } 1483 1484 if (token.is_comment()) { 1485 insert_comment(token); 1486 return; 1487 } 1488 1489 if (token.is_doctype()) { 1490 log_parse_error(); 1491 return; 1492 } 1493 1494 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { 1495 log_parse_error(); 1496 if (m_stack_of_open_elements.contains(HTML::TagNames::template_)) 1497 return; 1498 token.for_each_attribute([&](auto& attribute) { 1499 if (!current_node().has_attribute(attribute.local_name)) 1500 MUST(current_node().set_attribute(attribute.local_name, attribute.value)); 1501 return IterationDecision::Continue; 1502 }); 1503 return; 1504 } 1505 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) { 1506 process_using_the_rules_for(InsertionMode::InHead, token); 1507 return; 1508 } 1509 1510 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) { 1511 process_using_the_rules_for(InsertionMode::InHead, token); 1512 return; 1513 } 1514 1515 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::body) { 1516 log_parse_error(); 1517 if (m_stack_of_open_elements.elements().size() == 1 1518 || m_stack_of_open_elements.elements().at(1)->local_name() != HTML::TagNames::body 1519 || m_stack_of_open_elements.contains(HTML::TagNames::template_)) { 1520 VERIFY(m_parsing_fragment); 1521 return; 1522 } 1523 m_frameset_ok = false; 1524 auto& body_element = m_stack_of_open_elements.elements().at(1); 1525 token.for_each_attribute([&](auto& attribute) { 1526 if (!body_element->has_attribute(attribute.local_name)) 1527 MUST(body_element->set_attribute(attribute.local_name, attribute.value)); 1528 return IterationDecision::Continue; 1529 }); 1530 return; 1531 } 1532 1533 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) { 1534 log_parse_error(); 1535 1536 if (m_stack_of_open_elements.elements().size() == 1 1537 || m_stack_of_open_elements.elements().at(1)->local_name() != HTML::TagNames::body) { 1538 VERIFY(m_parsing_fragment); 1539 return; 1540 } 1541 1542 if (!m_frameset_ok) 1543 return; 1544 1545 TODO(); 1546 } 1547 1548 if (token.is_end_of_file()) { 1549 if (!m_stack_of_template_insertion_modes.is_empty()) { 1550 process_using_the_rules_for(InsertionMode::InTemplate, token); 1551 return; 1552 } 1553 1554 for (auto& node : m_stack_of_open_elements.elements()) { 1555 if (!node->local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) { 1556 log_parse_error(); 1557 break; 1558 } 1559 } 1560 1561 stop_parsing(); 1562 return; 1563 } 1564 1565 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::body) { 1566 if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::body)) { 1567 log_parse_error(); 1568 return; 1569 } 1570 1571 for (auto& node : m_stack_of_open_elements.elements()) { 1572 if (!node->local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) { 1573 log_parse_error(); 1574 break; 1575 } 1576 } 1577 1578 m_insertion_mode = InsertionMode::AfterBody; 1579 return; 1580 } 1581 1582 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) { 1583 if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::body)) { 1584 log_parse_error(); 1585 return; 1586 } 1587 1588 for (auto& node : m_stack_of_open_elements.elements()) { 1589 if (!node->local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) { 1590 log_parse_error(); 1591 break; 1592 } 1593 } 1594 1595 m_insertion_mode = InsertionMode::AfterBody; 1596 process_using_the_rules_for(m_insertion_mode, token); 1597 return; 1598 } 1599 1600 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::address, HTML::TagNames::article, HTML::TagNames::aside, HTML::TagNames::blockquote, HTML::TagNames::center, HTML::TagNames::details, HTML::TagNames::dialog, HTML::TagNames::dir, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::fieldset, HTML::TagNames::figcaption, HTML::TagNames::figure, HTML::TagNames::footer, HTML::TagNames::header, HTML::TagNames::hgroup, HTML::TagNames::main, HTML::TagNames::menu, HTML::TagNames::nav, HTML::TagNames::ol, HTML::TagNames::p, HTML::TagNames::section, HTML::TagNames::summary, HTML::TagNames::ul)) { 1601 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) 1602 close_a_p_element(); 1603 (void)insert_html_element(token); 1604 return; 1605 } 1606 1607 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) { 1608 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) 1609 close_a_p_element(); 1610 if (current_node().local_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) { 1611 log_parse_error(); 1612 (void)m_stack_of_open_elements.pop(); 1613 } 1614 (void)insert_html_element(token); 1615 return; 1616 } 1617 1618 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::pre, HTML::TagNames::listing)) { 1619 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) 1620 close_a_p_element(); 1621 1622 (void)insert_html_element(token); 1623 1624 m_frameset_ok = false; 1625 1626 // If the next token is a U+000A LINE FEED (LF) character token, 1627 // then ignore that token and move on to the next one. 1628 // (Newlines at the start of pre blocks are ignored as an authoring convenience.) 1629 auto next_token = m_tokenizer.next_token(); 1630 if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_point() == '\n') { 1631 // Ignore it. 1632 } else { 1633 process_using_the_rules_for(m_insertion_mode, next_token.value()); 1634 } 1635 return; 1636 } 1637 1638 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::form) { 1639 if (m_form_element.ptr() && !m_stack_of_open_elements.contains(HTML::TagNames::template_)) { 1640 log_parse_error(); 1641 return; 1642 } 1643 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) 1644 close_a_p_element(); 1645 auto element = insert_html_element(token); 1646 if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) 1647 m_form_element = JS::make_handle(verify_cast<HTMLFormElement>(*element)); 1648 return; 1649 } 1650 1651 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::li) { 1652 m_frameset_ok = false; 1653 1654 for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { 1655 JS::GCPtr<DOM::Element> node = m_stack_of_open_elements.elements()[i].ptr(); 1656 1657 if (node->local_name() == HTML::TagNames::li) { 1658 generate_implied_end_tags(HTML::TagNames::li); 1659 if (current_node().local_name() != HTML::TagNames::li) { 1660 log_parse_error(); 1661 } 1662 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::li); 1663 break; 1664 } 1665 1666 if (is_special_tag(node->local_name(), node->namespace_()) && !node->local_name().is_one_of(HTML::TagNames::address, HTML::TagNames::div, HTML::TagNames::p)) 1667 break; 1668 } 1669 1670 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) 1671 close_a_p_element(); 1672 1673 (void)insert_html_element(token); 1674 return; 1675 } 1676 1677 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt)) { 1678 m_frameset_ok = false; 1679 for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { 1680 JS::GCPtr<DOM::Element> node = m_stack_of_open_elements.elements()[i].ptr(); 1681 if (node->local_name() == HTML::TagNames::dd) { 1682 generate_implied_end_tags(HTML::TagNames::dd); 1683 if (current_node().local_name() != HTML::TagNames::dd) { 1684 log_parse_error(); 1685 } 1686 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::dd); 1687 break; 1688 } 1689 if (node->local_name() == HTML::TagNames::dt) { 1690 generate_implied_end_tags(HTML::TagNames::dt); 1691 if (current_node().local_name() != HTML::TagNames::dt) { 1692 log_parse_error(); 1693 } 1694 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::dt); 1695 break; 1696 } 1697 if (is_special_tag(node->local_name(), node->namespace_()) && !node->local_name().is_one_of(HTML::TagNames::address, HTML::TagNames::div, HTML::TagNames::p)) 1698 break; 1699 } 1700 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) 1701 close_a_p_element(); 1702 (void)insert_html_element(token); 1703 return; 1704 } 1705 1706 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::plaintext) { 1707 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) 1708 close_a_p_element(); 1709 (void)insert_html_element(token); 1710 m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT); 1711 return; 1712 } 1713 1714 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::button) { 1715 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::button)) { 1716 log_parse_error(); 1717 generate_implied_end_tags(); 1718 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::button); 1719 } 1720 reconstruct_the_active_formatting_elements(); 1721 (void)insert_html_element(token); 1722 m_frameset_ok = false; 1723 return; 1724 } 1725 1726 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::address, HTML::TagNames::article, HTML::TagNames::aside, HTML::TagNames::blockquote, HTML::TagNames::button, HTML::TagNames::center, HTML::TagNames::details, HTML::TagNames::dialog, HTML::TagNames::dir, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::fieldset, HTML::TagNames::figcaption, HTML::TagNames::figure, HTML::TagNames::footer, HTML::TagNames::header, HTML::TagNames::hgroup, HTML::TagNames::listing, HTML::TagNames::main, HTML::TagNames::menu, HTML::TagNames::nav, HTML::TagNames::ol, HTML::TagNames::pre, HTML::TagNames::section, HTML::TagNames::summary, HTML::TagNames::ul)) { 1727 if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) { 1728 log_parse_error(); 1729 return; 1730 } 1731 1732 generate_implied_end_tags(); 1733 1734 if (current_node().local_name() != token.tag_name()) { 1735 log_parse_error(); 1736 } 1737 1738 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name()); 1739 return; 1740 } 1741 1742 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::form) { 1743 if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) { 1744 auto node = m_form_element; 1745 m_form_element = {}; 1746 if (!node || !m_stack_of_open_elements.has_in_scope(*node)) { 1747 log_parse_error(); 1748 return; 1749 } 1750 generate_implied_end_tags(); 1751 if (&current_node() != node.ptr()) { 1752 log_parse_error(); 1753 } 1754 m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) { return entry.ptr() == node.ptr(); }); 1755 } else { 1756 if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::form)) { 1757 log_parse_error(); 1758 return; 1759 } 1760 generate_implied_end_tags(); 1761 if (current_node().local_name() != HTML::TagNames::form) { 1762 log_parse_error(); 1763 } 1764 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::form); 1765 } 1766 return; 1767 } 1768 1769 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::p) { 1770 if (!m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) { 1771 log_parse_error(); 1772 (void)insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::p)); 1773 } 1774 close_a_p_element(); 1775 return; 1776 } 1777 1778 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::li) { 1779 if (!m_stack_of_open_elements.has_in_list_item_scope(HTML::TagNames::li)) { 1780 log_parse_error(); 1781 return; 1782 } 1783 generate_implied_end_tags(HTML::TagNames::li); 1784 if (current_node().local_name() != HTML::TagNames::li) { 1785 log_parse_error(); 1786 dbgln("Expected <li> current node, but had <{}>", current_node().local_name()); 1787 } 1788 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::li); 1789 return; 1790 } 1791 1792 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt)) { 1793 if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) { 1794 log_parse_error(); 1795 return; 1796 } 1797 generate_implied_end_tags(token.tag_name()); 1798 if (current_node().local_name() != token.tag_name()) { 1799 log_parse_error(); 1800 } 1801 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name()); 1802 return; 1803 } 1804 1805 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) { 1806 if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::h1) 1807 && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h2) 1808 && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h3) 1809 && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h4) 1810 && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h5) 1811 && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h6)) { 1812 log_parse_error(); 1813 return; 1814 } 1815 1816 generate_implied_end_tags(); 1817 if (current_node().local_name() != token.tag_name()) { 1818 log_parse_error(); 1819 } 1820 1821 for (;;) { 1822 auto popped_element = m_stack_of_open_elements.pop(); 1823 if (popped_element->local_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) 1824 break; 1825 } 1826 return; 1827 } 1828 1829 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::a) { 1830 if (auto* element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(HTML::TagNames::a)) { 1831 log_parse_error(); 1832 if (run_the_adoption_agency_algorithm(token) == AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps) 1833 goto AnyOtherEndTag; 1834 m_list_of_active_formatting_elements.remove(*element); 1835 m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) { 1836 return entry.ptr() == element; 1837 }); 1838 } 1839 reconstruct_the_active_formatting_elements(); 1840 auto element = insert_html_element(token); 1841 m_list_of_active_formatting_elements.add(*element); 1842 return; 1843 } 1844 1845 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::code, HTML::TagNames::em, HTML::TagNames::font, HTML::TagNames::i, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::strike, HTML::TagNames::strong, HTML::TagNames::tt, HTML::TagNames::u)) { 1846 reconstruct_the_active_formatting_elements(); 1847 auto element = insert_html_element(token); 1848 m_list_of_active_formatting_elements.add(*element); 1849 return; 1850 } 1851 1852 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::nobr) { 1853 reconstruct_the_active_formatting_elements(); 1854 if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::nobr)) { 1855 log_parse_error(); 1856 run_the_adoption_agency_algorithm(token); 1857 reconstruct_the_active_formatting_elements(); 1858 } 1859 auto element = insert_html_element(token); 1860 m_list_of_active_formatting_elements.add(*element); 1861 return; 1862 } 1863 1864 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::a, HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::code, HTML::TagNames::em, HTML::TagNames::font, HTML::TagNames::i, HTML::TagNames::nobr, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::strike, HTML::TagNames::strong, HTML::TagNames::tt, HTML::TagNames::u)) { 1865 if (run_the_adoption_agency_algorithm(token) == AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps) 1866 goto AnyOtherEndTag; 1867 return; 1868 } 1869 1870 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::applet, HTML::TagNames::marquee, HTML::TagNames::object)) { 1871 reconstruct_the_active_formatting_elements(); 1872 (void)insert_html_element(token); 1873 m_list_of_active_formatting_elements.add_marker(); 1874 m_frameset_ok = false; 1875 return; 1876 } 1877 1878 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::applet, HTML::TagNames::marquee, HTML::TagNames::object)) { 1879 if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) { 1880 log_parse_error(); 1881 return; 1882 } 1883 1884 generate_implied_end_tags(); 1885 if (current_node().local_name() != token.tag_name()) { 1886 log_parse_error(); 1887 } 1888 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name()); 1889 m_list_of_active_formatting_elements.clear_up_to_the_last_marker(); 1890 return; 1891 } 1892 1893 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::table) { 1894 if (!document().in_quirks_mode()) { 1895 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) 1896 close_a_p_element(); 1897 } 1898 (void)insert_html_element(token); 1899 m_frameset_ok = false; 1900 m_insertion_mode = InsertionMode::InTable; 1901 return; 1902 } 1903 1904 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::br) { 1905 token.drop_attributes(); 1906 goto BRStartTag; 1907 } 1908 1909 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::area, HTML::TagNames::br, HTML::TagNames::embed, HTML::TagNames::img, HTML::TagNames::keygen, HTML::TagNames::wbr)) { 1910 BRStartTag: 1911 reconstruct_the_active_formatting_elements(); 1912 (void)insert_html_element(token); 1913 (void)m_stack_of_open_elements.pop(); 1914 token.acknowledge_self_closing_flag_if_set(); 1915 m_frameset_ok = false; 1916 return; 1917 } 1918 1919 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::input) { 1920 reconstruct_the_active_formatting_elements(); 1921 (void)insert_html_element(token); 1922 (void)m_stack_of_open_elements.pop(); 1923 token.acknowledge_self_closing_flag_if_set(); 1924 auto type_attribute = token.attribute(HTML::AttributeNames::type); 1925 if (type_attribute.is_null() || !type_attribute.equals_ignoring_ascii_case("hidden"sv)) { 1926 m_frameset_ok = false; 1927 } 1928 return; 1929 } 1930 1931 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::param, HTML::TagNames::source, HTML::TagNames::track)) { 1932 (void)insert_html_element(token); 1933 (void)m_stack_of_open_elements.pop(); 1934 token.acknowledge_self_closing_flag_if_set(); 1935 return; 1936 } 1937 1938 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::hr) { 1939 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) 1940 close_a_p_element(); 1941 (void)insert_html_element(token); 1942 (void)m_stack_of_open_elements.pop(); 1943 token.acknowledge_self_closing_flag_if_set(); 1944 m_frameset_ok = false; 1945 return; 1946 } 1947 1948 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::image) { 1949 // Parse error. Change the token's tag name to HTML::TagNames::img and reprocess it. (Don't ask.) 1950 log_parse_error(); 1951 token.set_tag_name("img"); 1952 process_using_the_rules_for(m_insertion_mode, token); 1953 return; 1954 } 1955 1956 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::textarea) { 1957 (void)insert_html_element(token); 1958 1959 m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA); 1960 1961 // If the next token is a U+000A LINE FEED (LF) character token, 1962 // then ignore that token and move on to the next one. 1963 // (Newlines at the start of pre blocks are ignored as an authoring convenience.) 1964 auto next_token = m_tokenizer.next_token(); 1965 1966 m_original_insertion_mode = m_insertion_mode; 1967 m_frameset_ok = false; 1968 m_insertion_mode = InsertionMode::Text; 1969 1970 if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_point() == '\n') { 1971 // Ignore it. 1972 } else { 1973 process_using_the_rules_for(m_insertion_mode, next_token.value()); 1974 } 1975 return; 1976 } 1977 1978 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::xmp) { 1979 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) { 1980 close_a_p_element(); 1981 } 1982 reconstruct_the_active_formatting_elements(); 1983 m_frameset_ok = false; 1984 parse_generic_raw_text_element(token); 1985 return; 1986 } 1987 1988 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::iframe) { 1989 m_frameset_ok = false; 1990 parse_generic_raw_text_element(token); 1991 return; 1992 } 1993 1994 if (token.is_start_tag() && ((token.tag_name() == HTML::TagNames::noembed) || (token.tag_name() == HTML::TagNames::noscript && m_scripting_enabled))) { 1995 parse_generic_raw_text_element(token); 1996 return; 1997 } 1998 1999 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::select) { 2000 reconstruct_the_active_formatting_elements(); 2001 (void)insert_html_element(token); 2002 m_frameset_ok = false; 2003 switch (m_insertion_mode) { 2004 case InsertionMode::InTable: 2005 case InsertionMode::InCaption: 2006 case InsertionMode::InTableBody: 2007 case InsertionMode::InRow: 2008 case InsertionMode::InCell: 2009 m_insertion_mode = InsertionMode::InSelectInTable; 2010 break; 2011 default: 2012 m_insertion_mode = InsertionMode::InSelect; 2013 break; 2014 } 2015 return; 2016 } 2017 2018 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::optgroup, HTML::TagNames::option)) { 2019 if (current_node().local_name() == HTML::TagNames::option) 2020 (void)m_stack_of_open_elements.pop(); 2021 reconstruct_the_active_formatting_elements(); 2022 (void)insert_html_element(token); 2023 return; 2024 } 2025 2026 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::rb, HTML::TagNames::rtc)) { 2027 if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::ruby)) 2028 generate_implied_end_tags(); 2029 2030 if (current_node().local_name() != HTML::TagNames::ruby) 2031 log_parse_error(); 2032 2033 (void)insert_html_element(token); 2034 return; 2035 } 2036 2037 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::rp, HTML::TagNames::rt)) { 2038 if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::ruby)) 2039 generate_implied_end_tags(HTML::TagNames::rtc); 2040 2041 if (current_node().local_name() != HTML::TagNames::rtc || current_node().local_name() != HTML::TagNames::ruby) 2042 log_parse_error(); 2043 2044 (void)insert_html_element(token); 2045 return; 2046 } 2047 2048 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::math) { 2049 reconstruct_the_active_formatting_elements(); 2050 adjust_mathml_attributes(token); 2051 adjust_foreign_attributes(token); 2052 2053 (void)insert_foreign_element(token, Namespace::MathML); 2054 2055 if (token.is_self_closing()) { 2056 (void)m_stack_of_open_elements.pop(); 2057 token.acknowledge_self_closing_flag_if_set(); 2058 } 2059 return; 2060 } 2061 2062 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::svg) { 2063 reconstruct_the_active_formatting_elements(); 2064 adjust_svg_attributes(token); 2065 adjust_foreign_attributes(token); 2066 2067 (void)insert_foreign_element(token, Namespace::SVG); 2068 2069 if (token.is_self_closing()) { 2070 (void)m_stack_of_open_elements.pop(); 2071 token.acknowledge_self_closing_flag_if_set(); 2072 } 2073 return; 2074 } 2075 2076 if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::frame, HTML::TagNames::head, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr))) { 2077 log_parse_error(); 2078 return; 2079 } 2080 2081 // Any other start tag 2082 if (token.is_start_tag()) { 2083 reconstruct_the_active_formatting_elements(); 2084 (void)insert_html_element(token); 2085 return; 2086 } 2087 2088 if (token.is_end_tag()) { 2089 AnyOtherEndTag: 2090 JS::GCPtr<DOM::Element> node; 2091 for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { 2092 node = m_stack_of_open_elements.elements()[i].ptr(); 2093 if (node->local_name() == token.tag_name()) { 2094 generate_implied_end_tags(token.tag_name()); 2095 if (node.ptr() != &current_node()) { 2096 log_parse_error(); 2097 } 2098 while (&current_node() != node.ptr()) { 2099 (void)m_stack_of_open_elements.pop(); 2100 } 2101 (void)m_stack_of_open_elements.pop(); 2102 break; 2103 } 2104 if (is_special_tag(node->local_name(), node->namespace_())) { 2105 log_parse_error(); 2106 return; 2107 } 2108 } 2109 return; 2110 } 2111} 2112 2113void HTMLParser::adjust_mathml_attributes(HTMLToken& token) 2114{ 2115 token.adjust_attribute_name("definitionurl", "definitionURL"); 2116} 2117 2118void HTMLParser::adjust_svg_tag_names(HTMLToken& token) 2119{ 2120 token.adjust_tag_name("altglyph", "altGlyph"); 2121 token.adjust_tag_name("altglyphdef", "altGlyphDef"); 2122 token.adjust_tag_name("altglyphitem", "altGlyphItem"); 2123 token.adjust_tag_name("animatecolor", "animateColor"); 2124 token.adjust_tag_name("animatemotion", "animateMotion"); 2125 token.adjust_tag_name("animatetransform", "animateTransform"); 2126 token.adjust_tag_name("clippath", "clipPath"); 2127 token.adjust_tag_name("feblend", "feBlend"); 2128 token.adjust_tag_name("fecolormatrix", "feColorMatrix"); 2129 token.adjust_tag_name("fecomponenttransfer", "feComponentTransfer"); 2130 token.adjust_tag_name("fecomposite", "feComposite"); 2131 token.adjust_tag_name("feconvolvematrix", "feConvolveMatrix"); 2132 token.adjust_tag_name("fediffuselighting", "feDiffuseLighting"); 2133 token.adjust_tag_name("fedisplacementmap", "feDisplacementMap"); 2134 token.adjust_tag_name("fedistantlight", "feDistantLight"); 2135 token.adjust_tag_name("fedropshadow", "feDropShadow"); 2136 token.adjust_tag_name("feflood", "feFlood"); 2137 token.adjust_tag_name("fefunca", "feFuncA"); 2138 token.adjust_tag_name("fefuncb", "feFuncB"); 2139 token.adjust_tag_name("fefuncg", "feFuncG"); 2140 token.adjust_tag_name("fefuncr", "feFuncR"); 2141 token.adjust_tag_name("fegaussianblur", "feGaussianBlur"); 2142 token.adjust_tag_name("feimage", "feImage"); 2143 token.adjust_tag_name("femerge", "feMerge"); 2144 token.adjust_tag_name("femergenode", "feMergeNode"); 2145 token.adjust_tag_name("femorphology", "feMorphology"); 2146 token.adjust_tag_name("feoffset", "feOffset"); 2147 token.adjust_tag_name("fepointlight", "fePointLight"); 2148 token.adjust_tag_name("fespecularlighting", "feSpecularLighting"); 2149 token.adjust_tag_name("fespotlight", "feSpotlight"); 2150 token.adjust_tag_name("foreignobject", "foreignObject"); 2151 token.adjust_tag_name("glyphref", "glyphRef"); 2152 token.adjust_tag_name("lineargradient", "linearGradient"); 2153 token.adjust_tag_name("radialgradient", "radialGradient"); 2154 token.adjust_tag_name("textpath", "textPath"); 2155} 2156 2157void HTMLParser::adjust_svg_attributes(HTMLToken& token) 2158{ 2159 token.adjust_attribute_name("attributename", "attributeName"); 2160 token.adjust_attribute_name("attributetype", "attributeType"); 2161 token.adjust_attribute_name("basefrequency", "baseFrequency"); 2162 token.adjust_attribute_name("baseprofile", "baseProfile"); 2163 token.adjust_attribute_name("calcmode", "calcMode"); 2164 token.adjust_attribute_name("clippathunits", "clipPathUnits"); 2165 token.adjust_attribute_name("diffuseconstant", "diffuseConstant"); 2166 token.adjust_attribute_name("edgemode", "edgeMode"); 2167 token.adjust_attribute_name("filterunits", "filterUnits"); 2168 token.adjust_attribute_name("glyphref", "glyphRef"); 2169 token.adjust_attribute_name("gradienttransform", "gradientTransform"); 2170 token.adjust_attribute_name("gradientunits", "gradientUnits"); 2171 token.adjust_attribute_name("kernelmatrix", "kernelMatrix"); 2172 token.adjust_attribute_name("kernelunitlength", "kernelUnitLength"); 2173 token.adjust_attribute_name("keypoints", "keyPoints"); 2174 token.adjust_attribute_name("keysplines", "keySplines"); 2175 token.adjust_attribute_name("keytimes", "keyTimes"); 2176 token.adjust_attribute_name("lengthadjust", "lengthAdjust"); 2177 token.adjust_attribute_name("limitingconeangle", "limitingConeAngle"); 2178 token.adjust_attribute_name("markerheight", "markerHeight"); 2179 token.adjust_attribute_name("markerunits", "markerUnits"); 2180 token.adjust_attribute_name("markerwidth", "markerWidth"); 2181 token.adjust_attribute_name("maskcontentunits", "maskContentUnits"); 2182 token.adjust_attribute_name("maskunits", "maskUnits"); 2183 token.adjust_attribute_name("numoctaves", "numOctaves"); 2184 token.adjust_attribute_name("pathlength", "pathLength"); 2185 token.adjust_attribute_name("patterncontentunits", "patternContentUnits"); 2186 token.adjust_attribute_name("patterntransform", "patternTransform"); 2187 token.adjust_attribute_name("patternunits", "patternUnits"); 2188 token.adjust_attribute_name("pointsatx", "pointsAtX"); 2189 token.adjust_attribute_name("pointsaty", "pointsAtY"); 2190 token.adjust_attribute_name("pointsatz", "pointsAtZ"); 2191 token.adjust_attribute_name("preservealpha", "preserveAlpha"); 2192 token.adjust_attribute_name("preserveaspectratio", "preserveAspectRatio"); 2193 token.adjust_attribute_name("primitiveunits", "primitiveUnits"); 2194 token.adjust_attribute_name("refx", "refX"); 2195 token.adjust_attribute_name("refy", "refY"); 2196 token.adjust_attribute_name("repeatcount", "repeatCount"); 2197 token.adjust_attribute_name("repeatdur", "repeatDur"); 2198 token.adjust_attribute_name("requiredextensions", "requiredExtensions"); 2199 token.adjust_attribute_name("requiredfeatures", "requiredFeatures"); 2200 token.adjust_attribute_name("specularconstant", "specularConstant"); 2201 token.adjust_attribute_name("specularexponent", "specularExponent"); 2202 token.adjust_attribute_name("spreadmethod", "spreadMethod"); 2203 token.adjust_attribute_name("startoffset", "startOffset"); 2204 token.adjust_attribute_name("stddeviation", "stdDeviation"); 2205 token.adjust_attribute_name("stitchtiles", "stitchTiles"); 2206 token.adjust_attribute_name("surfacescale", "surfaceScale"); 2207 token.adjust_attribute_name("systemlanguage", "systemLanguage"); 2208 token.adjust_attribute_name("tablevalues", "tableValues"); 2209 token.adjust_attribute_name("targetx", "targetX"); 2210 token.adjust_attribute_name("targety", "targetY"); 2211 token.adjust_attribute_name("textlength", "textLength"); 2212 token.adjust_attribute_name("viewbox", "viewBox"); 2213 token.adjust_attribute_name("viewtarget", "viewTarget"); 2214 token.adjust_attribute_name("xchannelselector", "xChannelSelector"); 2215 token.adjust_attribute_name("ychannelselector", "yChannelSelector"); 2216 token.adjust_attribute_name("zoomandpan", "zoomAndPan"); 2217} 2218 2219void HTMLParser::adjust_foreign_attributes(HTMLToken& token) 2220{ 2221 token.adjust_foreign_attribute("xlink:actuate", "xlink", "actuate", Namespace::XLink); 2222 token.adjust_foreign_attribute("xlink:arcrole", "xlink", "arcrole", Namespace::XLink); 2223 token.adjust_foreign_attribute("xlink:href", "xlink", "href", Namespace::XLink); 2224 token.adjust_foreign_attribute("xlink:role", "xlink", "role", Namespace::XLink); 2225 token.adjust_foreign_attribute("xlink:show", "xlink", "show", Namespace::XLink); 2226 token.adjust_foreign_attribute("xlink:title", "xlink", "title", Namespace::XLink); 2227 token.adjust_foreign_attribute("xlink:type", "xlink", "type", Namespace::XLink); 2228 2229 token.adjust_foreign_attribute("xml:lang", "xml", "lang", Namespace::XML); 2230 token.adjust_foreign_attribute("xml:space", "xml", "space", Namespace::XML); 2231 2232 token.adjust_foreign_attribute("xmlns", "", "xmlns", Namespace::XMLNS); 2233 token.adjust_foreign_attribute("xmlns:xlink", "xmlns", "xlink", Namespace::XMLNS); 2234} 2235 2236void HTMLParser::increment_script_nesting_level() 2237{ 2238 ++m_script_nesting_level; 2239} 2240 2241void HTMLParser::decrement_script_nesting_level() 2242{ 2243 VERIFY(m_script_nesting_level); 2244 --m_script_nesting_level; 2245} 2246 2247// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata 2248void HTMLParser::handle_text(HTMLToken& token) 2249{ 2250 if (token.is_character()) { 2251 insert_character(token.code_point()); 2252 return; 2253 } 2254 if (token.is_end_of_file()) { 2255 log_parse_error(); 2256 if (current_node().local_name() == HTML::TagNames::script) 2257 verify_cast<HTMLScriptElement>(current_node()).set_already_started(Badge<HTMLParser> {}, true); 2258 (void)m_stack_of_open_elements.pop(); 2259 m_insertion_mode = m_original_insertion_mode; 2260 process_using_the_rules_for(m_insertion_mode, token); 2261 return; 2262 } 2263 2264 // -> An end tag whose tag name is "script" 2265 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::script) { 2266 // FIXME: If the active speculative HTML parser is null and the JavaScript execution context stack is empty, then perform a microtask checkpoint. 2267 2268 // Non-standard: Make sure the <script> element has up-to-date text content before preparing the script. 2269 flush_character_insertions(); 2270 2271 // Let script be the current node (which will be a script element). 2272 JS::NonnullGCPtr<HTMLScriptElement> script = verify_cast<HTMLScriptElement>(current_node()); 2273 2274 // Pop the current node off the stack of open elements. 2275 (void)m_stack_of_open_elements.pop(); 2276 2277 // Switch the insertion mode to the original insertion mode. 2278 m_insertion_mode = m_original_insertion_mode; 2279 2280 // Let the old insertion point have the same value as the current insertion point. 2281 m_tokenizer.store_insertion_point(); 2282 2283 // Let the insertion point be just before the next input character. 2284 m_tokenizer.update_insertion_point(); 2285 2286 // Increment the parser's script nesting level by one. 2287 increment_script_nesting_level(); 2288 2289 // If the active speculative HTML parser is null, then prepare the script element script. 2290 // This might cause some script to execute, which might cause new characters to be inserted into the tokenizer, 2291 // and might cause the tokenizer to output more tokens, resulting in a reentrant invocation of the parser. 2292 // FIXME: Check if active speculative HTML parser is null. 2293 script->prepare_script(Badge<HTMLParser> {}); 2294 2295 // Decrement the parser's script nesting level by one. 2296 decrement_script_nesting_level(); 2297 2298 // If the parser's script nesting level is zero, then set the parser pause flag to false. 2299 if (script_nesting_level() == 0) 2300 m_parser_pause_flag = false; 2301 2302 // Let the insertion point have the value of the old insertion point. 2303 m_tokenizer.restore_insertion_point(); 2304 2305 // At this stage, if the pending parsing-blocking script is not null, then: 2306 if (document().pending_parsing_blocking_script()) { 2307 // -> If the script nesting level is not zero: 2308 if (script_nesting_level() != 0) { 2309 // Set the parser pause flag to true, 2310 m_parser_pause_flag = true; 2311 // FIXME: and abort the processing of any nested invocations of the tokenizer, yielding control back to the caller. 2312 // (Tokenization will resume when the caller returns to the "outer" tree construction stage.) 2313 TODO(); 2314 } 2315 2316 // Otherwise: 2317 else { 2318 // While the pending parsing-blocking script is not null: 2319 while (document().pending_parsing_blocking_script()) { 2320 // 1. Let the script be the pending parsing-blocking script. 2321 // 2. Set the pending parsing-blocking script to null. 2322 auto the_script = document().take_pending_parsing_blocking_script({}); 2323 2324 // FIXME: 3. Start the speculative HTML parser for this instance of the HTML parser. 2325 2326 // 4. Block the tokenizer for this instance of the HTML parser, such that the event loop will not run tasks that invoke the tokenizer. 2327 m_tokenizer.set_blocked(true); 2328 2329 // 5. If the parser's Document has a style sheet that is blocking scripts 2330 // or the script's ready to be parser-executed is false: 2331 if (m_document->has_a_style_sheet_that_is_blocking_scripts() || script->is_ready_to_be_parser_executed() == false) { 2332 // spin the event loop until the parser's Document has no style sheet that is blocking scripts 2333 // and the script's ready to be parser-executed becomes true. 2334 main_thread_event_loop().spin_until([&] { 2335 return !m_document->has_a_style_sheet_that_is_blocking_scripts() && script->is_ready_to_be_parser_executed(); 2336 }); 2337 } 2338 2339 // 6. If this parser has been aborted in the meantime, return. 2340 if (m_aborted) 2341 return; 2342 2343 // FIXME: 7. Stop the speculative HTML parser for this instance of the HTML parser. 2344 2345 // 8. Unblock the tokenizer for this instance of the HTML parser, such that tasks that invoke the tokenizer can again be run. 2346 m_tokenizer.set_blocked(false); 2347 2348 // 9. Let the insertion point be just before the next input character. 2349 m_tokenizer.update_insertion_point(); 2350 2351 // 10. Increment the parser's script nesting level by one (it should be zero before this step, so this sets it to one). 2352 VERIFY(script_nesting_level() == 0); 2353 increment_script_nesting_level(); 2354 2355 // 11. Execute the script element the script. 2356 the_script->execute_script(); 2357 2358 // 12. Decrement the parser's script nesting level by one. 2359 decrement_script_nesting_level(); 2360 2361 // If the parser's script nesting level is zero (which it always should be at this point), then set the parser pause flag to false. 2362 VERIFY(script_nesting_level() == 0); 2363 m_parser_pause_flag = false; 2364 2365 // 13. Let the insertion point be undefined again. 2366 m_tokenizer.undefine_insertion_point(); 2367 } 2368 } 2369 } 2370 2371 return; 2372 } 2373 2374 if (token.is_end_tag()) { 2375 (void)m_stack_of_open_elements.pop(); 2376 m_insertion_mode = m_original_insertion_mode; 2377 return; 2378 } 2379 TODO(); 2380} 2381 2382void HTMLParser::clear_the_stack_back_to_a_table_context() 2383{ 2384 while (!current_node().local_name().is_one_of(HTML::TagNames::table, HTML::TagNames::template_, HTML::TagNames::html)) 2385 (void)m_stack_of_open_elements.pop(); 2386 2387 if (current_node().local_name() == HTML::TagNames::html) 2388 VERIFY(m_parsing_fragment); 2389} 2390 2391void HTMLParser::clear_the_stack_back_to_a_table_row_context() 2392{ 2393 while (!current_node().local_name().is_one_of(HTML::TagNames::tr, HTML::TagNames::template_, HTML::TagNames::html)) 2394 (void)m_stack_of_open_elements.pop(); 2395 2396 if (current_node().local_name() == HTML::TagNames::html) 2397 VERIFY(m_parsing_fragment); 2398} 2399 2400void HTMLParser::clear_the_stack_back_to_a_table_body_context() 2401{ 2402 while (!current_node().local_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::template_, HTML::TagNames::html)) 2403 (void)m_stack_of_open_elements.pop(); 2404 2405 if (current_node().local_name() == HTML::TagNames::html) 2406 VERIFY(m_parsing_fragment); 2407} 2408 2409void HTMLParser::handle_in_row(HTMLToken& token) 2410{ 2411 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::th, HTML::TagNames::td)) { 2412 clear_the_stack_back_to_a_table_row_context(); 2413 (void)insert_html_element(token); 2414 m_insertion_mode = InsertionMode::InCell; 2415 m_list_of_active_formatting_elements.add_marker(); 2416 return; 2417 } 2418 2419 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::tr) { 2420 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tr)) { 2421 log_parse_error(); 2422 return; 2423 } 2424 clear_the_stack_back_to_a_table_row_context(); 2425 (void)m_stack_of_open_elements.pop(); 2426 m_insertion_mode = InsertionMode::InTableBody; 2427 return; 2428 } 2429 2430 if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr)) 2431 || (token.is_end_tag() && token.tag_name() == HTML::TagNames::table)) { 2432 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tr)) { 2433 log_parse_error(); 2434 return; 2435 } 2436 clear_the_stack_back_to_a_table_row_context(); 2437 (void)m_stack_of_open_elements.pop(); 2438 m_insertion_mode = InsertionMode::InTableBody; 2439 process_using_the_rules_for(m_insertion_mode, token); 2440 return; 2441 } 2442 2443 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) { 2444 if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) { 2445 log_parse_error(); 2446 return; 2447 } 2448 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tr)) { 2449 return; 2450 } 2451 clear_the_stack_back_to_a_table_row_context(); 2452 (void)m_stack_of_open_elements.pop(); 2453 m_insertion_mode = InsertionMode::InTableBody; 2454 process_using_the_rules_for(m_insertion_mode, token); 2455 return; 2456 } 2457 2458 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html, HTML::TagNames::td, HTML::TagNames::th)) { 2459 log_parse_error(); 2460 return; 2461 } 2462 2463 process_using_the_rules_for(InsertionMode::InTable, token); 2464} 2465 2466void HTMLParser::close_the_cell() 2467{ 2468 generate_implied_end_tags(); 2469 if (!current_node().local_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) { 2470 log_parse_error(); 2471 } 2472 while (!current_node().local_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) 2473 (void)m_stack_of_open_elements.pop(); 2474 (void)m_stack_of_open_elements.pop(); 2475 m_list_of_active_formatting_elements.clear_up_to_the_last_marker(); 2476 m_insertion_mode = InsertionMode::InRow; 2477} 2478 2479void HTMLParser::handle_in_cell(HTMLToken& token) 2480{ 2481 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) { 2482 if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) { 2483 log_parse_error(); 2484 return; 2485 } 2486 generate_implied_end_tags(); 2487 2488 if (current_node().local_name() != token.tag_name()) { 2489 log_parse_error(); 2490 } 2491 2492 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name()); 2493 2494 m_list_of_active_formatting_elements.clear_up_to_the_last_marker(); 2495 2496 m_insertion_mode = InsertionMode::InRow; 2497 return; 2498 } 2499 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) { 2500 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::td) && !m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::th)) { 2501 VERIFY(m_parsing_fragment); 2502 log_parse_error(); 2503 return; 2504 } 2505 close_the_cell(); 2506 process_using_the_rules_for(m_insertion_mode, token); 2507 return; 2508 } 2509 2510 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html)) { 2511 log_parse_error(); 2512 return; 2513 } 2514 2515 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr)) { 2516 if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) { 2517 log_parse_error(); 2518 return; 2519 } 2520 close_the_cell(); 2521 // Reprocess the token. 2522 process_using_the_rules_for(m_insertion_mode, token); 2523 return; 2524 } 2525 2526 process_using_the_rules_for(InsertionMode::InBody, token); 2527} 2528 2529void HTMLParser::handle_in_table_text(HTMLToken& token) 2530{ 2531 if (token.is_character()) { 2532 if (token.code_point() == 0) { 2533 log_parse_error(); 2534 return; 2535 } 2536 2537 m_pending_table_character_tokens.append(move(token)); 2538 return; 2539 } 2540 2541 for (auto& pending_token : m_pending_table_character_tokens) { 2542 VERIFY(pending_token.is_character()); 2543 if (!pending_token.is_parser_whitespace()) { 2544 // If any of the tokens in the pending table character tokens list 2545 // are character tokens that are not ASCII whitespace, then this is a parse error: 2546 // reprocess the character tokens in the pending table character tokens list using 2547 // the rules given in the "anything else" entry in the "in table" insertion mode. 2548 log_parse_error(); 2549 m_foster_parenting = true; 2550 process_using_the_rules_for(InsertionMode::InBody, token); 2551 m_foster_parenting = false; 2552 return; 2553 } 2554 } 2555 2556 for (auto& pending_token : m_pending_table_character_tokens) { 2557 insert_character(pending_token.code_point()); 2558 } 2559 2560 m_insertion_mode = m_original_insertion_mode; 2561 process_using_the_rules_for(m_insertion_mode, token); 2562} 2563 2564void HTMLParser::handle_in_table_body(HTMLToken& token) 2565{ 2566 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::tr) { 2567 clear_the_stack_back_to_a_table_body_context(); 2568 (void)insert_html_element(token); 2569 m_insertion_mode = InsertionMode::InRow; 2570 return; 2571 } 2572 2573 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::th, HTML::TagNames::td)) { 2574 log_parse_error(); 2575 clear_the_stack_back_to_a_table_body_context(); 2576 (void)insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::tr)); 2577 m_insertion_mode = InsertionMode::InRow; 2578 process_using_the_rules_for(m_insertion_mode, token); 2579 return; 2580 } 2581 2582 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) { 2583 if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) { 2584 log_parse_error(); 2585 return; 2586 } 2587 clear_the_stack_back_to_a_table_body_context(); 2588 (void)m_stack_of_open_elements.pop(); 2589 m_insertion_mode = InsertionMode::InTable; 2590 return; 2591 } 2592 2593 if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) 2594 || (token.is_end_tag() && token.tag_name() == HTML::TagNames::table)) { 2595 2596 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tbody) 2597 && !m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::thead) 2598 && !m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tfoot)) { 2599 log_parse_error(); 2600 return; 2601 } 2602 2603 clear_the_stack_back_to_a_table_body_context(); 2604 (void)m_stack_of_open_elements.pop(); 2605 m_insertion_mode = InsertionMode::InTable; 2606 process_using_the_rules_for(InsertionMode::InTable, token); 2607 return; 2608 } 2609 2610 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html, HTML::TagNames::td, HTML::TagNames::th, HTML::TagNames::tr)) { 2611 log_parse_error(); 2612 return; 2613 } 2614 2615 process_using_the_rules_for(InsertionMode::InTable, token); 2616} 2617 2618void HTMLParser::handle_in_table(HTMLToken& token) 2619{ 2620 if (token.is_character() && current_node().local_name().is_one_of(HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr)) { 2621 m_pending_table_character_tokens.clear(); 2622 m_original_insertion_mode = m_insertion_mode; 2623 m_insertion_mode = InsertionMode::InTableText; 2624 process_using_the_rules_for(InsertionMode::InTableText, token); 2625 return; 2626 } 2627 if (token.is_comment()) { 2628 insert_comment(token); 2629 return; 2630 } 2631 if (token.is_doctype()) { 2632 log_parse_error(); 2633 return; 2634 } 2635 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::caption) { 2636 clear_the_stack_back_to_a_table_context(); 2637 m_list_of_active_formatting_elements.add_marker(); 2638 (void)insert_html_element(token); 2639 m_insertion_mode = InsertionMode::InCaption; 2640 return; 2641 } 2642 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::colgroup) { 2643 clear_the_stack_back_to_a_table_context(); 2644 (void)insert_html_element(token); 2645 m_insertion_mode = InsertionMode::InColumnGroup; 2646 return; 2647 } 2648 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::col) { 2649 clear_the_stack_back_to_a_table_context(); 2650 (void)insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::colgroup)); 2651 m_insertion_mode = InsertionMode::InColumnGroup; 2652 process_using_the_rules_for(m_insertion_mode, token); 2653 return; 2654 } 2655 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) { 2656 clear_the_stack_back_to_a_table_context(); 2657 (void)insert_html_element(token); 2658 m_insertion_mode = InsertionMode::InTableBody; 2659 return; 2660 } 2661 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th, HTML::TagNames::tr)) { 2662 clear_the_stack_back_to_a_table_context(); 2663 (void)insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::tbody)); 2664 m_insertion_mode = InsertionMode::InTableBody; 2665 process_using_the_rules_for(m_insertion_mode, token); 2666 return; 2667 } 2668 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::table) { 2669 log_parse_error(); 2670 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::table)) 2671 return; 2672 2673 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::table); 2674 2675 reset_the_insertion_mode_appropriately(); 2676 process_using_the_rules_for(m_insertion_mode, token); 2677 return; 2678 } 2679 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::table) { 2680 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::table)) { 2681 log_parse_error(); 2682 return; 2683 } 2684 2685 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::table); 2686 2687 reset_the_insertion_mode_appropriately(); 2688 return; 2689 } 2690 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) { 2691 log_parse_error(); 2692 return; 2693 } 2694 if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::style, HTML::TagNames::script, HTML::TagNames::template_)) 2695 || (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_)) { 2696 process_using_the_rules_for(InsertionMode::InHead, token); 2697 return; 2698 } 2699 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::input) { 2700 auto type_attribute = token.attribute(HTML::AttributeNames::type); 2701 if (type_attribute.is_null() || !type_attribute.equals_ignoring_ascii_case("hidden"sv)) { 2702 goto AnythingElse; 2703 } 2704 2705 log_parse_error(); 2706 (void)insert_html_element(token); 2707 2708 // FIXME: Is this the correct interpretation of "Pop that input element off the stack of open elements."? 2709 // Because this wording is the first time it's seen in the spec. 2710 // Other times it's worded as: "Immediately pop the current node off the stack of open elements." 2711 (void)m_stack_of_open_elements.pop(); 2712 token.acknowledge_self_closing_flag_if_set(); 2713 return; 2714 } 2715 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::form) { 2716 log_parse_error(); 2717 if (m_form_element.ptr() || m_stack_of_open_elements.contains(HTML::TagNames::template_)) { 2718 return; 2719 } 2720 2721 m_form_element = JS::make_handle(verify_cast<HTMLFormElement>(*insert_html_element(token))); 2722 2723 // FIXME: See previous FIXME, as this is the same situation but for form. 2724 (void)m_stack_of_open_elements.pop(); 2725 return; 2726 } 2727 if (token.is_end_of_file()) { 2728 process_using_the_rules_for(InsertionMode::InBody, token); 2729 return; 2730 } 2731 2732AnythingElse: 2733 log_parse_error(); 2734 m_foster_parenting = true; 2735 process_using_the_rules_for(InsertionMode::InBody, token); 2736 m_foster_parenting = false; 2737} 2738 2739void HTMLParser::handle_in_select_in_table(HTMLToken& token) 2740{ 2741 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::td, HTML::TagNames::th)) { 2742 log_parse_error(); 2743 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select); 2744 reset_the_insertion_mode_appropriately(); 2745 process_using_the_rules_for(m_insertion_mode, token); 2746 return; 2747 } 2748 2749 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::td, HTML::TagNames::th)) { 2750 log_parse_error(); 2751 2752 if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) 2753 return; 2754 2755 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select); 2756 reset_the_insertion_mode_appropriately(); 2757 process_using_the_rules_for(m_insertion_mode, token); 2758 return; 2759 } 2760 2761 process_using_the_rules_for(InsertionMode::InSelect, token); 2762} 2763 2764void HTMLParser::handle_in_select(HTMLToken& token) 2765{ 2766 if (token.is_character()) { 2767 if (token.code_point() == 0) { 2768 log_parse_error(); 2769 return; 2770 } 2771 insert_character(token.code_point()); 2772 return; 2773 } 2774 2775 if (token.is_comment()) { 2776 insert_comment(token); 2777 return; 2778 } 2779 2780 if (token.is_doctype()) { 2781 log_parse_error(); 2782 return; 2783 } 2784 2785 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { 2786 process_using_the_rules_for(InsertionMode::InBody, token); 2787 return; 2788 } 2789 2790 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::option) { 2791 if (current_node().local_name() == HTML::TagNames::option) { 2792 (void)m_stack_of_open_elements.pop(); 2793 } 2794 (void)insert_html_element(token); 2795 return; 2796 } 2797 2798 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::optgroup) { 2799 if (current_node().local_name() == HTML::TagNames::option) { 2800 (void)m_stack_of_open_elements.pop(); 2801 } 2802 if (current_node().local_name() == HTML::TagNames::optgroup) { 2803 (void)m_stack_of_open_elements.pop(); 2804 } 2805 (void)insert_html_element(token); 2806 return; 2807 } 2808 2809 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::optgroup) { 2810 if (current_node().local_name() == HTML::TagNames::option && node_before_current_node().local_name() == HTML::TagNames::optgroup) 2811 (void)m_stack_of_open_elements.pop(); 2812 2813 if (current_node().local_name() == HTML::TagNames::optgroup) { 2814 (void)m_stack_of_open_elements.pop(); 2815 } else { 2816 log_parse_error(); 2817 return; 2818 } 2819 return; 2820 } 2821 2822 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::option) { 2823 if (current_node().local_name() == HTML::TagNames::option) { 2824 (void)m_stack_of_open_elements.pop(); 2825 } else { 2826 log_parse_error(); 2827 return; 2828 } 2829 return; 2830 } 2831 2832 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::select) { 2833 if (!m_stack_of_open_elements.has_in_select_scope(HTML::TagNames::select)) { 2834 VERIFY(m_parsing_fragment); 2835 log_parse_error(); 2836 return; 2837 } 2838 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select); 2839 reset_the_insertion_mode_appropriately(); 2840 return; 2841 } 2842 2843 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::select) { 2844 log_parse_error(); 2845 2846 if (!m_stack_of_open_elements.has_in_select_scope(HTML::TagNames::select)) { 2847 VERIFY(m_parsing_fragment); 2848 return; 2849 } 2850 2851 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select); 2852 reset_the_insertion_mode_appropriately(); 2853 return; 2854 } 2855 2856 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::input, HTML::TagNames::keygen, HTML::TagNames::textarea)) { 2857 log_parse_error(); 2858 2859 if (!m_stack_of_open_elements.has_in_select_scope(HTML::TagNames::select)) { 2860 VERIFY(m_parsing_fragment); 2861 return; 2862 } 2863 2864 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select); 2865 reset_the_insertion_mode_appropriately(); 2866 process_using_the_rules_for(m_insertion_mode, token); 2867 return; 2868 } 2869 2870 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::script, HTML::TagNames::template_)) { 2871 process_using_the_rules_for(InsertionMode::InHead, token); 2872 return; 2873 } 2874 2875 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) { 2876 process_using_the_rules_for(InsertionMode::InHead, token); 2877 return; 2878 } 2879 2880 if (token.is_end_of_file()) { 2881 process_using_the_rules_for(InsertionMode::InBody, token); 2882 return; 2883 } 2884 2885 log_parse_error(); 2886} 2887 2888void HTMLParser::handle_in_caption(HTMLToken& token) 2889{ 2890 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::caption) { 2891 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::caption)) { 2892 VERIFY(m_parsing_fragment); 2893 log_parse_error(); 2894 return; 2895 } 2896 2897 generate_implied_end_tags(); 2898 2899 if (current_node().local_name() != HTML::TagNames::caption) 2900 log_parse_error(); 2901 2902 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::caption); 2903 m_list_of_active_formatting_elements.clear_up_to_the_last_marker(); 2904 2905 m_insertion_mode = InsertionMode::InTable; 2906 return; 2907 } 2908 2909 if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) 2910 || (token.is_end_tag() && token.tag_name() == HTML::TagNames::table)) { 2911 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::caption)) { 2912 VERIFY(m_parsing_fragment); 2913 log_parse_error(); 2914 return; 2915 } 2916 2917 generate_implied_end_tags(); 2918 2919 if (current_node().local_name() != HTML::TagNames::caption) 2920 log_parse_error(); 2921 2922 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::caption); 2923 m_list_of_active_formatting_elements.clear_up_to_the_last_marker(); 2924 2925 m_insertion_mode = InsertionMode::InTable; 2926 process_using_the_rules_for(m_insertion_mode, token); 2927 return; 2928 } 2929 2930 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) { 2931 log_parse_error(); 2932 return; 2933 } 2934 2935 process_using_the_rules_for(InsertionMode::InBody, token); 2936} 2937 2938void HTMLParser::handle_in_column_group(HTMLToken& token) 2939{ 2940 if (token.is_character() && token.is_parser_whitespace()) { 2941 insert_character(token.code_point()); 2942 return; 2943 } 2944 2945 if (token.is_comment()) { 2946 insert_comment(token); 2947 return; 2948 } 2949 2950 if (token.is_doctype()) { 2951 log_parse_error(); 2952 return; 2953 } 2954 2955 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { 2956 process_using_the_rules_for(InsertionMode::InBody, token); 2957 return; 2958 } 2959 2960 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::col) { 2961 (void)insert_html_element(token); 2962 (void)m_stack_of_open_elements.pop(); 2963 token.acknowledge_self_closing_flag_if_set(); 2964 return; 2965 } 2966 2967 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::colgroup) { 2968 if (current_node().local_name() != HTML::TagNames::colgroup) { 2969 log_parse_error(); 2970 return; 2971 } 2972 2973 (void)m_stack_of_open_elements.pop(); 2974 m_insertion_mode = InsertionMode::InTable; 2975 return; 2976 } 2977 2978 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::col) { 2979 log_parse_error(); 2980 return; 2981 } 2982 2983 if ((token.is_start_tag() || token.is_end_tag()) && token.tag_name() == HTML::TagNames::template_) { 2984 process_using_the_rules_for(InsertionMode::InHead, token); 2985 return; 2986 } 2987 2988 if (token.is_end_of_file()) { 2989 process_using_the_rules_for(InsertionMode::InBody, token); 2990 return; 2991 } 2992 2993 if (current_node().local_name() != HTML::TagNames::colgroup) { 2994 log_parse_error(); 2995 return; 2996 } 2997 2998 (void)m_stack_of_open_elements.pop(); 2999 m_insertion_mode = InsertionMode::InTable; 3000 process_using_the_rules_for(m_insertion_mode, token); 3001} 3002 3003void HTMLParser::handle_in_template(HTMLToken& token) 3004{ 3005 if (token.is_character() || token.is_comment() || token.is_doctype()) { 3006 process_using_the_rules_for(InsertionMode::InBody, token); 3007 return; 3008 } 3009 3010 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) { 3011 process_using_the_rules_for(InsertionMode::InHead, token); 3012 return; 3013 } 3014 3015 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) { 3016 process_using_the_rules_for(InsertionMode::InHead, token); 3017 return; 3018 } 3019 3020 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) { 3021 m_stack_of_template_insertion_modes.take_last(); 3022 m_stack_of_template_insertion_modes.append(InsertionMode::InTable); 3023 m_insertion_mode = InsertionMode::InTable; 3024 process_using_the_rules_for(m_insertion_mode, token); 3025 return; 3026 } 3027 3028 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::col) { 3029 m_stack_of_template_insertion_modes.take_last(); 3030 m_stack_of_template_insertion_modes.append(InsertionMode::InColumnGroup); 3031 m_insertion_mode = InsertionMode::InColumnGroup; 3032 process_using_the_rules_for(m_insertion_mode, token); 3033 return; 3034 } 3035 3036 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::tr) { 3037 m_stack_of_template_insertion_modes.take_last(); 3038 m_stack_of_template_insertion_modes.append(InsertionMode::InTableBody); 3039 m_insertion_mode = InsertionMode::InTableBody; 3040 process_using_the_rules_for(m_insertion_mode, token); 3041 return; 3042 } 3043 3044 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) { 3045 m_stack_of_template_insertion_modes.take_last(); 3046 m_stack_of_template_insertion_modes.append(InsertionMode::InRow); 3047 m_insertion_mode = InsertionMode::InRow; 3048 process_using_the_rules_for(m_insertion_mode, token); 3049 return; 3050 } 3051 3052 if (token.is_start_tag()) { 3053 m_stack_of_template_insertion_modes.take_last(); 3054 m_stack_of_template_insertion_modes.append(InsertionMode::InBody); 3055 m_insertion_mode = InsertionMode::InBody; 3056 process_using_the_rules_for(m_insertion_mode, token); 3057 return; 3058 } 3059 3060 if (token.is_end_tag()) { 3061 log_parse_error(); 3062 return; 3063 } 3064 3065 if (token.is_end_of_file()) { 3066 if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) { 3067 VERIFY(m_parsing_fragment); 3068 stop_parsing(); 3069 return; 3070 } 3071 3072 log_parse_error(); 3073 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::template_); 3074 m_list_of_active_formatting_elements.clear_up_to_the_last_marker(); 3075 m_stack_of_template_insertion_modes.take_last(); 3076 reset_the_insertion_mode_appropriately(); 3077 process_using_the_rules_for(m_insertion_mode, token); 3078 } 3079} 3080 3081void HTMLParser::handle_in_frameset(HTMLToken& token) 3082{ 3083 if (token.is_character() && token.is_parser_whitespace()) { 3084 insert_character(token.code_point()); 3085 return; 3086 } 3087 3088 if (token.is_comment()) { 3089 insert_comment(token); 3090 return; 3091 } 3092 3093 if (token.is_doctype()) { 3094 log_parse_error(); 3095 return; 3096 } 3097 3098 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { 3099 process_using_the_rules_for(InsertionMode::InBody, token); 3100 return; 3101 } 3102 3103 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) { 3104 (void)insert_html_element(token); 3105 return; 3106 } 3107 3108 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::frameset) { 3109 // FIXME: If the current node is the root html element, then this is a parse error; ignore the token. (fragment case) 3110 3111 (void)m_stack_of_open_elements.pop(); 3112 3113 if (!m_parsing_fragment && current_node().local_name() != HTML::TagNames::frameset) { 3114 m_insertion_mode = InsertionMode::AfterFrameset; 3115 } 3116 return; 3117 } 3118 3119 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frame) { 3120 (void)insert_html_element(token); 3121 (void)m_stack_of_open_elements.pop(); 3122 token.acknowledge_self_closing_flag_if_set(); 3123 return; 3124 } 3125 3126 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noframes) { 3127 process_using_the_rules_for(InsertionMode::InHead, token); 3128 return; 3129 } 3130 3131 if (token.is_end_of_file()) { 3132 // FIXME: If the current node is not the root html element, then this is a parse error. 3133 3134 stop_parsing(); 3135 return; 3136 } 3137 3138 log_parse_error(); 3139} 3140 3141void HTMLParser::handle_after_frameset(HTMLToken& token) 3142{ 3143 if (token.is_character() && token.is_parser_whitespace()) { 3144 insert_character(token.code_point()); 3145 return; 3146 } 3147 3148 if (token.is_comment()) { 3149 insert_comment(token); 3150 return; 3151 } 3152 3153 if (token.is_doctype()) { 3154 log_parse_error(); 3155 return; 3156 } 3157 3158 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) { 3159 process_using_the_rules_for(InsertionMode::InBody, token); 3160 return; 3161 } 3162 3163 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) { 3164 m_insertion_mode = InsertionMode::AfterAfterFrameset; 3165 return; 3166 } 3167 3168 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noframes) { 3169 process_using_the_rules_for(InsertionMode::InHead, token); 3170 return; 3171 } 3172 3173 if (token.is_end_of_file()) { 3174 stop_parsing(); 3175 return; 3176 } 3177 3178 log_parse_error(); 3179} 3180 3181void HTMLParser::handle_after_after_frameset(HTMLToken& token) 3182{ 3183 if (token.is_comment()) { 3184 auto comment = document().heap().allocate<DOM::Comment>(document().realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors(); 3185 MUST(document().append_child(comment)); 3186 return; 3187 } 3188 3189 if (token.is_doctype() || token.is_parser_whitespace() || (token.is_start_tag() && token.tag_name() == HTML::TagNames::html)) { 3190 process_using_the_rules_for(InsertionMode::InBody, token); 3191 return; 3192 } 3193 3194 if (token.is_end_of_file()) { 3195 stop_parsing(); 3196 return; 3197 } 3198 3199 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noframes) { 3200 process_using_the_rules_for(InsertionMode::InHead, token); 3201 return; 3202 } 3203 3204 log_parse_error(); 3205} 3206 3207void HTMLParser::process_using_the_rules_for_foreign_content(HTMLToken& token) 3208{ 3209 if (token.is_character()) { 3210 if (token.code_point() == 0) { 3211 log_parse_error(); 3212 insert_character(0xFFFD); 3213 return; 3214 } 3215 if (token.is_parser_whitespace()) { 3216 insert_character(token.code_point()); 3217 return; 3218 } 3219 insert_character(token.code_point()); 3220 m_frameset_ok = false; 3221 return; 3222 } 3223 3224 if (token.is_comment()) { 3225 insert_comment(token); 3226 return; 3227 } 3228 3229 if (token.is_doctype()) { 3230 log_parse_error(); 3231 return; 3232 } 3233 3234 if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::blockquote, HTML::TagNames::body, HTML::TagNames::br, HTML::TagNames::center, HTML::TagNames::code, HTML::TagNames::dd, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::dt, HTML::TagNames::em, HTML::TagNames::embed, HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6, HTML::TagNames::head, HTML::TagNames::hr, HTML::TagNames::i, HTML::TagNames::img, HTML::TagNames::li, HTML::TagNames::listing, HTML::TagNames::menu, HTML::TagNames::meta, HTML::TagNames::nobr, HTML::TagNames::ol, HTML::TagNames::p, HTML::TagNames::pre, HTML::TagNames::ruby, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::span, HTML::TagNames::strong, HTML::TagNames::strike, HTML::TagNames::sub, HTML::TagNames::sup, HTML::TagNames::table, HTML::TagNames::tt, HTML::TagNames::u, HTML::TagNames::ul, HTML::TagNames::var)) 3235 || (token.is_start_tag() && token.tag_name() == HTML::TagNames::font && (token.has_attribute(HTML::AttributeNames::color) || token.has_attribute(HTML::AttributeNames::face) || token.has_attribute(HTML::AttributeNames::size))) 3236 || (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::br, HTML::TagNames::p))) { 3237 log_parse_error(); 3238 3239 // While the current node is not a MathML text integration point, an HTML integration point, or an element in the HTML namespace, pop elements from the stack of open elements. 3240 while (!is_mathml_text_integration_point(current_node()) 3241 && !is_html_integration_point(current_node()) 3242 && current_node().namespace_() != Namespace::HTML) { 3243 (void)m_stack_of_open_elements.pop(); 3244 } 3245 3246 // Reprocess the token according to the rules given in the section corresponding to the current insertion mode in HTML content. 3247 process_using_the_rules_for(m_insertion_mode, token); 3248 return; 3249 } 3250 3251 // Any other start tag 3252 if (token.is_start_tag()) { 3253 if (adjusted_current_node().namespace_() == Namespace::MathML) { 3254 adjust_mathml_attributes(token); 3255 } else if (adjusted_current_node().namespace_() == Namespace::SVG) { 3256 adjust_svg_tag_names(token); 3257 adjust_svg_attributes(token); 3258 } 3259 3260 adjust_foreign_attributes(token); 3261 (void)insert_foreign_element(token, adjusted_current_node().namespace_()); 3262 3263 if (token.is_self_closing()) { 3264 if (token.tag_name() == SVG::TagNames::script && current_node().namespace_() == Namespace::SVG) { 3265 token.acknowledge_self_closing_flag_if_set(); 3266 goto ScriptEndTag; 3267 } 3268 3269 (void)m_stack_of_open_elements.pop(); 3270 token.acknowledge_self_closing_flag_if_set(); 3271 } 3272 3273 return; 3274 } 3275 3276 if (token.is_end_tag() && current_node().namespace_() == Namespace::SVG && current_node().tag_name() == SVG::TagNames::script) { 3277 ScriptEndTag: 3278 // Pop the current node off the stack of open elements. 3279 (void)m_stack_of_open_elements.pop(); 3280 // Let the old insertion point have the same value as the current insertion point. 3281 m_tokenizer.store_insertion_point(); 3282 // Let the insertion point be just before the next input character. 3283 m_tokenizer.update_insertion_point(); 3284 // Increment the parser's script nesting level by one. 3285 increment_script_nesting_level(); 3286 // Set the parser pause flag to true. 3287 m_parser_pause_flag = true; 3288 // FIXME: Implement SVG script parsing. 3289 TODO(); 3290 // Decrement the parser's script nesting level by one. 3291 decrement_script_nesting_level(); 3292 // If the parser's script nesting level is zero, then set the parser pause flag to false. 3293 if (script_nesting_level() == 0) 3294 m_parser_pause_flag = false; 3295 3296 // Let the insertion point have the value of the old insertion point. 3297 m_tokenizer.restore_insertion_point(); 3298 } 3299 3300 if (token.is_end_tag()) { 3301 JS::GCPtr<DOM::Element> node = current_node(); 3302 // FIXME: Not sure if this is the correct to_lowercase, as the specification says "to ASCII lowercase" 3303 if (node->tag_name().to_lowercase() != token.tag_name()) 3304 log_parse_error(); 3305 for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { 3306 if (node.ptr() == &m_stack_of_open_elements.first()) { 3307 VERIFY(m_parsing_fragment); 3308 return; 3309 } 3310 // FIXME: See the above FIXME 3311 if (node->tag_name().to_lowercase() == token.tag_name()) { 3312 while (&current_node() != node.ptr()) 3313 (void)m_stack_of_open_elements.pop(); 3314 (void)m_stack_of_open_elements.pop(); 3315 return; 3316 } 3317 3318 node = m_stack_of_open_elements.elements().at(i - 1).ptr(); 3319 3320 if (node->namespace_() != Namespace::HTML) 3321 continue; 3322 3323 process_using_the_rules_for(m_insertion_mode, token); 3324 return; 3325 } 3326 } 3327 3328 VERIFY_NOT_REACHED(); 3329} 3330 3331// https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately 3332void HTMLParser::reset_the_insertion_mode_appropriately() 3333{ 3334 for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) { 3335 bool last = i == 0; 3336 // NOTE: When parsing fragments, we substitute the context element for the root of the stack of open elements. 3337 JS::GCPtr<DOM::Element> node; 3338 if (last && m_parsing_fragment) { 3339 node = m_context_element.ptr(); 3340 } else { 3341 node = m_stack_of_open_elements.elements().at(i).ptr(); 3342 } 3343 3344 if (node->local_name() == HTML::TagNames::select) { 3345 if (!last) { 3346 for (ssize_t j = i; j > 0; --j) { 3347 auto& ancestor = m_stack_of_open_elements.elements().at(j - 1); 3348 3349 if (is<HTMLTemplateElement>(*ancestor)) 3350 break; 3351 3352 if (is<HTMLTableElement>(*ancestor)) { 3353 m_insertion_mode = InsertionMode::InSelectInTable; 3354 return; 3355 } 3356 } 3357 } 3358 3359 m_insertion_mode = InsertionMode::InSelect; 3360 return; 3361 } 3362 3363 if (!last && node->local_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) { 3364 m_insertion_mode = InsertionMode::InCell; 3365 return; 3366 } 3367 3368 if (node->local_name() == HTML::TagNames::tr) { 3369 m_insertion_mode = InsertionMode::InRow; 3370 return; 3371 } 3372 3373 if (node->local_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::thead, HTML::TagNames::tfoot)) { 3374 m_insertion_mode = InsertionMode::InTableBody; 3375 return; 3376 } 3377 3378 if (node->local_name() == HTML::TagNames::caption) { 3379 m_insertion_mode = InsertionMode::InCaption; 3380 return; 3381 } 3382 3383 if (node->local_name() == HTML::TagNames::colgroup) { 3384 m_insertion_mode = InsertionMode::InColumnGroup; 3385 return; 3386 } 3387 3388 if (node->local_name() == HTML::TagNames::table) { 3389 m_insertion_mode = InsertionMode::InTable; 3390 return; 3391 } 3392 3393 if (node->local_name() == HTML::TagNames::template_) { 3394 m_insertion_mode = m_stack_of_template_insertion_modes.last(); 3395 return; 3396 } 3397 3398 if (!last && node->local_name() == HTML::TagNames::head) { 3399 m_insertion_mode = InsertionMode::InHead; 3400 return; 3401 } 3402 3403 if (node->local_name() == HTML::TagNames::body) { 3404 m_insertion_mode = InsertionMode::InBody; 3405 return; 3406 } 3407 3408 if (node->local_name() == HTML::TagNames::frameset) { 3409 VERIFY(m_parsing_fragment); 3410 m_insertion_mode = InsertionMode::InFrameset; 3411 return; 3412 } 3413 3414 if (node->local_name() == HTML::TagNames::html) { 3415 if (!m_head_element) { 3416 VERIFY(m_parsing_fragment); 3417 m_insertion_mode = InsertionMode::BeforeHead; 3418 return; 3419 } 3420 3421 m_insertion_mode = InsertionMode::AfterHead; 3422 return; 3423 } 3424 } 3425 3426 VERIFY(m_parsing_fragment); 3427 m_insertion_mode = InsertionMode::InBody; 3428} 3429 3430char const* HTMLParser::insertion_mode_name() const 3431{ 3432 switch (m_insertion_mode) { 3433#define __ENUMERATE_INSERTION_MODE(mode) \ 3434 case InsertionMode::mode: \ 3435 return #mode; 3436 ENUMERATE_INSERTION_MODES 3437#undef __ENUMERATE_INSERTION_MODE 3438 } 3439 VERIFY_NOT_REACHED(); 3440} 3441 3442DOM::Document& HTMLParser::document() 3443{ 3444 return *m_document; 3445} 3446 3447// https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments 3448Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup) 3449{ 3450 // 1. Create a new Document node, and mark it as being an HTML document. 3451 auto temp_document = DOM::Document::create(context_element.realm()).release_value_but_fixme_should_propagate_errors(); 3452 temp_document->set_document_type(DOM::Document::Type::HTML); 3453 3454 // 2. If the node document of the context element is in quirks mode, then let the Document be in quirks mode. 3455 // Otherwise, the node document of the context element is in limited-quirks mode, then let the Document be in limited-quirks mode. 3456 // Otherwise, leave the Document in no-quirks mode. 3457 temp_document->set_quirks_mode(context_element.document().mode()); 3458 3459 // 3. Create a new HTML parser, and associate it with the just created Document node. 3460 auto parser = HTMLParser::create(*temp_document, markup, "utf-8"); 3461 parser->m_context_element = JS::make_handle(context_element); 3462 parser->m_parsing_fragment = true; 3463 3464 // 4. Set the state of the HTML parser's tokenization stage as follows, switching on the context element: 3465 // - title 3466 // - textarea 3467 if (context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) { 3468 // Switch the tokenizer to the RCDATA state. 3469 parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA); 3470 } 3471 // - style 3472 // - xmp 3473 // - iframe 3474 // - noembed 3475 // - noframes 3476 else if (context_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes)) { 3477 // Switch the tokenizer to the RAWTEXT state. 3478 parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT); 3479 } 3480 // - script 3481 else if (context_element.local_name().is_one_of(HTML::TagNames::script)) { 3482 // Switch the tokenizer to the script data state. 3483 parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData); 3484 } 3485 // - noscript 3486 else if (context_element.local_name().is_one_of(HTML::TagNames::noscript)) { 3487 // If the scripting flag is enabled, switch the tokenizer to the RAWTEXT state. Otherwise, leave the tokenizer in the data state. 3488 if (context_element.document().is_scripting_enabled()) 3489 parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT); 3490 } 3491 // - plaintext 3492 else if (context_element.local_name().is_one_of(HTML::TagNames::plaintext)) { 3493 // Switch the tokenizer to the PLAINTEXT state. 3494 parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT); 3495 } 3496 // Any other element 3497 else { 3498 // Leave the tokenizer in the data state. 3499 } 3500 3501 // 5. Let root be a new html element with no attributes. 3502 auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); 3503 3504 // 6. Append the element root to the Document node created above. 3505 MUST(temp_document->append_child(root)); 3506 3507 // 7. Set up the parser's stack of open elements so that it contains just the single element root. 3508 parser->m_stack_of_open_elements.push(root); 3509 3510 // 8. If the context element is a template element, 3511 if (context_element.local_name() == HTML::TagNames::template_) { 3512 // push "in template" onto the stack of template insertion modes so that it is the new current template insertion mode. 3513 parser->m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate); 3514 } 3515 3516 // FIXME: 9. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context. 3517 // Let this start tag token be the start tag token of the context node, e.g. for the purposes of determining if it is an HTML integration point. 3518 3519 // 10. Reset the parser's insertion mode appropriately. 3520 parser->reset_the_insertion_mode_appropriately(); 3521 3522 // 11. Set the parser's form element pointer to the nearest node to the context element that is a form element 3523 // (going straight up the ancestor chain, and including the element itself, if it is a form element), if any. 3524 // (If there is no such form element, the form element pointer keeps its initial value, null.) 3525 parser->m_form_element = context_element.first_ancestor_of_type<HTMLFormElement>(); 3526 3527 // 12. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant. 3528 // 13. Start the parser and let it run until it has consumed all the characters just inserted into the input stream. 3529 parser->run(context_element.document().url()); 3530 3531 // 14. Return the child nodes of root, in tree order. 3532 Vector<JS::Handle<DOM::Node>> children; 3533 while (JS::GCPtr<DOM::Node> child = root->first_child()) { 3534 MUST(root->remove_child(*child)); 3535 context_element.document().adopt_node(*child); 3536 children.append(JS::make_handle(*child)); 3537 } 3538 return children; 3539} 3540 3541JS::NonnullGCPtr<HTMLParser> HTMLParser::create_for_scripting(DOM::Document& document) 3542{ 3543 return document.heap().allocate_without_realm<HTMLParser>(document); 3544} 3545 3546JS::NonnullGCPtr<HTMLParser> HTMLParser::create_with_uncertain_encoding(DOM::Document& document, ByteBuffer const& input) 3547{ 3548 if (document.has_encoding()) 3549 return document.heap().allocate_without_realm<HTMLParser>(document, input, document.encoding().value()); 3550 auto encoding = run_encoding_sniffing_algorithm(document, input); 3551 dbgln_if(HTML_PARSER_DEBUG, "The encoding sniffing algorithm returned encoding '{}'", encoding); 3552 return document.heap().allocate_without_realm<HTMLParser>(document, input, encoding); 3553} 3554 3555JS::NonnullGCPtr<HTMLParser> HTMLParser::create(DOM::Document& document, StringView input, DeprecatedString const& encoding) 3556{ 3557 return document.heap().allocate_without_realm<HTMLParser>(document, input, encoding); 3558} 3559 3560// https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-serialisation-algorithm 3561DeprecatedString HTMLParser::serialize_html_fragment(DOM::Node const& node) 3562{ 3563 // The algorithm takes as input a DOM Element, Document, or DocumentFragment referred to as the node. 3564 VERIFY(node.is_element() || node.is_document() || node.is_document_fragment()); 3565 JS::NonnullGCPtr<DOM::Node const> actual_node = node; 3566 3567 if (is<DOM::Element>(node)) { 3568 auto& element = verify_cast<DOM::Element>(node); 3569 3570 // 1. If the node serializes as void, then return the empty string. 3571 // (NOTE: serializes as void is defined only on elements in the spec) 3572 if (element.serializes_as_void()) 3573 return DeprecatedString::empty(); 3574 3575 // 3. If the node is a template element, then let the node instead be the template element's template contents (a DocumentFragment node). 3576 // (NOTE: This is out of order of the spec to avoid another dynamic cast. The second step just creates a string builder, so it shouldn't matter) 3577 if (is<HTML::HTMLTemplateElement>(element)) 3578 actual_node = verify_cast<HTML::HTMLTemplateElement>(element).content(); 3579 } 3580 3581 enum class AttributeMode { 3582 No, 3583 Yes, 3584 }; 3585 3586 auto escape_string = [](StringView string, AttributeMode attribute_mode) -> DeprecatedString { 3587 // https://html.spec.whatwg.org/multipage/parsing.html#escapingString 3588 StringBuilder builder; 3589 for (auto code_point : Utf8View { string }) { 3590 // 1. Replace any occurrence of the "&" character by the string "&amp;". 3591 if (code_point == '&') 3592 builder.append("&amp;"sv); 3593 // 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the string "&nbsp;". 3594 else if (code_point == 0xA0) 3595 builder.append("&nbsp;"sv); 3596 // 3. If the algorithm was invoked in the attribute mode, replace any occurrences of the """ character by the string "&quot;". 3597 else if (code_point == '"' && attribute_mode == AttributeMode::Yes) 3598 builder.append("&quot;"sv); 3599 // 4. If the algorithm was not invoked in the attribute mode, replace any occurrences of the "<" character by the string "&lt;", and any occurrences of the ">" character by the string "&gt;". 3600 else if (code_point == '<' && attribute_mode == AttributeMode::No) 3601 builder.append("&lt;"sv); 3602 else if (code_point == '>' && attribute_mode == AttributeMode::No) 3603 builder.append("&gt;"sv); 3604 else 3605 builder.append_code_point(code_point); 3606 } 3607 return builder.to_deprecated_string(); 3608 }; 3609 3610 // 2. Let s be a string, and initialize it to the empty string. 3611 StringBuilder builder; 3612 3613 // 4. For each child node of the node, in tree order, run the following steps: 3614 actual_node->for_each_child([&](DOM::Node& current_node) { 3615 // 1. Let current node be the child node being processed. 3616 3617 // 2. Append the appropriate string from the following list to s: 3618 3619 if (is<DOM::Element>(current_node)) { 3620 // -> If current node is an Element 3621 auto& element = verify_cast<DOM::Element>(current_node); 3622 3623 // 1. If current node is an element in the HTML namespace, the MathML namespace, or the SVG namespace, then let tagname be current node's local name. 3624 // Otherwise, let tagname be current node's qualified name. 3625 DeprecatedString tag_name; 3626 3627 if (element.namespace_().is_one_of(Namespace::HTML, Namespace::MathML, Namespace::SVG)) 3628 tag_name = element.local_name(); 3629 else 3630 tag_name = element.qualified_name(); 3631 3632 // 2. Append a U+003C LESS-THAN SIGN character (<), followed by tagname. 3633 builder.append('<'); 3634 builder.append(tag_name); 3635 3636 // FIXME: 3. If current node's is value is not null, and the element does not have an is attribute in its attribute list, 3637 // then append the string " is="", followed by current node's is value escaped as described below in attribute mode, 3638 // followed by a U+0022 QUOTATION MARK character ("). 3639 3640 // 4. For each attribute that the element has, append a U+0020 SPACE character, the attribute's serialized name as described below, a U+003D EQUALS SIGN character (=), 3641 // a U+0022 QUOTATION MARK character ("), the attribute's value, escaped as described below in attribute mode, and a second U+0022 QUOTATION MARK character ("). 3642 // NOTE: The order of attributes is implementation-defined. The only constraint is that the order must be stable. 3643 element.for_each_attribute([&](auto& name, auto& value) { 3644 builder.append(' '); 3645 3646 // An attribute's serialized name for the purposes of the previous paragraph must be determined as follows: 3647 3648 // FIXME: -> If the attribute has no namespace: 3649 // The attribute's serialized name is the attribute's local name. 3650 // (We currently always do this) 3651 builder.append(name); 3652 3653 // FIXME: -> If the attribute is in the XML namespace: 3654 // The attribute's serialized name is the string "xml:" followed by the attribute's local name. 3655 3656 // FIXME: -> If the attribute is in the XMLNS namespace and the attribute's local name is xmlns: 3657 // The attribute's serialized name is the string "xmlns". 3658 3659 // FIXME: -> If the attribute is in the XMLNS namespace and the attribute's local name is not xmlns: 3660 // The attribute's serialized name is the string "xmlns:" followed by the attribute's local name. 3661 3662 // FIXME: -> If the attribute is in the XLink namespace: 3663 // The attribute's serialized name is the string "xlink:" followed by the attribute's local name. 3664 3665 // FIXME: -> If the attribute is in some other namespace: 3666 // The attribute's serialized name is the attribute's qualified name. 3667 3668 builder.append("=\""sv); 3669 builder.append(escape_string(value, AttributeMode::Yes)); 3670 builder.append('"'); 3671 }); 3672 3673 // 5. Append a U+003E GREATER-THAN SIGN character (>). 3674 builder.append('>'); 3675 3676 // 6. If current node serializes as void, then continue on to the next child node at this point. 3677 if (element.serializes_as_void()) 3678 return IterationDecision::Continue; 3679 3680 // 7. Append the value of running the HTML fragment serialization algorithm on the current node element (thus recursing into this algorithm for that element), 3681 // followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/), tagname again, and finally a U+003E GREATER-THAN SIGN character (>). 3682 builder.append(serialize_html_fragment(element)); 3683 builder.append("</"sv); 3684 builder.append(tag_name); 3685 builder.append('>'); 3686 3687 return IterationDecision::Continue; 3688 } 3689 3690 if (is<DOM::Text>(current_node)) { 3691 // -> If current node is a Text node 3692 auto& text_node = verify_cast<DOM::Text>(current_node); 3693 auto* parent = current_node.parent(); 3694 3695 if (is<DOM::Element>(parent)) { 3696 auto& parent_element = verify_cast<DOM::Element>(*parent); 3697 3698 // 1. If the parent of current node is a style, script, xmp, iframe, noembed, noframes, or plaintext element, 3699 // or if the parent of current node is a noscript element and scripting is enabled for the node, then append the value of current node's data IDL attribute literally. 3700 if (parent_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::script, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes, HTML::TagNames::plaintext) 3701 || (parent_element.local_name() == HTML::TagNames::noscript && !parent_element.is_scripting_disabled())) { 3702 builder.append(text_node.data()); 3703 return IterationDecision::Continue; 3704 } 3705 } 3706 3707 // 2. Otherwise, append the value of current node's data IDL attribute, escaped as described below. 3708 builder.append(escape_string(text_node.data(), AttributeMode::No)); 3709 return IterationDecision::Continue; 3710 } 3711 3712 if (is<DOM::Comment>(current_node)) { 3713 // -> If current node is a Comment 3714 auto& comment_node = verify_cast<DOM::Comment>(current_node); 3715 3716 // 1. Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS), 3717 // followed by the value of current node's data IDL attribute, followed by the literal string "-->" (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN). 3718 builder.append("<!--"sv); 3719 builder.append(comment_node.data()); 3720 builder.append("-->"sv); 3721 return IterationDecision::Continue; 3722 } 3723 3724 if (is<DOM::ProcessingInstruction>(current_node)) { 3725 // -> If current node is a ProcessingInstruction 3726 auto& processing_instruction_node = verify_cast<DOM::ProcessingInstruction>(current_node); 3727 3728 // 1. Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK), followed by the value of current node's target IDL attribute, 3729 // followed by a single U+0020 SPACE character, followed by the value of current node's data IDL attribute, followed by a single U+003E GREATER-THAN SIGN character (>). 3730 builder.append("<?"sv); 3731 builder.append(processing_instruction_node.target()); 3732 builder.append(' '); 3733 builder.append(processing_instruction_node.data()); 3734 builder.append('>'); 3735 return IterationDecision::Continue; 3736 } 3737 3738 if (is<DOM::DocumentType>(current_node)) { 3739 // -> If current node is a DocumentType 3740 auto& document_type_node = verify_cast<DOM::DocumentType>(current_node); 3741 3742 // 1. Append the literal string "<!DOCTYPE" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+0044 LATIN CAPITAL LETTER D, U+004F LATIN CAPITAL LETTER O, 3743 // U+0043 LATIN CAPITAL LETTER C, U+0054 LATIN CAPITAL LETTER T, U+0059 LATIN CAPITAL LETTER Y, U+0050 LATIN CAPITAL LETTER P, U+0045 LATIN CAPITAL LETTER E), 3744 // followed by a space (U+0020 SPACE), followed by the value of current node's name IDL attribute, followed by the literal string ">" (U+003E GREATER-THAN SIGN). 3745 builder.append("<!DOCTYPE "sv); 3746 builder.append(document_type_node.name()); 3747 builder.append('>'); 3748 return IterationDecision::Continue; 3749 } 3750 3751 return IterationDecision::Continue; 3752 }); 3753 3754 // 5. Return s. 3755 return builder.to_deprecated_string(); 3756} 3757 3758// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#current-dimension-value 3759static RefPtr<CSS::StyleValue> parse_current_dimension_value(float value, Utf8View input, Utf8View::Iterator position) 3760{ 3761 // 1. If position is past the end of input, then return value as a length. 3762 if (position == input.end()) 3763 return CSS::LengthStyleValue::create(CSS::Length::make_px(value)); 3764 3765 // 2. If the code point at position within input is U+0025 (%), then return value as a percentage. 3766 if (*position == '%') 3767 return CSS::PercentageStyleValue::create(CSS::Percentage(value)); 3768 3769 // 3. Return value as a length. 3770 return CSS::LengthStyleValue::create(CSS::Length::make_px(value)); 3771} 3772 3773// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-dimension-values 3774RefPtr<CSS::StyleValue> parse_dimension_value(StringView string) 3775{ 3776 // 1. Let input be the string being parsed. 3777 auto input = Utf8View(string); 3778 if (!input.validate()) 3779 return nullptr; 3780 3781 // 2. Let position be a position variable for input, initially pointing at the start of input. 3782 auto position = input.begin(); 3783 3784 // 3. Skip ASCII whitespace within input given position. 3785 while (position != input.end() && Infra::is_ascii_whitespace(*position)) 3786 ++position; 3787 3788 // 4. If position is past the end of input or the code point at position within input is not an ASCII digit, 3789 // then return failure. 3790 if (position == input.end() || !is_ascii_digit(*position)) 3791 return nullptr; 3792 3793 // 5. Collect a sequence of code points that are ASCII digits from input given position, 3794 // and interpret the resulting sequence as a base-ten integer. Let value be that number. 3795 StringBuilder number_string; 3796 while (position != input.end() && is_ascii_digit(*position)) { 3797 number_string.append(*position); 3798 ++position; 3799 } 3800 auto integer_value = number_string.string_view().to_int(); 3801 3802 // 6. If position is past the end of input, then return value as a length. 3803 if (position == input.end()) 3804 return CSS::LengthStyleValue::create(CSS::Length::make_px(*integer_value)); 3805 3806 float value = *integer_value; 3807 3808 // 7. If the code point at position within input is U+002E (.), then: 3809 if (*position == '.') { 3810 // 1. Advance position by 1. 3811 ++position; 3812 3813 // 2. If position is past the end of input or the code point at position within input is not an ASCII digit, 3814 // then return the current dimension value with value, input, and position. 3815 if (position == input.end() || !is_ascii_digit(*position)) 3816 return parse_current_dimension_value(value, input, position); 3817 3818 // 3. Let divisor have the value 1. 3819 float divisor = 1; 3820 3821 // 4. While true: 3822 while (true) { 3823 // 1. Multiply divisor by ten. 3824 divisor *= 10; 3825 3826 // 2. Add the value of the code point at position within input, 3827 // interpreted as a base-ten digit (0..9) and divided by divisor, to value. 3828 value += (*position - '0') / divisor; 3829 3830 // 3. Advance position by 1. 3831 ++position; 3832 3833 // 4. If position is past the end of input, then return value as a length. 3834 if (position == input.end()) 3835 return CSS::LengthStyleValue::create(CSS::Length::make_px(value)); 3836 3837 // 5. If the code point at position within input is not an ASCII digit, then break. 3838 if (!is_ascii_digit(*position)) 3839 break; 3840 } 3841 } 3842 3843 // 8. Return the current dimension value with value, input, and position. 3844 return parse_current_dimension_value(value, input, position); 3845} 3846 3847// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-non-zero-dimension-values 3848RefPtr<CSS::StyleValue> parse_nonzero_dimension_value(StringView string) 3849{ 3850 // 1. Let input be the string being parsed. 3851 // 2. Let value be the result of parsing input using the rules for parsing dimension values. 3852 auto value = parse_dimension_value(string); 3853 3854 // 3. If value is an error, return an error. 3855 if (!value) 3856 return nullptr; 3857 3858 // 4. If value is zero, return an error. 3859 if (value->is_length() && value->as_length().length().raw_value() == 0) 3860 return nullptr; 3861 if (value->is_percentage() && value->as_percentage().percentage().value() == 0) 3862 return nullptr; 3863 3864 // 5. If value is a percentage, return value as a percentage. 3865 // 6. Return value as a length. 3866 return value; 3867} 3868 3869JS::Realm& HTMLParser::realm() 3870{ 3871 return m_document->realm(); 3872} 3873 3874// https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser 3875void HTMLParser::abort() 3876{ 3877 // 1. Throw away any pending content in the input stream, and discard any future content that would have been added to it. 3878 m_tokenizer.abort(); 3879 3880 // FIXME: 2. Stop the speculative HTML parser for this HTML parser. 3881 3882 // 3. Update the current document readiness to "interactive". 3883 m_document->update_readiness(DocumentReadyState::Interactive); 3884 3885 // 4. Pop all the nodes off the stack of open elements. 3886 while (!m_stack_of_open_elements.is_empty()) 3887 m_stack_of_open_elements.pop(); 3888 3889 // 5. Update the current document readiness to "complete". 3890 m_document->update_readiness(DocumentReadyState::Complete); 3891 3892 m_aborted = true; 3893} 3894 3895}