Serenity Operating System
1/*
2 * Copyright (c) 2020-2022, Andreas Kling <kling@serenityos.org>
3 * Copyright (c) 2021, Luke Wilde <lukew@serenityos.org>
4 *
5 * SPDX-License-Identifier: BSD-2-Clause
6 */
7
8#include <AK/Debug.h>
9#include <AK/SourceLocation.h>
10#include <AK/Utf32View.h>
11#include <LibTextCodec/Decoder.h>
12#include <LibWeb/Bindings/MainThreadVM.h>
13#include <LibWeb/DOM/Comment.h>
14#include <LibWeb/DOM/Document.h>
15#include <LibWeb/DOM/DocumentType.h>
16#include <LibWeb/DOM/ElementFactory.h>
17#include <LibWeb/DOM/Event.h>
18#include <LibWeb/DOM/ProcessingInstruction.h>
19#include <LibWeb/DOM/Text.h>
20#include <LibWeb/HTML/EventLoop/EventLoop.h>
21#include <LibWeb/HTML/EventNames.h>
22#include <LibWeb/HTML/HTMLFormElement.h>
23#include <LibWeb/HTML/HTMLHeadElement.h>
24#include <LibWeb/HTML/HTMLScriptElement.h>
25#include <LibWeb/HTML/HTMLTableElement.h>
26#include <LibWeb/HTML/HTMLTemplateElement.h>
27#include <LibWeb/HTML/Parser/HTMLEncodingDetection.h>
28#include <LibWeb/HTML/Parser/HTMLParser.h>
29#include <LibWeb/HTML/Parser/HTMLToken.h>
30#include <LibWeb/HTML/Window.h>
31#include <LibWeb/HighResolutionTime/TimeOrigin.h>
32#include <LibWeb/Infra/CharacterTypes.h>
33#include <LibWeb/Namespace.h>
34#include <LibWeb/SVG/TagNames.h>
35
36namespace Web::HTML {
37
// Emits a "Parse error!" debug line together with a source location.
// The location defaults to SourceLocation::current(); the line is only printed
// when HTML_PARSER_DEBUG is enabled (see dbgln_if).
static inline void log_parse_error(SourceLocation const& location = SourceLocation::current())
{
    dbgln_if(HTML_PARSER_DEBUG, "Parse error! {}", location);
}
42
// DOCTYPE public identifier prefixes that select quirks mode.
// which_quirks_mode() returns DOM::QuirksMode::Yes when the token's public
// identifier starts with any of these entries, compared case-insensitively.
static Vector<DeprecatedFlyString> s_quirks_public_ids = {
    "+//Silmaril//dtd html Pro v0r11 19970101//",
    "-//AS//DTD HTML 3.0 asWedit + extensions//",
    "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//",
    "-//IETF//DTD HTML 2.0 Level 1//",
    "-//IETF//DTD HTML 2.0 Level 2//",
    "-//IETF//DTD HTML 2.0 Strict Level 1//",
    "-//IETF//DTD HTML 2.0 Strict Level 2//",
    "-//IETF//DTD HTML 2.0 Strict//",
    "-//IETF//DTD HTML 2.0//",
    "-//IETF//DTD HTML 2.1E//",
    "-//IETF//DTD HTML 3.0//",
    "-//IETF//DTD HTML 3.2 Final//",
    "-//IETF//DTD HTML 3.2//",
    "-//IETF//DTD HTML 3//",
    "-//IETF//DTD HTML Level 0//",
    "-//IETF//DTD HTML Level 1//",
    "-//IETF//DTD HTML Level 2//",
    "-//IETF//DTD HTML Level 3//",
    "-//IETF//DTD HTML Strict Level 0//",
    "-//IETF//DTD HTML Strict Level 1//",
    "-//IETF//DTD HTML Strict Level 2//",
    "-//IETF//DTD HTML Strict Level 3//",
    "-//IETF//DTD HTML Strict//",
    "-//IETF//DTD HTML//",
    "-//Metrius//DTD Metrius Presentational//",
    "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//",
    "-//Microsoft//DTD Internet Explorer 2.0 HTML//",
    "-//Microsoft//DTD Internet Explorer 2.0 Tables//",
    "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//",
    "-//Microsoft//DTD Internet Explorer 3.0 HTML//",
    "-//Microsoft//DTD Internet Explorer 3.0 Tables//",
    "-//Netscape Comm. Corp.//DTD HTML//",
    "-//Netscape Comm. Corp.//DTD Strict HTML//",
    "-//O'Reilly and Associates//DTD HTML 2.0//",
    "-//O'Reilly and Associates//DTD HTML Extended 1.0//",
    "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//",
    "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//",
    "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//",
    "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//",
    "-//Spyglass//DTD HTML 2.0 Extended//",
    "-//Sun Microsystems Corp.//DTD HotJava HTML//",
    "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//",
    "-//W3C//DTD HTML 3 1995-03-24//",
    "-//W3C//DTD HTML 3.2 Draft//",
    "-//W3C//DTD HTML 3.2 Final//",
    "-//W3C//DTD HTML 3.2//",
    "-//W3C//DTD HTML 3.2S Draft//",
    "-//W3C//DTD HTML 4.0 Frameset//",
    "-//W3C//DTD HTML 4.0 Transitional//",
    "-//W3C//DTD HTML Experimental 19960712//",
    "-//W3C//DTD HTML Experimental 970421//",
    "-//W3C//DTD W3 HTML//",
    "-//W3O//DTD W3 HTML 3.0//",
    "-//WebTechs//DTD Mozilla HTML 2.0//",
    "-//WebTechs//DTD Mozilla HTML//"
};
100
// https://html.spec.whatwg.org/multipage/parsing.html#mathml-text-integration-point
// Placeholder: the element argument is ignored and the answer is always false,
// i.e. no element is currently recognized as a MathML text integration point.
static bool is_mathml_text_integration_point(DOM::Element const&)
{
    // FIXME: Implement.
    return false;
}
107
108// https://html.spec.whatwg.org/multipage/parsing.html#html-integration-point
109static bool is_html_integration_point(DOM::Element const& element)
110{
111 // A node is an HTML integration point if it is one of the following elements:
112 // FIXME: A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "text/html"
113 // FIXME: A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "application/xhtml+xml"
114
115 // An SVG foreignObject element
116 // An SVG desc element
117 // An SVG title element
118 if (element.tag_name().is_one_of(SVG::TagNames::foreignObject, SVG::TagNames::desc, SVG::TagNames::title))
119 return true;
120
121 return false;
122}
123
124HTMLParser::HTMLParser(DOM::Document& document, StringView input, DeprecatedString const& encoding)
125 : m_tokenizer(input, encoding)
126 , m_scripting_enabled(document.is_scripting_enabled())
127 , m_document(JS::make_handle(document))
128{
129 m_tokenizer.set_parser({}, *this);
130 m_document->set_parser({}, *this);
131 auto standardized_encoding = TextCodec::get_standardized_encoding(encoding);
132 VERIFY(standardized_encoding.has_value());
133 m_document->set_encoding(standardized_encoding.value());
134}
135
// Builds a parser for `document` without supplying input or an encoding to the tokenizer.
// The document and the tokenizer both get a back-reference to this parser.
HTMLParser::HTMLParser(DOM::Document& document)
    : m_scripting_enabled(document.is_scripting_enabled())
    , m_document(JS::make_handle(document))
{
    m_document->set_parser({}, *this);
    m_tokenizer.set_parser({}, *this);
}
143
144HTMLParser::~HTMLParser()
145{
146}
147
// Reports the objects this parser references to `visitor`: the base class' edges,
// the directly held document/element/node members, and the contents of the stack
// of open elements and the list of active formatting elements.
void HTMLParser::visit_edges(Cell::Visitor& visitor)
{
    Base::visit_edges(visitor);
    visitor.visit(m_document);
    visitor.visit(m_head_element);
    visitor.visit(m_form_element);
    visitor.visit(m_context_element);
    visitor.visit(m_character_insertion_node);

    // The two containers know how to visit their own entries.
    m_stack_of_open_elements.visit_edges(visitor);
    m_list_of_active_formatting_elements.visit_edges(visitor);
}
160
161void HTMLParser::run()
162{
163 for (;;) {
164 // FIXME: Find a better way to say that we come from Document::close() and want to process EOF.
165 if (!m_tokenizer.is_eof_inserted() && m_tokenizer.is_insertion_point_reached())
166 return;
167
168 auto optional_token = m_tokenizer.next_token();
169 if (!optional_token.has_value())
170 break;
171 auto& token = optional_token.value();
172
173 dbgln_if(HTML_PARSER_DEBUG, "[{}] {}", insertion_mode_name(), token.to_deprecated_string());
174
175 // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction-dispatcher
176 // As each token is emitted from the tokenizer, the user agent must follow the appropriate steps from the following list, known as the tree construction dispatcher:
177 if (m_stack_of_open_elements.is_empty()
178 || adjusted_current_node().namespace_() == Namespace::HTML
179 || (is_html_integration_point(adjusted_current_node()) && (token.is_start_tag() || token.is_character()))
180 || token.is_end_of_file()) {
181 // -> If the stack of open elements is empty
182 // -> If the adjusted current node is an element in the HTML namespace
183 // FIXME: -> If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark"
184 // FIXME: -> If the adjusted current node is a MathML text integration point and the token is a character token
185 // FIXME: -> If the adjusted current node is a MathML annotation-xml element and the token is a start tag whose tag name is "svg"
186 // -> If the adjusted current node is an HTML integration point and the token is a start tag
187 // -> If the adjusted current node is an HTML integration point and the token is a character token
188 // -> If the token is an end-of-file token
189
190 // Process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
191 process_using_the_rules_for(m_insertion_mode, token);
192 } else {
193 // -> Otherwise
194
195 // Process the token according to the rules given in the section for parsing tokens in foreign content.
196 process_using_the_rules_for_foreign_content(token);
197 }
198
199 if (m_stop_parsing) {
200 dbgln_if(HTML_PARSER_DEBUG, "Stop parsing{}! :^)", m_parsing_fragment ? " fragment" : "");
201 break;
202 }
203 }
204
205 flush_character_insertions();
206}
207
// Parses a whole document: records `url` and the tokenizer's source on the document,
// runs the token loop, performs the "the end" steps, and finally detaches this
// parser from the document.
void HTMLParser::run(const AK::URL& url)
{
    m_document->set_url(url);
    m_document->set_source(m_tokenizer.source());
    run();
    the_end();
    m_document->detach_parser({});
}
216
// https://html.spec.whatwg.org/multipage/parsing.html#the-end
// Runs the steps for when parsing stops: clears the insertion point and the stack of
// open elements, executes the scripts deferred until parsing has finished, queues the
// DOMContentLoaded task, waits for pending scripts and load-delaying work, queues the
// load/pageshow task, and marks the document ready for post-load tasks.
// NOTE: The statement order follows the numbered spec steps and interleaves with
//       event loop spinning, so it should not be rearranged.
void HTMLParser::the_end()
{
    // Once the user agent stops parsing the document, the user agent must run the following steps:

    // FIXME: 1. If the active speculative HTML parser is not null, then stop the speculative HTML parser and return.

    // 2. Set the insertion point to undefined.
    m_tokenizer.undefine_insertion_point();

    // 3. Update the current document readiness to "interactive".
    m_document->update_readiness(HTML::DocumentReadyState::Interactive);

    // 4. Pop all the nodes off the stack of open elements.
    while (!m_stack_of_open_elements.is_empty())
        (void)m_stack_of_open_elements.pop();

    // 5. While the list of scripts that will execute when the document has finished parsing is not empty:
    while (!m_document->scripts_to_execute_when_parsing_has_finished().is_empty()) {
        // 1. Spin the event loop until the first script in the list of scripts that will execute when the document has finished parsing
        //    has its "ready to be parser-executed" flag set and the parser's Document has no style sheet that is blocking scripts.
        main_thread_event_loop().spin_until([&] {
            return m_document->scripts_to_execute_when_parsing_has_finished().first()->is_ready_to_be_parser_executed()
                && !m_document->has_a_style_sheet_that_is_blocking_scripts();
        });

        // 2. Execute the first script in the list of scripts that will execute when the document has finished parsing.
        m_document->scripts_to_execute_when_parsing_has_finished().first()->execute_script();

        // 3. Remove the first script element from the list of scripts that will execute when the document has finished parsing (i.e. shift out the first entry in the list).
        (void)m_document->scripts_to_execute_when_parsing_has_finished().take_first();
    }

    // 6. Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following substeps:
    // NOTE: The task captures its own copy of m_document rather than `this`.
    old_queue_global_task_with_document(HTML::Task::Source::DOMManipulation, *m_document, [document = m_document] {
        // 1. Set the Document's load timing info's DOM content loaded event start time to the current high resolution time given the Document's relevant global object.
        document->load_timing_info().dom_content_loaded_event_start_time = HighResolutionTime::unsafe_shared_current_time();

        // 2. Fire an event named DOMContentLoaded at the Document object, with its bubbles attribute initialized to true.
        auto content_loaded_event = DOM::Event::create(document->realm(), HTML::EventNames::DOMContentLoaded).release_value_but_fixme_should_propagate_errors();
        content_loaded_event->set_bubbles(true);
        document->dispatch_event(content_loaded_event);

        // 3. Set the Document's load timing info's DOM content loaded event end time to the current high resolution time given the Document's relevant global object.
        document->load_timing_info().dom_content_loaded_event_end_time = HighResolutionTime::unsafe_shared_current_time();

        // FIXME: 4. Enable the client message queue of the ServiceWorkerContainer object whose associated service worker client is the Document object's relevant settings object.

        // FIXME: 5. Invoke WebDriver BiDi DOM content loaded with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "pending", and url is the Document object's URL.
    });

    // 7. Spin the event loop until the set of scripts that will execute as soon as possible and the list of scripts that will execute in order as soon as possible are empty.
    // NOTE(review): Only scripts_to_execute_as_soon_as_possible() is consulted here; confirm that the
    //               "in order" list named by the spec step is covered by it.
    main_thread_event_loop().spin_until([&] {
        return m_document->scripts_to_execute_as_soon_as_possible().is_empty();
    });

    // 8. Spin the event loop until there is nothing that delays the load event in the Document.
    // FIXME: Track down all the things that are supposed to delay the load event.
    main_thread_event_loop().spin_until([&] {
        return m_document->number_of_things_delaying_the_load_event() == 0;
    });

    // 9. Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following steps:
    old_queue_global_task_with_document(HTML::Task::Source::DOMManipulation, *m_document, [document = m_document] {
        // 1. Update the current document readiness to "complete".
        document->update_readiness(HTML::DocumentReadyState::Complete);

        // 2. If the Document object's browsing context is null, then abort these steps.
        if (!document->browsing_context())
            return;

        // 3. Let window be the Document's relevant global object.
        JS::NonnullGCPtr<Window> window = document->window();

        // 4. Set the Document's load timing info's load event start time to the current high resolution time given window.
        document->load_timing_info().load_event_start_time = HighResolutionTime::unsafe_shared_current_time();

        // 5. Fire an event named load at window, with legacy target override flag set.
        // FIXME: The legacy target override flag is currently set by a virtual override of dispatch_event()
        //        We should reorganize this so that the flag appears explicitly here instead.
        window->dispatch_event(DOM::Event::create(document->realm(), HTML::EventNames::load).release_value_but_fixme_should_propagate_errors());

        // FIXME: 6. Invoke WebDriver BiDi load complete with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "complete", and url is the Document object's URL.

        // FIXME: 7. Set the Document object's navigation id to null.

        // 8. Set the Document's load timing info's load event end time to the current high resolution time given window.
        document->load_timing_info().load_event_end_time = HighResolutionTime::unsafe_shared_current_time();

        // 9. Assert: Document's page showing is false.
        VERIFY(!document->page_showing());

        // 10. Set the Document's page showing flag to true.
        document->set_page_showing(true);

        // 11. Fire a page transition event named pageshow at window with false.
        window->fire_a_page_transition_event(HTML::EventNames::pageshow, false);

        // 12. Completely finish loading the Document.
        document->completely_finish_loading();

        // FIXME: 13. Queue the navigation timing entry for the Document.
    });

    // FIXME: 10. If the Document's print when loaded flag is set, then run the printing steps.

    // 11. The Document is now ready for post-load tasks.
    m_document->set_ready_for_post_load_tasks(true);
}
326
327void HTMLParser::process_using_the_rules_for(InsertionMode mode, HTMLToken& token)
328{
329 switch (mode) {
330 case InsertionMode::Initial:
331 handle_initial(token);
332 break;
333 case InsertionMode::BeforeHTML:
334 handle_before_html(token);
335 break;
336 case InsertionMode::BeforeHead:
337 handle_before_head(token);
338 break;
339 case InsertionMode::InHead:
340 handle_in_head(token);
341 break;
342 case InsertionMode::InHeadNoscript:
343 handle_in_head_noscript(token);
344 break;
345 case InsertionMode::AfterHead:
346 handle_after_head(token);
347 break;
348 case InsertionMode::InBody:
349 handle_in_body(token);
350 break;
351 case InsertionMode::AfterBody:
352 handle_after_body(token);
353 break;
354 case InsertionMode::AfterAfterBody:
355 handle_after_after_body(token);
356 break;
357 case InsertionMode::Text:
358 handle_text(token);
359 break;
360 case InsertionMode::InTable:
361 handle_in_table(token);
362 break;
363 case InsertionMode::InTableBody:
364 handle_in_table_body(token);
365 break;
366 case InsertionMode::InRow:
367 handle_in_row(token);
368 break;
369 case InsertionMode::InCell:
370 handle_in_cell(token);
371 break;
372 case InsertionMode::InTableText:
373 handle_in_table_text(token);
374 break;
375 case InsertionMode::InSelectInTable:
376 handle_in_select_in_table(token);
377 break;
378 case InsertionMode::InSelect:
379 handle_in_select(token);
380 break;
381 case InsertionMode::InCaption:
382 handle_in_caption(token);
383 break;
384 case InsertionMode::InColumnGroup:
385 handle_in_column_group(token);
386 break;
387 case InsertionMode::InTemplate:
388 handle_in_template(token);
389 break;
390 case InsertionMode::InFrameset:
391 handle_in_frameset(token);
392 break;
393 case InsertionMode::AfterFrameset:
394 handle_after_frameset(token);
395 break;
396 case InsertionMode::AfterAfterFrameset:
397 handle_after_after_frameset(token);
398 break;
399 default:
400 VERIFY_NOT_REACHED();
401 }
402}
403
404DOM::QuirksMode HTMLParser::which_quirks_mode(HTMLToken const& doctype_token) const
405{
406 if (doctype_token.doctype_data().force_quirks)
407 return DOM::QuirksMode::Yes;
408
409 // NOTE: The tokenizer puts the name into lower case for us.
410 if (doctype_token.doctype_data().name != "html")
411 return DOM::QuirksMode::Yes;
412
413 auto const& public_identifier = doctype_token.doctype_data().public_identifier;
414 auto const& system_identifier = doctype_token.doctype_data().system_identifier;
415
416 if (public_identifier.equals_ignoring_ascii_case("-//W3O//DTD W3 HTML Strict 3.0//EN//"sv))
417 return DOM::QuirksMode::Yes;
418
419 if (public_identifier.equals_ignoring_ascii_case("-/W3C/DTD HTML 4.0 Transitional/EN"sv))
420 return DOM::QuirksMode::Yes;
421
422 if (public_identifier.equals_ignoring_ascii_case("HTML"sv))
423 return DOM::QuirksMode::Yes;
424
425 if (system_identifier.equals_ignoring_ascii_case("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"sv))
426 return DOM::QuirksMode::Yes;
427
428 for (auto& public_id : s_quirks_public_ids) {
429 if (public_identifier.starts_with(public_id, CaseSensitivity::CaseInsensitive))
430 return DOM::QuirksMode::Yes;
431 }
432
433 if (doctype_token.doctype_data().missing_system_identifier) {
434 if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive))
435 return DOM::QuirksMode::Yes;
436
437 if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive))
438 return DOM::QuirksMode::Yes;
439 }
440
441 if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Frameset//"sv, CaseSensitivity::CaseInsensitive))
442 return DOM::QuirksMode::Limited;
443
444 if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Transitional//"sv, CaseSensitivity::CaseInsensitive))
445 return DOM::QuirksMode::Limited;
446
447 if (!doctype_token.doctype_data().missing_system_identifier) {
448 if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive))
449 return DOM::QuirksMode::Limited;
450
451 if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive))
452 return DOM::QuirksMode::Limited;
453 }
454
455 return DOM::QuirksMode::No;
456}
457
458void HTMLParser::handle_initial(HTMLToken& token)
459{
460 if (token.is_character() && token.is_parser_whitespace()) {
461 return;
462 }
463
464 if (token.is_comment()) {
465 auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors();
466 MUST(document().append_child(*comment));
467 return;
468 }
469
470 if (token.is_doctype()) {
471 auto doctype = realm().heap().allocate<DOM::DocumentType>(realm(), document()).release_allocated_value_but_fixme_should_propagate_errors();
472 doctype->set_name(token.doctype_data().name);
473 doctype->set_public_id(token.doctype_data().public_identifier);
474 doctype->set_system_id(token.doctype_data().system_identifier);
475 MUST(document().append_child(*doctype));
476 document().set_quirks_mode(which_quirks_mode(token));
477 m_insertion_mode = InsertionMode::BeforeHTML;
478 return;
479 }
480
481 log_parse_error();
482 document().set_quirks_mode(DOM::QuirksMode::Yes);
483 m_insertion_mode = InsertionMode::BeforeHTML;
484 process_using_the_rules_for(InsertionMode::BeforeHTML, token);
485}
486
487// https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
488void HTMLParser::handle_before_html(HTMLToken& token)
489{
490 // -> A DOCTYPE token
491 if (token.is_doctype()) {
492 // Parse error. Ignore the token.
493 log_parse_error();
494 return;
495 }
496
497 // -> A comment token
498 if (token.is_comment()) {
499 // Insert a comment as the last child of the Document object.
500 auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors();
501 MUST(document().append_child(*comment));
502 return;
503 }
504
505 // -> A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
506 if (token.is_character() && token.is_parser_whitespace()) {
507 // Ignore the token.
508 return;
509 }
510
511 // -> A start tag whose tag name is "html"
512 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
513 // Create an element for the token in the HTML namespace, with the Document as the intended parent. Append it to the Document object. Put this element in the stack of open elements.
514 auto element = create_element_for(token, Namespace::HTML, document());
515 MUST(document().append_child(*element));
516 m_stack_of_open_elements.push(move(element));
517
518 // Switch the insertion mode to "before head".
519 m_insertion_mode = InsertionMode::BeforeHead;
520 return;
521 }
522
523 // -> An end tag whose tag name is one of: "head", "body", "html", "br"
524 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) {
525 // Act as described in the "anything else" entry below.
526 goto AnythingElse;
527 }
528
529 // -> Any other end tag
530 if (token.is_end_tag()) {
531 // Parse error. Ignore the token.
532 log_parse_error();
533 return;
534 }
535
536 // -> Anything else
537AnythingElse:
538 // Create an html element whose node document is the Document object. Append it to the Document object. Put this element in the stack of open elements.
539 auto element = create_element(document(), HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
540 MUST(document().append_child(element));
541 m_stack_of_open_elements.push(element);
542
543 // Switch the insertion mode to "before head", then reprocess the token.
544 m_insertion_mode = InsertionMode::BeforeHead;
545 process_using_the_rules_for(InsertionMode::BeforeHead, token);
546 return;
547}
548
// Returns the current node as reported by the stack of open elements.
DOM::Element& HTMLParser::current_node()
{
    return m_stack_of_open_elements.current_node();
}
553
554DOM::Element& HTMLParser::adjusted_current_node()
555{
556 if (m_parsing_fragment && m_stack_of_open_elements.elements().size() == 1)
557 return *m_context_element;
558
559 return current_node();
560}
561
562DOM::Element& HTMLParser::node_before_current_node()
563{
564 return *m_stack_of_open_elements.elements().at(m_stack_of_open_elements.elements().size() - 2);
565}
566
// https://html.spec.whatwg.org/multipage/parsing.html#appropriate-place-for-inserting-a-node
// Computes where a new node should be inserted: a parent plus an optional sibling to
// insert before (nullptr means "after the parent's last child").
// `override_target`, when non-null, replaces the current node as the starting target.
HTMLParser::AdjustedInsertionLocation HTMLParser::find_appropriate_place_for_inserting_node(JS::GCPtr<DOM::Element> override_target)
{
    // 1. If there was an override target specified, then let target be the override target. Otherwise, let target be the current node.
    auto& target = override_target ? *override_target.ptr() : current_node();
    HTMLParser::AdjustedInsertionLocation adjusted_insertion_location;

    // 2. Determine the adjusted insertion location using the first matching steps from the following list:

    // `-> If foster parenting is enabled and target is a table, tbody, tfoot, thead, or tr element
    if (m_foster_parenting && target.local_name().is_one_of(HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr)) {
        // 1. Let last template be the last template element in the stack of open elements, if any.
        auto last_template = m_stack_of_open_elements.last_element_with_tag_name(HTML::TagNames::template_);
        // 2. Let last table be the last table element in the stack of open elements, if any.
        auto last_table = m_stack_of_open_elements.last_element_with_tag_name(HTML::TagNames::table);
        // 3. If there is a last template and either there is no last table,
        //    or there is one, but last template is lower (more recently added) than last table in the stack of open elements,
        if (last_template.element && (!last_table.element || last_template.index > last_table.index)) {
            // then: let adjusted insertion location be inside last template's template contents, after its last child (if any), and abort these steps.

            // NOTE: This returns the template content, so no need to check the parent is a template.
            return { verify_cast<HTMLTemplateElement>(*last_template.element).content().ptr(), nullptr };
        }
        // 4. If there is no last table, then let adjusted insertion location be inside the first element in the stack of open elements (the html element),
        //    after its last child (if any), and abort these steps. (fragment case)
        if (!last_table.element) {
            VERIFY(m_parsing_fragment);
            // Guaranteed not to be a template element (it will be the html element),
            // so no need to check the parent is a template.
            return { *m_stack_of_open_elements.elements().first(), nullptr };
        }
        // 5. If last table has a parent node, then let adjusted insertion location be inside last table's parent node, immediately before last table, and abort these steps.
        // NOTE: Unlike steps 3 and 4 this branch does not return early; it falls through to the template check at the end.
        if (last_table.element->parent_node()) {
            adjusted_insertion_location = { last_table.element->parent_node(), last_table.element.ptr() };
        } else {
            // 6. Let previous element be the element immediately above last table in the stack of open elements.
            auto previous_element = m_stack_of_open_elements.element_immediately_above(*last_table.element);

            // 7. Let adjusted insertion location be inside previous element, after its last child (if any).
            adjusted_insertion_location = { previous_element.ptr(), nullptr };
        }
    } else {
        // `-> Otherwise
        //     Let adjusted insertion location be inside target, after its last child (if any).
        adjusted_insertion_location = { target, nullptr };
    }

    // 3. If the adjusted insertion location is inside a template element,
    //    let it instead be inside the template element's template contents, after its last child (if any).
    if (is<HTMLTemplateElement>(*adjusted_insertion_location.parent))
        return { verify_cast<HTMLTemplateElement>(*adjusted_insertion_location.parent).content().ptr(), nullptr };

    // 4. Return the adjusted insertion location.
    return adjusted_insertion_location;
}
618
// https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token
// Creates an element for `token` in namespace `namespace_`, owned by the node document of
// `intended_parent`, copies the token's attributes onto it, and associates it with the
// form element pointer when the conditions of step 14 hold. The element is returned
// without being inserted anywhere.
JS::NonnullGCPtr<DOM::Element> HTMLParser::create_element_for(HTMLToken const& token, DeprecatedFlyString const& namespace_, DOM::Node& intended_parent)
{
    // FIXME: 1. If the active speculative HTML parser is not null, then return the result of creating a speculative mock element given given namespace, the tag name of the given token, and the attributes of the given token.
    // FIXME: 2. Otherwise, optionally create a speculative mock element given given namespace, the tag name of the given token, and the attributes of the given token.

    // 3. Let document be intended parent's node document.
    JS::NonnullGCPtr<DOM::Document> document = intended_parent.document();

    // 4. Let local name be the tag name of the token.
    auto local_name = token.tag_name();

    // FIXME: 5. Let is be the value of the "is" attribute in the given token, if such an attribute exists, or null otherwise.
    // FIXME: 6. Let definition be the result of looking up a custom element definition given document, given namespace, local name, and is.
    // FIXME: 7. If definition is non-null and the parser was not created as part of the HTML fragment parsing algorithm, then let will execute script be true. Otherwise, let it be false.
    // FIXME: 8. If will execute script is true, then:
    // FIXME:    1. Increment document's throw-on-dynamic-markup-insertion counter.
    // FIXME:    2. If the JavaScript execution context stack is empty, then perform a microtask checkpoint.
    // FIXME:    3. Push a new element queue onto document's relevant agent's custom element reactions stack.

    // 9. Let element be the result of creating an element given document, localName, given namespace, null, and is.
    // FIXME: If will execute script is true, set the synchronous custom elements flag; otherwise, leave it unset.
    // FIXME: Pass in `null` and `is`.
    auto element = create_element(*document, local_name, namespace_).release_value_but_fixme_should_propagate_errors();

    // 10. Append each attribute in the given token to element.
    // FIXME: This isn't the exact `append` the spec is talking about.
    token.for_each_attribute([&](auto& attribute) {
        MUST(element->set_attribute(attribute.local_name, attribute.value));
        return IterationDecision::Continue;
    });

    // FIXME: 11. If will execute script is true, then:
    // FIXME:     1. Let queue be the result of popping from document's relevant agent's custom element reactions stack. (This will be the same element queue as was pushed above.)
    // FIXME:     2. Invoke custom element reactions in queue.
    // FIXME:     3. Decrement document's throw-on-dynamic-markup-insertion counter.

    // FIXME: 12. If element has an xmlns attribute in the XMLNS namespace whose value is not exactly the same as the element's namespace, that is a parse error.
    //            Similarly, if element has an xmlns:xlink attribute in the XMLNS namespace whose value is not the XLink Namespace, that is a parse error.

    // FIXME: 13. If element is a resettable element, invoke its reset algorithm. (This initializes the element's value and checkedness based on the element's attributes.)

    // 14. If element is a form-associated element and not a form-associated custom element, the form element pointer is not null, there is no template element on the stack of open elements,
    //     element is either not listed or doesn't have a form attribute, and the intended parent is in the same tree as the element pointed to by the form element pointer,
    //     then associate element with the form element pointed to by the form element pointer and set element's parser inserted flag.
    // FIXME: Check if the element is not a form-associated custom element.
    if (is<FormAssociatedElement>(*element)) {
        auto* form_associated_element = dynamic_cast<FormAssociatedElement*>(element.ptr());
        VERIFY(form_associated_element);

        auto& html_element = form_associated_element->form_associated_element_to_html_element();

        // "In the same tree" is checked by comparing the roots of the intended parent and the form element.
        if (m_form_element.ptr()
            && !m_stack_of_open_elements.contains(HTML::TagNames::template_)
            && (!form_associated_element->is_listed() || !html_element.has_attribute(HTML::AttributeNames::form))
            && &intended_parent.root() == &m_form_element->root()) {
            form_associated_element->set_form(m_form_element.ptr());
            form_associated_element->set_parser_inserted({});
        }
    }

    // 15. Return element.
    return element;
}
682
// https://html.spec.whatwg.org/multipage/parsing.html#insert-a-foreign-element
// Creates an element for `token` in namespace `namespace_`, inserts it at the appropriate
// place when the pre-insertion validity check passes, and pushes it onto the stack of
// open elements. The push and the return happen even when the insertion was skipped.
JS::NonnullGCPtr<DOM::Element> HTMLParser::insert_foreign_element(HTMLToken const& token, DeprecatedFlyString const& namespace_)
{
    auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();

    // NOTE: adjusted_insertion_location.parent will be non-null, however, it uses RP to be able to default-initialize HTMLParser::AdjustedInsertionLocation.
    auto element = create_element_for(token, namespace_, *adjusted_insertion_location.parent);

    auto pre_insertion_validity = adjusted_insertion_location.parent->ensure_pre_insertion_validity(*element, adjusted_insertion_location.insert_before_sibling);

    // NOTE: If it's not possible to insert the element at the adjusted insertion location, the element is simply dropped.
    if (!pre_insertion_validity.is_exception()) {
        if (!m_parsing_fragment) {
            // FIXME: push a new element queue onto element's relevant agent's custom element reactions stack.
        }

        adjusted_insertion_location.parent->insert_before(*element, adjusted_insertion_location.insert_before_sibling);

        if (!m_parsing_fragment) {
            // FIXME: pop the element queue from element's relevant agent's custom element reactions stack, and invoke custom element reactions in that queue.
        }
    }

    // The element goes onto the stack of open elements regardless of whether it was inserted above.
    m_stack_of_open_elements.push(element);
    return element;
}
709
710JS::NonnullGCPtr<DOM::Element> HTMLParser::insert_html_element(HTMLToken const& token)
711{
712 return insert_foreign_element(token, Namespace::HTML);
713}
714
715void HTMLParser::handle_before_head(HTMLToken& token)
716{
717 if (token.is_character() && token.is_parser_whitespace()) {
718 return;
719 }
720
721 if (token.is_comment()) {
722 insert_comment(token);
723 return;
724 }
725
726 if (token.is_doctype()) {
727 log_parse_error();
728 return;
729 }
730
731 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
732 process_using_the_rules_for(InsertionMode::InBody, token);
733 return;
734 }
735
736 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::head) {
737 auto element = insert_html_element(token);
738 m_head_element = JS::make_handle(verify_cast<HTMLHeadElement>(*element));
739 m_insertion_mode = InsertionMode::InHead;
740 return;
741 }
742
743 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) {
744 goto AnythingElse;
745 }
746
747 if (token.is_end_tag()) {
748 log_parse_error();
749 return;
750 }
751
752AnythingElse:
753 m_head_element = JS::make_handle(verify_cast<HTMLHeadElement>(*insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::head))));
754 m_insertion_mode = InsertionMode::InHead;
755 process_using_the_rules_for(InsertionMode::InHead, token);
756 return;
757}
758
759void HTMLParser::insert_comment(HTMLToken& token)
760{
761 auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
762 adjusted_insertion_location.parent->insert_before(realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors(), adjusted_insertion_location.insert_before_sibling);
763}
764
765void HTMLParser::handle_in_head(HTMLToken& token)
766{
767 if (token.is_parser_whitespace()) {
768 insert_character(token.code_point());
769 return;
770 }
771
772 if (token.is_comment()) {
773 insert_comment(token);
774 return;
775 }
776
777 if (token.is_doctype()) {
778 log_parse_error();
779 return;
780 }
781
782 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
783 process_using_the_rules_for(InsertionMode::InBody, token);
784 return;
785 }
786
787 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link)) {
788 (void)insert_html_element(token);
789 (void)m_stack_of_open_elements.pop();
790 token.acknowledge_self_closing_flag_if_set();
791 return;
792 }
793
794 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::meta) {
795 (void)insert_html_element(token);
796 (void)m_stack_of_open_elements.pop();
797 token.acknowledge_self_closing_flag_if_set();
798 return;
799 }
800
801 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::title) {
802 (void)insert_html_element(token);
803 m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
804 m_original_insertion_mode = m_insertion_mode;
805 m_insertion_mode = InsertionMode::Text;
806 return;
807 }
808
809 if (token.is_start_tag() && ((token.tag_name() == HTML::TagNames::noscript && m_scripting_enabled) || token.tag_name() == HTML::TagNames::noframes || token.tag_name() == HTML::TagNames::style)) {
810 parse_generic_raw_text_element(token);
811 return;
812 }
813
814 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noscript && !m_scripting_enabled) {
815 (void)insert_html_element(token);
816 m_insertion_mode = InsertionMode::InHeadNoscript;
817 return;
818 }
819
820 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::script) {
821 auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
822 auto element = create_element_for(token, Namespace::HTML, *adjusted_insertion_location.parent);
823 auto& script_element = verify_cast<HTMLScriptElement>(*element);
824 script_element.set_parser_document(Badge<HTMLParser> {}, document());
825 script_element.set_force_async(Badge<HTMLParser> {}, false);
826 script_element.set_source_line_number({}, token.start_position().line + 1); // FIXME: This +1 is incorrect for script tags whose script does not start on a new line
827
828 if (m_parsing_fragment) {
829 script_element.set_already_started(Badge<HTMLParser> {}, true);
830 }
831
832 if (m_invoked_via_document_write) {
833 TODO();
834 }
835
836 adjusted_insertion_location.parent->insert_before(*element, adjusted_insertion_location.insert_before_sibling, false);
837 m_stack_of_open_elements.push(element);
838 m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData);
839 m_original_insertion_mode = m_insertion_mode;
840 m_insertion_mode = InsertionMode::Text;
841 return;
842 }
843 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::head) {
844 (void)m_stack_of_open_elements.pop();
845 m_insertion_mode = InsertionMode::AfterHead;
846 return;
847 }
848
849 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) {
850 goto AnythingElse;
851 }
852
853 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::template_) {
854 (void)insert_html_element(token);
855 m_list_of_active_formatting_elements.add_marker();
856 m_frameset_ok = false;
857 m_insertion_mode = InsertionMode::InTemplate;
858 m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate);
859 return;
860 }
861
862 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) {
863 if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) {
864 log_parse_error();
865 return;
866 }
867
868 generate_all_implied_end_tags_thoroughly();
869
870 if (current_node().local_name() != HTML::TagNames::template_)
871 log_parse_error();
872
873 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::template_);
874 m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
875 m_stack_of_template_insertion_modes.take_last();
876 reset_the_insertion_mode_appropriately();
877 return;
878 }
879
880 if ((token.is_start_tag() && token.tag_name() == HTML::TagNames::head) || token.is_end_tag()) {
881 log_parse_error();
882 return;
883 }
884
885AnythingElse:
886 (void)m_stack_of_open_elements.pop();
887 m_insertion_mode = InsertionMode::AfterHead;
888 process_using_the_rules_for(m_insertion_mode, token);
889}
890
891void HTMLParser::handle_in_head_noscript(HTMLToken& token)
892{
893 if (token.is_doctype()) {
894 log_parse_error();
895 return;
896 }
897
898 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
899 process_using_the_rules_for(InsertionMode::InBody, token);
900 return;
901 }
902
903 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::noscript) {
904 (void)m_stack_of_open_elements.pop();
905 m_insertion_mode = InsertionMode::InHead;
906 return;
907 }
908
909 if (token.is_parser_whitespace() || token.is_comment() || (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::style))) {
910 process_using_the_rules_for(InsertionMode::InHead, token);
911 return;
912 }
913
914 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::br) {
915 goto AnythingElse;
916 }
917
918 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::head, HTML::TagNames::noscript)) {
919 log_parse_error();
920 return;
921 }
922
923AnythingElse:
924 log_parse_error();
925 (void)m_stack_of_open_elements.pop();
926 m_insertion_mode = InsertionMode::InHead;
927 process_using_the_rules_for(m_insertion_mode, token);
928}
929
930void HTMLParser::parse_generic_raw_text_element(HTMLToken& token)
931{
932 (void)insert_html_element(token);
933 m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
934 m_original_insertion_mode = m_insertion_mode;
935 m_insertion_mode = InsertionMode::Text;
936}
937
938DOM::Text* HTMLParser::find_character_insertion_node()
939{
940 auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
941 if (adjusted_insertion_location.insert_before_sibling) {
942 TODO();
943 }
944 if (adjusted_insertion_location.parent->is_document())
945 return nullptr;
946 if (adjusted_insertion_location.parent->last_child() && adjusted_insertion_location.parent->last_child()->is_text())
947 return verify_cast<DOM::Text>(adjusted_insertion_location.parent->last_child());
948 auto new_text_node = realm().heap().allocate<DOM::Text>(realm(), document(), "").release_allocated_value_but_fixme_should_propagate_errors();
949 MUST(adjusted_insertion_location.parent->append_child(*new_text_node));
950 return new_text_node;
951}
952
953void HTMLParser::flush_character_insertions()
954{
955 if (m_character_insertion_builder.is_empty())
956 return;
957 m_character_insertion_node->set_data(m_character_insertion_builder.to_deprecated_string());
958 m_character_insertion_node->parent()->children_changed();
959 m_character_insertion_builder.clear();
960}
961
962void HTMLParser::insert_character(u32 data)
963{
964 auto node = find_character_insertion_node();
965 if (node == m_character_insertion_node.ptr()) {
966 m_character_insertion_builder.append(Utf32View { &data, 1 });
967 return;
968 }
969 if (!m_character_insertion_node.ptr()) {
970 m_character_insertion_node = JS::make_handle(node);
971 m_character_insertion_builder.append(Utf32View { &data, 1 });
972 return;
973 }
974 flush_character_insertions();
975 m_character_insertion_node = JS::make_handle(node);
976 m_character_insertion_builder.append(Utf32View { &data, 1 });
977}
978
979void HTMLParser::handle_after_head(HTMLToken& token)
980{
981 if (token.is_character() && token.is_parser_whitespace()) {
982 insert_character(token.code_point());
983 return;
984 }
985
986 if (token.is_comment()) {
987 insert_comment(token);
988 return;
989 }
990
991 if (token.is_doctype()) {
992 log_parse_error();
993 return;
994 }
995
996 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
997 process_using_the_rules_for(InsertionMode::InBody, token);
998 return;
999 }
1000
1001 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::body) {
1002 (void)insert_html_element(token);
1003 m_frameset_ok = false;
1004 m_insertion_mode = InsertionMode::InBody;
1005 return;
1006 }
1007
1008 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) {
1009 (void)insert_html_element(token);
1010 m_insertion_mode = InsertionMode::InFrameset;
1011 return;
1012 }
1013
1014 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) {
1015 log_parse_error();
1016 m_stack_of_open_elements.push(*m_head_element);
1017 process_using_the_rules_for(InsertionMode::InHead, token);
1018 m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) {
1019 return entry.ptr() == m_head_element.ptr();
1020 });
1021 return;
1022 }
1023
1024 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) {
1025 process_using_the_rules_for(InsertionMode::InHead, token);
1026 return;
1027 }
1028
1029 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::html, HTML::TagNames::br)) {
1030 goto AnythingElse;
1031 }
1032
1033 if ((token.is_start_tag() && token.tag_name() == HTML::TagNames::head) || token.is_end_tag()) {
1034 log_parse_error();
1035 return;
1036 }
1037
1038AnythingElse:
1039 (void)insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::body));
1040 m_insertion_mode = InsertionMode::InBody;
1041 process_using_the_rules_for(m_insertion_mode, token);
1042}
1043
1044void HTMLParser::generate_implied_end_tags(DeprecatedFlyString const& exception)
1045{
1046 while (current_node().local_name() != exception && current_node().local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc))
1047 (void)m_stack_of_open_elements.pop();
1048}
1049
1050void HTMLParser::generate_all_implied_end_tags_thoroughly()
1051{
1052 while (current_node().local_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::colgroup, HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr))
1053 (void)m_stack_of_open_elements.pop();
1054}
1055
1056void HTMLParser::close_a_p_element()
1057{
1058 generate_implied_end_tags(HTML::TagNames::p);
1059 if (current_node().local_name() != HTML::TagNames::p) {
1060 log_parse_error();
1061 }
1062 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::p);
1063}
1064
1065void HTMLParser::handle_after_body(HTMLToken& token)
1066{
1067 if (token.is_character() && token.is_parser_whitespace()) {
1068 process_using_the_rules_for(InsertionMode::InBody, token);
1069 return;
1070 }
1071
1072 if (token.is_comment()) {
1073 auto& insertion_location = m_stack_of_open_elements.first();
1074 MUST(insertion_location.append_child(realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors()));
1075 return;
1076 }
1077
1078 if (token.is_doctype()) {
1079 log_parse_error();
1080 return;
1081 }
1082
1083 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
1084 process_using_the_rules_for(InsertionMode::InBody, token);
1085 return;
1086 }
1087
1088 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) {
1089 if (m_parsing_fragment) {
1090 log_parse_error();
1091 return;
1092 }
1093 m_insertion_mode = InsertionMode::AfterAfterBody;
1094 return;
1095 }
1096
1097 if (token.is_end_of_file()) {
1098 stop_parsing();
1099 return;
1100 }
1101
1102 log_parse_error();
1103 m_insertion_mode = InsertionMode::InBody;
1104 process_using_the_rules_for(InsertionMode::InBody, token);
1105}
1106
1107void HTMLParser::handle_after_after_body(HTMLToken& token)
1108{
1109 if (token.is_comment()) {
1110 auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors();
1111 MUST(document().append_child(*comment));
1112 return;
1113 }
1114
1115 if (token.is_doctype() || token.is_parser_whitespace() || (token.is_start_tag() && token.tag_name() == HTML::TagNames::html)) {
1116 process_using_the_rules_for(InsertionMode::InBody, token);
1117 return;
1118 }
1119
1120 if (token.is_end_of_file()) {
1121 stop_parsing();
1122 return;
1123 }
1124
1125 log_parse_error();
1126 m_insertion_mode = InsertionMode::InBody;
1127 process_using_the_rules_for(m_insertion_mode, token);
1128}
1129
1130// https://html.spec.whatwg.org/multipage/parsing.html#reconstruct-the-active-formatting-elements
1131void HTMLParser::reconstruct_the_active_formatting_elements()
1132{
1133 // 1. If there are no entries in the list of active formatting elements, then there is nothing to reconstruct; stop this algorithm.
1134 if (m_list_of_active_formatting_elements.is_empty())
1135 return;
1136
1137 // 2. If the last (most recently added) entry in the list of active formatting elements is a marker, or if it is an element that is in the stack of open elements,
1138 // then there is nothing to reconstruct; stop this algorithm.
1139 if (m_list_of_active_formatting_elements.entries().last().is_marker())
1140 return;
1141
1142 if (m_stack_of_open_elements.contains(*m_list_of_active_formatting_elements.entries().last().element))
1143 return;
1144
1145 // 3. Let entry be the last (most recently added) element in the list of active formatting elements.
1146 size_t index = m_list_of_active_formatting_elements.entries().size() - 1;
1147
1148 // NOTE: Entry will never be null, but must be a pointer instead of a reference to allow rebinding.
1149 auto* entry = &m_list_of_active_formatting_elements.entries().at(index);
1150
1151Rewind:
1152 // 4. Rewind: If there are no entries before entry in the list of active formatting elements, then jump to the step labeled create.
1153 if (index == 0)
1154 goto Create;
1155
1156 // 5. Let entry be the entry one earlier than entry in the list of active formatting elements.
1157 --index;
1158 entry = &m_list_of_active_formatting_elements.entries().at(index);
1159
1160 // 6. If entry is neither a marker nor an element that is also in the stack of open elements, go to the step labeled rewind.
1161 if (!entry->is_marker() && !m_stack_of_open_elements.contains(*entry->element))
1162 goto Rewind;
1163
1164Advance:
1165 // 7. Advance: Let entry be the element one later than entry in the list of active formatting elements.
1166 ++index;
1167 entry = &m_list_of_active_formatting_elements.entries().at(index);
1168
1169Create:
1170 // 8. Create: Insert an HTML element for the token for which the element entry was created, to obtain new element.
1171 VERIFY(!entry->is_marker());
1172
1173 // FIXME: Hold on to the real token!
1174 auto new_element = insert_html_element(HTMLToken::make_start_tag(entry->element->local_name()));
1175
1176 // 9. Replace the entry for entry in the list with an entry for new element.
1177 m_list_of_active_formatting_elements.entries().at(index).element = JS::make_handle(new_element);
1178
1179 // 10. If the entry for new element in the list of active formatting elements is not the last entry in the list, return to the step labeled advance.
1180 if (index != m_list_of_active_formatting_elements.entries().size() - 1)
1181 goto Advance;
1182}
1183
1184// https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm
1185HTMLParser::AdoptionAgencyAlgorithmOutcome HTMLParser::run_the_adoption_agency_algorithm(HTMLToken& token)
1186{
1187 // 1. Let subject be token's tag name.
1188 auto& subject = token.tag_name();
1189
1190 // 2. If the current node is an HTML element whose tag name is subject,
1191 // and the current node is not in the list of active formatting elements,
1192 // then pop the current node off the stack of open elements, and return.
1193 if (current_node().local_name() == subject && !m_list_of_active_formatting_elements.contains(current_node())) {
1194 (void)m_stack_of_open_elements.pop();
1195 return AdoptionAgencyAlgorithmOutcome::DoNothing;
1196 }
1197
1198 // 3. Let outer loop counter be 0.
1199 size_t outer_loop_counter = 0;
1200
1201 // 4. While true:
1202 while (true) {
1203 // 1. If outer loop counter is greater than or equal to 8, then return.
1204 if (outer_loop_counter >= 8)
1205 return AdoptionAgencyAlgorithmOutcome::DoNothing;
1206
1207 // 2. Increment outer loop counter by 1.
1208 outer_loop_counter++;
1209
1210 // 3. Let formatting element be the last element in the list of active formatting elements that:
1211 // - is between the end of the list and the last marker in the list, if any, or the start of the list otherwise, and
1212 // - has the tag name subject.
1213 auto* formatting_element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(subject);
1214
1215 // If there is no such element, then return and instead act as described in the "any other end tag" entry above.
1216 if (!formatting_element)
1217 return AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps;
1218
1219 // 4. If formatting element is not in the stack of open elements,
1220 if (!m_stack_of_open_elements.contains(*formatting_element)) {
1221 // then this is a parse error;
1222 log_parse_error();
1223 // remove the element from the list,
1224 m_list_of_active_formatting_elements.remove(*formatting_element);
1225 // and return.
1226 return AdoptionAgencyAlgorithmOutcome::DoNothing;
1227 }
1228
1229 // 5. If formatting element is in the stack of open elements, but the element is not in scope,
1230 if (!m_stack_of_open_elements.has_in_scope(*formatting_element)) {
1231 // then this is a parse error;
1232 log_parse_error();
1233 // return.
1234 return AdoptionAgencyAlgorithmOutcome::DoNothing;
1235 }
1236
1237 // 6. If formatting element is not the current node,
1238 if (formatting_element != ¤t_node()) {
1239 // this is a parse error. (But do not return.)
1240 log_parse_error();
1241 }
1242
1243 // 7. Let furthest block be the topmost node in the stack of open elements that is lower in the stack than formatting element,
1244 // and is an element in the special category. There might not be one.
1245 JS::GCPtr<DOM::Element> furthest_block = m_stack_of_open_elements.topmost_special_node_below(*formatting_element);
1246
1247 // 8. If there is no furthest block
1248 if (!furthest_block) {
1249 // then the UA must first pop all the nodes from the bottom of the stack of open elements,
1250 // from the current node up to and including formatting element,
1251 while (¤t_node() != formatting_element)
1252 (void)m_stack_of_open_elements.pop();
1253 (void)m_stack_of_open_elements.pop();
1254
1255 // then remove formatting element from the list of active formatting elements,
1256 m_list_of_active_formatting_elements.remove(*formatting_element);
1257 // and finally return.
1258 return AdoptionAgencyAlgorithmOutcome::DoNothing;
1259 }
1260
1261 // 9. Let common ancestor be the element immediately above formatting element in the stack of open elements.
1262 auto common_ancestor = m_stack_of_open_elements.element_immediately_above(*formatting_element);
1263
1264 // 10. Let a bookmark note the position of formatting element in the list of active formatting elements
1265 // relative to the elements on either side of it in the list.
1266 auto bookmark = m_list_of_active_formatting_elements.find_index(*formatting_element).value();
1267
1268 // 11. Let node and last node be furthest block.
1269 auto node = furthest_block;
1270 auto last_node = furthest_block;
1271
1272 // Keep track of this for later
1273 auto node_above_node = m_stack_of_open_elements.element_immediately_above(*node);
1274
1275 // 12. Let inner loop counter be 0.
1276 size_t inner_loop_counter = 0;
1277
1278 // 13. While true:
1279 while (true) {
1280 // 1. Increment inner loop counter by 1.
1281 inner_loop_counter++;
1282
1283 // 2. Let node be the element immediately above node in the stack of open elements,
1284 // or if node is no longer in the stack of open elements (e.g. because it got removed by this algorithm),
1285 // the element that was immediately above node in the stack of open elements before node was removed.
1286 node = node_above_node;
1287 VERIFY(node);
1288
1289 // Keep track of this for later
1290 node_above_node = m_stack_of_open_elements.element_immediately_above(*node);
1291
1292 // 3. If node is formatting element, then break.
1293 if (node.ptr() == formatting_element)
1294 break;
1295
1296 // 4. If inner loop counter is greater than 3 and node is in the list of active formatting elements,
1297 if (inner_loop_counter > 3 && m_list_of_active_formatting_elements.contains(*node)) {
1298 auto node_index = m_list_of_active_formatting_elements.find_index(*node);
1299 if (node_index.has_value() && node_index.value() < bookmark)
1300 bookmark--;
1301 // then remove node from the list of active formatting elements.
1302 m_list_of_active_formatting_elements.remove(*node);
1303 }
1304
1305 // 5. If node is not in the list of active formatting elements
1306 if (!m_list_of_active_formatting_elements.contains(*node)) {
1307 // then remove node from the stack of open elements and continue.
1308 m_stack_of_open_elements.remove(*node);
1309 continue;
1310 }
1311
1312 // 6. Create an element for the token for which the element node was created,
1313 // in the HTML namespace, with common ancestor as the intended parent;
1314 // FIXME: hold onto the real token
1315 auto element = create_element_for(HTMLToken::make_start_tag(node->local_name()), Namespace::HTML, *common_ancestor);
1316 // replace the entry for node in the list of active formatting elements with an entry for the new element,
1317 m_list_of_active_formatting_elements.replace(*node, *element);
1318 // replace the entry for node in the stack of open elements with an entry for the new element,
1319 m_stack_of_open_elements.replace(*node, element);
1320 // and let node be the new element.
1321 node = element;
1322
1323 // 7. If last node is furthest block,
1324 if (last_node == furthest_block) {
1325 // then move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements.
1326 bookmark = m_list_of_active_formatting_elements.find_index(*node).value() + 1;
1327 }
1328
1329 // 8. Append last node to node.
1330 MUST(node->append_child(*last_node));
1331
1332 // 9. Set last node to node.
1333 last_node = node;
1334 }
1335
1336 // 14. Insert whatever last node ended up being in the previous step at the appropriate place for inserting a node,
1337 // but using common ancestor as the override target.
1338 auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(common_ancestor);
1339 adjusted_insertion_location.parent->insert_before(*last_node, adjusted_insertion_location.insert_before_sibling, false);
1340
1341 // 15. Create an element for the token for which formatting element was created,
1342 // in the HTML namespace, with furthest block as the intended parent.
1343 // FIXME: hold onto the real token
1344 auto element = create_element_for(HTMLToken::make_start_tag(formatting_element->local_name()), Namespace::HTML, *furthest_block);
1345
1346 // 16. Take all of the child nodes of furthest block and append them to the element created in the last step.
1347 for (auto& child : furthest_block->children_as_vector())
1348 MUST(element->append_child(furthest_block->remove_child(*child).release_value()));
1349
1350 // 17. Append that new element to furthest block.
1351 MUST(furthest_block->append_child(*element));
1352
1353 // 18. Remove formatting element from the list of active formatting elements,
1354 // and insert the new element into the list of active formatting elements at the position of the aforementioned bookmark.
1355 auto formatting_element_index = m_list_of_active_formatting_elements.find_index(*formatting_element);
1356 if (formatting_element_index.has_value() && formatting_element_index.value() < bookmark)
1357 bookmark--;
1358 m_list_of_active_formatting_elements.remove(*formatting_element);
1359 m_list_of_active_formatting_elements.insert_at(bookmark, *element);
1360
1361 // 19. Remove formatting element from the stack of open elements, and insert the new element
1362 // into the stack of open elements immediately below the position of furthest block in that stack.
1363 m_stack_of_open_elements.remove(*formatting_element);
1364 m_stack_of_open_elements.insert_immediately_below(*element, *furthest_block);
1365 }
1366}
1367
1368bool HTMLParser::is_special_tag(DeprecatedFlyString const& tag_name, DeprecatedFlyString const& namespace_)
1369{
1370 if (namespace_ == Namespace::HTML) {
1371 return tag_name.is_one_of(
1372 HTML::TagNames::address,
1373 HTML::TagNames::applet,
1374 HTML::TagNames::area,
1375 HTML::TagNames::article,
1376 HTML::TagNames::aside,
1377 HTML::TagNames::base,
1378 HTML::TagNames::basefont,
1379 HTML::TagNames::bgsound,
1380 HTML::TagNames::blockquote,
1381 HTML::TagNames::body,
1382 HTML::TagNames::br,
1383 HTML::TagNames::button,
1384 HTML::TagNames::caption,
1385 HTML::TagNames::center,
1386 HTML::TagNames::col,
1387 HTML::TagNames::colgroup,
1388 HTML::TagNames::dd,
1389 HTML::TagNames::details,
1390 HTML::TagNames::dir,
1391 HTML::TagNames::div,
1392 HTML::TagNames::dl,
1393 HTML::TagNames::dt,
1394 HTML::TagNames::embed,
1395 HTML::TagNames::fieldset,
1396 HTML::TagNames::figcaption,
1397 HTML::TagNames::figure,
1398 HTML::TagNames::footer,
1399 HTML::TagNames::form,
1400 HTML::TagNames::frame,
1401 HTML::TagNames::frameset,
1402 HTML::TagNames::h1,
1403 HTML::TagNames::h2,
1404 HTML::TagNames::h3,
1405 HTML::TagNames::h4,
1406 HTML::TagNames::h5,
1407 HTML::TagNames::h6,
1408 HTML::TagNames::head,
1409 HTML::TagNames::header,
1410 HTML::TagNames::hgroup,
1411 HTML::TagNames::hr,
1412 HTML::TagNames::html,
1413 HTML::TagNames::iframe,
1414 HTML::TagNames::img,
1415 HTML::TagNames::input,
1416 HTML::TagNames::keygen,
1417 HTML::TagNames::li,
1418 HTML::TagNames::link,
1419 HTML::TagNames::listing,
1420 HTML::TagNames::main,
1421 HTML::TagNames::marquee,
1422 HTML::TagNames::menu,
1423 HTML::TagNames::meta,
1424 HTML::TagNames::nav,
1425 HTML::TagNames::noembed,
1426 HTML::TagNames::noframes,
1427 HTML::TagNames::noscript,
1428 HTML::TagNames::object,
1429 HTML::TagNames::ol,
1430 HTML::TagNames::p,
1431 HTML::TagNames::param,
1432 HTML::TagNames::plaintext,
1433 HTML::TagNames::pre,
1434 HTML::TagNames::script,
1435 HTML::TagNames::section,
1436 HTML::TagNames::select,
1437 HTML::TagNames::source,
1438 HTML::TagNames::style,
1439 HTML::TagNames::summary,
1440 HTML::TagNames::table,
1441 HTML::TagNames::tbody,
1442 HTML::TagNames::td,
1443 HTML::TagNames::template_,
1444 HTML::TagNames::textarea,
1445 HTML::TagNames::tfoot,
1446 HTML::TagNames::th,
1447 HTML::TagNames::thead,
1448 HTML::TagNames::title,
1449 HTML::TagNames::tr,
1450 HTML::TagNames::track,
1451 HTML::TagNames::ul,
1452 HTML::TagNames::wbr,
1453 HTML::TagNames::xmp);
1454 } else if (namespace_ == Namespace::SVG) {
1455 return tag_name.is_one_of(
1456 SVG::TagNames::desc,
1457 SVG::TagNames::foreignObject,
1458 SVG::TagNames::title);
1459 } else if (namespace_ == Namespace::MathML) {
1460 TODO();
1461 }
1462
1463 return false;
1464}
1465
1466void HTMLParser::handle_in_body(HTMLToken& token)
1467{
1468 if (token.is_character()) {
1469 if (token.code_point() == 0) {
1470 log_parse_error();
1471 return;
1472 }
1473 if (token.is_parser_whitespace()) {
1474 reconstruct_the_active_formatting_elements();
1475 insert_character(token.code_point());
1476 return;
1477 }
1478 reconstruct_the_active_formatting_elements();
1479 insert_character(token.code_point());
1480 m_frameset_ok = false;
1481 return;
1482 }
1483
1484 if (token.is_comment()) {
1485 insert_comment(token);
1486 return;
1487 }
1488
1489 if (token.is_doctype()) {
1490 log_parse_error();
1491 return;
1492 }
1493
1494 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
1495 log_parse_error();
1496 if (m_stack_of_open_elements.contains(HTML::TagNames::template_))
1497 return;
1498 token.for_each_attribute([&](auto& attribute) {
1499 if (!current_node().has_attribute(attribute.local_name))
1500 MUST(current_node().set_attribute(attribute.local_name, attribute.value));
1501 return IterationDecision::Continue;
1502 });
1503 return;
1504 }
1505 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) {
1506 process_using_the_rules_for(InsertionMode::InHead, token);
1507 return;
1508 }
1509
1510 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) {
1511 process_using_the_rules_for(InsertionMode::InHead, token);
1512 return;
1513 }
1514
1515 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::body) {
1516 log_parse_error();
1517 if (m_stack_of_open_elements.elements().size() == 1
1518 || m_stack_of_open_elements.elements().at(1)->local_name() != HTML::TagNames::body
1519 || m_stack_of_open_elements.contains(HTML::TagNames::template_)) {
1520 VERIFY(m_parsing_fragment);
1521 return;
1522 }
1523 m_frameset_ok = false;
1524 auto& body_element = m_stack_of_open_elements.elements().at(1);
1525 token.for_each_attribute([&](auto& attribute) {
1526 if (!body_element->has_attribute(attribute.local_name))
1527 MUST(body_element->set_attribute(attribute.local_name, attribute.value));
1528 return IterationDecision::Continue;
1529 });
1530 return;
1531 }
1532
1533 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) {
1534 log_parse_error();
1535
1536 if (m_stack_of_open_elements.elements().size() == 1
1537 || m_stack_of_open_elements.elements().at(1)->local_name() != HTML::TagNames::body) {
1538 VERIFY(m_parsing_fragment);
1539 return;
1540 }
1541
1542 if (!m_frameset_ok)
1543 return;
1544
1545 TODO();
1546 }
1547
1548 if (token.is_end_of_file()) {
1549 if (!m_stack_of_template_insertion_modes.is_empty()) {
1550 process_using_the_rules_for(InsertionMode::InTemplate, token);
1551 return;
1552 }
1553
1554 for (auto& node : m_stack_of_open_elements.elements()) {
1555 if (!node->local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) {
1556 log_parse_error();
1557 break;
1558 }
1559 }
1560
1561 stop_parsing();
1562 return;
1563 }
1564
1565 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::body) {
1566 if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::body)) {
1567 log_parse_error();
1568 return;
1569 }
1570
1571 for (auto& node : m_stack_of_open_elements.elements()) {
1572 if (!node->local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) {
1573 log_parse_error();
1574 break;
1575 }
1576 }
1577
1578 m_insertion_mode = InsertionMode::AfterBody;
1579 return;
1580 }
1581
1582 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) {
1583 if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::body)) {
1584 log_parse_error();
1585 return;
1586 }
1587
1588 for (auto& node : m_stack_of_open_elements.elements()) {
1589 if (!node->local_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt, HTML::TagNames::li, HTML::TagNames::optgroup, HTML::TagNames::option, HTML::TagNames::p, HTML::TagNames::rb, HTML::TagNames::rp, HTML::TagNames::rt, HTML::TagNames::rtc, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::body, HTML::TagNames::html)) {
1590 log_parse_error();
1591 break;
1592 }
1593 }
1594
1595 m_insertion_mode = InsertionMode::AfterBody;
1596 process_using_the_rules_for(m_insertion_mode, token);
1597 return;
1598 }
1599
1600 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::address, HTML::TagNames::article, HTML::TagNames::aside, HTML::TagNames::blockquote, HTML::TagNames::center, HTML::TagNames::details, HTML::TagNames::dialog, HTML::TagNames::dir, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::fieldset, HTML::TagNames::figcaption, HTML::TagNames::figure, HTML::TagNames::footer, HTML::TagNames::header, HTML::TagNames::hgroup, HTML::TagNames::main, HTML::TagNames::menu, HTML::TagNames::nav, HTML::TagNames::ol, HTML::TagNames::p, HTML::TagNames::section, HTML::TagNames::summary, HTML::TagNames::ul)) {
1601 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
1602 close_a_p_element();
1603 (void)insert_html_element(token);
1604 return;
1605 }
1606
1607 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) {
1608 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
1609 close_a_p_element();
1610 if (current_node().local_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) {
1611 log_parse_error();
1612 (void)m_stack_of_open_elements.pop();
1613 }
1614 (void)insert_html_element(token);
1615 return;
1616 }
1617
1618 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::pre, HTML::TagNames::listing)) {
1619 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
1620 close_a_p_element();
1621
1622 (void)insert_html_element(token);
1623
1624 m_frameset_ok = false;
1625
1626 // If the next token is a U+000A LINE FEED (LF) character token,
1627 // then ignore that token and move on to the next one.
1628 // (Newlines at the start of pre blocks are ignored as an authoring convenience.)
1629 auto next_token = m_tokenizer.next_token();
1630 if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_point() == '\n') {
1631 // Ignore it.
1632 } else {
1633 process_using_the_rules_for(m_insertion_mode, next_token.value());
1634 }
1635 return;
1636 }
1637
1638 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::form) {
1639 if (m_form_element.ptr() && !m_stack_of_open_elements.contains(HTML::TagNames::template_)) {
1640 log_parse_error();
1641 return;
1642 }
1643 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
1644 close_a_p_element();
1645 auto element = insert_html_element(token);
1646 if (!m_stack_of_open_elements.contains(HTML::TagNames::template_))
1647 m_form_element = JS::make_handle(verify_cast<HTMLFormElement>(*element));
1648 return;
1649 }
1650
1651 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::li) {
1652 m_frameset_ok = false;
1653
1654 for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) {
1655 JS::GCPtr<DOM::Element> node = m_stack_of_open_elements.elements()[i].ptr();
1656
1657 if (node->local_name() == HTML::TagNames::li) {
1658 generate_implied_end_tags(HTML::TagNames::li);
1659 if (current_node().local_name() != HTML::TagNames::li) {
1660 log_parse_error();
1661 }
1662 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::li);
1663 break;
1664 }
1665
1666 if (is_special_tag(node->local_name(), node->namespace_()) && !node->local_name().is_one_of(HTML::TagNames::address, HTML::TagNames::div, HTML::TagNames::p))
1667 break;
1668 }
1669
1670 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
1671 close_a_p_element();
1672
1673 (void)insert_html_element(token);
1674 return;
1675 }
1676
1677 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt)) {
1678 m_frameset_ok = false;
1679 for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) {
1680 JS::GCPtr<DOM::Element> node = m_stack_of_open_elements.elements()[i].ptr();
1681 if (node->local_name() == HTML::TagNames::dd) {
1682 generate_implied_end_tags(HTML::TagNames::dd);
1683 if (current_node().local_name() != HTML::TagNames::dd) {
1684 log_parse_error();
1685 }
1686 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::dd);
1687 break;
1688 }
1689 if (node->local_name() == HTML::TagNames::dt) {
1690 generate_implied_end_tags(HTML::TagNames::dt);
1691 if (current_node().local_name() != HTML::TagNames::dt) {
1692 log_parse_error();
1693 }
1694 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::dt);
1695 break;
1696 }
1697 if (is_special_tag(node->local_name(), node->namespace_()) && !node->local_name().is_one_of(HTML::TagNames::address, HTML::TagNames::div, HTML::TagNames::p))
1698 break;
1699 }
1700 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
1701 close_a_p_element();
1702 (void)insert_html_element(token);
1703 return;
1704 }
1705
1706 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::plaintext) {
1707 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
1708 close_a_p_element();
1709 (void)insert_html_element(token);
1710 m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT);
1711 return;
1712 }
1713
1714 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::button) {
1715 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::button)) {
1716 log_parse_error();
1717 generate_implied_end_tags();
1718 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::button);
1719 }
1720 reconstruct_the_active_formatting_elements();
1721 (void)insert_html_element(token);
1722 m_frameset_ok = false;
1723 return;
1724 }
1725
1726 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::address, HTML::TagNames::article, HTML::TagNames::aside, HTML::TagNames::blockquote, HTML::TagNames::button, HTML::TagNames::center, HTML::TagNames::details, HTML::TagNames::dialog, HTML::TagNames::dir, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::fieldset, HTML::TagNames::figcaption, HTML::TagNames::figure, HTML::TagNames::footer, HTML::TagNames::header, HTML::TagNames::hgroup, HTML::TagNames::listing, HTML::TagNames::main, HTML::TagNames::menu, HTML::TagNames::nav, HTML::TagNames::ol, HTML::TagNames::pre, HTML::TagNames::section, HTML::TagNames::summary, HTML::TagNames::ul)) {
1727 if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) {
1728 log_parse_error();
1729 return;
1730 }
1731
1732 generate_implied_end_tags();
1733
1734 if (current_node().local_name() != token.tag_name()) {
1735 log_parse_error();
1736 }
1737
1738 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name());
1739 return;
1740 }
1741
1742 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::form) {
1743 if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) {
1744 auto node = m_form_element;
1745 m_form_element = {};
1746 if (!node || !m_stack_of_open_elements.has_in_scope(*node)) {
1747 log_parse_error();
1748 return;
1749 }
1750 generate_implied_end_tags();
1751 if (¤t_node() != node.ptr()) {
1752 log_parse_error();
1753 }
1754 m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) { return entry.ptr() == node.ptr(); });
1755 } else {
1756 if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::form)) {
1757 log_parse_error();
1758 return;
1759 }
1760 generate_implied_end_tags();
1761 if (current_node().local_name() != HTML::TagNames::form) {
1762 log_parse_error();
1763 }
1764 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::form);
1765 }
1766 return;
1767 }
1768
1769 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::p) {
1770 if (!m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) {
1771 log_parse_error();
1772 (void)insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::p));
1773 }
1774 close_a_p_element();
1775 return;
1776 }
1777
1778 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::li) {
1779 if (!m_stack_of_open_elements.has_in_list_item_scope(HTML::TagNames::li)) {
1780 log_parse_error();
1781 return;
1782 }
1783 generate_implied_end_tags(HTML::TagNames::li);
1784 if (current_node().local_name() != HTML::TagNames::li) {
1785 log_parse_error();
1786 dbgln("Expected <li> current node, but had <{}>", current_node().local_name());
1787 }
1788 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::li);
1789 return;
1790 }
1791
1792 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::dd, HTML::TagNames::dt)) {
1793 if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) {
1794 log_parse_error();
1795 return;
1796 }
1797 generate_implied_end_tags(token.tag_name());
1798 if (current_node().local_name() != token.tag_name()) {
1799 log_parse_error();
1800 }
1801 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name());
1802 return;
1803 }
1804
1805 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6)) {
1806 if (!m_stack_of_open_elements.has_in_scope(HTML::TagNames::h1)
1807 && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h2)
1808 && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h3)
1809 && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h4)
1810 && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h5)
1811 && !m_stack_of_open_elements.has_in_scope(HTML::TagNames::h6)) {
1812 log_parse_error();
1813 return;
1814 }
1815
1816 generate_implied_end_tags();
1817 if (current_node().local_name() != token.tag_name()) {
1818 log_parse_error();
1819 }
1820
1821 for (;;) {
1822 auto popped_element = m_stack_of_open_elements.pop();
1823 if (popped_element->local_name().is_one_of(HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6))
1824 break;
1825 }
1826 return;
1827 }
1828
1829 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::a) {
1830 if (auto* element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(HTML::TagNames::a)) {
1831 log_parse_error();
1832 if (run_the_adoption_agency_algorithm(token) == AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps)
1833 goto AnyOtherEndTag;
1834 m_list_of_active_formatting_elements.remove(*element);
1835 m_stack_of_open_elements.elements().remove_first_matching([&](auto& entry) {
1836 return entry.ptr() == element;
1837 });
1838 }
1839 reconstruct_the_active_formatting_elements();
1840 auto element = insert_html_element(token);
1841 m_list_of_active_formatting_elements.add(*element);
1842 return;
1843 }
1844
1845 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::code, HTML::TagNames::em, HTML::TagNames::font, HTML::TagNames::i, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::strike, HTML::TagNames::strong, HTML::TagNames::tt, HTML::TagNames::u)) {
1846 reconstruct_the_active_formatting_elements();
1847 auto element = insert_html_element(token);
1848 m_list_of_active_formatting_elements.add(*element);
1849 return;
1850 }
1851
1852 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::nobr) {
1853 reconstruct_the_active_formatting_elements();
1854 if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::nobr)) {
1855 log_parse_error();
1856 run_the_adoption_agency_algorithm(token);
1857 reconstruct_the_active_formatting_elements();
1858 }
1859 auto element = insert_html_element(token);
1860 m_list_of_active_formatting_elements.add(*element);
1861 return;
1862 }
1863
1864 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::a, HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::code, HTML::TagNames::em, HTML::TagNames::font, HTML::TagNames::i, HTML::TagNames::nobr, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::strike, HTML::TagNames::strong, HTML::TagNames::tt, HTML::TagNames::u)) {
1865 if (run_the_adoption_agency_algorithm(token) == AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps)
1866 goto AnyOtherEndTag;
1867 return;
1868 }
1869
1870 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::applet, HTML::TagNames::marquee, HTML::TagNames::object)) {
1871 reconstruct_the_active_formatting_elements();
1872 (void)insert_html_element(token);
1873 m_list_of_active_formatting_elements.add_marker();
1874 m_frameset_ok = false;
1875 return;
1876 }
1877
1878 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::applet, HTML::TagNames::marquee, HTML::TagNames::object)) {
1879 if (!m_stack_of_open_elements.has_in_scope(token.tag_name())) {
1880 log_parse_error();
1881 return;
1882 }
1883
1884 generate_implied_end_tags();
1885 if (current_node().local_name() != token.tag_name()) {
1886 log_parse_error();
1887 }
1888 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name());
1889 m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
1890 return;
1891 }
1892
1893 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::table) {
1894 if (!document().in_quirks_mode()) {
1895 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
1896 close_a_p_element();
1897 }
1898 (void)insert_html_element(token);
1899 m_frameset_ok = false;
1900 m_insertion_mode = InsertionMode::InTable;
1901 return;
1902 }
1903
1904 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::br) {
1905 token.drop_attributes();
1906 goto BRStartTag;
1907 }
1908
1909 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::area, HTML::TagNames::br, HTML::TagNames::embed, HTML::TagNames::img, HTML::TagNames::keygen, HTML::TagNames::wbr)) {
1910 BRStartTag:
1911 reconstruct_the_active_formatting_elements();
1912 (void)insert_html_element(token);
1913 (void)m_stack_of_open_elements.pop();
1914 token.acknowledge_self_closing_flag_if_set();
1915 m_frameset_ok = false;
1916 return;
1917 }
1918
1919 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::input) {
1920 reconstruct_the_active_formatting_elements();
1921 (void)insert_html_element(token);
1922 (void)m_stack_of_open_elements.pop();
1923 token.acknowledge_self_closing_flag_if_set();
1924 auto type_attribute = token.attribute(HTML::AttributeNames::type);
1925 if (type_attribute.is_null() || !type_attribute.equals_ignoring_ascii_case("hidden"sv)) {
1926 m_frameset_ok = false;
1927 }
1928 return;
1929 }
1930
1931 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::param, HTML::TagNames::source, HTML::TagNames::track)) {
1932 (void)insert_html_element(token);
1933 (void)m_stack_of_open_elements.pop();
1934 token.acknowledge_self_closing_flag_if_set();
1935 return;
1936 }
1937
1938 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::hr) {
1939 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p))
1940 close_a_p_element();
1941 (void)insert_html_element(token);
1942 (void)m_stack_of_open_elements.pop();
1943 token.acknowledge_self_closing_flag_if_set();
1944 m_frameset_ok = false;
1945 return;
1946 }
1947
1948 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::image) {
1949 // Parse error. Change the token's tag name to HTML::TagNames::img and reprocess it. (Don't ask.)
1950 log_parse_error();
1951 token.set_tag_name("img");
1952 process_using_the_rules_for(m_insertion_mode, token);
1953 return;
1954 }
1955
1956 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::textarea) {
1957 (void)insert_html_element(token);
1958
1959 m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
1960
1961 // If the next token is a U+000A LINE FEED (LF) character token,
1962 // then ignore that token and move on to the next one.
1963 // (Newlines at the start of pre blocks are ignored as an authoring convenience.)
1964 auto next_token = m_tokenizer.next_token();
1965
1966 m_original_insertion_mode = m_insertion_mode;
1967 m_frameset_ok = false;
1968 m_insertion_mode = InsertionMode::Text;
1969
1970 if (next_token.has_value() && next_token.value().is_character() && next_token.value().code_point() == '\n') {
1971 // Ignore it.
1972 } else {
1973 process_using_the_rules_for(m_insertion_mode, next_token.value());
1974 }
1975 return;
1976 }
1977
1978 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::xmp) {
1979 if (m_stack_of_open_elements.has_in_button_scope(HTML::TagNames::p)) {
1980 close_a_p_element();
1981 }
1982 reconstruct_the_active_formatting_elements();
1983 m_frameset_ok = false;
1984 parse_generic_raw_text_element(token);
1985 return;
1986 }
1987
1988 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::iframe) {
1989 m_frameset_ok = false;
1990 parse_generic_raw_text_element(token);
1991 return;
1992 }
1993
1994 if (token.is_start_tag() && ((token.tag_name() == HTML::TagNames::noembed) || (token.tag_name() == HTML::TagNames::noscript && m_scripting_enabled))) {
1995 parse_generic_raw_text_element(token);
1996 return;
1997 }
1998
1999 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::select) {
2000 reconstruct_the_active_formatting_elements();
2001 (void)insert_html_element(token);
2002 m_frameset_ok = false;
2003 switch (m_insertion_mode) {
2004 case InsertionMode::InTable:
2005 case InsertionMode::InCaption:
2006 case InsertionMode::InTableBody:
2007 case InsertionMode::InRow:
2008 case InsertionMode::InCell:
2009 m_insertion_mode = InsertionMode::InSelectInTable;
2010 break;
2011 default:
2012 m_insertion_mode = InsertionMode::InSelect;
2013 break;
2014 }
2015 return;
2016 }
2017
2018 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::optgroup, HTML::TagNames::option)) {
2019 if (current_node().local_name() == HTML::TagNames::option)
2020 (void)m_stack_of_open_elements.pop();
2021 reconstruct_the_active_formatting_elements();
2022 (void)insert_html_element(token);
2023 return;
2024 }
2025
2026 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::rb, HTML::TagNames::rtc)) {
2027 if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::ruby))
2028 generate_implied_end_tags();
2029
2030 if (current_node().local_name() != HTML::TagNames::ruby)
2031 log_parse_error();
2032
2033 (void)insert_html_element(token);
2034 return;
2035 }
2036
2037 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::rp, HTML::TagNames::rt)) {
2038 if (m_stack_of_open_elements.has_in_scope(HTML::TagNames::ruby))
2039 generate_implied_end_tags(HTML::TagNames::rtc);
2040
2041 if (current_node().local_name() != HTML::TagNames::rtc || current_node().local_name() != HTML::TagNames::ruby)
2042 log_parse_error();
2043
2044 (void)insert_html_element(token);
2045 return;
2046 }
2047
2048 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::math) {
2049 reconstruct_the_active_formatting_elements();
2050 adjust_mathml_attributes(token);
2051 adjust_foreign_attributes(token);
2052
2053 (void)insert_foreign_element(token, Namespace::MathML);
2054
2055 if (token.is_self_closing()) {
2056 (void)m_stack_of_open_elements.pop();
2057 token.acknowledge_self_closing_flag_if_set();
2058 }
2059 return;
2060 }
2061
2062 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::svg) {
2063 reconstruct_the_active_formatting_elements();
2064 adjust_svg_attributes(token);
2065 adjust_foreign_attributes(token);
2066
2067 (void)insert_foreign_element(token, Namespace::SVG);
2068
2069 if (token.is_self_closing()) {
2070 (void)m_stack_of_open_elements.pop();
2071 token.acknowledge_self_closing_flag_if_set();
2072 }
2073 return;
2074 }
2075
2076 if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::frame, HTML::TagNames::head, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr))) {
2077 log_parse_error();
2078 return;
2079 }
2080
2081 // Any other start tag
2082 if (token.is_start_tag()) {
2083 reconstruct_the_active_formatting_elements();
2084 (void)insert_html_element(token);
2085 return;
2086 }
2087
2088 if (token.is_end_tag()) {
2089 AnyOtherEndTag:
2090 JS::GCPtr<DOM::Element> node;
2091 for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) {
2092 node = m_stack_of_open_elements.elements()[i].ptr();
2093 if (node->local_name() == token.tag_name()) {
2094 generate_implied_end_tags(token.tag_name());
2095 if (node.ptr() != ¤t_node()) {
2096 log_parse_error();
2097 }
2098 while (¤t_node() != node.ptr()) {
2099 (void)m_stack_of_open_elements.pop();
2100 }
2101 (void)m_stack_of_open_elements.pop();
2102 break;
2103 }
2104 if (is_special_tag(node->local_name(), node->namespace_())) {
2105 log_parse_error();
2106 return;
2107 }
2108 }
2109 return;
2110 }
2111}
2112
2113void HTMLParser::adjust_mathml_attributes(HTMLToken& token)
2114{
2115 token.adjust_attribute_name("definitionurl", "definitionURL");
2116}
2117
2118void HTMLParser::adjust_svg_tag_names(HTMLToken& token)
2119{
2120 token.adjust_tag_name("altglyph", "altGlyph");
2121 token.adjust_tag_name("altglyphdef", "altGlyphDef");
2122 token.adjust_tag_name("altglyphitem", "altGlyphItem");
2123 token.adjust_tag_name("animatecolor", "animateColor");
2124 token.adjust_tag_name("animatemotion", "animateMotion");
2125 token.adjust_tag_name("animatetransform", "animateTransform");
2126 token.adjust_tag_name("clippath", "clipPath");
2127 token.adjust_tag_name("feblend", "feBlend");
2128 token.adjust_tag_name("fecolormatrix", "feColorMatrix");
2129 token.adjust_tag_name("fecomponenttransfer", "feComponentTransfer");
2130 token.adjust_tag_name("fecomposite", "feComposite");
2131 token.adjust_tag_name("feconvolvematrix", "feConvolveMatrix");
2132 token.adjust_tag_name("fediffuselighting", "feDiffuseLighting");
2133 token.adjust_tag_name("fedisplacementmap", "feDisplacementMap");
2134 token.adjust_tag_name("fedistantlight", "feDistantLight");
2135 token.adjust_tag_name("fedropshadow", "feDropShadow");
2136 token.adjust_tag_name("feflood", "feFlood");
2137 token.adjust_tag_name("fefunca", "feFuncA");
2138 token.adjust_tag_name("fefuncb", "feFuncB");
2139 token.adjust_tag_name("fefuncg", "feFuncG");
2140 token.adjust_tag_name("fefuncr", "feFuncR");
2141 token.adjust_tag_name("fegaussianblur", "feGaussianBlur");
2142 token.adjust_tag_name("feimage", "feImage");
2143 token.adjust_tag_name("femerge", "feMerge");
2144 token.adjust_tag_name("femergenode", "feMergeNode");
2145 token.adjust_tag_name("femorphology", "feMorphology");
2146 token.adjust_tag_name("feoffset", "feOffset");
2147 token.adjust_tag_name("fepointlight", "fePointLight");
2148 token.adjust_tag_name("fespecularlighting", "feSpecularLighting");
2149 token.adjust_tag_name("fespotlight", "feSpotlight");
2150 token.adjust_tag_name("foreignobject", "foreignObject");
2151 token.adjust_tag_name("glyphref", "glyphRef");
2152 token.adjust_tag_name("lineargradient", "linearGradient");
2153 token.adjust_tag_name("radialgradient", "radialGradient");
2154 token.adjust_tag_name("textpath", "textPath");
2155}
2156
2157void HTMLParser::adjust_svg_attributes(HTMLToken& token)
2158{
2159 token.adjust_attribute_name("attributename", "attributeName");
2160 token.adjust_attribute_name("attributetype", "attributeType");
2161 token.adjust_attribute_name("basefrequency", "baseFrequency");
2162 token.adjust_attribute_name("baseprofile", "baseProfile");
2163 token.adjust_attribute_name("calcmode", "calcMode");
2164 token.adjust_attribute_name("clippathunits", "clipPathUnits");
2165 token.adjust_attribute_name("diffuseconstant", "diffuseConstant");
2166 token.adjust_attribute_name("edgemode", "edgeMode");
2167 token.adjust_attribute_name("filterunits", "filterUnits");
2168 token.adjust_attribute_name("glyphref", "glyphRef");
2169 token.adjust_attribute_name("gradienttransform", "gradientTransform");
2170 token.adjust_attribute_name("gradientunits", "gradientUnits");
2171 token.adjust_attribute_name("kernelmatrix", "kernelMatrix");
2172 token.adjust_attribute_name("kernelunitlength", "kernelUnitLength");
2173 token.adjust_attribute_name("keypoints", "keyPoints");
2174 token.adjust_attribute_name("keysplines", "keySplines");
2175 token.adjust_attribute_name("keytimes", "keyTimes");
2176 token.adjust_attribute_name("lengthadjust", "lengthAdjust");
2177 token.adjust_attribute_name("limitingconeangle", "limitingConeAngle");
2178 token.adjust_attribute_name("markerheight", "markerHeight");
2179 token.adjust_attribute_name("markerunits", "markerUnits");
2180 token.adjust_attribute_name("markerwidth", "markerWidth");
2181 token.adjust_attribute_name("maskcontentunits", "maskContentUnits");
2182 token.adjust_attribute_name("maskunits", "maskUnits");
2183 token.adjust_attribute_name("numoctaves", "numOctaves");
2184 token.adjust_attribute_name("pathlength", "pathLength");
2185 token.adjust_attribute_name("patterncontentunits", "patternContentUnits");
2186 token.adjust_attribute_name("patterntransform", "patternTransform");
2187 token.adjust_attribute_name("patternunits", "patternUnits");
2188 token.adjust_attribute_name("pointsatx", "pointsAtX");
2189 token.adjust_attribute_name("pointsaty", "pointsAtY");
2190 token.adjust_attribute_name("pointsatz", "pointsAtZ");
2191 token.adjust_attribute_name("preservealpha", "preserveAlpha");
2192 token.adjust_attribute_name("preserveaspectratio", "preserveAspectRatio");
2193 token.adjust_attribute_name("primitiveunits", "primitiveUnits");
2194 token.adjust_attribute_name("refx", "refX");
2195 token.adjust_attribute_name("refy", "refY");
2196 token.adjust_attribute_name("repeatcount", "repeatCount");
2197 token.adjust_attribute_name("repeatdur", "repeatDur");
2198 token.adjust_attribute_name("requiredextensions", "requiredExtensions");
2199 token.adjust_attribute_name("requiredfeatures", "requiredFeatures");
2200 token.adjust_attribute_name("specularconstant", "specularConstant");
2201 token.adjust_attribute_name("specularexponent", "specularExponent");
2202 token.adjust_attribute_name("spreadmethod", "spreadMethod");
2203 token.adjust_attribute_name("startoffset", "startOffset");
2204 token.adjust_attribute_name("stddeviation", "stdDeviation");
2205 token.adjust_attribute_name("stitchtiles", "stitchTiles");
2206 token.adjust_attribute_name("surfacescale", "surfaceScale");
2207 token.adjust_attribute_name("systemlanguage", "systemLanguage");
2208 token.adjust_attribute_name("tablevalues", "tableValues");
2209 token.adjust_attribute_name("targetx", "targetX");
2210 token.adjust_attribute_name("targety", "targetY");
2211 token.adjust_attribute_name("textlength", "textLength");
2212 token.adjust_attribute_name("viewbox", "viewBox");
2213 token.adjust_attribute_name("viewtarget", "viewTarget");
2214 token.adjust_attribute_name("xchannelselector", "xChannelSelector");
2215 token.adjust_attribute_name("ychannelselector", "yChannelSelector");
2216 token.adjust_attribute_name("zoomandpan", "zoomAndPan");
2217}
2218
2219void HTMLParser::adjust_foreign_attributes(HTMLToken& token)
2220{
2221 token.adjust_foreign_attribute("xlink:actuate", "xlink", "actuate", Namespace::XLink);
2222 token.adjust_foreign_attribute("xlink:arcrole", "xlink", "arcrole", Namespace::XLink);
2223 token.adjust_foreign_attribute("xlink:href", "xlink", "href", Namespace::XLink);
2224 token.adjust_foreign_attribute("xlink:role", "xlink", "role", Namespace::XLink);
2225 token.adjust_foreign_attribute("xlink:show", "xlink", "show", Namespace::XLink);
2226 token.adjust_foreign_attribute("xlink:title", "xlink", "title", Namespace::XLink);
2227 token.adjust_foreign_attribute("xlink:type", "xlink", "type", Namespace::XLink);
2228
2229 token.adjust_foreign_attribute("xml:lang", "xml", "lang", Namespace::XML);
2230 token.adjust_foreign_attribute("xml:space", "xml", "space", Namespace::XML);
2231
2232 token.adjust_foreign_attribute("xmlns", "", "xmlns", Namespace::XMLNS);
2233 token.adjust_foreign_attribute("xmlns:xlink", "xmlns", "xlink", Namespace::XMLNS);
2234}
2235
2236void HTMLParser::increment_script_nesting_level()
2237{
2238 ++m_script_nesting_level;
2239}
2240
2241void HTMLParser::decrement_script_nesting_level()
2242{
2243 VERIFY(m_script_nesting_level);
2244 --m_script_nesting_level;
2245}
2246
2247// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata
2248void HTMLParser::handle_text(HTMLToken& token)
2249{
2250 if (token.is_character()) {
2251 insert_character(token.code_point());
2252 return;
2253 }
2254 if (token.is_end_of_file()) {
2255 log_parse_error();
2256 if (current_node().local_name() == HTML::TagNames::script)
2257 verify_cast<HTMLScriptElement>(current_node()).set_already_started(Badge<HTMLParser> {}, true);
2258 (void)m_stack_of_open_elements.pop();
2259 m_insertion_mode = m_original_insertion_mode;
2260 process_using_the_rules_for(m_insertion_mode, token);
2261 return;
2262 }
2263
2264 // -> An end tag whose tag name is "script"
2265 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::script) {
2266 // FIXME: If the active speculative HTML parser is null and the JavaScript execution context stack is empty, then perform a microtask checkpoint.
2267
2268 // Non-standard: Make sure the <script> element has up-to-date text content before preparing the script.
2269 flush_character_insertions();
2270
2271 // Let script be the current node (which will be a script element).
2272 JS::NonnullGCPtr<HTMLScriptElement> script = verify_cast<HTMLScriptElement>(current_node());
2273
2274 // Pop the current node off the stack of open elements.
2275 (void)m_stack_of_open_elements.pop();
2276
2277 // Switch the insertion mode to the original insertion mode.
2278 m_insertion_mode = m_original_insertion_mode;
2279
2280 // Let the old insertion point have the same value as the current insertion point.
2281 m_tokenizer.store_insertion_point();
2282
2283 // Let the insertion point be just before the next input character.
2284 m_tokenizer.update_insertion_point();
2285
2286 // Increment the parser's script nesting level by one.
2287 increment_script_nesting_level();
2288
2289 // If the active speculative HTML parser is null, then prepare the script element script.
2290 // This might cause some script to execute, which might cause new characters to be inserted into the tokenizer,
2291 // and might cause the tokenizer to output more tokens, resulting in a reentrant invocation of the parser.
2292 // FIXME: Check if active speculative HTML parser is null.
2293 script->prepare_script(Badge<HTMLParser> {});
2294
2295 // Decrement the parser's script nesting level by one.
2296 decrement_script_nesting_level();
2297
2298 // If the parser's script nesting level is zero, then set the parser pause flag to false.
2299 if (script_nesting_level() == 0)
2300 m_parser_pause_flag = false;
2301
2302 // Let the insertion point have the value of the old insertion point.
2303 m_tokenizer.restore_insertion_point();
2304
2305 // At this stage, if the pending parsing-blocking script is not null, then:
2306 if (document().pending_parsing_blocking_script()) {
2307 // -> If the script nesting level is not zero:
2308 if (script_nesting_level() != 0) {
2309 // Set the parser pause flag to true,
2310 m_parser_pause_flag = true;
2311 // FIXME: and abort the processing of any nested invocations of the tokenizer, yielding control back to the caller.
2312 // (Tokenization will resume when the caller returns to the "outer" tree construction stage.)
2313 TODO();
2314 }
2315
2316 // Otherwise:
2317 else {
2318 // While the pending parsing-blocking script is not null:
2319 while (document().pending_parsing_blocking_script()) {
2320 // 1. Let the script be the pending parsing-blocking script.
2321 // 2. Set the pending parsing-blocking script to null.
2322 auto the_script = document().take_pending_parsing_blocking_script({});
2323
2324 // FIXME: 3. Start the speculative HTML parser for this instance of the HTML parser.
2325
2326 // 4. Block the tokenizer for this instance of the HTML parser, such that the event loop will not run tasks that invoke the tokenizer.
2327 m_tokenizer.set_blocked(true);
2328
2329 // 5. If the parser's Document has a style sheet that is blocking scripts
2330 // or the script's ready to be parser-executed is false:
2331 if (m_document->has_a_style_sheet_that_is_blocking_scripts() || script->is_ready_to_be_parser_executed() == false) {
2332 // spin the event loop until the parser's Document has no style sheet that is blocking scripts
2333 // and the script's ready to be parser-executed becomes true.
2334 main_thread_event_loop().spin_until([&] {
2335 return !m_document->has_a_style_sheet_that_is_blocking_scripts() && script->is_ready_to_be_parser_executed();
2336 });
2337 }
2338
2339 // 6. If this parser has been aborted in the meantime, return.
2340 if (m_aborted)
2341 return;
2342
2343 // FIXME: 7. Stop the speculative HTML parser for this instance of the HTML parser.
2344
2345 // 8. Unblock the tokenizer for this instance of the HTML parser, such that tasks that invoke the tokenizer can again be run.
2346 m_tokenizer.set_blocked(false);
2347
2348 // 9. Let the insertion point be just before the next input character.
2349 m_tokenizer.update_insertion_point();
2350
2351 // 10. Increment the parser's script nesting level by one (it should be zero before this step, so this sets it to one).
2352 VERIFY(script_nesting_level() == 0);
2353 increment_script_nesting_level();
2354
2355 // 11. Execute the script element the script.
2356 the_script->execute_script();
2357
2358 // 12. Decrement the parser's script nesting level by one.
2359 decrement_script_nesting_level();
2360
2361 // If the parser's script nesting level is zero (which it always should be at this point), then set the parser pause flag to false.
2362 VERIFY(script_nesting_level() == 0);
2363 m_parser_pause_flag = false;
2364
2365 // 13. Let the insertion point be undefined again.
2366 m_tokenizer.undefine_insertion_point();
2367 }
2368 }
2369 }
2370
2371 return;
2372 }
2373
2374 if (token.is_end_tag()) {
2375 (void)m_stack_of_open_elements.pop();
2376 m_insertion_mode = m_original_insertion_mode;
2377 return;
2378 }
2379 TODO();
2380}
2381
2382void HTMLParser::clear_the_stack_back_to_a_table_context()
2383{
2384 while (!current_node().local_name().is_one_of(HTML::TagNames::table, HTML::TagNames::template_, HTML::TagNames::html))
2385 (void)m_stack_of_open_elements.pop();
2386
2387 if (current_node().local_name() == HTML::TagNames::html)
2388 VERIFY(m_parsing_fragment);
2389}
2390
2391void HTMLParser::clear_the_stack_back_to_a_table_row_context()
2392{
2393 while (!current_node().local_name().is_one_of(HTML::TagNames::tr, HTML::TagNames::template_, HTML::TagNames::html))
2394 (void)m_stack_of_open_elements.pop();
2395
2396 if (current_node().local_name() == HTML::TagNames::html)
2397 VERIFY(m_parsing_fragment);
2398}
2399
2400void HTMLParser::clear_the_stack_back_to_a_table_body_context()
2401{
2402 while (!current_node().local_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::template_, HTML::TagNames::html))
2403 (void)m_stack_of_open_elements.pop();
2404
2405 if (current_node().local_name() == HTML::TagNames::html)
2406 VERIFY(m_parsing_fragment);
2407}
2408
2409void HTMLParser::handle_in_row(HTMLToken& token)
2410{
2411 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::th, HTML::TagNames::td)) {
2412 clear_the_stack_back_to_a_table_row_context();
2413 (void)insert_html_element(token);
2414 m_insertion_mode = InsertionMode::InCell;
2415 m_list_of_active_formatting_elements.add_marker();
2416 return;
2417 }
2418
2419 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::tr) {
2420 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tr)) {
2421 log_parse_error();
2422 return;
2423 }
2424 clear_the_stack_back_to_a_table_row_context();
2425 (void)m_stack_of_open_elements.pop();
2426 m_insertion_mode = InsertionMode::InTableBody;
2427 return;
2428 }
2429
2430 if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr))
2431 || (token.is_end_tag() && token.tag_name() == HTML::TagNames::table)) {
2432 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tr)) {
2433 log_parse_error();
2434 return;
2435 }
2436 clear_the_stack_back_to_a_table_row_context();
2437 (void)m_stack_of_open_elements.pop();
2438 m_insertion_mode = InsertionMode::InTableBody;
2439 process_using_the_rules_for(m_insertion_mode, token);
2440 return;
2441 }
2442
2443 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) {
2444 if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) {
2445 log_parse_error();
2446 return;
2447 }
2448 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tr)) {
2449 return;
2450 }
2451 clear_the_stack_back_to_a_table_row_context();
2452 (void)m_stack_of_open_elements.pop();
2453 m_insertion_mode = InsertionMode::InTableBody;
2454 process_using_the_rules_for(m_insertion_mode, token);
2455 return;
2456 }
2457
2458 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html, HTML::TagNames::td, HTML::TagNames::th)) {
2459 log_parse_error();
2460 return;
2461 }
2462
2463 process_using_the_rules_for(InsertionMode::InTable, token);
2464}
2465
2466void HTMLParser::close_the_cell()
2467{
2468 generate_implied_end_tags();
2469 if (!current_node().local_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) {
2470 log_parse_error();
2471 }
2472 while (!current_node().local_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th))
2473 (void)m_stack_of_open_elements.pop();
2474 (void)m_stack_of_open_elements.pop();
2475 m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
2476 m_insertion_mode = InsertionMode::InRow;
2477}
2478
2479void HTMLParser::handle_in_cell(HTMLToken& token)
2480{
2481 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) {
2482 if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) {
2483 log_parse_error();
2484 return;
2485 }
2486 generate_implied_end_tags();
2487
2488 if (current_node().local_name() != token.tag_name()) {
2489 log_parse_error();
2490 }
2491
2492 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(token.tag_name());
2493
2494 m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
2495
2496 m_insertion_mode = InsertionMode::InRow;
2497 return;
2498 }
2499 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) {
2500 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::td) && !m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::th)) {
2501 VERIFY(m_parsing_fragment);
2502 log_parse_error();
2503 return;
2504 }
2505 close_the_cell();
2506 process_using_the_rules_for(m_insertion_mode, token);
2507 return;
2508 }
2509
2510 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html)) {
2511 log_parse_error();
2512 return;
2513 }
2514
2515 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr)) {
2516 if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) {
2517 log_parse_error();
2518 return;
2519 }
2520 close_the_cell();
2521 // Reprocess the token.
2522 process_using_the_rules_for(m_insertion_mode, token);
2523 return;
2524 }
2525
2526 process_using_the_rules_for(InsertionMode::InBody, token);
2527}
2528
2529void HTMLParser::handle_in_table_text(HTMLToken& token)
2530{
2531 if (token.is_character()) {
2532 if (token.code_point() == 0) {
2533 log_parse_error();
2534 return;
2535 }
2536
2537 m_pending_table_character_tokens.append(move(token));
2538 return;
2539 }
2540
2541 for (auto& pending_token : m_pending_table_character_tokens) {
2542 VERIFY(pending_token.is_character());
2543 if (!pending_token.is_parser_whitespace()) {
2544 // If any of the tokens in the pending table character tokens list
2545 // are character tokens that are not ASCII whitespace, then this is a parse error:
2546 // reprocess the character tokens in the pending table character tokens list using
2547 // the rules given in the "anything else" entry in the "in table" insertion mode.
2548 log_parse_error();
2549 m_foster_parenting = true;
2550 process_using_the_rules_for(InsertionMode::InBody, token);
2551 m_foster_parenting = false;
2552 return;
2553 }
2554 }
2555
2556 for (auto& pending_token : m_pending_table_character_tokens) {
2557 insert_character(pending_token.code_point());
2558 }
2559
2560 m_insertion_mode = m_original_insertion_mode;
2561 process_using_the_rules_for(m_insertion_mode, token);
2562}
2563
2564void HTMLParser::handle_in_table_body(HTMLToken& token)
2565{
2566 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::tr) {
2567 clear_the_stack_back_to_a_table_body_context();
2568 (void)insert_html_element(token);
2569 m_insertion_mode = InsertionMode::InRow;
2570 return;
2571 }
2572
2573 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::th, HTML::TagNames::td)) {
2574 log_parse_error();
2575 clear_the_stack_back_to_a_table_body_context();
2576 (void)insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::tr));
2577 m_insertion_mode = InsertionMode::InRow;
2578 process_using_the_rules_for(m_insertion_mode, token);
2579 return;
2580 }
2581
2582 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) {
2583 if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name())) {
2584 log_parse_error();
2585 return;
2586 }
2587 clear_the_stack_back_to_a_table_body_context();
2588 (void)m_stack_of_open_elements.pop();
2589 m_insertion_mode = InsertionMode::InTable;
2590 return;
2591 }
2592
2593 if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead))
2594 || (token.is_end_tag() && token.tag_name() == HTML::TagNames::table)) {
2595
2596 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tbody)
2597 && !m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::thead)
2598 && !m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::tfoot)) {
2599 log_parse_error();
2600 return;
2601 }
2602
2603 clear_the_stack_back_to_a_table_body_context();
2604 (void)m_stack_of_open_elements.pop();
2605 m_insertion_mode = InsertionMode::InTable;
2606 process_using_the_rules_for(InsertionMode::InTable, token);
2607 return;
2608 }
2609
2610 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html, HTML::TagNames::td, HTML::TagNames::th, HTML::TagNames::tr)) {
2611 log_parse_error();
2612 return;
2613 }
2614
2615 process_using_the_rules_for(InsertionMode::InTable, token);
2616}
2617
2618void HTMLParser::handle_in_table(HTMLToken& token)
2619{
2620 if (token.is_character() && current_node().local_name().is_one_of(HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr)) {
2621 m_pending_table_character_tokens.clear();
2622 m_original_insertion_mode = m_insertion_mode;
2623 m_insertion_mode = InsertionMode::InTableText;
2624 process_using_the_rules_for(InsertionMode::InTableText, token);
2625 return;
2626 }
2627 if (token.is_comment()) {
2628 insert_comment(token);
2629 return;
2630 }
2631 if (token.is_doctype()) {
2632 log_parse_error();
2633 return;
2634 }
2635 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::caption) {
2636 clear_the_stack_back_to_a_table_context();
2637 m_list_of_active_formatting_elements.add_marker();
2638 (void)insert_html_element(token);
2639 m_insertion_mode = InsertionMode::InCaption;
2640 return;
2641 }
2642 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::colgroup) {
2643 clear_the_stack_back_to_a_table_context();
2644 (void)insert_html_element(token);
2645 m_insertion_mode = InsertionMode::InColumnGroup;
2646 return;
2647 }
2648 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::col) {
2649 clear_the_stack_back_to_a_table_context();
2650 (void)insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::colgroup));
2651 m_insertion_mode = InsertionMode::InColumnGroup;
2652 process_using_the_rules_for(m_insertion_mode, token);
2653 return;
2654 }
2655 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) {
2656 clear_the_stack_back_to_a_table_context();
2657 (void)insert_html_element(token);
2658 m_insertion_mode = InsertionMode::InTableBody;
2659 return;
2660 }
2661 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th, HTML::TagNames::tr)) {
2662 clear_the_stack_back_to_a_table_context();
2663 (void)insert_html_element(HTMLToken::make_start_tag(HTML::TagNames::tbody));
2664 m_insertion_mode = InsertionMode::InTableBody;
2665 process_using_the_rules_for(m_insertion_mode, token);
2666 return;
2667 }
2668 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::table) {
2669 log_parse_error();
2670 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::table))
2671 return;
2672
2673 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::table);
2674
2675 reset_the_insertion_mode_appropriately();
2676 process_using_the_rules_for(m_insertion_mode, token);
2677 return;
2678 }
2679 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::table) {
2680 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::table)) {
2681 log_parse_error();
2682 return;
2683 }
2684
2685 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::table);
2686
2687 reset_the_insertion_mode_appropriately();
2688 return;
2689 }
2690 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) {
2691 log_parse_error();
2692 return;
2693 }
2694 if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::style, HTML::TagNames::script, HTML::TagNames::template_))
2695 || (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_)) {
2696 process_using_the_rules_for(InsertionMode::InHead, token);
2697 return;
2698 }
2699 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::input) {
2700 auto type_attribute = token.attribute(HTML::AttributeNames::type);
2701 if (type_attribute.is_null() || !type_attribute.equals_ignoring_ascii_case("hidden"sv)) {
2702 goto AnythingElse;
2703 }
2704
2705 log_parse_error();
2706 (void)insert_html_element(token);
2707
2708 // FIXME: Is this the correct interpretation of "Pop that input element off the stack of open elements."?
2709 // Because this wording is the first time it's seen in the spec.
2710 // Other times it's worded as: "Immediately pop the current node off the stack of open elements."
2711 (void)m_stack_of_open_elements.pop();
2712 token.acknowledge_self_closing_flag_if_set();
2713 return;
2714 }
2715 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::form) {
2716 log_parse_error();
2717 if (m_form_element.ptr() || m_stack_of_open_elements.contains(HTML::TagNames::template_)) {
2718 return;
2719 }
2720
2721 m_form_element = JS::make_handle(verify_cast<HTMLFormElement>(*insert_html_element(token)));
2722
2723 // FIXME: See previous FIXME, as this is the same situation but for form.
2724 (void)m_stack_of_open_elements.pop();
2725 return;
2726 }
2727 if (token.is_end_of_file()) {
2728 process_using_the_rules_for(InsertionMode::InBody, token);
2729 return;
2730 }
2731
2732AnythingElse:
2733 log_parse_error();
2734 m_foster_parenting = true;
2735 process_using_the_rules_for(InsertionMode::InBody, token);
2736 m_foster_parenting = false;
2737}
2738
2739void HTMLParser::handle_in_select_in_table(HTMLToken& token)
2740{
2741 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::td, HTML::TagNames::th)) {
2742 log_parse_error();
2743 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select);
2744 reset_the_insertion_mode_appropriately();
2745 process_using_the_rules_for(m_insertion_mode, token);
2746 return;
2747 }
2748
2749 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr, HTML::TagNames::td, HTML::TagNames::th)) {
2750 log_parse_error();
2751
2752 if (!m_stack_of_open_elements.has_in_table_scope(token.tag_name()))
2753 return;
2754
2755 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select);
2756 reset_the_insertion_mode_appropriately();
2757 process_using_the_rules_for(m_insertion_mode, token);
2758 return;
2759 }
2760
2761 process_using_the_rules_for(InsertionMode::InSelect, token);
2762}
2763
2764void HTMLParser::handle_in_select(HTMLToken& token)
2765{
2766 if (token.is_character()) {
2767 if (token.code_point() == 0) {
2768 log_parse_error();
2769 return;
2770 }
2771 insert_character(token.code_point());
2772 return;
2773 }
2774
2775 if (token.is_comment()) {
2776 insert_comment(token);
2777 return;
2778 }
2779
2780 if (token.is_doctype()) {
2781 log_parse_error();
2782 return;
2783 }
2784
2785 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
2786 process_using_the_rules_for(InsertionMode::InBody, token);
2787 return;
2788 }
2789
2790 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::option) {
2791 if (current_node().local_name() == HTML::TagNames::option) {
2792 (void)m_stack_of_open_elements.pop();
2793 }
2794 (void)insert_html_element(token);
2795 return;
2796 }
2797
2798 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::optgroup) {
2799 if (current_node().local_name() == HTML::TagNames::option) {
2800 (void)m_stack_of_open_elements.pop();
2801 }
2802 if (current_node().local_name() == HTML::TagNames::optgroup) {
2803 (void)m_stack_of_open_elements.pop();
2804 }
2805 (void)insert_html_element(token);
2806 return;
2807 }
2808
2809 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::optgroup) {
2810 if (current_node().local_name() == HTML::TagNames::option && node_before_current_node().local_name() == HTML::TagNames::optgroup)
2811 (void)m_stack_of_open_elements.pop();
2812
2813 if (current_node().local_name() == HTML::TagNames::optgroup) {
2814 (void)m_stack_of_open_elements.pop();
2815 } else {
2816 log_parse_error();
2817 return;
2818 }
2819 return;
2820 }
2821
2822 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::option) {
2823 if (current_node().local_name() == HTML::TagNames::option) {
2824 (void)m_stack_of_open_elements.pop();
2825 } else {
2826 log_parse_error();
2827 return;
2828 }
2829 return;
2830 }
2831
2832 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::select) {
2833 if (!m_stack_of_open_elements.has_in_select_scope(HTML::TagNames::select)) {
2834 VERIFY(m_parsing_fragment);
2835 log_parse_error();
2836 return;
2837 }
2838 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select);
2839 reset_the_insertion_mode_appropriately();
2840 return;
2841 }
2842
2843 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::select) {
2844 log_parse_error();
2845
2846 if (!m_stack_of_open_elements.has_in_select_scope(HTML::TagNames::select)) {
2847 VERIFY(m_parsing_fragment);
2848 return;
2849 }
2850
2851 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select);
2852 reset_the_insertion_mode_appropriately();
2853 return;
2854 }
2855
2856 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::input, HTML::TagNames::keygen, HTML::TagNames::textarea)) {
2857 log_parse_error();
2858
2859 if (!m_stack_of_open_elements.has_in_select_scope(HTML::TagNames::select)) {
2860 VERIFY(m_parsing_fragment);
2861 return;
2862 }
2863
2864 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::select);
2865 reset_the_insertion_mode_appropriately();
2866 process_using_the_rules_for(m_insertion_mode, token);
2867 return;
2868 }
2869
2870 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::script, HTML::TagNames::template_)) {
2871 process_using_the_rules_for(InsertionMode::InHead, token);
2872 return;
2873 }
2874
2875 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) {
2876 process_using_the_rules_for(InsertionMode::InHead, token);
2877 return;
2878 }
2879
2880 if (token.is_end_of_file()) {
2881 process_using_the_rules_for(InsertionMode::InBody, token);
2882 return;
2883 }
2884
2885 log_parse_error();
2886}
2887
2888void HTMLParser::handle_in_caption(HTMLToken& token)
2889{
2890 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::caption) {
2891 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::caption)) {
2892 VERIFY(m_parsing_fragment);
2893 log_parse_error();
2894 return;
2895 }
2896
2897 generate_implied_end_tags();
2898
2899 if (current_node().local_name() != HTML::TagNames::caption)
2900 log_parse_error();
2901
2902 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::caption);
2903 m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
2904
2905 m_insertion_mode = InsertionMode::InTable;
2906 return;
2907 }
2908
2909 if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr))
2910 || (token.is_end_tag() && token.tag_name() == HTML::TagNames::table)) {
2911 if (!m_stack_of_open_elements.has_in_table_scope(HTML::TagNames::caption)) {
2912 VERIFY(m_parsing_fragment);
2913 log_parse_error();
2914 return;
2915 }
2916
2917 generate_implied_end_tags();
2918
2919 if (current_node().local_name() != HTML::TagNames::caption)
2920 log_parse_error();
2921
2922 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::caption);
2923 m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
2924
2925 m_insertion_mode = InsertionMode::InTable;
2926 process_using_the_rules_for(m_insertion_mode, token);
2927 return;
2928 }
2929
2930 if (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::body, HTML::TagNames::col, HTML::TagNames::colgroup, HTML::TagNames::html, HTML::TagNames::tbody, HTML::TagNames::td, HTML::TagNames::tfoot, HTML::TagNames::th, HTML::TagNames::thead, HTML::TagNames::tr)) {
2931 log_parse_error();
2932 return;
2933 }
2934
2935 process_using_the_rules_for(InsertionMode::InBody, token);
2936}
2937
2938void HTMLParser::handle_in_column_group(HTMLToken& token)
2939{
2940 if (token.is_character() && token.is_parser_whitespace()) {
2941 insert_character(token.code_point());
2942 return;
2943 }
2944
2945 if (token.is_comment()) {
2946 insert_comment(token);
2947 return;
2948 }
2949
2950 if (token.is_doctype()) {
2951 log_parse_error();
2952 return;
2953 }
2954
2955 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
2956 process_using_the_rules_for(InsertionMode::InBody, token);
2957 return;
2958 }
2959
2960 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::col) {
2961 (void)insert_html_element(token);
2962 (void)m_stack_of_open_elements.pop();
2963 token.acknowledge_self_closing_flag_if_set();
2964 return;
2965 }
2966
2967 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::colgroup) {
2968 if (current_node().local_name() != HTML::TagNames::colgroup) {
2969 log_parse_error();
2970 return;
2971 }
2972
2973 (void)m_stack_of_open_elements.pop();
2974 m_insertion_mode = InsertionMode::InTable;
2975 return;
2976 }
2977
2978 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::col) {
2979 log_parse_error();
2980 return;
2981 }
2982
2983 if ((token.is_start_tag() || token.is_end_tag()) && token.tag_name() == HTML::TagNames::template_) {
2984 process_using_the_rules_for(InsertionMode::InHead, token);
2985 return;
2986 }
2987
2988 if (token.is_end_of_file()) {
2989 process_using_the_rules_for(InsertionMode::InBody, token);
2990 return;
2991 }
2992
2993 if (current_node().local_name() != HTML::TagNames::colgroup) {
2994 log_parse_error();
2995 return;
2996 }
2997
2998 (void)m_stack_of_open_elements.pop();
2999 m_insertion_mode = InsertionMode::InTable;
3000 process_using_the_rules_for(m_insertion_mode, token);
3001}
3002
3003void HTMLParser::handle_in_template(HTMLToken& token)
3004{
3005 if (token.is_character() || token.is_comment() || token.is_doctype()) {
3006 process_using_the_rules_for(InsertionMode::InBody, token);
3007 return;
3008 }
3009
3010 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) {
3011 process_using_the_rules_for(InsertionMode::InHead, token);
3012 return;
3013 }
3014
3015 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::template_) {
3016 process_using_the_rules_for(InsertionMode::InHead, token);
3017 return;
3018 }
3019
3020 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::caption, HTML::TagNames::colgroup, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead)) {
3021 m_stack_of_template_insertion_modes.take_last();
3022 m_stack_of_template_insertion_modes.append(InsertionMode::InTable);
3023 m_insertion_mode = InsertionMode::InTable;
3024 process_using_the_rules_for(m_insertion_mode, token);
3025 return;
3026 }
3027
3028 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::col) {
3029 m_stack_of_template_insertion_modes.take_last();
3030 m_stack_of_template_insertion_modes.append(InsertionMode::InColumnGroup);
3031 m_insertion_mode = InsertionMode::InColumnGroup;
3032 process_using_the_rules_for(m_insertion_mode, token);
3033 return;
3034 }
3035
3036 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::tr) {
3037 m_stack_of_template_insertion_modes.take_last();
3038 m_stack_of_template_insertion_modes.append(InsertionMode::InTableBody);
3039 m_insertion_mode = InsertionMode::InTableBody;
3040 process_using_the_rules_for(m_insertion_mode, token);
3041 return;
3042 }
3043
3044 if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) {
3045 m_stack_of_template_insertion_modes.take_last();
3046 m_stack_of_template_insertion_modes.append(InsertionMode::InRow);
3047 m_insertion_mode = InsertionMode::InRow;
3048 process_using_the_rules_for(m_insertion_mode, token);
3049 return;
3050 }
3051
3052 if (token.is_start_tag()) {
3053 m_stack_of_template_insertion_modes.take_last();
3054 m_stack_of_template_insertion_modes.append(InsertionMode::InBody);
3055 m_insertion_mode = InsertionMode::InBody;
3056 process_using_the_rules_for(m_insertion_mode, token);
3057 return;
3058 }
3059
3060 if (token.is_end_tag()) {
3061 log_parse_error();
3062 return;
3063 }
3064
3065 if (token.is_end_of_file()) {
3066 if (!m_stack_of_open_elements.contains(HTML::TagNames::template_)) {
3067 VERIFY(m_parsing_fragment);
3068 stop_parsing();
3069 return;
3070 }
3071
3072 log_parse_error();
3073 m_stack_of_open_elements.pop_until_an_element_with_tag_name_has_been_popped(HTML::TagNames::template_);
3074 m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
3075 m_stack_of_template_insertion_modes.take_last();
3076 reset_the_insertion_mode_appropriately();
3077 process_using_the_rules_for(m_insertion_mode, token);
3078 }
3079}
3080
3081void HTMLParser::handle_in_frameset(HTMLToken& token)
3082{
3083 if (token.is_character() && token.is_parser_whitespace()) {
3084 insert_character(token.code_point());
3085 return;
3086 }
3087
3088 if (token.is_comment()) {
3089 insert_comment(token);
3090 return;
3091 }
3092
3093 if (token.is_doctype()) {
3094 log_parse_error();
3095 return;
3096 }
3097
3098 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
3099 process_using_the_rules_for(InsertionMode::InBody, token);
3100 return;
3101 }
3102
3103 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frameset) {
3104 (void)insert_html_element(token);
3105 return;
3106 }
3107
3108 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::frameset) {
3109 // FIXME: If the current node is the root html element, then this is a parse error; ignore the token. (fragment case)
3110
3111 (void)m_stack_of_open_elements.pop();
3112
3113 if (!m_parsing_fragment && current_node().local_name() != HTML::TagNames::frameset) {
3114 m_insertion_mode = InsertionMode::AfterFrameset;
3115 }
3116 return;
3117 }
3118
3119 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::frame) {
3120 (void)insert_html_element(token);
3121 (void)m_stack_of_open_elements.pop();
3122 token.acknowledge_self_closing_flag_if_set();
3123 return;
3124 }
3125
3126 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noframes) {
3127 process_using_the_rules_for(InsertionMode::InHead, token);
3128 return;
3129 }
3130
3131 if (token.is_end_of_file()) {
3132 // FIXME: If the current node is not the root html element, then this is a parse error.
3133
3134 stop_parsing();
3135 return;
3136 }
3137
3138 log_parse_error();
3139}
3140
3141void HTMLParser::handle_after_frameset(HTMLToken& token)
3142{
3143 if (token.is_character() && token.is_parser_whitespace()) {
3144 insert_character(token.code_point());
3145 return;
3146 }
3147
3148 if (token.is_comment()) {
3149 insert_comment(token);
3150 return;
3151 }
3152
3153 if (token.is_doctype()) {
3154 log_parse_error();
3155 return;
3156 }
3157
3158 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::html) {
3159 process_using_the_rules_for(InsertionMode::InBody, token);
3160 return;
3161 }
3162
3163 if (token.is_end_tag() && token.tag_name() == HTML::TagNames::html) {
3164 m_insertion_mode = InsertionMode::AfterAfterFrameset;
3165 return;
3166 }
3167
3168 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noframes) {
3169 process_using_the_rules_for(InsertionMode::InHead, token);
3170 return;
3171 }
3172
3173 if (token.is_end_of_file()) {
3174 stop_parsing();
3175 return;
3176 }
3177
3178 log_parse_error();
3179}
3180
3181void HTMLParser::handle_after_after_frameset(HTMLToken& token)
3182{
3183 if (token.is_comment()) {
3184 auto comment = document().heap().allocate<DOM::Comment>(document().realm(), document(), token.comment()).release_allocated_value_but_fixme_should_propagate_errors();
3185 MUST(document().append_child(comment));
3186 return;
3187 }
3188
3189 if (token.is_doctype() || token.is_parser_whitespace() || (token.is_start_tag() && token.tag_name() == HTML::TagNames::html)) {
3190 process_using_the_rules_for(InsertionMode::InBody, token);
3191 return;
3192 }
3193
3194 if (token.is_end_of_file()) {
3195 stop_parsing();
3196 return;
3197 }
3198
3199 if (token.is_start_tag() && token.tag_name() == HTML::TagNames::noframes) {
3200 process_using_the_rules_for(InsertionMode::InHead, token);
3201 return;
3202 }
3203
3204 log_parse_error();
3205}
3206
3207void HTMLParser::process_using_the_rules_for_foreign_content(HTMLToken& token)
3208{
3209 if (token.is_character()) {
3210 if (token.code_point() == 0) {
3211 log_parse_error();
3212 insert_character(0xFFFD);
3213 return;
3214 }
3215 if (token.is_parser_whitespace()) {
3216 insert_character(token.code_point());
3217 return;
3218 }
3219 insert_character(token.code_point());
3220 m_frameset_ok = false;
3221 return;
3222 }
3223
3224 if (token.is_comment()) {
3225 insert_comment(token);
3226 return;
3227 }
3228
3229 if (token.is_doctype()) {
3230 log_parse_error();
3231 return;
3232 }
3233
3234 if ((token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::b, HTML::TagNames::big, HTML::TagNames::blockquote, HTML::TagNames::body, HTML::TagNames::br, HTML::TagNames::center, HTML::TagNames::code, HTML::TagNames::dd, HTML::TagNames::div, HTML::TagNames::dl, HTML::TagNames::dt, HTML::TagNames::em, HTML::TagNames::embed, HTML::TagNames::h1, HTML::TagNames::h2, HTML::TagNames::h3, HTML::TagNames::h4, HTML::TagNames::h5, HTML::TagNames::h6, HTML::TagNames::head, HTML::TagNames::hr, HTML::TagNames::i, HTML::TagNames::img, HTML::TagNames::li, HTML::TagNames::listing, HTML::TagNames::menu, HTML::TagNames::meta, HTML::TagNames::nobr, HTML::TagNames::ol, HTML::TagNames::p, HTML::TagNames::pre, HTML::TagNames::ruby, HTML::TagNames::s, HTML::TagNames::small, HTML::TagNames::span, HTML::TagNames::strong, HTML::TagNames::strike, HTML::TagNames::sub, HTML::TagNames::sup, HTML::TagNames::table, HTML::TagNames::tt, HTML::TagNames::u, HTML::TagNames::ul, HTML::TagNames::var))
3235 || (token.is_start_tag() && token.tag_name() == HTML::TagNames::font && (token.has_attribute(HTML::AttributeNames::color) || token.has_attribute(HTML::AttributeNames::face) || token.has_attribute(HTML::AttributeNames::size)))
3236 || (token.is_end_tag() && token.tag_name().is_one_of(HTML::TagNames::br, HTML::TagNames::p))) {
3237 log_parse_error();
3238
3239 // While the current node is not a MathML text integration point, an HTML integration point, or an element in the HTML namespace, pop elements from the stack of open elements.
3240 while (!is_mathml_text_integration_point(current_node())
3241 && !is_html_integration_point(current_node())
3242 && current_node().namespace_() != Namespace::HTML) {
3243 (void)m_stack_of_open_elements.pop();
3244 }
3245
3246 // Reprocess the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
3247 process_using_the_rules_for(m_insertion_mode, token);
3248 return;
3249 }
3250
3251 // Any other start tag
3252 if (token.is_start_tag()) {
3253 if (adjusted_current_node().namespace_() == Namespace::MathML) {
3254 adjust_mathml_attributes(token);
3255 } else if (adjusted_current_node().namespace_() == Namespace::SVG) {
3256 adjust_svg_tag_names(token);
3257 adjust_svg_attributes(token);
3258 }
3259
3260 adjust_foreign_attributes(token);
3261 (void)insert_foreign_element(token, adjusted_current_node().namespace_());
3262
3263 if (token.is_self_closing()) {
3264 if (token.tag_name() == SVG::TagNames::script && current_node().namespace_() == Namespace::SVG) {
3265 token.acknowledge_self_closing_flag_if_set();
3266 goto ScriptEndTag;
3267 }
3268
3269 (void)m_stack_of_open_elements.pop();
3270 token.acknowledge_self_closing_flag_if_set();
3271 }
3272
3273 return;
3274 }
3275
3276 if (token.is_end_tag() && current_node().namespace_() == Namespace::SVG && current_node().tag_name() == SVG::TagNames::script) {
3277 ScriptEndTag:
3278 // Pop the current node off the stack of open elements.
3279 (void)m_stack_of_open_elements.pop();
3280 // Let the old insertion point have the same value as the current insertion point.
3281 m_tokenizer.store_insertion_point();
3282 // Let the insertion point be just before the next input character.
3283 m_tokenizer.update_insertion_point();
3284 // Increment the parser's script nesting level by one.
3285 increment_script_nesting_level();
3286 // Set the parser pause flag to true.
3287 m_parser_pause_flag = true;
3288 // FIXME: Implement SVG script parsing.
3289 TODO();
3290 // Decrement the parser's script nesting level by one.
3291 decrement_script_nesting_level();
3292 // If the parser's script nesting level is zero, then set the parser pause flag to false.
3293 if (script_nesting_level() == 0)
3294 m_parser_pause_flag = false;
3295
3296 // Let the insertion point have the value of the old insertion point.
3297 m_tokenizer.restore_insertion_point();
3298 }
3299
3300 if (token.is_end_tag()) {
3301 JS::GCPtr<DOM::Element> node = current_node();
3302 // FIXME: Not sure if this is the correct to_lowercase, as the specification says "to ASCII lowercase"
3303 if (node->tag_name().to_lowercase() != token.tag_name())
3304 log_parse_error();
3305 for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) {
3306 if (node.ptr() == &m_stack_of_open_elements.first()) {
3307 VERIFY(m_parsing_fragment);
3308 return;
3309 }
3310 // FIXME: See the above FIXME
3311 if (node->tag_name().to_lowercase() == token.tag_name()) {
3312 while (¤t_node() != node.ptr())
3313 (void)m_stack_of_open_elements.pop();
3314 (void)m_stack_of_open_elements.pop();
3315 return;
3316 }
3317
3318 node = m_stack_of_open_elements.elements().at(i - 1).ptr();
3319
3320 if (node->namespace_() != Namespace::HTML)
3321 continue;
3322
3323 process_using_the_rules_for(m_insertion_mode, token);
3324 return;
3325 }
3326 }
3327
3328 VERIFY_NOT_REACHED();
3329}
3330
3331// https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately
3332void HTMLParser::reset_the_insertion_mode_appropriately()
3333{
3334 for (ssize_t i = m_stack_of_open_elements.elements().size() - 1; i >= 0; --i) {
3335 bool last = i == 0;
3336 // NOTE: When parsing fragments, we substitute the context element for the root of the stack of open elements.
3337 JS::GCPtr<DOM::Element> node;
3338 if (last && m_parsing_fragment) {
3339 node = m_context_element.ptr();
3340 } else {
3341 node = m_stack_of_open_elements.elements().at(i).ptr();
3342 }
3343
3344 if (node->local_name() == HTML::TagNames::select) {
3345 if (!last) {
3346 for (ssize_t j = i; j > 0; --j) {
3347 auto& ancestor = m_stack_of_open_elements.elements().at(j - 1);
3348
3349 if (is<HTMLTemplateElement>(*ancestor))
3350 break;
3351
3352 if (is<HTMLTableElement>(*ancestor)) {
3353 m_insertion_mode = InsertionMode::InSelectInTable;
3354 return;
3355 }
3356 }
3357 }
3358
3359 m_insertion_mode = InsertionMode::InSelect;
3360 return;
3361 }
3362
3363 if (!last && node->local_name().is_one_of(HTML::TagNames::td, HTML::TagNames::th)) {
3364 m_insertion_mode = InsertionMode::InCell;
3365 return;
3366 }
3367
3368 if (node->local_name() == HTML::TagNames::tr) {
3369 m_insertion_mode = InsertionMode::InRow;
3370 return;
3371 }
3372
3373 if (node->local_name().is_one_of(HTML::TagNames::tbody, HTML::TagNames::thead, HTML::TagNames::tfoot)) {
3374 m_insertion_mode = InsertionMode::InTableBody;
3375 return;
3376 }
3377
3378 if (node->local_name() == HTML::TagNames::caption) {
3379 m_insertion_mode = InsertionMode::InCaption;
3380 return;
3381 }
3382
3383 if (node->local_name() == HTML::TagNames::colgroup) {
3384 m_insertion_mode = InsertionMode::InColumnGroup;
3385 return;
3386 }
3387
3388 if (node->local_name() == HTML::TagNames::table) {
3389 m_insertion_mode = InsertionMode::InTable;
3390 return;
3391 }
3392
3393 if (node->local_name() == HTML::TagNames::template_) {
3394 m_insertion_mode = m_stack_of_template_insertion_modes.last();
3395 return;
3396 }
3397
3398 if (!last && node->local_name() == HTML::TagNames::head) {
3399 m_insertion_mode = InsertionMode::InHead;
3400 return;
3401 }
3402
3403 if (node->local_name() == HTML::TagNames::body) {
3404 m_insertion_mode = InsertionMode::InBody;
3405 return;
3406 }
3407
3408 if (node->local_name() == HTML::TagNames::frameset) {
3409 VERIFY(m_parsing_fragment);
3410 m_insertion_mode = InsertionMode::InFrameset;
3411 return;
3412 }
3413
3414 if (node->local_name() == HTML::TagNames::html) {
3415 if (!m_head_element) {
3416 VERIFY(m_parsing_fragment);
3417 m_insertion_mode = InsertionMode::BeforeHead;
3418 return;
3419 }
3420
3421 m_insertion_mode = InsertionMode::AfterHead;
3422 return;
3423 }
3424 }
3425
3426 VERIFY(m_parsing_fragment);
3427 m_insertion_mode = InsertionMode::InBody;
3428}
3429
3430char const* HTMLParser::insertion_mode_name() const
3431{
3432 switch (m_insertion_mode) {
3433#define __ENUMERATE_INSERTION_MODE(mode) \
3434 case InsertionMode::mode: \
3435 return #mode;
3436 ENUMERATE_INSERTION_MODES
3437#undef __ENUMERATE_INSERTION_MODE
3438 }
3439 VERIFY_NOT_REACHED();
3440}
3441
3442DOM::Document& HTMLParser::document()
3443{
3444 return *m_document;
3445}
3446
3447// https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
3448Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup)
3449{
3450 // 1. Create a new Document node, and mark it as being an HTML document.
3451 auto temp_document = DOM::Document::create(context_element.realm()).release_value_but_fixme_should_propagate_errors();
3452 temp_document->set_document_type(DOM::Document::Type::HTML);
3453
3454 // 2. If the node document of the context element is in quirks mode, then let the Document be in quirks mode.
3455 // Otherwise, the node document of the context element is in limited-quirks mode, then let the Document be in limited-quirks mode.
3456 // Otherwise, leave the Document in no-quirks mode.
3457 temp_document->set_quirks_mode(context_element.document().mode());
3458
3459 // 3. Create a new HTML parser, and associate it with the just created Document node.
3460 auto parser = HTMLParser::create(*temp_document, markup, "utf-8");
3461 parser->m_context_element = JS::make_handle(context_element);
3462 parser->m_parsing_fragment = true;
3463
3464 // 4. Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
3465 // - title
3466 // - textarea
3467 if (context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
3468 // Switch the tokenizer to the RCDATA state.
3469 parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
3470 }
3471 // - style
3472 // - xmp
3473 // - iframe
3474 // - noembed
3475 // - noframes
3476 else if (context_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes)) {
3477 // Switch the tokenizer to the RAWTEXT state.
3478 parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
3479 }
3480 // - script
3481 else if (context_element.local_name().is_one_of(HTML::TagNames::script)) {
3482 // Switch the tokenizer to the script data state.
3483 parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData);
3484 }
3485 // - noscript
3486 else if (context_element.local_name().is_one_of(HTML::TagNames::noscript)) {
3487 // If the scripting flag is enabled, switch the tokenizer to the RAWTEXT state. Otherwise, leave the tokenizer in the data state.
3488 if (context_element.document().is_scripting_enabled())
3489 parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
3490 }
3491 // - plaintext
3492 else if (context_element.local_name().is_one_of(HTML::TagNames::plaintext)) {
3493 // Switch the tokenizer to the PLAINTEXT state.
3494 parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT);
3495 }
3496 // Any other element
3497 else {
3498 // Leave the tokenizer in the data state.
3499 }
3500
3501 // 5. Let root be a new html element with no attributes.
3502 auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors();
3503
3504 // 6. Append the element root to the Document node created above.
3505 MUST(temp_document->append_child(root));
3506
3507 // 7. Set up the parser's stack of open elements so that it contains just the single element root.
3508 parser->m_stack_of_open_elements.push(root);
3509
3510 // 8. If the context element is a template element,
3511 if (context_element.local_name() == HTML::TagNames::template_) {
3512 // push "in template" onto the stack of template insertion modes so that it is the new current template insertion mode.
3513 parser->m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate);
3514 }
3515
3516 // FIXME: 9. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
3517 // Let this start tag token be the start tag token of the context node, e.g. for the purposes of determining if it is an HTML integration point.
3518
3519 // 10. Reset the parser's insertion mode appropriately.
3520 parser->reset_the_insertion_mode_appropriately();
3521
3522 // 11. Set the parser's form element pointer to the nearest node to the context element that is a form element
3523 // (going straight up the ancestor chain, and including the element itself, if it is a form element), if any.
3524 // (If there is no such form element, the form element pointer keeps its initial value, null.)
3525 parser->m_form_element = context_element.first_ancestor_of_type<HTMLFormElement>();
3526
3527 // 12. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant.
3528 // 13. Start the parser and let it run until it has consumed all the characters just inserted into the input stream.
3529 parser->run(context_element.document().url());
3530
3531 // 14. Return the child nodes of root, in tree order.
3532 Vector<JS::Handle<DOM::Node>> children;
3533 while (JS::GCPtr<DOM::Node> child = root->first_child()) {
3534 MUST(root->remove_child(*child));
3535 context_element.document().adopt_node(*child);
3536 children.append(JS::make_handle(*child));
3537 }
3538 return children;
3539}
3540
3541JS::NonnullGCPtr<HTMLParser> HTMLParser::create_for_scripting(DOM::Document& document)
3542{
3543 return document.heap().allocate_without_realm<HTMLParser>(document);
3544}
3545
3546JS::NonnullGCPtr<HTMLParser> HTMLParser::create_with_uncertain_encoding(DOM::Document& document, ByteBuffer const& input)
3547{
3548 if (document.has_encoding())
3549 return document.heap().allocate_without_realm<HTMLParser>(document, input, document.encoding().value());
3550 auto encoding = run_encoding_sniffing_algorithm(document, input);
3551 dbgln_if(HTML_PARSER_DEBUG, "The encoding sniffing algorithm returned encoding '{}'", encoding);
3552 return document.heap().allocate_without_realm<HTMLParser>(document, input, encoding);
3553}
3554
3555JS::NonnullGCPtr<HTMLParser> HTMLParser::create(DOM::Document& document, StringView input, DeprecatedString const& encoding)
3556{
3557 return document.heap().allocate_without_realm<HTMLParser>(document, input, encoding);
3558}
3559
3560// https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-serialisation-algorithm
3561DeprecatedString HTMLParser::serialize_html_fragment(DOM::Node const& node)
3562{
3563 // The algorithm takes as input a DOM Element, Document, or DocumentFragment referred to as the node.
3564 VERIFY(node.is_element() || node.is_document() || node.is_document_fragment());
3565 JS::NonnullGCPtr<DOM::Node const> actual_node = node;
3566
3567 if (is<DOM::Element>(node)) {
3568 auto& element = verify_cast<DOM::Element>(node);
3569
3570 // 1. If the node serializes as void, then return the empty string.
3571 // (NOTE: serializes as void is defined only on elements in the spec)
3572 if (element.serializes_as_void())
3573 return DeprecatedString::empty();
3574
3575 // 3. If the node is a template element, then let the node instead be the template element's template contents (a DocumentFragment node).
3576 // (NOTE: This is out of order of the spec to avoid another dynamic cast. The second step just creates a string builder, so it shouldn't matter)
3577 if (is<HTML::HTMLTemplateElement>(element))
3578 actual_node = verify_cast<HTML::HTMLTemplateElement>(element).content();
3579 }
3580
3581 enum class AttributeMode {
3582 No,
3583 Yes,
3584 };
3585
3586 auto escape_string = [](StringView string, AttributeMode attribute_mode) -> DeprecatedString {
3587 // https://html.spec.whatwg.org/multipage/parsing.html#escapingString
3588 StringBuilder builder;
3589 for (auto code_point : Utf8View { string }) {
3590 // 1. Replace any occurrence of the "&" character by the string "&".
3591 if (code_point == '&')
3592 builder.append("&"sv);
3593 // 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the string " ".
3594 else if (code_point == 0xA0)
3595 builder.append(" "sv);
3596 // 3. If the algorithm was invoked in the attribute mode, replace any occurrences of the """ character by the string """.
3597 else if (code_point == '"' && attribute_mode == AttributeMode::Yes)
3598 builder.append("""sv);
3599 // 4. If the algorithm was not invoked in the attribute mode, replace any occurrences of the "<" character by the string "<", and any occurrences of the ">" character by the string ">".
3600 else if (code_point == '<' && attribute_mode == AttributeMode::No)
3601 builder.append("<"sv);
3602 else if (code_point == '>' && attribute_mode == AttributeMode::No)
3603 builder.append(">"sv);
3604 else
3605 builder.append_code_point(code_point);
3606 }
3607 return builder.to_deprecated_string();
3608 };
3609
3610 // 2. Let s be a string, and initialize it to the empty string.
3611 StringBuilder builder;
3612
3613 // 4. For each child node of the node, in tree order, run the following steps:
3614 actual_node->for_each_child([&](DOM::Node& current_node) {
3615 // 1. Let current node be the child node being processed.
3616
3617 // 2. Append the appropriate string from the following list to s:
3618
3619 if (is<DOM::Element>(current_node)) {
3620 // -> If current node is an Element
3621 auto& element = verify_cast<DOM::Element>(current_node);
3622
3623 // 1. If current node is an element in the HTML namespace, the MathML namespace, or the SVG namespace, then let tagname be current node's local name.
3624 // Otherwise, let tagname be current node's qualified name.
3625 DeprecatedString tag_name;
3626
3627 if (element.namespace_().is_one_of(Namespace::HTML, Namespace::MathML, Namespace::SVG))
3628 tag_name = element.local_name();
3629 else
3630 tag_name = element.qualified_name();
3631
3632 // 2. Append a U+003C LESS-THAN SIGN character (<), followed by tagname.
3633 builder.append('<');
3634 builder.append(tag_name);
3635
3636 // FIXME: 3. If current node's is value is not null, and the element does not have an is attribute in its attribute list,
3637 // then append the string " is="", followed by current node's is value escaped as described below in attribute mode,
3638 // followed by a U+0022 QUOTATION MARK character (").
3639
3640 // 4. For each attribute that the element has, append a U+0020 SPACE character, the attribute's serialized name as described below, a U+003D EQUALS SIGN character (=),
3641 // a U+0022 QUOTATION MARK character ("), the attribute's value, escaped as described below in attribute mode, and a second U+0022 QUOTATION MARK character (").
3642 // NOTE: The order of attributes is implementation-defined. The only constraint is that the order must be stable.
3643 element.for_each_attribute([&](auto& name, auto& value) {
3644 builder.append(' ');
3645
3646 // An attribute's serialized name for the purposes of the previous paragraph must be determined as follows:
3647
3648 // FIXME: -> If the attribute has no namespace:
3649 // The attribute's serialized name is the attribute's local name.
3650 // (We currently always do this)
3651 builder.append(name);
3652
3653 // FIXME: -> If the attribute is in the XML namespace:
3654 // The attribute's serialized name is the string "xml:" followed by the attribute's local name.
3655
3656 // FIXME: -> If the attribute is in the XMLNS namespace and the attribute's local name is xmlns:
3657 // The attribute's serialized name is the string "xmlns".
3658
3659 // FIXME: -> If the attribute is in the XMLNS namespace and the attribute's local name is not xmlns:
3660 // The attribute's serialized name is the string "xmlns:" followed by the attribute's local name.
3661
3662 // FIXME: -> If the attribute is in the XLink namespace:
3663 // The attribute's serialized name is the string "xlink:" followed by the attribute's local name.
3664
3665 // FIXME: -> If the attribute is in some other namespace:
3666 // The attribute's serialized name is the attribute's qualified name.
3667
3668 builder.append("=\""sv);
3669 builder.append(escape_string(value, AttributeMode::Yes));
3670 builder.append('"');
3671 });
3672
3673 // 5. Append a U+003E GREATER-THAN SIGN character (>).
3674 builder.append('>');
3675
3676 // 6. If current node serializes as void, then continue on to the next child node at this point.
3677 if (element.serializes_as_void())
3678 return IterationDecision::Continue;
3679
3680 // 7. Append the value of running the HTML fragment serialization algorithm on the current node element (thus recursing into this algorithm for that element),
3681 // followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/), tagname again, and finally a U+003E GREATER-THAN SIGN character (>).
3682 builder.append(serialize_html_fragment(element));
3683 builder.append("</"sv);
3684 builder.append(tag_name);
3685 builder.append('>');
3686
3687 return IterationDecision::Continue;
3688 }
3689
3690 if (is<DOM::Text>(current_node)) {
3691 // -> If current node is a Text node
3692 auto& text_node = verify_cast<DOM::Text>(current_node);
3693 auto* parent = current_node.parent();
3694
3695 if (is<DOM::Element>(parent)) {
3696 auto& parent_element = verify_cast<DOM::Element>(*parent);
3697
3698 // 1. If the parent of current node is a style, script, xmp, iframe, noembed, noframes, or plaintext element,
3699 // or if the parent of current node is a noscript element and scripting is enabled for the node, then append the value of current node's data IDL attribute literally.
3700 if (parent_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::script, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes, HTML::TagNames::plaintext)
3701 || (parent_element.local_name() == HTML::TagNames::noscript && !parent_element.is_scripting_disabled())) {
3702 builder.append(text_node.data());
3703 return IterationDecision::Continue;
3704 }
3705 }
3706
3707 // 2. Otherwise, append the value of current node's data IDL attribute, escaped as described below.
3708 builder.append(escape_string(text_node.data(), AttributeMode::No));
3709 return IterationDecision::Continue;
3710 }
3711
3712 if (is<DOM::Comment>(current_node)) {
3713 // -> If current node is a Comment
3714 auto& comment_node = verify_cast<DOM::Comment>(current_node);
3715
3716 // 1. Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS),
3717 // followed by the value of current node's data IDL attribute, followed by the literal string "-->" (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN).
3718 builder.append("<!--"sv);
3719 builder.append(comment_node.data());
3720 builder.append("-->"sv);
3721 return IterationDecision::Continue;
3722 }
3723
3724 if (is<DOM::ProcessingInstruction>(current_node)) {
3725 // -> If current node is a ProcessingInstruction
3726 auto& processing_instruction_node = verify_cast<DOM::ProcessingInstruction>(current_node);
3727
3728 // 1. Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK), followed by the value of current node's target IDL attribute,
3729 // followed by a single U+0020 SPACE character, followed by the value of current node's data IDL attribute, followed by a single U+003E GREATER-THAN SIGN character (>).
3730 builder.append("<?"sv);
3731 builder.append(processing_instruction_node.target());
3732 builder.append(' ');
3733 builder.append(processing_instruction_node.data());
3734 builder.append('>');
3735 return IterationDecision::Continue;
3736 }
3737
3738 if (is<DOM::DocumentType>(current_node)) {
3739 // -> If current node is a DocumentType
3740 auto& document_type_node = verify_cast<DOM::DocumentType>(current_node);
3741
3742 // 1. Append the literal string "<!DOCTYPE" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+0044 LATIN CAPITAL LETTER D, U+004F LATIN CAPITAL LETTER O,
3743 // U+0043 LATIN CAPITAL LETTER C, U+0054 LATIN CAPITAL LETTER T, U+0059 LATIN CAPITAL LETTER Y, U+0050 LATIN CAPITAL LETTER P, U+0045 LATIN CAPITAL LETTER E),
3744 // followed by a space (U+0020 SPACE), followed by the value of current node's name IDL attribute, followed by the literal string ">" (U+003E GREATER-THAN SIGN).
3745 builder.append("<!DOCTYPE "sv);
3746 builder.append(document_type_node.name());
3747 builder.append('>');
3748 return IterationDecision::Continue;
3749 }
3750
3751 return IterationDecision::Continue;
3752 });
3753
3754 // 5. Return s.
3755 return builder.to_deprecated_string();
3756}
3757
3758// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#current-dimension-value
3759static RefPtr<CSS::StyleValue> parse_current_dimension_value(float value, Utf8View input, Utf8View::Iterator position)
3760{
3761 // 1. If position is past the end of input, then return value as a length.
3762 if (position == input.end())
3763 return CSS::LengthStyleValue::create(CSS::Length::make_px(value));
3764
3765 // 2. If the code point at position within input is U+0025 (%), then return value as a percentage.
3766 if (*position == '%')
3767 return CSS::PercentageStyleValue::create(CSS::Percentage(value));
3768
3769 // 3. Return value as a length.
3770 return CSS::LengthStyleValue::create(CSS::Length::make_px(value));
3771}
3772
3773// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-dimension-values
3774RefPtr<CSS::StyleValue> parse_dimension_value(StringView string)
3775{
3776 // 1. Let input be the string being parsed.
3777 auto input = Utf8View(string);
3778 if (!input.validate())
3779 return nullptr;
3780
3781 // 2. Let position be a position variable for input, initially pointing at the start of input.
3782 auto position = input.begin();
3783
3784 // 3. Skip ASCII whitespace within input given position.
3785 while (position != input.end() && Infra::is_ascii_whitespace(*position))
3786 ++position;
3787
3788 // 4. If position is past the end of input or the code point at position within input is not an ASCII digit,
3789 // then return failure.
3790 if (position == input.end() || !is_ascii_digit(*position))
3791 return nullptr;
3792
3793 // 5. Collect a sequence of code points that are ASCII digits from input given position,
3794 // and interpret the resulting sequence as a base-ten integer. Let value be that number.
3795 StringBuilder number_string;
3796 while (position != input.end() && is_ascii_digit(*position)) {
3797 number_string.append(*position);
3798 ++position;
3799 }
3800 auto integer_value = number_string.string_view().to_int();
3801
3802 // 6. If position is past the end of input, then return value as a length.
3803 if (position == input.end())
3804 return CSS::LengthStyleValue::create(CSS::Length::make_px(*integer_value));
3805
3806 float value = *integer_value;
3807
3808 // 7. If the code point at position within input is U+002E (.), then:
3809 if (*position == '.') {
3810 // 1. Advance position by 1.
3811 ++position;
3812
3813 // 2. If position is past the end of input or the code point at position within input is not an ASCII digit,
3814 // then return the current dimension value with value, input, and position.
3815 if (position == input.end() || !is_ascii_digit(*position))
3816 return parse_current_dimension_value(value, input, position);
3817
3818 // 3. Let divisor have the value 1.
3819 float divisor = 1;
3820
3821 // 4. While true:
3822 while (true) {
3823 // 1. Multiply divisor by ten.
3824 divisor *= 10;
3825
3826 // 2. Add the value of the code point at position within input,
3827 // interpreted as a base-ten digit (0..9) and divided by divisor, to value.
3828 value += (*position - '0') / divisor;
3829
3830 // 3. Advance position by 1.
3831 ++position;
3832
3833 // 4. If position is past the end of input, then return value as a length.
3834 if (position == input.end())
3835 return CSS::LengthStyleValue::create(CSS::Length::make_px(value));
3836
3837 // 5. If the code point at position within input is not an ASCII digit, then break.
3838 if (!is_ascii_digit(*position))
3839 break;
3840 }
3841 }
3842
3843 // 8. Return the current dimension value with value, input, and position.
3844 return parse_current_dimension_value(value, input, position);
3845}
3846
3847// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-non-zero-dimension-values
3848RefPtr<CSS::StyleValue> parse_nonzero_dimension_value(StringView string)
3849{
3850 // 1. Let input be the string being parsed.
3851 // 2. Let value be the result of parsing input using the rules for parsing dimension values.
3852 auto value = parse_dimension_value(string);
3853
3854 // 3. If value is an error, return an error.
3855 if (!value)
3856 return nullptr;
3857
3858 // 4. If value is zero, return an error.
3859 if (value->is_length() && value->as_length().length().raw_value() == 0)
3860 return nullptr;
3861 if (value->is_percentage() && value->as_percentage().percentage().value() == 0)
3862 return nullptr;
3863
3864 // 5. If value is a percentage, return value as a percentage.
3865 // 6. Return value as a length.
3866 return value;
3867}
3868
3869JS::Realm& HTMLParser::realm()
3870{
3871 return m_document->realm();
3872}
3873
3874// https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser
3875void HTMLParser::abort()
3876{
3877 // 1. Throw away any pending content in the input stream, and discard any future content that would have been added to it.
3878 m_tokenizer.abort();
3879
3880 // FIXME: 2. Stop the speculative HTML parser for this HTML parser.
3881
3882 // 3. Update the current document readiness to "interactive".
3883 m_document->update_readiness(DocumentReadyState::Interactive);
3884
3885 // 4. Pop all the nodes off the stack of open elements.
3886 while (!m_stack_of_open_elements.is_empty())
3887 m_stack_of_open_elements.pop();
3888
3889 // 5. Update the current document readiness to "complete".
3890 m_document->update_readiness(DocumentReadyState::Complete);
3891
3892 m_aborted = true;
3893}
3894
3895}