Serenity Operating System
at master 204 lines 7.7 kB view raw
1/* 2 * Copyright (c) 2020-2022, Andreas Kling <kling@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#pragma once 8 9#include <LibJS/Heap/Cell.h> 10#include <LibWeb/DOM/Node.h> 11#include <LibWeb/HTML/Parser/HTMLTokenizer.h> 12#include <LibWeb/HTML/Parser/ListOfActiveFormattingElements.h> 13#include <LibWeb/HTML/Parser/StackOfOpenElements.h> 14 15namespace Web::HTML { 16 17#define ENUMERATE_INSERTION_MODES \ 18 __ENUMERATE_INSERTION_MODE(Initial) \ 19 __ENUMERATE_INSERTION_MODE(BeforeHTML) \ 20 __ENUMERATE_INSERTION_MODE(BeforeHead) \ 21 __ENUMERATE_INSERTION_MODE(InHead) \ 22 __ENUMERATE_INSERTION_MODE(InHeadNoscript) \ 23 __ENUMERATE_INSERTION_MODE(AfterHead) \ 24 __ENUMERATE_INSERTION_MODE(InBody) \ 25 __ENUMERATE_INSERTION_MODE(Text) \ 26 __ENUMERATE_INSERTION_MODE(InTable) \ 27 __ENUMERATE_INSERTION_MODE(InTableText) \ 28 __ENUMERATE_INSERTION_MODE(InCaption) \ 29 __ENUMERATE_INSERTION_MODE(InColumnGroup) \ 30 __ENUMERATE_INSERTION_MODE(InTableBody) \ 31 __ENUMERATE_INSERTION_MODE(InRow) \ 32 __ENUMERATE_INSERTION_MODE(InCell) \ 33 __ENUMERATE_INSERTION_MODE(InSelect) \ 34 __ENUMERATE_INSERTION_MODE(InSelectInTable) \ 35 __ENUMERATE_INSERTION_MODE(InTemplate) \ 36 __ENUMERATE_INSERTION_MODE(AfterBody) \ 37 __ENUMERATE_INSERTION_MODE(InFrameset) \ 38 __ENUMERATE_INSERTION_MODE(AfterFrameset) \ 39 __ENUMERATE_INSERTION_MODE(AfterAfterBody) \ 40 __ENUMERATE_INSERTION_MODE(AfterAfterFrameset) 41 42class HTMLParser final : public JS::Cell { 43 JS_CELL(HTMLParser, JS::Cell); 44 45 friend class HTMLTokenizer; 46 47public: 48 ~HTMLParser(); 49 50 static JS::NonnullGCPtr<HTMLParser> create_for_scripting(DOM::Document&); 51 static JS::NonnullGCPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, ByteBuffer const& input); 52 static JS::NonnullGCPtr<HTMLParser> create(DOM::Document&, StringView input, DeprecatedString const& encoding); 53 54 void run(); 55 void run(const AK::URL&); 56 57 DOM::Document& document(); 58 59 static Vector<JS::Handle<DOM::Node>> parse_html_fragment(DOM::Element& context_element, StringView); 60 static DeprecatedString serialize_html_fragment(DOM::Node const& node); 61 62 enum class InsertionMode { 63#define __ENUMERATE_INSERTION_MODE(mode) mode, 64 ENUMERATE_INSERTION_MODES 65#undef __ENUMERATE_INSERTION_MODE 66 }; 67 68 InsertionMode insertion_mode() const { return m_insertion_mode; } 69 70 static bool is_special_tag(DeprecatedFlyString const& tag_name, DeprecatedFlyString const& namespace_); 71 72 HTMLTokenizer& tokenizer() { return m_tokenizer; } 73 74 // https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser 75 void abort(); 76 77 bool aborted() const { return m_aborted; } 78 bool stopped() const { return m_stop_parsing; } 79 80 size_t script_nesting_level() const { return m_script_nesting_level; } 81 82private: 83 HTMLParser(DOM::Document&, StringView input, DeprecatedString const& encoding); 84 HTMLParser(DOM::Document&); 85 86 virtual void visit_edges(Cell::Visitor&) override; 87 88 char const* insertion_mode_name() const; 89 90 DOM::QuirksMode which_quirks_mode(HTMLToken const&) const; 91 92 void handle_initial(HTMLToken&); 93 void handle_before_html(HTMLToken&); 94 void handle_before_head(HTMLToken&); 95 void handle_in_head(HTMLToken&); 96 void handle_in_head_noscript(HTMLToken&); 97 void handle_after_head(HTMLToken&); 98 void handle_in_body(HTMLToken&); 99 void handle_after_body(HTMLToken&); 100 void handle_after_after_body(HTMLToken&); 101 void handle_text(HTMLToken&); 102 void handle_in_table(HTMLToken&); 103 void handle_in_table_body(HTMLToken&); 104 void handle_in_row(HTMLToken&); 105 void handle_in_cell(HTMLToken&); 106 void handle_in_table_text(HTMLToken&); 107 void handle_in_select_in_table(HTMLToken&); 108 void handle_in_select(HTMLToken&); 109 void handle_in_caption(HTMLToken&); 110 void handle_in_column_group(HTMLToken&); 111 void handle_in_template(HTMLToken&); 112 void handle_in_frameset(HTMLToken&); 113 void handle_after_frameset(HTMLToken&); 114 void handle_after_after_frameset(HTMLToken&); 115 116 void the_end(); 117 118 void stop_parsing() { m_stop_parsing = true; } 119 120 void generate_implied_end_tags(DeprecatedFlyString const& exception = {}); 121 void generate_all_implied_end_tags_thoroughly(); 122 JS::NonnullGCPtr<DOM::Element> create_element_for(HTMLToken const&, DeprecatedFlyString const& namespace_, DOM::Node& intended_parent); 123 124 struct AdjustedInsertionLocation { 125 JS::GCPtr<DOM::Node> parent; 126 JS::GCPtr<DOM::Node> insert_before_sibling; 127 }; 128 129 AdjustedInsertionLocation find_appropriate_place_for_inserting_node(JS::GCPtr<DOM::Element> override_target = nullptr); 130 131 DOM::Text* find_character_insertion_node(); 132 void flush_character_insertions(); 133 JS::NonnullGCPtr<DOM::Element> insert_foreign_element(HTMLToken const&, DeprecatedFlyString const&); 134 JS::NonnullGCPtr<DOM::Element> insert_html_element(HTMLToken const&); 135 DOM::Element& current_node(); 136 DOM::Element& adjusted_current_node(); 137 DOM::Element& node_before_current_node(); 138 void insert_character(u32 data); 139 void insert_comment(HTMLToken&); 140 void reconstruct_the_active_formatting_elements(); 141 void close_a_p_element(); 142 void process_using_the_rules_for(InsertionMode, HTMLToken&); 143 void process_using_the_rules_for_foreign_content(HTMLToken&); 144 void parse_generic_raw_text_element(HTMLToken&); 145 void increment_script_nesting_level(); 146 void decrement_script_nesting_level(); 147 void reset_the_insertion_mode_appropriately(); 148 149 void adjust_mathml_attributes(HTMLToken&); 150 void adjust_svg_tag_names(HTMLToken&); 151 void adjust_svg_attributes(HTMLToken&); 152 void adjust_foreign_attributes(HTMLToken&); 153 154 enum AdoptionAgencyAlgorithmOutcome { 155 DoNothing, 156 RunAnyOtherEndTagSteps, 157 }; 158 159 AdoptionAgencyAlgorithmOutcome run_the_adoption_agency_algorithm(HTMLToken&); 160 void clear_the_stack_back_to_a_table_context(); 161 void clear_the_stack_back_to_a_table_body_context(); 162 void clear_the_stack_back_to_a_table_row_context(); 163 void close_the_cell(); 164 165 InsertionMode m_insertion_mode { InsertionMode::Initial }; 166 InsertionMode m_original_insertion_mode { InsertionMode::Initial }; 167 168 StackOfOpenElements m_stack_of_open_elements; 169 Vector<InsertionMode> m_stack_of_template_insertion_modes; 170 ListOfActiveFormattingElements m_list_of_active_formatting_elements; 171 172 HTMLTokenizer m_tokenizer; 173 174 bool m_foster_parenting { false }; 175 bool m_frameset_ok { true }; 176 bool m_parsing_fragment { false }; 177 178 // https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag 179 // The scripting flag is set to "enabled" if scripting was enabled for the Document with which the parser is associated when the parser was created, and "disabled" otherwise. 180 bool m_scripting_enabled { true }; 181 182 bool m_invoked_via_document_write { false }; 183 bool m_aborted { false }; 184 bool m_parser_pause_flag { false }; 185 bool m_stop_parsing { false }; 186 size_t m_script_nesting_level { 0 }; 187 188 JS::Realm& realm(); 189 190 JS::GCPtr<DOM::Document> m_document; 191 JS::GCPtr<HTMLHeadElement> m_head_element; 192 JS::GCPtr<HTMLFormElement> m_form_element; 193 JS::GCPtr<DOM::Element> m_context_element; 194 195 Vector<HTMLToken> m_pending_table_character_tokens; 196 197 JS::GCPtr<DOM::Text> m_character_insertion_node; 198 StringBuilder m_character_insertion_builder; 199}; 200 201RefPtr<CSS::StyleValue> parse_dimension_value(StringView); 202RefPtr<CSS::StyleValue> parse_nonzero_dimension_value(StringView); 203 204}