Serenity Operating System
at master 215 lines 9.0 kB view raw
1/* 2 * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org> 3 * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch> 4 * 5 * SPDX-License-Identifier: BSD-2-Clause 6 */ 7 8#include <AK/Debug.h> 9#include <LibJS/SyntaxHighlighter.h> 10#include <LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.h> 11#include <LibWeb/HTML/Parser/HTMLTokenizer.h> 12#include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h> 13 14namespace Web::HTML { 15 16enum class AugmentedTokenKind : u32 { 17 AttributeName, 18 AttributeValue, 19 OpenTag, 20 CloseTag, 21 Comment, 22 Doctype, 23 __Count, 24}; 25 26bool SyntaxHighlighter::is_identifier(u64 token) const 27{ 28 if (!token) 29 return false; 30 return false; 31} 32 33bool SyntaxHighlighter::is_navigatable(u64) const 34{ 35 return false; 36} 37 38void SyntaxHighlighter::rehighlight(Palette const& palette) 39{ 40 dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) starting rehighlight"); 41 auto text = m_client->get_text(); 42 clear_nested_token_pairs(); 43 44 // FIXME: Add folding regions for start and end tags. 45 Vector<GUI::TextDocumentFoldingRegion> folding_regions; 46 Vector<GUI::TextDocumentSpan> spans; 47 auto highlight = [&](auto start_line, auto start_column, auto end_line, auto end_column, Gfx::TextAttributes attributes, AugmentedTokenKind kind) { 48 if (start_line > end_line || (start_line == end_line && start_column >= end_column)) { 49 dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) discarding ({}-{}) to ({}-{}) because it has zero or negative length", start_line, start_column, end_line, end_column); 50 return; 51 } 52 dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) highlighting ({}-{}) to ({}-{}) with color {}", start_line, start_column, end_line, end_column, attributes.color); 53 spans.empend( 54 GUI::TextRange { 55 { start_line, start_column }, 56 { end_line, end_column }, 57 }, 58 move(attributes), 59 static_cast<u64>(kind), 60 false); 61 }; 62 63 HTMLTokenizer tokenizer { text, "utf-8" }; 64 [[maybe_unused]] enum class State { 65 HTML, 66 Javascript, 67 CSS, 68 } state { State::HTML }; 69 StringBuilder substring_builder; 70 GUI::TextPosition substring_start_position; 71 72 for (;;) { 73 auto token = tokenizer.next_token(); 74 if (!token.has_value() || token.value().is_end_of_file()) 75 break; 76 dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) got token of type {}", token->to_deprecated_string()); 77 78 if (token->is_start_tag()) { 79 if (token->tag_name() == "script"sv) { 80 tokenizer.switch_to(HTMLTokenizer::State::ScriptData); 81 state = State::Javascript; 82 substring_start_position = { token->end_position().line, token->end_position().column }; 83 } else if (token->tag_name() == "style"sv) { 84 tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT); 85 state = State::CSS; 86 substring_start_position = { token->end_position().line, token->end_position().column }; 87 } 88 } else if (token->is_end_tag()) { 89 if (token->tag_name().is_one_of("script"sv, "style"sv)) { 90 if (state == State::Javascript) { 91 Syntax::ProxyHighlighterClient proxy_client { 92 *m_client, 93 substring_start_position, 94 static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value(), 95 substring_builder.string_view() 96 }; 97 { 98 JS::SyntaxHighlighter highlighter; 99 highlighter.attach(proxy_client); 100 highlighter.rehighlight(palette); 101 highlighter.detach(); 102 register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs())); 103 } 104 105 spans.extend(proxy_client.corrected_spans()); 106 folding_regions.extend(proxy_client.corrected_folding_regions()); 107 substring_builder.clear(); 108 } else if (state == State::CSS) { 109 Syntax::ProxyHighlighterClient proxy_client { 110 *m_client, 111 substring_start_position, 112 static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value(), 113 substring_builder.string_view() 114 }; 115 { 116 CSS::SyntaxHighlighter highlighter; 117 highlighter.attach(proxy_client); 118 highlighter.rehighlight(palette); 119 highlighter.detach(); 120 register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs())); 121 } 122 123 spans.extend(proxy_client.corrected_spans()); 124 folding_regions.extend(proxy_client.corrected_folding_regions()); 125 substring_builder.clear(); 126 } 127 state = State::HTML; 128 } 129 } else if (state != State::HTML) { 130 VERIFY(token->is_character()); 131 substring_builder.append_code_point(token->code_point()); 132 continue; 133 } 134 135 size_t token_start_offset = token->is_end_tag() ? 1 : 0; 136 137 if (token->is_comment()) { 138 highlight( 139 token->start_position().line, 140 token->start_position().column, 141 token->end_position().line, 142 token->end_position().column, 143 { palette.syntax_comment(), {} }, 144 AugmentedTokenKind::Comment); 145 146 GUI::TextDocumentFoldingRegion region; 147 region.range.set_start({ token->start_position().line, token->start_position().column + comment_prefix()->length() }); 148 region.range.set_end({ token->end_position().line, token->end_position().column - comment_suffix()->length() }); 149 folding_regions.append(move(region)); 150 } else if (token->is_start_tag() || token->is_end_tag()) { 151 highlight( 152 token->start_position().line, 153 token->start_position().column + token_start_offset, 154 token->start_position().line, 155 token->start_position().column + token_start_offset + token->tag_name().length(), 156 { palette.syntax_keyword(), {}, false, true }, 157 token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag); 158 159 token->for_each_attribute([&](auto& attribute) { 160 highlight( 161 attribute.name_start_position.line, 162 attribute.name_start_position.column + token_start_offset, 163 attribute.name_end_position.line, 164 attribute.name_end_position.column + token_start_offset, 165 { palette.syntax_identifier(), {} }, 166 AugmentedTokenKind::AttributeName); 167 highlight( 168 attribute.value_start_position.line, 169 attribute.value_start_position.column + token_start_offset, 170 attribute.value_end_position.line, 171 attribute.value_end_position.column + token_start_offset, 172 { palette.syntax_string(), {} }, 173 AugmentedTokenKind::AttributeValue); 174 return IterationDecision::Continue; 175 }); 176 } else if (token->is_doctype()) { 177 highlight( 178 token->start_position().line, 179 token->start_position().column, 180 token->start_position().line, 181 token->start_position().column, 182 { palette.syntax_preprocessor_statement(), {} }, 183 AugmentedTokenKind::Doctype); 184 } 185 } 186 187 if constexpr (SYNTAX_HIGHLIGHTING_DEBUG) { 188 dbgln("(HTML::SyntaxHighlighter) list of all spans:"); 189 for (auto& span : spans) 190 dbgln("{}, {} - {}", span.range, span.attributes.color, span.data); 191 dbgln("(HTML::SyntaxHighlighter) end of list"); 192 } 193 194 m_client->do_set_spans(move(spans)); 195 m_client->do_set_folding_regions(move(folding_regions)); 196 m_has_brace_buddies = false; 197 highlight_matching_token_pair(); 198 m_client->do_update(); 199} 200 201Vector<Syntax::Highlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs_impl() const 202{ 203 static Vector<MatchingTokenPair> pairs; 204 if (pairs.is_empty()) { 205 pairs.append({ static_cast<u64>(AugmentedTokenKind::OpenTag), static_cast<u64>(AugmentedTokenKind::CloseTag) }); 206 } 207 return pairs; 208} 209 210bool SyntaxHighlighter::token_types_equal(u64 token0, u64 token1) const 211{ 212 return token0 == token1; 213} 214 215}