Serenity Operating System
1/*
2 * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
3 * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
4 *
5 * SPDX-License-Identifier: BSD-2-Clause
6 */
7
8#include <AK/Debug.h>
9#include <LibJS/SyntaxHighlighter.h>
10#include <LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.h>
11#include <LibWeb/HTML/Parser/HTMLTokenizer.h>
12#include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
13
14namespace Web::HTML {
15
16enum class AugmentedTokenKind : u32 {
17 AttributeName,
18 AttributeValue,
19 OpenTag,
20 CloseTag,
21 Comment,
22 Doctype,
23 __Count,
24};
25
26bool SyntaxHighlighter::is_identifier(u64 token) const
27{
28 if (!token)
29 return false;
30 return false;
31}
32
33bool SyntaxHighlighter::is_navigatable(u64) const
34{
35 return false;
36}
37
38void SyntaxHighlighter::rehighlight(Palette const& palette)
39{
40 dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) starting rehighlight");
41 auto text = m_client->get_text();
42 clear_nested_token_pairs();
43
44 // FIXME: Add folding regions for start and end tags.
45 Vector<GUI::TextDocumentFoldingRegion> folding_regions;
46 Vector<GUI::TextDocumentSpan> spans;
47 auto highlight = [&](auto start_line, auto start_column, auto end_line, auto end_column, Gfx::TextAttributes attributes, AugmentedTokenKind kind) {
48 if (start_line > end_line || (start_line == end_line && start_column >= end_column)) {
49 dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) discarding ({}-{}) to ({}-{}) because it has zero or negative length", start_line, start_column, end_line, end_column);
50 return;
51 }
52 dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) highlighting ({}-{}) to ({}-{}) with color {}", start_line, start_column, end_line, end_column, attributes.color);
53 spans.empend(
54 GUI::TextRange {
55 { start_line, start_column },
56 { end_line, end_column },
57 },
58 move(attributes),
59 static_cast<u64>(kind),
60 false);
61 };
62
63 HTMLTokenizer tokenizer { text, "utf-8" };
64 [[maybe_unused]] enum class State {
65 HTML,
66 Javascript,
67 CSS,
68 } state { State::HTML };
69 StringBuilder substring_builder;
70 GUI::TextPosition substring_start_position;
71
72 for (;;) {
73 auto token = tokenizer.next_token();
74 if (!token.has_value() || token.value().is_end_of_file())
75 break;
76 dbgln_if(SYNTAX_HIGHLIGHTING_DEBUG, "(HTML::SyntaxHighlighter) got token of type {}", token->to_deprecated_string());
77
78 if (token->is_start_tag()) {
79 if (token->tag_name() == "script"sv) {
80 tokenizer.switch_to(HTMLTokenizer::State::ScriptData);
81 state = State::Javascript;
82 substring_start_position = { token->end_position().line, token->end_position().column };
83 } else if (token->tag_name() == "style"sv) {
84 tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT);
85 state = State::CSS;
86 substring_start_position = { token->end_position().line, token->end_position().column };
87 }
88 } else if (token->is_end_tag()) {
89 if (token->tag_name().is_one_of("script"sv, "style"sv)) {
90 if (state == State::Javascript) {
91 Syntax::ProxyHighlighterClient proxy_client {
92 *m_client,
93 substring_start_position,
94 static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value(),
95 substring_builder.string_view()
96 };
97 {
98 JS::SyntaxHighlighter highlighter;
99 highlighter.attach(proxy_client);
100 highlighter.rehighlight(palette);
101 highlighter.detach();
102 register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs()));
103 }
104
105 spans.extend(proxy_client.corrected_spans());
106 folding_regions.extend(proxy_client.corrected_folding_regions());
107 substring_builder.clear();
108 } else if (state == State::CSS) {
109 Syntax::ProxyHighlighterClient proxy_client {
110 *m_client,
111 substring_start_position,
112 static_cast<u64>(AugmentedTokenKind::__Count) + first_free_token_kind_serial_value(),
113 substring_builder.string_view()
114 };
115 {
116 CSS::SyntaxHighlighter highlighter;
117 highlighter.attach(proxy_client);
118 highlighter.rehighlight(palette);
119 highlighter.detach();
120 register_nested_token_pairs(proxy_client.corrected_token_pairs(highlighter.matching_token_pairs()));
121 }
122
123 spans.extend(proxy_client.corrected_spans());
124 folding_regions.extend(proxy_client.corrected_folding_regions());
125 substring_builder.clear();
126 }
127 state = State::HTML;
128 }
129 } else if (state != State::HTML) {
130 VERIFY(token->is_character());
131 substring_builder.append_code_point(token->code_point());
132 continue;
133 }
134
135 size_t token_start_offset = token->is_end_tag() ? 1 : 0;
136
137 if (token->is_comment()) {
138 highlight(
139 token->start_position().line,
140 token->start_position().column,
141 token->end_position().line,
142 token->end_position().column,
143 { palette.syntax_comment(), {} },
144 AugmentedTokenKind::Comment);
145
146 GUI::TextDocumentFoldingRegion region;
147 region.range.set_start({ token->start_position().line, token->start_position().column + comment_prefix()->length() });
148 region.range.set_end({ token->end_position().line, token->end_position().column - comment_suffix()->length() });
149 folding_regions.append(move(region));
150 } else if (token->is_start_tag() || token->is_end_tag()) {
151 highlight(
152 token->start_position().line,
153 token->start_position().column + token_start_offset,
154 token->start_position().line,
155 token->start_position().column + token_start_offset + token->tag_name().length(),
156 { palette.syntax_keyword(), {}, false, true },
157 token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag);
158
159 token->for_each_attribute([&](auto& attribute) {
160 highlight(
161 attribute.name_start_position.line,
162 attribute.name_start_position.column + token_start_offset,
163 attribute.name_end_position.line,
164 attribute.name_end_position.column + token_start_offset,
165 { palette.syntax_identifier(), {} },
166 AugmentedTokenKind::AttributeName);
167 highlight(
168 attribute.value_start_position.line,
169 attribute.value_start_position.column + token_start_offset,
170 attribute.value_end_position.line,
171 attribute.value_end_position.column + token_start_offset,
172 { palette.syntax_string(), {} },
173 AugmentedTokenKind::AttributeValue);
174 return IterationDecision::Continue;
175 });
176 } else if (token->is_doctype()) {
177 highlight(
178 token->start_position().line,
179 token->start_position().column,
180 token->start_position().line,
181 token->start_position().column,
182 { palette.syntax_preprocessor_statement(), {} },
183 AugmentedTokenKind::Doctype);
184 }
185 }
186
187 if constexpr (SYNTAX_HIGHLIGHTING_DEBUG) {
188 dbgln("(HTML::SyntaxHighlighter) list of all spans:");
189 for (auto& span : spans)
190 dbgln("{}, {} - {}", span.range, span.attributes.color, span.data);
191 dbgln("(HTML::SyntaxHighlighter) end of list");
192 }
193
194 m_client->do_set_spans(move(spans));
195 m_client->do_set_folding_regions(move(folding_regions));
196 m_has_brace_buddies = false;
197 highlight_matching_token_pair();
198 m_client->do_update();
199}
200
201Vector<Syntax::Highlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs_impl() const
202{
203 static Vector<MatchingTokenPair> pairs;
204 if (pairs.is_empty()) {
205 pairs.append({ static_cast<u64>(AugmentedTokenKind::OpenTag), static_cast<u64>(AugmentedTokenKind::CloseTag) });
206 }
207 return pairs;
208}
209
210bool SyntaxHighlighter::token_types_equal(u64 token0, u64 token1) const
211{
212 return token0 == token1;
213}
214
215}