// Serenity Operating System
/*
 * Copyright (c) 2020-2022, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
6
7#include <AK/Debug.h>
8#include <AK/JsonArray.h>
9#include <AK/LexicalPath.h>
10#include <AK/SourceGenerator.h>
11#include <LibGemini/Document.h>
12#include <LibGfx/ImageDecoder.h>
13#include <LibMarkdown/Document.h>
14#include <LibWeb/Bindings/MainThreadVM.h>
15#include <LibWeb/DOM/Document.h>
16#include <LibWeb/DOM/ElementFactory.h>
17#include <LibWeb/DOM/Text.h>
18#include <LibWeb/HTML/BrowsingContext.h>
19#include <LibWeb/HTML/HTMLIFrameElement.h>
20#include <LibWeb/HTML/NavigationParams.h>
21#include <LibWeb/HTML/Parser/HTMLParser.h>
22#include <LibWeb/Loader/FrameLoader.h>
23#include <LibWeb/Loader/ResourceLoader.h>
24#include <LibWeb/Page/Page.h>
25#include <LibWeb/Platform/ImageCodecPlugin.h>
26#include <LibWeb/XML/XMLDocumentBuilder.h>
27
28namespace Web {
29
30static DeprecatedString s_default_favicon_path = "/res/icons/16x16/app-browser.png";
31static RefPtr<Gfx::Bitmap> s_default_favicon_bitmap;
32
33void FrameLoader::set_default_favicon_path(DeprecatedString path)
34{
35 s_default_favicon_path = move(path);
36}
37
38FrameLoader::FrameLoader(HTML::BrowsingContext& browsing_context)
39 : m_browsing_context(browsing_context)
40{
41 if (!s_default_favicon_bitmap) {
42 s_default_favicon_bitmap = Gfx::Bitmap::load_from_file(s_default_favicon_path).release_value_but_fixme_should_propagate_errors();
43 VERIFY(s_default_favicon_bitmap);
44 }
45}
46
47FrameLoader::~FrameLoader() = default;
48
49static bool build_markdown_document(DOM::Document& document, ByteBuffer const& data)
50{
51 auto markdown_document = Markdown::Document::parse(data);
52 if (!markdown_document)
53 return false;
54
55 auto extra_head_contents = R"~~~(
56<style>
57 .zoomable {
58 cursor: zoom-in;
59 max-width: 100%;
60 }
61 .zoomable.zoomed-in {
62 cursor: zoom-out;
63 max-width: none;
64 }
65</style>
66<script>
67 function imageClickEventListener(event) {
68 let image = event.target;
69 if (image.classList.contains("zoomable")) {
70 image.classList.toggle("zoomed-in");
71 }
72 }
73 function processImages() {
74 let images = document.querySelectorAll("img");
75 let windowWidth = window.innerWidth;
76 images.forEach((image) => {
77 if (image.naturalWidth > windowWidth) {
78 image.classList.add("zoomable");
79 } else {
80 image.classList.remove("zoomable");
81 image.classList.remove("zoomed-in");
82 }
83
84 image.addEventListener("click", imageClickEventListener);
85 });
86 }
87
88 document.addEventListener("load", () => {
89 processImages();
90 });
91
92 window.addEventListener("resize", () => {
93 processImages();
94 });
95</script>
96)~~~"sv;
97
98 auto parser = HTML::HTMLParser::create(document, markdown_document->render_to_html(extra_head_contents), "utf-8");
99 parser->run(document.url());
100 return true;
101}
102
103static bool build_text_document(DOM::Document& document, ByteBuffer const& data)
104{
105 auto html_element = document.create_element("html").release_value();
106 MUST(document.append_child(html_element));
107
108 auto head_element = document.create_element("head").release_value();
109 MUST(html_element->append_child(head_element));
110 auto title_element = document.create_element("title").release_value();
111 MUST(head_element->append_child(title_element));
112
113 auto title_text = document.create_text_node(document.url().basename());
114 MUST(title_element->append_child(title_text));
115
116 auto body_element = document.create_element("body").release_value();
117 MUST(html_element->append_child(body_element));
118
119 auto pre_element = document.create_element("pre").release_value();
120 MUST(body_element->append_child(pre_element));
121
122 MUST(pre_element->append_child(document.create_text_node(DeprecatedString::copy(data))));
123 return true;
124}
125
126static bool build_image_document(DOM::Document& document, ByteBuffer const& data)
127{
128 auto image = Platform::ImageCodecPlugin::the().decode_image(data);
129 if (!image.has_value() || image->frames.is_empty())
130 return false;
131 auto const& frame = image->frames[0];
132 auto const& bitmap = frame.bitmap;
133 if (!bitmap)
134 return false;
135
136 auto html_element = document.create_element("html").release_value();
137 MUST(document.append_child(html_element));
138
139 auto head_element = document.create_element("head").release_value();
140 MUST(html_element->append_child(head_element));
141 auto title_element = document.create_element("title").release_value();
142 MUST(head_element->append_child(title_element));
143
144 auto basename = LexicalPath::basename(document.url().path());
145 auto title_text = document.heap().allocate<DOM::Text>(document.realm(), document, DeprecatedString::formatted("{} [{}x{}]", basename, bitmap->width(), bitmap->height())).release_allocated_value_but_fixme_should_propagate_errors();
146 MUST(title_element->append_child(*title_text));
147
148 auto body_element = document.create_element("body").release_value();
149 MUST(html_element->append_child(body_element));
150
151 auto image_element = document.create_element("img").release_value();
152 MUST(image_element->set_attribute(HTML::AttributeNames::src, document.url().to_deprecated_string()));
153 MUST(body_element->append_child(image_element));
154
155 return true;
156}
157
158static bool build_gemini_document(DOM::Document& document, ByteBuffer const& data)
159{
160 StringView gemini_data { data };
161 auto gemini_document = Gemini::Document::parse(gemini_data, document.url());
162 DeprecatedString html_data = gemini_document->render_to_html();
163
164 dbgln_if(GEMINI_DEBUG, "Gemini data:\n\"\"\"{}\"\"\"", gemini_data);
165 dbgln_if(GEMINI_DEBUG, "Converted to HTML:\n\"\"\"{}\"\"\"", html_data);
166
167 auto parser = HTML::HTMLParser::create(document, html_data, "utf-8");
168 parser->run(document.url());
169 return true;
170}
171
172static bool build_xml_document(DOM::Document& document, ByteBuffer const& data)
173{
174
175 XML::Parser parser(data, { .resolve_external_resource = resolve_xml_resource });
176 XMLDocumentBuilder builder { document };
177 auto result = parser.parse_with_listener(builder);
178 return !result.is_error() && !builder.has_error();
179}
180
181bool FrameLoader::parse_document(DOM::Document& document, ByteBuffer const& data)
182{
183 auto& mime_type = document.content_type();
184 if (mime_type == "text/html" || mime_type == "image/svg+xml") {
185 auto parser = HTML::HTMLParser::create_with_uncertain_encoding(document, data);
186 parser->run(document.url());
187 return true;
188 }
189 if (mime_type.ends_with("+xml"sv) || mime_type.is_one_of("text/xml", "application/xml"))
190 return build_xml_document(document, data);
191 if (mime_type.starts_with("image/"sv))
192 return build_image_document(document, data);
193 if (mime_type == "text/plain" || mime_type == "application/json")
194 return build_text_document(document, data);
195 if (mime_type == "text/markdown")
196 return build_markdown_document(document, data);
197 if (mime_type == "text/gemini")
198 return build_gemini_document(document, data);
199
200 return false;
201}
202
203bool FrameLoader::load(LoadRequest& request, Type type)
204{
205 if (!request.is_valid()) {
206 load_error_page(request.url(), "Invalid request");
207 return false;
208 }
209
210 if (!m_browsing_context.is_frame_nesting_allowed(request.url())) {
211 dbgln("No further recursion is allowed for the frame, abort load!");
212 return false;
213 }
214
215 auto& url = request.url();
216
217 if (type == Type::Navigation || type == Type::Reload || type == Type::Redirect) {
218 if (auto* page = browsing_context().page()) {
219 if (&page->top_level_browsing_context() == &m_browsing_context)
220 page->client().page_did_start_loading(url, type == Type::Redirect);
221 }
222 }
223
224 // https://fetch.spec.whatwg.org/#concept-fetch
225 // Step 12: If request’s header list does not contain `Accept`, then:
226 // 1. Let value be `*/*`. (NOTE: Not necessary as we're about to override it)
227 // 2. A user agent should set value to the first matching statement, if any, switching on request’s destination:
228 // -> "document"
229 // -> "frame"
230 // -> "iframe"
231 // `text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8`
232 if (!request.headers().contains("Accept"))
233 request.set_header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
234
235 set_resource(ResourceLoader::the().load_resource(Resource::Type::Generic, request));
236
237 if (type == Type::IFrame)
238 return true;
239
240 auto* document = browsing_context().active_document();
241 if (document && document->has_active_favicon())
242 return true;
243
244 if (url.scheme() == "http" || url.scheme() == "https") {
245 AK::URL favicon_url;
246 favicon_url.set_scheme(url.scheme());
247 favicon_url.set_host(url.host());
248 favicon_url.set_port(url.port_or_default());
249 favicon_url.set_paths({ "favicon.ico" });
250
251 ResourceLoader::the().load(
252 favicon_url,
253 [this, favicon_url](auto data, auto&, auto) {
254 // Always fetch the current document
255 auto* document = this->browsing_context().active_document();
256 if (document && document->has_active_favicon())
257 return;
258 dbgln_if(SPAM_DEBUG, "Favicon downloaded, {} bytes from {}", data.size(), favicon_url);
259 if (data.is_empty())
260 return;
261 RefPtr<Gfx::Bitmap> favicon_bitmap;
262 auto decoded_image = Platform::ImageCodecPlugin::the().decode_image(data);
263 if (!decoded_image.has_value() || decoded_image->frames.is_empty()) {
264 dbgln("Could not decode favicon {}", favicon_url);
265 } else {
266 favicon_bitmap = decoded_image->frames[0].bitmap;
267 dbgln_if(IMAGE_DECODER_DEBUG, "Decoded favicon, {}", favicon_bitmap->size());
268 }
269 load_favicon(favicon_bitmap);
270 },
271 [this](auto&, auto) {
272 // Always fetch the current document
273 auto* document = this->browsing_context().active_document();
274 if (document && document->has_active_favicon())
275 return;
276
277 load_favicon();
278 });
279 } else {
280 load_favicon();
281 }
282
283 return true;
284}
285
286bool FrameLoader::load(const AK::URL& url, Type type)
287{
288 dbgln_if(SPAM_DEBUG, "FrameLoader::load: {}", url);
289
290 if (!url.is_valid()) {
291 load_error_page(url, "Invalid URL");
292 return false;
293 }
294
295 auto request = LoadRequest::create_for_url_on_page(url, browsing_context().page());
296 return load(request, type);
297}
298
299void FrameLoader::load_html(StringView html, const AK::URL& url)
300{
301 auto& vm = Bindings::main_thread_vm();
302 auto response = Fetch::Infrastructure::Response::create(vm);
303 response->url_list().append(url);
304 HTML::NavigationParams navigation_params {
305 .id = {},
306 .request = nullptr,
307 .response = response,
308 .origin = HTML::Origin {},
309 .policy_container = HTML::PolicyContainer {},
310 .final_sandboxing_flag_set = HTML::SandboxingFlagSet {},
311 .cross_origin_opener_policy = HTML::CrossOriginOpenerPolicy {},
312 .coop_enforcement_result = HTML::CrossOriginOpenerPolicyEnforcementResult {},
313 .reserved_environment = {},
314 .browsing_context = browsing_context(),
315 };
316 auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html", move(navigation_params)).release_value_but_fixme_should_propagate_errors();
317 browsing_context().set_active_document(document);
318
319 auto parser = HTML::HTMLParser::create(document, html, "utf-8");
320 parser->run(url);
321}
322
323static DeprecatedString s_error_page_url = "file:///res/html/error.html";
324
325void FrameLoader::set_error_page_url(DeprecatedString error_page_url)
326{
327 s_error_page_url = error_page_url;
328}
329
330// FIXME: Use an actual templating engine (our own one when it's built, preferably
331// with a way to check these usages at compile time)
332
333void FrameLoader::load_error_page(const AK::URL& failed_url, DeprecatedString const& error)
334{
335 LoadRequest request = LoadRequest::create_for_url_on_page(s_error_page_url, browsing_context().page());
336
337 ResourceLoader::the().load(
338 request,
339 [this, failed_url, error](auto data, auto&, auto) {
340 VERIFY(!data.is_null());
341 StringBuilder builder;
342 SourceGenerator generator { builder };
343 generator.set("failed_url", escape_html_entities(failed_url.to_deprecated_string()));
344 generator.set("error", escape_html_entities(error));
345 generator.append(data);
346 load_html(generator.as_string_view(), s_error_page_url);
347 },
348 [](auto& error, auto) {
349 dbgln("Failed to load error page: {}", error);
350 VERIFY_NOT_REACHED();
351 });
352}
353
354void FrameLoader::load_favicon(RefPtr<Gfx::Bitmap> bitmap)
355{
356 if (auto* page = browsing_context().page()) {
357 if (bitmap)
358 page->client().page_did_change_favicon(*bitmap);
359 else if (s_default_favicon_bitmap)
360 page->client().page_did_change_favicon(*s_default_favicon_bitmap);
361 }
362}
363
364void FrameLoader::resource_did_load()
365{
366 // This prevents us setting up the document of a removed browsing context container (BCC, e.g. <iframe>), which will cause a crash
367 // if the document contains a script that inserts another BCC as this will use the stale browsing context it previously set up,
368 // even if it's reinserted.
369 // Example:
370 // index.html:
371 // ```
372 // <body><script>
373 // var i = document.createElement("iframe");
374 // i.src = "b.html";
375 // document.body.append(i);
376 // i.remove();
377 // </script>
378 // ```
379 // b.html:
380 // ```
381 // <body><script>
382 // var i = document.createElement("iframe");
383 // document.body.append(i);
384 // </script>
385 // ```
386 // Required by Prebid.js, which does this by inserting an <iframe> into a <div> in the active document via innerHTML,
387 // then transfers it to the <html> element:
388 // https://github.com/prebid/Prebid.js/blob/7b7389c5abdd05626f71c3df606a93713d1b9f85/src/utils.js#L597
389 // This is done in the spec by removing all tasks and aborting all fetches when a document is destroyed:
390 // https://html.spec.whatwg.org/multipage/document-lifecycle.html#destroy-a-document
391 if (browsing_context().has_been_discarded())
392 return;
393
394 auto url = resource()->url();
395
396 // For 3xx (Redirection) responses, the Location value refers to the preferred target resource for automatically redirecting the request.
397 auto status_code = resource()->status_code();
398 if (status_code.has_value() && *status_code >= 300 && *status_code <= 399) {
399 auto location = resource()->response_headers().get("Location");
400 if (location.has_value()) {
401 if (m_redirects_count > maximum_redirects_allowed) {
402 m_redirects_count = 0;
403 load_error_page(url, "Too many redirects");
404 return;
405 }
406 m_redirects_count++;
407 load(url.complete_url(location.value()), Type::Redirect);
408 return;
409 }
410 }
411 m_redirects_count = 0;
412
413 if (resource()->has_encoding()) {
414 dbgln_if(RESOURCE_DEBUG, "This content has MIME type '{}', encoding '{}'", resource()->mime_type(), resource()->encoding().value());
415 } else {
416 dbgln_if(RESOURCE_DEBUG, "This content has MIME type '{}', encoding unknown", resource()->mime_type());
417 }
418
419 auto final_sandboxing_flag_set = HTML::SandboxingFlagSet {};
420
421 // (Part of https://html.spec.whatwg.org/#navigating-across-documents)
422 // 3. Let responseOrigin be the result of determining the origin given browsingContext, resource's url, finalSandboxFlags, and incumbentNavigationOrigin.
423 // FIXME: Pass incumbentNavigationOrigin
424 auto response_origin = HTML::determine_the_origin(browsing_context(), url, final_sandboxing_flag_set, {});
425
426 auto& vm = Bindings::main_thread_vm();
427 auto response = Fetch::Infrastructure::Response::create(vm);
428 response->url_list().append(url);
429 HTML::NavigationParams navigation_params {
430 .id = {},
431 .request = nullptr,
432 .response = response,
433 .origin = move(response_origin),
434 .policy_container = HTML::PolicyContainer {},
435 .final_sandboxing_flag_set = final_sandboxing_flag_set,
436 .cross_origin_opener_policy = HTML::CrossOriginOpenerPolicy {},
437 .coop_enforcement_result = HTML::CrossOriginOpenerPolicyEnforcementResult {},
438 .reserved_environment = {},
439 .browsing_context = browsing_context(),
440 };
441 auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html", move(navigation_params)).release_value_but_fixme_should_propagate_errors();
442 document->set_url(url);
443 document->set_encoding(resource()->encoding());
444 document->set_content_type(resource()->mime_type());
445
446 browsing_context().set_active_document(document);
447 if (auto* page = browsing_context().page())
448 page->client().page_did_create_main_document();
449
450 if (!parse_document(*document, resource()->encoded_data())) {
451 load_error_page(url, "Failed to parse content.");
452 return;
453 }
454
455 if (!url.fragment().is_empty())
456 browsing_context().scroll_to_anchor(url.fragment());
457 else
458 browsing_context().scroll_to({ 0, 0 });
459
460 if (auto* page = browsing_context().page())
461 page->client().page_did_finish_loading(url);
462}
463
464void FrameLoader::resource_did_fail()
465{
466 // See comment in resource_did_load() about why this is done.
467 if (browsing_context().has_been_discarded())
468 return;
469
470 load_error_page(resource()->url(), resource()->error());
471}
472
473}