Serenity Operating System
at master 473 lines 18 kB view raw
1/* 2 * Copyright (c) 2020-2022, Andreas Kling <kling@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include <AK/Debug.h> 8#include <AK/JsonArray.h> 9#include <AK/LexicalPath.h> 10#include <AK/SourceGenerator.h> 11#include <LibGemini/Document.h> 12#include <LibGfx/ImageDecoder.h> 13#include <LibMarkdown/Document.h> 14#include <LibWeb/Bindings/MainThreadVM.h> 15#include <LibWeb/DOM/Document.h> 16#include <LibWeb/DOM/ElementFactory.h> 17#include <LibWeb/DOM/Text.h> 18#include <LibWeb/HTML/BrowsingContext.h> 19#include <LibWeb/HTML/HTMLIFrameElement.h> 20#include <LibWeb/HTML/NavigationParams.h> 21#include <LibWeb/HTML/Parser/HTMLParser.h> 22#include <LibWeb/Loader/FrameLoader.h> 23#include <LibWeb/Loader/ResourceLoader.h> 24#include <LibWeb/Page/Page.h> 25#include <LibWeb/Platform/ImageCodecPlugin.h> 26#include <LibWeb/XML/XMLDocumentBuilder.h> 27 28namespace Web { 29 30static DeprecatedString s_default_favicon_path = "/res/icons/16x16/app-browser.png"; 31static RefPtr<Gfx::Bitmap> s_default_favicon_bitmap; 32 33void FrameLoader::set_default_favicon_path(DeprecatedString path) 34{ 35 s_default_favicon_path = move(path); 36} 37 38FrameLoader::FrameLoader(HTML::BrowsingContext& browsing_context) 39 : m_browsing_context(browsing_context) 40{ 41 if (!s_default_favicon_bitmap) { 42 s_default_favicon_bitmap = Gfx::Bitmap::load_from_file(s_default_favicon_path).release_value_but_fixme_should_propagate_errors(); 43 VERIFY(s_default_favicon_bitmap); 44 } 45} 46 47FrameLoader::~FrameLoader() = default; 48 49static bool build_markdown_document(DOM::Document& document, ByteBuffer const& data) 50{ 51 auto markdown_document = Markdown::Document::parse(data); 52 if (!markdown_document) 53 return false; 54 55 auto extra_head_contents = R"~~~( 56<style> 57 .zoomable { 58 cursor: zoom-in; 59 max-width: 100%; 60 } 61 .zoomable.zoomed-in { 62 cursor: zoom-out; 63 max-width: none; 64 } 65</style> 66<script> 67 function imageClickEventListener(event) { 68 let image = event.target; 69 if (image.classList.contains("zoomable")) { 70 image.classList.toggle("zoomed-in"); 71 } 72 } 73 function processImages() { 74 let images = document.querySelectorAll("img"); 75 let windowWidth = window.innerWidth; 76 images.forEach((image) => { 77 if (image.naturalWidth > windowWidth) { 78 image.classList.add("zoomable"); 79 } else { 80 image.classList.remove("zoomable"); 81 image.classList.remove("zoomed-in"); 82 } 83 84 image.addEventListener("click", imageClickEventListener); 85 }); 86 } 87 88 document.addEventListener("load", () => { 89 processImages(); 90 }); 91 92 window.addEventListener("resize", () => { 93 processImages(); 94 }); 95</script> 96)~~~"sv; 97 98 auto parser = HTML::HTMLParser::create(document, markdown_document->render_to_html(extra_head_contents), "utf-8"); 99 parser->run(document.url()); 100 return true; 101} 102 103static bool build_text_document(DOM::Document& document, ByteBuffer const& data) 104{ 105 auto html_element = document.create_element("html").release_value(); 106 MUST(document.append_child(html_element)); 107 108 auto head_element = document.create_element("head").release_value(); 109 MUST(html_element->append_child(head_element)); 110 auto title_element = document.create_element("title").release_value(); 111 MUST(head_element->append_child(title_element)); 112 113 auto title_text = document.create_text_node(document.url().basename()); 114 MUST(title_element->append_child(title_text)); 115 116 auto body_element = document.create_element("body").release_value(); 117 MUST(html_element->append_child(body_element)); 118 119 auto pre_element = document.create_element("pre").release_value(); 120 MUST(body_element->append_child(pre_element)); 121 122 MUST(pre_element->append_child(document.create_text_node(DeprecatedString::copy(data)))); 123 return true; 124} 125 126static bool build_image_document(DOM::Document& document, ByteBuffer const& data) 127{ 128 auto image = Platform::ImageCodecPlugin::the().decode_image(data); 129 if (!image.has_value() || image->frames.is_empty()) 130 return false; 131 auto const& frame = image->frames[0]; 132 auto const& bitmap = frame.bitmap; 133 if (!bitmap) 134 return false; 135 136 auto html_element = document.create_element("html").release_value(); 137 MUST(document.append_child(html_element)); 138 139 auto head_element = document.create_element("head").release_value(); 140 MUST(html_element->append_child(head_element)); 141 auto title_element = document.create_element("title").release_value(); 142 MUST(head_element->append_child(title_element)); 143 144 auto basename = LexicalPath::basename(document.url().path()); 145 auto title_text = document.heap().allocate<DOM::Text>(document.realm(), document, DeprecatedString::formatted("{} [{}x{}]", basename, bitmap->width(), bitmap->height())).release_allocated_value_but_fixme_should_propagate_errors(); 146 MUST(title_element->append_child(*title_text)); 147 148 auto body_element = document.create_element("body").release_value(); 149 MUST(html_element->append_child(body_element)); 150 151 auto image_element = document.create_element("img").release_value(); 152 MUST(image_element->set_attribute(HTML::AttributeNames::src, document.url().to_deprecated_string())); 153 MUST(body_element->append_child(image_element)); 154 155 return true; 156} 157 158static bool build_gemini_document(DOM::Document& document, ByteBuffer const& data) 159{ 160 StringView gemini_data { data }; 161 auto gemini_document = Gemini::Document::parse(gemini_data, document.url()); 162 DeprecatedString html_data = gemini_document->render_to_html(); 163 164 dbgln_if(GEMINI_DEBUG, "Gemini data:\n\"\"\"{}\"\"\"", gemini_data); 165 dbgln_if(GEMINI_DEBUG, "Converted to HTML:\n\"\"\"{}\"\"\"", html_data); 166 167 auto parser = HTML::HTMLParser::create(document, html_data, "utf-8"); 168 parser->run(document.url()); 169 return true; 170} 171 172static bool build_xml_document(DOM::Document& document, ByteBuffer const& data) 173{ 174 175 XML::Parser parser(data, { .resolve_external_resource = resolve_xml_resource }); 176 XMLDocumentBuilder builder { document }; 177 auto result = parser.parse_with_listener(builder); 178 return !result.is_error() && !builder.has_error(); 179} 180 181bool FrameLoader::parse_document(DOM::Document& document, ByteBuffer const& data) 182{ 183 auto& mime_type = document.content_type(); 184 if (mime_type == "text/html" || mime_type == "image/svg+xml") { 185 auto parser = HTML::HTMLParser::create_with_uncertain_encoding(document, data); 186 parser->run(document.url()); 187 return true; 188 } 189 if (mime_type.ends_with("+xml"sv) || mime_type.is_one_of("text/xml", "application/xml")) 190 return build_xml_document(document, data); 191 if (mime_type.starts_with("image/"sv)) 192 return build_image_document(document, data); 193 if (mime_type == "text/plain" || mime_type == "application/json") 194 return build_text_document(document, data); 195 if (mime_type == "text/markdown") 196 return build_markdown_document(document, data); 197 if (mime_type == "text/gemini") 198 return build_gemini_document(document, data); 199 200 return false; 201} 202 203bool FrameLoader::load(LoadRequest& request, Type type) 204{ 205 if (!request.is_valid()) { 206 load_error_page(request.url(), "Invalid request"); 207 return false; 208 } 209 210 if (!m_browsing_context.is_frame_nesting_allowed(request.url())) { 211 dbgln("No further recursion is allowed for the frame, abort load!"); 212 return false; 213 } 214 215 auto& url = request.url(); 216 217 if (type == Type::Navigation || type == Type::Reload || type == Type::Redirect) { 218 if (auto* page = browsing_context().page()) { 219 if (&page->top_level_browsing_context() == &m_browsing_context) 220 page->client().page_did_start_loading(url, type == Type::Redirect); 221 } 222 } 223 224 // https://fetch.spec.whatwg.org/#concept-fetch 225 // Step 12: If request’s header list does not contain `Accept`, then: 226 // 1. Let value be `*/*`. (NOTE: Not necessary as we're about to override it) 227 // 2. A user agent should set value to the first matching statement, if any, switching on request’s destination: 228 // -> "document" 229 // -> "frame" 230 // -> "iframe" 231 // `text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8` 232 if (!request.headers().contains("Accept")) 233 request.set_header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); 234 235 set_resource(ResourceLoader::the().load_resource(Resource::Type::Generic, request)); 236 237 if (type == Type::IFrame) 238 return true; 239 240 auto* document = browsing_context().active_document(); 241 if (document && document->has_active_favicon()) 242 return true; 243 244 if (url.scheme() == "http" || url.scheme() == "https") { 245 AK::URL favicon_url; 246 favicon_url.set_scheme(url.scheme()); 247 favicon_url.set_host(url.host()); 248 favicon_url.set_port(url.port_or_default()); 249 favicon_url.set_paths({ "favicon.ico" }); 250 251 ResourceLoader::the().load( 252 favicon_url, 253 [this, favicon_url](auto data, auto&, auto) { 254 // Always fetch the current document 255 auto* document = this->browsing_context().active_document(); 256 if (document && document->has_active_favicon()) 257 return; 258 dbgln_if(SPAM_DEBUG, "Favicon downloaded, {} bytes from {}", data.size(), favicon_url); 259 if (data.is_empty()) 260 return; 261 RefPtr<Gfx::Bitmap> favicon_bitmap; 262 auto decoded_image = Platform::ImageCodecPlugin::the().decode_image(data); 263 if (!decoded_image.has_value() || decoded_image->frames.is_empty()) { 264 dbgln("Could not decode favicon {}", favicon_url); 265 } else { 266 favicon_bitmap = decoded_image->frames[0].bitmap; 267 dbgln_if(IMAGE_DECODER_DEBUG, "Decoded favicon, {}", favicon_bitmap->size()); 268 } 269 load_favicon(favicon_bitmap); 270 }, 271 [this](auto&, auto) { 272 // Always fetch the current document 273 auto* document = this->browsing_context().active_document(); 274 if (document && document->has_active_favicon()) 275 return; 276 277 load_favicon(); 278 }); 279 } else { 280 load_favicon(); 281 } 282 283 return true; 284} 285 286bool FrameLoader::load(const AK::URL& url, Type type) 287{ 288 dbgln_if(SPAM_DEBUG, "FrameLoader::load: {}", url); 289 290 if (!url.is_valid()) { 291 load_error_page(url, "Invalid URL"); 292 return false; 293 } 294 295 auto request = LoadRequest::create_for_url_on_page(url, browsing_context().page()); 296 return load(request, type); 297} 298 299void FrameLoader::load_html(StringView html, const AK::URL& url) 300{ 301 auto& vm = Bindings::main_thread_vm(); 302 auto response = Fetch::Infrastructure::Response::create(vm); 303 response->url_list().append(url); 304 HTML::NavigationParams navigation_params { 305 .id = {}, 306 .request = nullptr, 307 .response = response, 308 .origin = HTML::Origin {}, 309 .policy_container = HTML::PolicyContainer {}, 310 .final_sandboxing_flag_set = HTML::SandboxingFlagSet {}, 311 .cross_origin_opener_policy = HTML::CrossOriginOpenerPolicy {}, 312 .coop_enforcement_result = HTML::CrossOriginOpenerPolicyEnforcementResult {}, 313 .reserved_environment = {}, 314 .browsing_context = browsing_context(), 315 }; 316 auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html", move(navigation_params)).release_value_but_fixme_should_propagate_errors(); 317 browsing_context().set_active_document(document); 318 319 auto parser = HTML::HTMLParser::create(document, html, "utf-8"); 320 parser->run(url); 321} 322 323static DeprecatedString s_error_page_url = "file:///res/html/error.html"; 324 325void FrameLoader::set_error_page_url(DeprecatedString error_page_url) 326{ 327 s_error_page_url = error_page_url; 328} 329 330// FIXME: Use an actual templating engine (our own one when it's built, preferably 331// with a way to check these usages at compile time) 332 333void FrameLoader::load_error_page(const AK::URL& failed_url, DeprecatedString const& error) 334{ 335 LoadRequest request = LoadRequest::create_for_url_on_page(s_error_page_url, browsing_context().page()); 336 337 ResourceLoader::the().load( 338 request, 339 [this, failed_url, error](auto data, auto&, auto) { 340 VERIFY(!data.is_null()); 341 StringBuilder builder; 342 SourceGenerator generator { builder }; 343 generator.set("failed_url", escape_html_entities(failed_url.to_deprecated_string())); 344 generator.set("error", escape_html_entities(error)); 345 generator.append(data); 346 load_html(generator.as_string_view(), s_error_page_url); 347 }, 348 [](auto& error, auto) { 349 dbgln("Failed to load error page: {}", error); 350 VERIFY_NOT_REACHED(); 351 }); 352} 353 354void FrameLoader::load_favicon(RefPtr<Gfx::Bitmap> bitmap) 355{ 356 if (auto* page = browsing_context().page()) { 357 if (bitmap) 358 page->client().page_did_change_favicon(*bitmap); 359 else if (s_default_favicon_bitmap) 360 page->client().page_did_change_favicon(*s_default_favicon_bitmap); 361 } 362} 363 364void FrameLoader::resource_did_load() 365{ 366 // This prevents us setting up the document of a removed browsing context container (BCC, e.g. <iframe>), which will cause a crash 367 // if the document contains a script that inserts another BCC as this will use the stale browsing context it previously set up, 368 // even if it's reinserted. 369 // Example: 370 // index.html: 371 // ``` 372 // <body><script> 373 // var i = document.createElement("iframe"); 374 // i.src = "b.html"; 375 // document.body.append(i); 376 // i.remove(); 377 // </script> 378 // ``` 379 // b.html: 380 // ``` 381 // <body><script> 382 // var i = document.createElement("iframe"); 383 // document.body.append(i); 384 // </script> 385 // ``` 386 // Required by Prebid.js, which does this by inserting an <iframe> into a <div> in the active document via innerHTML, 387 // then transfers it to the <html> element: 388 // https://github.com/prebid/Prebid.js/blob/7b7389c5abdd05626f71c3df606a93713d1b9f85/src/utils.js#L597 389 // This is done in the spec by removing all tasks and aborting all fetches when a document is destroyed: 390 // https://html.spec.whatwg.org/multipage/document-lifecycle.html#destroy-a-document 391 if (browsing_context().has_been_discarded()) 392 return; 393 394 auto url = resource()->url(); 395 396 // For 3xx (Redirection) responses, the Location value refers to the preferred target resource for automatically redirecting the request. 397 auto status_code = resource()->status_code(); 398 if (status_code.has_value() && *status_code >= 300 && *status_code <= 399) { 399 auto location = resource()->response_headers().get("Location"); 400 if (location.has_value()) { 401 if (m_redirects_count > maximum_redirects_allowed) { 402 m_redirects_count = 0; 403 load_error_page(url, "Too many redirects"); 404 return; 405 } 406 m_redirects_count++; 407 load(url.complete_url(location.value()), Type::Redirect); 408 return; 409 } 410 } 411 m_redirects_count = 0; 412 413 if (resource()->has_encoding()) { 414 dbgln_if(RESOURCE_DEBUG, "This content has MIME type '{}', encoding '{}'", resource()->mime_type(), resource()->encoding().value()); 415 } else { 416 dbgln_if(RESOURCE_DEBUG, "This content has MIME type '{}', encoding unknown", resource()->mime_type()); 417 } 418 419 auto final_sandboxing_flag_set = HTML::SandboxingFlagSet {}; 420 421 // (Part of https://html.spec.whatwg.org/#navigating-across-documents) 422 // 3. Let responseOrigin be the result of determining the origin given browsingContext, resource's url, finalSandboxFlags, and incumbentNavigationOrigin. 423 // FIXME: Pass incumbentNavigationOrigin 424 auto response_origin = HTML::determine_the_origin(browsing_context(), url, final_sandboxing_flag_set, {}); 425 426 auto& vm = Bindings::main_thread_vm(); 427 auto response = Fetch::Infrastructure::Response::create(vm); 428 response->url_list().append(url); 429 HTML::NavigationParams navigation_params { 430 .id = {}, 431 .request = nullptr, 432 .response = response, 433 .origin = move(response_origin), 434 .policy_container = HTML::PolicyContainer {}, 435 .final_sandboxing_flag_set = final_sandboxing_flag_set, 436 .cross_origin_opener_policy = HTML::CrossOriginOpenerPolicy {}, 437 .coop_enforcement_result = HTML::CrossOriginOpenerPolicyEnforcementResult {}, 438 .reserved_environment = {}, 439 .browsing_context = browsing_context(), 440 }; 441 auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html", move(navigation_params)).release_value_but_fixme_should_propagate_errors(); 442 document->set_url(url); 443 document->set_encoding(resource()->encoding()); 444 document->set_content_type(resource()->mime_type()); 445 446 browsing_context().set_active_document(document); 447 if (auto* page = browsing_context().page()) 448 page->client().page_did_create_main_document(); 449 450 if (!parse_document(*document, resource()->encoded_data())) { 451 load_error_page(url, "Failed to parse content."); 452 return; 453 } 454 455 if (!url.fragment().is_empty()) 456 browsing_context().scroll_to_anchor(url.fragment()); 457 else 458 browsing_context().scroll_to({ 0, 0 }); 459 460 if (auto* page = browsing_context().page()) 461 page->client().page_did_finish_loading(url); 462} 463 464void FrameLoader::resource_did_fail() 465{ 466 // See comment in resource_did_load() about why this is done. 467 if (browsing_context().has_been_discarded()) 468 return; 469 470 load_error_page(resource()->url(), resource()->error()); 471} 472 473}