diff --git a/Userland/Libraries/LibWeb/CMakeLists.txt b/Userland/Libraries/LibWeb/CMakeLists.txt index 3a1f8ed6cb..946d6d32f1 100644 --- a/Userland/Libraries/LibWeb/CMakeLists.txt +++ b/Userland/Libraries/LibWeb/CMakeLists.txt @@ -126,6 +126,7 @@ set(SOURCES DOM/DOMTokenList.cpp DOM/DOMTokenList.idl DOM/Document.cpp + DOM/DocumentLoading.cpp DOM/DocumentFragment.cpp DOM/DocumentLoadEventDelayer.cpp DOM/DocumentType.cpp diff --git a/Userland/Libraries/LibWeb/DOM/DocumentLoading.cpp b/Userland/Libraries/LibWeb/DOM/DocumentLoading.cpp new file mode 100644 index 0000000000..d3f9a95689 --- /dev/null +++ b/Userland/Libraries/LibWeb/DOM/DocumentLoading.cpp @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2020, Andreas Kling + * Copyright (c) 2023, Aliaksandr Kalenik + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Web { + +static bool build_markdown_document(DOM::Document& document, ByteBuffer const& data) +{ + auto markdown_document = Markdown::Document::parse(data); + if (!markdown_document) + return false; + + auto extra_head_contents = R"~~~( + + +)~~~"sv; + + auto parser = HTML::HTMLParser::create(document, markdown_document->render_to_html(extra_head_contents), "utf-8"); + parser->run(document.url()); + return true; +} + +static bool build_text_document(DOM::Document& document, ByteBuffer const& data) +{ + auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(document.append_child(html_element)); + + auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(html_element->append_child(head_element)); + auto title_element = DOM::create_element(document, HTML::TagNames::title, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(head_element->append_child(title_element)); + + auto title_text = document.create_text_node(document.url().basename()); + MUST(title_element->append_child(title_text)); + + auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(html_element->append_child(body_element)); + + auto pre_element = DOM::create_element(document, HTML::TagNames::pre, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(body_element->append_child(pre_element)); + + MUST(pre_element->append_child(document.create_text_node(DeprecatedString::copy(data)))); + return true; +} + +static bool build_image_document(DOM::Document& document, ByteBuffer const& data) +{ + auto image = Platform::ImageCodecPlugin::the().decode_image(data); + if (!image.has_value() || image->frames.is_empty()) + return false; + auto const& frame = image->frames[0]; + auto const& bitmap = frame.bitmap; + if (!bitmap) + return false; + + auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(document.append_child(html_element)); + + auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(html_element->append_child(head_element)); + auto title_element = DOM::create_element(document, HTML::TagNames::title, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(head_element->append_child(title_element)); + + auto basename = LexicalPath::basename(document.url().serialize_path()); + auto title_text = document.heap().allocate(document.realm(), document, DeprecatedString::formatted("{} [{}x{}]", basename, bitmap->width(), bitmap->height())).release_allocated_value_but_fixme_should_propagate_errors(); + MUST(title_element->append_child(*title_text)); + + auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(html_element->append_child(body_element)); + + auto image_element = DOM::create_element(document, HTML::TagNames::img, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(image_element->set_attribute(HTML::AttributeNames::src, document.url().to_deprecated_string())); + MUST(body_element->append_child(image_element)); + + return true; +} + +static bool build_gemini_document(DOM::Document& document, ByteBuffer const& data) +{ + StringView gemini_data { data }; + auto gemini_document = Gemini::Document::parse(gemini_data, document.url()); + DeprecatedString html_data = gemini_document->render_to_html(); + + dbgln_if(GEMINI_DEBUG, "Gemini data:\n\"\"\"{}\"\"\"", gemini_data); + dbgln_if(GEMINI_DEBUG, "Converted to HTML:\n\"\"\"{}\"\"\"", html_data); + + auto parser = HTML::HTMLParser::create(document, html_data, "utf-8"); + parser->run(document.url()); + return true; +} + +static bool build_xml_document(DOM::Document& document, ByteBuffer const& data) +{ + auto encoding = HTML::run_encoding_sniffing_algorithm(document, data); + auto decoder = TextCodec::decoder_for(encoding); + VERIFY(decoder.has_value()); + auto source = decoder->to_utf8(data).release_value_but_fixme_should_propagate_errors(); + XML::Parser parser(source, { .resolve_external_resource = resolve_xml_resource }); + XMLDocumentBuilder builder { document }; + auto result = parser.parse_with_listener(builder); + return !result.is_error() && !builder.has_error(); +} + +static bool build_video_document(DOM::Document& document) +{ + auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(document.append_child(html_element)); + + auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(html_element->append_child(head_element)); + + auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(html_element->append_child(body_element)); + + auto video_element = DOM::create_element(document, HTML::TagNames::video, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + MUST(video_element->set_attribute(HTML::AttributeNames::src, document.url().to_deprecated_string())); + MUST(video_element->set_attribute(HTML::AttributeNames::autoplay, DeprecatedString::empty())); + MUST(video_element->set_attribute(HTML::AttributeNames::controls, DeprecatedString::empty())); + MUST(body_element->append_child(video_element)); + + return true; +} + +bool parse_document(DOM::Document& document, ByteBuffer const& data) +{ + auto& mime_type = document.content_type(); + if (mime_type == "text/html" || mime_type == "image/svg+xml") { + auto parser = HTML::HTMLParser::create_with_uncertain_encoding(document, data); + parser->run(document.url()); + return true; + } + if (mime_type.ends_with("+xml"sv) || mime_type.is_one_of("text/xml", "application/xml")) + return build_xml_document(document, data); + if (mime_type.starts_with("image/"sv)) + return build_image_document(document, data); + if (mime_type.starts_with("video/"sv)) + return build_video_document(document); + if (mime_type == "text/plain" || mime_type == "application/json") + return build_text_document(document, data); + if (mime_type == "text/markdown") + return build_markdown_document(document, data); + if (mime_type == "text/gemini") + return build_gemini_document(document, data); + + return false; +} + +} diff --git a/Userland/Libraries/LibWeb/DOM/DocumentLoading.h b/Userland/Libraries/LibWeb/DOM/DocumentLoading.h new file mode 100644 index 0000000000..0b330e1458 --- /dev/null +++ b/Userland/Libraries/LibWeb/DOM/DocumentLoading.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2020, Andreas Kling + * Copyright (c) 2023, Aliaksandr Kalenik + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include + +namespace Web { + +bool parse_document(DOM::Document& document, ByteBuffer const& data); + +} diff --git a/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp b/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp index 466aa0a036..a7ea529bfb 100644 --- a/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp +++ b/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp @@ -8,18 +8,12 @@ #include #include #include -#include -#include -#include -#include #include #include +#include #include #include -#include -#include #include -#include #include #include #include @@ -49,185 +43,6 @@ FrameLoader::FrameLoader(HTML::BrowsingContext& browsing_context) FrameLoader::~FrameLoader() = default; -static bool build_markdown_document(DOM::Document& document, ByteBuffer const& data) -{ - auto markdown_document = Markdown::Document::parse(data); - if (!markdown_document) - return false; - - auto extra_head_contents = R"~~~( - - -)~~~"sv; - - auto parser = HTML::HTMLParser::create(document, markdown_document->render_to_html(extra_head_contents), "utf-8"); - parser->run(document.url()); - return true; -} - -static bool build_text_document(DOM::Document& document, ByteBuffer const& data) -{ - auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(document.append_child(html_element)); - - auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(html_element->append_child(head_element)); - auto title_element = DOM::create_element(document, HTML::TagNames::title, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(head_element->append_child(title_element)); - - auto title_text = document.create_text_node(document.url().basename()); - MUST(title_element->append_child(title_text)); - - auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(html_element->append_child(body_element)); - - auto pre_element = DOM::create_element(document, HTML::TagNames::pre, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(body_element->append_child(pre_element)); - - MUST(pre_element->append_child(document.create_text_node(DeprecatedString::copy(data)))); - return true; -} - -static bool build_image_document(DOM::Document& document, ByteBuffer const& data) -{ - auto image = Platform::ImageCodecPlugin::the().decode_image(data); - if (!image.has_value() || image->frames.is_empty()) - return false; - auto const& frame = image->frames[0]; - auto const& bitmap = frame.bitmap; - if (!bitmap) - return false; - - auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(document.append_child(html_element)); - - auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(html_element->append_child(head_element)); - auto title_element = DOM::create_element(document, HTML::TagNames::title, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(head_element->append_child(title_element)); - - auto basename = LexicalPath::basename(document.url().serialize_path()); - auto title_text = document.heap().allocate(document.realm(), document, DeprecatedString::formatted("{} [{}x{}]", basename, bitmap->width(), bitmap->height())).release_allocated_value_but_fixme_should_propagate_errors(); - MUST(title_element->append_child(*title_text)); - - auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(html_element->append_child(body_element)); - - auto image_element = DOM::create_element(document, HTML::TagNames::img, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(image_element->set_attribute(HTML::AttributeNames::src, document.url().to_deprecated_string())); - MUST(body_element->append_child(image_element)); - - return true; -} - -static bool build_gemini_document(DOM::Document& document, ByteBuffer const& data) -{ - StringView gemini_data { data }; - auto gemini_document = Gemini::Document::parse(gemini_data, document.url()); - DeprecatedString html_data = gemini_document->render_to_html(); - - dbgln_if(GEMINI_DEBUG, "Gemini data:\n\"\"\"{}\"\"\"", gemini_data); - dbgln_if(GEMINI_DEBUG, "Converted to HTML:\n\"\"\"{}\"\"\"", html_data); - - auto parser = HTML::HTMLParser::create(document, html_data, "utf-8"); - parser->run(document.url()); - return true; -} - -static bool build_xml_document(DOM::Document& document, ByteBuffer const& data) -{ - auto encoding = HTML::run_encoding_sniffing_algorithm(document, data); - auto decoder = TextCodec::decoder_for(encoding); - VERIFY(decoder.has_value()); - auto source = decoder->to_utf8(data).release_value_but_fixme_should_propagate_errors(); - XML::Parser parser(source, { .resolve_external_resource = resolve_xml_resource }); - XMLDocumentBuilder builder { document }; - auto result = parser.parse_with_listener(builder); - return !result.is_error() && !builder.has_error(); -} - -static bool build_video_document(DOM::Document& document) -{ - auto html_element = DOM::create_element(document, HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(document.append_child(html_element)); - - auto head_element = DOM::create_element(document, HTML::TagNames::head, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(html_element->append_child(head_element)); - - auto body_element = DOM::create_element(document, HTML::TagNames::body, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(html_element->append_child(body_element)); - - auto video_element = DOM::create_element(document, HTML::TagNames::video, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); - MUST(video_element->set_attribute(HTML::AttributeNames::src, document.url().to_deprecated_string())); - MUST(video_element->set_attribute(HTML::AttributeNames::autoplay, DeprecatedString::empty())); - MUST(video_element->set_attribute(HTML::AttributeNames::controls, DeprecatedString::empty())); - MUST(body_element->append_child(video_element)); - - return true; -} - -bool FrameLoader::parse_document(DOM::Document& document, ByteBuffer const& data) -{ - auto& mime_type = document.content_type(); - if (mime_type == "text/html" || mime_type == "image/svg+xml") { - auto parser = HTML::HTMLParser::create_with_uncertain_encoding(document, data); - parser->run(document.url()); - return true; - } - if (mime_type.ends_with("+xml"sv) || mime_type.is_one_of("text/xml", "application/xml")) - return build_xml_document(document, data); - if (mime_type.starts_with("image/"sv)) - return build_image_document(document, data); - if (mime_type.starts_with("video/"sv)) - return build_video_document(document); - if (mime_type == "text/plain" || mime_type == "application/json") - return build_text_document(document, data); - if (mime_type == "text/markdown") - return build_markdown_document(document, data); - if (mime_type == "text/gemini") - return build_gemini_document(document, data); - - return false; -} - bool FrameLoader::load(LoadRequest& request, Type type) { if (!request.is_valid()) { diff --git a/Userland/Libraries/LibWeb/Loader/FrameLoader.h b/Userland/Libraries/LibWeb/Loader/FrameLoader.h index c0e5042921..650d6f2364 100644 --- a/Userland/Libraries/LibWeb/Loader/FrameLoader.h +++ b/Userland/Libraries/LibWeb/Loader/FrameLoader.h @@ -45,7 +45,6 @@ private: void load_error_page(const AK::URL& failed_url, DeprecatedString const& error_message); void load_favicon(RefPtr bitmap = nullptr); - bool parse_document(DOM::Document&, ByteBuffer const& data); JS::NonnullGCPtr m_browsing_context; size_t m_redirects_count { 0 };