diff --git a/Userland/Libraries/LibWeb/CMakeLists.txt b/Userland/Libraries/LibWeb/CMakeLists.txt index 24486e80b1..53453a6d0a 100644 --- a/Userland/Libraries/LibWeb/CMakeLists.txt +++ b/Userland/Libraries/LibWeb/CMakeLists.txt @@ -354,6 +354,7 @@ set(SOURCES XHR/EventNames.cpp XHR/XMLHttpRequest.cpp XHR/XMLHttpRequestEventTarget.cpp + XML/XMLDocumentBuilder.cpp ) set(GENERATED_SOURCES @@ -364,7 +365,7 @@ set(GENERATED_SOURCES ) serenity_lib(LibWeb web) -target_link_libraries(LibWeb LibCore LibJS LibMarkdown LibGemini LibGUI LibGfx LibTextCodec LibProtocol LibImageDecoderClient LibWasm) +target_link_libraries(LibWeb LibCore LibJS LibMarkdown LibGemini LibGUI LibGfx LibTextCodec LibProtocol LibImageDecoderClient LibWasm LibXML) function(libweb_js_wrapper class) cmake_parse_arguments(PARSE_ARGV 1 LIBWEB_WRAPPER "ITERABLE" "" "") diff --git a/Userland/Libraries/LibWeb/Forward.h b/Userland/Libraries/LibWeb/Forward.h index cbba73e767..72656a128e 100644 --- a/Userland/Libraries/LibWeb/Forward.h +++ b/Userland/Libraries/LibWeb/Forward.h @@ -7,6 +7,10 @@ #pragma once +namespace Web { +class XMLDocumentBuilder; +} + namespace Web::Cookie { struct Cookie; struct ParsedCookie; diff --git a/Userland/Libraries/LibWeb/HTML/DOMParser.cpp b/Userland/Libraries/LibWeb/HTML/DOMParser.cpp index af780dce2b..c2f6df003a 100644 --- a/Userland/Libraries/LibWeb/HTML/DOMParser.cpp +++ b/Userland/Libraries/LibWeb/HTML/DOMParser.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace Web::HTML { @@ -36,16 +37,23 @@ NonnullRefPtr DOMParser::parse_from_string(String const& string, parser->run("about:blank"); } else { // -> Otherwise - // FIXME: 1. Create an XML parser parse, associated with document, and with XML scripting support disabled. - // 2. Parse string using parser. - // 3. If the previous step resulted in an XML well-formedness or XML namespace well-formedness error, then: - // 1. Assert: document has no child nodes. - // 2. 
Let root be the result of creating an element given document, "parsererror", and "http://www.mozilla.org/newlayout/xml/parsererror.xml". - // 3. Optionally, add attributes or children to root to describe the nature of the parsing error. - // 4. Append root to document. - dbgln("DOMParser::parse_from_string: Unimplemented parser for type: {}", Bindings::idl_enum_to_string(type)); - TODO(); + // 1. Create an XML parser parse, associated with document, and with XML scripting support disabled. + XML::Parser parser(string, { .resolve_external_resource = resolve_xml_resource }); + XMLDocumentBuilder builder { document, XMLScriptingSupport::Disabled }; + // 2. Parse string using parser. + auto result = parser.parse_with_listener(builder); + // 3. If the previous step resulted in an XML well-formedness or XML namespace well-formedness error, then: + if (result.is_error() || builder.has_error()) { + // NOTE: The XML parsing can produce nodes before it hits an error, just remove them. + // 1. Assert: document has no child nodes. + document->remove_all_children(true); + // 2. Let root be the result of creating an element given document, "parsererror", and "http://www.mozilla.org/newlayout/xml/parsererror.xml". + auto root = DOM::create_element(document, "parsererror", "http://www.mozilla.org/newlayout/xml/parsererror.xml"); + // FIXME: 3. Optionally, add attributes or children to root to describe the nature of the parsing error. + // 4. Append root to document. + document->append_child(root); + } } // 3. Return document. 
diff --git a/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp b/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp index d6455511ce..71ab5b1eb9 100644 --- a/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp +++ b/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp @@ -26,11 +26,6 @@ HTMLScriptElement::HTMLScriptElement(DOM::Document& document, DOM::QualifiedName HTMLScriptElement::~HTMLScriptElement() = default; -void HTMLScriptElement::set_parser_document(Badge, DOM::Document& document) -{ - m_parser_document = document; -} - void HTMLScriptElement::begin_delaying_document_load_event(DOM::Document& document) { // https://html.spec.whatwg.org/multipage/scripting.html#concept-script-script @@ -38,11 +33,6 @@ void HTMLScriptElement::begin_delaying_document_load_event(DOM::Document& docume m_document_load_event_delayer.emplace(document); } -void HTMLScriptElement::set_non_blocking(Badge, bool non_blocking) -{ - m_non_blocking = non_blocking; -} - // https://html.spec.whatwg.org/multipage/scripting.html#execute-the-script-block void HTMLScriptElement::execute_script() { diff --git a/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.h b/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.h index 589773eae5..fbaa5eab51 100644 --- a/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.h +++ b/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.h @@ -24,10 +24,18 @@ public: bool is_ready_to_be_parser_executed() const { return m_ready_to_be_parser_executed; } bool failed_to_load() const { return m_failed_to_load; } - void set_parser_document(Badge, DOM::Document&); - void set_non_blocking(Badge, bool); - void set_already_started(Badge, bool b) { m_already_started = b; } - void prepare_script(Badge) { prepare_script(); } + template T> + void set_parser_document(Badge, DOM::Document& document) { m_parser_document = document; } + + template T> + void set_non_blocking(Badge, bool b) { m_non_blocking = b; } + + template T> + void set_already_started(Badge, bool b) { 
m_already_started = b; } + + template T> + void prepare_script(Badge) { prepare_script(); } + void execute_script(); bool is_parser_inserted() const { return !!m_parser_document; } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp index 7422037db2..ba5e00dc13 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp @@ -791,12 +791,12 @@ void HTMLParser::handle_in_head(HTMLToken& token) auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); auto element = create_element_for(token, Namespace::HTML, *adjusted_insertion_location.parent); auto& script_element = verify_cast(*element); - script_element.set_parser_document({}, document()); - script_element.set_non_blocking({}, false); + script_element.set_parser_document(Badge {}, document()); + script_element.set_non_blocking(Badge {}, false); script_element.set_source_line_number({}, token.start_position().line + 1); // FIXME: This +1 is incorrect for script tags whose script does not start on a new line if (m_parsing_fragment) { - script_element.set_already_started({}, true); + script_element.set_already_started(Badge {}, true); } if (m_invoked_via_document_write) { @@ -2223,7 +2223,7 @@ void HTMLParser::handle_text(HTMLToken& token) if (token.is_end_of_file()) { log_parse_error(); if (current_node().local_name() == HTML::TagNames::script) - verify_cast(current_node()).set_already_started({}, true); + verify_cast(current_node()).set_already_started(Badge {}, true); (void)m_stack_of_open_elements.pop(); m_insertion_mode = m_original_insertion_mode; process_using_the_rules_for(m_insertion_mode, token); @@ -2243,7 +2243,7 @@ void HTMLParser::handle_text(HTMLToken& token) m_tokenizer.update_insertion_point(); increment_script_nesting_level(); // FIXME: Check if active speculative HTML parser is null. 
-            script->prepare_script({});
+            script->prepare_script(Badge {});
             decrement_script_nesting_level();
             if (script_nesting_level() == 0)
                 m_parser_pause_flag = false;
diff --git a/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp b/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp
index a977ebca0c..648fbb179a 100644
--- a/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp
+++ b/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 
 namespace Web {
 
@@ -119,6 +120,15 @@ static bool build_gemini_document(DOM::Document& document, const ByteBuffer& dat
     return true;
 }
 
+static bool build_xml_document(DOM::Document& document, const ByteBuffer& data)
+{
+
+    XML::Parser parser(data, { .resolve_external_resource = resolve_xml_resource });
+    XMLDocumentBuilder builder { document };
+    auto result = parser.parse_with_listener(builder);
+    return !result.is_error() && !builder.has_error();
+}
+
 bool FrameLoader::parse_document(DOM::Document& document, const ByteBuffer& data)
 {
     auto& mime_type = document.content_type();
@@ -127,6 +137,8 @@ bool FrameLoader::parse_document(DOM::Document& document, const ByteBuffer& data
         parser->run(document.url());
         return true;
     }
+    if (mime_type.ends_with("+xml") || mime_type.is_one_of("text/xml", "application/xml"))
+        return build_xml_document(document, data);
     if (mime_type.starts_with("image/"))
         return build_image_document(document, data);
     if (mime_type == "text/plain" || mime_type == "application/json")
diff --git a/Userland/Libraries/LibWeb/XML/XMLDocumentBuilder.cpp b/Userland/Libraries/LibWeb/XML/XMLDocumentBuilder.cpp
new file mode 100644
index 0000000000..532d0f695f
--- /dev/null
+++ b/Userland/Libraries/LibWeb/XML/XMLDocumentBuilder.cpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2022, Ali Mohammad Pur
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include
+#include
+#include
+
+inline namespace {
+extern const char* s_xhtml_unified_dtd;
+}
+
+static FlyString s_html_namespace = "http://www.w3.org/1999/xhtml";
+
+namespace Web {
+
+// External-resource resolver handed to XML::Parser: refuses anything without a public identifier or
+// with one that is not in the allow-list below, and answers every accepted one with s_xhtml_unified_dtd.
+ErrorOr resolve_xml_resource(XML::SystemID const&, Optional const& public_id)
+{
+    if (!public_id.has_value())
+        return Error::from_string_literal("Refusing to load disallowed external entity");
+
+    auto public_literal = public_id->public_literal;
+    if (!public_literal.is_one_of(
+            "-//W3C//DTD XHTML 1.0 Transitional//EN",
+            "-//W3C//DTD XHTML 1.1//EN",
+            "-//W3C//DTD XHTML 1.0 Strict//EN",
+            "-//W3C//DTD XHTML 1.0 Frameset//EN",
+            "-//W3C//DTD XHTML Basic 1.0//EN",
+            "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN",
+            "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN",
+            "-//W3C//DTD MathML 2.0//EN",
+            "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"))
+        return Error::from_string_literal("Refusing to load disallowed external entity");
+
+    return s_xhtml_unified_dtd;
+}
+
+// Starts building into `document`; until an element is opened, the document itself is the current node.
+XMLDocumentBuilder::XMLDocumentBuilder(DOM::Document& document, XMLScriptingSupport scripting_support)
+    : m_document(document)
+    , m_current_node(&m_document)
+    , m_scripting_support(scripting_support)
+{
+}
+
+// Creates an element for the start tag, appends it under the current node (or under the template
+// contents when the current node is a template), copies its attributes and makes it the current node.
+void XMLDocumentBuilder::element_start(const XML::Name& name, const HashMap& attributes)
+{
+    if (m_has_error)
+        return;
+
+    // FIXME: This should not live here at all.
+    if (auto it = attributes.find("xmlns"); it != attributes.end()) {
+        if (name == HTML::TagNames::html && it->value != s_html_namespace) {
+            m_has_error = true;
+            return;
+        }
+    }
+
+    auto node = DOM::create_element(m_document, name, {});
+    // When an XML parser with XML scripting support enabled creates a script element,
+    // it must have its parser document set and its "non-blocking" flag must be unset.
+    // FIXME: If the parser was created as part of the XML fragment parsing algorithm, then the element must be marked as "already started" also.
+    if (m_scripting_support == XMLScriptingSupport::Enabled && HTML::TagNames::script == name) {
+        auto& script_element = static_cast(*node);
+        script_element.set_parser_document(Badge {}, m_document);
+        script_element.set_non_blocking(Badge {}, false);
+    }
+    if (HTML::TagNames::template_ == m_current_node->node_name()) {
+        // When an XML parser would append a node to a template element, it must instead append it to the template element's template contents (a DocumentFragment node).
+        static_cast(*m_current_node).content()->append_child(node);
+    } else {
+        m_current_node->append_child(node);
+    }
+
+    for (auto& attribute : attributes)
+        node->set_attribute(attribute.key, attribute.value);
+
+    m_current_node = node.ptr();
+}
+
+// Closes the current element and steps back to its parent; for script elements (with scripting
+// enabled) it first prepares the script and executes a pending parsing-blocking script.
+// NOTE(review): children of a template are appended to its contents in element_start(), so parent_node()
+// of such a child is presumably that fragment, not the template - confirm the VERIFY below then holds.
+void XMLDocumentBuilder::element_end(const XML::Name& name)
+{
+    if (m_has_error)
+        return;
+    VERIFY(m_current_node->node_name() == name);
+    // When an XML parser with XML scripting support enabled creates a script element, [...]
+    // When the element's end tag is subsequently parsed,
+    if (m_scripting_support == XMLScriptingSupport::Enabled && HTML::TagNames::script == name) {
+        // the user agent must perform a microtask checkpoint,
+        HTML::perform_a_microtask_checkpoint();
+        // and then prepare the script element.
+        auto& script_element = static_cast(*m_current_node);
+        script_element.prepare_script(Badge {});
+        // If this causes there to be a pending parsing-blocking script, then the user agent must run the following steps:
+        if (m_document.pending_parsing_blocking_script()) {
+            // Block this instance of the XML parser, such that the event loop will not run tasks that invoke it.
+            // NOTE: Noop.
+
+            // Spin the event loop until the parser's Document has no style sheet that is blocking scripts and the pending parsing-blocking script's "ready to be parser-executed" flag is set.
+            if (m_document.has_a_style_sheet_that_is_blocking_scripts() || !script_element.is_ready_to_be_parser_executed()) {
+                HTML::main_thread_event_loop().spin_until([&] {
+                    return !m_document.has_a_style_sheet_that_is_blocking_scripts() && script_element.is_ready_to_be_parser_executed();
+                });
+            }
+
+            // Unblock this instance of the XML parser, such that tasks that invoke it can again be run.
+            // NOTE: Noop.
+
+            // Execute the pending parsing-blocking script.
+            script_element.execute_script();
+
+            // There is no longer a pending parsing-blocking script.
+        }
+    }
+    m_current_node = m_current_node->parent_node();
+}
+
+// Appends character data to the current node, merging it into the last child when that child is
+// already a text node so that consecutive chunks end up in a single text node.
+void XMLDocumentBuilder::text(const String& data)
+{
+    if (m_has_error)
+        return;
+    auto last = m_current_node->last_child();
+    if (last && last->is_text()) {
+        auto& text_node = static_cast(*last);
+        text_builder.append(text_node.data());
+        text_builder.append(data);
+        text_node.set_data(text_builder.to_string());
+        text_builder.clear();
+    } else {
+        auto node = m_document.create_text_node(data);
+        m_current_node->append_child(node);
+    }
+}
+
+// Appends a comment node to the node that is currently open (the document itself while no element
+// is open), so that a comment nested inside an element keeps its position in the tree.
+void XMLDocumentBuilder::comment(const String& data)
+{
+    // document_end() resets m_current_node to nullptr; there is nothing to append to after that.
+    if (m_has_error || !m_current_node)
+        return;
+    m_current_node->append_child(m_document.create_comment(data));
+}
+
+// Runs the end-of-parsing steps: marks the document interactive, executes the scripts deferred to
+// the end of parsing, then queues the DOMContentLoaded task and the load/pageshow task.
+void XMLDocumentBuilder::document_end()
+{
+    // When an XML parser reaches the end of its input, it must stop parsing.
+    // If the active speculative HTML parser is not null, then stop the speculative HTML parser and return.
+    // NOTE: Noop.
+
+    // Set the insertion point to undefined.
+    m_current_node = nullptr;
+
+    // Update the current document readiness to "interactive".
+    m_document.update_readiness(HTML::DocumentReadyState::Interactive);
+
+    // Pop all the nodes off the stack of open elements.
+    // NOTE: Noop.
+
+    // While the list of scripts that will execute when the document has finished parsing is not empty:
+    while (!m_document.scripts_to_execute_when_parsing_has_finished().is_empty()) {
+        // Spin the event loop until the first script in the list of scripts that will execute when the document has finished parsing has its "ready to be parser-executed" flag set
+        // and the parser's Document has no style sheet that is blocking scripts.
+        HTML::main_thread_event_loop().spin_until([&] {
+            return m_document.scripts_to_execute_when_parsing_has_finished().first().is_ready_to_be_parser_executed()
+                && !m_document.has_a_style_sheet_that_is_blocking_scripts();
+        });
+
+        // Execute the first script in the list of scripts that will execute when the document has finished parsing.
+        m_document.scripts_to_execute_when_parsing_has_finished().first().execute_script();
+
+        // Remove the first script element from the list of scripts that will execute when the document has finished parsing (i.e. shift out the first entry in the list).
+        (void)m_document.scripts_to_execute_when_parsing_has_finished().take_first();
+    }
+    // Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following substeps:
+    old_queue_global_task_with_document(HTML::Task::Source::DOMManipulation, m_document, [document = NonnullRefPtr(m_document)]() mutable {
+        // FIXME: Set the Document's load timing info's DOM content loaded event start time to the current high resolution time given the Document's relevant global object.
+
+        // Fire an event named DOMContentLoaded at the Document object, with its bubbles attribute initialized to true.
+        auto content_loaded_event = DOM::Event::create(HTML::EventNames::DOMContentLoaded);
+        content_loaded_event->set_bubbles(true);
+        document->dispatch_event(content_loaded_event);
+
+        // FIXME: Set the Document's load timing info's DOM content loaded event end time to the current high resolution time given the Document's relevant global object.
+
+        // FIXME: Enable the client message queue of the ServiceWorkerContainer object whose associated service worker client is the Document object's relevant settings object.
+
+        // FIXME: Invoke WebDriver BiDi DOM content loaded with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "pending", and url is the Document object's URL.
+    });
+
+    // Spin the event loop until the set of scripts that will execute as soon as possible and the list of scripts that will execute in order as soon as possible are empty.
+    HTML::main_thread_event_loop().spin_until([&] {
+        return m_document.scripts_to_execute_as_soon_as_possible().is_empty();
+    });
+
+    // Spin the event loop until there is nothing that delays the load event in the Document.
+    HTML::main_thread_event_loop().spin_until([&] {
+        return m_document.number_of_things_delaying_the_load_event() == 0;
+    });
+
+    // Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following steps:
+    old_queue_global_task_with_document(HTML::Task::Source::DOMManipulation, m_document, [document = NonnullRefPtr(m_document)]() mutable {
+        // Update the current document readiness to "complete".
+        document->update_readiness(HTML::DocumentReadyState::Complete);
+
+        // If the Document object's browsing context is null, then abort these steps.
+        if (!document->browsing_context())
+            return;
+
+        // Let window be the Document's relevant global object.
+        NonnullRefPtr window = document->window();
+
+        // FIXME: Set the Document's load timing info's load event start time to the current high resolution time given window.
+
+        // Fire an event named load at window, with legacy target override flag set.
+        // FIXME: The legacy target override flag is currently set by a virtual override of dispatch_event()
+        // We should reorganize this so that the flag appears explicitly here instead.
+        window->dispatch_event(DOM::Event::create(HTML::EventNames::load));
+
+        // FIXME: Invoke WebDriver BiDi load complete with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "complete", and url is the Document object's URL.
+
+        // FIXME: Set the Document object's navigation id to null.
+
+        // FIXME: Set the Document's load timing info's load event end time to the current high resolution time given window.
+
+        // Assert: Document's page showing is false.
+        VERIFY(!document->page_showing());
+
+        // Set the Document's page showing flag to true.
+        document->set_page_showing(true);
+
+        // Fire a page transition event named pageshow at window with false.
+        window->fire_a_page_transition_event(HTML::EventNames::pageshow, false);
+
+        // Completely finish loading the Document.
+        document->completely_finish_loading();
+
+        // FIXME: Queue the navigation timing entry for the Document.
+    });
+
+    // FIXME: If the Document's print when loaded flag is set, then run the printing steps.
+
+    // The Document is now ready for post-load tasks.
+    m_document.set_ready_for_post_load_tasks(true);
+}
+}
+
+inline namespace {
+const char* s_xhtml_unified_dtd = R"xmlxmlxml(
+
+)xmlxmlxml";
+}
diff --git a/Userland/Libraries/LibWeb/XML/XMLDocumentBuilder.h b/Userland/Libraries/LibWeb/XML/XMLDocumentBuilder.h
new file mode 100644
index 0000000000..3b9811d083
--- /dev/null
+++ b/Userland/Libraries/LibWeb/XML/XMLDocumentBuilder.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2022, Ali Mohammad Pur
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace Web {
+
+// Selects whether the builder sets up and runs the script elements it creates.
+enum class XMLScriptingSupport {
+    Disabled,
+    Enabled,
+};
+
+ErrorOr resolve_xml_resource(XML::SystemID const&, Optional const&);
+
+// XML::Listener implementation that turns parser callbacks into DOM nodes under a document and
+// records whether building failed (see has_error()).
+class XMLDocumentBuilder final : public XML::Listener {
+public:
+    XMLDocumentBuilder(DOM::Document& document, XMLScriptingSupport = XMLScriptingSupport::Enabled);
+
+    bool has_error() const { return m_has_error; }
+
+private:
+    virtual void element_start(XML::Name const& name, HashMap const& attributes) override;
+    virtual void element_end(XML::Name const& name) override;
+    virtual void text(String const& data) override;
+    virtual void comment(String const& data) override;
+    virtual void document_end() override;
+
+    DOM::Document& m_document;
+    DOM::Node* m_current_node { nullptr };
+    XMLScriptingSupport m_scripting_support { XMLScriptingSupport::Enabled };
+    bool m_has_error { false };
+    StringBuilder text_builder;
+};
+
+}