diff --git a/Userland/Libraries/LibWeb/DOM/Element.cpp b/Userland/Libraries/LibWeb/DOM/Element.cpp
index 7cc7b34d57..9f420bb5c1 100644
--- a/Userland/Libraries/LibWeb/DOM/Element.cpp
+++ b/Userland/Libraries/LibWeb/DOM/Element.cpp
@@ -256,62 +256,10 @@ void Element::set_inner_html(StringView markup)
     document().invalidate_layout();
 }
 
+// https://w3c.github.io/DOM-Parsing/#dom-innerhtml-innerhtml
 String Element::inner_html() const
 {
-    auto escape_string = [](const StringView& string, bool attribute_mode) -> String {
-        // https://html.spec.whatwg.org/multipage/parsing.html#escapingString
-        StringBuilder builder;
-        for (auto& ch : string) {
-            if (ch == '&')
-                builder.append("&amp;");
-            // FIXME: also replace U+00A0 NO-BREAK SPACE with &nbsp;
-            else if (ch == '"' && attribute_mode)
-                builder.append("&quot;");
-            else if (ch == '<' && !attribute_mode)
-                builder.append("&lt;");
-            else if (ch == '>' && !attribute_mode)
-                builder.append("&gt;");
-            else
-                builder.append(ch);
-        }
-        return builder.to_string();
-    };
-
-    StringBuilder builder;
-
-    Function<void(const Node&)> recurse = [&](auto& node) {
-        for (auto* child = node.first_child(); child; child = child->next_sibling()) {
-            if (child->is_element()) {
-                auto& element = verify_cast<Element>(*child);
-                builder.append('<');
-                builder.append(element.local_name());
-                element.for_each_attribute([&](auto& name, auto& value) {
-                    builder.append(' ');
-                    builder.append(name);
-                    builder.append('=');
-                    builder.append('"');
-                    builder.append(escape_string(value, true));
-                    builder.append('"');
-                });
-                builder.append('>');
-
-                recurse(*child);
-
-                // FIXME: This should be skipped for void elements
-                builder.append("</");
-                builder.append(element.local_name());
-                builder.append('>');
-            }
-            if (child->is_text()) {
-                auto& text = verify_cast<Text>(*child);
-                builder.append(escape_string(text.data(), false));
-            }
-            // FIXME: Also handle Comment, ProcessingInstruction, DocumentType
-        }
-    };
-    recurse(*this);
-
-    return builder.to_string();
+    return serialize_fragment(/* FIXME: Providing true for the require well-formed flag (which may throw) */);
 }
 
 bool Element::is_focused() const
@@ -376,4 +324,18 @@ void Element::queue_an_element_task(HTML::Task::Source source, Function<void()>
     HTML::main_thread_event_loop().task_queue().add(move(task));
 }
 
+// https://html.spec.whatwg.org/multipage/syntax.html#void-elements
+// Returns true if this element's local name is one of the void element tag names listed below.
+bool Element::is_void_element() const
+{
+    return local_name().is_one_of(HTML::TagNames::area, HTML::TagNames::base, HTML::TagNames::br, HTML::TagNames::col, HTML::TagNames::embed, HTML::TagNames::hr, HTML::TagNames::img, HTML::TagNames::input, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::param, HTML::TagNames::source, HTML::TagNames::track, HTML::TagNames::wbr);
+}
+
+// https://html.spec.whatwg.org/multipage/parsing.html#serializes-as-void
+// Returns true for void elements and additionally for basefont, bgsound, frame and keygen.
+bool Element::serializes_as_void() const
+{
+    return is_void_element() || local_name().is_one_of(HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::frame, HTML::TagNames::keygen);
+}
+
 }
diff --git a/Userland/Libraries/LibWeb/DOM/Element.h b/Userland/Libraries/LibWeb/DOM/Element.h
index 8f7edebc00..4d54e2ab57 100644
--- a/Userland/Libraries/LibWeb/DOM/Element.h
+++ b/Userland/Libraries/LibWeb/DOM/Element.h
@@ -108,6 +108,9 @@ public:
 
     void queue_an_element_task(HTML::Task::Source, Function<void()>);
 
+    bool is_void_element() const;
+    bool serializes_as_void() const;
+
 protected:
     RefPtr<Layout::Node> create_layout_node() override;
 
diff --git a/Userland/Libraries/LibWeb/DOM/Node.cpp b/Userland/Libraries/LibWeb/DOM/Node.cpp
index b5367692eb..7db87d6e37 100644
--- a/Userland/Libraries/LibWeb/DOM/Node.cpp
+++ b/Userland/Libraries/LibWeb/DOM/Node.cpp
@@ -26,0 +27 @@
+#include <LibWeb/HTML/Parser/HTMLDocumentParser.h>
@@ -768,6 +769,19 @@ void Node::string_replace_all(String const& string)
     replace_all(node);
 }
 
+// https://w3c.github.io/DOM-Parsing/#dfn-fragment-serializing-algorithm
+// Serializes the children of this node. For now this always produces an HTML serialization.
+String Node::serialize_fragment(/* FIXME: Requires well-formed flag */) const
+{
+    // FIXME: Let context document be the value of node's node document.
+
+    // FIXME: If context document is an HTML document, return an HTML serialization of node.
+    //        (We currently always do this)
+    return HTML::HTMLDocumentParser::serialize_html_fragment(*this);
+
+    // FIXME: Otherwise, context document is an XML document; return an XML serialization of node passing the flag require well-formed.
+}
+
 // https://dom.spec.whatwg.org/#dom-node-issamenode
 bool Node::is_same_node(Node const* other_node) const
 {
diff --git a/Userland/Libraries/LibWeb/DOM/Node.h b/Userland/Libraries/LibWeb/DOM/Node.h
index 22266eeafa..91ab9dee9d 100644
--- a/Userland/Libraries/LibWeb/DOM/Node.h
+++ b/Userland/Libraries/LibWeb/DOM/Node.h
@@ -183,6 +183,8 @@ public:
     i32 id() const { return m_id; }
     static Node* from_id(i32 node_id);
 
+    String serialize_fragment() const;
+
     void replace_all(RefPtr<Node>);
     void string_replace_all(String const&);
 
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
index 17324abc95..cf9663fa02 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2020, Andreas Kling
+ * Copyright (c) 2021, Luke Wilde
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -15,0 +17 @@
+#include <LibWeb/DOM/ProcessingInstruction.h>
@@ -3054,4 +3056,209 @@ NonnullOwnPtr<HTMLDocumentParser> HTMLDocumentParser::create_with_uncertain_enco
     return make<HTMLDocumentParser>(document, input, encoding);
 }
 
+// https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-serialisation-algorithm
+// Returns the HTML serialization of the children of `node`, which must be an Element, Document or DocumentFragment.
+// The node itself is not part of the output; child elements are serialized by recursing into this function.
+String HTMLDocumentParser::serialize_html_fragment(DOM::Node const& node)
+{
+    // The algorithm takes as input a DOM Element, Document, or DocumentFragment referred to as the node.
+    VERIFY(node.is_element() || node.is_document() || node.is_document_fragment());
+    NonnullRefPtr<DOM::Node> actual_node = node;
+
+    if (is<DOM::Element>(node)) {
+        auto& element = verify_cast<DOM::Element>(node);
+
+        // 1. If the node serializes as void, then return the empty string.
+        //    (NOTE: serializes as void is defined only on elements in the spec)
+        if (element.serializes_as_void())
+            return String::empty();
+
+        // 3. If the node is a template element, then let the node instead be the template element's template contents (a DocumentFragment node).
+        //    (NOTE: This is out of order of the spec to avoid another dynamic cast. The second step just creates a string builder, so it shouldn't matter)
+        if (is<HTML::HTMLTemplateElement>(element))
+            actual_node = verify_cast<HTML::HTMLTemplateElement>(element).content();
+    }
+
+    enum class AttributeMode {
+        No,
+        Yes,
+    };
+
+    auto escape_string = [](StringView const& string, AttributeMode attribute_mode) -> String {
+        // https://html.spec.whatwg.org/multipage/parsing.html#escapingString
+        // NOTE: The string is walked byte by byte and is assumed to be UTF-8, so U+00A0 NO-BREAK SPACE has to be matched as its two-byte encoding (0xC2 0xA0).
+        //       Testing for a lone 0xA0 byte would leave the 0xC2 lead byte behind and would also corrupt other code points that contain a 0xA0 byte (e.g. U+00E0 is 0xC3 0xA0).
+        StringBuilder builder;
+        for (size_t i = 0; i < string.length(); ++i) {
+            auto ch = string[i];
+            // 1. Replace any occurrence of the "&" character by the string "&amp;".
+            if (ch == '&')
+                builder.append("&amp;");
+            // 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the string "&nbsp;".
+            else if (ch == '\xC2' && i + 1 < string.length() && string[i + 1] == '\xA0') {
+                builder.append("&nbsp;");
+                ++i; // The 0xA0 continuation byte has been consumed as well.
+            }
+            // 3. If the algorithm was invoked in the attribute mode, replace any occurrences of the """ character by the string "&quot;".
+            else if (ch == '"' && attribute_mode == AttributeMode::Yes)
+                builder.append("&quot;");
+            // 4. If the algorithm was not invoked in the attribute mode, replace any occurrences of the "<" character by the string "&lt;", and any occurrences of the ">" character by the string "&gt;".
+            else if (ch == '<' && attribute_mode == AttributeMode::No)
+                builder.append("&lt;");
+            else if (ch == '>' && attribute_mode == AttributeMode::No)
+                builder.append("&gt;");
+            else
+                builder.append(ch);
+        }
+        return builder.to_string();
+    };
+
+    // 2. Let s be a string, and initialize it to the empty string.
+    StringBuilder builder;
+
+    // 4. For each child node of the node, in tree order, run the following steps:
+    actual_node->for_each_child([&](DOM::Node& current_node) {
+        // 1. Let current node be the child node being processed.
+
+        // 2. Append the appropriate string from the following list to s:
+
+        if (is<DOM::Element>(current_node)) {
+            // -> If current node is an Element
+            auto& element = verify_cast<DOM::Element>(current_node);
+
+            // 1. If current node is an element in the HTML namespace, the MathML namespace, or the SVG namespace, then let tagname be current node's local name.
+            //    Otherwise, let tagname be current node's qualified name.
+            String tag_name;
+
+            if (element.namespace_().is_one_of(Namespace::HTML, Namespace::MathML, Namespace::SVG))
+                tag_name = element.local_name();
+            else
+                tag_name = element.qualified_name();
+
+            // 2. Append a U+003C LESS-THAN SIGN character (<), followed by tagname.
+            builder.append('<');
+            builder.append(tag_name);
+
+            // FIXME: 3. If current node's is value is not null, and the element does not have an is attribute in its attribute list,
+            //           then append the string " is="", followed by current node's is value escaped as described below in attribute mode,
+            //           followed by a U+0022 QUOTATION MARK character (").
+
+            // 4. For each attribute that the element has, append a U+0020 SPACE character, the attribute's serialized name as described below, a U+003D EQUALS SIGN character (=),
+            //    a U+0022 QUOTATION MARK character ("), the attribute's value, escaped as described below in attribute mode, and a second U+0022 QUOTATION MARK character (").
+            // NOTE: The order of attributes is implementation-defined. The only constraint is that the order must be stable.
+            element.for_each_attribute([&](auto& name, auto& value) {
+                builder.append(' ');
+
+                // An attribute's serialized name for the purposes of the previous paragraph must be determined as follows:
+
+                // FIXME: -> If the attribute has no namespace:
+                //              The attribute's serialized name is the attribute's local name.
+                //           (We currently always do this)
+                builder.append(name);
+
+                // FIXME: -> If the attribute is in the XML namespace:
+                //              The attribute's serialized name is the string "xml:" followed by the attribute's local name.
+
+                // FIXME: -> If the attribute is in the XMLNS namespace and the attribute's local name is xmlns:
+                //              The attribute's serialized name is the string "xmlns".
+
+                // FIXME: -> If the attribute is in the XMLNS namespace and the attribute's local name is not xmlns:
+                //              The attribute's serialized name is the string "xmlns:" followed by the attribute's local name.
+
+                // FIXME: -> If the attribute is in the XLink namespace:
+                //              The attribute's serialized name is the string "xlink:" followed by the attribute's local name.
+
+                // FIXME: -> If the attribute is in some other namespace:
+                //              The attribute's serialized name is the attribute's qualified name.
+
+                builder.append("=\"");
+                builder.append(escape_string(value, AttributeMode::Yes));
+                builder.append('"');
+            });
+
+            // 5. Append a U+003E GREATER-THAN SIGN character (>).
+            builder.append('>');
+
+            // 6. If current node serializes as void, then continue on to the next child node at this point.
+            if (element.serializes_as_void())
+                return IterationDecision::Continue;
+
+            // 7. Append the value of running the HTML fragment serialization algorithm on the current node element (thus recursing into this algorithm for that element),
+            //    followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/), tagname again, and finally a U+003E GREATER-THAN SIGN character (>).
+            builder.append(serialize_html_fragment(element));
+            builder.append("</");
+            builder.append(tag_name);
+            builder.append('>');
+
+            return IterationDecision::Continue;
+        }
+
+        if (is<DOM::Text>(current_node)) {
+            // -> If current node is a Text node
+            auto& text_node = verify_cast<DOM::Text>(current_node);
+            auto* parent = current_node.parent();
+
+            if (is<DOM::Element>(parent)) {
+                auto& parent_element = verify_cast<DOM::Element>(*parent);
+
+                // 1. If the parent of current node is a style, script, xmp, iframe, noembed, noframes, or plaintext element,
+                //    or if the parent of current node is a noscript element and scripting is enabled for the node, then append the value of current node's data IDL attribute literally.
+                if (parent_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::script, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes, HTML::TagNames::plaintext)
+                    || (parent_element.local_name() == HTML::TagNames::noscript && !parent_element.is_scripting_disabled())) {
+                    builder.append(text_node.data());
+                    return IterationDecision::Continue;
+                }
+            }
+
+            // 2. Otherwise, append the value of current node's data IDL attribute, escaped as described below.
+            builder.append(escape_string(text_node.data(), AttributeMode::No));
+            return IterationDecision::Continue;
+        }
+
+        if (is<DOM::Comment>(current_node)) {
+            // -> If current node is a Comment
+            auto& comment_node = verify_cast<DOM::Comment>(current_node);
+
+            // 1. Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS),
+            //    followed by the value of current node's data IDL attribute, followed by the literal string "-->" (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN).
+            builder.append("<!--");
+            builder.append(comment_node.data());
+            builder.append("-->");
+            return IterationDecision::Continue;
+        }
+
+        if (is<DOM::ProcessingInstruction>(current_node)) {
+            // -> If current node is a ProcessingInstruction
+            auto& processing_instruction_node = verify_cast<DOM::ProcessingInstruction>(current_node);
+
+            // 1. Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK), followed by the value of current node's target IDL attribute,
+            //    followed by a single U+0020 SPACE character, followed by the value of current node's data IDL attribute, followed by a single U+003E GREATER-THAN SIGN character (>).
+            builder.append("<?");
+            builder.append(processing_instruction_node.target());
+            builder.append(' ');
+            builder.append(processing_instruction_node.data());
+            builder.append('>');
+            return IterationDecision::Continue;
+        }
+
+        if (is<DOM::DocumentType>(current_node)) {
+            // -> If current node is a DocumentType
+            auto& document_type_node = verify_cast<DOM::DocumentType>(current_node);
+
+            // 1. Append the literal string "<!DOCTYPE" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+0044 LATIN CAPITAL LETTER D, U+004F LATIN CAPITAL LETTER O,
+            //    U+0043 LATIN CAPITAL LETTER C, U+0054 LATIN CAPITAL LETTER T, U+0059 LATIN CAPITAL LETTER Y, U+0050 LATIN CAPITAL LETTER P, U+0045 LATIN CAPITAL LETTER E),
+            //    followed by a space (U+0020 SPACE), followed by the value of current node's name IDL attribute, followed by the literal string ">" (U+003E GREATER-THAN SIGN).
+            builder.append("<!DOCTYPE ");
+            builder.append(document_type_node.name());
+            builder.append('>');
+            return IterationDecision::Continue;
+        }
+
+        return IterationDecision::Continue;
+    });
+
+    // 5. Return s.
+    return builder.to_string();
+}
+
 }
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.h
index 5cefb17a43..385daba6db 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.h
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.h
@@ -53,6 +53,7 @@ public:
     DOM::Document& document();
 
     static NonnullRefPtrVector<DOM::Node> parse_html_fragment(DOM::Element& context_element, const StringView&);
+    static String serialize_html_fragment(DOM::Node const& node);
 
     enum class InsertionMode {
 #define __ENUMERATE_INSERTION_MODE(mode) mode,