mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 19:52:45 +00:00 
			
		
		
		
	LibWeb: Implement HTML fragment serialisation and use it in innerHTML
The previous implementation was only about half complete and was tied to Element::innerHTML. This commit separates it out and moves it into HTMLDocumentParser, since serialisation is specified in the parsing section of the spec. It provides a near-complete HTML fragment serialisation algorithm, missing only namespaced attribute names and the `is` value.
This commit is contained in:
		
							parent
							
								
									ed5128d759
								
							
						
					
					
						commit
						f62477c093
					
				
					 6 changed files with 233 additions and 54 deletions
				
			
		|  | @ -256,62 +256,10 @@ void Element::set_inner_html(StringView markup) | |||
|     document().invalidate_layout(); | ||||
| } | ||||
| 
 | ||||
| // https://w3c.github.io/DOM-Parsing/#dom-innerhtml-innerhtml
 | ||||
| String Element::inner_html() const | ||||
| { | ||||
|     auto escape_string = [](const StringView& string, bool attribute_mode) -> String { | ||||
|         // https://html.spec.whatwg.org/multipage/parsing.html#escapingString
 | ||||
|         StringBuilder builder; | ||||
|         for (auto& ch : string) { | ||||
|             if (ch == '&') | ||||
|                 builder.append("&"); | ||||
|             // FIXME: also replace U+00A0 NO-BREAK SPACE with  
 | ||||
|             else if (ch == '"' && attribute_mode) | ||||
|                 builder.append("""); | ||||
|             else if (ch == '<' && !attribute_mode) | ||||
|                 builder.append("<"); | ||||
|             else if (ch == '>' && !attribute_mode) | ||||
|                 builder.append(">"); | ||||
|             else | ||||
|                 builder.append(ch); | ||||
|         } | ||||
|         return builder.to_string(); | ||||
|     }; | ||||
| 
 | ||||
|     StringBuilder builder; | ||||
| 
 | ||||
|     Function<void(const Node&)> recurse = [&](auto& node) { | ||||
|         for (auto* child = node.first_child(); child; child = child->next_sibling()) { | ||||
|             if (child->is_element()) { | ||||
|                 auto& element = verify_cast<Element>(*child); | ||||
|                 builder.append('<'); | ||||
|                 builder.append(element.local_name()); | ||||
|                 element.for_each_attribute([&](auto& name, auto& value) { | ||||
|                     builder.append(' '); | ||||
|                     builder.append(name); | ||||
|                     builder.append('='); | ||||
|                     builder.append('"'); | ||||
|                     builder.append(escape_string(value, true)); | ||||
|                     builder.append('"'); | ||||
|                 }); | ||||
|                 builder.append('>'); | ||||
| 
 | ||||
|                 recurse(*child); | ||||
| 
 | ||||
|                 // FIXME: This should be skipped for void elements
 | ||||
|                 builder.append("</"); | ||||
|                 builder.append(element.local_name()); | ||||
|                 builder.append('>'); | ||||
|             } | ||||
|             if (child->is_text()) { | ||||
|                 auto& text = verify_cast<Text>(*child); | ||||
|                 builder.append(escape_string(text.data(), false)); | ||||
|             } | ||||
|             // FIXME: Also handle Comment, ProcessingInstruction, DocumentType
 | ||||
|         } | ||||
|     }; | ||||
|     recurse(*this); | ||||
| 
 | ||||
|     return builder.to_string(); | ||||
|     return serialize_fragment(/* FIXME: Providing true for the require well-formed flag (which may throw) */); | ||||
| } | ||||
| 
 | ||||
| bool Element::is_focused() const | ||||
|  | @ -376,4 +324,16 @@ void Element::queue_an_element_task(HTML::Task::Source source, Function<void()> | |||
|     HTML::main_thread_event_loop().task_queue().add(move(task)); | ||||
| } | ||||
| 
 | ||||
| // https://html.spec.whatwg.org/multipage/syntax.html#void-elements
 | ||||
| bool Element::is_void_element() const | ||||
| { | ||||
|     return local_name().is_one_of(HTML::TagNames::area, HTML::TagNames::base, HTML::TagNames::br, HTML::TagNames::col, HTML::TagNames::embed, HTML::TagNames::hr, HTML::TagNames::img, HTML::TagNames::input, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::param, HTML::TagNames::source, HTML::TagNames::track, HTML::TagNames::wbr); | ||||
| } | ||||
| 
 | ||||
| // https://html.spec.whatwg.org/multipage/parsing.html#serializes-as-void
 | ||||
| bool Element::serializes_as_void() const | ||||
| { | ||||
|     return is_void_element() || local_name().is_one_of(HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::frame, HTML::TagNames::keygen); | ||||
| } | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -108,6 +108,9 @@ public: | |||
| 
 | ||||
|     void queue_an_element_task(HTML::Task::Source, Function<void()>); | ||||
| 
 | ||||
|     bool is_void_element() const; | ||||
|     bool serializes_as_void() const; | ||||
| 
 | ||||
| protected: | ||||
|     RefPtr<Layout::Node> create_layout_node() override; | ||||
| 
 | ||||
|  |  | |||
|  | @ -24,6 +24,7 @@ | |||
| #include <LibWeb/DOM/ProcessingInstruction.h> | ||||
| #include <LibWeb/DOM/ShadowRoot.h> | ||||
| #include <LibWeb/HTML/HTMLAnchorElement.h> | ||||
| #include <LibWeb/HTML/Parser/HTMLDocumentParser.h> | ||||
| #include <LibWeb/Layout/InitialContainingBlock.h> | ||||
| #include <LibWeb/Layout/Node.h> | ||||
| #include <LibWeb/Layout/TextNode.h> | ||||
|  | @ -768,6 +769,18 @@ void Node::string_replace_all(String const& string) | |||
|     replace_all(node); | ||||
| } | ||||
| 
 | ||||
| // https://w3c.github.io/DOM-Parsing/#dfn-fragment-serializing-algorithm
 | ||||
| String Node::serialize_fragment(/* FIXME: Requires well-formed flag */) const | ||||
| { | ||||
|     // FIXME: Let context document be the value of node's node document.
 | ||||
| 
 | ||||
|     // FIXME: If context document is an HTML document, return an HTML serialization of node.
 | ||||
|     //        (We currently always do this)
 | ||||
|     return HTML::HTMLDocumentParser::serialize_html_fragment(*this); | ||||
| 
 | ||||
|     // FIXME: Otherwise, context document is an XML document; return an XML serialization of node passing the flag require well-formed.
 | ||||
| } | ||||
| 
 | ||||
| // https://dom.spec.whatwg.org/#dom-node-issamenode
 | ||||
| bool Node::is_same_node(Node const* other_node) const | ||||
| { | ||||
|  |  | |||
|  | @ -183,6 +183,8 @@ public: | |||
|     i32 id() const { return m_id; } | ||||
|     static Node* from_id(i32 node_id); | ||||
| 
 | ||||
|     String serialize_fragment() const; | ||||
| 
 | ||||
|     void replace_all(RefPtr<Node>); | ||||
|     void string_replace_all(String const&); | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,5 +1,6 @@ | |||
| /*
 | ||||
|  * Copyright (c) 2020, Andreas Kling <kling@serenityos.org> | ||||
|  * Copyright (c) 2021, Luke Wilde <lukew@serenityos.org> | ||||
|  * | ||||
|  * SPDX-License-Identifier: BSD-2-Clause | ||||
|  */ | ||||
|  | @ -13,6 +14,7 @@ | |||
| #include <LibWeb/DOM/DocumentType.h> | ||||
| #include <LibWeb/DOM/ElementFactory.h> | ||||
| #include <LibWeb/DOM/Event.h> | ||||
| #include <LibWeb/DOM/ProcessingInstruction.h> | ||||
| #include <LibWeb/DOM/Text.h> | ||||
| #include <LibWeb/DOM/Window.h> | ||||
| #include <LibWeb/HTML/EventLoop/EventLoop.h> | ||||
|  | @ -3054,4 +3056,202 @@ NonnullOwnPtr<HTMLDocumentParser> HTMLDocumentParser::create_with_uncertain_enco | |||
|     return make<HTMLDocumentParser>(document, input, encoding); | ||||
| } | ||||
| 
 | ||||
| // https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-serialisation-algorithm
 | ||||
| String HTMLDocumentParser::serialize_html_fragment(DOM::Node const& node) | ||||
| { | ||||
|     // The algorithm takes as input a DOM Element, Document, or DocumentFragment referred to as the node.
 | ||||
|     VERIFY(node.is_element() || node.is_document() || node.is_document_fragment()); | ||||
|     NonnullRefPtr<DOM::Node> actual_node = node; | ||||
| 
 | ||||
|     if (is<DOM::Element>(node)) { | ||||
|         auto& element = verify_cast<DOM::Element>(node); | ||||
| 
 | ||||
|         // 1. If the node serializes as void, then return the empty string.
 | ||||
|         //    (NOTE: serializes as void is defined only on elements in the spec)
 | ||||
|         if (element.serializes_as_void()) | ||||
|             return String::empty(); | ||||
| 
 | ||||
|         // 3. If the node is a template element, then let the node instead be the template element's template contents (a DocumentFragment node).
 | ||||
|         //    (NOTE: This is out of order of the spec to avoid another dynamic cast. The second step just creates a string builder, so it shouldn't matter)
 | ||||
|         if (is<HTML::HTMLTemplateElement>(element)) | ||||
|             actual_node = verify_cast<HTML::HTMLTemplateElement>(element).content(); | ||||
|     } | ||||
| 
 | ||||
|     enum class AttributeMode { | ||||
|         No, | ||||
|         Yes, | ||||
|     }; | ||||
| 
 | ||||
|     auto escape_string = [](StringView const& string, AttributeMode attribute_mode) -> String { | ||||
|         // https://html.spec.whatwg.org/multipage/parsing.html#escapingString
 | ||||
|         StringBuilder builder; | ||||
|         for (auto& ch : string) { | ||||
|             // 1. Replace any occurrence of the "&" character by the string "&".
 | ||||
|             if (ch == '&') | ||||
|                 builder.append("&"); | ||||
|             // 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the string " ".
 | ||||
|             else if (ch == '\xA0') | ||||
|                 builder.append(" "); | ||||
|             // 3. If the algorithm was invoked in the attribute mode, replace any occurrences of the """ character by the string """.
 | ||||
|             else if (ch == '"' && attribute_mode == AttributeMode::Yes) | ||||
|                 builder.append("""); | ||||
|             // 4. If the algorithm was not invoked in the attribute mode, replace any occurrences of the "<" character by the string "<", and any occurrences of the ">" character by the string ">".
 | ||||
|             else if (ch == '<' && attribute_mode == AttributeMode::No) | ||||
|                 builder.append("<"); | ||||
|             else if (ch == '>' && attribute_mode == AttributeMode::No) | ||||
|                 builder.append(">"); | ||||
|             else | ||||
|                 builder.append(ch); | ||||
|         } | ||||
|         return builder.to_string(); | ||||
|     }; | ||||
| 
 | ||||
|     // 2. Let s be a string, and initialize it to the empty string.
 | ||||
|     StringBuilder builder; | ||||
| 
 | ||||
|     // 4. For each child node of the node, in tree order, run the following steps:
 | ||||
|     actual_node->for_each_child([&](DOM::Node& current_node) { | ||||
|         // 1. Let current node be the child node being processed.
 | ||||
| 
 | ||||
|         // 2. Append the appropriate string from the following list to s:
 | ||||
| 
 | ||||
|         if (is<DOM::Element>(current_node)) { | ||||
|             // -> If current node is an Element
 | ||||
|             auto& element = verify_cast<DOM::Element>(current_node); | ||||
| 
 | ||||
|             // 1. If current node is an element in the HTML namespace, the MathML namespace, or the SVG namespace, then let tagname be current node's local name.
 | ||||
|             //    Otherwise, let tagname be current node's qualified name.
 | ||||
|             String tag_name; | ||||
| 
 | ||||
|             if (element.namespace_().is_one_of(Namespace::HTML, Namespace::MathML, Namespace::SVG)) | ||||
|                 tag_name = element.local_name(); | ||||
|             else | ||||
|                 tag_name = element.qualified_name(); | ||||
| 
 | ||||
|             // 2. Append a U+003C LESS-THAN SIGN character (<), followed by tagname.
 | ||||
|             builder.append('<'); | ||||
|             builder.append(tag_name); | ||||
| 
 | ||||
|             // FIXME: 3. If current node's is value is not null, and the element does not have an is attribute in its attribute list,
 | ||||
|             //           then append the string " is="", followed by current node's is value escaped as described below in attribute mode,
 | ||||
|             //           followed by a U+0022 QUOTATION MARK character (").
 | ||||
| 
 | ||||
|             // 4. For each attribute that the element has, append a U+0020 SPACE character, the attribute's serialized name as described below, a U+003D EQUALS SIGN character (=),
 | ||||
|             //    a U+0022 QUOTATION MARK character ("), the attribute's value, escaped as described below in attribute mode, and a second U+0022 QUOTATION MARK character (").
 | ||||
|             //    NOTE: The order of attributes is implementation-defined. The only constraint is that the order must be stable.
 | ||||
|             element.for_each_attribute([&](auto& name, auto& value) { | ||||
|                 builder.append(' '); | ||||
| 
 | ||||
|                 // An attribute's serialized name for the purposes of the previous paragraph must be determined as follows:
 | ||||
| 
 | ||||
|                 // FIXME: -> If the attribute has no namespace:
 | ||||
|                 //              The attribute's serialized name is the attribute's local name.
 | ||||
|                 //           (We currently always do this)
 | ||||
|                 builder.append(name); | ||||
| 
 | ||||
|                 // FIXME: -> If the attribute is in the XML namespace:
 | ||||
|                 //             The attribute's serialized name is the string "xml:" followed by the attribute's local name.
 | ||||
| 
 | ||||
|                 // FIXME: -> If the attribute is in the XMLNS namespace and the attribute's local name is xmlns:
 | ||||
|                 //             The attribute's serialized name is the string "xmlns".
 | ||||
| 
 | ||||
|                 // FIXME: -> If the attribute is in the XMLNS namespace and the attribute's local name is not xmlns:
 | ||||
|                 //             The attribute's serialized name is the string "xmlns:" followed by the attribute's local name.
 | ||||
| 
 | ||||
|                 // FIXME: -> If the attribute is in the XLink namespace:
 | ||||
|                 //             The attribute's serialized name is the string "xlink:" followed by the attribute's local name.
 | ||||
| 
 | ||||
|                 // FIXME: -> If the attribute is in some other namespace:
 | ||||
|                 //             The attribute's serialized name is the attribute's qualified name.
 | ||||
| 
 | ||||
|                 builder.append("=\""); | ||||
|                 builder.append(escape_string(value, AttributeMode::Yes)); | ||||
|                 builder.append('"'); | ||||
|             }); | ||||
| 
 | ||||
|             // 5. Append a U+003E GREATER-THAN SIGN character (>).
 | ||||
|             builder.append('>'); | ||||
| 
 | ||||
|             // 6. If current node serializes as void, then continue on to the next child node at this point.
 | ||||
|             if (element.serializes_as_void()) | ||||
|                 return IterationDecision::Continue; | ||||
| 
 | ||||
|             // 7. Append the value of running the HTML fragment serialization algorithm on the current node element (thus recursing into this algorithm for that element),
 | ||||
|             //    followed by a U+003C LESS-THAN SIGN character (<), a U+002F SOLIDUS character (/), tagname again, and finally a U+003E GREATER-THAN SIGN character (>).
 | ||||
|             builder.append(serialize_html_fragment(element)); | ||||
|             builder.append("</"); | ||||
|             builder.append(tag_name); | ||||
|             builder.append('>'); | ||||
| 
 | ||||
|             return IterationDecision::Continue; | ||||
|         } | ||||
| 
 | ||||
|         if (is<DOM::Text>(current_node)) { | ||||
|             // -> If current node is a Text node
 | ||||
|             auto& text_node = verify_cast<DOM::Text>(current_node); | ||||
|             auto* parent = current_node.parent(); | ||||
| 
 | ||||
|             if (is<DOM::Element>(parent)) { | ||||
|                 auto& parent_element = verify_cast<DOM::Element>(*parent); | ||||
| 
 | ||||
|                 // 1. If the parent of current node is a style, script, xmp, iframe, noembed, noframes, or plaintext element,
 | ||||
|                 //    or if the parent of current node is a noscript element and scripting is enabled for the node, then append the value of current node's data IDL attribute literally.
 | ||||
|                 if (parent_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::script, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes, HTML::TagNames::plaintext) | ||||
|                     || (parent_element.local_name() == HTML::TagNames::noscript && !parent_element.is_scripting_disabled())) { | ||||
|                     builder.append(text_node.data()); | ||||
|                     return IterationDecision::Continue; | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             // 2. Otherwise, append the value of current node's data IDL attribute, escaped as described below.
 | ||||
|             builder.append(escape_string(text_node.data(), AttributeMode::No)); | ||||
|             return IterationDecision::Continue; | ||||
|         } | ||||
| 
 | ||||
|         if (is<DOM::Comment>(current_node)) { | ||||
|             // -> If current node is a Comment
 | ||||
|             auto& comment_node = verify_cast<DOM::Comment>(current_node); | ||||
| 
 | ||||
|             // 1. Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS),
 | ||||
|             //    followed by the value of current node's data IDL attribute, followed by the literal string "-->" (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN).
 | ||||
|             builder.append("<!--"); | ||||
|             builder.append(comment_node.data()); | ||||
|             builder.append("-->"); | ||||
|             return IterationDecision::Continue; | ||||
|         } | ||||
| 
 | ||||
|         if (is<DOM::ProcessingInstruction>(current_node)) { | ||||
|             // -> If current node is a ProcessingInstruction
 | ||||
|             auto& processing_instruction_node = verify_cast<DOM::ProcessingInstruction>(current_node); | ||||
| 
 | ||||
|             // 1. Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK), followed by the value of current node's target IDL attribute,
 | ||||
|             //    followed by a single U+0020 SPACE character, followed by the value of current node's data IDL attribute, followed by a single U+003E GREATER-THAN SIGN character (>).
 | ||||
|             builder.append("<?"); | ||||
|             builder.append(processing_instruction_node.target()); | ||||
|             builder.append(' '); | ||||
|             builder.append(processing_instruction_node.data()); | ||||
|             builder.append('>'); | ||||
|             return IterationDecision::Continue; | ||||
|         } | ||||
| 
 | ||||
|         if (is<DOM::DocumentType>(current_node)) { | ||||
|             // -> If current node is a DocumentType
 | ||||
|             auto& document_type_node = verify_cast<DOM::DocumentType>(current_node); | ||||
| 
 | ||||
|             // 1. Append the literal string "<!DOCTYPE" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+0044 LATIN CAPITAL LETTER D, U+004F LATIN CAPITAL LETTER O,
 | ||||
|             //    U+0043 LATIN CAPITAL LETTER C, U+0054 LATIN CAPITAL LETTER T, U+0059 LATIN CAPITAL LETTER Y, U+0050 LATIN CAPITAL LETTER P, U+0045 LATIN CAPITAL LETTER E),
 | ||||
|             //    followed by a space (U+0020 SPACE), followed by the value of current node's name IDL attribute, followed by the literal string ">" (U+003E GREATER-THAN SIGN).
 | ||||
|             builder.append("<!DOCTYPE "); | ||||
|             builder.append(document_type_node.name()); | ||||
|             builder.append('>'); | ||||
|             return IterationDecision::Continue; | ||||
|         } | ||||
| 
 | ||||
|         return IterationDecision::Continue; | ||||
|     }); | ||||
| 
 | ||||
|     // 5. Return s.
 | ||||
|     return builder.to_string(); | ||||
| } | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -53,6 +53,7 @@ public: | |||
|     DOM::Document& document(); | ||||
| 
 | ||||
|     static NonnullRefPtrVector<DOM::Node> parse_html_fragment(DOM::Element& context_element, const StringView&); | ||||
|     static String serialize_html_fragment(DOM::Node const& node); | ||||
| 
 | ||||
|     enum class InsertionMode { | ||||
| #define __ENUMERATE_INSERTION_MODE(mode) mode, | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Luke Wilde
						Luke Wilde